| author | Ranjanikar Nikhil Prabhakarrao <rprabhakarra@nvidia.com> | 2018-12-13 06:59:20 -0500 |
|---|---|---|
| committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2020-06-30 13:07:26 -0400 |
| commit | f56874aec2ec61f2c341b813cc76de5acc51ea12 | |
| tree | efd3d6a3921c930a76bf0cb7011ca6b9809ed5f3 | |
| parent | bbef4c6927a13a24821c43cb2b6af72f859f7deb | |
gpu: nvgpu: add speculative barrier
Data can be speculatively stored and code flow can be hijacked.
To mitigate this problem, insert a speculation barrier.
Bug 200447167
Change-Id: Ia865ff2add8b30de49aa970715625b13e8f71c08
Signed-off-by: Ranjanikar Nikhil Prabhakarrao <rprabhakarra@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1972221
(cherry picked from commit f0762ed4831b3fe6cc953a4a4ec26c2537dcb69f)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/1996052
Reviewed-by: automaticguardword <automaticguardword@nvidia.com>
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: Deepak Nibade <dnibade@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
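
This change applies the standard Spectre variant 1 hardening pattern: wherever a value that userspace controls (an ioctl command, a table index, a buffer size) has just passed a validity or bounds check and is about to select a code path or index kernel data, a call to nvgpu_speculation_barrier() is placed between the check and the use, so the CPU cannot keep executing speculatively with an out-of-range value. A minimal sketch of the idea follows; the table, size constant, and handler are hypothetical names for illustration only, and the barrier is presumably a thin wrapper over the platform's CPU speculation-barrier primitive.

```c
#define MY_TABLE_SIZE 8U            /* hypothetical constant, illustration only */

static u64 my_table[MY_TABLE_SIZE]; /* hypothetical kernel-side table */

static long my_ioctl_example(unsigned long arg)
{
	u32 idx = (u32)arg;         /* value fully controlled by userspace */

	if (idx >= MY_TABLE_SIZE)   /* architectural bounds check */
		return -EINVAL;

	/*
	 * Without a barrier the CPU may speculatively run the load below
	 * with an out-of-range idx before the check retires, leaving a
	 * cache footprint that can leak kernel data. The barrier stops
	 * further speculation until the check has resolved.
	 */
	nvgpu_speculation_barrier();

	return (long)my_table[idx];
}
```

The hunks below follow this shape: the barrier sits between the point where a user-supplied value is validated or copied in and the switch statement, array access, or size computation that consumes it.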
| mode | file | lines added |
|---|---|---|
| -rw-r--r-- | drivers/gpu/nvgpu/common/fifo/submit.c | 1 |
| -rw-r--r-- | drivers/gpu/nvgpu/common/fifo/tsg.c | 1 |
| -rw-r--r-- | drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 2 |
| -rw-r--r-- | drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 1 |
| -rw-r--r-- | drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 3 |
| -rw-r--r-- | drivers/gpu/nvgpu/os/linux/dmabuf_vidmem.c | 1 |
| -rw-r--r-- | drivers/gpu/nvgpu/os/linux/ioctl_as.c | 2 |
| -rw-r--r-- | drivers/gpu/nvgpu/os/linux/ioctl_channel.c | 5 |
| -rw-r--r-- | drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c | 11 |
| -rw-r--r-- | drivers/gpu/nvgpu/os/linux/ioctl_dbg.c | 10 |
| -rw-r--r-- | drivers/gpu/nvgpu/os/linux/ioctl_tsg.c | 2 |
| -rw-r--r-- | drivers/gpu/nvgpu/os/linux/sched.c | 1 |
| -rw-r--r-- | drivers/gpu/nvgpu/tu104/gr_tu104.c | 549 |

13 files changed, 589 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c
index d518fbfb..b0f38ff1 100644
--- a/drivers/gpu/nvgpu/common/fifo/submit.c
+++ b/drivers/gpu/nvgpu/common/fifo/submit.c
@@ -212,6 +212,7 @@ static int nvgpu_submit_append_gpfifo_user_direct(struct channel_gk20a *c,
 	u32 end = start + len; /* exclusive */
 	int err;
 
+	nvgpu_speculation_barrier();
 	if (end > gpfifo_size) {
 		/* wrap-around */
 		int length0 = gpfifo_size - start;
diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c
index f6c718f0..841dd465 100644
--- a/drivers/gpu/nvgpu/common/fifo/tsg.c
+++ b/drivers/gpu/nvgpu/common/fifo/tsg.c
@@ -219,6 +219,7 @@ int gk20a_tsg_set_runlist_interleave(struct tsg_gk20a *tsg, u32 level)
 
 	nvgpu_log(g, gpu_dbg_sched, "tsgid=%u interleave=%u", tsg->tsgid, level);
 
+	nvgpu_speculation_barrier();
 	switch (level) {
 	case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW:
 	case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM:
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 989ee5c9..636d5714 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3943,6 +3943,7 @@ int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr,
 	/* no endian swap ? */
 
 	nvgpu_mutex_acquire(&gr->zbc_lock);
+	nvgpu_speculation_barrier();
 	switch (zbc_val->type) {
 	case GK20A_ZBC_TYPE_COLOR:
 		/* search existing tables */
@@ -4047,6 +4048,7 @@ int gr_gk20a_query_zbc(struct gk20a *g, struct gr_gk20a *gr,
 	u32 index = query_params->index_size;
 	u32 i;
 
+	nvgpu_speculation_barrier();
 	switch (query_params->type) {
 	case GK20A_ZBC_TYPE_INVALID:
 		query_params->index_size = GK20A_ZBC_TABLE_SIZE;
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 4b42678f..2bcb08a4 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -52,6 +52,7 @@ bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
 {
 	bool valid = false;
 
+	nvgpu_speculation_barrier();
 	switch (class_num) {
 	case PASCAL_COMPUTE_A:
 	case PASCAL_A:
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 1dfecfc1..5820a695 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -77,6 +77,7 @@ bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num)
 {
 	bool valid = false;
 
+	nvgpu_speculation_barrier();
 	switch (class_num) {
 	case VOLTA_COMPUTE_A:
 	case VOLTA_A:
@@ -106,6 +107,7 @@ bool gr_gv11b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
 {
 	bool valid = false;
 
+	nvgpu_speculation_barrier();
 	switch (class_num) {
 	case VOLTA_A:
 	case PASCAL_A:
@@ -140,6 +142,7 @@ bool gr_gv11b_is_valid_compute_class(struct gk20a *g, u32 class_num)
 {
 	bool valid = false;
 
+	nvgpu_speculation_barrier();
 	switch (class_num) {
 	case VOLTA_COMPUTE_A:
 	case PASCAL_COMPUTE_A:
diff --git a/drivers/gpu/nvgpu/os/linux/dmabuf_vidmem.c b/drivers/gpu/nvgpu/os/linux/dmabuf_vidmem.c
index 8b38a9e1..bada5dc7 100644
--- a/drivers/gpu/nvgpu/os/linux/dmabuf_vidmem.c
+++ b/drivers/gpu/nvgpu/os/linux/dmabuf_vidmem.c
@@ -244,6 +244,7 @@ int nvgpu_vidmem_buf_access_memory(struct gk20a *g, struct dma_buf *dmabuf,
 	vidmem_buf = dmabuf->priv;
 	mem = vidmem_buf->mem;
 
+	nvgpu_speculation_barrier();
 	switch (cmd) {
 	case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ:
 		nvgpu_mem_rd_n(g, mem, offset, buffer, size);
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_as.c b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
index 3fa8aa2c..f0cec178 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_as.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
@@ -170,6 +170,7 @@ static int gk20a_as_ioctl_map_buffer_batch(
 		nvgpu_vm_unmap(as_share->vm, unmap_args.offset, &batch);
 	}
 
+	nvgpu_speculation_barrier();
 	if (err) {
 		nvgpu_vm_mapping_batch_finish(as_share->vm, &batch);
 
@@ -355,6 +356,7 @@ long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	if (err)
 		return err;
 
+	nvgpu_speculation_barrier();
 	switch (cmd) {
 	case NVGPU_AS_IOCTL_BIND_CHANNEL:
 		trace_gk20a_as_ioctl_bind_channel(g->name);
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
index 22177171..3c844491 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
@@ -290,6 +290,7 @@ static int gk20a_channel_cycle_stats_snapshot(struct channel_gk20a *ch,
 	if (!args->dmabuf_fd)
 		return -EINVAL;
 
+	nvgpu_speculation_barrier();
 	/* handle the command (most frequent cases first) */
 	switch (args->cmd) {
 	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH:
@@ -874,6 +875,7 @@ clean_up:
  */
 u32 nvgpu_get_common_runlist_level(u32 level)
 {
+	nvgpu_speculation_barrier();
 	switch (level) {
 	case NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW:
 		return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW;
@@ -982,6 +984,7 @@ u32 nvgpu_get_ioctl_compute_preempt_mode(u32 compute_preempt_mode)
  */
 static u32 nvgpu_get_common_graphics_preempt_mode(u32 graphics_preempt_mode)
 {
+	nvgpu_speculation_barrier();
 	switch (graphics_preempt_mode) {
 	case NVGPU_GRAPHICS_PREEMPTION_MODE_WFI:
 		return NVGPU_PREEMPTION_MODE_GRAPHICS_WFI;
@@ -998,6 +1001,7 @@ static u32 nvgpu_get_common_graphics_preempt_mode(u32 graphics_preempt_mode)
  */
 static u32 nvgpu_get_common_compute_preempt_mode(u32 compute_preempt_mode)
 {
+	nvgpu_speculation_barrier();
 	switch (compute_preempt_mode) {
 	case NVGPU_COMPUTE_PREEMPTION_MODE_WFI:
 		return NVGPU_PREEMPTION_MODE_COMPUTE_WFI;
@@ -1121,6 +1125,7 @@ long gk20a_channel_ioctl(struct file *filp,
 	/* this ioctl call keeps a ref to the file which keeps a ref to the
 	 * channel */
 
+	nvgpu_speculation_barrier();
 	switch (cmd) {
 	case NVGPU_IOCTL_CHANNEL_OPEN:
 		err = gk20a_channel_open_ioctl(ch->g,
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
index 271c5d92..954b08b5 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
@@ -366,6 +366,7 @@ gk20a_ctrl_ioctl_gpu_characteristics(
 	if (request->gpu_characteristics_buf_size > 0) {
 		size_t write_size = sizeof(gpu);
 
+		nvgpu_speculation_barrier();
 		if (write_size > request->gpu_characteristics_buf_size)
 			write_size = request->gpu_characteristics_buf_size;
 
@@ -556,6 +557,7 @@ static int gk20a_ctrl_get_tpc_masks(struct gk20a *g,
 	if (args->mask_buf_size > 0) {
 		size_t write_size = gpc_tpc_mask_size;
 
+		nvgpu_speculation_barrier();
 		if (write_size > args->mask_buf_size)
 			write_size = args->mask_buf_size;
 
@@ -580,6 +582,7 @@ static int gk20a_ctrl_get_fbp_l2_masks(
 	if (args->mask_buf_size > 0) {
 		size_t write_size = fbp_l2_mask_size;
 
+		nvgpu_speculation_barrier();
 		if (write_size > args->mask_buf_size)
 			write_size = args->mask_buf_size;
 
@@ -1245,6 +1248,7 @@ static int nvgpu_gpu_clk_set_info(struct gk20a *g,
 			nvgpu_gpu_convert_clk_domain(clk_info.clk_domain)))
 			return -EINVAL;
 	}
+	nvgpu_speculation_barrier();
 
 	entry = (struct nvgpu_gpu_clk_info __user *)
 		(uintptr_t)args->clk_info_entries;
@@ -1264,6 +1268,7 @@ static int nvgpu_gpu_clk_set_info(struct gk20a *g,
 			nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), freq_mhz);
 	}
 
+	nvgpu_speculation_barrier();
 	ret = nvgpu_clk_arb_commit_request_fd(g, session, fd);
 	if (ret < 0)
 		return ret;
@@ -1333,6 +1338,7 @@ static int nvgpu_gpu_clk_get_info(struct gk20a *g,
 		clk_info.clk_type = args->clk_type;
 	}
 
+	nvgpu_speculation_barrier();
 	switch (clk_info.clk_type) {
 	case NVGPU_GPU_CLK_TYPE_TARGET:
 		err = nvgpu_clk_arb_get_session_target_mhz(session,
@@ -1366,6 +1372,7 @@ static int nvgpu_gpu_clk_get_info(struct gk20a *g,
 			return -EFAULT;
 	}
 
+	nvgpu_speculation_barrier();
 	args->num_entries = num_entries;
 
 	return 0;
@@ -1403,6 +1410,7 @@ static int nvgpu_gpu_get_voltage(struct gk20a *g,
 	if (err)
 		return err;
 
+	nvgpu_speculation_barrier();
 	switch (args->which) {
 	case NVGPU_GPU_VOLTAGE_CORE:
 		err = volt_get_voltage(g, CTRL_VOLT_DOMAIN_LOGIC, &args->voltage);
@@ -1625,6 +1633,7 @@ static int nvgpu_gpu_set_deterministic_opts(struct gk20a *g,
 			break;
 	}
 
+	nvgpu_speculation_barrier();
 	nvgpu_rwsem_up_read(&g->deterministic_busy);
 
 out:
@@ -1668,6 +1677,7 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 		gk20a_idle(g);
 	}
 
+	nvgpu_speculation_barrier();
 	switch (cmd) {
 	case NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE:
 		get_ctx_size_args = (struct nvgpu_gpu_zcull_get_ctx_size_args *)buf;
@@ -1713,6 +1723,7 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 		zbc_val->format = set_table_args->format;
 		zbc_val->type = set_table_args->type;
 
+		nvgpu_speculation_barrier();
 		switch (zbc_val->type) {
 		case GK20A_ZBC_TYPE_COLOR:
 			for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
index dc732dc5..0c9b10b5 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
@@ -314,6 +314,7 @@ static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(
 	if (args->sm_error_state_record_size > 0) {
 		size_t write_size = sizeof(*sm_error_state);
 
+		nvgpu_speculation_barrier();
 		if (write_size > args->sm_error_state_record_size)
 			write_size = args->sm_error_state_record_size;
 
@@ -361,6 +362,7 @@ static int nvgpu_dbg_timeout_enable(struct dbg_session_gk20a *dbg_s,
 	nvgpu_log(g, gpu_dbg_gpu_dbg, "Timeouts mode requested : %d",
 			timeout_mode);
 
+	nvgpu_speculation_barrier();
 	switch (timeout_mode) {
 	case NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE:
 		if (dbg_s->is_timeout_disabled == true)
@@ -917,6 +919,7 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
 		ops_offset += num_ops;
 	}
 
+	nvgpu_speculation_barrier();
 	nvgpu_kfree(g, linux_fragment);
 
 	/* enable powergate, if previously disabled */
@@ -1007,6 +1010,7 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 
 static u32 nvgpu_hwpm_ctxsw_mode_to_common_mode(u32 mode)
 {
+	nvgpu_speculation_barrier();
 	switch (mode){
 	case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_NO_CTXSW:
 		return NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW;
@@ -1153,6 +1157,7 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
 		goto clean_up;
 	}
 
+	nvgpu_speculation_barrier();
 	switch (action) {
 	case NVGPU_DBG_GPU_SUSPEND_ALL_SMS:
 		gr_gk20a_suspend_context(ch);
@@ -1366,6 +1371,7 @@ static int gk20a_dbg_gpu_events_ctrl(struct dbg_session_gk20a *dbg_s,
 		return -EINVAL;
 	}
 
+	nvgpu_speculation_barrier();
 	switch (args->cmd) {
 	case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_ENABLE:
 		gk20a_dbg_gpu_events_enable(dbg_s);
@@ -1536,6 +1542,7 @@ nvgpu_dbg_gpu_ioctl_suspend_resume_contexts(struct dbg_session_gk20a *dbg_s,
 	if (err)
 		return err;
 
+	nvgpu_speculation_barrier();
 	switch (args->action) {
 	case NVGPU_DBG_GPU_SUSPEND_ALL_CONTEXTS:
 		err = g->ops.gr.suspend_contexts(g, dbg_s,
@@ -1627,6 +1634,7 @@ static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s,
 		size -= access_size;
 		offset += access_size;
 	}
+	nvgpu_speculation_barrier();
 
 fail_idle:
 	gk20a_idle(g);
@@ -1899,6 +1907,7 @@ static int nvgpu_dbg_gpu_set_sm_exception_type_mask(
 	struct gk20a *g = dbg_s->g;
 	u32 sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_NONE;
 
+	nvgpu_speculation_barrier();
 	switch (args->exception_type_mask) {
 	case NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK_FATAL:
 		sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL;
@@ -1970,6 +1979,7 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 	/* protect from threaded user space calls */
 	nvgpu_mutex_acquire(&dbg_s->ioctl_lock);
 
+	nvgpu_speculation_barrier();
 	switch (cmd) {
 	case NVGPU_DBG_GPU_IOCTL_BIND_CHANNEL:
 		err = dbg_bind_channel_gk20a(dbg_s,
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c
index a26559f5..2f8cb3ae 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c
@@ -361,6 +361,7 @@ static int gk20a_tsg_event_id_ctrl(struct gk20a *g, struct tsg_gk20a *tsg,
 	if (args->event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX)
 		return -EINVAL;
 
+	nvgpu_speculation_barrier();
 	switch (args->cmd) {
 	case NVGPU_IOCTL_CHANNEL_EVENT_ID_CMD_ENABLE:
 		err = gk20a_tsg_event_id_enable(tsg, args->event_id, &fd);
@@ -572,6 +573,7 @@ static int gk20a_tsg_ioctl_read_single_sm_error_state(struct gk20a *g,
 	if (args->record_size > 0) {
 		size_t write_size = sizeof(*sm_error_state);
 
+		nvgpu_speculation_barrier();
 		if (write_size > args->record_size)
 			write_size = args->record_size;
 
diff --git a/drivers/gpu/nvgpu/os/linux/sched.c b/drivers/gpu/nvgpu/os/linux/sched.c
index 15cbf1ec..30c58a19 100644
--- a/drivers/gpu/nvgpu/os/linux/sched.c
+++ b/drivers/gpu/nvgpu/os/linux/sched.c
@@ -447,6 +447,7 @@ long gk20a_sched_dev_ioctl(struct file *filp, unsigned int cmd,
 		return -EFAULT;
 	}
 
+	nvgpu_speculation_barrier();
 	switch (cmd) {
 	case NVGPU_SCHED_IOCTL_GET_TSGS:
 		err = gk20a_sched_dev_ioctl_get_tsgs(g,
diff --git a/drivers/gpu/nvgpu/tu104/gr_tu104.c b/drivers/gpu/nvgpu/tu104/gr_tu104.c
new file mode 100644
index 00000000..fa6995ac
--- /dev/null
+++ b/drivers/gpu/nvgpu/tu104/gr_tu104.c
@@ -0,0 +1,549 @@
+<<<<<<< HEAD (bbef4c gpu: nvgpu: initialize masks for the perfmon counters 3)
+=======
+/*
+ * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <nvgpu/types.h>
+#include <nvgpu/soc.h>
+#include <nvgpu/io.h>
+#include <nvgpu/utils.h>
+#include <nvgpu/gk20a.h>
+#include <nvgpu/channel.h>
+#include <nvgpu/netlist.h>
+
+#include "gk20a/gr_gk20a.h"
+#include "gk20a/gr_pri_gk20a.h"
+
+#include "gp10b/gr_gp10b.h"
+
+#include "gv11b/gr_gv11b.h"
+
+#include "tu104/gr_tu104.h"
+
+#include <nvgpu/hw/tu104/hw_gr_tu104.h>
+
+bool gr_tu104_is_valid_class(struct gk20a *g, u32 class_num)
+{
+	nvgpu_speculation_barrier();
+	switch (class_num) {
+	case TURING_CHANNEL_GPFIFO_A:
+	case TURING_A:
+	case TURING_COMPUTE_A:
+	case TURING_DMA_COPY_A:
+		return true;
+	default:
+		break;
+	}
+
+	return gr_gv11b_is_valid_class(g, class_num);
+};
+
+bool gr_tu104_is_valid_gfx_class(struct gk20a *g, u32 class_num)
+{
+	nvgpu_speculation_barrier();
+	switch (class_num) {
+	case TURING_A:
+		return true;
+	default:
+		break;
+	}
+
+	return gr_gv11b_is_valid_gfx_class(g, class_num);
+}
+
+bool gr_tu104_is_valid_compute_class(struct gk20a *g, u32 class_num)
+{
+	nvgpu_speculation_barrier();
+	switch (class_num) {
+	case TURING_COMPUTE_A:
+		return true;
+	default:
+		break;
+	}
+
+	return gr_gv11b_is_valid_compute_class(g, class_num);
+}
+
+int gr_tu104_init_sw_bundle64(struct gk20a *g)
+{
+	u32 i;
+	u32 last_bundle_data_lo = 0;
+	u32 last_bundle_data_hi = 0;
+	int err = 0;
+	struct netlist_av64_list *sw_bundle64_init =
+			&g->netlist_vars->sw_bundle64_init;
+
+	for (i = 0U; i < sw_bundle64_init->count; i++) {
+		if (i == 0U ||
+		    (last_bundle_data_lo != sw_bundle64_init->l[i].value_lo) ||
+		    (last_bundle_data_hi != sw_bundle64_init->l[i].value_hi)) {
+			nvgpu_writel(g, gr_pipe_bundle_data_r(),
+				sw_bundle64_init->l[i].value_lo);
+			nvgpu_writel(g, gr_pipe_bundle_data_hi_r(),
+				sw_bundle64_init->l[i].value_hi);
+
+			last_bundle_data_lo = sw_bundle64_init->l[i].value_lo;
+			last_bundle_data_hi = sw_bundle64_init->l[i].value_hi;
+		}
+
+		nvgpu_writel(g, gr_pipe_bundle_address_r(),
+			sw_bundle64_init->l[i].addr);
+
+		if (gr_pipe_bundle_address_value_v(sw_bundle64_init->l[i].addr)
+				== GR_GO_IDLE_BUNDLE) {
+			err = gr_gk20a_wait_idle(g,
+				gk20a_get_gr_idle_timeout(g),
+				GR_IDLE_CHECK_DEFAULT);
+		} else if (nvgpu_platform_is_silicon(g)) {
+			err = gr_gk20a_wait_fe_idle(g,
+				gk20a_get_gr_idle_timeout(g),
+				GR_IDLE_CHECK_DEFAULT);
+		}
+		if (err != 0) {
+			break;
+		}
+	}
+
+	return err;
+}
+
+int gr_tu104_alloc_global_ctx_buffers(struct gk20a *g)
+{
+	int err;
+	struct gr_gk20a *gr = &g->gr;
+	u32 rtv_circular_buffer_size;
+
+	nvgpu_log_fn(g, " ");
+
+	rtv_circular_buffer_size =
+		(gr_scc_rm_rtv_cb_size_div_256b_default_f() +
+		gr_scc_rm_rtv_cb_size_div_256b_db_adder_f()) *
+		gr_scc_bundle_cb_size_div_256b_byte_granularity_v();
+	nvgpu_log_info(g, "rtv_circular_buffer_size : %u",
+		rtv_circular_buffer_size);
+
+	err = gk20a_gr_alloc_ctx_buffer(g,
+			&gr->global_ctx_buffer[RTV_CIRCULAR_BUFFER],
+			rtv_circular_buffer_size);
+	if (err != 0) {
+		return err;
+	}
+
+	err = gr_gk20a_alloc_global_ctx_buffers(g);
+	if (err != 0) {
+		goto clean_up;
+	}
+
+	return 0;
+
+clean_up:
+	nvgpu_err(g, "fail");
+	gk20a_gr_destroy_ctx_buffer(g,
+		&gr->global_ctx_buffer[RTV_CIRCULAR_BUFFER]);
+
+	return err;
+}
+
+int gr_tu104_map_global_ctx_buffers(struct gk20a *g, struct vm_gk20a *vm,
+			struct nvgpu_gr_ctx *gr_ctx, bool vpr)
+{
+	int err;
+	u64 *g_bfr_va;
+	u64 *g_bfr_size;
+	int *g_bfr_index;
+	struct gr_gk20a *gr = &g->gr;
+	struct nvgpu_mem *mem;
+	u64 gpu_va;
+
+	nvgpu_log_fn(g, " ");
+
+	g_bfr_va = gr_ctx->global_ctx_buffer_va;
+	g_bfr_size = gr_ctx->global_ctx_buffer_size;
+	g_bfr_index = gr_ctx->global_ctx_buffer_index;
+
+	/* RTV circular buffer */
+	mem = &gr->global_ctx_buffer[RTV_CIRCULAR_BUFFER].mem;
+	gpu_va = nvgpu_gmmu_map(vm, mem, mem->size, 0,
+			gk20a_mem_flag_none, true, mem->aperture);
+	if (gpu_va == 0ULL) {
+		return -ENOMEM;
+	}
+
+	g_bfr_va[RTV_CIRCULAR_BUFFER_VA] = gpu_va;
+	g_bfr_size[RTV_CIRCULAR_BUFFER_VA] = mem->size;
+	g_bfr_index[RTV_CIRCULAR_BUFFER_VA] = RTV_CIRCULAR_BUFFER;
+
+	err = gr_gk20a_map_global_ctx_buffers(g, vm, gr_ctx, vpr);
+	if (err != 0) {
+		goto clean_up;
+	}
+
+	return 0;
+
+clean_up:
+	nvgpu_err(g, "fail");
+	nvgpu_gmmu_unmap(vm, mem, gpu_va);
+
+	return err;
+}
+
+static void gr_tu104_commit_rtv_circular_buffer(struct gk20a *g,
+	struct nvgpu_gr_ctx *gr_ctx,
+	u64 addr, u32 size, u32 gfxpAddSize, bool patch)
+{
+	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_rm_rtv_cb_base_r(),
+		gr_scc_rm_rtv_cb_base_addr_39_8_f(addr), patch);
+	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_rm_rtv_cb_size_r(),
+		gr_scc_rm_rtv_cb_size_div_256b_f(size), patch);
+	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_rm_rtv_cb_base_r(),
+		gr_gpcs_gcc_rm_rtv_cb_base_addr_39_8_f(addr), patch);
+	gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_rm_gfxp_reserve_r(),
+		gr_scc_rm_gfxp_reserve_rtv_cb_size_div_256b_f(gfxpAddSize),
+		patch);
+}
+
+int gr_tu104_commit_global_ctx_buffers(struct gk20a *g,
+	struct nvgpu_gr_ctx *gr_ctx, bool patch)
+{
+	int err;
+	u64 addr;
+	u32 size;
+	u32 gfxpaddsize = 0;
+
+	nvgpu_log_fn(g, " ");
+
+	err = gr_gk20a_commit_global_ctx_buffers(g, gr_ctx, patch);
+	if (err != 0) {
+		return err;
+	}
+
+	if (patch) {
+		int err;
+		err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false);
+		if (err != 0) {
+			return err;
+		}
+	}
+
+	/* RTV circular buffer */
+	addr = gr_ctx->global_ctx_buffer_va[RTV_CIRCULAR_BUFFER_VA] >>
+		U64(gr_scc_rm_rtv_cb_base_addr_39_8_align_bits_f());
+
+	size = (gr_scc_rm_rtv_cb_size_div_256b_default_f() +
+		gr_scc_rm_rtv_cb_size_div_256b_db_adder_f());
+
+	gr_tu104_commit_rtv_circular_buffer(g, gr_ctx, addr, size,
+		gfxpaddsize, patch);
+
+	if (patch) {
+		gr_gk20a_ctx_patch_write_end(g, gr_ctx, false);
+	}
+
+	return 0;
+}
+
+int gr_tu104_alloc_gfxp_rtv_cb(struct gk20a *g,
+	struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm)
+{
+	int err;
+	u32 rtv_cb_size;
+
+	nvgpu_log_fn(g, " ");
+
+	rtv_cb_size =
+		(gr_scc_rm_rtv_cb_size_div_256b_default_f() +
+		gr_scc_rm_rtv_cb_size_div_256b_db_adder_f() +
+		gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f()) *
+		gr_scc_rm_rtv_cb_size_div_256b_byte_granularity_v();
+
+	err = gr_gp10b_alloc_buffer(vm,
+			rtv_cb_size,
+			&gr_ctx->gfxp_rtvcb_ctxsw_buffer);
+
+	return err;
+}
+
+void gr_tu104_commit_gfxp_rtv_cb(struct gk20a *g,
+	struct nvgpu_gr_ctx *gr_ctx, bool patch)
+{
+	u64 addr;
+	u32 rtv_cb_size;
+	u32 gfxp_addr_size;
+
+	nvgpu_log_fn(g, " ");
+
+	rtv_cb_size =
+		(gr_scc_rm_rtv_cb_size_div_256b_default_f() +
+		gr_scc_rm_rtv_cb_size_div_256b_db_adder_f() +
+		gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f());
+	gfxp_addr_size = gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f();
+
+	/* GFXP RTV circular buffer */
+	addr = (u64)(u64_lo32(gr_ctx->gfxp_rtvcb_ctxsw_buffer.gpu_va) >>
+		gr_scc_rm_rtv_cb_base_addr_39_8_align_bits_f()) |
+		(u64)(u64_hi32(gr_ctx->gfxp_rtvcb_ctxsw_buffer.gpu_va) <<
+		(32U - gr_scc_rm_rtv_cb_base_addr_39_8_align_bits_f()));
+
+
+	gr_tu104_commit_rtv_circular_buffer(g, gr_ctx, addr,
+		rtv_cb_size,
+		gfxp_addr_size,
+		patch);
+}
+
+void gr_tu104_bundle_cb_defaults(struct gk20a *g)
+{
+	struct gr_gk20a *gr = &g->gr;
+
+	gr->bundle_cb_default_size =
+		gr_scc_bundle_cb_size_div_256b__prod_v();
+	gr->min_gpm_fifo_depth =
+		gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v();
+	gr->bundle_cb_token_limit =
+		gr_pd_ab_dist_cfg2_token_limit_init_v();
+}
+
+void gr_tu104_cb_size_default(struct gk20a *g)
+{
+	struct gr_gk20a *gr = &g->gr;
+
+	if (gr->attrib_cb_default_size == 0U) {
+		gr->attrib_cb_default_size =
+			gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v();
+	}
+	gr->alpha_cb_default_size =
+		gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
+	gr->attrib_cb_gfxp_default_size =
+		gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
+	gr->attrib_cb_gfxp_size =
+		gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
+}
+
+void gr_tu104_free_gr_ctx(struct gk20a *g,
+		struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx)
+{
+	nvgpu_log_fn(g, " ");
+
+	if (gr_ctx != NULL) {
+		nvgpu_dma_unmap_free(vm, &gr_ctx->gfxp_rtvcb_ctxsw_buffer);
+	}
+
+	gr_gk20a_free_gr_ctx(g, vm, gr_ctx);
+}
+
+void gr_tu104_enable_gpc_exceptions(struct gk20a *g)
+{
+	struct gr_gk20a *gr = &g->gr;
+	u32 tpc_mask;
+
+	gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(),
+		gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f());
+
+	tpc_mask =
+		gr_gpcs_gpccs_gpc_exception_en_tpc_f((1 << gr->max_tpc_per_gpc_count) - 1);
+
+	gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(),
+		(tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1) |
+		gr_gpcs_gpccs_gpc_exception_en_gpccs_f(1) |
+		gr_gpcs_gpccs_gpc_exception_en_gpcmmu_f(1)));
+}
+
+int gr_tu104_get_offset_in_gpccs_segment(struct gk20a *g,
+	enum ctxsw_addr_type addr_type,
+	u32 num_tpcs,
+	u32 num_ppcs,
+	u32 reg_list_ppc_count,
+	u32 *__offset_in_segment)
+{
+	u32 offset_in_segment = 0;
+	u32 num_pes_per_gpc = nvgpu_get_litter_value(g,
+				GPU_LIT_NUM_PES_PER_GPC);
+
+	if (addr_type == CTXSW_ADDR_TYPE_TPC) {
+		/*
+		 * reg = g->netlist_vars->ctxsw_regs.tpc.l;
+		 * offset_in_segment = 0;
+		 */
+	} else if (addr_type == CTXSW_ADDR_TYPE_PPC) {
+		/*
+		 * The ucode stores TPC data before PPC data.
+		 * Advance offset past TPC data to PPC data.
+		 */
+		offset_in_segment =
+			((g->netlist_vars->ctxsw_regs.tpc.count *
+				num_tpcs) << 2);
+	} else if (addr_type == CTXSW_ADDR_TYPE_GPC) {
+		/*
+		 * The ucode stores TPC/PPC data before GPC data.
+		 * Advance offset past TPC/PPC data to GPC data.
+		 *
+		 * Note 1 PES_PER_GPC case
+		 */
+		if (num_pes_per_gpc > 1U) {
+			offset_in_segment =
+				(((g->netlist_vars->ctxsw_regs.tpc.count *
+					num_tpcs) << 2) +
+					((reg_list_ppc_count * num_ppcs) << 2));
+		} else {
+			offset_in_segment =
+				((g->netlist_vars->ctxsw_regs.tpc.count *
+					num_tpcs) << 2);
+		}
+	} else if ((addr_type == CTXSW_ADDR_TYPE_EGPC) ||
+			(addr_type == CTXSW_ADDR_TYPE_ETPC)) {
+		if (num_pes_per_gpc > 1U) {
+			offset_in_segment =
+				((g->netlist_vars->ctxsw_regs.tpc.count *
+					num_tpcs) << 2) +
+				((reg_list_ppc_count * num_ppcs) << 2) +
+				(g->netlist_vars->ctxsw_regs.gpc.count << 2);
+		} else {
+			offset_in_segment =
+				((g->netlist_vars->ctxsw_regs.tpc.count *
+					num_tpcs) << 2) +
+				(g->netlist_vars->ctxsw_regs.gpc.count << 2);
+		}
+
+		/* aligned to next 256 byte */
+		offset_in_segment = ALIGN(offset_in_segment, 256);
+
+		nvgpu_log(g, gpu_dbg_info | gpu_dbg_gpu_dbg,
+			"egpc etpc offset_in_segment 0x%#08x",
+			offset_in_segment);
+	} else {
+		nvgpu_log_fn(g, "Unknown address type.");
+		return -EINVAL;
+	}
+
+	*__offset_in_segment = offset_in_segment;
+	return 0;
+}
+
+static void gr_tu104_set_sm_disp_ctrl(struct gk20a *g, u32 data)
+{
+	u32 reg_val;
+
+	nvgpu_log_fn(g, " ");
+
+	reg_val = nvgpu_readl(g, gr_gpcs_tpcs_sm_disp_ctrl_r());
+
+	if ((data & NVC5C0_SET_SM_DISP_CTRL_COMPUTE_SHADER_QUAD_MASK)
+			== NVC5C0_SET_SM_DISP_CTRL_COMPUTE_SHADER_QUAD_DISABLE) {
+		reg_val = set_field(reg_val,
+			gr_gpcs_tpcs_sm_disp_ctrl_compute_shader_quad_m(),
+			gr_gpcs_tpcs_sm_disp_ctrl_compute_shader_quad_disable_f()
+			);
+	} else if ((data & NVC5C0_SET_SM_DISP_CTRL_COMPUTE_SHADER_QUAD_MASK)
+			== NVC5C0_SET_SM_DISP_CTRL_COMPUTE_SHADER_QUAD_ENABLE) {
+		reg_val = set_field(reg_val,
+			gr_gpcs_tpcs_sm_disp_ctrl_compute_shader_quad_m(),
+			gr_gpcs_tpcs_sm_disp_ctrl_compute_shader_quad_enable_f()
+			);
+	}
+
+	nvgpu_writel(g, gr_gpcs_tpcs_sm_disp_ctrl_r(), reg_val);
+}
+
+int gr_tu104_handle_sw_method(struct gk20a *g, u32 addr,
+			u32 class_num, u32 offset, u32 data)
+{
+	nvgpu_log_fn(g, " ");
+
+	if (class_num == TURING_COMPUTE_A) {
+		switch (offset << 2) {
+		case NVC5C0_SET_SHADER_EXCEPTIONS:
+			gv11b_gr_set_shader_exceptions(g, data);
+			break;
+		case NVC5C0_SET_SKEDCHECK:
+			gr_gv11b_set_skedcheck(g, data);
+			break;
+		case NVC5C0_SET_SM_DISP_CTRL:
+			gr_tu104_set_sm_disp_ctrl(g, data);
+			break;
+		case NVC5C0_SET_SHADER_CUT_COLLECTOR:
+			gr_gv11b_set_shader_cut_collector(g, data);
+			break;
+		default:
+			goto fail;
+		}
+	}
+
+	if (class_num == TURING_A) {
+		switch (offset << 2) {
+		case NVC597_SET_SHADER_EXCEPTIONS:
+			gv11b_gr_set_shader_exceptions(g, data);
+			break;
+		case NVC597_SET_CIRCULAR_BUFFER_SIZE:
+			g->ops.gr.set_circular_buffer_size(g, data);
+			break;
+		case NVC597_SET_ALPHA_CIRCULAR_BUFFER_SIZE:
+			g->ops.gr.set_alpha_circular_buffer_size(g, data);
+			break;
+		case NVC597_SET_GO_IDLE_TIMEOUT:
+			gr_gv11b_set_go_idle_timeout(g, data);
+			break;
+		case NVC097_SET_COALESCE_BUFFER_SIZE:
+			gr_gv11b_set_coalesce_buffer_size(g, data);
+			break;
+		case NVC597_SET_TEX_IN_DBG:
+			gr_gv11b_set_tex_in_dbg(g, data);
+			break;
+		case NVC597_SET_SKEDCHECK:
+			gr_gv11b_set_skedcheck(g, data);
+			break;
+		case NVC597_SET_BES_CROP_DEBUG3:
+			g->ops.gr.set_bes_crop_debug3(g, data);
+			break;
+		case NVC597_SET_BES_CROP_DEBUG4:
+			g->ops.gr.set_bes_crop_debug4(g, data);
+			break;
+		case NVC597_SET_SM_DISP_CTRL:
+			gr_tu104_set_sm_disp_ctrl(g, data);
+			break;
+		case NVC597_SET_SHADER_CUT_COLLECTOR:
+			gr_gv11b_set_shader_cut_collector(g, data);
+			break;
+		default:
+			goto fail;
+		}
+	}
+	return 0;
+
+fail:
+	return -EINVAL;
+}
+
+void gr_tu104_init_sm_dsm_reg_info(void)
+{
+	return;
+}
+
+void gr_tu104_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
+			u32 *num_sm_dsm_perf_ctrl_regs,
+			u32 **sm_dsm_perf_ctrl_regs,
+			u32 *ctrl_register_stride)
+{
+	*num_sm_dsm_perf_ctrl_regs = 0;
+	*sm_dsm_perf_ctrl_regs = NULL;
+	*ctrl_register_stride = 0;
+}
+>>>>>>> CHANGE (f0762e gpu: nvgpu: add speculative barrier)
