 drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c  | 265
 drivers/gpu/nvgpu/gk20a/gk20a.c       |  20
 drivers/gpu/nvgpu/gk20a/gk20a.h       |   1
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c    |  13
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h    |   7
 drivers/gpu/nvgpu/gk20a/hw_fb_gk20a.h |  10
 drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h |  72
 drivers/gpu/nvgpu/gm20b/gr_gm20b.c    |   5
 drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h |   8
 drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h |  68
 include/uapi/linux/nvgpu.h            |  42
 11 files changed, 506 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 7b617a03..5df420ff 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -20,9 +20,16 @@
 #include <linux/anon_inodes.h>
 #include <linux/nvgpu.h>
 #include <uapi/linux/nvgpu.h>
+#include <linux/delay.h>
 
 #include "gk20a.h"
+#include "gr_gk20a.h"
 #include "fence_gk20a.h"
+#include "regops_gk20a.h"
+#include "hw_gr_gk20a.h"
+#include "hw_fb_gk20a.h"
+#include "hw_proj_gk20a.h"
+
 
 int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp)
 {
@@ -257,6 +264,238 @@ static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g,
 	return err;
 }
 
+/* Invalidate i-cache for Kepler & Maxwell */
+static int nvgpu_gpu_ioctl_inval_icache(
+		struct gk20a *g,
+		struct nvgpu_gpu_inval_icache_args *args)
+{
+
+	int err = 0;
+	u32 cache_ctrl, regval;
+	struct channel_gk20a *ch;
+	struct nvgpu_dbg_gpu_reg_op ops;
+
+	ch = gk20a_get_channel_from_file(args->channel_fd);
+
+	ops.op = REGOP(READ_32);
+	ops.type = REGOP(TYPE_GR_CTX);
+	ops.status = REGOP(STATUS_SUCCESS);
+	ops.value_hi = 0;
+	ops.and_n_mask_lo = 0;
+	ops.and_n_mask_hi = 0;
+	ops.offset = gr_pri_gpc0_gcc_dbg_r();
+
+	/* Take the global lock, since we'll be doing global regops */
+	mutex_lock(&g->dbg_sessions_lock);
+
+	err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1);
+
+	regval = ops.value_lo;
+
+	if (!err) {
+		ops.op = REGOP(WRITE_32);
+		ops.value_lo = set_field(regval, gr_pri_gpcs_gcc_dbg_invalidate_m(), 1);
+		err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0);
+	}
+
+	if (err) {
+		gk20a_err(dev_from_gk20a(g), "Failed to access register\n");
+		goto end;
+	}
+
+	cache_ctrl = gk20a_readl(g, gr_pri_gpc0_tpc0_sm_cache_control_r());
+	cache_ctrl = set_field(cache_ctrl, gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(), 1);
+	gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cache_control_r(), cache_ctrl);
+
+end:
+	mutex_unlock(&g->dbg_sessions_lock);
+	return err;
+}
+
+static int nvgpu_gpu_ioctl_set_mmu_debug_mode(
+		struct gk20a *g,
+		struct nvgpu_gpu_mmu_debug_mode_args *args)
+{
+	int err = 0;
+	u32 mmu_debug_ctrl;
+
+	err = gk20a_busy(g->dev);
+	if (err) {
+		gk20a_err(dev_from_gk20a(g), "failed to power on gpu\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&g->dbg_sessions_lock);
+
+	if (args->state == 1) {
+		mmu_debug_ctrl = fb_mmu_debug_ctrl_debug_enabled_v();
+		g->mmu_debug_ctrl = true;
+	} else {
+		mmu_debug_ctrl = fb_mmu_debug_ctrl_debug_disabled_v();
+		g->mmu_debug_ctrl = false;
+	}
+
+	mmu_debug_ctrl = gk20a_readl(g, fb_mmu_debug_ctrl_r());
+	mmu_debug_ctrl = set_field(mmu_debug_ctrl, fb_mmu_debug_ctrl_debug_m(), mmu_debug_ctrl);
+	gk20a_writel(g, fb_mmu_debug_ctrl_r(), mmu_debug_ctrl);
+
+	mutex_unlock(&g->dbg_sessions_lock);
+	gk20a_idle(g->dev);
+	return err;
+}
+
+static int nvgpu_gpu_ioctl_set_debug_mode(
+		struct gk20a *g,
+		struct nvgpu_gpu_sm_debug_mode_args *args)
+{
+	int gpc, tpc, err = 0;
+	u32 sm_id, sm_dbgr_ctrl0;
+	struct channel_gk20a *ch;
+	struct nvgpu_dbg_gpu_reg_op ops;
+	u32 tpc_offset, gpc_offset, reg_offset;
+
+	ch = gk20a_get_channel_from_file(args->channel_fd);
+
+	mutex_lock(&g->dbg_sessions_lock);
+
+	for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) {
+		if (args->sms & (1 << sm_id)) {
+			gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
+			tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
+
+			tpc_offset = proj_tpc_in_gpc_stride_v() * tpc;
+			gpc_offset = proj_gpc_stride_v() * gpc;
+			reg_offset = tpc_offset + gpc_offset;
+
+			ops.op = REGOP(READ_32);
+			ops.type = REGOP(TYPE_GR_CTX);
+			ops.status = REGOP(STATUS_SUCCESS);
+			ops.value_hi = 0;
+			ops.and_n_mask_lo = 0;
+			ops.and_n_mask_hi = 0;
+			ops.offset = gr_gpc0_tpc0_sm_dbgr_control0_r() + reg_offset;
+
+			err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1);
+			sm_dbgr_ctrl0 = ops.value_lo;
+
+			if (args->enable) {
+				sm_dbgr_ctrl0 = gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v() |
+					gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_disable_f() |
+					gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_disable_f() |
+					sm_dbgr_ctrl0;
+			} else
+				sm_dbgr_ctrl0 = gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_off_v() | sm_dbgr_ctrl0;
+
+			if (!err) {
+				ops.op = REGOP(WRITE_32);
+				ops.value_lo = sm_dbgr_ctrl0;
+				err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0);
+			} else
+				gk20a_err(dev_from_gk20a(g), "Failed to access register\n");
+		}
+	}
+
+	mutex_unlock(&g->dbg_sessions_lock);
+	return err;
+}
+
+static int nvgpu_gpu_ioctl_wait_for_pause(
+		struct gk20a *g,
+		struct nvgpu_gpu_wait_pause_args *args)
+{
+	int err = 0, gpc, tpc;
+	u32 sm_count, sm_id, size;
+	struct warpstate *w_state;
+	struct gr_gk20a *gr = &g->gr;
+	u32 tpc_offset, gpc_offset, reg_offset, global_mask;
+	u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
+
+	sm_count = g->gr.gpc_count * g->gr.tpc_count;
+	size = sm_count * sizeof(struct warpstate);
+	w_state = kzalloc(size, GFP_KERNEL);
+
+	global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() |
+		      gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() |
+		      gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f();
+
+	mutex_lock(&g->dbg_sessions_lock);
+
+	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
+
+		gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
+		tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
+
+		tpc_offset = proj_tpc_in_gpc_stride_v() * tpc;
+		gpc_offset = proj_gpc_stride_v() * gpc;
+		reg_offset = tpc_offset + gpc_offset;
+
+		/* Wait until all valid warps on the SM are paused. The valid warp mask
+		 * must be re-read with the paused mask because new warps may become
+		 * valid as the SM is pausing.
+		 */
+
+		err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask);
+		if (err) {
+			gk20a_err(dev_from_gk20a(g), "sm did not lock down!\n");
+			goto end;
+		}
+
+		/* 64 bit read */
+		warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset) << 32;
+		warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset + 4);
+
+		/* 64 bit read */
+		warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset) << 32;
+		warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset + 4);
+
+		/* 64 bit read */
+		warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset) << 32;
+		warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset + 4);
+
+		w_state[sm_id].valid_warps = warps_valid;
+		w_state[sm_id].trapped_warps = warps_trapped;
+		w_state[sm_id].paused_warps = warps_paused;
+	}
+
+	if (copy_to_user((void __user *)(uintptr_t)args->pwarpstate, w_state, size)) {
+		gk20a_dbg_fn("copy_to_user failed!");
+		err = -EFAULT;
+	}
+
+end:
+	mutex_unlock(&g->dbg_sessions_lock);
+	kfree(w_state);
+	return err;
+}
+
+static int nvgpu_gpu_ioctl_has_any_exception(
+		struct gk20a *g,
+		struct nvgpu_gpu_tpc_exception_en_status_args *args)
+{
+	int err = 0;
+	struct gr_gk20a *gr = &g->gr;
+	u32 sm_id, tpc_exception_en = 0;
+	u32 offset, regval, tpc_offset, gpc_offset;
+
+	mutex_lock(&g->dbg_sessions_lock);
+
+	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
+
+		tpc_offset = proj_tpc_in_gpc_stride_v() * g->gr.sm_to_cluster[sm_id].tpc_index;
+		gpc_offset = proj_gpc_stride_v() * g->gr.sm_to_cluster[sm_id].gpc_index;
+		offset = tpc_offset + gpc_offset;
+
+		regval = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r() +
+								offset);
+		/* Each bit represents the corresponding enable state; bit 0 corresponds to SM0 */
+		tpc_exception_en |= gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(regval) << sm_id;
+	}
+
+	mutex_unlock(&g->dbg_sessions_lock);
+	args->tpc_exception_en_sm_mask = tpc_exception_en;
+	return err;
+}
+
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct platform_device *dev = filp->private_data;
@@ -441,6 +680,31 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 		err = nvgpu_gpu_ioctl_l2_fb_ops(g,
 			(struct nvgpu_gpu_l2_fb_args *)buf);
 		break;
+	case NVGPU_GPU_IOCTL_INVAL_ICACHE:
+		err = gr_gk20a_elpg_protected_call(g,
+			nvgpu_gpu_ioctl_inval_icache(g, (struct nvgpu_gpu_inval_icache_args *)buf));
+		break;
+
+	case NVGPU_GPU_IOCTL_SET_MMUDEBUG_MODE:
+		err = nvgpu_gpu_ioctl_set_mmu_debug_mode(g,
+			(struct nvgpu_gpu_mmu_debug_mode_args *)buf);
+		break;
+
+	case NVGPU_GPU_IOCTL_SET_SM_DEBUG_MODE:
+		err = gr_gk20a_elpg_protected_call(g,
+			nvgpu_gpu_ioctl_set_debug_mode(g, (struct nvgpu_gpu_sm_debug_mode_args *)buf));
+		break;
+
+	case NVGPU_GPU_IOCTL_WAIT_FOR_PAUSE:
+		err = nvgpu_gpu_ioctl_wait_for_pause(g,
+			(struct nvgpu_gpu_wait_pause_args *)buf);
+		break;
+
+	case NVGPU_GPU_IOCTL_GET_TPC_EXCEPTION_EN_STATUS:
+		err = nvgpu_gpu_ioctl_has_any_exception(g,
+			(struct nvgpu_gpu_tpc_exception_en_status_args *)buf);
+		break;
+
 	default:
 		dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
@@ -452,4 +716,3 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 
 	return err;
 }
-
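
Reading note: every register update in the handlers above uses the driver's set_field() read-modify-write helper. As a minimal sketch of that pattern, assuming set_field() keeps its usual nvgpu definition from gk20a.h of (val & ~mask) | field:

    /* Assumed helper definition, shown for illustration only. */
    static inline u32 set_field(u32 val, u32 mask, u32 field)
    {
            return (val & ~mask) | field;
    }

    /* The pattern used above: read, replace one field, write back.
     * The third argument is OR'd in as-is, so it must already be
     * aligned with the mask; invalidate_cache is bit 0, so the raw
     * value 1 lands in the right position here. */
    u32 v = gk20a_readl(g, gr_pri_gpc0_tpc0_sm_cache_control_r());
    v = set_field(v, gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(), 1);
    gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cache_control_r(), v);
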
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 67aa49c6..3389aca5 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -56,6 +56,8 @@
 #include "hw_sim_gk20a.h"
 #include "hw_top_gk20a.h"
 #include "hw_ltc_gk20a.h"
+#include "hw_gr_gk20a.h"
+#include "hw_fb_gk20a.h"
 #include "gk20a_scale.h"
 #include "dbg_gpu_gk20a.h"
 #include "hal.h"
@@ -727,6 +729,21 @@ static int gk20a_detect_chip(struct gk20a *g)
 	return gpu_init_hal(g);
 }
 
+void gk20a_pm_restore_debug_setting(struct gk20a *g)
+{
+	u32 mmu_debug_ctrl;
+
+	/* restore mmu debug state */
+	if (g->mmu_debug_ctrl)
+		mmu_debug_ctrl = fb_mmu_debug_ctrl_debug_enabled_v();
+	else
+		mmu_debug_ctrl = fb_mmu_debug_ctrl_debug_disabled_v();
+
+	mmu_debug_ctrl = gk20a_readl(g, fb_mmu_debug_ctrl_r());
+	mmu_debug_ctrl = set_field(mmu_debug_ctrl, fb_mmu_debug_ctrl_debug_m(), mmu_debug_ctrl);
+	gk20a_writel(g, fb_mmu_debug_ctrl_r(), mmu_debug_ctrl);
+}
+
 static int gk20a_pm_finalize_poweron(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
@@ -851,6 +868,9 @@ static int gk20a_pm_finalize_poweron(struct device *dev)
 		goto done;
 	}
 
+	/* Restore the debug setting */
+	gk20a_pm_restore_debug_setting(g);
+
 	gk20a_channel_resume(g);
 	set_user_nice(current, nice_value);
 
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index b9bdc6e6..bcea5655 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -511,6 +511,7 @@ struct gk20a {
 	struct device_dma_parameters dma_parms;
 
 	struct gk20a_cde_app cde_app;
+	bool mmu_debug_ctrl;
 };
 
 static inline unsigned long gk20a_get_gr_idle_timeout(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 4217658c..7e8d4e13 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -1273,7 +1273,6 @@ static int gr_gk20a_ctx_state_floorsweep(struct gk20a *g)
 	u32 tpc_index, gpc_index;
 	u32 tpc_offset, gpc_offset;
 	u32 sm_id = 0, gpc_id = 0;
-	u32 sm_id_to_gpc_id[proj_scal_max_gpcs_v() * proj_scal_max_tpc_per_gpc_v()];
 	u32 tpc_per_gpc;
 	u32 max_ways_evict = INVALID_MAX_WAYS;
 	u32 l1c_dbg_reg_val;
@@ -1295,7 +1294,9 @@ static int gr_gk20a_ctx_state_floorsweep(struct gk20a *g)
 			gk20a_writel(g, gr_gpc0_tpc0_pe_cfg_smid_r() + gpc_offset + tpc_offset,
 				gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id));
 
-			sm_id_to_gpc_id[sm_id] = gpc_index;
+			g->gr.sm_to_cluster[sm_id].tpc_index = tpc_index;
+			g->gr.sm_to_cluster[sm_id].gpc_index = gpc_index;
+
 			sm_id++;
 		}
 
@@ -1306,6 +1307,8 @@ static int gr_gk20a_ctx_state_floorsweep(struct gk20a *g)
 		}
 	}
 
+	gr->no_of_sm = sm_id;
+
 	for (tpc_index = 0, gpc_id = 0;
 		tpc_index < gr_pd_num_tpc_per_gpc__size_1_v();
 		tpc_index++, gpc_id += 8) {
@@ -2997,6 +3000,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
 	kfree(gr->pes_tpc_count[1]);
 	kfree(gr->pes_tpc_mask[0]);
 	kfree(gr->pes_tpc_mask[1]);
+	kfree(gr->sm_to_cluster);
 	kfree(gr->gpc_skip_mask);
 	kfree(gr->map_tiles);
 	gr->gpc_tpc_count = NULL;
@@ -3089,6 +3093,7 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 	gr->pes_tpc_count[1] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
 	gr->pes_tpc_mask[0] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
 	gr->pes_tpc_mask[1] = kzalloc(gr->gpc_count * sizeof(u32), GFP_KERNEL);
+
 	gr->gpc_skip_mask =
 		kzalloc(gr_pd_dist_skip_table__size_1_v() * 4 * sizeof(u32),
 			GFP_KERNEL);
@@ -3159,6 +3164,10 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
 		gr->gpc_skip_mask[gpc_index] = gpc_new_skip_mask;
 	}
 
+	gr->sm_to_cluster = kzalloc(gr->gpc_count * gr->tpc_count *
+			sizeof(struct sm_info), GFP_KERNEL);
+	gr->no_of_sm = 0;
+
 	gk20a_dbg_info("fbps: %d", gr->num_fbps);
 	gk20a_dbg_info("max_gpc_count: %d", gr->max_gpc_count);
 	gk20a_dbg_info("max_fbps_count: %d", gr->max_fbps_count);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index deafc438..6cabe526 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -165,6 +165,11 @@ struct zbc_query_params {
 	u32 index_size;	/* [out] size, [in] index */
 };
 
+struct sm_info {
+	u8 gpc_index;
+	u8 tpc_index;
+};
+
 struct gr_gk20a {
 	struct gk20a *g;
 	struct {
@@ -290,6 +295,8 @@ struct gr_gk20a {
 #ifdef CONFIG_ARCH_TEGRA_18x_SOC
 	struct gr_t18x t18x;
 #endif
+	u32 no_of_sm;
+	struct sm_info *sm_to_cluster;
 };
 
 void gk20a_fecs_dump_falcon_stats(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/hw_fb_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_fb_gk20a.h
index 6b8b6718..a0a3ae33 100644
--- a/drivers/gpu/nvgpu/gk20a/hw_fb_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/hw_fb_gk20a.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2012-2015, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -194,10 +194,18 @@ static inline u32 fb_mmu_debug_ctrl_debug_v(u32 r)
 {
 	return (r >> 16) & 0x1;
 }
+static inline u32 fb_mmu_debug_ctrl_debug_m(void)
+{
+	return 0x1 << 16;
+}
 static inline u32 fb_mmu_debug_ctrl_debug_enabled_v(void)
 {
 	return 0x00000001;
 }
+static inline u32 fb_mmu_debug_ctrl_debug_disabled_v(void)
+{
+	return 0x00000000;
+}
 static inline u32 fb_mmu_vpr_info_r(void)
 {
 	return 0x00100cd0;
diff --git a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
index 8fe75614..8a6c2f23 100644
--- a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
@@ -342,6 +342,30 @@ static inline u32 gr_activity_4_r(void)
 {
 	return 0x00400390;
 }
+static inline u32 gr_pri_gpc0_gcc_dbg_r(void)
+{
+	return 0x00501000;
+}
+static inline u32 gr_pri_gpcs_gcc_dbg_r(void)
+{
+	return 0x00419000;
+}
+static inline u32 gr_pri_gpcs_gcc_dbg_invalidate_m(void)
+{
+	return 0x1 << 1;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_cache_control_r(void)
+{
+	return 0x005046a4;
+}
+static inline u32 gr_pri_gpcs_tpcs_sm_cache_control_r(void)
+{
+	return 0x00419ea4;
+}
+static inline u32 gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(void)
+{
+	return 0x1 << 0;
+}
 static inline u32 gr_pri_sked_activity_r(void)
 {
 	return 0x00407054;
@@ -2962,6 +2986,10 @@ static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_r(void)
 {
 	return 0x0050450c;
 }
+static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
 static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f(void)
 {
 	return 0x2;
@@ -3010,6 +3038,10 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v(void)
 {
 	return 0x00000001;
 }
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_off_v(void)
+{
+	return 0x00000000;
+}
 static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_trigger_enable_f(void)
 {
 	return 0x80000000;
@@ -3022,10 +3054,50 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_run_trigger_task_f(void)
 {
 	return 0x40000000;
 }
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_disable_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_v(u32 r)
+{
+	return (r >> 2) & 0x1;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_disable_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_r(void)
+{
+	return 0x00504614;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r(void)
+{
+	return 0x00504624;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r(void)
+{
+	return 0x00504634;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_stop_on_any_warp_disable_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_stop_on_any_sm_disable_v(void)
+{
+	return 0x00000000;
+}
 static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_r(void)
 {
 	return 0x0050460c;
 }
+static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_sm_in_trap_mode_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
 static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(u32 r)
 {
 	return (r >> 4) & 0x1;
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index 4c2b00a8..3d99e94d 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -533,10 +533,15 @@ static int gr_gm20b_ctx_state_floorsweep(struct gk20a *g)
 				     + gpc_offset + tpc_offset,
 			gr_gpc0_tpc0_pe_cfg_smid_value_f(sm_id));
 
+			g->gr.sm_to_cluster[sm_id].tpc_index = tpc_index;
+			g->gr.sm_to_cluster[sm_id].gpc_index = gpc_index;
+
 			sm_id++;
 		}
 	}
 
+	gr->no_of_sm = sm_id;
+
 	for (gpc_index = 0; gpc_index < gr->gpc_count; gpc_index++)
 		tpc_per_gpc |= gr->gpc_tpc_count[gpc_index]
 			<< (gr_pd_num_tpc_per_gpc__size_1_v() * gpc_index);
diff --git a/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h
index a753074e..214306cb 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h
@@ -214,10 +214,18 @@ static inline u32 fb_mmu_debug_ctrl_debug_v(u32 r)
 {
 	return (r >> 16) & 0x1;
 }
+static inline u32 fb_mmu_debug_ctrl_debug_m(void)
+{
+	return 0x1 << 16;
+}
 static inline u32 fb_mmu_debug_ctrl_debug_enabled_v(void)
 {
 	return 0x00000001;
 }
+static inline u32 fb_mmu_debug_ctrl_debug_disabled_v(void)
+{
+	return 0x00000000;
+}
 static inline u32 fb_mmu_vpr_info_r(void)
 {
 	return 0x00100cd0;
diff --git a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
index 868b8fe7..11605deb 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
@@ -330,6 +330,30 @@ static inline u32 gr_activity_4_r(void)
 {
 	return 0x00400390;
 }
+static inline u32 gr_pri_gpc0_gcc_dbg_r(void)
+{
+	return 0x00501000;
+}
+static inline u32 gr_pri_gpcs_gcc_dbg_r(void)
+{
+	return 0x00419000;
+}
+static inline u32 gr_pri_gpcs_gcc_dbg_invalidate_m(void)
+{
+	return 0x1 << 1;
+}
+static inline u32 gr_pri_gpc0_tpc0_sm_cache_control_r(void)
+{
+	return 0x005046a4;
+}
+static inline u32 gr_pri_gpcs_tpcs_sm_cache_control_r(void)
+{
+	return 0x00419ea4;
+}
+static inline u32 gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(void)
+{
+	return 0x1 << 0;
+}
 static inline u32 gr_pri_sked_activity_r(void)
 {
 	return 0x00407054;
@@ -2998,6 +3022,10 @@ static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f(void)
 {
 	return 0x2;
 }
+static inline u32 gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
 static inline u32 gr_gpcs_gpccs_gpc_exception_en_r(void)
 {
 	return 0x0041ac94;
@@ -3054,10 +3082,50 @@ static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_run_trigger_task_f(void)
 {
 	return 0x40000000;
 }
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_disable_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_v(u32 r)
+{
+	return (r >> 2) & 0x1;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_disable_f(void)
+{
+	return 0x0;
+}
+static inline u32 gr_gpc0_tpc0_sm_warp_valid_mask_r(void)
+{
+	return 0x00504614;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r(void)
+{
+	return 0x00504624;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r(void)
+{
+	return 0x00504634;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_stop_on_any_warp_disable_v(void)
+{
+	return 0x00000000;
+}
+static inline u32 gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_stop_on_any_sm_disable_v(void)
+{
+	return 0x00000000;
+}
 static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_r(void)
 {
 	return 0x0050460c;
 }
+static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_sm_in_trap_mode_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
 static inline u32 gr_gpc0_tpc0_sm_dbgr_status0_locked_down_v(u32 r)
 {
 	return (r >> 4) & 0x1;
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index b04ef2e3..3a7c7831 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -252,6 +252,36 @@ struct nvgpu_gpu_l2_fb_args {
 	__u32 reserved;
 } __packed;
 
+struct nvgpu_gpu_inval_icache_args {
+	int channel_fd;
+	__u32 reserved;
+} __packed;
+
+struct nvgpu_gpu_mmu_debug_mode_args {
+	__u32 state;
+	__u32 reserved;
+} __packed;
+
+struct nvgpu_gpu_sm_debug_mode_args {
+	int channel_fd;
+	__u32 enable;
+	__u64 sms;
+} __packed;
+
+struct warpstate {
+	__u64 valid_warps;
+	__u64 trapped_warps;
+	__u64 paused_warps;
+};
+
+struct nvgpu_gpu_wait_pause_args {
+	__u64 pwarpstate;
+};
+
+struct nvgpu_gpu_tpc_exception_en_status_args {
+	__u64 tpc_exception_en_sm_mask;
+};
+
 #define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
 	_IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
 #define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
@@ -276,9 +306,19 @@ struct nvgpu_gpu_l2_fb_args {
 	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 11, struct nvgpu_gpu_open_channel_args)
 #define NVGPU_GPU_IOCTL_FLUSH_L2 \
 	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 12, struct nvgpu_gpu_l2_fb_args)
+#define NVGPU_GPU_IOCTL_INVAL_ICACHE \
+	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 13, struct nvgpu_gpu_inval_icache_args)
+#define NVGPU_GPU_IOCTL_SET_MMUDEBUG_MODE \
+	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 14, struct nvgpu_gpu_mmu_debug_mode_args)
+#define NVGPU_GPU_IOCTL_SET_SM_DEBUG_MODE \
+	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 15, struct nvgpu_gpu_sm_debug_mode_args)
+#define NVGPU_GPU_IOCTL_WAIT_FOR_PAUSE \
+	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 16, struct nvgpu_gpu_wait_pause_args)
+#define NVGPU_GPU_IOCTL_GET_TPC_EXCEPTION_EN_STATUS \
+	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 17, struct nvgpu_gpu_tpc_exception_en_status_args)
 
 #define NVGPU_GPU_IOCTL_LAST \
-	_IOC_NR(NVGPU_GPU_IOCTL_FLUSH_L2)
+	_IOC_NR(NVGPU_GPU_IOCTL_GET_TPC_EXCEPTION_EN_STATUS)
 #define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \
 	sizeof(struct nvgpu_gpu_prepare_compressible_read_args)
 
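
For context, a minimal userspace sketch of driving the new SM-debug ioctls defined above. The control-node path (/dev/nvhost-ctrl-gpu), the helper name, and the caller already knowing the SM count (gpc_count * tpc_count, matching the kernel-side allocation in nvgpu_gpu_ioctl_wait_for_pause) are assumptions; error handling is mostly elided:

    /* Hypothetical usage sketch -- not part of this change. */
    #include <fcntl.h>
    #include <stdint.h>
    #include <stdlib.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/nvgpu.h>

    static int sm_debug_pause_and_wait(int channel_fd, unsigned int sm_count)
    {
            /* Assumed device node for the GPU control device. */
            int ctrl = open("/dev/nvhost-ctrl-gpu", O_RDWR);
            struct nvgpu_gpu_sm_debug_mode_args dbg = {
                    .channel_fd = channel_fd,
                    .enable = 1,
                    /* select all SMs; assumes sm_count < 64 */
                    .sms = (1ULL << sm_count) - 1,
            };
            /* One warpstate per SM, copied out by the kernel. */
            struct warpstate *w = calloc(sm_count, sizeof(*w));
            struct nvgpu_gpu_wait_pause_args wait = {
                    .pwarpstate = (__u64)(uintptr_t)w,
            };
            int err;

            err = ioctl(ctrl, NVGPU_GPU_IOCTL_SET_SM_DEBUG_MODE, &dbg);
            if (!err)
                    err = ioctl(ctrl, NVGPU_GPU_IOCTL_WAIT_FOR_PAUSE, &wait);
            /* On success, w[i].valid_warps / paused_warps / trapped_warps
             * hold the 64-bit per-SM masks read back from the hardware. */
            free(w);
            close(ctrl);
            return err;
    }
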