path: root/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
author     sujeet baranwal <sbaranwal@nvidia.com>   2015-03-02 18:36:22 -0500
committer  Dan Willemsen <dwillemsen@nvidia.com>    2015-04-04 21:58:04 -0400
commit     895675e1d5790e2361b22edb50d702f7dd9a8edd (patch)
tree       dbe3586cec5351fd2c2eb13d91c258e663d73b08 /drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
parent     cf0085ec231246748b34081d2786c29cedcbd706 (diff)
gpu: nvgpu: Removal of regops from CUDA driver
The current CUDA drivers have been using regops to directly access GPU registers from user space through the dbg node. This is a security hole and needs to be avoided. This patch instead implements similar functionality in the kernel and provides an ioctl for it.

Bug 200083334

Change-Id: Ic5ff5a215cbabe7a46837bc4e15efcceb0df0367
Signed-off-by: sujeet baranwal <sbaranwal@nvidia.com>
Reviewed-on: http://git-master/r/711758
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
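For illustration only, a minimal user-space sketch of the ioctl path that replaces raw regops. It assumes the NVGPU_GPU_IOCTL_SET_SM_DEBUG_MODE command and struct nvgpu_gpu_sm_debug_mode_args (channel_fd, sms, enable) are exposed through <linux/nvgpu.h> alongside this change, and it assumes /dev/nvhost-ctrl-gpu as the control node path; none of this code is part of the patch below.

/* Hypothetical user-space sketch: enable SM debugger mode on all SMs of a
 * channel via the new ctrl-node ioctl instead of raw regops.
 * The device node path and the uapi struct layout are assumptions.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/nvgpu.h>	/* assumed to define the ioctl and args struct */

static int enable_sm_debug(int channel_fd)
{
	struct nvgpu_gpu_sm_debug_mode_args args;
	int ctrl_fd, err;

	ctrl_fd = open("/dev/nvhost-ctrl-gpu", O_RDWR);	/* assumed node path */
	if (ctrl_fd < 0)
		return -1;

	memset(&args, 0, sizeof(args));
	args.channel_fd = channel_fd;	/* channel whose gr ctx is patched */
	args.sms = ~0ULL;		/* bitmask of SMs to update */
	args.enable = 1;		/* debugger mode on */

	err = ioctl(ctrl_fd, NVGPU_GPU_IOCTL_SET_SM_DEBUG_MODE, &args);
	if (err)
		perror("NVGPU_GPU_IOCTL_SET_SM_DEBUG_MODE");

	close(ctrl_fd);
	return err;
}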
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c  265
1 file changed, 264 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 7b617a03..5df420ff 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -20,9 +20,16 @@
 #include <linux/anon_inodes.h>
 #include <linux/nvgpu.h>
 #include <uapi/linux/nvgpu.h>
+#include <linux/delay.h>
 
 #include "gk20a.h"
+#include "gr_gk20a.h"
 #include "fence_gk20a.h"
+#include "regops_gk20a.h"
+#include "hw_gr_gk20a.h"
+#include "hw_fb_gk20a.h"
+#include "hw_proj_gk20a.h"
+
 
 int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp)
 {
@@ -257,6 +264,238 @@ static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g,
 	return err;
 }
 
+/* Invalidate i-cache for kepler & maxwell */
+static int nvgpu_gpu_ioctl_inval_icache(
+		struct gk20a *g,
+		struct nvgpu_gpu_inval_icache_args *args)
+{
+
+	int err = 0;
+	u32 cache_ctrl, regval;
+	struct channel_gk20a *ch;
+	struct nvgpu_dbg_gpu_reg_op ops;
+
+	ch = gk20a_get_channel_from_file(args->channel_fd);
+
+	ops.op = REGOP(READ_32);
+	ops.type = REGOP(TYPE_GR_CTX);
+	ops.status = REGOP(STATUS_SUCCESS);
+	ops.value_hi = 0;
+	ops.and_n_mask_lo = 0;
+	ops.and_n_mask_hi = 0;
+	ops.offset = gr_pri_gpc0_gcc_dbg_r();
+
+	/* Take the global lock, since we'll be doing global regops */
+	mutex_lock(&g->dbg_sessions_lock);
+
+	err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1);
+
+	regval = ops.value_lo;
+
+	if (!err) {
+		ops.op = REGOP(WRITE_32);
+		ops.value_lo = set_field(regval, gr_pri_gpcs_gcc_dbg_invalidate_m(), 1);
+		err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0);
+	}
+
+	if (err) {
+		gk20a_err(dev_from_gk20a(g), "Failed to access register\n");
+		goto end;
+	}
+
+	cache_ctrl = gk20a_readl(g, gr_pri_gpc0_tpc0_sm_cache_control_r());
+	cache_ctrl = set_field(cache_ctrl, gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(), 1);
+	gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cache_control_r(), cache_ctrl);
+
+end:
+	mutex_unlock(&g->dbg_sessions_lock);
+	return err;
+}
+
+static int nvgpu_gpu_ioctl_set_mmu_debug_mode(
+		struct gk20a *g,
+		struct nvgpu_gpu_mmu_debug_mode_args *args)
+{
+	int err = 0;
+	u32 mmu_debug_ctrl;
+
+	err = gk20a_busy(g->dev);
+	if (err) {
+		gk20a_err(dev_from_gk20a(g), "failed to power on gpu\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&g->dbg_sessions_lock);
+
+	if (args->state == 1) {
+		mmu_debug_ctrl = fb_mmu_debug_ctrl_debug_enabled_v();
+		g->mmu_debug_ctrl = true;
+	} else {
+		mmu_debug_ctrl = fb_mmu_debug_ctrl_debug_disabled_v();
+		g->mmu_debug_ctrl = false;
+	}
+
+	mmu_debug_ctrl = gk20a_readl(g, fb_mmu_debug_ctrl_r());
+	mmu_debug_ctrl = set_field(mmu_debug_ctrl, fb_mmu_debug_ctrl_debug_m(), mmu_debug_ctrl);
+	gk20a_writel(g, fb_mmu_debug_ctrl_r(), mmu_debug_ctrl);
+
+	mutex_unlock(&g->dbg_sessions_lock);
+	gk20a_idle(g->dev);
+	return err;
+}
+
+static int nvgpu_gpu_ioctl_set_debug_mode(
+		struct gk20a *g,
+		struct nvgpu_gpu_sm_debug_mode_args *args)
+{
+	int gpc, tpc, err = 0;
+	u32 sm_id, sm_dbgr_ctrl0;
+	struct channel_gk20a *ch;
+	struct nvgpu_dbg_gpu_reg_op ops;
+	u32 tpc_offset, gpc_offset, reg_offset;
+
+	ch = gk20a_get_channel_from_file(args->channel_fd);
+
+	mutex_lock(&g->dbg_sessions_lock);
+
+	for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) {
+		if (args->sms & (1 << sm_id)) {
+			gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
+			tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
+
+			tpc_offset = proj_tpc_in_gpc_stride_v() * tpc;
+			gpc_offset = proj_gpc_stride_v() * gpc;
+			reg_offset = tpc_offset + gpc_offset;
+
+			ops.op = REGOP(READ_32);
+			ops.type = REGOP(TYPE_GR_CTX);
+			ops.status = REGOP(STATUS_SUCCESS);
+			ops.value_hi = 0;
+			ops.and_n_mask_lo = 0;
+			ops.and_n_mask_hi = 0;
+			ops.offset = gr_gpc0_tpc0_sm_dbgr_control0_r() + reg_offset;
+
+			err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1);
+			sm_dbgr_ctrl0 = ops.value_lo;
+
+			if (args->enable) {
+				sm_dbgr_ctrl0 = gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v() |
+					gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_disable_f() |
+					gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_disable_f() |
+					sm_dbgr_ctrl0;
+			} else
+				sm_dbgr_ctrl0 = gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_off_v() | sm_dbgr_ctrl0;
+
+			if (!err) {
+				ops.op = REGOP(WRITE_32);
+				ops.value_lo = sm_dbgr_ctrl0;
+				err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0);
+			} else
+				gk20a_err(dev_from_gk20a(g), "Failed to access register\n");
+		}
+	}
+
+	mutex_unlock(&g->dbg_sessions_lock);
+	return err;
+}
+
+static int nvgpu_gpu_ioctl_wait_for_pause(
+		struct gk20a *g,
+		struct nvgpu_gpu_wait_pause_args *args)
+{
+	int err = 0, gpc, tpc;
+	u32 sm_count, sm_id, size;
+	struct warpstate *w_state;
+	struct gr_gk20a *gr = &g->gr;
+	u32 tpc_offset, gpc_offset, reg_offset, global_mask;
+	u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
+
+	sm_count = g->gr.gpc_count * g->gr.tpc_count;
+	size = sm_count * sizeof(struct warpstate);
+	w_state = kzalloc(size, GFP_KERNEL);
+
+	global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() |
+		gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() |
+		gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f();
+
+	mutex_lock(&g->dbg_sessions_lock);
+
+	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
+
+		gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
+		tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
+
+		tpc_offset = proj_tpc_in_gpc_stride_v() * tpc;
+		gpc_offset = proj_gpc_stride_v() * gpc;
+		reg_offset = tpc_offset + gpc_offset;
+
+		/* Wait until all valid warps on the sm are paused. The valid warp mask
+		 * must be re-read with the paused mask because new warps may become
+		 * valid as the sm is pausing.
+		 */
+
+		err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask);
+		if (err) {
+			gk20a_err(dev_from_gk20a(g), "sm did not lock down!\n");
+			goto end;
+		}
+
+		/* 64 bit read */
+		warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset) << 32;
+		warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset + 4);
+
+		/* 64 bit read */
+		warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset) << 32;
+		warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset + 4);
+
+		/* 64 bit read */
+		warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset) << 32;
+		warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset + 4);
+
+		w_state[sm_id].valid_warps = warps_valid;
+		w_state[sm_id].trapped_warps = warps_trapped;
+		w_state[sm_id].paused_warps = warps_paused;
+	}
+
+	if (copy_to_user((void __user *)(uintptr_t)args->pwarpstate, w_state, size)) {
+		gk20a_dbg_fn("copy_to_user failed!");
+		err = -EFAULT;
+	}
+
+end:
+	mutex_unlock(&g->dbg_sessions_lock);
+	kfree(w_state);
+	return err;
+}
+
+static int nvgpu_gpu_ioctl_has_any_exception(
+		struct gk20a *g,
+		struct nvgpu_gpu_tpc_exception_en_status_args *args)
+{
+	int err = 0;
+	struct gr_gk20a *gr = &g->gr;
+	u32 sm_id, tpc_exception_en = 0;
+	u32 offset, regval, tpc_offset, gpc_offset;
+
+	mutex_lock(&g->dbg_sessions_lock);
+
+	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
+
+		tpc_offset = proj_tpc_in_gpc_stride_v() * g->gr.sm_to_cluster[sm_id].tpc_index;
+		gpc_offset = proj_gpc_stride_v() * g->gr.sm_to_cluster[sm_id].gpc_index;
+		offset = tpc_offset + gpc_offset;
+
+		regval = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r() +
+				offset);
+		/* Each bit represents the corresponding enablement state; bit 0 corresponds to SM0 */
+		tpc_exception_en |= gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(regval) << sm_id;
+	}
+
+	mutex_unlock(&g->dbg_sessions_lock);
+	args->tpc_exception_en_sm_mask = tpc_exception_en;
+	return err;
+}
+
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct platform_device *dev = filp->private_data;
@@ -441,6 +680,31 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 		err = nvgpu_gpu_ioctl_l2_fb_ops(g,
 			(struct nvgpu_gpu_l2_fb_args *)buf);
 		break;
+	case NVGPU_GPU_IOCTL_INVAL_ICACHE:
+		err = gr_gk20a_elpg_protected_call(g,
+				nvgpu_gpu_ioctl_inval_icache(g, (struct nvgpu_gpu_inval_icache_args *)buf));
+		break;
+
+	case NVGPU_GPU_IOCTL_SET_MMUDEBUG_MODE:
+		err = nvgpu_gpu_ioctl_set_mmu_debug_mode(g,
+				(struct nvgpu_gpu_mmu_debug_mode_args *)buf);
+		break;
+
+	case NVGPU_GPU_IOCTL_SET_SM_DEBUG_MODE:
+		err = gr_gk20a_elpg_protected_call(g,
+				nvgpu_gpu_ioctl_set_debug_mode(g, (struct nvgpu_gpu_sm_debug_mode_args *)buf));
+		break;
+
+	case NVGPU_GPU_IOCTL_WAIT_FOR_PAUSE:
+		err = nvgpu_gpu_ioctl_wait_for_pause(g,
+				(struct nvgpu_gpu_wait_pause_args *)buf);
+		break;
+
+	case NVGPU_GPU_IOCTL_GET_TPC_EXCEPTION_EN_STATUS:
+		err = nvgpu_gpu_ioctl_has_any_exception(g,
+				(struct nvgpu_gpu_tpc_exception_en_status_args *)buf);
+		break;
+
 	default:
 		dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
@@ -452,4 +716,3 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 
 	return err;
 }
-
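As a usage note, here is a hedged user-space sketch of driving NVGPU_GPU_IOCTL_WAIT_FOR_PAUSE and reading back the per-SM warp masks that nvgpu_gpu_ioctl_wait_for_pause() copies out above. The control-node file descriptor, the SM count, and the exact uapi layout of struct warpstate and struct nvgpu_gpu_wait_pause_args are assumptions here, not defined by this patch.

/* Hypothetical sketch: wait for all SMs to pause, then dump warp masks.
 * Assumes <linux/nvgpu.h> provides struct warpstate (valid/trapped/paused
 * warp masks) and struct nvgpu_gpu_wait_pause_args.pwarpstate, and that the
 * caller knows the SM count (gpc_count * tpc_count on the kernel side).
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed uapi header */

static int dump_paused_warps(int ctrl_fd, unsigned int sm_count)
{
	struct nvgpu_gpu_wait_pause_args args;
	struct warpstate *w_state;
	unsigned int i;
	int err;

	w_state = calloc(sm_count, sizeof(*w_state));
	if (!w_state)
		return -1;

	memset(&args, 0, sizeof(args));
	args.pwarpstate = (uintptr_t)w_state;	/* buffer the kernel fills via copy_to_user */

	err = ioctl(ctrl_fd, NVGPU_GPU_IOCTL_WAIT_FOR_PAUSE, &args);
	if (!err) {
		for (i = 0; i < sm_count; i++)
			printf("sm%u valid=%llx paused=%llx trapped=%llx\n", i,
			       (unsigned long long)w_state[i].valid_warps,
			       (unsigned long long)w_state[i].paused_warps,
			       (unsigned long long)w_state[i].trapped_warps);
	}

	free(w_state);
	return err;
}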