author		sujeet baranwal <sbaranwal@nvidia.com>		2015-03-02 18:36:22 -0500
committer	Dan Willemsen <dwillemsen@nvidia.com>		2015-04-04 21:58:04 -0400
commit		895675e1d5790e2361b22edb50d702f7dd9a8edd
tree		dbe3586cec5351fd2c2eb13d91c258e663d73b08 /drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
parent		cf0085ec231246748b34081d2786c29cedcbd706
gpu: nvgpu: Removal of regops from CUDA driver
The current CUDA drivers have been using regops to directly
access GPU registers from user space through the dbg node.
This is a security hole and needs to be closed. This patch
instead implements similar functionality in the kernel and
provides an ioctl for it.
Bug 200083334
Change-Id: Ic5ff5a215cbabe7a46837bc4e15efcceb0df0367
Signed-off-by: sujeet baranwal <sbaranwal@nvidia.com>
Reviewed-on: http://git-master/r/711758
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
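
For illustration, a minimal userspace sketch of the new flow for one of
the added ioctls. This is a hedged sketch, not part of the commit: the
args fields (channel_fd, sms, enable) are taken from the kernel side of
this patch, while the /dev/nvhost-ctrl-gpu node path and the exact uapi
layout are assumptions; the authoritative definitions live in
uapi/linux/nvgpu.h.

#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* NVGPU_GPU_IOCTL_SET_SM_DEBUG_MODE */

/* Sketch only: struct layout and device path are assumptions drawn
 * from the kernel code in this patch; check uapi/linux/nvgpu.h. */
static int set_sm_debug_mode(int channel_fd, uint64_t sm_mask, int enable)
{
	struct nvgpu_gpu_sm_debug_mode_args args = {
		.channel_fd = channel_fd,
		.sms = sm_mask,
		.enable = enable,
	};
	int err, ctrl_fd = open("/dev/nvhost-ctrl-gpu", O_RDWR);

	if (ctrl_fd < 0)
		return -1;
	/* One ioctl replaces the raw regop writes the CUDA driver
	 * previously issued through the dbg node. */
	err = ioctl(ctrl_fd, NVGPU_GPU_IOCTL_SET_SM_DEBUG_MODE, &args);
	close(ctrl_fd);
	return err;
}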
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c	265
1 file changed, 264 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 7b617a03..5df420ff 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -20,9 +20,16 @@
 #include <linux/anon_inodes.h>
 #include <linux/nvgpu.h>
 #include <uapi/linux/nvgpu.h>
+#include <linux/delay.h>
 
 #include "gk20a.h"
+#include "gr_gk20a.h"
 #include "fence_gk20a.h"
+#include "regops_gk20a.h"
+#include "hw_gr_gk20a.h"
+#include "hw_fb_gk20a.h"
+#include "hw_proj_gk20a.h"
+
 
 int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp)
 {
@@ -257,6 +264,238 @@ static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g,
 	return err;
 }
 
+/* Invalidate i-cache for kepler & maxwell */
+static int nvgpu_gpu_ioctl_inval_icache(
+	struct gk20a *g,
+	struct nvgpu_gpu_inval_icache_args *args)
+{
+
+	int err = 0;
+	u32 cache_ctrl, regval;
+	struct channel_gk20a *ch;
+	struct nvgpu_dbg_gpu_reg_op ops;
+
+	ch = gk20a_get_channel_from_file(args->channel_fd);
+
+	ops.op = REGOP(READ_32);
+	ops.type = REGOP(TYPE_GR_CTX);
+	ops.status = REGOP(STATUS_SUCCESS);
+	ops.value_hi = 0;
+	ops.and_n_mask_lo = 0;
+	ops.and_n_mask_hi = 0;
+	ops.offset = gr_pri_gpc0_gcc_dbg_r();
+
+	/* Take the global lock, since we'll be doing global regops */
+	mutex_lock(&g->dbg_sessions_lock);
+
+	err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1);
+
+	regval = ops.value_lo;
+
+	if (!err) {
+		ops.op = REGOP(WRITE_32);
+		ops.value_lo = set_field(regval, gr_pri_gpcs_gcc_dbg_invalidate_m(), 1);
+		err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0);
+	}
+
+	if (err) {
+		gk20a_err(dev_from_gk20a(g), "Failed to access register\n");
+		goto end;
+	}
+
+	cache_ctrl = gk20a_readl(g, gr_pri_gpc0_tpc0_sm_cache_control_r());
+	cache_ctrl = set_field(cache_ctrl, gr_pri_gpcs_tpcs_sm_cache_control_invalidate_cache_m(), 1);
+	gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cache_control_r(), cache_ctrl);
+
+end:
+	mutex_unlock(&g->dbg_sessions_lock);
+	return err;
+}
+
+static int nvgpu_gpu_ioctl_set_mmu_debug_mode(
+	struct gk20a *g,
+	struct nvgpu_gpu_mmu_debug_mode_args *args)
+{
+	int err = 0;
+	u32 mmu_debug_ctrl, reg_val;
+
+	err = gk20a_busy(g->dev);
+	if (err) {
+		gk20a_err(dev_from_gk20a(g), "failed to power on gpu\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&g->dbg_sessions_lock);
+
+	if (args->state == 1) {
+		mmu_debug_ctrl = fb_mmu_debug_ctrl_debug_enabled_v();
+		g->mmu_debug_ctrl = true;
+	} else {
+		mmu_debug_ctrl = fb_mmu_debug_ctrl_debug_disabled_v();
+		g->mmu_debug_ctrl = false;
+	}
+
+	reg_val = gk20a_readl(g, fb_mmu_debug_ctrl_r());
+	reg_val = set_field(reg_val, fb_mmu_debug_ctrl_debug_m(), mmu_debug_ctrl);
+	gk20a_writel(g, fb_mmu_debug_ctrl_r(), reg_val);
+
+	mutex_unlock(&g->dbg_sessions_lock);
+	gk20a_idle(g->dev);
+	return err;
+}
+
+static int nvgpu_gpu_ioctl_set_debug_mode(
+	struct gk20a *g,
+	struct nvgpu_gpu_sm_debug_mode_args *args)
+{
+	int gpc, tpc, err = 0;
+	u32 sm_id, sm_dbgr_ctrl0;
+	struct channel_gk20a *ch;
+	struct nvgpu_dbg_gpu_reg_op ops;
+	u32 tpc_offset, gpc_offset, reg_offset;
+
+	ch = gk20a_get_channel_from_file(args->channel_fd);
+
+	mutex_lock(&g->dbg_sessions_lock);
+
+	for (sm_id = 0; sm_id < g->gr.no_of_sm; sm_id++) {
+		if (args->sms & (1 << sm_id)) {
+			gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
+			tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
+
+			tpc_offset = proj_tpc_in_gpc_stride_v() * tpc;
+			gpc_offset = proj_gpc_stride_v() * gpc;
+			reg_offset = tpc_offset + gpc_offset;
+
+			ops.op = REGOP(READ_32);
+			ops.type = REGOP(TYPE_GR_CTX);
+			ops.status = REGOP(STATUS_SUCCESS);
+			ops.value_hi = 0;
+			ops.and_n_mask_lo = 0;
+			ops.and_n_mask_hi = 0;
+			ops.offset = gr_gpc0_tpc0_sm_dbgr_control0_r() + reg_offset;
+
+			err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 0, 1);
+			sm_dbgr_ctrl0 = ops.value_lo;
+
+			if (args->enable) {
+				sm_dbgr_ctrl0 = gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_on_v() |
+					gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_warp_disable_f() |
+					gr_gpc0_tpc0_sm_dbgr_control0_stop_on_any_sm_disable_f() |
+					sm_dbgr_ctrl0;
+			} else
+				sm_dbgr_ctrl0 = gr_gpc0_tpc0_sm_dbgr_control0_debugger_mode_off_v() | sm_dbgr_ctrl0;
+
+			if (!err) {
+				ops.op = REGOP(WRITE_32);
+				ops.value_lo = sm_dbgr_ctrl0;
+				err = gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0);
+			} else
+				gk20a_err(dev_from_gk20a(g), "Failed to access register\n");
+		}
+	}
+
+	mutex_unlock(&g->dbg_sessions_lock);
+	return err;
+}
+
+static int nvgpu_gpu_ioctl_wait_for_pause(
+	struct gk20a *g,
+	struct nvgpu_gpu_wait_pause_args *args)
+{
+	int err = 0, gpc, tpc;
+	u32 sm_count, sm_id, size;
+	struct warpstate *w_state;
+	struct gr_gk20a *gr = &g->gr;
+	u32 tpc_offset, gpc_offset, reg_offset, global_mask;
+	u64 warps_valid = 0, warps_paused = 0, warps_trapped = 0;
+
+	sm_count = g->gr.gpc_count * g->gr.tpc_count;
+	size = sm_count * sizeof(struct warpstate);
+	w_state = kzalloc(size, GFP_KERNEL);
+
+	global_mask = gr_gpc0_tpc0_sm_hww_global_esr_bpt_int_pending_f() |
+		gr_gpc0_tpc0_sm_hww_global_esr_bpt_pause_pending_f() |
+		gr_gpc0_tpc0_sm_hww_global_esr_single_step_complete_pending_f();
+
+	mutex_lock(&g->dbg_sessions_lock);
+
+	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
+
+		gpc = g->gr.sm_to_cluster[sm_id].gpc_index;
+		tpc = g->gr.sm_to_cluster[sm_id].tpc_index;
+
+		tpc_offset = proj_tpc_in_gpc_stride_v() * tpc;
+		gpc_offset = proj_gpc_stride_v() * gpc;
+		reg_offset = tpc_offset + gpc_offset;
+
+		/* Wait until all valid warps on the sm are paused. The valid warp mask
+		 * must be re-read with the paused mask because new warps may become
+		 * valid as the sm is pausing.
+		 */
+
+		err = gk20a_gr_lock_down_sm(g, gpc, tpc, global_mask);
+		if (err) {
+			gk20a_err(dev_from_gk20a(g), "sm did not lock down!\n");
+			goto end;
+		}
+
+		/* 64 bit read */
+		warps_valid = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset) << 32;
+		warps_valid |= gk20a_readl(g, gr_gpc0_tpc0_sm_warp_valid_mask_r() + reg_offset + 4);
+
+		/* 64 bit read */
+		warps_paused = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset) << 32;
+		warps_paused |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_pause_mask_r() + reg_offset + 4);
+
+		/* 64 bit read */
+		warps_trapped = (u64)gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset) << 32;
+		warps_trapped |= gk20a_readl(g, gr_gpc0_tpc0_sm_dbgr_bpt_trap_mask_r() + reg_offset + 4);
+
+		w_state[sm_id].valid_warps = warps_valid;
+		w_state[sm_id].trapped_warps = warps_trapped;
+		w_state[sm_id].paused_warps = warps_paused;
+	}
+
+	if (copy_to_user((void __user *)(uintptr_t)args->pwarpstate, w_state, size)) {
+		gk20a_dbg_fn("copy_to_user failed!");
+		err = -EFAULT;
+	}
+
+end:
+	mutex_unlock(&g->dbg_sessions_lock);
+	kfree(w_state);
+	return err;
+}
+
+static int nvgpu_gpu_ioctl_has_any_exception(
+	struct gk20a *g,
+	struct nvgpu_gpu_tpc_exception_en_status_args *args)
+{
+	int err = 0;
+	struct gr_gk20a *gr = &g->gr;
+	u32 sm_id, tpc_exception_en = 0;
+	u32 offset, regval, tpc_offset, gpc_offset;
+
+	mutex_lock(&g->dbg_sessions_lock);
+
+	for (sm_id = 0; sm_id < gr->no_of_sm; sm_id++) {
+
+		tpc_offset = proj_tpc_in_gpc_stride_v() * g->gr.sm_to_cluster[sm_id].tpc_index;
+		gpc_offset = proj_gpc_stride_v() * g->gr.sm_to_cluster[sm_id].gpc_index;
+		offset = tpc_offset + gpc_offset;
+
+		regval = gk20a_readl(g, gr_gpc0_tpc0_tpccs_tpc_exception_en_r() +
+				offset);
+		/* Each bit represents corresponding enablement state, bit 0 corresponds to SM0 */
+		tpc_exception_en |= gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_v(regval) << sm_id;
+	}
+
+	mutex_unlock(&g->dbg_sessions_lock);
+	args->tpc_exception_en_sm_mask = tpc_exception_en;
+	return err;
+}
+
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct platform_device *dev = filp->private_data;
@@ -441,6 +680,31 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 		err = nvgpu_gpu_ioctl_l2_fb_ops(g,
 			(struct nvgpu_gpu_l2_fb_args *)buf);
 		break;
+	case NVGPU_GPU_IOCTL_INVAL_ICACHE:
+		err = gr_gk20a_elpg_protected_call(g,
+			nvgpu_gpu_ioctl_inval_icache(g, (struct nvgpu_gpu_inval_icache_args *)buf));
+		break;
+
+	case NVGPU_GPU_IOCTL_SET_MMUDEBUG_MODE:
+		err = nvgpu_gpu_ioctl_set_mmu_debug_mode(g,
+			(struct nvgpu_gpu_mmu_debug_mode_args *)buf);
+		break;
+
+	case NVGPU_GPU_IOCTL_SET_SM_DEBUG_MODE:
+		err = gr_gk20a_elpg_protected_call(g,
+			nvgpu_gpu_ioctl_set_debug_mode(g, (struct nvgpu_gpu_sm_debug_mode_args *)buf));
+		break;
+
+	case NVGPU_GPU_IOCTL_WAIT_FOR_PAUSE:
+		err = nvgpu_gpu_ioctl_wait_for_pause(g,
+			(struct nvgpu_gpu_wait_pause_args *)buf);
+		break;
+
+	case NVGPU_GPU_IOCTL_GET_TPC_EXCEPTION_EN_STATUS:
+		err = nvgpu_gpu_ioctl_has_any_exception(g,
+			(struct nvgpu_gpu_tpc_exception_en_status_args *)buf);
+		break;
+
 	default:
 		dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
@@ -452,4 +716,3 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 
 	return err;
 }
-
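
The wait-for-pause path copies one warpstate record per SM back through
the user pointer in args->pwarpstate, so the caller sizes and owns that
buffer. A hedged sketch of the userspace side, assuming the uapi mirrors
the kernel's struct warpstate (valid_warps, trapped_warps, paused_warps),
that pwarpstate is a u64-sized pointer field, and that the SM count is
obtained the same way the kernel computes it (gpc_count * tpc_count):

#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* NVGPU_GPU_IOCTL_WAIT_FOR_PAUSE */

/* Assumed mirror of the kernel's struct warpstate; the real uapi
 * definition lives in uapi/linux/nvgpu.h. */
struct warpstate_snap {
	uint64_t valid_warps;
	uint64_t trapped_warps;
	uint64_t paused_warps;
};

static int wait_for_pause(int ctrl_fd, unsigned int sm_count,
			  struct warpstate_snap **out)
{
	struct nvgpu_gpu_wait_pause_args args = { 0 };
	/* The kernel writes sm_count entries to this buffer. */
	struct warpstate_snap *w = calloc(sm_count, sizeof(*w));
	int err;

	if (!w)
		return -1;
	args.pwarpstate = (uintptr_t)w;
	err = ioctl(ctrl_fd, NVGPU_GPU_IOCTL_WAIT_FOR_PAUSE, &args);
	if (err) {
		free(w);
		return err;
	}
	*out = w;
	return 0;
}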