diff options
author | David Nieto <dmartineznie@nvidia.com> | 2017-09-18 23:31:28 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-09-22 18:44:25 -0400 |
commit | 7134e9e852116f86745cd23312bbfba34100bf6d (patch) | |
tree | 763a9db89290450a37ad19d7f57acfa66ce33526 /drivers/gpu/nvgpu/gk20a | |
parent | f6fcecfc6f437a6d24aa113f75e43cb6dbbd5e0f (diff) |
gpu: nvgpu: prevent crash during unbind
This change solves crashes during unbind that were introduced in the driver
during the OS unification refactoring due to lack of coverage of the remove()
function.
The fixes during remove are:
(1) Prevent NULL dereference on GPUs with secure boot
(2) Prevent NULL dereferences when fecs_trace is not enabled
(3) Added PRAMIN blocker during driver removal if HW is no longer accessible
(4) Prevent double free of debugfs nodes as they are handled on the
debugfs_remove_recursive() call
(5) quiesce() can now be called without checking if the HW accessible flag is set
(6) added function to free irq so no IRQ association is left on the driver after
it is removed
(7) prevent NULL dereference on nvgpu_thread_stop() if the thread is already
stopped
JIRA: EVLR-1739
Change-Id: I787d38f202d5267a6b34815f23e1bc88110e8455
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1563005
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 37 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | 33 |
2 files changed, 38 insertions, 32 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 0b8422a6..ea69d7cb 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -465,21 +465,30 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force) | |||
465 | 465 | ||
466 | trace_gk20a_free_channel(ch->chid); | 466 | trace_gk20a_free_channel(ch->chid); |
467 | 467 | ||
468 | /* abort channel and remove from runlist */ | 468 | /* |
469 | if (gk20a_is_channel_marked_as_tsg(ch)) { | 469 | * Disable channel/TSG and unbind here. This should not be executed if |
470 | err = g->ops.fifo.tsg_unbind_channel(ch); | 470 | * HW access is not available during shutdown/removal path as it will |
471 | if (err) | 471 | * trigger a timeout |
472 | nvgpu_err(g, "failed to unbind channel %d from TSG", ch->chid); | 472 | */ |
473 | /* | 473 | if (!nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) { |
474 | * Channel is not a part of TSG this point onwards | 474 | /* abort channel and remove from runlist */ |
475 | * So stash its status and use it whenever necessary | 475 | if (gk20a_is_channel_marked_as_tsg(ch)) { |
476 | * e.g. while releasing gr_ctx in g->ops.gr.free_channel_ctx() | 476 | err = g->ops.fifo.tsg_unbind_channel(ch); |
477 | */ | 477 | if (err) |
478 | was_tsg = true; | 478 | nvgpu_err(g, |
479 | } else { | 479 | "failed to unbind channel %d from TSG", |
480 | gk20a_disable_channel(ch); | 480 | ch->chid); |
481 | /* | ||
482 | * Channel is not a part of TSG this point onwards | ||
483 | * So stash its status and use it whenever necessary | ||
484 | * e.g. while releasing gr_ctx in | ||
485 | * g->ops.gr.free_channel_ctx() | ||
486 | */ | ||
487 | was_tsg = true; | ||
488 | } else { | ||
489 | gk20a_disable_channel(ch); | ||
490 | } | ||
481 | } | 491 | } |
482 | |||
483 | /* wait until there's only our ref to the channel */ | 492 | /* wait until there's only our ref to the channel */ |
484 | if (!force) | 493 | if (!force) |
485 | gk20a_wait_until_counter_is_N( | 494 | gk20a_wait_until_counter_is_N( |
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index fea3b0fa..71cba9ec 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c | |||
@@ -67,6 +67,7 @@ struct gk20a_fecs_trace { | |||
67 | struct nvgpu_mutex hash_lock; | 67 | struct nvgpu_mutex hash_lock; |
68 | struct nvgpu_mutex poll_lock; | 68 | struct nvgpu_mutex poll_lock; |
69 | struct nvgpu_thread poll_task; | 69 | struct nvgpu_thread poll_task; |
70 | bool init; | ||
70 | }; | 71 | }; |
71 | 72 | ||
72 | #ifdef CONFIG_GK20A_CTXSW_TRACE | 73 | #ifdef CONFIG_GK20A_CTXSW_TRACE |
@@ -547,23 +548,12 @@ static void gk20a_fecs_trace_debugfs_init(struct gk20a *g) | |||
547 | &gk20a_fecs_trace_debugfs_ring_fops); | 548 | &gk20a_fecs_trace_debugfs_ring_fops); |
548 | } | 549 | } |
549 | 550 | ||
550 | static void gk20a_fecs_trace_debugfs_cleanup(struct gk20a *g) | ||
551 | { | ||
552 | struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); | ||
553 | |||
554 | debugfs_remove_recursive(l->debugfs); | ||
555 | } | ||
556 | |||
557 | #else | 551 | #else |
558 | 552 | ||
559 | static void gk20a_fecs_trace_debugfs_init(struct gk20a *g) | 553 | static void gk20a_fecs_trace_debugfs_init(struct gk20a *g) |
560 | { | 554 | { |
561 | } | 555 | } |
562 | 556 | ||
563 | static inline void gk20a_fecs_trace_debugfs_cleanup(struct gk20a *g) | ||
564 | { | ||
565 | } | ||
566 | |||
567 | #endif /* CONFIG_DEBUG_FS */ | 557 | #endif /* CONFIG_DEBUG_FS */ |
568 | 558 | ||
569 | int gk20a_fecs_trace_init(struct gk20a *g) | 559 | int gk20a_fecs_trace_init(struct gk20a *g) |
@@ -598,6 +588,9 @@ int gk20a_fecs_trace_init(struct gk20a *g) | |||
598 | NVGPU_GPU_FLAGS_SUPPORT_FECS_CTXSW_TRACE; | 588 | NVGPU_GPU_FLAGS_SUPPORT_FECS_CTXSW_TRACE; |
599 | 589 | ||
600 | gk20a_fecs_trace_debugfs_init(g); | 590 | gk20a_fecs_trace_debugfs_init(g); |
591 | |||
592 | trace->init = true; | ||
593 | |||
601 | return 0; | 594 | return 0; |
602 | 595 | ||
603 | clean_hash_lock: | 596 | clean_hash_lock: |
@@ -682,15 +675,17 @@ int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch) | |||
682 | { | 675 | { |
683 | u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch); | 676 | u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch); |
684 | 677 | ||
685 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, | 678 | if (g->fecs_trace) { |
679 | gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, | ||
686 | "ch=%p context_ptr=%x", ch, context_ptr); | 680 | "ch=%p context_ptr=%x", ch, context_ptr); |
687 | 681 | ||
688 | if (g->ops.fecs_trace.is_enabled(g)) { | 682 | if (g->ops.fecs_trace.is_enabled(g)) { |
689 | if (g->ops.fecs_trace.flush) | 683 | if (g->ops.fecs_trace.flush) |
690 | g->ops.fecs_trace.flush(g); | 684 | g->ops.fecs_trace.flush(g); |
691 | gk20a_fecs_trace_poll(g); | 685 | gk20a_fecs_trace_poll(g); |
686 | } | ||
687 | gk20a_fecs_trace_hash_del(g, context_ptr); | ||
692 | } | 688 | } |
693 | gk20a_fecs_trace_hash_del(g, context_ptr); | ||
694 | return 0; | 689 | return 0; |
695 | } | 690 | } |
696 | 691 | ||
@@ -709,7 +704,9 @@ int gk20a_fecs_trace_deinit(struct gk20a *g) | |||
709 | { | 704 | { |
710 | struct gk20a_fecs_trace *trace = g->fecs_trace; | 705 | struct gk20a_fecs_trace *trace = g->fecs_trace; |
711 | 706 | ||
712 | gk20a_fecs_trace_debugfs_cleanup(g); | 707 | if (!trace->init) |
708 | return 0; | ||
709 | |||
713 | nvgpu_thread_stop(&trace->poll_task); | 710 | nvgpu_thread_stop(&trace->poll_task); |
714 | gk20a_fecs_trace_free_ring(g); | 711 | gk20a_fecs_trace_free_ring(g); |
715 | gk20a_fecs_trace_free_hash_table(g); | 712 | gk20a_fecs_trace_free_hash_table(g); |