diff options
author | David Nieto <dmartineznie@nvidia.com> | 2017-09-18 23:31:28 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-09-22 18:44:25 -0400 |
commit | 7134e9e852116f86745cd23312bbfba34100bf6d (patch) | |
tree | 763a9db89290450a37ad19d7f57acfa66ce33526 /drivers/gpu/nvgpu/common/linux | |
parent | f6fcecfc6f437a6d24aa113f75e43cb6dbbd5e0f (diff) |
gpu: nvgpu: prevent crash during unbind
This change solves crashes during unbind that were introduced in the driver
during the OS unification refactoring due to lack of coverage of the remove()
function.
The fixes during remove are:
(1) Prevent NULL dereference on GPUs with secure boot
(2) Prevent NULL dereferences when fecs_trace is not enabled
(3) Added PRAMIN blocker during driver removal if HW is no longer accessible
(4) Prevent double free of debugfs nodes as they are handled on the
debugfs_remove_recursive() call
(5) quiesce() can now be called without checking if the HW accessible flag is set
(6) added function to free irq so no IRQ association is left on the driver after
it is removed
(7) prevent NULL dereference on nvgpu_thread_stop() if the thread is already
stopped
JIRA: EVLR-1739
Change-Id: I787d38f202d5267a6b34815f23e1bc88110e8455
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1563005
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/common/linux')
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/debug.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/debug_allocator.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/debug_clk.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/debug_pmu.c | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/module.c | 55 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/module.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/pci.c | 13 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/thread.c | 6 |
8 files changed, 49 insertions, 32 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/debug.c b/drivers/gpu/nvgpu/common/linux/debug.c index 5750800f..08d0e679 100644 --- a/drivers/gpu/nvgpu/common/linux/debug.c +++ b/drivers/gpu/nvgpu/common/linux/debug.c | |||
@@ -409,5 +409,5 @@ void gk20a_debug_deinit(struct gk20a *g) | |||
409 | gk20a_fifo_debugfs_deinit(g); | 409 | gk20a_fifo_debugfs_deinit(g); |
410 | 410 | ||
411 | debugfs_remove_recursive(l->debugfs); | 411 | debugfs_remove_recursive(l->debugfs); |
412 | debugfs_remove_recursive(l->debugfs_alias); | 412 | debugfs_remove(l->debugfs_alias); |
413 | } | 413 | } |
diff --git a/drivers/gpu/nvgpu/common/linux/debug_allocator.c b/drivers/gpu/nvgpu/common/linux/debug_allocator.c index 91ae0512..d63a9030 100644 --- a/drivers/gpu/nvgpu/common/linux/debug_allocator.c +++ b/drivers/gpu/nvgpu/common/linux/debug_allocator.c | |||
@@ -55,8 +55,6 @@ void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a) | |||
55 | 55 | ||
56 | void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a) | 56 | void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a) |
57 | { | 57 | { |
58 | if (!IS_ERR_OR_NULL(a->debugfs_entry)) | ||
59 | debugfs_remove(a->debugfs_entry); | ||
60 | } | 58 | } |
61 | 59 | ||
62 | void nvgpu_alloc_debugfs_init(struct gk20a *g) | 60 | void nvgpu_alloc_debugfs_init(struct gk20a *g) |
diff --git a/drivers/gpu/nvgpu/common/linux/debug_clk.c b/drivers/gpu/nvgpu/common/linux/debug_clk.c index b265ca69..81839de7 100644 --- a/drivers/gpu/nvgpu/common/linux/debug_clk.c +++ b/drivers/gpu/nvgpu/common/linux/debug_clk.c | |||
@@ -267,6 +267,5 @@ int gm20b_clk_init_debugfs(struct gk20a *g) | |||
267 | 267 | ||
268 | err_out: | 268 | err_out: |
269 | pr_err("%s: Failed to make debugfs node\n", __func__); | 269 | pr_err("%s: Failed to make debugfs node\n", __func__); |
270 | debugfs_remove_recursive(l->debugfs); | ||
271 | return -ENOMEM; | 270 | return -ENOMEM; |
272 | } | 271 | } |
diff --git a/drivers/gpu/nvgpu/common/linux/debug_pmu.c b/drivers/gpu/nvgpu/common/linux/debug_pmu.c index 191fcb0e..ec997e28 100644 --- a/drivers/gpu/nvgpu/common/linux/debug_pmu.c +++ b/drivers/gpu/nvgpu/common/linux/debug_pmu.c | |||
@@ -477,6 +477,5 @@ int gk20a_pmu_debugfs_init(struct gk20a *g) | |||
477 | return 0; | 477 | return 0; |
478 | err_out: | 478 | err_out: |
479 | pr_err("%s: Failed to make debugfs node\n", __func__); | 479 | pr_err("%s: Failed to make debugfs node\n", __func__); |
480 | debugfs_remove_recursive(l->debugfs); | ||
481 | return -ENOMEM; | 480 | return -ENOMEM; |
482 | } | 481 | } |
diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c index 46b89ad0..c474f36a 100644 --- a/drivers/gpu/nvgpu/common/linux/module.c +++ b/drivers/gpu/nvgpu/common/linux/module.c | |||
@@ -226,9 +226,12 @@ static int gk20a_pm_prepare_poweroff(struct device *dev) | |||
226 | * After this point, gk20a interrupts should not get | 226 | * After this point, gk20a interrupts should not get |
227 | * serviced. | 227 | * serviced. |
228 | */ | 228 | */ |
229 | disable_irq(g->irq_stall); | 229 | if (g->irqs_enabled) { |
230 | if (g->irq_stall != g->irq_nonstall) | 230 | disable_irq(g->irq_stall); |
231 | disable_irq(g->irq_nonstall); | 231 | if (g->irq_stall != g->irq_nonstall) |
232 | disable_irq(g->irq_nonstall); | ||
233 | g->irqs_enabled = 0; | ||
234 | } | ||
232 | 235 | ||
233 | /* Decrement platform power refcount */ | 236 | /* Decrement platform power refcount */ |
234 | if (platform->idle) | 237 | if (platform->idle) |
@@ -641,6 +644,18 @@ static int gk20a_pm_unrailgate(struct device *dev) | |||
641 | } | 644 | } |
642 | 645 | ||
643 | /* | 646 | /* |
647 | * Remove association of the driver with OS interrupt handler | ||
648 | */ | ||
649 | void nvgpu_free_irq(struct gk20a *g) | ||
650 | { | ||
651 | struct device *dev = dev_from_gk20a(g); | ||
652 | |||
653 | devm_free_irq(dev, g->irq_stall, g); | ||
654 | if (g->irq_stall != g->irq_nonstall) | ||
655 | devm_free_irq(dev, g->irq_nonstall, g); | ||
656 | } | ||
657 | |||
658 | /* | ||
644 | * Idle the GPU in preparation of shutdown/remove. | 659 | * Idle the GPU in preparation of shutdown/remove. |
645 | * gk20a_driver_start_unload() does not idle the GPU, but instead changes the SW | 660 | * gk20a_driver_start_unload() does not idle the GPU, but instead changes the SW |
646 | * state to prevent further activity on the driver SW side. | 661 | * state to prevent further activity on the driver SW side. |
@@ -651,24 +666,27 @@ int nvgpu_quiesce(struct gk20a *g) | |||
651 | int err; | 666 | int err; |
652 | struct device *dev = dev_from_gk20a(g); | 667 | struct device *dev = dev_from_gk20a(g); |
653 | 668 | ||
654 | err = gk20a_wait_for_idle(g); | 669 | if (g->power_on) { |
655 | if (err) { | 670 | err = gk20a_wait_for_idle(g); |
656 | nvgpu_err(g, "failed to idle GPU, err=%d", err); | 671 | if (err) { |
657 | return err; | 672 | nvgpu_err(g, "failed to idle GPU, err=%d", err); |
658 | } | 673 | return err; |
674 | } | ||
659 | 675 | ||
660 | err = gk20a_fifo_disable_all_engine_activity(g, true); | 676 | err = gk20a_fifo_disable_all_engine_activity(g, true); |
661 | if (err) { | 677 | if (err) { |
662 | nvgpu_err(g, "failed to disable engine activity, err=%d", | 678 | nvgpu_err(g, |
663 | err); | 679 | "failed to disable engine activity, err=%d", |
680 | err); | ||
664 | return err; | 681 | return err; |
665 | } | 682 | } |
666 | 683 | ||
667 | err = gk20a_fifo_wait_engine_idle(g); | 684 | err = gk20a_fifo_wait_engine_idle(g); |
668 | if (err) { | 685 | if (err) { |
669 | nvgpu_err(g, "failed to idle engines, err=%d", | 686 | nvgpu_err(g, "failed to idle engines, err=%d", |
670 | err); | 687 | err); |
671 | return err; | 688 | return err; |
689 | } | ||
672 | } | 690 | } |
673 | 691 | ||
674 | if (gk20a_gpu_is_virtual(dev)) | 692 | if (gk20a_gpu_is_virtual(dev)) |
@@ -679,6 +697,7 @@ int nvgpu_quiesce(struct gk20a *g) | |||
679 | if (err) | 697 | if (err) |
680 | nvgpu_err(g, "failed to prepare for poweroff, err=%d", | 698 | nvgpu_err(g, "failed to prepare for poweroff, err=%d", |
681 | err); | 699 | err); |
700 | |||
682 | return err; | 701 | return err; |
683 | } | 702 | } |
684 | 703 | ||
diff --git a/drivers/gpu/nvgpu/common/linux/module.h b/drivers/gpu/nvgpu/common/linux/module.h index def98288..55a3b692 100644 --- a/drivers/gpu/nvgpu/common/linux/module.h +++ b/drivers/gpu/nvgpu/common/linux/module.h | |||
@@ -21,6 +21,7 @@ void gk20a_remove_support(struct gk20a *g); | |||
21 | void gk20a_driver_start_unload(struct gk20a *g); | 21 | void gk20a_driver_start_unload(struct gk20a *g); |
22 | int nvgpu_quiesce(struct gk20a *g); | 22 | int nvgpu_quiesce(struct gk20a *g); |
23 | int nvgpu_remove(struct device *dev, struct class *class); | 23 | int nvgpu_remove(struct device *dev, struct class *class); |
24 | void nvgpu_free_irq(struct gk20a *g); | ||
24 | 25 | ||
25 | extern struct class nvgpu_class; | 26 | extern struct class nvgpu_class; |
26 | 27 | ||
diff --git a/drivers/gpu/nvgpu/common/linux/pci.c b/drivers/gpu/nvgpu/common/linux/pci.c index f1d12367..1a7d1842 100644 --- a/drivers/gpu/nvgpu/common/linux/pci.c +++ b/drivers/gpu/nvgpu/common/linux/pci.c | |||
@@ -521,13 +521,12 @@ static void nvgpu_pci_remove(struct pci_dev *pdev) | |||
521 | if (gk20a_gpu_is_virtual(dev)) | 521 | if (gk20a_gpu_is_virtual(dev)) |
522 | return; | 522 | return; |
523 | 523 | ||
524 | /* only idle the GPU if the GPU is powered on */ | 524 | gk20a_driver_start_unload(g); |
525 | if (g->power_on) { | 525 | err = nvgpu_quiesce(g); |
526 | gk20a_driver_start_unload(g); | 526 | /* TODO: handle failure to idle */ |
527 | err = nvgpu_quiesce(g); | 527 | WARN(err, "gpu failed to idle during driver removal"); |
528 | /* TODO: handle failure to idle */ | 528 | |
529 | WARN(err, "gpu failed to idle during driver removal"); | 529 | nvgpu_free_irq(g); |
530 | } | ||
531 | 530 | ||
532 | nvgpu_remove(dev, &nvgpu_pci_class); | 531 | nvgpu_remove(dev, &nvgpu_pci_class); |
533 | 532 | ||
diff --git a/drivers/gpu/nvgpu/common/linux/thread.c b/drivers/gpu/nvgpu/common/linux/thread.c index fe3906eb..92c556f2 100644 --- a/drivers/gpu/nvgpu/common/linux/thread.c +++ b/drivers/gpu/nvgpu/common/linux/thread.c | |||
@@ -46,8 +46,10 @@ int nvgpu_thread_create(struct nvgpu_thread *thread, | |||
46 | 46 | ||
47 | void nvgpu_thread_stop(struct nvgpu_thread *thread) | 47 | void nvgpu_thread_stop(struct nvgpu_thread *thread) |
48 | { | 48 | { |
49 | kthread_stop(thread->task); | 49 | if (thread->task) { |
50 | thread->task = NULL; | 50 | kthread_stop(thread->task); |
51 | thread->task = NULL; | ||
52 | } | ||
51 | }; | 53 | }; |
52 | 54 | ||
53 | bool nvgpu_thread_should_stop(struct nvgpu_thread *thread) | 55 | bool nvgpu_thread_should_stop(struct nvgpu_thread *thread) |