summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu/os/linux/module.c
diff options
context:
space:
mode:
author: Debarshi Dutta <ddutta@nvidia.com> 2021-05-17 04:38:25 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2021-05-28 15:10:24 -0400
commit: 34993e4f7b0d47620e88ba64a6d7c67330d97e35 (patch)
tree: 2136284f5bd4095780884885413bb268fd318a96 /drivers/gpu/nvgpu/os/linux/module.c
parent: 5f88598b9e7b2cfe0387733577ece138a7bc912b (diff)
gpu: nvgpu: Add ECC Support for GV11B in Linux
Implement nvgpu plumbing to allow reporting ECC errors (corrected and uncorrected) to an L1SS service (if one exists). This patch includes the following: 1) Adds code that submits ECC error reports directly from interrupt context to an L1SS service on Linux. 2) Adds support for enabling/disabling the error reports via L1SS's registration/deregistration API. Nvgpu simply invokes an empty function until the registration is successful. 3) Adds a spinlock to correctly handle concurrent access to the ops used for submitting requests. 4) Adds error reporting for a subset of interrupts that can be verified via external ECC injection logic. A subsequent patch will add the API for the rest of the interrupts. 5) In case of critical (uncorrected) errors, moves nvgpu to the quiesce state. Jira L4T-1187 Bug 200700400 Change-Id: Id31f70531fba355e94e72c4f9762593e7667a11c Signed-off-by: Debarshi Dutta <ddutta@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2530411 Tested-by: Bibek Basu <bbasu@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com> Reviewed-by: Bibek Basu <bbasu@nvidia.com> Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> GVS: Gerrit_Virtual_Submit
Diffstat (limited to 'drivers/gpu/nvgpu/os/linux/module.c')
-rw-r--r-- drivers/gpu/nvgpu/os/linux/module.c | 22
1 files changed, 20 insertions, 2 deletions
diff --git a/drivers/gpu/nvgpu/os/linux/module.c b/drivers/gpu/nvgpu/os/linux/module.c
index 807df2ca..fdbab46d 100644
--- a/drivers/gpu/nvgpu/os/linux/module.c
+++ b/drivers/gpu/nvgpu/os/linux/module.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * GK20A Graphics 2 * GK20A Graphics
3 * 3 *
4 * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -49,6 +49,7 @@
49#include <nvgpu/clk_arb.h> 49#include <nvgpu/clk_arb.h>
50#include <nvgpu/timers.h> 50#include <nvgpu/timers.h>
51#include <nvgpu/channel.h> 51#include <nvgpu/channel.h>
52#include <nvgpu/nvgpu_err.h>
52 53
53#include "platform_gk20a.h" 54#include "platform_gk20a.h"
54#include "sysfs.h" 55#include "sysfs.h"
@@ -355,6 +356,10 @@ int gk20a_pm_finalize_poweron(struct device *dev)
355 gk20a_init_cde_support(l); 356 gk20a_init_cde_support(l);
356#endif 357#endif
357 358
359#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
360 nvgpu_enable_ecc_reporting(g);
361#endif
362
358 err = gk20a_sched_ctrl_init(g); 363 err = gk20a_sched_ctrl_init(g);
359 if (err) { 364 if (err) {
360 nvgpu_err(g, "failed to init sched control"); 365 nvgpu_err(g, "failed to init sched control");
@@ -364,9 +369,14 @@ int gk20a_pm_finalize_poweron(struct device *dev)
364 g->sw_ready = true; 369 g->sw_ready = true;
365 370
366done: 371done:
367 if (err) 372 if (err) {
368 g->power_on = false; 373 g->power_on = false;
369 374
375#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
376 nvgpu_disable_ecc_reporting(g);
377#endif
378 }
379
370 nvgpu_mutex_release(&g->power_lock); 380 nvgpu_mutex_release(&g->power_lock);
371 return err; 381 return err;
372} 382}
@@ -433,6 +443,10 @@ static int gk20a_pm_prepare_poweroff(struct device *dev)
433 /* Stop CPU from accessing the GPU registers. */ 443 /* Stop CPU from accessing the GPU registers. */
434 gk20a_lockout_registers(g); 444 gk20a_lockout_registers(g);
435 445
446#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
447 nvgpu_disable_ecc_reporting(g);
448#endif
449
436 nvgpu_hide_usermode_for_poweroff(g); 450 nvgpu_hide_usermode_for_poweroff(g);
437 nvgpu_mutex_release(&g->power_lock); 451 nvgpu_mutex_release(&g->power_lock);
438 return 0; 452 return 0;
@@ -1382,6 +1396,10 @@ static int gk20a_probe(struct platform_device *dev)
1382 goto return_err; 1396 goto return_err;
1383 } 1397 }
1384 1398
1399#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
1400 nvgpu_init_ecc_reporting(gk20a);
1401#endif
1402
1385 gk20a->nvgpu_reboot_nb.notifier_call = 1403 gk20a->nvgpu_reboot_nb.notifier_call =
1386 nvgpu_kernel_shutdown_notification; 1404 nvgpu_kernel_shutdown_notification;
1387 err = register_reboot_notifier(&gk20a->nvgpu_reboot_nb); 1405 err = register_reboot_notifier(&gk20a->nvgpu_reboot_nb);