aboutsummaryrefslogtreecommitdiffstats
path: root/include/nvgpu
diff options
context:
space:
mode:
author Joshua Bakita <bakitajoshua@gmail.com> 2023-10-29 13:07:40 -0400
committer Joshua Bakita <bakitajoshua@gmail.com> 2023-10-29 13:10:52 -0400
commit 2c5337a24f7f2d02989dfb733c55d6d8c7e90493 (patch)
tree b9f1028cb443b03190b710c0d7ee640bf5958631 /include/nvgpu
parent aa06f84f03cba7ad1aae5cd527355bb3d8c152a6 (diff)
Update includes to L4T r32.7.4 and drop nvgpu/gk20a.h dependency
Also add instructions for updating `include/`. These files are now only needed to build on Linux 4.9-based Tegra platforms.
Diffstat (limited to 'include/nvgpu')
-rw-r--r-- include/nvgpu/acr/nvgpu_acr.h 6
-rw-r--r-- include/nvgpu/bug.h 20
-rw-r--r-- include/nvgpu/enabled.h 9
-rw-r--r-- include/nvgpu/gk20a.h 8
-rw-r--r-- include/nvgpu/hw/gk20a/hw_gr_gk20a.h 61
-rw-r--r-- include/nvgpu/hw/gm20b/hw_gr_gm20b.h 14
-rw-r--r-- include/nvgpu/hw/gp106/hw_gr_gp106.h 4
-rw-r--r-- include/nvgpu/hw/gp10b/hw_gr_gp10b.h 4
-rw-r--r-- include/nvgpu/hw/gv100/hw_gr_gv100.h 4
-rw-r--r-- include/nvgpu/hw/gv11b/hw_gr_gv11b.h 4
-rw-r--r-- include/nvgpu/log.h 3
-rw-r--r-- include/nvgpu/nvgpu_err.h 359
-rw-r--r-- include/nvgpu/nvlink.h 2
-rw-r--r-- include/nvgpu/pmu.h 20
-rw-r--r-- include/nvgpu/pmuif/gpmuif_pg.h 14
-rw-r--r-- include/nvgpu/tsg.h 4
16 files changed, 523 insertions, 13 deletions
diff --git a/include/nvgpu/acr/nvgpu_acr.h b/include/nvgpu/acr/nvgpu_acr.h
index 7a0143e..cdb7bb8 100644
--- a/include/nvgpu/acr/nvgpu_acr.h
+++ b/include/nvgpu/acr/nvgpu_acr.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a 4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"), 5 * copy of this software and associated documentation files (the "Software"),
@@ -39,7 +39,11 @@ struct hs_acr;
39struct nvgpu_acr; 39struct nvgpu_acr;
40 40
41#define HSBIN_ACR_BL_UCODE_IMAGE "pmu_bl.bin" 41#define HSBIN_ACR_BL_UCODE_IMAGE "pmu_bl.bin"
42#define GM20B_HSBIN_ACR_PROD_UCODE "nv_acr_ucode_prod.bin"
43#define GM20B_HSBIN_ACR_DBG_UCODE "nv_acr_ucode_dbg.bin"
42#define HSBIN_ACR_UCODE_IMAGE "acr_ucode.bin" 44#define HSBIN_ACR_UCODE_IMAGE "acr_ucode.bin"
45#define HSBIN_ACR_PROD_UCODE "acr_ucode_prod.bin"
46#define HSBIN_ACR_DBG_UCODE "acr_ucode_dbg.bin"
43#define HSBIN_ACR_AHESASC_PROD_UCODE "acr_ahesasc_prod_ucode.bin" 47#define HSBIN_ACR_AHESASC_PROD_UCODE "acr_ahesasc_prod_ucode.bin"
44#define HSBIN_ACR_ASB_PROD_UCODE "acr_asb_prod_ucode.bin" 48#define HSBIN_ACR_ASB_PROD_UCODE "acr_asb_prod_ucode.bin"
45#define HSBIN_ACR_AHESASC_DBG_UCODE "acr_ahesasc_dbg_ucode.bin" 49#define HSBIN_ACR_AHESASC_DBG_UCODE "acr_ahesasc_dbg_ucode.bin"
diff --git a/include/nvgpu/bug.h b/include/nvgpu/bug.h
index 3d139b7..82d641b 100644
--- a/include/nvgpu/bug.h
+++ b/include/nvgpu/bug.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a 4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"), 5 * copy of this software and associated documentation files (the "Software"),
@@ -24,6 +24,24 @@
24 24
25#ifdef __KERNEL__ 25#ifdef __KERNEL__
26#include <linux/bug.h> 26#include <linux/bug.h>
27/*
28 * Define an assert macro that code within nvgpu can use.
29 *
30 * The goal of this macro is for debugging but what that means varies from OS
31 * to OS. On Linux we don't want to BUG() for general driver misbehaving. BUG()
32 * is a very heavy handed tool - in fact there's probably nowhere within the
33 * nvgpu core code where it makes sense to use a BUG() when running under Linux.
34 *
35 * However, on QNX (and POSIX) BUG() will just kill the current process. This
36 * means we can use it for handling bugs in nvgpu.
37 *
38 * As a result this macro varies depending on platform.
39 */
40#define nvgpu_assert(cond) ((void) WARN_ON(!(cond)))
41#define nvgpu_do_assert_print(g, fmt, arg...) \
42 do { \
43 nvgpu_err(g, fmt, ##arg); \
44 } while (false)
27#elif defined(__NVGPU_POSIX__) 45#elif defined(__NVGPU_POSIX__)
28#include <nvgpu/posix/bug.h> 46#include <nvgpu/posix/bug.h>
29#else 47#else
diff --git a/include/nvgpu/enabled.h b/include/nvgpu/enabled.h
index ef55dad..51e9358 100644
--- a/include/nvgpu/enabled.h
+++ b/include/nvgpu/enabled.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a 4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"), 5 * copy of this software and associated documentation files (the "Software"),
@@ -85,7 +85,12 @@ struct gk20a;
85#define NVGPU_MM_USE_PHYSICAL_SG 27 85#define NVGPU_MM_USE_PHYSICAL_SG 27
86/* WAR for gm20b chips. */ 86/* WAR for gm20b chips. */
87#define NVGPU_MM_FORCE_128K_PMU_VM 28 87#define NVGPU_MM_FORCE_128K_PMU_VM 28
88 88/* SW ERRATA to disable L3 alloc Bit of the physical address.
89 * Bit number varies between SOCs.
90 * E.g. 64GB physical RAM support for gv11b requires this SW errata
91 * to be enabled.
92 */
93#define NVGPU_DISABLE_L3_SUPPORT 29
89/* 94/*
90 * Host flags 95 * Host flags
91 */ 96 */
diff --git a/include/nvgpu/gk20a.h b/include/nvgpu/gk20a.h
index aa95969..19bfaee 100644
--- a/include/nvgpu/gk20a.h
+++ b/include/nvgpu/gk20a.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2011-2022, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * GK20A Graphics 4 * GK20A Graphics
5 * 5 *
@@ -517,6 +517,7 @@ struct gpu_ops {
517 u32 *priv_addr_table, 517 u32 *priv_addr_table,
518 u32 *priv_addr_table_index); 518 u32 *priv_addr_table_index);
519 u32 (*fecs_ctxsw_mailbox_size)(void); 519 u32 (*fecs_ctxsw_mailbox_size)(void);
520 u32 (*gpc0_gpccs_ctxsw_mailbox_size)(void);
520 int (*init_sw_bundle64)(struct gk20a *g); 521 int (*init_sw_bundle64)(struct gk20a *g);
521 int (*alloc_global_ctx_buffers)(struct gk20a *g); 522 int (*alloc_global_ctx_buffers)(struct gk20a *g);
522 int (*map_global_ctx_buffers)(struct gk20a *g, 523 int (*map_global_ctx_buffers)(struct gk20a *g,
@@ -719,7 +720,7 @@ struct gpu_ops {
719 struct ch_state *ch_state); 720 struct ch_state *ch_state);
720 u32 (*intr_0_error_mask)(struct gk20a *g); 721 u32 (*intr_0_error_mask)(struct gk20a *g);
721 int (*is_preempt_pending)(struct gk20a *g, u32 id, 722 int (*is_preempt_pending)(struct gk20a *g, u32 id,
722 unsigned int id_type); 723 unsigned int id_type, bool preempt_retries_left);
723 void (*init_pbdma_intr_descs)(struct fifo_gk20a *f); 724 void (*init_pbdma_intr_descs)(struct fifo_gk20a *f);
724 int (*reset_enable_hw)(struct gk20a *g); 725 int (*reset_enable_hw)(struct gk20a *g);
725 int (*setup_userd)(struct channel_gk20a *c); 726 int (*setup_userd)(struct channel_gk20a *c);
@@ -1079,6 +1080,7 @@ struct gpu_ops {
1079 u32 (*pmu_pg_supported_engines_list)(struct gk20a *g); 1080 u32 (*pmu_pg_supported_engines_list)(struct gk20a *g);
1080 u32 (*pmu_pg_engines_feature_list)(struct gk20a *g, 1081 u32 (*pmu_pg_engines_feature_list)(struct gk20a *g,
1081 u32 pg_engine_id); 1082 u32 pg_engine_id);
1083 int (*pmu_process_pg_event)(struct gk20a *g, void *pmumsg);
1082 bool (*pmu_is_lpwr_feature_supported)(struct gk20a *g, 1084 bool (*pmu_is_lpwr_feature_supported)(struct gk20a *g,
1083 u32 feature_id); 1085 u32 feature_id);
1084 int (*pmu_lpwr_enable_pg)(struct gk20a *g, bool pstate_lock); 1086 int (*pmu_lpwr_enable_pg)(struct gk20a *g, bool pstate_lock);
@@ -1793,6 +1795,8 @@ bool gk20a_check_poweron(struct gk20a *g);
1793int gk20a_prepare_poweroff(struct gk20a *g); 1795int gk20a_prepare_poweroff(struct gk20a *g);
1794int gk20a_finalize_poweron(struct gk20a *g); 1796int gk20a_finalize_poweron(struct gk20a *g);
1795 1797
1798int nvgpu_wait_for_stall_interrupts(struct gk20a *g, u32 timeout);
1799int nvgpu_wait_for_nonstall_interrupts(struct gk20a *g, u32 timeout);
1796void nvgpu_wait_for_deferred_interrupts(struct gk20a *g); 1800void nvgpu_wait_for_deferred_interrupts(struct gk20a *g);
1797 1801
1798struct gk20a * __must_check gk20a_get(struct gk20a *g); 1802struct gk20a * __must_check gk20a_get(struct gk20a *g);
diff --git a/include/nvgpu/hw/gk20a/hw_gr_gk20a.h b/include/nvgpu/hw/gk20a/hw_gr_gk20a.h
index 826108f..376cc8f 100644
--- a/include/nvgpu/hw/gk20a/hw_gr_gk20a.h
+++ b/include/nvgpu/hw/gk20a/hw_gr_gk20a.h
@@ -1380,6 +1380,10 @@ static inline u32 gr_gpc0_gpccs_ctxsw_status_1_r(void)
1380{ 1380{
1381 return 0x00502400U; 1381 return 0x00502400U;
1382} 1382}
1383static inline u32 gr_gpc0_gpccs_ctxsw_mailbox__size_1_v(void)
1384{
1385 return 0x00000010U;
1386}
1383static inline u32 gr_fecs_ctxsw_idlestate_r(void) 1387static inline u32 gr_fecs_ctxsw_idlestate_r(void)
1384{ 1388{
1385 return 0x00409420U; 1389 return 0x00409420U;
@@ -3804,4 +3808,61 @@ static inline u32 gr_gpcs_tpcs_sm_dbgr_control0_run_trigger_task_f(void)
3804{ 3808{
3805 return 0x40000000U; 3809 return 0x40000000U;
3806} 3810}
3811
3812static inline u32 gr_gpc0_gpccs_falcon_irqstat_r(void)
3813{
3814 return 0x00502008U;
3815}
3816static inline u32 gr_gpc0_gpccs_falcon_irqmode_r(void)
3817{
3818 return 0x0050200cU;
3819}
3820static inline u32 gr_gpc0_gpccs_falcon_irqmask_r(void)
3821{
3822 return 0x00502018U;
3823}
3824static inline u32 gr_gpc0_gpccs_falcon_irqdest_r(void)
3825{
3826 return 0x0050201cU;
3827}
3828static inline u32 gr_gpc0_gpccs_falcon_debug1_r(void)
3829{
3830 return 0x00502090U;
3831}
3832static inline u32 gr_gpc0_gpccs_falcon_debuginfo_r(void)
3833{
3834 return 0x00502094U;
3835}
3836static inline u32 gr_gpc0_gpccs_falcon_engctl_r(void)
3837{
3838 return 0x005020a4U;
3839}
3840static inline u32 gr_gpc0_gpccs_falcon_curctx_r(void)
3841{
3842 return 0x00502050U;
3843}
3844static inline u32 gr_gpc0_gpccs_falcon_nxtctx_r(void)
3845{
3846 return 0x00502054U;
3847}
3848static inline u32 gr_gpc0_gpccs_ctxsw_mailbox_r(u32 i)
3849{
3850 return 0x00502800U + i*4U;
3851}
3852static inline u32 gr_gpc0_gpccs_falcon_icd_cmd_r(void)
3853{
3854 return 0x00502200U;
3855}
3856static inline u32 gr_gpc0_gpccs_falcon_icd_cmd_opc_rreg_f(void)
3857{
3858 return 0x8U;
3859}
3860static inline u32 gr_gpc0_gpccs_falcon_icd_cmd_idx_f(u32 v)
3861{
3862 return (v & 0x1fU) << 8U;
3863}
3864static inline u32 gr_gpc_gpccs_falcon_icd_rdata_r(void)
3865{
3866 return 0x0050220cU;
3867}
3807#endif 3868#endif
diff --git a/include/nvgpu/hw/gm20b/hw_gr_gm20b.h b/include/nvgpu/hw/gm20b/hw_gr_gm20b.h
index 5bbb3b9..79ad326 100644
--- a/include/nvgpu/hw/gm20b/hw_gr_gm20b.h
+++ b/include/nvgpu/hw/gm20b/hw_gr_gm20b.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2014-2023, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a 4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"), 5 * copy of this software and associated documentation files (the "Software"),
@@ -1396,6 +1396,10 @@ static inline u32 gr_gpc0_gpccs_ctxsw_status_1_r(void)
1396{ 1396{
1397 return 0x00502400U; 1397 return 0x00502400U;
1398} 1398}
1399static inline u32 gr_gpc0_gpccs_ctxsw_mailbox__size_1_v(void)
1400{
1401 return 0x00000010U;
1402}
1399static inline u32 gr_fecs_ctxsw_idlestate_r(void) 1403static inline u32 gr_fecs_ctxsw_idlestate_r(void)
1400{ 1404{
1401 return 0x00409420U; 1405 return 0x00409420U;
@@ -2344,6 +2348,14 @@ static inline u32 gr_gpcs_tpcs_tex_m_dbg2_su_rd_coalesce_en_m(void)
2344{ 2348{
2345 return 0x1U << 4U; 2349 return 0x1U << 4U;
2346} 2350}
2351static inline u32 gr_gpcs_tpcs_tex_m_dbg2_tex_rd_coalesce_en_f(u32 v)
2352{
2353 return (v & 0x1U) << 5U;
2354}
2355static inline u32 gr_gpcs_tpcs_tex_m_dbg2_tex_rd_coalesce_en_m(void)
2356{
2357 return 0x1U << 5U;
2358}
2347static inline u32 gr_gpccs_falcon_addr_r(void) 2359static inline u32 gr_gpccs_falcon_addr_r(void)
2348{ 2360{
2349 return 0x0041a0acU; 2361 return 0x0041a0acU;
diff --git a/include/nvgpu/hw/gp106/hw_gr_gp106.h b/include/nvgpu/hw/gp106/hw_gr_gp106.h
index 3ebed7e..ac82901 100644
--- a/include/nvgpu/hw/gp106/hw_gr_gp106.h
+++ b/include/nvgpu/hw/gp106/hw_gr_gp106.h
@@ -1508,6 +1508,10 @@ static inline u32 gr_gpc0_gpccs_ctxsw_status_1_r(void)
1508{ 1508{
1509 return 0x00502400U; 1509 return 0x00502400U;
1510} 1510}
1511static inline u32 gr_gpc0_gpccs_ctxsw_mailbox__size_1_v(void)
1512{
1513 return 0x00000010U;
1514}
1511static inline u32 gr_fecs_ctxsw_idlestate_r(void) 1515static inline u32 gr_fecs_ctxsw_idlestate_r(void)
1512{ 1516{
1513 return 0x00409420U; 1517 return 0x00409420U;
diff --git a/include/nvgpu/hw/gp10b/hw_gr_gp10b.h b/include/nvgpu/hw/gp10b/hw_gr_gp10b.h
index f7bc4c2..89c6bba 100644
--- a/include/nvgpu/hw/gp10b/hw_gr_gp10b.h
+++ b/include/nvgpu/hw/gp10b/hw_gr_gp10b.h
@@ -1584,6 +1584,10 @@ static inline u32 gr_gpc0_gpccs_ctxsw_status_1_r(void)
1584{ 1584{
1585 return 0x00502400U; 1585 return 0x00502400U;
1586} 1586}
1587static inline u32 gr_gpc0_gpccs_ctxsw_mailbox__size_1_v(void)
1588{
1589 return 0x00000010U;
1590}
1587static inline u32 gr_fecs_ctxsw_idlestate_r(void) 1591static inline u32 gr_fecs_ctxsw_idlestate_r(void)
1588{ 1592{
1589 return 0x00409420U; 1593 return 0x00409420U;
diff --git a/include/nvgpu/hw/gv100/hw_gr_gv100.h b/include/nvgpu/hw/gv100/hw_gr_gv100.h
index 0f83d6b..3955a63 100644
--- a/include/nvgpu/hw/gv100/hw_gr_gv100.h
+++ b/include/nvgpu/hw/gv100/hw_gr_gv100.h
@@ -1816,6 +1816,10 @@ static inline u32 gr_gpc0_gpccs_ctxsw_status_1_r(void)
1816{ 1816{
1817 return 0x00502400U; 1817 return 0x00502400U;
1818} 1818}
1819static inline u32 gr_gpc0_gpccs_ctxsw_mailbox__size_1_v(void)
1820{
1821 return 0x00000010U;
1822}
1819static inline u32 gr_fecs_ctxsw_idlestate_r(void) 1823static inline u32 gr_fecs_ctxsw_idlestate_r(void)
1820{ 1824{
1821 return 0x00409420U; 1825 return 0x00409420U;
diff --git a/include/nvgpu/hw/gv11b/hw_gr_gv11b.h b/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
index f7d8089..4a3da79 100644
--- a/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
+++ b/include/nvgpu/hw/gv11b/hw_gr_gv11b.h
@@ -2420,6 +2420,10 @@ static inline u32 gr_gpc0_gpccs_ctxsw_status_1_r(void)
2420{ 2420{
2421 return 0x00502400U; 2421 return 0x00502400U;
2422} 2422}
2423static inline u32 gr_gpc0_gpccs_ctxsw_mailbox__size_1_v(void)
2424{
2425 return 0x00000010U;
2426}
2423static inline u32 gr_fecs_ctxsw_idlestate_r(void) 2427static inline u32 gr_fecs_ctxsw_idlestate_r(void)
2424{ 2428{
2425 return 0x00409420U; 2429 return 0x00409420U;
diff --git a/include/nvgpu/log.h b/include/nvgpu/log.h
index 70a1676..2bcca33 100644
--- a/include/nvgpu/log.h
+++ b/include/nvgpu/log.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a 4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"), 5 * copy of this software and associated documentation files (the "Software"),
@@ -80,6 +80,7 @@ void __nvgpu_log_dbg(struct gk20a *g, u64 log_mask,
80#define gpu_dbg_vidmem BIT(24) /* VIDMEM tracing. */ 80#define gpu_dbg_vidmem BIT(24) /* VIDMEM tracing. */
81#define gpu_dbg_nvlink BIT(25) /* nvlink Operation tracing. */ 81#define gpu_dbg_nvlink BIT(25) /* nvlink Operation tracing. */
82#define gpu_dbg_clk_arb BIT(26) /* Clk arbiter debugging. */ 82#define gpu_dbg_clk_arb BIT(26) /* Clk arbiter debugging. */
83#define gpu_dbg_ecc BIT(27) /* Print ECC Info Logs. */
83#define gpu_dbg_mem BIT(31) /* memory accesses; very verbose. */ 84#define gpu_dbg_mem BIT(31) /* memory accesses; very verbose. */
84 85
85/** 86/**
diff --git a/include/nvgpu/nvgpu_err.h b/include/nvgpu/nvgpu_err.h
new file mode 100644
index 0000000..0595faf
--- /dev/null
+++ b/include/nvgpu/nvgpu_err.h
@@ -0,0 +1,359 @@
1/*
2 * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#ifndef NVGPU_NVGPU_ERR_H
24#define NVGPU_NVGPU_ERR_H
25
26/**
27 * @file
28 *
29 * Define indices for HW units and errors. Define structures used to carry error
30 * information. Declare prototype for APIs that are used to report GPU HW errors
31 * to the Safety_Services framework.
32 */
33
34#include <nvgpu/types.h>
35#include <nvgpu/atomic.h>
36
37struct gk20a;
38
39/**
40 * @defgroup INDICES_FOR_GPU_HW_UNITS
41 * Macros used to assign unique index to GPU HW units.
42 * @{
43 */
44#define NVGPU_ERR_MODULE_SM (0U)
45#define NVGPU_ERR_MODULE_FECS (1U)
46#define NVGPU_ERR_MODULE_PMU (2U)
47/**
48 * @}
49 */
50
51/**
52 * @defgroup LIST_OF_ERRORS_REPORTED_FROM_SM
53 * Macros used to assign unique index to errors reported from the SM unit.
54 * @{
55 */
56#define GPU_SM_L1_TAG_ECC_CORRECTED (0U)
57#define GPU_SM_L1_TAG_ECC_UNCORRECTED (1U)
58#define GPU_SM_CBU_ECC_UNCORRECTED (3U)
59#define GPU_SM_LRF_ECC_UNCORRECTED (5U)
60#define GPU_SM_L1_DATA_ECC_UNCORRECTED (7U)
61#define GPU_SM_ICACHE_L0_DATA_ECC_UNCORRECTED (9U)
62#define GPU_SM_ICACHE_L1_DATA_ECC_UNCORRECTED (11U)
63#define GPU_SM_ICACHE_L0_PREDECODE_ECC_UNCORRECTED (13U)
64#define GPU_SM_L1_TAG_MISS_FIFO_ECC_UNCORRECTED (15U)
65#define GPU_SM_L1_TAG_S2R_PIXPRF_ECC_UNCORRECTED (17U)
66#define GPU_SM_ICACHE_L1_PREDECODE_ECC_UNCORRECTED (20U)
67/**
68 * @}
69 */
70
71/**
72 * @defgroup LIST_OF_ERRORS_REPORTED_FROM_FECS
73 * Macros used to assign unique index to errors reported from the FECS unit.
74 * @{
75 */
76#define GPU_FECS_FALCON_IMEM_ECC_CORRECTED (0U)
77#define GPU_FECS_FALCON_IMEM_ECC_UNCORRECTED (1U)
78#define GPU_FECS_FALCON_DMEM_ECC_UNCORRECTED (3U)
79/**
80 * @}
81 */
82
83/**
84 * @defgroup LIST_OF_ERRORS_REPORTED_FROM_GPCCS
85 * Macros used to assign unique index to errors reported from the GPCCS unit.
86 * @{
87 */
88#define GPU_GPCCS_FALCON_IMEM_ECC_CORRECTED (0U)
89#define GPU_GPCCS_FALCON_IMEM_ECC_UNCORRECTED (1U)
90#define GPU_GPCCS_FALCON_DMEM_ECC_UNCORRECTED (3U)
91/**
92 * @}
93 */
94
95/**
96 * @defgroup LIST_OF_ERRORS_REPORTED_FROM_MMU
97 * Macros used to assign unique index to errors reported from the MMU unit.
98 * @{
99 */
100#define GPU_MMU_L1TLB_SA_DATA_ECC_UNCORRECTED (1U)
101#define GPU_MMU_L1TLB_FA_DATA_ECC_UNCORRECTED (3U)
102/**
103 * @}
104 */
105
106/**
107 * @defgroup LIST_OF_ERRORS_REPORTED_FROM_GCC
108 * Macros used to assign unique index to errors reported from the GCC unit.
109 * @{
110 */
111#define GPU_GCC_L15_ECC_UNCORRECTED (1U)
112/**
113 * @}
114 */
115
116
117/**
118 * @defgroup LIST_OF_ERRORS_REPORTED_FROM_PMU
119 * Macros used to assign unique index to errors reported from the PMU unit.
120 * @{
121 */
122#define GPU_PMU_FALCON_IMEM_ECC_CORRECTED (0U)
123#define GPU_PMU_FALCON_IMEM_ECC_UNCORRECTED (1U)
124#define GPU_PMU_FALCON_DMEM_ECC_UNCORRECTED (3U)
125/**
126 * @}
127 */
128
129/**
130 * @defgroup LIST_OF_ERRORS_REPORTED_FROM_LTC
131 * Macros used to assign unique index to errors reported from the LTC unit.
132 * @{
133 */
134#define GPU_LTC_CACHE_DSTG_ECC_CORRECTED (0U)
135#define GPU_LTC_CACHE_DSTG_ECC_UNCORRECTED (1U)
136#define GPU_LTC_CACHE_TSTG_ECC_UNCORRECTED (3U)
137#define GPU_LTC_CACHE_DSTG_BE_ECC_UNCORRECTED (7U)
138/**
139 * @}
140 */
141
142/**
143 * @defgroup LIST_OF_ERRORS_REPORTED_FROM_HUBMMU
144 * Macros used to assign unique index to errors reported from the HUBMMU unit.
145 * @{
146 */
147#define GPU_HUBMMU_L2TLB_SA_DATA_ECC_UNCORRECTED (1U)
148#define GPU_HUBMMU_TLB_SA_DATA_ECC_UNCORRECTED (3U)
149#define GPU_HUBMMU_PTE_DATA_ECC_UNCORRECTED (5U)
150#define GPU_HUBMMU_PDE0_DATA_ECC_UNCORRECTED (7U)
151#define GPU_HUBMMU_PAGE_FAULT_ERROR (8U)
152
153
154#ifdef CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING
155/**
156 * @}
157 */
158
159/**
160 * nvgpu_err_desc structure holds fields which describe an error along with
161 * function callback which can be used to inject the error.
162 */
163struct nvgpu_err_desc {
164 /** String representation of error. */
165 const char *name;
166
167 /** Flag to classify an error as critical or non-critical. */
168 bool is_critical;
169
170 /**
171 * Error Threshold: once this threshold value is reached, then the
172 * corresponding error counter will be reset to 0 and the error will be
173 * propagated to Safety_Services.
174 */
175 int err_threshold;
176
177 /**
178 * Total number of times an error has occurred (since its last reset).
179 */
180 nvgpu_atomic_t err_count;
181
182 /** Error ID. */
183 u8 error_id;
184};
185
186/**
187 * gpu_err_header structure holds fields which are required to identify the
188 * version of header, sub-error type, sub-unit id, error address and time stamp.
189 */
190struct gpu_err_header {
191 /** Version of GPU error header. */
192 struct {
193 /** Major version number. */
194 u16 major;
195 /** Minor version number. */
196 u16 minor;
197 } version;
198
199 /** Sub error type corresponding to the error that is being reported. */
200 u32 sub_err_type;
201
202 /** ID of the sub-unit in a HW unit which encountered an error. */
203 u64 sub_unit_id;
204
205 /** Location of the error. */
206 u64 address;
207
208 /** Timestamp in nanoseconds. */
209 u64 timestamp_ns;
210};
211
212struct gpu_ecc_error_info {
213 struct gpu_err_header header;
214
215 /** Number of ECC errors. */
216 u64 err_cnt;
217};
218
219/**
220 * nvgpu_err_hw_module structure holds fields which describe the h/w modules
221 * error reporting capabilities.
222 */
223struct nvgpu_err_hw_module {
224 /** String representation of a given HW unit. */
225 const char *name;
226
227 /** HW unit ID. */
228 u32 hw_unit;
229
230 /** Total number of errors reported from a given HW unit. */
231 u32 num_errs;
232
233 u32 base_ecc_service_id;
234
235 /** Used to get error description from look-up table. */
236 struct nvgpu_err_desc *errs;
237};
238
239struct nvgpu_ecc_reporting_ops {
240 void (*report_ecc_err)(struct gk20a *g, u32 hw_unit, u32 inst,
241 u32 err_id, u64 err_addr, u64 err_count);
242};
243
244struct nvgpu_ecc_reporting {
245 struct nvgpu_spinlock lock;
246 /* This flag is protected by the above spinlock */
247 bool ecc_reporting_service_enabled;
248 const struct nvgpu_ecc_reporting_ops *ops;
249};
250
251 /**
252 * This macro is used to initialize the members of nvgpu_err_desc struct.
253 */
254#define GPU_ERR(err, critical, id, threshold, ecount) \
255{ \
256 .name = (err), \
257 .is_critical = (critical), \
258 .error_id = (id), \
259 .err_threshold = (threshold), \
260 .err_count = NVGPU_ATOMIC_INIT(ecount), \
261}
262
263/**
264 * This macro is used to initialize critical errors.
265 */
266#define GPU_CRITERR(err, id, threshold, ecount) \
267 GPU_ERR(err, true, id, threshold, ecount)
268
269/**
270 * This macro is used to initialize non-critical errors.
271 */
272#define GPU_NONCRITERR(err, id, threshold, ecount) \
273 GPU_ERR(err, false, id, threshold, ecount)
274
275/**
276 * @brief GPU HW errors need to be reported to Safety_Services via SDL unit.
277 * This function provides an interface to report ECC errors to SDL unit.
278 *
279 * @param g [in] - The GPU driver struct.
280 * @param hw_unit [in] - Index of HW unit.
281 * - List of valid HW unit IDs
282 * - NVGPU_ERR_MODULE_SM
283 * - NVGPU_ERR_MODULE_FECS
284 * - NVGPU_ERR_MODULE_GPCCS
285 * - NVGPU_ERR_MODULE_MMU
286 * - NVGPU_ERR_MODULE_GCC
287 * - NVGPU_ERR_MODULE_PMU
288 * - NVGPU_ERR_MODULE_LTC
289 * - NVGPU_ERR_MODULE_HUBMMU
290 * @param inst [in] - Instance ID.
291 * - In case of multiple instances of the same HW
292 * unit (e.g., there are multiple instances of
293 * SM), it is used to identify the instance
294 * that encountered a fault.
295 * @param err_id [in] - Error index.
296 * - For SM:
297 * - Min: GPU_SM_L1_TAG_ECC_CORRECTED
298 * - Max: GPU_SM_ICACHE_L1_PREDECODE_ECC_UNCORRECTED
299 * - For FECS:
300 * - Min: GPU_FECS_FALCON_IMEM_ECC_CORRECTED
301 * - Max: GPU_FECS_FALCON_DMEM_ECC_UNCORRECTED
302 * - For GPCCS:
303 * - Min: GPU_GPCCS_FALCON_IMEM_ECC_CORRECTED
304 * - Max: GPU_GPCCS_FALCON_DMEM_ECC_UNCORRECTED
305 * - For MMU:
306 * - Min: GPU_MMU_L1TLB_SA_DATA_ECC_UNCORRECTED
307 * - Max: GPU_MMU_L1TLB_FA_DATA_ECC_UNCORRECTED
308 * - For GCC:
309 * - Min: GPU_GCC_L15_ECC_UNCORRECTED
310 * - Max: GPU_GCC_L15_ECC_UNCORRECTED
311 * - For PMU:
312 * - Min: GPU_PMU_FALCON_IMEM_ECC_CORRECTED
313 * - Max: GPU_PMU_FALCON_DMEM_ECC_UNCORRECTED
314 * - For LTC:
315 * - Min: GPU_LTC_CACHE_DSTG_ECC_CORRECTED
316 * - Max: GPU_LTC_CACHE_DSTG_BE_ECC_UNCORRECTED
317 * - For HUBMMU:
318 * - Min: GPU_HUBMMU_L2TLB_SA_DATA_ECC_UNCORRECTED
319 * - Max: GPU_HUBMMU_PDE0_DATA_ECC_UNCORRECTED
320 * @param err_addr [in] - Error address.
321 * - This is the location at which correctable or
322 * uncorrectable error has occurred.
323 * @param err_count [in] - Error count.
324 *
325 * - Checks whether SDL is supported in the current GPU platform. If SDL is not
326 * supported, it simply returns.
327 * - Validates both \a hw_unit and \a err_id indices. In case of a failure,
328 * invokes #nvgpu_sdl_handle_report_failure() api.
329 * - Gets the current time of a clock. In case of a failure, invokes
330 * #nvgpu_sdl_handle_report_failure() api.
331 * - Gets error description from internal look-up table using \a hw_unit and
332 * \a err_id indices.
333 * - Forms error packet using details such as time-stamp, \a hw_unit, \a err_id,
334 * criticality of the error, \a inst, \a err_addr, \a err_count, error
335 * description, and size of the error packet.
336 * - Performs compile-time assert check to ensure that the size of the error
337 * packet does not exceed the maximum allowable size specified in
338 * #MAX_ERR_MSG_SIZE.
339 *
340 * @return None
341 */
342void nvgpu_report_ecc_err(struct gk20a *g, u32 hw_unit, u32 inst,
343 u32 err_id, u64 err_addr, u64 err_count);
344
345void nvgpu_init_ecc_reporting(struct gk20a *g);
346void nvgpu_enable_ecc_reporting(struct gk20a *g);
347void nvgpu_disable_ecc_reporting(struct gk20a *g);
348void nvgpu_deinit_ecc_reporting(struct gk20a *g);
349
350#else
351
352static inline void nvgpu_report_ecc_err(struct gk20a *g, u32 hw_unit, u32 inst,
353 u32 err_id, u64 err_addr, u64 err_count) {
354
355}
356
357#endif /* CONFIG_NVGPU_SUPPORT_LINUX_ECC_ERROR_REPORTING */
358
359#endif /* NVGPU_NVGPU_ERR_H */ \ No newline at end of file
diff --git a/include/nvgpu/nvlink.h b/include/nvgpu/nvlink.h
index 26c83f1..a74111c 100644
--- a/include/nvgpu/nvlink.h
+++ b/include/nvgpu/nvlink.h
@@ -26,7 +26,7 @@
26#include <nvgpu/types.h> 26#include <nvgpu/types.h>
27 27
28#ifdef __KERNEL__ 28#ifdef __KERNEL__
29//#include <nvgpu/linux/nvlink.h> 29#include <nvgpu/linux/nvlink.h>
30#elif defined(__NVGPU_POSIX__) 30#elif defined(__NVGPU_POSIX__)
31#include <nvgpu/posix/nvlink.h> 31#include <nvgpu/posix/nvlink.h>
32#else 32#else
diff --git a/include/nvgpu/pmu.h b/include/nvgpu/pmu.h
index 2b745c7..fb1b016 100644
--- a/include/nvgpu/pmu.h
+++ b/include/nvgpu/pmu.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a 4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"), 5 * copy of this software and associated documentation files (the "Software"),
@@ -94,6 +94,23 @@
94#define PMU_STATE_STARTED 7U /* Fully initialized */ 94#define PMU_STATE_STARTED 7U /* Fully initialized */
95#define PMU_STATE_EXIT 8U /* Exit PMU state machine */ 95#define PMU_STATE_EXIT 8U /* Exit PMU state machine */
96 96
97/* state transition :
98 * OFF => [OFF_ON_PENDING optional] => ON_PENDING => ON => OFF
99 * ON => OFF is always synchronized
100 */
101/* elpg is off */
102#define PMU_ELPG_STAT_OFF 0U
103/* elpg is on */
104#define PMU_ELPG_STAT_ON 1U
105/* elpg is off, ALLOW cmd has been sent, wait for ack */
106#define PMU_ELPG_STAT_ON_PENDING 2U
107/* elpg is on, DISALLOW cmd has been sent, wait for ack */
108#define PMU_ELPG_STAT_OFF_PENDING 3U
109/* elpg is off, caller has requested on, but ALLOW
110 * cmd hasn't been sent due to ENABLE_ALLOW delay
111 */
112#define PMU_ELPG_STAT_OFF_ON_PENDING 4U
113
97#define GK20A_PMU_UCODE_NB_MAX_OVERLAY 32U 114#define GK20A_PMU_UCODE_NB_MAX_OVERLAY 32U
98#define GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH 64U 115#define GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH 64U
99 116
@@ -351,6 +368,7 @@ struct nvgpu_pmu {
351 u32 stat_dmem_offset[PMU_PG_ELPG_ENGINE_ID_INVALID_ENGINE]; 368 u32 stat_dmem_offset[PMU_PG_ELPG_ENGINE_ID_INVALID_ENGINE];
352 369
353 u32 elpg_stat; 370 u32 elpg_stat;
371 u32 disallow_state;
354 372
355 u32 mscg_stat; 373 u32 mscg_stat;
356 u32 mscg_transition_state; 374 u32 mscg_transition_state;
diff --git a/include/nvgpu/pmuif/gpmuif_pg.h b/include/nvgpu/pmuif/gpmuif_pg.h
index 69a7ea4..58311ae 100644
--- a/include/nvgpu/pmuif/gpmuif_pg.h
+++ b/include/nvgpu/pmuif/gpmuif_pg.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a 4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"), 5 * copy of this software and associated documentation files (the "Software"),
@@ -33,6 +33,11 @@
33#define PMU_PG_ELPG_ENGINE_ID_INVALID_ENGINE (0x00000005U) 33#define PMU_PG_ELPG_ENGINE_ID_INVALID_ENGINE (0x00000005U)
34#define PMU_PG_ELPG_ENGINE_MAX PMU_PG_ELPG_ENGINE_ID_INVALID_ENGINE 34#define PMU_PG_ELPG_ENGINE_MAX PMU_PG_ELPG_ENGINE_ID_INVALID_ENGINE
35 35
36/* Async PG message IDs */
37enum {
38 PMU_PG_MSG_ASYNC_CMD_DISALLOW,
39};
40
36/* PG message */ 41/* PG message */
37enum { 42enum {
38 PMU_PG_ELPG_MSG_INIT_ACK, 43 PMU_PG_ELPG_MSG_INIT_ACK,
@@ -73,12 +78,19 @@ struct pmu_pg_msg_eng_buf_stat {
73 u8 status; 78 u8 status;
74}; 79};
75 80
81struct pmu_pg_msg_async_cmd_resp {
82 u8 msg_type;
83 u8 ctrl_id;
84 u8 msg_id;
85};
86
76struct pmu_pg_msg { 87struct pmu_pg_msg {
77 union { 88 union {
78 u8 msg_type; 89 u8 msg_type;
79 struct pmu_pg_msg_elpg_msg elpg_msg; 90 struct pmu_pg_msg_elpg_msg elpg_msg;
80 struct pmu_pg_msg_stat stat; 91 struct pmu_pg_msg_stat stat;
81 struct pmu_pg_msg_eng_buf_stat eng_buf_stat; 92 struct pmu_pg_msg_eng_buf_stat eng_buf_stat;
93 struct pmu_pg_msg_async_cmd_resp async_cmd_resp;
82 /* TBD: other pg messages */ 94 /* TBD: other pg messages */
83 union pmu_ap_msg ap_msg; 95 union pmu_ap_msg ap_msg;
84 struct nv_pmu_rppg_msg rppg_msg; 96 struct nv_pmu_rppg_msg rppg_msg;
diff --git a/include/nvgpu/tsg.h b/include/nvgpu/tsg.h
index 7cd97c9..f5391e7 100644
--- a/include/nvgpu/tsg.h
+++ b/include/nvgpu/tsg.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. 2 * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
3 * 3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a 4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"), 5 * copy of this software and associated documentation files (the "Software"),
@@ -90,7 +90,7 @@ int gk20a_enable_tsg(struct tsg_gk20a *tsg);
90int gk20a_disable_tsg(struct tsg_gk20a *tsg); 90int gk20a_disable_tsg(struct tsg_gk20a *tsg);
91int gk20a_tsg_bind_channel(struct tsg_gk20a *tsg, 91int gk20a_tsg_bind_channel(struct tsg_gk20a *tsg,
92 struct channel_gk20a *ch); 92 struct channel_gk20a *ch);
93int gk20a_tsg_unbind_channel(struct channel_gk20a *ch); 93int gk20a_tsg_unbind_channel(struct channel_gk20a *ch, bool force);
94 94
95void gk20a_tsg_event_id_post_event(struct tsg_gk20a *tsg, 95void gk20a_tsg_event_id_post_event(struct tsg_gk20a *tsg,
96 int event_id); 96 int event_id);