From 8f2cb85983c7cd0c913e6a06df16f929d58a10a9 Mon Sep 17 00:00:00 2001 From: Nitin Kumbhar Date: Thu, 31 May 2018 19:13:43 +0530 Subject: gpu: nvgpu: update ecc sysfs node handling Make ecc sysfs hash table per GPU by adding it as part of nvgpu_os_linux. Using a single hash table might give incorrect results as GPUs have same filenames and a filename is used as a key for a lookup. Add device_attribute as part of struct gk20a_ecc_stat. Using a single array of pointers of device attribute for an ecc_stat results in memory leak and incorrect stats if multiple GPUs are present on the system. This array of pointers will always hold info for GPU which created sysfs nodes last. Fix this by making device attribute array per ecc stat per GPU. Fix ecc stat removal to consider zero sub-units for a given number of hwunits. The multiplication with zero results in not removing any sysfs node at all. Bug 1987855 Change-Id: Ifcacc5623cede8decfe228c02d72786337cd0876 Signed-off-by: Nitin Kumbhar Reviewed-on: https://git-master.nvidia.com/r/1735989 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/linux/module.c | 2 + drivers/gpu/nvgpu/common/linux/os_linux.h | 3 + drivers/gpu/nvgpu/common/linux/pci.c | 2 + .../gpu/nvgpu/common/linux/platform_ecc_sysfs.c | 90 +++--- .../gpu/nvgpu/common/linux/platform_ecc_sysfs.h | 36 +-- .../gpu/nvgpu/common/linux/platform_gp10b_tegra.c | 192 ++++++------- .../gpu/nvgpu/common/linux/platform_gv11b_tegra.c | 310 ++++++++------------- drivers/gpu/nvgpu/gk20a/ecc_gk20a.h | 1 + 8 files changed, 277 insertions(+), 359 deletions(-) (limited to 'drivers/gpu') diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c index a7289b66..af71cc81 100644 --- a/drivers/gpu/nvgpu/common/linux/module.c +++ b/drivers/gpu/nvgpu/common/linux/module.c @@ -1146,6 +1146,8 @@ static int gk20a_probe(struct platform_device *dev) return -ENOMEM; } + hash_init(l->ecc_sysfs_stats_htable); + gk20a = &l->g; nvgpu_log_fn(gk20a, " "); diff --git a/drivers/gpu/nvgpu/common/linux/os_linux.h b/drivers/gpu/nvgpu/common/linux/os_linux.h index ebe131de..4dcce322 100644 --- a/drivers/gpu/nvgpu/common/linux/os_linux.h +++ b/drivers/gpu/nvgpu/common/linux/os_linux.h @@ -19,6 +19,7 @@ #include #include +#include #include "gk20a/gk20a.h" #include "cde.h" @@ -139,6 +140,8 @@ struct nvgpu_os_linux { struct dentry *debugfs_force_preemption_gfxp; struct dentry *debugfs_dump_ctxsw_stats; #endif + DECLARE_HASHTABLE(ecc_sysfs_stats_htable, 5); + struct gk20a_cde_app cde_app; struct rw_semaphore busy_lock; diff --git a/drivers/gpu/nvgpu/common/linux/pci.c b/drivers/gpu/nvgpu/common/linux/pci.c index 6caf5ad9..1011b441 100644 --- a/drivers/gpu/nvgpu/common/linux/pci.c +++ b/drivers/gpu/nvgpu/common/linux/pci.c @@ -646,6 +646,8 @@ static int nvgpu_pci_probe(struct pci_dev *pdev, return -ENOMEM; } + hash_init(l->ecc_sysfs_stats_htable); + g = &l->g; nvgpu_init_gk20a(g); diff --git a/drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.c b/drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.c index ee59e5de..0fe1c8d2 100644 --- a/drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.c +++ b/drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.c @@ -14,6 +14,8 @@ * along with this program. If not, see . */ +#include + #include #include #include @@ -28,10 +30,6 @@ #include "platform_gp10b_tegra.h" #include "platform_ecc_sysfs.h" -#define ECC_STAT_NAME_MAX_SIZE 100 - -static DEFINE_HASHTABLE(ecc_hash_table, 5); - static u32 gen_ecc_hash_key(char *str) { int i = 0; @@ -57,6 +55,7 @@ static ssize_t ecc_stat_show(struct device *dev, struct gk20a_ecc_stat *ecc_stat; u32 hash_key; struct gk20a *g = get_gk20a(dev); + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); if (sscanf(ecc_stat_full_name, "ltc%u_lts%u", &hw_unit, &subunit) == 2) { @@ -78,7 +77,7 @@ static ssize_t ecc_stat_show(struct device *dev, hash_key = gen_ecc_hash_key((char *)ecc_stat_base_name); - hash_for_each_possible(ecc_hash_table, + hash_for_each_possible(l->ecc_sysfs_stats_htable, ecc_stat, hash_node, hash_key) { @@ -91,11 +90,9 @@ static ssize_t ecc_stat_show(struct device *dev, return snprintf(buf, PAGE_SIZE, "Error: No ECC stat found!\n"); } -int gr_gp10b_ecc_stat_create(struct device *dev, - int is_l2, - char *ecc_stat_name, - struct gk20a_ecc_stat *ecc_stat, - struct device_attribute **dev_attr_array) +int nvgpu_gr_ecc_stat_create(struct device *dev, + int is_l2, char *ecc_stat_name, + struct gk20a_ecc_stat *ecc_stat) { struct gk20a *g = get_gk20a(dev); char *ltc_unit_name = "ltc"; @@ -113,32 +110,29 @@ int gr_gp10b_ecc_stat_create(struct device *dev, num_hw_units = g->gr.tpc_count; - return gp10b_ecc_stat_create(dev, num_hw_units, num_subunits, + return nvgpu_ecc_stat_create(dev, num_hw_units, num_subunits, is_l2 ? ltc_unit_name : gr_unit_name, num_subunits ? lts_unit_name: NULL, ecc_stat_name, - ecc_stat, - dev_attr_array); + ecc_stat); } -int gp10b_ecc_stat_create(struct device *dev, - int num_hw_units, - int num_subunits, - char *ecc_unit_name, - char *ecc_subunit_name, - char *ecc_stat_name, - struct gk20a_ecc_stat *ecc_stat, - struct device_attribute **__dev_attr_array) +int nvgpu_ecc_stat_create(struct device *dev, + int num_hw_units, int num_subunits, + char *ecc_unit_name, char *ecc_subunit_name, + char *ecc_stat_name, + struct gk20a_ecc_stat *ecc_stat) { int error = 0; struct gk20a *g = get_gk20a(dev); + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); int hw_unit = 0; int subunit = 0; int element = 0; u32 hash_key = 0; struct device_attribute *dev_attr_array; - int num_elements = num_subunits ? num_subunits*num_hw_units : + int num_elements = num_subunits ? num_subunits * num_hw_units : num_hw_units; /* Allocate arrays */ @@ -146,6 +140,7 @@ int gp10b_ecc_stat_create(struct device *dev, num_elements); ecc_stat->counters = nvgpu_kzalloc(g, sizeof(u32) * num_elements); ecc_stat->names = nvgpu_kzalloc(g, sizeof(char *) * num_elements); + for (hw_unit = 0; hw_unit < num_elements; hw_unit++) { ecc_stat->names[hw_unit] = nvgpu_kzalloc(g, sizeof(char) * ECC_STAT_NAME_MAX_SIZE); @@ -206,44 +201,58 @@ int gp10b_ecc_stat_create(struct device *dev, /* Add hash table entry */ hash_key = gen_ecc_hash_key(ecc_stat_name); - hash_add(ecc_hash_table, + hash_add(l->ecc_sysfs_stats_htable, &ecc_stat->hash_node, hash_key); - *__dev_attr_array = dev_attr_array; + ecc_stat->attr_array = dev_attr_array; return error; } -void gr_gp10b_ecc_stat_remove(struct device *dev, - int is_l2, - struct gk20a_ecc_stat *ecc_stat, - struct device_attribute *dev_attr_array) +void nvgpu_gr_ecc_stat_remove(struct device *dev, + int is_l2, struct gk20a_ecc_stat *ecc_stat) { struct gk20a *g = get_gk20a(dev); int num_hw_units = 0; + int num_subunits = 0; if (is_l2 == 1) num_hw_units = g->ltc_count; - else if (is_l2 == 2) - num_hw_units = g->ltc_count * g->gr.slices_per_ltc; - else + else if (is_l2 == 2) { + num_hw_units = g->ltc_count; + num_subunits = g->gr.slices_per_ltc; + } else num_hw_units = g->gr.tpc_count; - gp10b_ecc_stat_remove(dev, num_hw_units, ecc_stat, dev_attr_array); + nvgpu_ecc_stat_remove(dev, num_hw_units, num_subunits, ecc_stat); } -void gp10b_ecc_stat_remove(struct device *dev, - int num_hw_units, - struct gk20a_ecc_stat *ecc_stat, - struct device_attribute *dev_attr_array) +void nvgpu_ecc_stat_remove(struct device *dev, + int num_hw_units, int num_subunits, + struct gk20a_ecc_stat *ecc_stat) { struct gk20a *g = get_gk20a(dev); + struct device_attribute *dev_attr_array = ecc_stat->attr_array; int hw_unit = 0; + int subunit = 0; + int element = 0; + int num_elements = num_subunits ? num_subunits * num_hw_units : + num_hw_units; /* Remove sysfs files */ - for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) { - device_remove_file(dev, &dev_attr_array[hw_unit]); + if (num_subunits) { + for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) { + for (subunit = 0; subunit < num_subunits; subunit++) { + element = hw_unit * num_subunits + subunit; + + device_remove_file(dev, + &dev_attr_array[element]); + } + } + } else { + for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) + device_remove_file(dev, &dev_attr_array[hw_unit]); } /* Remove hash table entry */ @@ -251,9 +260,10 @@ void gp10b_ecc_stat_remove(struct device *dev, /* Free arrays */ nvgpu_kfree(g, ecc_stat->counters); - for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) { + + for (hw_unit = 0; hw_unit < num_elements; hw_unit++) nvgpu_kfree(g, ecc_stat->names[hw_unit]); - } + nvgpu_kfree(g, ecc_stat->names); nvgpu_kfree(g, dev_attr_array); } diff --git a/drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.h b/drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.h index d5622757..d29f7bd3 100644 --- a/drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.h +++ b/drivers/gpu/nvgpu/common/linux/platform_ecc_sysfs.h @@ -19,27 +19,19 @@ #include "gp10b/gr_gp10b.h" -int gr_gp10b_ecc_stat_create(struct device *dev, - int is_l2, - char *ecc_stat_name, - struct gk20a_ecc_stat *ecc_stat, - struct device_attribute **dev_attr_array); -int gp10b_ecc_stat_create(struct device *dev, - int num_hw_units, - int num_subunits, - char *ecc_unit_name, - char *ecc_subunit_name, - char *ecc_stat_name, - struct gk20a_ecc_stat *ecc_stat, - struct device_attribute **__dev_attr_array); +#define ECC_STAT_NAME_MAX_SIZE 100 -void gr_gp10b_ecc_stat_remove(struct device *dev, - int is_l2, - struct gk20a_ecc_stat *ecc_stat, - struct device_attribute *dev_attr_array); - -void gp10b_ecc_stat_remove(struct device *dev, - int hw_units, - struct gk20a_ecc_stat *ecc_stat, - struct device_attribute *dev_attr_array); +int nvgpu_gr_ecc_stat_create(struct device *dev, + int is_l2, char *ecc_stat_name, + struct gk20a_ecc_stat *ecc_stat); +int nvgpu_ecc_stat_create(struct device *dev, + int num_hw_units, int num_subunits, + char *ecc_unit_name, char *ecc_subunit_name, + char *ecc_stat_name, + struct gk20a_ecc_stat *ecc_stat); +void nvgpu_gr_ecc_stat_remove(struct device *dev, + int is_l2, struct gk20a_ecc_stat *ecc_stat); +void nvgpu_ecc_stat_remove(struct device *dev, + int num_hw_units, int num_subunits, + struct gk20a_ecc_stat *ecc_stat); #endif diff --git a/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c index 6f8cc507..fce16653 100644 --- a/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c +++ b/drivers/gpu/nvgpu/common/linux/platform_gp10b_tegra.c @@ -41,6 +41,7 @@ #include "gk20a/gk20a.h" #include "platform_gk20a.h" +#include "platform_ecc_sysfs.h" #include "platform_gk20a_tegra.h" #include "gp10b/platform_gp10b.h" #include "platform_gp10b_tegra.h" @@ -446,25 +447,6 @@ struct gk20a_platform gp10b_tegra_platform = { .secure_buffer_size = 401408, }; -static struct device_attribute *dev_attr_sm_lrf_ecc_single_err_count_array; -static struct device_attribute *dev_attr_sm_lrf_ecc_double_err_count_array; - -static struct device_attribute *dev_attr_sm_shm_ecc_sec_count_array; -static struct device_attribute *dev_attr_sm_shm_ecc_sed_count_array; -static struct device_attribute *dev_attr_sm_shm_ecc_ded_count_array; - -static struct device_attribute *dev_attr_tex_ecc_total_sec_pipe0_count_array; -static struct device_attribute *dev_attr_tex_ecc_total_ded_pipe0_count_array; -static struct device_attribute *dev_attr_tex_ecc_unique_sec_pipe0_count_array; -static struct device_attribute *dev_attr_tex_ecc_unique_ded_pipe0_count_array; -static struct device_attribute *dev_attr_tex_ecc_total_sec_pipe1_count_array; -static struct device_attribute *dev_attr_tex_ecc_total_ded_pipe1_count_array; -static struct device_attribute *dev_attr_tex_ecc_unique_sec_pipe1_count_array; -static struct device_attribute *dev_attr_tex_ecc_unique_ded_pipe1_count_array; - -static struct device_attribute *dev_attr_l2_ecc_sec_count_array; -static struct device_attribute *dev_attr_l2_ecc_ded_count_array; - void gr_gp10b_create_sysfs(struct gk20a *g) { int error = 0; @@ -477,84 +459,80 @@ void gr_gp10b_create_sysfs(struct gk20a *g) if (g->ecc.gr.sm_lrf_single_err_count.counters != NULL) return; - error |= gr_gp10b_ecc_stat_create(dev, + error |= nvgpu_gr_ecc_stat_create(dev, 0, "sm_lrf_ecc_single_err_count", - &g->ecc.gr.sm_lrf_single_err_count, - &dev_attr_sm_lrf_ecc_single_err_count_array); - error |= gr_gp10b_ecc_stat_create(dev, + &g->ecc.gr.sm_lrf_single_err_count); + + error |= nvgpu_gr_ecc_stat_create(dev, 0, "sm_lrf_ecc_double_err_count", - &g->ecc.gr.sm_lrf_double_err_count, - &dev_attr_sm_lrf_ecc_double_err_count_array); + &g->ecc.gr.sm_lrf_double_err_count); - error |= gr_gp10b_ecc_stat_create(dev, + error |= nvgpu_gr_ecc_stat_create(dev, 0, "sm_shm_ecc_sec_count", - &g->ecc.gr.sm_shm_sec_count, - &dev_attr_sm_shm_ecc_sec_count_array); - error |= gr_gp10b_ecc_stat_create(dev, + &g->ecc.gr.sm_shm_sec_count); + + error |= nvgpu_gr_ecc_stat_create(dev, 0, "sm_shm_ecc_sed_count", - &g->ecc.gr.sm_shm_sed_count, - &dev_attr_sm_shm_ecc_sed_count_array); - error |= gr_gp10b_ecc_stat_create(dev, + &g->ecc.gr.sm_shm_sed_count); + + error |= nvgpu_gr_ecc_stat_create(dev, 0, "sm_shm_ecc_ded_count", - &g->ecc.gr.sm_shm_ded_count, - &dev_attr_sm_shm_ecc_ded_count_array); + &g->ecc.gr.sm_shm_ded_count); - error |= gr_gp10b_ecc_stat_create(dev, + error |= nvgpu_gr_ecc_stat_create(dev, 0, "tex_ecc_total_sec_pipe0_count", - &g->ecc.gr.tex_total_sec_pipe0_count, - &dev_attr_tex_ecc_total_sec_pipe0_count_array); - error |= gr_gp10b_ecc_stat_create(dev, + &g->ecc.gr.tex_total_sec_pipe0_count); + + error |= nvgpu_gr_ecc_stat_create(dev, 0, "tex_ecc_total_ded_pipe0_count", - &g->ecc.gr.tex_total_ded_pipe0_count, - &dev_attr_tex_ecc_total_ded_pipe0_count_array); - error |= gr_gp10b_ecc_stat_create(dev, + &g->ecc.gr.tex_total_ded_pipe0_count); + + error |= nvgpu_gr_ecc_stat_create(dev, 0, "tex_ecc_unique_sec_pipe0_count", - &g->ecc.gr.tex_unique_sec_pipe0_count, - &dev_attr_tex_ecc_unique_sec_pipe0_count_array); - error |= gr_gp10b_ecc_stat_create(dev, + &g->ecc.gr.tex_unique_sec_pipe0_count); + + error |= nvgpu_gr_ecc_stat_create(dev, 0, "tex_ecc_unique_ded_pipe0_count", - &g->ecc.gr.tex_unique_ded_pipe0_count, - &dev_attr_tex_ecc_unique_ded_pipe0_count_array); - error |= gr_gp10b_ecc_stat_create(dev, + &g->ecc.gr.tex_unique_ded_pipe0_count); + + error |= nvgpu_gr_ecc_stat_create(dev, 0, "tex_ecc_total_sec_pipe1_count", - &g->ecc.gr.tex_total_sec_pipe1_count, - &dev_attr_tex_ecc_total_sec_pipe1_count_array); - error |= gr_gp10b_ecc_stat_create(dev, + &g->ecc.gr.tex_total_sec_pipe1_count); + + error |= nvgpu_gr_ecc_stat_create(dev, 0, "tex_ecc_total_ded_pipe1_count", - &g->ecc.gr.tex_total_ded_pipe1_count, - &dev_attr_tex_ecc_total_ded_pipe1_count_array); - error |= gr_gp10b_ecc_stat_create(dev, + &g->ecc.gr.tex_total_ded_pipe1_count); + + error |= nvgpu_gr_ecc_stat_create(dev, 0, "tex_ecc_unique_sec_pipe1_count", - &g->ecc.gr.tex_unique_sec_pipe1_count, - &dev_attr_tex_ecc_unique_sec_pipe1_count_array); - error |= gr_gp10b_ecc_stat_create(dev, + &g->ecc.gr.tex_unique_sec_pipe1_count); + + error |= nvgpu_gr_ecc_stat_create(dev, 0, "tex_ecc_unique_ded_pipe1_count", - &g->ecc.gr.tex_unique_ded_pipe1_count, - &dev_attr_tex_ecc_unique_ded_pipe1_count_array); + &g->ecc.gr.tex_unique_ded_pipe1_count); - error |= gr_gp10b_ecc_stat_create(dev, + error |= nvgpu_gr_ecc_stat_create(dev, 2, "ecc_sec_count", - &g->ecc.ltc.l2_sec_count, - &dev_attr_l2_ecc_sec_count_array); - error |= gr_gp10b_ecc_stat_create(dev, + &g->ecc.ltc.l2_sec_count); + + error |= nvgpu_gr_ecc_stat_create(dev, 2, "ecc_ded_count", - &g->ecc.ltc.l2_ded_count, - &dev_attr_l2_ecc_ded_count_array); + &g->ecc.ltc.l2_ded_count); if (error) dev_err(dev, "Failed to create sysfs attributes!\n"); @@ -567,67 +545,63 @@ void gr_gp10b_remove_sysfs(struct gk20a *g) if (!g->ecc.gr.sm_lrf_single_err_count.counters) return; - gr_gp10b_ecc_stat_remove(dev, + nvgpu_gr_ecc_stat_remove(dev, 0, - &g->ecc.gr.sm_lrf_single_err_count, - dev_attr_sm_lrf_ecc_single_err_count_array); - gr_gp10b_ecc_stat_remove(dev, + &g->ecc.gr.sm_lrf_single_err_count); + + nvgpu_gr_ecc_stat_remove(dev, 0, - &g->ecc.gr.sm_lrf_double_err_count, - dev_attr_sm_lrf_ecc_double_err_count_array); + &g->ecc.gr.sm_lrf_double_err_count); - gr_gp10b_ecc_stat_remove(dev, + nvgpu_gr_ecc_stat_remove(dev, 0, - &g->ecc.gr.sm_shm_sec_count, - dev_attr_sm_shm_ecc_sec_count_array); - gr_gp10b_ecc_stat_remove(dev, + &g->ecc.gr.sm_shm_sec_count); + + nvgpu_gr_ecc_stat_remove(dev, 0, - &g->ecc.gr.sm_shm_sed_count, - dev_attr_sm_shm_ecc_sed_count_array); - gr_gp10b_ecc_stat_remove(dev, + &g->ecc.gr.sm_shm_sed_count); + + nvgpu_gr_ecc_stat_remove(dev, 0, - &g->ecc.gr.sm_shm_ded_count, - dev_attr_sm_shm_ecc_ded_count_array); + &g->ecc.gr.sm_shm_ded_count); - gr_gp10b_ecc_stat_remove(dev, + nvgpu_gr_ecc_stat_remove(dev, 0, - &g->ecc.gr.tex_total_sec_pipe0_count, - dev_attr_tex_ecc_total_sec_pipe0_count_array); - gr_gp10b_ecc_stat_remove(dev, + &g->ecc.gr.tex_total_sec_pipe0_count); + + nvgpu_gr_ecc_stat_remove(dev, 0, - &g->ecc.gr.tex_total_ded_pipe0_count, - dev_attr_tex_ecc_total_ded_pipe0_count_array); - gr_gp10b_ecc_stat_remove(dev, + &g->ecc.gr.tex_total_ded_pipe0_count); + + nvgpu_gr_ecc_stat_remove(dev, 0, - &g->ecc.gr.tex_unique_sec_pipe0_count, - dev_attr_tex_ecc_unique_sec_pipe0_count_array); - gr_gp10b_ecc_stat_remove(dev, + &g->ecc.gr.tex_unique_sec_pipe0_count); + + nvgpu_gr_ecc_stat_remove(dev, 0, - &g->ecc.gr.tex_unique_ded_pipe0_count, - dev_attr_tex_ecc_unique_ded_pipe0_count_array); - gr_gp10b_ecc_stat_remove(dev, + &g->ecc.gr.tex_unique_ded_pipe0_count); + + nvgpu_gr_ecc_stat_remove(dev, 0, - &g->ecc.gr.tex_total_sec_pipe1_count, - dev_attr_tex_ecc_total_sec_pipe1_count_array); - gr_gp10b_ecc_stat_remove(dev, + &g->ecc.gr.tex_total_sec_pipe1_count); + + nvgpu_gr_ecc_stat_remove(dev, 0, - &g->ecc.gr.tex_total_ded_pipe1_count, - dev_attr_tex_ecc_total_ded_pipe1_count_array); - gr_gp10b_ecc_stat_remove(dev, + &g->ecc.gr.tex_total_ded_pipe1_count); + + nvgpu_gr_ecc_stat_remove(dev, 0, - &g->ecc.gr.tex_unique_sec_pipe1_count, - dev_attr_tex_ecc_unique_sec_pipe1_count_array); - gr_gp10b_ecc_stat_remove(dev, + &g->ecc.gr.tex_unique_sec_pipe1_count); + + nvgpu_gr_ecc_stat_remove(dev, 0, - &g->ecc.gr.tex_unique_ded_pipe1_count, - dev_attr_tex_ecc_unique_ded_pipe1_count_array); + &g->ecc.gr.tex_unique_ded_pipe1_count); - gr_gp10b_ecc_stat_remove(dev, + nvgpu_gr_ecc_stat_remove(dev, 2, - &g->ecc.ltc.l2_sec_count, - dev_attr_l2_ecc_sec_count_array); - gr_gp10b_ecc_stat_remove(dev, + &g->ecc.ltc.l2_sec_count); + + nvgpu_gr_ecc_stat_remove(dev, 2, - &g->ecc.ltc.l2_ded_count, - dev_attr_l2_ecc_ded_count_array); + &g->ecc.ltc.l2_ded_count); } diff --git a/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c b/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c index f681fe4b..bf66762b 100644 --- a/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c +++ b/drivers/gpu/nvgpu/common/linux/platform_gv11b_tegra.c @@ -39,6 +39,7 @@ #include "gp10b/platform_gp10b.h" #include "platform_gp10b_tegra.h" +#include "platform_ecc_sysfs.h" #include "os_linux.h" #include "platform_gk20a_tegra.h" @@ -261,41 +262,11 @@ struct gk20a_platform gv11b_tegra_platform = { .secure_buffer_size = 667648, }; -static struct device_attribute *dev_attr_sm_l1_tag_ecc_corrected_err_count_array; -static struct device_attribute *dev_attr_sm_l1_tag_ecc_uncorrected_err_count_array; -static struct device_attribute *dev_attr_sm_cbu_ecc_corrected_err_count_array; -static struct device_attribute *dev_attr_sm_cbu_ecc_uncorrected_err_count_array; -static struct device_attribute *dev_attr_sm_l1_data_ecc_corrected_err_count_array; -static struct device_attribute *dev_attr_sm_l1_data_ecc_uncorrected_err_count_array; -static struct device_attribute *dev_attr_sm_icache_ecc_corrected_err_count_array; -static struct device_attribute *dev_attr_sm_icache_ecc_uncorrected_err_count_array; -static struct device_attribute *dev_attr_gcc_l15_ecc_corrected_err_count_array; -static struct device_attribute *dev_attr_gcc_l15_ecc_uncorrected_err_count_array; -static struct device_attribute *dev_attr_mmu_l1tlb_ecc_corrected_err_count_array; -static struct device_attribute *dev_attr_mmu_l1tlb_ecc_uncorrected_err_count_array; - -static struct device_attribute *dev_attr_fecs_ecc_corrected_err_count_array; -static struct device_attribute *dev_attr_fecs_ecc_uncorrected_err_count_array; -static struct device_attribute *dev_attr_gpccs_ecc_corrected_err_count_array; -static struct device_attribute *dev_attr_gpccs_ecc_uncorrected_err_count_array; - -static struct device_attribute *dev_attr_l2_cache_ecc_corrected_err_count_array; -static struct device_attribute *dev_attr_l2_cache_ecc_uncorrected_err_count_array; - -static struct device_attribute *dev_attr_mmu_l2tlb_ecc_corrected_err_count_array; -static struct device_attribute *dev_attr_mmu_l2tlb_ecc_uncorrected_err_count_array; -static struct device_attribute *dev_attr_mmu_hubtlb_ecc_corrected_err_count_array; -static struct device_attribute *dev_attr_mmu_hubtlb_ecc_uncorrected_err_count_array; -static struct device_attribute *dev_attr_mmu_fillunit_ecc_corrected_err_count_array; -static struct device_attribute *dev_attr_mmu_fillunit_ecc_uncorrected_err_count_array; - -static struct device_attribute *dev_attr_pmu_ecc_corrected_err_count_array; -static struct device_attribute *dev_attr_pmu_ecc_uncorrected_err_count_array; - void gr_gv11b_create_sysfs(struct gk20a *g) { struct device *dev = dev_from_gk20a(g); int error = 0; + /* This stat creation function is called on GR init. GR can get initialized multiple times but we only need to create the ECC stats once. Therefore, add the following check to avoid @@ -305,210 +276,183 @@ void gr_gv11b_create_sysfs(struct gk20a *g) gr_gp10b_create_sysfs(g); - error |= gr_gp10b_ecc_stat_create(dev, + error |= nvgpu_gr_ecc_stat_create(dev, 0, "sm_l1_tag_ecc_corrected_err_count", - &g->ecc.gr.sm_l1_tag_corrected_err_count, - &dev_attr_sm_l1_tag_ecc_corrected_err_count_array); + &g->ecc.gr.sm_l1_tag_corrected_err_count); - error |= gr_gp10b_ecc_stat_create(dev, + error |= nvgpu_gr_ecc_stat_create(dev, 0, "sm_l1_tag_ecc_uncorrected_err_count", - &g->ecc.gr.sm_l1_tag_uncorrected_err_count, - &dev_attr_sm_l1_tag_ecc_uncorrected_err_count_array); + &g->ecc.gr.sm_l1_tag_uncorrected_err_count); - error |= gr_gp10b_ecc_stat_create(dev, + error |= nvgpu_gr_ecc_stat_create(dev, 0, "sm_cbu_ecc_corrected_err_count", - &g->ecc.gr.sm_cbu_corrected_err_count, - &dev_attr_sm_cbu_ecc_corrected_err_count_array); + &g->ecc.gr.sm_cbu_corrected_err_count); - error |= gr_gp10b_ecc_stat_create(dev, + error |= nvgpu_gr_ecc_stat_create(dev, 0, "sm_cbu_ecc_uncorrected_err_count", - &g->ecc.gr.sm_cbu_uncorrected_err_count, - &dev_attr_sm_cbu_ecc_uncorrected_err_count_array); + &g->ecc.gr.sm_cbu_uncorrected_err_count); - error |= gr_gp10b_ecc_stat_create(dev, + error |= nvgpu_gr_ecc_stat_create(dev, 0, "sm_l1_data_ecc_corrected_err_count", - &g->ecc.gr.sm_l1_data_corrected_err_count, - &dev_attr_sm_l1_data_ecc_corrected_err_count_array); + &g->ecc.gr.sm_l1_data_corrected_err_count); - error |= gr_gp10b_ecc_stat_create(dev, + error |= nvgpu_gr_ecc_stat_create(dev, 0, "sm_l1_data_ecc_uncorrected_err_count", - &g->ecc.gr.sm_l1_data_uncorrected_err_count, - &dev_attr_sm_l1_data_ecc_uncorrected_err_count_array); + &g->ecc.gr.sm_l1_data_uncorrected_err_count); - error |= gr_gp10b_ecc_stat_create(dev, + error |= nvgpu_gr_ecc_stat_create(dev, 0, "sm_icache_ecc_corrected_err_count", - &g->ecc.gr.sm_icache_corrected_err_count, - &dev_attr_sm_icache_ecc_corrected_err_count_array); + &g->ecc.gr.sm_icache_corrected_err_count); - error |= gr_gp10b_ecc_stat_create(dev, + error |= nvgpu_gr_ecc_stat_create(dev, 0, "sm_icache_ecc_uncorrected_err_count", - &g->ecc.gr.sm_icache_uncorrected_err_count, - &dev_attr_sm_icache_ecc_uncorrected_err_count_array); + &g->ecc.gr.sm_icache_uncorrected_err_count); - error |= gr_gp10b_ecc_stat_create(dev, + error |= nvgpu_gr_ecc_stat_create(dev, 0, "gcc_l15_ecc_corrected_err_count", - &g->ecc.gr.gcc_l15_corrected_err_count, - &dev_attr_gcc_l15_ecc_corrected_err_count_array); + &g->ecc.gr.gcc_l15_corrected_err_count); - error |= gr_gp10b_ecc_stat_create(dev, + error |= nvgpu_gr_ecc_stat_create(dev, 0, "gcc_l15_ecc_uncorrected_err_count", - &g->ecc.gr.gcc_l15_uncorrected_err_count, - &dev_attr_gcc_l15_ecc_uncorrected_err_count_array); + &g->ecc.gr.gcc_l15_uncorrected_err_count); - error |= gp10b_ecc_stat_create(dev, + error |= nvgpu_ecc_stat_create(dev, g->ltc_count, 0, "ltc", NULL, "l2_cache_uncorrected_err_count", - &g->ecc.ltc.l2_cache_uncorrected_err_count, - &dev_attr_l2_cache_ecc_uncorrected_err_count_array); + &g->ecc.ltc.l2_cache_uncorrected_err_count); - error |= gp10b_ecc_stat_create(dev, + error |= nvgpu_ecc_stat_create(dev, g->ltc_count, 0, "ltc", NULL, "l2_cache_corrected_err_count", - &g->ecc.ltc.l2_cache_corrected_err_count, - &dev_attr_l2_cache_ecc_corrected_err_count_array); + &g->ecc.ltc.l2_cache_corrected_err_count); - error |= gp10b_ecc_stat_create(dev, + error |= nvgpu_ecc_stat_create(dev, 1, 0, "gpc", NULL, "fecs_ecc_uncorrected_err_count", - &g->ecc.gr.fecs_uncorrected_err_count, - &dev_attr_fecs_ecc_uncorrected_err_count_array); + &g->ecc.gr.fecs_uncorrected_err_count); - error |= gp10b_ecc_stat_create(dev, + error |= nvgpu_ecc_stat_create(dev, 1, 0, "gpc", NULL, "fecs_ecc_corrected_err_count", - &g->ecc.gr.fecs_corrected_err_count, - &dev_attr_fecs_ecc_corrected_err_count_array); + &g->ecc.gr.fecs_corrected_err_count); - error |= gp10b_ecc_stat_create(dev, + error |= nvgpu_ecc_stat_create(dev, g->gr.gpc_count, 0, "gpc", NULL, "gpccs_ecc_uncorrected_err_count", - &g->ecc.gr.gpccs_uncorrected_err_count, - &dev_attr_gpccs_ecc_uncorrected_err_count_array); + &g->ecc.gr.gpccs_uncorrected_err_count); - error |= gp10b_ecc_stat_create(dev, + error |= nvgpu_ecc_stat_create(dev, g->gr.gpc_count, 0, "gpc", NULL, "gpccs_ecc_corrected_err_count", - &g->ecc.gr.gpccs_corrected_err_count, - &dev_attr_gpccs_ecc_corrected_err_count_array); + &g->ecc.gr.gpccs_corrected_err_count); - error |= gp10b_ecc_stat_create(dev, + error |= nvgpu_ecc_stat_create(dev, g->gr.gpc_count, 0, "gpc", NULL, "mmu_l1tlb_ecc_uncorrected_err_count", - &g->ecc.gr.mmu_l1tlb_uncorrected_err_count, - &dev_attr_mmu_l1tlb_ecc_uncorrected_err_count_array); + &g->ecc.gr.mmu_l1tlb_uncorrected_err_count); - error |= gp10b_ecc_stat_create(dev, + error |= nvgpu_ecc_stat_create(dev, g->gr.gpc_count, 0, "gpc", NULL, "mmu_l1tlb_ecc_corrected_err_count", - &g->ecc.gr.mmu_l1tlb_corrected_err_count, - &dev_attr_mmu_l1tlb_ecc_corrected_err_count_array); + &g->ecc.gr.mmu_l1tlb_corrected_err_count); - error |= gp10b_ecc_stat_create(dev, + error |= nvgpu_ecc_stat_create(dev, 1, 0, "eng", NULL, "mmu_l2tlb_ecc_uncorrected_err_count", - &g->ecc.fb.mmu_l2tlb_uncorrected_err_count, - &dev_attr_mmu_l2tlb_ecc_uncorrected_err_count_array); + &g->ecc.fb.mmu_l2tlb_uncorrected_err_count); - error |= gp10b_ecc_stat_create(dev, + error |= nvgpu_ecc_stat_create(dev, 1, 0, "eng", NULL, "mmu_l2tlb_ecc_corrected_err_count", - &g->ecc.fb.mmu_l2tlb_corrected_err_count, - &dev_attr_mmu_l2tlb_ecc_corrected_err_count_array); + &g->ecc.fb.mmu_l2tlb_corrected_err_count); - error |= gp10b_ecc_stat_create(dev, + error |= nvgpu_ecc_stat_create(dev, 1, 0, "eng", NULL, "mmu_hubtlb_ecc_uncorrected_err_count", - &g->ecc.fb.mmu_hubtlb_uncorrected_err_count, - &dev_attr_mmu_hubtlb_ecc_uncorrected_err_count_array); + &g->ecc.fb.mmu_hubtlb_uncorrected_err_count); - error |= gp10b_ecc_stat_create(dev, + error |= nvgpu_ecc_stat_create(dev, 1, 0, "eng", NULL, "mmu_hubtlb_ecc_corrected_err_count", - &g->ecc.fb.mmu_hubtlb_corrected_err_count, - &dev_attr_mmu_hubtlb_ecc_corrected_err_count_array); + &g->ecc.fb.mmu_hubtlb_corrected_err_count); - error |= gp10b_ecc_stat_create(dev, + error |= nvgpu_ecc_stat_create(dev, 1, 0, "eng", NULL, "mmu_fillunit_ecc_uncorrected_err_count", - &g->ecc.fb.mmu_fillunit_uncorrected_err_count, - &dev_attr_mmu_fillunit_ecc_uncorrected_err_count_array); + &g->ecc.fb.mmu_fillunit_uncorrected_err_count); - error |= gp10b_ecc_stat_create(dev, + error |= nvgpu_ecc_stat_create(dev, 1, 0, "eng", NULL, "mmu_fillunit_ecc_corrected_err_count", - &g->ecc.fb.mmu_fillunit_corrected_err_count, - &dev_attr_mmu_fillunit_ecc_corrected_err_count_array); + &g->ecc.fb.mmu_fillunit_corrected_err_count); - error |= gp10b_ecc_stat_create(dev, + error |= nvgpu_ecc_stat_create(dev, 1, 0, "eng", NULL, "pmu_ecc_uncorrected_err_count", - &g->ecc.pmu.pmu_uncorrected_err_count, - &dev_attr_pmu_ecc_uncorrected_err_count_array); + &g->ecc.pmu.pmu_uncorrected_err_count); - error |= gp10b_ecc_stat_create(dev, + error |= nvgpu_ecc_stat_create(dev, 1, 0, "eng", NULL, "pmu_ecc_corrected_err_count", - &g->ecc.pmu.pmu_corrected_err_count, - &dev_attr_pmu_ecc_corrected_err_count_array); - + &g->ecc.pmu.pmu_corrected_err_count); if (error) dev_err(dev, "Failed to create gv11b sysfs attributes!\n"); @@ -522,133 +466,123 @@ void gr_gv11b_remove_sysfs(struct gk20a *g) return; gr_gp10b_remove_sysfs(g); - gr_gp10b_ecc_stat_remove(dev, + nvgpu_gr_ecc_stat_remove(dev, 0, - &g->ecc.gr.sm_l1_tag_corrected_err_count, - dev_attr_sm_l1_tag_ecc_corrected_err_count_array); + &g->ecc.gr.sm_l1_tag_corrected_err_count); - gr_gp10b_ecc_stat_remove(dev, + nvgpu_gr_ecc_stat_remove(dev, 0, - &g->ecc.gr.sm_l1_tag_uncorrected_err_count, - dev_attr_sm_l1_tag_ecc_uncorrected_err_count_array); + &g->ecc.gr.sm_l1_tag_uncorrected_err_count); - gr_gp10b_ecc_stat_remove(dev, + nvgpu_gr_ecc_stat_remove(dev, 0, - &g->ecc.gr.sm_cbu_corrected_err_count, - dev_attr_sm_cbu_ecc_corrected_err_count_array); + &g->ecc.gr.sm_cbu_corrected_err_count); - gr_gp10b_ecc_stat_remove(dev, + nvgpu_gr_ecc_stat_remove(dev, 0, - &g->ecc.gr.sm_cbu_uncorrected_err_count, - dev_attr_sm_cbu_ecc_uncorrected_err_count_array); + &g->ecc.gr.sm_cbu_uncorrected_err_count); - gr_gp10b_ecc_stat_remove(dev, + nvgpu_gr_ecc_stat_remove(dev, 0, - &g->ecc.gr.sm_l1_data_corrected_err_count, - dev_attr_sm_l1_data_ecc_corrected_err_count_array); + &g->ecc.gr.sm_l1_data_corrected_err_count); - gr_gp10b_ecc_stat_remove(dev, + nvgpu_gr_ecc_stat_remove(dev, 0, - &g->ecc.gr.sm_l1_data_uncorrected_err_count, - dev_attr_sm_l1_data_ecc_uncorrected_err_count_array); + &g->ecc.gr.sm_l1_data_uncorrected_err_count); - gr_gp10b_ecc_stat_remove(dev, + nvgpu_gr_ecc_stat_remove(dev, 0, - &g->ecc.gr.sm_icache_corrected_err_count, - dev_attr_sm_icache_ecc_corrected_err_count_array); + &g->ecc.gr.sm_icache_corrected_err_count); - gr_gp10b_ecc_stat_remove(dev, + nvgpu_gr_ecc_stat_remove(dev, 0, - &g->ecc.gr.sm_icache_uncorrected_err_count, - dev_attr_sm_icache_ecc_uncorrected_err_count_array); + &g->ecc.gr.sm_icache_uncorrected_err_count); - gr_gp10b_ecc_stat_remove(dev, + nvgpu_gr_ecc_stat_remove(dev, 0, - &g->ecc.gr.gcc_l15_corrected_err_count, - dev_attr_gcc_l15_ecc_corrected_err_count_array); + &g->ecc.gr.gcc_l15_corrected_err_count); - gr_gp10b_ecc_stat_remove(dev, + nvgpu_gr_ecc_stat_remove(dev, 0, - &g->ecc.gr.gcc_l15_uncorrected_err_count, - dev_attr_gcc_l15_ecc_uncorrected_err_count_array); + &g->ecc.gr.gcc_l15_uncorrected_err_count); - gp10b_ecc_stat_remove(dev, + nvgpu_ecc_stat_remove(dev, g->ltc_count, - &g->ecc.ltc.l2_cache_uncorrected_err_count, - dev_attr_l2_cache_ecc_uncorrected_err_count_array); + 0, + &g->ecc.ltc.l2_cache_uncorrected_err_count); - gp10b_ecc_stat_remove(dev, + nvgpu_ecc_stat_remove(dev, g->ltc_count, - &g->ecc.ltc.l2_cache_corrected_err_count, - dev_attr_l2_cache_ecc_corrected_err_count_array); + 0, + &g->ecc.ltc.l2_cache_corrected_err_count); - gp10b_ecc_stat_remove(dev, + nvgpu_ecc_stat_remove(dev, 1, - &g->ecc.gr.fecs_uncorrected_err_count, - dev_attr_fecs_ecc_uncorrected_err_count_array); + 0, + &g->ecc.gr.fecs_uncorrected_err_count); - gp10b_ecc_stat_remove(dev, + nvgpu_ecc_stat_remove(dev, 1, - &g->ecc.gr.fecs_corrected_err_count, - dev_attr_fecs_ecc_corrected_err_count_array); + 0, + &g->ecc.gr.fecs_corrected_err_count); - gp10b_ecc_stat_remove(dev, + nvgpu_ecc_stat_remove(dev, g->gr.gpc_count, - &g->ecc.gr.gpccs_uncorrected_err_count, - dev_attr_gpccs_ecc_uncorrected_err_count_array); + 0, + &g->ecc.gr.gpccs_uncorrected_err_count); - gp10b_ecc_stat_remove(dev, + nvgpu_ecc_stat_remove(dev, g->gr.gpc_count, - &g->ecc.gr.gpccs_corrected_err_count, - dev_attr_gpccs_ecc_corrected_err_count_array); + 0, + &g->ecc.gr.gpccs_corrected_err_count); - gp10b_ecc_stat_remove(dev, + nvgpu_ecc_stat_remove(dev, g->gr.gpc_count, - &g->ecc.gr.mmu_l1tlb_uncorrected_err_count, - dev_attr_mmu_l1tlb_ecc_uncorrected_err_count_array); + 0, + &g->ecc.gr.mmu_l1tlb_uncorrected_err_count); - gp10b_ecc_stat_remove(dev, + nvgpu_ecc_stat_remove(dev, g->gr.gpc_count, - &g->ecc.gr.mmu_l1tlb_corrected_err_count, - dev_attr_mmu_l1tlb_ecc_corrected_err_count_array); + 0, + &g->ecc.gr.mmu_l1tlb_corrected_err_count); - gp10b_ecc_stat_remove(dev, + nvgpu_ecc_stat_remove(dev, 1, - &g->ecc.fb.mmu_l2tlb_uncorrected_err_count, - dev_attr_mmu_l2tlb_ecc_uncorrected_err_count_array); + 0, + &g->ecc.fb.mmu_l2tlb_uncorrected_err_count); - gp10b_ecc_stat_remove(dev, + nvgpu_ecc_stat_remove(dev, 1, - &g->ecc.fb.mmu_l2tlb_corrected_err_count, - dev_attr_mmu_l2tlb_ecc_corrected_err_count_array); + 0, + &g->ecc.fb.mmu_l2tlb_corrected_err_count); - gp10b_ecc_stat_remove(dev, + nvgpu_ecc_stat_remove(dev, 1, - &g->ecc.fb.mmu_hubtlb_uncorrected_err_count, - dev_attr_mmu_hubtlb_ecc_uncorrected_err_count_array); + 0, + &g->ecc.fb.mmu_hubtlb_uncorrected_err_count); - gp10b_ecc_stat_remove(dev, + nvgpu_ecc_stat_remove(dev, 1, - &g->ecc.fb.mmu_hubtlb_corrected_err_count, - dev_attr_mmu_hubtlb_ecc_corrected_err_count_array); + 0, + &g->ecc.fb.mmu_hubtlb_corrected_err_count); - gp10b_ecc_stat_remove(dev, + nvgpu_ecc_stat_remove(dev, 1, - &g->ecc.fb.mmu_fillunit_uncorrected_err_count, - dev_attr_mmu_fillunit_ecc_uncorrected_err_count_array); + 0, + &g->ecc.fb.mmu_fillunit_uncorrected_err_count); - gp10b_ecc_stat_remove(dev, + nvgpu_ecc_stat_remove(dev, 1, - &g->ecc.fb.mmu_fillunit_corrected_err_count, - dev_attr_mmu_fillunit_ecc_corrected_err_count_array); + 0, + &g->ecc.fb.mmu_fillunit_corrected_err_count); - gp10b_ecc_stat_remove(dev, + nvgpu_ecc_stat_remove(dev, 1, - &g->ecc.pmu.pmu_uncorrected_err_count, - dev_attr_pmu_ecc_uncorrected_err_count_array); + 0, + &g->ecc.pmu.pmu_uncorrected_err_count); - gp10b_ecc_stat_remove(dev, + nvgpu_ecc_stat_remove(dev, 1, - &g->ecc.pmu.pmu_corrected_err_count, - dev_attr_pmu_ecc_corrected_err_count_array); + 0, + &g->ecc.pmu.pmu_corrected_err_count); } diff --git a/drivers/gpu/nvgpu/gk20a/ecc_gk20a.h b/drivers/gpu/nvgpu/gk20a/ecc_gk20a.h index af10f828..9c50a809 100644 --- a/drivers/gpu/nvgpu/gk20a/ecc_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/ecc_gk20a.h @@ -30,6 +30,7 @@ struct gk20a_ecc_stat { u32 count; #ifdef CONFIG_SYSFS struct hlist_node hash_node; + struct device_attribute *attr_array; #endif }; -- cgit v1.2.2