From 2a25d03f2b7ad0700f14640282abd72ff587d800 Mon Sep 17 00:00:00 2001 From: Kyle Guo Date: Thu, 12 Jul 2018 17:51:42 -0700 Subject: gpu: nvgpu: vgpu: ecc sysfs support for vgpu - fetch ecc info from RM server and create sysfs nodes - new file ecc_vgpu.c for platform-independent code - add 2 new commands: GET_ECC_INFO and GET_ECC_COUNTER_VALUE JIRA EVLR-2590 Change-Id: I040a9fcd23326e432ca93e9a028319f9c1c570f0 Signed-off-by: Kyle Guo Reviewed-on: https://git-master.nvidia.com/r/1777428 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/Makefile | 1 + drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h | 19 +++++ drivers/gpu/nvgpu/include/nvgpu/vgpu/vgpu.h | 3 + drivers/gpu/nvgpu/os/linux/vgpu/sysfs_vgpu.c | 93 +++++++++++++++++++++++ drivers/gpu/nvgpu/vgpu/ecc_vgpu.c | 92 ++++++++++++++++++++++ drivers/gpu/nvgpu/vgpu/ecc_vgpu.h | 39 ++++++++++ 6 files changed, 247 insertions(+) create mode 100644 drivers/gpu/nvgpu/vgpu/ecc_vgpu.c create mode 100644 drivers/gpu/nvgpu/vgpu/ecc_vgpu.h (limited to 'drivers') diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 4ca4b6d8..7a21d294 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -245,6 +245,7 @@ nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \ vgpu/dbg_vgpu.o \ vgpu/tsg_vgpu.o \ vgpu/css_vgpu.o \ + vgpu/ecc_vgpu.o \ vgpu/gm20b/vgpu_gr_gm20b.o \ vgpu/gp10b/vgpu_hal_gp10b.o \ vgpu/gp10b/vgpu_gr_gp10b.o \ diff --git a/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h b/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h index 1eb26303..39d68dd1 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h @@ -26,6 +26,7 @@ #define __TEGRA_VGPU_H #include +#include /* For NVGPU_ECC_STAT_NAME_MAX_SIZE */ enum { TEGRA_VGPU_MODULE_GPU = 0, @@ -120,6 +121,8 @@ enum { TEGRA_VGPU_CMD_UPDATE_PC_SAMPLING = 81, TEGRA_VGPU_CMD_SUSPEND = 82, TEGRA_VGPU_CMD_RESUME = 83, + TEGRA_VGPU_CMD_GET_ECC_INFO = 84, + TEGRA_VGPU_CMD_GET_ECC_COUNTER_VALUE = 85, }; struct tegra_vgpu_connect_params { @@ -378,6 +381,20 @@ struct tegra_vgpu_channel_free_hwpm_ctx { u64 handle; }; +struct tegra_vgpu_ecc_info_params { + u32 ecc_stats_count; +}; + +struct tegra_vgpu_ecc_info_entry { + u32 ecc_id; + char name[NVGPU_ECC_STAT_NAME_MAX_SIZE]; +}; + +struct tegra_vgpu_ecc_counter_params { + u32 ecc_id; + u32 value; +}; + struct tegra_vgpu_gr_ctx_params { u64 gr_ctx_handle; u64 as_handle; @@ -659,6 +676,8 @@ struct tegra_vgpu_cmd_msg { struct tegra_vgpu_map_syncpt_params map_syncpt; struct tegra_vgpu_tsg_bind_channel_ex_params tsg_bind_channel_ex; struct tegra_vgpu_channel_update_pc_sampling update_pc_sampling; + struct tegra_vgpu_ecc_info_params ecc_info; + struct tegra_vgpu_ecc_counter_params ecc_counter; char padding[192]; } params; }; diff --git a/drivers/gpu/nvgpu/include/nvgpu/vgpu/vgpu.h b/drivers/gpu/nvgpu/include/nvgpu/vgpu/vgpu.h index 1e851b8e..762bc229 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/vgpu/vgpu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/vgpu/vgpu.h @@ -37,11 +37,14 @@ struct gk20a; struct vm_gk20a; struct nvgpu_gr_ctx; struct nvgpu_cpu_time_correlation_sample; +struct vgpu_ecc_stat; struct vgpu_priv_data { u64 virt_handle; struct nvgpu_thread intr_handler; struct tegra_vgpu_constants_params constants; + struct vgpu_ecc_stat *ecc_stats; + int ecc_stats_count; }; struct vgpu_priv_data *vgpu_get_priv_data(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/sysfs_vgpu.c b/drivers/gpu/nvgpu/os/linux/vgpu/sysfs_vgpu.c index 57aad4b4..30ca7f53 100644 --- a/drivers/gpu/nvgpu/os/linux/vgpu/sysfs_vgpu.c +++ b/drivers/gpu/nvgpu/os/linux/vgpu/sysfs_vgpu.c @@ -18,6 +18,8 @@ #include #include "os/linux/platform_gk20a.h" +#include "os/linux/os_linux.h" +#include "vgpu/ecc_vgpu.h" static ssize_t vgpu_load_show(struct device *dev, struct device_attribute *attr, @@ -38,13 +40,104 @@ static ssize_t vgpu_load_show(struct device *dev, } static DEVICE_ATTR(load, S_IRUGO, vgpu_load_show, NULL); +static ssize_t vgpu_ecc_stat_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct gk20a *g = get_gk20a(dev); + struct tegra_vgpu_cmd_msg msg = {0}; + struct tegra_vgpu_ecc_counter_params *p = &msg.params.ecc_counter; + struct dev_ext_attribute *ext_attr = container_of(attr, + struct dev_ext_attribute, attr); + struct vgpu_ecc_stat *ecc_stat = ext_attr->var; + int err; + + p->ecc_id = ecc_stat->ecc_id; + + msg.cmd = TEGRA_VGPU_CMD_GET_ECC_COUNTER_VALUE; + msg.handle = vgpu_get_handle(g); + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + if (unlikely(err)) { + nvgpu_err(g, "ecc: cannot get ECC counter value: %d", err); + return err; + } + + return snprintf(buf, PAGE_SIZE, "%u\n", p->value); +} + +static int vgpu_create_ecc_sysfs(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + struct vgpu_ecc_stat *stats; + struct dev_ext_attribute *attrs; + int err, i, count; + + err = vgpu_ecc_get_info(g); + if (unlikely(err)) { + nvgpu_err(g, "ecc: cannot get ECC info: %d", err); + return err; + } + + stats = priv->ecc_stats; + count = priv->ecc_stats_count; + + attrs = nvgpu_kzalloc(g, count * sizeof(*attrs)); + if (unlikely(!attrs)) { + nvgpu_err(g, "ecc: no memory"); + vgpu_ecc_remove_info(g); + return -ENOMEM; + } + + for (i = 0; i < count; i++) { + sysfs_attr_init(&attrs[i].attr); + attrs[i].attr.attr.name = stats[i].name; + attrs[i].attr.attr.mode = VERIFY_OCTAL_PERMISSIONS(S_IRUGO); + attrs[i].attr.show = vgpu_ecc_stat_show; + attrs[i].attr.store = NULL; + attrs[i].var = &stats[i]; + + err = device_create_file(dev, &attrs[i].attr); + if (unlikely(err)) { + nvgpu_warn(g, "ecc: cannot create file \"%s\": %d", + stats[i].name, err); + } + } + + l->ecc_attrs = attrs; + return 0; +} + +static void vgpu_remove_ecc_sysfs(struct device *dev) +{ + struct gk20a *g = get_gk20a(dev); + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + int i; + + if (l->ecc_attrs) { + for (i = 0; i < priv->ecc_stats_count; i++) + device_remove_file(dev, &l->ecc_attrs[i].attr); + + nvgpu_kfree(g, l->ecc_attrs); + l->ecc_attrs = NULL; + } + + vgpu_ecc_remove_info(g); +} + void vgpu_create_sysfs(struct device *dev) { if (device_create_file(dev, &dev_attr_load)) dev_err(dev, "Failed to create vgpu sysfs attributes!\n"); + + vgpu_create_ecc_sysfs(dev); } void vgpu_remove_sysfs(struct device *dev) { device_remove_file(dev, &dev_attr_load); + vgpu_remove_ecc_sysfs(dev); } diff --git a/drivers/gpu/nvgpu/vgpu/ecc_vgpu.c b/drivers/gpu/nvgpu/vgpu/ecc_vgpu.c new file mode 100644 index 00000000..fa44e58f --- /dev/null +++ b/drivers/gpu/nvgpu/vgpu/ecc_vgpu.c @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include + +#include "vgpu/ecc_vgpu.h" + +int vgpu_ecc_get_info(struct gk20a *g) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + struct tegra_vgpu_cmd_msg msg = {0}; + struct tegra_vgpu_ecc_info_params *p = &msg.params.ecc_info; + struct tegra_vgpu_ecc_info_entry *entry; + struct vgpu_ecc_stat *stats; + void *handle; + int err, i, count; + size_t oob_size; + + msg.cmd = TEGRA_VGPU_CMD_GET_ECC_INFO; + msg.handle = vgpu_get_handle(g); + err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg)); + err = err ? err : msg.ret; + if (unlikely(err)) { + nvgpu_err(g, "vgpu get_ecc_info failed, err=%d", err); + return err; + } + + count = p->ecc_stats_count; + + handle = vgpu_ivc_oob_get_ptr(vgpu_ivc_get_server_vmid(), + TEGRA_VGPU_QUEUE_CMD, + (void **)&entry, &oob_size); + if (unlikely(!handle)) + return -EINVAL; + + if (unlikely(oob_size < count * sizeof(*entry))) { + err = -E2BIG; + goto out; + } + + stats = nvgpu_kzalloc(g, count * sizeof(*stats)); + if (unlikely(!stats)) { + err = -ENOMEM; + goto out; + } + + for (i = 0; i < count; i++) { + stats[i].ecc_id = entry[i].ecc_id; + strncpy(stats[i].name, entry[i].name, + NVGPU_ECC_STAT_NAME_MAX_SIZE); + } + + priv->ecc_stats = stats; + priv->ecc_stats_count = count; +out: + vgpu_ivc_oob_put_ptr(handle); + return err; +} + +void vgpu_ecc_remove_info(struct gk20a *g) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + + priv->ecc_stats_count = 0; + + if (priv->ecc_stats) { + nvgpu_kfree(g, priv->ecc_stats); + priv->ecc_stats = NULL; + } +} diff --git a/drivers/gpu/nvgpu/vgpu/ecc_vgpu.h b/drivers/gpu/nvgpu/vgpu/ecc_vgpu.h new file mode 100644 index 00000000..0fcaa3fa --- /dev/null +++ b/drivers/gpu/nvgpu/vgpu/ecc_vgpu.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef _ECC_VGPU_H_ +#define _ECC_VGPU_H_ + +#include +#include /* For NVGPU_ECC_STAT_NAME_MAX_SIZE */ + +struct gk20a; + +struct vgpu_ecc_stat { + u32 ecc_id; + char name[NVGPU_ECC_STAT_NAME_MAX_SIZE + 1]; +}; + +int vgpu_ecc_get_info(struct gk20a *g); +void vgpu_ecc_remove_info(struct gk20a *g); + +#endif -- cgit v1.2.2