summaryrefslogtreecommitdiffstats
path: root/drivers/gpu
diff options
context:
space:
mode:
authorKyle Guo <kyleg@nvidia.com>2018-07-12 20:51:42 -0400
committermobile promotions <svcmobile_promotions@nvidia.com>2018-08-15 00:24:53 -0400
commit2a25d03f2b7ad0700f14640282abd72ff587d800 (patch)
tree990b2f40d4625df7d5a9106d3c98dc0ccddf90ac /drivers/gpu
parent91390d857f6302f9c2923ec4188ea7e24ee537a2 (diff)
gpu: nvgpu: vgpu: ecc sysfs support for vgpu
- fetch ECC info from the RM server and create sysfs nodes
- new file ecc_vgpu.c for platform-independent code
- add 2 new commands: GET_ECC_INFO and GET_ECC_COUNTER_VALUE

JIRA EVLR-2590

Change-Id: I040a9fcd23326e432ca93e9a028319f9c1c570f0
Signed-off-by: Kyle Guo <kyleg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1777428
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/nvgpu/Makefile1
-rw-r--r--drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h19
-rw-r--r--drivers/gpu/nvgpu/include/nvgpu/vgpu/vgpu.h3
-rw-r--r--drivers/gpu/nvgpu/os/linux/vgpu/sysfs_vgpu.c93
-rw-r--r--drivers/gpu/nvgpu/vgpu/ecc_vgpu.c92
-rw-r--r--drivers/gpu/nvgpu/vgpu/ecc_vgpu.h39
6 files changed, 247 insertions, 0 deletions
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 4ca4b6d8..7a21d294 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -245,6 +245,7 @@ nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \
245 vgpu/dbg_vgpu.o \ 245 vgpu/dbg_vgpu.o \
246 vgpu/tsg_vgpu.o \ 246 vgpu/tsg_vgpu.o \
247 vgpu/css_vgpu.o \ 247 vgpu/css_vgpu.o \
248 vgpu/ecc_vgpu.o \
248 vgpu/gm20b/vgpu_gr_gm20b.o \ 249 vgpu/gm20b/vgpu_gr_gm20b.o \
249 vgpu/gp10b/vgpu_hal_gp10b.o \ 250 vgpu/gp10b/vgpu_hal_gp10b.o \
250 vgpu/gp10b/vgpu_gr_gp10b.o \ 251 vgpu/gp10b/vgpu_gr_gp10b.o \
diff --git a/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h b/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h
index 1eb26303..39d68dd1 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/vgpu/tegra_vgpu.h
@@ -26,6 +26,7 @@
26#define __TEGRA_VGPU_H 26#define __TEGRA_VGPU_H
27 27
28#include <nvgpu/types.h> 28#include <nvgpu/types.h>
29#include <nvgpu/ecc.h> /* For NVGPU_ECC_STAT_NAME_MAX_SIZE */
29 30
30enum { 31enum {
31 TEGRA_VGPU_MODULE_GPU = 0, 32 TEGRA_VGPU_MODULE_GPU = 0,
@@ -120,6 +121,8 @@ enum {
120 TEGRA_VGPU_CMD_UPDATE_PC_SAMPLING = 81, 121 TEGRA_VGPU_CMD_UPDATE_PC_SAMPLING = 81,
121 TEGRA_VGPU_CMD_SUSPEND = 82, 122 TEGRA_VGPU_CMD_SUSPEND = 82,
122 TEGRA_VGPU_CMD_RESUME = 83, 123 TEGRA_VGPU_CMD_RESUME = 83,
124 TEGRA_VGPU_CMD_GET_ECC_INFO = 84,
125 TEGRA_VGPU_CMD_GET_ECC_COUNTER_VALUE = 85,
123}; 126};
124 127
125struct tegra_vgpu_connect_params { 128struct tegra_vgpu_connect_params {
@@ -378,6 +381,20 @@ struct tegra_vgpu_channel_free_hwpm_ctx {
378 u64 handle; 381 u64 handle;
379}; 382};
380 383
/*
 * Parameters of TEGRA_VGPU_CMD_GET_ECC_INFO, carried in
 * tegra_vgpu_cmd_msg.params.ecc_info.  After a successful reply,
 * ecc_stats_count is read as the number of tegra_vgpu_ecc_info_entry
 * records expected in the IVC out-of-band buffer.
 */
384struct tegra_vgpu_ecc_info_params {
385 u32 ecc_stats_count;
386};
387
/*
 * One ECC counter description as read from the IVC out-of-band buffer
 * after TEGRA_VGPU_CMD_GET_ECC_INFO.
 *
 * ecc_id is the identifier later sent back in
 * tegra_vgpu_ecc_counter_params to query the counter; name is used as the
 * sysfs file name.
 * NOTE(review): name has no room reserved for a terminator here; the
 * client copies at most NVGPU_ECC_STAT_NAME_MAX_SIZE bytes into a buffer
 * one byte larger, so it presumably may arrive unterminated -- confirm
 * against the server side.
 */
388struct tegra_vgpu_ecc_info_entry {
389 u32 ecc_id;
390 char name[NVGPU_ECC_STAT_NAME_MAX_SIZE];
391};
392
/*
 * Parameters of TEGRA_VGPU_CMD_GET_ECC_COUNTER_VALUE, carried in
 * tegra_vgpu_cmd_msg.params.ecc_counter.  The client fills in ecc_id
 * before sending; value is read from the reply and reported to user
 * space as an unsigned decimal.
 */
393struct tegra_vgpu_ecc_counter_params {
394 u32 ecc_id;
395 u32 value;
396};
397
381struct tegra_vgpu_gr_ctx_params { 398struct tegra_vgpu_gr_ctx_params {
382 u64 gr_ctx_handle; 399 u64 gr_ctx_handle;
383 u64 as_handle; 400 u64 as_handle;
@@ -659,6 +676,8 @@ struct tegra_vgpu_cmd_msg {
659 struct tegra_vgpu_map_syncpt_params map_syncpt; 676 struct tegra_vgpu_map_syncpt_params map_syncpt;
660 struct tegra_vgpu_tsg_bind_channel_ex_params tsg_bind_channel_ex; 677 struct tegra_vgpu_tsg_bind_channel_ex_params tsg_bind_channel_ex;
661 struct tegra_vgpu_channel_update_pc_sampling update_pc_sampling; 678 struct tegra_vgpu_channel_update_pc_sampling update_pc_sampling;
679 struct tegra_vgpu_ecc_info_params ecc_info;
680 struct tegra_vgpu_ecc_counter_params ecc_counter;
662 char padding[192]; 681 char padding[192];
663 } params; 682 } params;
664}; 683};
diff --git a/drivers/gpu/nvgpu/include/nvgpu/vgpu/vgpu.h b/drivers/gpu/nvgpu/include/nvgpu/vgpu/vgpu.h
index 1e851b8e..762bc229 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/vgpu/vgpu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/vgpu/vgpu.h
@@ -37,11 +37,14 @@ struct gk20a;
37struct vm_gk20a; 37struct vm_gk20a;
38struct nvgpu_gr_ctx; 38struct nvgpu_gr_ctx;
39struct nvgpu_cpu_time_correlation_sample; 39struct nvgpu_cpu_time_correlation_sample;
40struct vgpu_ecc_stat;
40 41
/* Per-GPU private state of the vgpu client. */
41struct vgpu_priv_data { 42struct vgpu_priv_data {
42 u64 virt_handle; 43 u64 virt_handle;
43 struct nvgpu_thread intr_handler; 44 struct nvgpu_thread intr_handler;
44 struct tegra_vgpu_constants_params constants; 45 struct tegra_vgpu_constants_params constants;
/*
 * ECC counter table and its length.  Both are set by vgpu_ecc_get_info()
 * and reset (table freed, count zeroed) by vgpu_ecc_remove_info().
 */
46 struct vgpu_ecc_stat *ecc_stats;
47 int ecc_stats_count;
45}; 48};
46 49
47struct vgpu_priv_data *vgpu_get_priv_data(struct gk20a *g); 50struct vgpu_priv_data *vgpu_get_priv_data(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/sysfs_vgpu.c b/drivers/gpu/nvgpu/os/linux/vgpu/sysfs_vgpu.c
index 57aad4b4..30ca7f53 100644
--- a/drivers/gpu/nvgpu/os/linux/vgpu/sysfs_vgpu.c
+++ b/drivers/gpu/nvgpu/os/linux/vgpu/sysfs_vgpu.c
@@ -18,6 +18,8 @@
18#include <nvgpu/vgpu/vgpu.h> 18#include <nvgpu/vgpu/vgpu.h>
19 19
20#include "os/linux/platform_gk20a.h" 20#include "os/linux/platform_gk20a.h"
21#include "os/linux/os_linux.h"
22#include "vgpu/ecc_vgpu.h"
21 23
22static ssize_t vgpu_load_show(struct device *dev, 24static ssize_t vgpu_load_show(struct device *dev,
23 struct device_attribute *attr, 25 struct device_attribute *attr,
@@ -38,13 +40,104 @@ static ssize_t vgpu_load_show(struct device *dev,
38} 40}
39static DEVICE_ATTR(load, S_IRUGO, vgpu_load_show, NULL); 41static DEVICE_ATTR(load, S_IRUGO, vgpu_load_show, NULL);
40 42
43static ssize_t vgpu_ecc_stat_show(struct device *dev,
44 struct device_attribute *attr,
45 char *buf)
46{
47 struct gk20a *g = get_gk20a(dev);
48 struct tegra_vgpu_cmd_msg msg = {0};
49 struct tegra_vgpu_ecc_counter_params *p = &msg.params.ecc_counter;
50 struct dev_ext_attribute *ext_attr = container_of(attr,
51 struct dev_ext_attribute, attr);
52 struct vgpu_ecc_stat *ecc_stat = ext_attr->var;
53 int err;
54
55 p->ecc_id = ecc_stat->ecc_id;
56
57 msg.cmd = TEGRA_VGPU_CMD_GET_ECC_COUNTER_VALUE;
58 msg.handle = vgpu_get_handle(g);
59 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
60 err = err ? err : msg.ret;
61 if (unlikely(err)) {
62 nvgpu_err(g, "ecc: cannot get ECC counter value: %d", err);
63 return err;
64 }
65
66 return snprintf(buf, PAGE_SIZE, "%u\n", p->value);
67}
68
69static int vgpu_create_ecc_sysfs(struct device *dev)
70{
71 struct gk20a *g = get_gk20a(dev);
72 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
73 struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
74 struct vgpu_ecc_stat *stats;
75 struct dev_ext_attribute *attrs;
76 int err, i, count;
77
78 err = vgpu_ecc_get_info(g);
79 if (unlikely(err)) {
80 nvgpu_err(g, "ecc: cannot get ECC info: %d", err);
81 return err;
82 }
83
84 stats = priv->ecc_stats;
85 count = priv->ecc_stats_count;
86
87 attrs = nvgpu_kzalloc(g, count * sizeof(*attrs));
88 if (unlikely(!attrs)) {
89 nvgpu_err(g, "ecc: no memory");
90 vgpu_ecc_remove_info(g);
91 return -ENOMEM;
92 }
93
94 for (i = 0; i < count; i++) {
95 sysfs_attr_init(&attrs[i].attr);
96 attrs[i].attr.attr.name = stats[i].name;
97 attrs[i].attr.attr.mode = VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
98 attrs[i].attr.show = vgpu_ecc_stat_show;
99 attrs[i].attr.store = NULL;
100 attrs[i].var = &stats[i];
101
102 err = device_create_file(dev, &attrs[i].attr);
103 if (unlikely(err)) {
104 nvgpu_warn(g, "ecc: cannot create file \"%s\": %d",
105 stats[i].name, err);
106 }
107 }
108
109 l->ecc_attrs = attrs;
110 return 0;
111}
112
113static void vgpu_remove_ecc_sysfs(struct device *dev)
114{
115 struct gk20a *g = get_gk20a(dev);
116 struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
117 struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
118 int i;
119
120 if (l->ecc_attrs) {
121 for (i = 0; i < priv->ecc_stats_count; i++)
122 device_remove_file(dev, &l->ecc_attrs[i].attr);
123
124 nvgpu_kfree(g, l->ecc_attrs);
125 l->ecc_attrs = NULL;
126 }
127
128 vgpu_ecc_remove_info(g);
129}
130
41void vgpu_create_sysfs(struct device *dev) 131void vgpu_create_sysfs(struct device *dev)
42{ 132{
43 if (device_create_file(dev, &dev_attr_load)) 133 if (device_create_file(dev, &dev_attr_load))
44 dev_err(dev, "Failed to create vgpu sysfs attributes!\n"); 134 dev_err(dev, "Failed to create vgpu sysfs attributes!\n");
135
136 vgpu_create_ecc_sysfs(dev);
45} 137}
46 138
47void vgpu_remove_sysfs(struct device *dev) 139void vgpu_remove_sysfs(struct device *dev)
48{ 140{
49 device_remove_file(dev, &dev_attr_load); 141 device_remove_file(dev, &dev_attr_load);
142 vgpu_remove_ecc_sysfs(dev);
50} 143}
diff --git a/drivers/gpu/nvgpu/vgpu/ecc_vgpu.c b/drivers/gpu/nvgpu/vgpu/ecc_vgpu.c
new file mode 100644
index 00000000..fa44e58f
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/ecc_vgpu.c
@@ -0,0 +1,92 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#include <nvgpu/kmem.h>
24#include <nvgpu/vgpu/vgpu_ivc.h>
25#include <nvgpu/vgpu/vgpu.h>
26#include <nvgpu/errno.h>
27
28#include "vgpu/ecc_vgpu.h"
29
30int vgpu_ecc_get_info(struct gk20a *g)
31{
32 struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
33 struct tegra_vgpu_cmd_msg msg = {0};
34 struct tegra_vgpu_ecc_info_params *p = &msg.params.ecc_info;
35 struct tegra_vgpu_ecc_info_entry *entry;
36 struct vgpu_ecc_stat *stats;
37 void *handle;
38 int err, i, count;
39 size_t oob_size;
40
41 msg.cmd = TEGRA_VGPU_CMD_GET_ECC_INFO;
42 msg.handle = vgpu_get_handle(g);
43 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
44 err = err ? err : msg.ret;
45 if (unlikely(err)) {
46 nvgpu_err(g, "vgpu get_ecc_info failed, err=%d", err);
47 return err;
48 }
49
50 count = p->ecc_stats_count;
51
52 handle = vgpu_ivc_oob_get_ptr(vgpu_ivc_get_server_vmid(),
53 TEGRA_VGPU_QUEUE_CMD,
54 (void **)&entry, &oob_size);
55 if (unlikely(!handle))
56 return -EINVAL;
57
58 if (unlikely(oob_size < count * sizeof(*entry))) {
59 err = -E2BIG;
60 goto out;
61 }
62
63 stats = nvgpu_kzalloc(g, count * sizeof(*stats));
64 if (unlikely(!stats)) {
65 err = -ENOMEM;
66 goto out;
67 }
68
69 for (i = 0; i < count; i++) {
70 stats[i].ecc_id = entry[i].ecc_id;
71 strncpy(stats[i].name, entry[i].name,
72 NVGPU_ECC_STAT_NAME_MAX_SIZE);
73 }
74
75 priv->ecc_stats = stats;
76 priv->ecc_stats_count = count;
77out:
78 vgpu_ivc_oob_put_ptr(handle);
79 return err;
80}
81
82void vgpu_ecc_remove_info(struct gk20a *g)
83{
84 struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
85
86 priv->ecc_stats_count = 0;
87
88 if (priv->ecc_stats) {
89 nvgpu_kfree(g, priv->ecc_stats);
90 priv->ecc_stats = NULL;
91 }
92}
diff --git a/drivers/gpu/nvgpu/vgpu/ecc_vgpu.h b/drivers/gpu/nvgpu/vgpu/ecc_vgpu.h
new file mode 100644
index 00000000..0fcaa3fa
--- /dev/null
+++ b/drivers/gpu/nvgpu/vgpu/ecc_vgpu.h
@@ -0,0 +1,39 @@
1/*
2 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23#ifndef _ECC_VGPU_H_
24#define _ECC_VGPU_H_
25
26#include <nvgpu/types.h>
27#include <nvgpu/ecc.h> /* For NVGPU_ECC_STAT_NAME_MAX_SIZE */
28
29struct gk20a;
30
/*
 * Client-side description of one ECC counter.
 *
 * ecc_id is the identifier sent back to query the counter's value; name is
 * the counter name.  name is one byte larger than
 * NVGPU_ECC_STAT_NAME_MAX_SIZE, leaving room for a terminating NUL after a
 * copy of up to NVGPU_ECC_STAT_NAME_MAX_SIZE bytes.
 */
31struct vgpu_ecc_stat {
32 u32 ecc_id;
33 char name[NVGPU_ECC_STAT_NAME_MAX_SIZE + 1];
34};
35
36int vgpu_ecc_get_info(struct gk20a *g);
37void vgpu_ecc_remove_info(struct gk20a *g);
38
39#endif