summary | refs | log | tree | commit | diff | stats
path: root/drivers
diff options
context:
space:
mode:
author: Sami Kiminki <skiminki@nvidia.com> 2016-11-15 14:03:14 -0500
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2016-12-26 03:03:59 -0500
commit: 425f99335bfa13fa2af4e0865f61a4eb29fad6be (patch)
tree: ad5141bb2300c419a66290c92b3781d11b3016aa /drivers
parent: 98e349ab7eb01ac27e1e18477674294ca80d2093 (diff)
gpu: nvgpu: gk20a: Allow regops lists longer than 128
Process long regops lists in 4-kB fragments, overcoming the overly low limit of 128 reg ops per IOCTL call. Bump the list limit to 1024 and report the limit in GPU characteristics.

Bug 200248726

Change-Id: I3ad49139409f32aea8b1226d6562e88edccc8053
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/1253716
(cherry picked from commit 22314619b28f52610cb8769cd4c3f9eb01904eab)
Reviewed-on: http://git-master/r/1266652
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c 90
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gk20a.c 4
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gk20a.h 4
-rw-r--r-- drivers/gpu/nvgpu/nvgpu_common.c 8
-rw-r--r-- drivers/gpu/nvgpu/vgpu/vgpu.c 11
5 files changed, 83 insertions, 34 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index f86a7377..e5529295 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -1042,14 +1042,24 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
1042 1042
1043 struct device *dev = dbg_s->dev; 1043 struct device *dev = dbg_s->dev;
1044 struct gk20a *g = get_gk20a(dbg_s->dev); 1044 struct gk20a *g = get_gk20a(dbg_s->dev);
1045 struct nvgpu_dbg_gpu_reg_op *ops;
1046 struct channel_gk20a *ch; 1045 struct channel_gk20a *ch;
1047 u64 ops_size = sizeof(ops[0]) * args->num_ops;
1048 1046
1049 if (args->num_ops > SZ_4K / sizeof(ops[0])) 1047 gk20a_dbg_fn("%d ops, max fragment %d", args->num_ops, g->dbg_regops_tmp_buf_ops);
1048
1049 if (args->num_ops > g->gpu_characteristics.reg_ops_limit) {
1050 gk20a_err(dev, "regops limit exceeded");
1050 return -EINVAL; 1051 return -EINVAL;
1052 }
1051 1053
1052 gk20a_dbg_fn("%d ops, total size %llu", args->num_ops, ops_size); 1054 if (args->num_ops == 0) {
1055 /* Nothing to do */
1056 return 0;
1057 }
1058
1059 if (g->dbg_regops_tmp_buf_ops == 0 || !g->dbg_regops_tmp_buf) {
1060 gk20a_err(dev, "reg ops work buffer not allocated");
1061 return -ENODEV;
1062 }
1053 1063
1054 if (!dbg_s->id) { 1064 if (!dbg_s->id) {
1055 gk20a_err(dev, "can't call reg_ops on an unbound debugger session"); 1065 gk20a_err(dev, "can't call reg_ops on an unbound debugger session");
@@ -1069,21 +1079,6 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
1069 return -ENODEV; 1079 return -ENODEV;
1070 } 1080 }
1071 1081
1072 ops = kzalloc(ops_size, GFP_KERNEL);
1073 if (!ops) {
1074 gk20a_err(dev, "Allocating memory failed!");
1075 return -ENOMEM;
1076 }
1077
1078 gk20a_dbg_fn("Copying regops from userspace");
1079
1080 if (copy_from_user(ops, (void __user *)(uintptr_t)args->ops,
1081 ops_size)) {
1082 dev_err(dev, "copy_from_user failed!");
1083 err = -EFAULT;
1084 goto clean_up;
1085 }
1086
1087 /* since exec_reg_ops sends methods to the ucode, it must take the 1082 /* since exec_reg_ops sends methods to the ucode, it must take the
1088 * global gpu lock to protect against mixing methods from debug sessions 1083 * global gpu lock to protect against mixing methods from debug sessions
1089 * on other channels */ 1084 * on other channels */
@@ -1099,8 +1094,47 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
1099 } 1094 }
1100 1095
1101 if (!powergate_err) { 1096 if (!powergate_err) {
1102 err = g->ops.dbg_session_ops.exec_reg_ops(dbg_s, ops, 1097 u64 ops_offset = 0; /* index offset */
1103 args->num_ops); 1098
1099 while (ops_offset < args->num_ops && !err) {
1100 const u64 num_ops =
1101 min(args->num_ops - ops_offset,
1102 (u64)(g->dbg_regops_tmp_buf_ops));
1103 const u64 fragment_size =
1104 num_ops * sizeof(g->dbg_regops_tmp_buf[0]);
1105
1106 void __user *const fragment =
1107 (void __user *)(uintptr_t)
1108 (args->ops +
1109 ops_offset * sizeof(g->dbg_regops_tmp_buf[0]));
1110
1111 gk20a_dbg_fn("Regops fragment: start_op=%llu ops=%llu",
1112 ops_offset, num_ops);
1113
1114 gk20a_dbg_fn("Copying regops from userspace");
1115
1116 if (copy_from_user(g->dbg_regops_tmp_buf,
1117 fragment, fragment_size)) {
1118 dev_err(dev, "copy_from_user failed!");
1119 err = -EFAULT;
1120 break;
1121 }
1122
1123 err = g->ops.dbg_session_ops.exec_reg_ops(
1124 dbg_s, g->dbg_regops_tmp_buf, num_ops);
1125
1126 gk20a_dbg_fn("Copying result to userspace");
1127
1128 if (copy_to_user(fragment, g->dbg_regops_tmp_buf,
1129 fragment_size)) {
1130 dev_err(dev, "copy_to_user failed!");
1131 err = -EFAULT;
1132 break;
1133 }
1134
1135 ops_offset += num_ops;
1136 }
1137
1104 /* enable powergate, if previously disabled */ 1138 /* enable powergate, if previously disabled */
1105 if (is_pg_disabled) { 1139 if (is_pg_disabled) {
1106 powergate_err = 1140 powergate_err =
@@ -1114,21 +1148,9 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
1114 if (!err && powergate_err) 1148 if (!err && powergate_err)
1115 err = powergate_err; 1149 err = powergate_err;
1116 1150
1117 if (err) { 1151 if (err)
1118 gk20a_err(dev, "dbg regops failed"); 1152 gk20a_err(dev, "dbg regops failed");
1119 goto clean_up;
1120 }
1121
1122 gk20a_dbg_fn("Copying result to userspace");
1123 1153
1124 if (copy_to_user((void __user *)(uintptr_t)args->ops, ops, ops_size)) {
1125 dev_err(dev, "copy_to_user failed!");
1126 err = -EFAULT;
1127 goto clean_up;
1128 }
1129
1130 clean_up:
1131 kfree(ops);
1132 return err; 1154 return err;
1133} 1155}
1134 1156
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index e314d6cd..ca2f7b33 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -700,6 +700,8 @@ void gk20a_remove_support(struct device *dev)
700#ifdef CONFIG_TEGRA_COMMON 700#ifdef CONFIG_TEGRA_COMMON
701 tegra_unregister_idle_unidle(); 701 tegra_unregister_idle_unidle();
702#endif 702#endif
703 if (g->dbg_regops_tmp_buf)
704 kfree(g->dbg_regops_tmp_buf);
703 705
704 if (g->pmu.remove_support) 706 if (g->pmu.remove_support)
705 g->pmu.remove_support(&g->pmu); 707 g->pmu.remove_support(&g->pmu);
@@ -2170,6 +2172,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
2170 gpu->pci_class = g->pci_class; 2172 gpu->pci_class = g->pci_class;
2171 gpu->pci_revision = g->pci_revision; 2173 gpu->pci_revision = g->pci_revision;
2172 2174
2175 gpu->reg_ops_limit = 1024;
2176
2173 return 0; 2177 return 0;
2174} 2178}
2175 2179
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index d444447d..d219b815 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -890,6 +890,10 @@ struct gk20a {
890 int dbg_powergating_disabled_refcount; /*refcount for pg disable */ 890 int dbg_powergating_disabled_refcount; /*refcount for pg disable */
891 int dbg_timeout_disabled_refcount; /*refcount for timeout disable */ 891 int dbg_timeout_disabled_refcount; /*refcount for timeout disable */
892 892
893 /* must have dbg_sessions_lock before use */
894 struct nvgpu_dbg_gpu_reg_op *dbg_regops_tmp_buf;
895 u32 dbg_regops_tmp_buf_ops;
896
893 /* 897 /*
894 * When set subsequent VMAs will separate fixed and non-fixed 898 * When set subsequent VMAs will separate fixed and non-fixed
895 * allocations. This avoids conflicts with fixed and non-fixed allocs 899 * allocations. This avoids conflicts with fixed and non-fixed allocs
diff --git a/drivers/gpu/nvgpu/nvgpu_common.c b/drivers/gpu/nvgpu/nvgpu_common.c
index 179464d8..a1f4832b 100644
--- a/drivers/gpu/nvgpu/nvgpu_common.c
+++ b/drivers/gpu/nvgpu/nvgpu_common.c
@@ -155,6 +155,14 @@ int nvgpu_probe(struct gk20a *g,
155 gk20a_create_sysfs(g->dev); 155 gk20a_create_sysfs(g->dev);
156 gk20a_debug_init(g->dev, debugfs_symlink); 156 gk20a_debug_init(g->dev, debugfs_symlink);
157 157
158 g->dbg_regops_tmp_buf = kzalloc(SZ_4K, GFP_KERNEL);
159 if (!g->dbg_regops_tmp_buf) {
160 dev_err(g->dev, "couldn't allocate regops tmp buf");
161 return -ENOMEM;
162 }
163 g->dbg_regops_tmp_buf_ops =
164 SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]);
165
158 g->remove_support = gk20a_remove_support; 166 g->remove_support = gk20a_remove_support;
159 167
160 return 0; 168 return 0;
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c
index bd332583..213f6bbb 100644
--- a/drivers/gpu/nvgpu/vgpu/vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.c
@@ -191,6 +191,9 @@ static void vgpu_remove_support(struct device *dev)
191 struct tegra_vgpu_intr_msg msg; 191 struct tegra_vgpu_intr_msg msg;
192 int err; 192 int err;
193 193
194 if (g->dbg_regops_tmp_buf)
195 kfree(g->dbg_regops_tmp_buf);
196
194 if (g->pmu.remove_support) 197 if (g->pmu.remove_support)
195 g->pmu.remove_support(&g->pmu); 198 g->pmu.remove_support(&g->pmu);
196 199
@@ -242,6 +245,14 @@ static int vgpu_init_support(struct platform_device *pdev)
242 mutex_init(&g->client_lock); 245 mutex_init(&g->client_lock);
243 mutex_init(&g->ch_wdt_lock); 246 mutex_init(&g->ch_wdt_lock);
244 247
248 g->dbg_regops_tmp_buf = kzalloc(SZ_4K, GFP_KERNEL);
249 if (!g->dbg_regops_tmp_buf) {
250 dev_err(g->dev, "couldn't allocate regops tmp buf");
251 return -ENOMEM;
252 }
253 g->dbg_regops_tmp_buf_ops =
254 SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]);
255
245 g->remove_support = vgpu_remove_support; 256 g->remove_support = vgpu_remove_support;
246 return 0; 257 return 0;
247 258