summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
diff options
context:
space:
mode:
author: Sami Kiminki <skiminki@nvidia.com> 2016-11-15 14:03:14 -0500
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2016-12-26 03:03:59 -0500
commit: 425f99335bfa13fa2af4e0865f61a4eb29fad6be (patch)
tree: ad5141bb2300c419a66290c92b3781d11b3016aa /drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
parent: 98e349ab7eb01ac27e1e18477674294ca80d2093 (diff)
gpu: nvgpu: gk20a: Allow regops lists longer than 128
Process long regops lists in 4-kB fragments, overcoming the overly low limit of 128 reg ops per IOCTL call. Bump the list limit to 1024 and report the limit in GPU characteristics.

Bug 200248726

Change-Id: I3ad49139409f32aea8b1226d6562e88edccc8053
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/1253716
(cherry picked from commit 22314619b28f52610cb8769cd4c3f9eb01904eab)
Reviewed-on: http://git-master/r/1266652
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c 90
1 files changed, 56 insertions, 34 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index f86a7377..e5529295 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -1042,14 +1042,24 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
1042 1042
1043 struct device *dev = dbg_s->dev; 1043 struct device *dev = dbg_s->dev;
1044 struct gk20a *g = get_gk20a(dbg_s->dev); 1044 struct gk20a *g = get_gk20a(dbg_s->dev);
1045 struct nvgpu_dbg_gpu_reg_op *ops;
1046 struct channel_gk20a *ch; 1045 struct channel_gk20a *ch;
1047 u64 ops_size = sizeof(ops[0]) * args->num_ops;
1048 1046
1049 if (args->num_ops > SZ_4K / sizeof(ops[0])) 1047 gk20a_dbg_fn("%d ops, max fragment %d", args->num_ops, g->dbg_regops_tmp_buf_ops);
1048
1049 if (args->num_ops > g->gpu_characteristics.reg_ops_limit) {
1050 gk20a_err(dev, "regops limit exceeded");
1050 return -EINVAL; 1051 return -EINVAL;
1052 }
1051 1053
1052 gk20a_dbg_fn("%d ops, total size %llu", args->num_ops, ops_size); 1054 if (args->num_ops == 0) {
1055 /* Nothing to do */
1056 return 0;
1057 }
1058
1059 if (g->dbg_regops_tmp_buf_ops == 0 || !g->dbg_regops_tmp_buf) {
1060 gk20a_err(dev, "reg ops work buffer not allocated");
1061 return -ENODEV;
1062 }
1053 1063
1054 if (!dbg_s->id) { 1064 if (!dbg_s->id) {
1055 gk20a_err(dev, "can't call reg_ops on an unbound debugger session"); 1065 gk20a_err(dev, "can't call reg_ops on an unbound debugger session");
@@ -1069,21 +1079,6 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
1069 return -ENODEV; 1079 return -ENODEV;
1070 } 1080 }
1071 1081
1072 ops = kzalloc(ops_size, GFP_KERNEL);
1073 if (!ops) {
1074 gk20a_err(dev, "Allocating memory failed!");
1075 return -ENOMEM;
1076 }
1077
1078 gk20a_dbg_fn("Copying regops from userspace");
1079
1080 if (copy_from_user(ops, (void __user *)(uintptr_t)args->ops,
1081 ops_size)) {
1082 dev_err(dev, "copy_from_user failed!");
1083 err = -EFAULT;
1084 goto clean_up;
1085 }
1086
1087 /* since exec_reg_ops sends methods to the ucode, it must take the 1082 /* since exec_reg_ops sends methods to the ucode, it must take the
1088 * global gpu lock to protect against mixing methods from debug sessions 1083 * global gpu lock to protect against mixing methods from debug sessions
1089 * on other channels */ 1084 * on other channels */
@@ -1099,8 +1094,47 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
1099 } 1094 }
1100 1095
1101 if (!powergate_err) { 1096 if (!powergate_err) {
1102 err = g->ops.dbg_session_ops.exec_reg_ops(dbg_s, ops, 1097 u64 ops_offset = 0; /* index offset */
1103 args->num_ops); 1098
1099 while (ops_offset < args->num_ops && !err) {
1100 const u64 num_ops =
1101 min(args->num_ops - ops_offset,
1102 (u64)(g->dbg_regops_tmp_buf_ops));
1103 const u64 fragment_size =
1104 num_ops * sizeof(g->dbg_regops_tmp_buf[0]);
1105
1106 void __user *const fragment =
1107 (void __user *)(uintptr_t)
1108 (args->ops +
1109 ops_offset * sizeof(g->dbg_regops_tmp_buf[0]));
1110
1111 gk20a_dbg_fn("Regops fragment: start_op=%llu ops=%llu",
1112 ops_offset, num_ops);
1113
1114 gk20a_dbg_fn("Copying regops from userspace");
1115
1116 if (copy_from_user(g->dbg_regops_tmp_buf,
1117 fragment, fragment_size)) {
1118 dev_err(dev, "copy_from_user failed!");
1119 err = -EFAULT;
1120 break;
1121 }
1122
1123 err = g->ops.dbg_session_ops.exec_reg_ops(
1124 dbg_s, g->dbg_regops_tmp_buf, num_ops);
1125
1126 gk20a_dbg_fn("Copying result to userspace");
1127
1128 if (copy_to_user(fragment, g->dbg_regops_tmp_buf,
1129 fragment_size)) {
1130 dev_err(dev, "copy_to_user failed!");
1131 err = -EFAULT;
1132 break;
1133 }
1134
1135 ops_offset += num_ops;
1136 }
1137
1104 /* enable powergate, if previously disabled */ 1138 /* enable powergate, if previously disabled */
1105 if (is_pg_disabled) { 1139 if (is_pg_disabled) {
1106 powergate_err = 1140 powergate_err =
@@ -1114,21 +1148,9 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
1114 if (!err && powergate_err) 1148 if (!err && powergate_err)
1115 err = powergate_err; 1149 err = powergate_err;
1116 1150
1117 if (err) { 1151 if (err)
1118 gk20a_err(dev, "dbg regops failed"); 1152 gk20a_err(dev, "dbg regops failed");
1119 goto clean_up;
1120 }
1121
1122 gk20a_dbg_fn("Copying result to userspace");
1123 1153
1124 if (copy_to_user((void __user *)(uintptr_t)args->ops, ops, ops_size)) {
1125 dev_err(dev, "copy_to_user failed!");
1126 err = -EFAULT;
1127 goto clean_up;
1128 }
1129
1130 clean_up:
1131 kfree(ops);
1132 return err; 1154 return err;
1133} 1155}
1134 1156