diff options
author | Sami Kiminki <skiminki@nvidia.com> | 2016-11-15 14:03:14 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2016-12-26 03:03:59 -0500 |
commit | 425f99335bfa13fa2af4e0865f61a4eb29fad6be (patch) | |
tree | ad5141bb2300c419a66290c92b3781d11b3016aa | |
parent | 98e349ab7eb01ac27e1e18477674294ca80d2093 (diff) |
gpu: nvgpu: gk20a: Allow regops lists longer than 128
Process long regops lists in 4-kB fragments, overcoming the overly
low limit of 128 regops per IOCTL call. Raise the list limit to 1024
and report it in the GPU characteristics (reg_ops_limit).
Bug 200248726
Change-Id: I3ad49139409f32aea8b1226d6562e88edccc8053
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/1253716
(cherry picked from commit 22314619b28f52610cb8769cd4c3f9eb01904eab)
Reviewed-on: http://git-master/r/1266652
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 90 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.h | 4 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/nvgpu_common.c | 8 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/vgpu/vgpu.c | 11 | ||||
-rw-r--r-- | include/uapi/linux/nvgpu.h | 5 |
6 files changed, 88 insertions, 34 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index f86a7377..e5529295 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | |||
@@ -1042,14 +1042,24 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, | |||
1042 | 1042 | ||
1043 | struct device *dev = dbg_s->dev; | 1043 | struct device *dev = dbg_s->dev; |
1044 | struct gk20a *g = get_gk20a(dbg_s->dev); | 1044 | struct gk20a *g = get_gk20a(dbg_s->dev); |
1045 | struct nvgpu_dbg_gpu_reg_op *ops; | ||
1046 | struct channel_gk20a *ch; | 1045 | struct channel_gk20a *ch; |
1047 | u64 ops_size = sizeof(ops[0]) * args->num_ops; | ||
1048 | 1046 | ||
1049 | if (args->num_ops > SZ_4K / sizeof(ops[0])) | 1047 | gk20a_dbg_fn("%d ops, max fragment %d", args->num_ops, g->dbg_regops_tmp_buf_ops); |
1048 | |||
1049 | if (args->num_ops > g->gpu_characteristics.reg_ops_limit) { | ||
1050 | gk20a_err(dev, "regops limit exceeded"); | ||
1050 | return -EINVAL; | 1051 | return -EINVAL; |
1052 | } | ||
1051 | 1053 | ||
1052 | gk20a_dbg_fn("%d ops, total size %llu", args->num_ops, ops_size); | 1054 | if (args->num_ops == 0) { |
1055 | /* Nothing to do */ | ||
1056 | return 0; | ||
1057 | } | ||
1058 | |||
1059 | if (g->dbg_regops_tmp_buf_ops == 0 || !g->dbg_regops_tmp_buf) { | ||
1060 | gk20a_err(dev, "reg ops work buffer not allocated"); | ||
1061 | return -ENODEV; | ||
1062 | } | ||
1053 | 1063 | ||
1054 | if (!dbg_s->id) { | 1064 | if (!dbg_s->id) { |
1055 | gk20a_err(dev, "can't call reg_ops on an unbound debugger session"); | 1065 | gk20a_err(dev, "can't call reg_ops on an unbound debugger session"); |
@@ -1069,21 +1079,6 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, | |||
1069 | return -ENODEV; | 1079 | return -ENODEV; |
1070 | } | 1080 | } |
1071 | 1081 | ||
1072 | ops = kzalloc(ops_size, GFP_KERNEL); | ||
1073 | if (!ops) { | ||
1074 | gk20a_err(dev, "Allocating memory failed!"); | ||
1075 | return -ENOMEM; | ||
1076 | } | ||
1077 | |||
1078 | gk20a_dbg_fn("Copying regops from userspace"); | ||
1079 | |||
1080 | if (copy_from_user(ops, (void __user *)(uintptr_t)args->ops, | ||
1081 | ops_size)) { | ||
1082 | dev_err(dev, "copy_from_user failed!"); | ||
1083 | err = -EFAULT; | ||
1084 | goto clean_up; | ||
1085 | } | ||
1086 | |||
1087 | /* since exec_reg_ops sends methods to the ucode, it must take the | 1082 | /* since exec_reg_ops sends methods to the ucode, it must take the |
1088 | * global gpu lock to protect against mixing methods from debug sessions | 1083 | * global gpu lock to protect against mixing methods from debug sessions |
1089 | * on other channels */ | 1084 | * on other channels */ |
@@ -1099,8 +1094,47 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, | |||
1099 | } | 1094 | } |
1100 | 1095 | ||
1101 | if (!powergate_err) { | 1096 | if (!powergate_err) { |
1102 | err = g->ops.dbg_session_ops.exec_reg_ops(dbg_s, ops, | 1097 | u64 ops_offset = 0; /* index offset */ |
1103 | args->num_ops); | 1098 | |
1099 | while (ops_offset < args->num_ops && !err) { | ||
1100 | const u64 num_ops = | ||
1101 | min(args->num_ops - ops_offset, | ||
1102 | (u64)(g->dbg_regops_tmp_buf_ops)); | ||
1103 | const u64 fragment_size = | ||
1104 | num_ops * sizeof(g->dbg_regops_tmp_buf[0]); | ||
1105 | |||
1106 | void __user *const fragment = | ||
1107 | (void __user *)(uintptr_t) | ||
1108 | (args->ops + | ||
1109 | ops_offset * sizeof(g->dbg_regops_tmp_buf[0])); | ||
1110 | |||
1111 | gk20a_dbg_fn("Regops fragment: start_op=%llu ops=%llu", | ||
1112 | ops_offset, num_ops); | ||
1113 | |||
1114 | gk20a_dbg_fn("Copying regops from userspace"); | ||
1115 | |||
1116 | if (copy_from_user(g->dbg_regops_tmp_buf, | ||
1117 | fragment, fragment_size)) { | ||
1118 | dev_err(dev, "copy_from_user failed!"); | ||
1119 | err = -EFAULT; | ||
1120 | break; | ||
1121 | } | ||
1122 | |||
1123 | err = g->ops.dbg_session_ops.exec_reg_ops( | ||
1124 | dbg_s, g->dbg_regops_tmp_buf, num_ops); | ||
1125 | |||
1126 | gk20a_dbg_fn("Copying result to userspace"); | ||
1127 | |||
1128 | if (copy_to_user(fragment, g->dbg_regops_tmp_buf, | ||
1129 | fragment_size)) { | ||
1130 | dev_err(dev, "copy_to_user failed!"); | ||
1131 | err = -EFAULT; | ||
1132 | break; | ||
1133 | } | ||
1134 | |||
1135 | ops_offset += num_ops; | ||
1136 | } | ||
1137 | |||
1104 | /* enable powergate, if previously disabled */ | 1138 | /* enable powergate, if previously disabled */ |
1105 | if (is_pg_disabled) { | 1139 | if (is_pg_disabled) { |
1106 | powergate_err = | 1140 | powergate_err = |
@@ -1114,21 +1148,9 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, | |||
1114 | if (!err && powergate_err) | 1148 | if (!err && powergate_err) |
1115 | err = powergate_err; | 1149 | err = powergate_err; |
1116 | 1150 | ||
1117 | if (err) { | 1151 | if (err) |
1118 | gk20a_err(dev, "dbg regops failed"); | 1152 | gk20a_err(dev, "dbg regops failed"); |
1119 | goto clean_up; | ||
1120 | } | ||
1121 | |||
1122 | gk20a_dbg_fn("Copying result to userspace"); | ||
1123 | 1153 | ||
1124 | if (copy_to_user((void __user *)(uintptr_t)args->ops, ops, ops_size)) { | ||
1125 | dev_err(dev, "copy_to_user failed!"); | ||
1126 | err = -EFAULT; | ||
1127 | goto clean_up; | ||
1128 | } | ||
1129 | |||
1130 | clean_up: | ||
1131 | kfree(ops); | ||
1132 | return err; | 1154 | return err; |
1133 | } | 1155 | } |
1134 | 1156 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index e314d6cd..ca2f7b33 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -700,6 +700,8 @@ void gk20a_remove_support(struct device *dev) | |||
700 | #ifdef CONFIG_TEGRA_COMMON | 700 | #ifdef CONFIG_TEGRA_COMMON |
701 | tegra_unregister_idle_unidle(); | 701 | tegra_unregister_idle_unidle(); |
702 | #endif | 702 | #endif |
703 | if (g->dbg_regops_tmp_buf) | ||
704 | kfree(g->dbg_regops_tmp_buf); | ||
703 | 705 | ||
704 | if (g->pmu.remove_support) | 706 | if (g->pmu.remove_support) |
705 | g->pmu.remove_support(&g->pmu); | 707 | g->pmu.remove_support(&g->pmu); |
@@ -2170,6 +2172,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g) | |||
2170 | gpu->pci_class = g->pci_class; | 2172 | gpu->pci_class = g->pci_class; |
2171 | gpu->pci_revision = g->pci_revision; | 2173 | gpu->pci_revision = g->pci_revision; |
2172 | 2174 | ||
2175 | gpu->reg_ops_limit = 1024; | ||
2176 | |||
2173 | return 0; | 2177 | return 0; |
2174 | } | 2178 | } |
2175 | 2179 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index d444447d..d219b815 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h | |||
@@ -890,6 +890,10 @@ struct gk20a { | |||
890 | int dbg_powergating_disabled_refcount; /*refcount for pg disable */ | 890 | int dbg_powergating_disabled_refcount; /*refcount for pg disable */ |
891 | int dbg_timeout_disabled_refcount; /*refcount for timeout disable */ | 891 | int dbg_timeout_disabled_refcount; /*refcount for timeout disable */ |
892 | 892 | ||
893 | /* must have dbg_sessions_lock before use */ | ||
894 | struct nvgpu_dbg_gpu_reg_op *dbg_regops_tmp_buf; | ||
895 | u32 dbg_regops_tmp_buf_ops; | ||
896 | |||
893 | /* | 897 | /* |
894 | * When set subsequent VMAs will separate fixed and non-fixed | 898 | * When set subsequent VMAs will separate fixed and non-fixed |
895 | * allocations. This avoids conflicts with fixed and non-fixed allocs | 899 | * allocations. This avoids conflicts with fixed and non-fixed allocs |
diff --git a/drivers/gpu/nvgpu/nvgpu_common.c b/drivers/gpu/nvgpu/nvgpu_common.c index 179464d8..a1f4832b 100644 --- a/drivers/gpu/nvgpu/nvgpu_common.c +++ b/drivers/gpu/nvgpu/nvgpu_common.c | |||
@@ -155,6 +155,14 @@ int nvgpu_probe(struct gk20a *g, | |||
155 | gk20a_create_sysfs(g->dev); | 155 | gk20a_create_sysfs(g->dev); |
156 | gk20a_debug_init(g->dev, debugfs_symlink); | 156 | gk20a_debug_init(g->dev, debugfs_symlink); |
157 | 157 | ||
158 | g->dbg_regops_tmp_buf = kzalloc(SZ_4K, GFP_KERNEL); | ||
159 | if (!g->dbg_regops_tmp_buf) { | ||
160 | dev_err(g->dev, "couldn't allocate regops tmp buf"); | ||
161 | return -ENOMEM; | ||
162 | } | ||
163 | g->dbg_regops_tmp_buf_ops = | ||
164 | SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]); | ||
165 | |||
158 | g->remove_support = gk20a_remove_support; | 166 | g->remove_support = gk20a_remove_support; |
159 | 167 | ||
160 | return 0; | 168 | return 0; |
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c index bd332583..213f6bbb 100644 --- a/drivers/gpu/nvgpu/vgpu/vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/vgpu.c | |||
@@ -191,6 +191,9 @@ static void vgpu_remove_support(struct device *dev) | |||
191 | struct tegra_vgpu_intr_msg msg; | 191 | struct tegra_vgpu_intr_msg msg; |
192 | int err; | 192 | int err; |
193 | 193 | ||
194 | if (g->dbg_regops_tmp_buf) | ||
195 | kfree(g->dbg_regops_tmp_buf); | ||
196 | |||
194 | if (g->pmu.remove_support) | 197 | if (g->pmu.remove_support) |
195 | g->pmu.remove_support(&g->pmu); | 198 | g->pmu.remove_support(&g->pmu); |
196 | 199 | ||
@@ -242,6 +245,14 @@ static int vgpu_init_support(struct platform_device *pdev) | |||
242 | mutex_init(&g->client_lock); | 245 | mutex_init(&g->client_lock); |
243 | mutex_init(&g->ch_wdt_lock); | 246 | mutex_init(&g->ch_wdt_lock); |
244 | 247 | ||
248 | g->dbg_regops_tmp_buf = kzalloc(SZ_4K, GFP_KERNEL); | ||
249 | if (!g->dbg_regops_tmp_buf) { | ||
250 | dev_err(g->dev, "couldn't allocate regops tmp buf"); | ||
251 | return -ENOMEM; | ||
252 | } | ||
253 | g->dbg_regops_tmp_buf_ops = | ||
254 | SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]); | ||
255 | |||
245 | g->remove_support = vgpu_remove_support; | 256 | g->remove_support = vgpu_remove_support; |
246 | return 0; | 257 | return 0; |
247 | 258 | ||
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index 052bebc8..e844a205 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h | |||
@@ -220,6 +220,11 @@ struct nvgpu_gpu_characteristics { | |||
220 | __u8 vbios_oem_version; | 220 | __u8 vbios_oem_version; |
221 | __u32 vbios_version; | 221 | __u32 vbios_version; |
222 | 222 | ||
223 | /* NVGPU_DBG_GPU_IOCTL_REG_OPS: the upper limit for the number | ||
224 | * of regops */ | ||
225 | __u32 reg_ops_limit; | ||
226 | __u32 reserved1; | ||
227 | |||
223 | /* Notes: | 228 | /* Notes: |
224 | - This struct can be safely appended with new fields. However, always | 229 | - This struct can be safely appended with new fields. However, always |
225 | keep the structure size multiple of 8 and make sure that the binary | 230 | keep the structure size multiple of 8 and make sure that the binary |