From 425f99335bfa13fa2af4e0865f61a4eb29fad6be Mon Sep 17 00:00:00 2001
From: Sami Kiminki
Date: Tue, 15 Nov 2016 21:03:14 +0200
Subject: gpu: nvgpu: gk20a: Allow regops lists longer than 128

Process long regops lists in 4-kB fragments, overcoming the overly low
limit of 128 reg ops per IOCTL call. Bump the list limit to 1024 and
report the limit in GPU characteristics.

Bug 200248726

Change-Id: I3ad49139409f32aea8b1226d6562e88edccc8053
Signed-off-by: Sami Kiminki
Reviewed-on: http://git-master/r/1253716
(cherry picked from commit 22314619b28f52610cb8769cd4c3f9eb01904eab)
Reviewed-on: http://git-master/r/1266652
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 90 ++++++++++++++++++++-------------
 drivers/gpu/nvgpu/gk20a/gk20a.c | 4 ++
 drivers/gpu/nvgpu/gk20a/gk20a.h | 4 ++
 drivers/gpu/nvgpu/nvgpu_common.c | 8 +++
 drivers/gpu/nvgpu/vgpu/vgpu.c | 11 ++++
 5 files changed, 83 insertions(+), 34 deletions(-)

(limited to 'drivers/gpu/nvgpu')

diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index f86a7377..e5529295 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -1042,14 +1042,24 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
 	struct device *dev = dbg_s->dev;
 	struct gk20a *g = get_gk20a(dbg_s->dev);
 
-	struct nvgpu_dbg_gpu_reg_op *ops;
 	struct channel_gk20a *ch;
-	u64 ops_size = sizeof(ops[0]) * args->num_ops;
 
-	if (args->num_ops > SZ_4K / sizeof(ops[0]))
+	gk20a_dbg_fn("%d ops, max fragment %d", args->num_ops, g->dbg_regops_tmp_buf_ops);
+
+	if (args->num_ops > g->gpu_characteristics.reg_ops_limit) {
+		gk20a_err(dev, "regops limit exceeded");
 		return -EINVAL;
+	}
 
-	gk20a_dbg_fn("%d ops, total size %llu", args->num_ops, ops_size);
+	if (args->num_ops == 0) {
+		/* Nothing to do */
+		return 0;
+	}
+
+	if (g->dbg_regops_tmp_buf_ops == 0 || !g->dbg_regops_tmp_buf) {
+		gk20a_err(dev, "reg ops work buffer not allocated");
+		return -ENODEV;
+	}
 
 	if (!dbg_s->id) {
 		gk20a_err(dev, "can't call reg_ops on an unbound debugger session");
@@ -1069,21 +1079,6 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
 		return -ENODEV;
 	}
 
-	ops = kzalloc(ops_size, GFP_KERNEL);
-	if (!ops) {
-		gk20a_err(dev, "Allocating memory failed!");
-		return -ENOMEM;
-	}
-
-	gk20a_dbg_fn("Copying regops from userspace");
-
-	if (copy_from_user(ops, (void __user *)(uintptr_t)args->ops,
-							ops_size)) {
-		dev_err(dev, "copy_from_user failed!");
-		err = -EFAULT;
-		goto clean_up;
-	}
-
 	/* since exec_reg_ops sends methods to the ucode, it must take the
 	 * global gpu lock to protect against mixing methods from debug sessions
 	 * on other channels */
@@ -1099,8 +1094,47 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
 	}
 
 	if (!powergate_err) {
-		err = g->ops.dbg_session_ops.exec_reg_ops(dbg_s, ops,
-							args->num_ops);
+		u64 ops_offset = 0; /* index offset */
+
+		while (ops_offset < args->num_ops && !err) {
+			const u64 num_ops =
+				min(args->num_ops - ops_offset,
+				    (u64)(g->dbg_regops_tmp_buf_ops));
+			const u64 fragment_size =
+				num_ops * sizeof(g->dbg_regops_tmp_buf[0]);
+
+			void __user *const fragment =
+				(void __user *)(uintptr_t)
+				(args->ops +
+				 ops_offset * sizeof(g->dbg_regops_tmp_buf[0]));
+
+			gk20a_dbg_fn("Regops fragment: start_op=%llu ops=%llu",
+				     ops_offset, num_ops);
+
+			gk20a_dbg_fn("Copying regops from userspace");
+
+			if (copy_from_user(g->dbg_regops_tmp_buf,
+					   fragment, fragment_size)) {
+				dev_err(dev, "copy_from_user failed!");
+				err = -EFAULT;
+				break;
+			}
+
+			err = g->ops.dbg_session_ops.exec_reg_ops(
+				dbg_s, g->dbg_regops_tmp_buf, num_ops);
+
+			gk20a_dbg_fn("Copying result to userspace");
+
+			if (copy_to_user(fragment, g->dbg_regops_tmp_buf,
+					 fragment_size)) {
+				dev_err(dev, "copy_to_user failed!");
+				err = -EFAULT;
+				break;
+			}
+
+			ops_offset += num_ops;
+		}
+
 		/* enable powergate, if previously disabled */
 		if (is_pg_disabled) {
 			powergate_err =
@@ -1114,21 +1148,9 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
 	if (!err && powergate_err)
 		err = powergate_err;
 
-	if (err) {
+	if (err)
 		gk20a_err(dev, "dbg regops failed");
-		goto clean_up;
-	}
-
-	gk20a_dbg_fn("Copying result to userspace");
 
-	if (copy_to_user((void __user *)(uintptr_t)args->ops, ops, ops_size)) {
-		dev_err(dev, "copy_to_user failed!");
-		err = -EFAULT;
-		goto clean_up;
-	}
-
- clean_up:
-	kfree(ops);
 	return err;
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index e314d6cd..ca2f7b33 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -700,6 +700,8 @@ void gk20a_remove_support(struct device *dev)
 #ifdef CONFIG_TEGRA_COMMON
 	tegra_unregister_idle_unidle();
 #endif
+	if (g->dbg_regops_tmp_buf)
+		kfree(g->dbg_regops_tmp_buf);
 
 	if (g->pmu.remove_support)
 		g->pmu.remove_support(&g->pmu);
@@ -2170,6 +2172,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 	gpu->pci_class = g->pci_class;
 	gpu->pci_revision = g->pci_revision;
 
+	gpu->reg_ops_limit = 1024;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index d444447d..d219b815 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -890,6 +890,10 @@ struct gk20a {
 	int dbg_powergating_disabled_refcount; /*refcount for pg disable */
 	int dbg_timeout_disabled_refcount; /*refcount for timeout disable */
 
+	/* must have dbg_sessions_lock before use */
+	struct nvgpu_dbg_gpu_reg_op *dbg_regops_tmp_buf;
+	u32 dbg_regops_tmp_buf_ops;
+
 	/*
 	 * When set subsequent VMAs will separate fixed and non-fixed
 	 * allocations. This avoids conflicts with fixed and non-fixed allocs
diff --git a/drivers/gpu/nvgpu/nvgpu_common.c b/drivers/gpu/nvgpu/nvgpu_common.c
index 179464d8..a1f4832b 100644
--- a/drivers/gpu/nvgpu/nvgpu_common.c
+++ b/drivers/gpu/nvgpu/nvgpu_common.c
@@ -155,6 +155,14 @@ int nvgpu_probe(struct gk20a *g,
 	gk20a_create_sysfs(g->dev);
 	gk20a_debug_init(g->dev, debugfs_symlink);
 
+	g->dbg_regops_tmp_buf = kzalloc(SZ_4K, GFP_KERNEL);
+	if (!g->dbg_regops_tmp_buf) {
+		dev_err(g->dev, "couldn't allocate regops tmp buf");
+		return -ENOMEM;
+	}
+	g->dbg_regops_tmp_buf_ops =
+		SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]);
+
 	g->remove_support = gk20a_remove_support;
 
 	return 0;
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c
index bd332583..213f6bbb 100644
--- a/drivers/gpu/nvgpu/vgpu/vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.c
@@ -191,6 +191,9 @@ static void vgpu_remove_support(struct device *dev)
 	struct tegra_vgpu_intr_msg msg;
 	int err;
 
+	if (g->dbg_regops_tmp_buf)
+		kfree(g->dbg_regops_tmp_buf);
+
 	if (g->pmu.remove_support)
 		g->pmu.remove_support(&g->pmu);
 
@@ -242,6 +245,14 @@ static int vgpu_init_support(struct platform_device *pdev)
 	mutex_init(&g->client_lock);
 	mutex_init(&g->ch_wdt_lock);
 
+	g->dbg_regops_tmp_buf = kzalloc(SZ_4K, GFP_KERNEL);
+	if (!g->dbg_regops_tmp_buf) {
+		dev_err(g->dev, "couldn't allocate regops tmp buf");
+		return -ENOMEM;
+	}
+	g->dbg_regops_tmp_buf_ops =
+		SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]);
+
 	g->remove_support = vgpu_remove_support;
 
 	return 0;
-- 
cgit v1.2.2