From 23e92bee4a8f6b66dfd1f5082af2712c0ae0b602 Mon Sep 17 00:00:00 2001
From: Peter Daifuku
Date: Wed, 25 Jan 2017 18:50:44 -0800
Subject: gpu: nvgpu: profiler create/free, hwpm reserve

Add support for creating/freeing profiler objects, hwpm reservations

Bug 1775465
JIRA EVLR-680
JIRA EVLR-682

Change-Id: I4db83d00e4b0b552b05b9aae96dc553dd1257d88
Signed-off-by: Peter Daifuku
Reviewed-on: http://git-master/r/1294401
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/common/nvgpu_common.c |   2 +
 drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 350 +++++++++++++++++++++++++++++++-
 drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h |  11 +
 drivers/gpu/nvgpu/gk20a/gk20a.h         |   5 +
 drivers/gpu/nvgpu/vgpu/vgpu.c           |   2 +
 include/uapi/linux/nvgpu.h              |  20 +-
 6 files changed, 382 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/nvgpu_common.c b/drivers/gpu/nvgpu/common/nvgpu_common.c
index f1109684..d7ff4841 100644
--- a/drivers/gpu/nvgpu/common/nvgpu_common.c
+++ b/drivers/gpu/nvgpu/common/nvgpu_common.c
@@ -52,6 +52,8 @@ static void nvgpu_init_vars(struct gk20a *g)
 	INIT_LIST_HEAD(&g->pending_sema_waits);
 	nvgpu_raw_spinlock_init(&g->pending_sema_waits_lock);
+
+	INIT_LIST_HEAD(&g->profiler_objects);
 }
 
 static void nvgpu_init_timeout(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 7eb742ed..12d81343 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -62,11 +62,11 @@ nvgpu_dbg_gpu_get_session_channel(struct dbg_session_gk20a *dbg_s)
 	return ch;
 }
 
-/* silly allocator - just increment session id */
-static atomic_t session_id = ATOMIC_INIT(0);
-static int generate_session_id(void)
+/* silly allocator - just increment id */
+static atomic_t unique_id = ATOMIC_INIT(0);
+static int generate_unique_id(void)
 {
-	return atomic_add_return(1, &session_id);
+	return atomic_add_return(1, &unique_id);
 }
 
 static int alloc_session(struct dbg_session_gk20a **_dbg_s)
@@ -80,11 +80,27 @@ static int alloc_session(struct dbg_session_gk20a **_dbg_s)
 	if (!dbg_s)
 		return -ENOMEM;
 
-	dbg_s->id = generate_session_id();
+	dbg_s->id = generate_unique_id();
 	*_dbg_s = dbg_s;
 	return 0;
 }
 
+static int alloc_profiler(struct dbg_profiler_object_data **_prof)
+{
+	struct dbg_profiler_object_data *prof;
+	*_prof = NULL;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
+
+	prof = kzalloc(sizeof(*prof), GFP_KERNEL);
+	if (!prof)
+		return -ENOMEM;
+
+	prof->prof_handle = generate_unique_id();
+	*_prof = prof;
+	return 0;
+}
+
 static int gk20a_dbg_gpu_do_dev_open(struct inode *inode,
 		struct file *filp, bool is_profiler)
 {
@@ -400,13 +416,28 @@ int dbg_unbind_single_channel_gk20a(struct dbg_session_gk20a *dbg_s,
 {
 	struct gk20a *g = dbg_s->g;
 	int chid;
-	struct channel_gk20a *ch;
 	struct dbg_session_data *session_data;
+	struct dbg_profiler_object_data *prof_obj, *tmp_obj;
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
 
 	chid = ch_data->chid;
-	ch = g->fifo.channel + chid;
+
+	/* If there's a profiler ctx reservation record associated with this
+	 * session/channel pair, release it.
+	 */
+	list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
+				prof_obj_entry) {
+		if ((prof_obj->session_id == dbg_s->id) &&
+			(prof_obj->ch->hw_chid == chid)) {
+			if (prof_obj->has_reservation) {
+				g->profiler_reservation_count--;
+				dbg_s->has_profiler_reservation = false;
+			}
+			list_del(&prof_obj->prof_obj_entry);
+			kfree(prof_obj);
+		}
+	}
 
 	list_del_init(&ch_data->ch_entry);
@@ -480,6 +511,7 @@ int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
 {
 	struct dbg_session_gk20a *dbg_s = filp->private_data;
 	struct gk20a *g = dbg_s->g;
+	struct dbg_profiler_object_data *prof_obj, *tmp_obj;
 
 	gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "%s", dev_name(dbg_s->dev));
 
@@ -494,6 +526,21 @@ int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
 		g->ops.dbg_session_ops.dbg_set_powergate(dbg_s,
 				NVGPU_DBG_GPU_POWERGATE_MODE_ENABLE);
 	nvgpu_dbg_timeout_enable(dbg_s, NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE);
+
+	/* Per-context profiler objects were released when we called
+	 * dbg_unbind_all_channels. We could still have global ones.
+	 */
+	list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
+				prof_obj_entry) {
+		if (prof_obj->session_id == dbg_s->id) {
+			if (prof_obj->has_reservation) {
+				g->global_profiler_reservation_held = false;
+				g->profiler_reservation_count--;
+			}
+			list_del(&prof_obj->prof_obj_entry);
+			kfree(prof_obj);
+		}
+	}
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 
 	nvgpu_mutex_destroy(&dbg_s->ch_list_lock);
@@ -584,6 +631,15 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
 		struct dbg_session_gk20a *dbg_s,
 		struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args);
 
+static int nvgpu_ioctl_allocate_profiler_object(struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args);
+
+static int nvgpu_ioctl_free_profiler_object(struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args);
+
+static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_profiler_reserve_args *args);
+
 static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 		struct nvgpu_dbg_gpu_perfbuf_map_args *args);
 
@@ -1022,6 +1078,21 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 			(struct nvgpu_dbg_gpu_access_fb_memory_args *)buf);
 		break;
 
+	case NVGPU_DBG_GPU_IOCTL_PROFILER_ALLOCATE:
+		err = nvgpu_ioctl_allocate_profiler_object(dbg_s,
+			(struct nvgpu_dbg_gpu_profiler_obj_mgt_args *)buf);
+		break;
+
+	case NVGPU_DBG_GPU_IOCTL_PROFILER_FREE:
+		err = nvgpu_ioctl_free_profiler_object(dbg_s,
+			(struct nvgpu_dbg_gpu_profiler_obj_mgt_args *)buf);
+		break;
+
+	case NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE:
+		err = nvgpu_ioctl_profiler_reserve(dbg_s,
+			(struct nvgpu_dbg_gpu_profiler_reserve_args *)buf);
+		break;
+
 	default:
 		gk20a_err(dev_from_gk20a(g),
 			   "unrecognized dbg gpu ioctl cmd: 0x%x",
@@ -1357,6 +1428,16 @@ static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 	gk20a_dbg_fn("%s pm ctxsw mode = %d",
 		     dev_name(dbg_s->dev), args->mode);
 
+	/* Must have a valid reservation to enable/disable hwpm cxtsw.
+	 * Just print an error message for now, but eventually this should
+	 * return an error, at the point where all client sw has been
+	 * cleaned up.
+	 */
+	if (!dbg_s->has_profiler_reservation) {
+		gk20a_err(dev_from_gk20a(g),
+			"session doesn't have a valid reservation");
+	}
+
 	err = gk20a_busy(g->dev);
 	if (err) {
 		gk20a_err(dev_from_gk20a(g), "failed to poweron");
@@ -1440,6 +1521,261 @@ clean_up:
 	return err;
 }
 
+static int nvgpu_ioctl_allocate_profiler_object(
+		struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args)
+{
+	int err = 0;
+	struct gk20a *g = get_gk20a(dbg_s->dev);
+	struct dbg_profiler_object_data *prof_obj;
+
+	gk20a_dbg_fn("%s", dev_name(dbg_s->dev));
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+
+	err = alloc_profiler(&prof_obj);
+	if (err)
+		goto clean_up;
+
+	prof_obj->session_id = dbg_s->id;
+
+	if (dbg_s->is_profiler)
+		prof_obj->ch = NULL;
+	else {
+		prof_obj->ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
+		if (prof_obj->ch == NULL) {
+			gk20a_err(dev_from_gk20a(g),
+				"bind a channel for dbg session");
+			kfree(prof_obj);
+			err = -EINVAL;
+			goto clean_up;
+		}
+	}
+
+	/* Return handle to client */
+	args->profiler_handle = prof_obj->prof_handle;
+
+	INIT_LIST_HEAD(&prof_obj->prof_obj_entry);
+
+	list_add(&prof_obj->prof_obj_entry, &g->profiler_objects);
+clean_up:
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+	return err;
+}
+
+static int nvgpu_ioctl_free_profiler_object(
+		struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args)
+{
+	int err = 0;
+	struct gk20a *g = get_gk20a(dbg_s->dev);
+	struct dbg_profiler_object_data *prof_obj, *tmp_obj;
+	bool obj_found = false;
+
+	gk20a_dbg_fn("%s session_id = %d profiler_handle = %x",
+			dev_name(dbg_s->dev), dbg_s->id, args->profiler_handle);
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+
+	/* Remove profiler object from the list, if a match is found */
+	list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
+				prof_obj_entry) {
+		if (prof_obj->prof_handle == args->profiler_handle) {
+			if (prof_obj->session_id != dbg_s->id) {
+				gk20a_err(dev_from_gk20a(g),
+					"invalid handle %x",
+					args->profiler_handle);
+				err = -EINVAL;
+				break;
+			}
+			if (prof_obj->has_reservation) {
+				if (prof_obj->ch == NULL)
+					g->global_profiler_reservation_held = false;
+				g->profiler_reservation_count--;
+				dbg_s->has_profiler_reservation = false;
+			}
+			list_del(&prof_obj->prof_obj_entry);
+			kfree(prof_obj);
+			obj_found = true;
+			break;
+		}
+	}
+	if (!obj_found) {
+		gk20a_err(dev_from_gk20a(g), "profiler %x not found",
+				args->profiler_handle);
+		err = -EINVAL;
+	}
+
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+	return err;
+}
+
+static struct dbg_profiler_object_data *find_matching_prof_obj(
+		struct dbg_session_gk20a *dbg_s,
+		u32 profiler_handle)
+{
+	struct gk20a *g = dbg_s->g;
+	struct dbg_profiler_object_data *prof_obj;
+
+	list_for_each_entry(prof_obj, &g->profiler_objects, prof_obj_entry) {
+		if (prof_obj->prof_handle == profiler_handle) {
+			if (prof_obj->session_id != dbg_s->id) {
+				gk20a_err(dev_from_gk20a(g),
+					"invalid handle %x",
+					profiler_handle);
+				return NULL;
+			}
+			return prof_obj;
+		}
+	}
+	return NULL;
+}
+
+static int nvgpu_profiler_reserve_acquire(struct dbg_session_gk20a *dbg_s,
+		u32 profiler_handle)
+{
+	struct gk20a *g = dbg_s->g;
+	struct dbg_profiler_object_data *prof_obj, *my_prof_obj;
+	int err = 0;
+
+	gk20a_dbg_fn("%s profiler_handle = %x", dev_name(dbg_s->dev), profiler_handle);
+
+	if (g->profiler_reservation_count < 0) {
+		gk20a_err(dev_from_gk20a(g), "Negative reservation count!");
+		return -EINVAL;
+	}
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+
+	/* Find matching object.
+	 */
+	my_prof_obj = find_matching_prof_obj(dbg_s, profiler_handle);
+
+	if (!my_prof_obj) {
+		gk20a_err(dev_from_gk20a(g), "object not found");
+		err = -EINVAL;
+		goto exit;
+	}
+
+	/* If we already have the reservation, we're done */
+	if (my_prof_obj->has_reservation) {
+		err = 0;
+		goto exit;
+	}
+
+	if (my_prof_obj->ch == NULL) {
+		/* Global reservations are only allowed if there are no other
+		 * global or per-context reservations currently held
+		 */
+		if (g->profiler_reservation_count > 0) {
+			gk20a_err(dev_from_gk20a(g),
+				"global reserve: have existing reservation");
+			err = -EBUSY;
+			goto exit;
+		}
+
+		my_prof_obj->has_reservation = true;
+		g->global_profiler_reservation_held = true;
+		g->profiler_reservation_count = 1;
+		dbg_s->has_profiler_reservation = true;
+	} else if (g->global_profiler_reservation_held) {
+		/* If there's a global reservation,
+		 * we can't take a per-context one.
+		 */
+		gk20a_err(dev_from_gk20a(g),
+			"per-ctxt reserve: global reservation in effect");
+		err = -EBUSY;
+		goto exit;
+	} else if (gk20a_is_channel_marked_as_tsg(my_prof_obj->ch)) {
+		/* TSG: check that another channel in the TSG
+		 * doesn't already have the reservation
+		 */
+		int my_tsgid = my_prof_obj->ch->tsgid;
+
+		list_for_each_entry(prof_obj, &g->profiler_objects,
+				prof_obj_entry) {
+			if (prof_obj->has_reservation &&
+					(prof_obj->ch->tsgid == my_tsgid)) {
+				gk20a_err(dev_from_gk20a(g),
+					"per-ctxt reserve (tsg): already reserved");
+				err = -EBUSY;
+				goto exit;
+			}
+		}
+
+		my_prof_obj->has_reservation = true;
+		g->profiler_reservation_count++;
+		dbg_s->has_profiler_reservation = true;
+	} else {
+		/* channel: check that some other profiler object doesn't
+		 * already have the reservation.
+		 */
+		struct channel_gk20a *my_ch = my_prof_obj->ch;
+
+		list_for_each_entry(prof_obj, &g->profiler_objects,
+				prof_obj_entry) {
+			if (prof_obj->has_reservation &&
+					(prof_obj->ch == my_ch)) {
+				gk20a_err(dev_from_gk20a(g),
+					"per-ctxt reserve (ch): already reserved");
+				err = -EBUSY;
+				goto exit;
+			}
+		}
+
+		my_prof_obj->has_reservation = true;
+		g->profiler_reservation_count++;
+		dbg_s->has_profiler_reservation = true;
+	}
+exit:
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+	return err;
+}
+
+static int nvgpu_profiler_reserve_release(struct dbg_session_gk20a *dbg_s,
+		u32 profiler_handle)
+{
+	struct gk20a *g = dbg_s->g;
+	struct dbg_profiler_object_data *prof_obj;
+	int err = 0;
+
+	gk20a_dbg_fn("%s profiler_handle = %x", dev_name(dbg_s->dev), profiler_handle);
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+
+	/* Find matching object.
+	 */
+	prof_obj = find_matching_prof_obj(dbg_s, profiler_handle);
+
+	if (!prof_obj) {
+		gk20a_err(dev_from_gk20a(g), "object not found");
+		err = -EINVAL;
+		goto exit;
+	}
+
+	if (prof_obj->has_reservation) {
+		prof_obj->has_reservation = false;
+		if (prof_obj->ch == NULL)
+			g->global_profiler_reservation_held = false;
+		g->profiler_reservation_count--;
+		dbg_s->has_profiler_reservation = false;
+	} else {
+		gk20a_err(dev_from_gk20a(g), "No reservation found");
+		err = -EINVAL;
+		goto exit;
+	}
+exit:
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+	return err;
+}
+
+static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_profiler_reserve_args *args)
+{
+	if (args->acquire)
+		return nvgpu_profiler_reserve_acquire(dbg_s, args->profiler_handle);
+
+	return nvgpu_profiler_reserve_release(dbg_s, args->profiler_handle);
+}
+
 static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 		struct nvgpu_dbg_gpu_perfbuf_map_args *args)
 {
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h
index caa9395b..e8d2dbe5 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h
@@ -47,6 +47,9 @@ struct dbg_session_gk20a {
 	/* profiler session, if any */
 	bool is_profiler;
 
+	/* has a valid profiler reservation */
+	bool has_profiler_reservation;
+
 	/* power enabled or disabled */
 	bool is_pg_disabled;
 
@@ -90,6 +93,14 @@ struct dbg_session_channel_data {
 	struct dbg_session_data *session_data;
 };
 
+struct dbg_profiler_object_data {
+	int session_id;
+	u32 prof_handle;
+	struct channel_gk20a *ch;
+	bool has_reservation;
+	struct list_head prof_obj_entry;
+};
+
 int dbg_unbind_single_channel_gk20a(struct dbg_session_gk20a *dbg_s,
 			struct dbg_session_channel_data *ch_data);
 
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 5bf1e02d..7c836b3c 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -966,6 +966,11 @@ struct gk20a {
 	struct nvgpu_dbg_gpu_reg_op *dbg_regops_tmp_buf;
 	u32 dbg_regops_tmp_buf_ops;
 
+	/* For profiler reservations */
+	struct list_head profiler_objects;
+	bool global_profiler_reservation_held;
+	int profiler_reservation_count;
+
 	void (*remove_support)(struct device *);
 
 	u64 pg_ingating_time_us;
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c
index a97c179f..2d7417f0 100644
--- a/drivers/gpu/nvgpu/vgpu/vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.c
@@ -251,6 +251,8 @@ static int vgpu_init_support(struct platform_device *pdev)
 	nvgpu_mutex_init(&g->dbg_sessions_lock);
 	nvgpu_mutex_init(&g->client_lock);
 
+	INIT_LIST_HEAD(&g->profiler_objects);
+
 	g->dbg_regops_tmp_buf = kzalloc(SZ_4K, GFP_KERNEL);
 	if (!g->dbg_regops_tmp_buf) {
 		dev_err(g->dev, "couldn't allocate regops tmp buf");
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 400d430b..75011998 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -1264,9 +1264,27 @@ struct nvgpu_dbg_gpu_access_fb_memory_args {
 #define NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY \
 	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 19, struct nvgpu_dbg_gpu_access_fb_memory_args)
 
+struct nvgpu_dbg_gpu_profiler_obj_mgt_args {
+	__u32 profiler_handle;
+	__u32 reserved;
+};
+
+#define NVGPU_DBG_GPU_IOCTL_PROFILER_ALLOCATE \
+	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 20, struct nvgpu_dbg_gpu_profiler_obj_mgt_args)
+
+#define NVGPU_DBG_GPU_IOCTL_PROFILER_FREE \
+	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 21, struct nvgpu_dbg_gpu_profiler_obj_mgt_args)
+
+struct nvgpu_dbg_gpu_profiler_reserve_args {
+	__u32 profiler_handle;
+	__u32 acquire;
+};
+
+#define NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE \
+	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 22, struct nvgpu_dbg_gpu_profiler_reserve_args)
+
 #define NVGPU_DBG_GPU_IOCTL_LAST		\
-	_IOC_NR(NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY)
+	_IOC_NR(NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE)
 
 #define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE	\
 	sizeof(struct nvgpu_dbg_gpu_access_fb_memory_args)
-- 
cgit v1.2.2
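
For reference, below is a minimal userspace sketch of the allocate/reserve/free flow this change adds. It is an illustration only, not part of the patch: the device-node path /dev/nvhost-prof-gpu and the surrounding program structure are assumptions, while the struct names and ioctl numbers come from the uapi additions above.

/*
 * Hypothetical usage sketch for the profiler-object ioctls introduced above.
 * Assumptions (not part of this patch): the profiler device node is
 * /dev/nvhost-prof-gpu and the updated <linux/nvgpu.h> uapi header is
 * installed.
 */
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

int main(void)
{
	struct nvgpu_dbg_gpu_profiler_obj_mgt_args obj = { 0 };
	struct nvgpu_dbg_gpu_profiler_reserve_args rsv = { 0 };
	int fd = open("/dev/nvhost-prof-gpu", O_RDWR);	/* assumed node */

	if (fd < 0)
		return 1;

	/* Create a profiler object; the driver returns a unique handle. */
	if (ioctl(fd, NVGPU_DBG_GPU_IOCTL_PROFILER_ALLOCATE, &obj))
		goto out_close;

	/* Acquire the hwpm reservation for that object.  With no channel
	 * bound to the session this is a global reservation, so it fails
	 * with EBUSY if any other reservation is already held. */
	rsv.profiler_handle = obj.profiler_handle;
	rsv.acquire = 1;
	if (ioctl(fd, NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE, &rsv))
		goto out_free;

	/* ... set hwpm ctxsw mode / program perfmons here ... */

	/* Drop the reservation (acquire = 0), then free the object. */
	rsv.acquire = 0;
	ioctl(fd, NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE, &rsv);
out_free:
	ioctl(fd, NVGPU_DBG_GPU_IOCTL_PROFILER_FREE, &obj);
out_close:
	close(fd);
	return 0;
}

Per nvgpu_profiler_reserve_acquire()/_release() in the patch, acquiring a reservation the session already holds succeeds immediately, while releasing a reservation it does not hold returns -EINVAL.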