From 23e92bee4a8f6b66dfd1f5082af2712c0ae0b602 Mon Sep 17 00:00:00 2001
From: Peter Daifuku
Date: Wed, 25 Jan 2017 18:50:44 -0800
Subject: gpu: nvgpu: profiler create/free, hwpm reserve

Add support for creating/freeing profiler objects, hwpm reservations

Bug 1775465
JIRA EVLR-680
JIRA EVLR-682

Change-Id: I4db83d00e4b0b552b05b9aae96dc553dd1257d88
Signed-off-by: Peter Daifuku
Reviewed-on: http://git-master/r/1294401
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/common/nvgpu_common.c |   2 +
 drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 350 +++++++++++++++++++++++++++++++-
 drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h |  11 +
 drivers/gpu/nvgpu/gk20a/gk20a.h         |   5 +
 drivers/gpu/nvgpu/vgpu/vgpu.c           |   2 +
 include/uapi/linux/nvgpu.h              |  20 +-
 6 files changed, 382 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/nvgpu_common.c b/drivers/gpu/nvgpu/common/nvgpu_common.c
index f1109684..d7ff4841 100644
--- a/drivers/gpu/nvgpu/common/nvgpu_common.c
+++ b/drivers/gpu/nvgpu/common/nvgpu_common.c
@@ -52,6 +52,8 @@ static void nvgpu_init_vars(struct gk20a *g)
 	INIT_LIST_HEAD(&g->pending_sema_waits);
 	nvgpu_raw_spinlock_init(&g->pending_sema_waits_lock);
+
+	INIT_LIST_HEAD(&g->profiler_objects);
 }
 
 static void nvgpu_init_timeout(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 7eb742ed..12d81343 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -62,11 +62,11 @@ nvgpu_dbg_gpu_get_session_channel(struct dbg_session_gk20a *dbg_s)
 	return ch;
 }
 
-/* silly allocator - just increment session id */
-static atomic_t session_id = ATOMIC_INIT(0);
-static int generate_session_id(void)
+/* silly allocator - just increment id */
+static atomic_t unique_id = ATOMIC_INIT(0);
+static int generate_unique_id(void)
 {
-	return atomic_add_return(1, &session_id);
+	return atomic_add_return(1, &unique_id);
 }
 
 static int alloc_session(struct dbg_session_gk20a **_dbg_s)
@@ -80,11 +80,27 @@ static int alloc_session(struct dbg_session_gk20a **_dbg_s)
 	if (!dbg_s)
 		return -ENOMEM;
 
-	dbg_s->id = generate_session_id();
+	dbg_s->id = generate_unique_id();
 	*_dbg_s = dbg_s;
 	return 0;
 }
 
+static int alloc_profiler(struct dbg_profiler_object_data **_prof)
+{
+	struct dbg_profiler_object_data *prof;
+	*_prof = NULL;
+
+	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
+
+	prof = kzalloc(sizeof(*prof), GFP_KERNEL);
+	if (!prof)
+		return -ENOMEM;
+
+	prof->prof_handle = generate_unique_id();
+	*_prof = prof;
+	return 0;
+}
+
 static int gk20a_dbg_gpu_do_dev_open(struct inode *inode,
 		struct file *filp, bool is_profiler)
 {
@@ -400,13 +416,28 @@ int dbg_unbind_single_channel_gk20a(struct dbg_session_gk20a *dbg_s,
 {
 	struct gk20a *g = dbg_s->g;
 	int chid;
-	struct channel_gk20a *ch;
 	struct dbg_session_data *session_data;
+	struct dbg_profiler_object_data *prof_obj, *tmp_obj;
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
 
 	chid = ch_data->chid;
-	ch = g->fifo.channel + chid;
+
+	/* If there's a profiler ctx reservation record associated with this
+	 * session/channel pair, release it.
+	 */
+	list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
+				prof_obj_entry) {
+		if ((prof_obj->session_id == dbg_s->id) &&
+			(prof_obj->ch->hw_chid == chid)) {
+			if (prof_obj->has_reservation) {
+				g->profiler_reservation_count--;
+				dbg_s->has_profiler_reservation = false;
+			}
+			list_del(&prof_obj->prof_obj_entry);
+			kfree(prof_obj);
+		}
+	}
 
 	list_del_init(&ch_data->ch_entry);
@@ -480,6 +511,7 @@ int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
 {
 	struct dbg_session_gk20a *dbg_s = filp->private_data;
 	struct gk20a *g = dbg_s->g;
+	struct dbg_profiler_object_data *prof_obj, *tmp_obj;
 
 	gk20a_dbg(gpu_dbg_gpu_dbg | gpu_dbg_fn, "%s", dev_name(dbg_s->dev));
 
@@ -494,6 +526,21 @@ int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
 		g->ops.dbg_session_ops.dbg_set_powergate(dbg_s,
 				NVGPU_DBG_GPU_POWERGATE_MODE_ENABLE);
 	nvgpu_dbg_timeout_enable(dbg_s, NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE);
+
+	/* Per-context profiler objects were released when we called
+	 * dbg_unbind_all_channels. We could still have global ones.
+	 */
+	list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
+				prof_obj_entry) {
+		if (prof_obj->session_id == dbg_s->id) {
+			if (prof_obj->has_reservation) {
+				g->global_profiler_reservation_held = false;
+				g->profiler_reservation_count--;
+			}
+			list_del(&prof_obj->prof_obj_entry);
+			kfree(prof_obj);
+		}
+	}
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 
 	nvgpu_mutex_destroy(&dbg_s->ch_list_lock);
@@ -584,6 +631,15 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
 		struct dbg_session_gk20a *dbg_s,
 		struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args);
 
+static int nvgpu_ioctl_allocate_profiler_object(struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args);
+
+static int nvgpu_ioctl_free_profiler_object(struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args);
+
+static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_profiler_reserve_args *args);
+
 static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 		struct nvgpu_dbg_gpu_perfbuf_map_args *args);
 
@@ -1022,6 +1078,21 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 			(struct nvgpu_dbg_gpu_access_fb_memory_args *)buf);
 		break;
 
+	case NVGPU_DBG_GPU_IOCTL_PROFILER_ALLOCATE:
+		err = nvgpu_ioctl_allocate_profiler_object(dbg_s,
+			(struct nvgpu_dbg_gpu_profiler_obj_mgt_args *)buf);
+		break;
+
+	case NVGPU_DBG_GPU_IOCTL_PROFILER_FREE:
+		err = nvgpu_ioctl_free_profiler_object(dbg_s,
+			(struct nvgpu_dbg_gpu_profiler_obj_mgt_args *)buf);
+		break;
+
+	case NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE:
+		err = nvgpu_ioctl_profiler_reserve(dbg_s,
+			(struct nvgpu_dbg_gpu_profiler_reserve_args *)buf);
+		break;
+
 	default:
 		gk20a_err(dev_from_gk20a(g),
 			   "unrecognized dbg gpu ioctl cmd: 0x%x",
@@ -1357,6 +1428,16 @@ static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 	gk20a_dbg_fn("%s pm ctxsw mode = %d",
 		     dev_name(dbg_s->dev), args->mode);
 
+	/* Must have a valid reservation to enable/disable hwpm cxtsw.
+	 * Just print an error message for now, but eventually this should
+	 * return an error, at the point where all client sw has been
+	 * cleaned up.
+	 */
+	if (!dbg_s->has_profiler_reservation) {
+		gk20a_err(dev_from_gk20a(g),
+			"session doesn't have a valid reservation");
+	}
+
 	err = gk20a_busy(g->dev);
 	if (err) {
 		gk20a_err(dev_from_gk20a(g), "failed to poweron");
@@ -1440,6 +1521,261 @@ clean_up:
 	return err;
 }
 
+static int nvgpu_ioctl_allocate_profiler_object(
+		struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args)
+{
+	int err = 0;
+	struct gk20a *g = get_gk20a(dbg_s->dev);
+	struct dbg_profiler_object_data *prof_obj;
+
+	gk20a_dbg_fn("%s", dev_name(dbg_s->dev));
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+
+	err = alloc_profiler(&prof_obj);
+	if (err)
+		goto clean_up;
+
+	prof_obj->session_id = dbg_s->id;
+
+	if (dbg_s->is_profiler)
+		prof_obj->ch = NULL;
+	else {
+		prof_obj->ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
+		if (prof_obj->ch == NULL) {
+			gk20a_err(dev_from_gk20a(g),
+				"bind a channel for dbg session");
+			kfree(prof_obj);
+			err = -EINVAL;
+			goto clean_up;
+		}
+	}
+
+	/* Return handle to client */
+	args->profiler_handle = prof_obj->prof_handle;
+
+	INIT_LIST_HEAD(&prof_obj->prof_obj_entry);
+
+	list_add(&prof_obj->prof_obj_entry, &g->profiler_objects);
+clean_up:
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+	return err;
+}
+
+static int nvgpu_ioctl_free_profiler_object(
+		struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_profiler_obj_mgt_args *args)
+{
+	int err = 0;
+	struct gk20a *g = get_gk20a(dbg_s->dev);
+	struct dbg_profiler_object_data *prof_obj, *tmp_obj;
+	bool obj_found = false;
+
+	gk20a_dbg_fn("%s session_id = %d profiler_handle = %x",
+			dev_name(dbg_s->dev), dbg_s->id, args->profiler_handle);
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+
+	/* Remove profiler object from the list, if a match is found */
+	list_for_each_entry_safe(prof_obj, tmp_obj, &g->profiler_objects,
+				prof_obj_entry) {
+		if (prof_obj->prof_handle == args->profiler_handle) {
+			if (prof_obj->session_id != dbg_s->id) {
+				gk20a_err(dev_from_gk20a(g),
+					"invalid handle %x",
+					args->profiler_handle);
+				err = -EINVAL;
+				break;
+			}
+			if (prof_obj->has_reservation) {
+				if (prof_obj->ch == NULL)
+					g->global_profiler_reservation_held = false;
+				g->profiler_reservation_count--;
+				dbg_s->has_profiler_reservation = false;
+			}
+			list_del(&prof_obj->prof_obj_entry);
+			kfree(prof_obj);
+			obj_found = true;
+			break;
+		}
+	}
+	if (!obj_found) {
+		gk20a_err(dev_from_gk20a(g), "profiler %x not found",
+				args->profiler_handle);
+		err = -EINVAL;
+	}
+
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+	return err;
+}
+
+static struct dbg_profiler_object_data *find_matching_prof_obj(
+		struct dbg_session_gk20a *dbg_s,
+		u32 profiler_handle)
+{
+	struct gk20a *g = dbg_s->g;
+	struct dbg_profiler_object_data *prof_obj;
+
+	list_for_each_entry(prof_obj, &g->profiler_objects, prof_obj_entry) {
+		if (prof_obj->prof_handle == profiler_handle) {
+			if (prof_obj->session_id != dbg_s->id) {
+				gk20a_err(dev_from_gk20a(g),
+					"invalid handle %x",
+					profiler_handle);
+				return NULL;
+			}
+			return prof_obj;
+		}
+	}
+	return NULL;
+}
+
+static int nvgpu_profiler_reserve_acquire(struct dbg_session_gk20a *dbg_s,
+		u32 profiler_handle)
+{
+	struct gk20a *g = dbg_s->g;
+	struct dbg_profiler_object_data *prof_obj, *my_prof_obj;
+	int err = 0;
+
+	gk20a_dbg_fn("%s profiler_handle = %x", dev_name(dbg_s->dev), profiler_handle);
+
+	if (g->profiler_reservation_count < 0) {
+		gk20a_err(dev_from_gk20a(g), "Negative reservation count!");
+		return -EINVAL;
+	}
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+
+	/* Find matching object.
+	 */
+	my_prof_obj = find_matching_prof_obj(dbg_s, profiler_handle);
+
+	if (!my_prof_obj) {
+		gk20a_err(dev_from_gk20a(g), "object not found");
+		err = -EINVAL;
+		goto exit;
+	}
+
+	/* If we already have the reservation, we're done */
+	if (my_prof_obj->has_reservation) {
+		err = 0;
+		goto exit;
+	}
+
+	if (my_prof_obj->ch == NULL) {
+		/* Global reservations are only allowed if there are no other
+		 * global or per-context reservations currently held
+		 */
+		if (g->profiler_reservation_count > 0) {
+			gk20a_err(dev_from_gk20a(g),
+				"global reserve: have existing reservation");
+			err = -EBUSY;
+			goto exit;
+		}
+
+		my_prof_obj->has_reservation = true;
+		g->global_profiler_reservation_held = true;
+		g->profiler_reservation_count = 1;
+		dbg_s->has_profiler_reservation = true;
+	} else if (g->global_profiler_reservation_held) {
+		/* If there's a global reservation,
+		 * we can't take a per-context one.
+		 */
+		gk20a_err(dev_from_gk20a(g),
+			"per-ctxt reserve: global reservation in effect");
+		err = -EBUSY;
+		goto exit;
+	} else if (gk20a_is_channel_marked_as_tsg(my_prof_obj->ch)) {
+		/* TSG: check that another channel in the TSG
+		 * doesn't already have the reservation
+		 */
+		int my_tsgid = my_prof_obj->ch->tsgid;
+
+		list_for_each_entry(prof_obj, &g->profiler_objects,
+				prof_obj_entry) {
+			if (prof_obj->has_reservation &&
+					(prof_obj->ch->tsgid == my_tsgid)) {
+				gk20a_err(dev_from_gk20a(g),
+					"per-ctxt reserve (tsg): already reserved");
+				err = -EBUSY;
+				goto exit;
+			}
+		}
+
+		my_prof_obj->has_reservation = true;
+		g->profiler_reservation_count++;
+		dbg_s->has_profiler_reservation = true;
+	} else {
+		/* channel: check that some other profiler object doesn't
+		 * already have the reservation.
+		 */
+		struct channel_gk20a *my_ch = my_prof_obj->ch;
+
+		list_for_each_entry(prof_obj, &g->profiler_objects,
+				prof_obj_entry) {
+			if (prof_obj->has_reservation &&
+					(prof_obj->ch == my_ch)) {
+				gk20a_err(dev_from_gk20a(g),
+					"per-ctxt reserve (ch): already reserved");
+				err = -EBUSY;
+				goto exit;
+			}
+		}
+
+		my_prof_obj->has_reservation = true;
+		g->profiler_reservation_count++;
+		dbg_s->has_profiler_reservation = true;
+	}
+exit:
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+	return err;
+}
+
+static int nvgpu_profiler_reserve_release(struct dbg_session_gk20a *dbg_s,
+		u32 profiler_handle)
+{
+	struct gk20a *g = dbg_s->g;
+	struct dbg_profiler_object_data *prof_obj;
+	int err = 0;
+
+	gk20a_dbg_fn("%s profiler_handle = %x", dev_name(dbg_s->dev), profiler_handle);
+
+	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+
+	/* Find matching object.
+	 */
+	prof_obj = find_matching_prof_obj(dbg_s, profiler_handle);
+
+	if (!prof_obj) {
+		gk20a_err(dev_from_gk20a(g), "object not found");
+		err = -EINVAL;
+		goto exit;
+	}
+
+	if (prof_obj->has_reservation) {
+		prof_obj->has_reservation = false;
+		if (prof_obj->ch == NULL)
+			g->global_profiler_reservation_held = false;
+		g->profiler_reservation_count--;
+		dbg_s->has_profiler_reservation = false;
+	} else {
+		gk20a_err(dev_from_gk20a(g), "No reservation found");
+		err = -EINVAL;
+		goto exit;
+	}
+exit:
+	nvgpu_mutex_release(&g->dbg_sessions_lock);
+	return err;
+}
+
+static int nvgpu_ioctl_profiler_reserve(struct dbg_session_gk20a *dbg_s,
+		struct nvgpu_dbg_gpu_profiler_reserve_args *args)
+{
+	if (args->acquire)
+		return nvgpu_profiler_reserve_acquire(dbg_s, args->profiler_handle);
+
+	return nvgpu_profiler_reserve_release(dbg_s, args->profiler_handle);
+}
+
 static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 		struct nvgpu_dbg_gpu_perfbuf_map_args *args)
 {
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h
index caa9395b..e8d2dbe5 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.h
@@ -47,6 +47,9 @@ struct dbg_session_gk20a {
 	/* profiler session, if any */
 	bool is_profiler;
 
+	/* has a valid profiler reservation */
+	bool has_profiler_reservation;
+
 	/* power enabled or disabled */
 	bool is_pg_disabled;
 
@@ -90,6 +93,14 @@ struct dbg_session_channel_data {
 	struct dbg_session_data *session_data;
 };
 
+struct dbg_profiler_object_data {
+	int session_id;
+	u32 prof_handle;
+	struct channel_gk20a *ch;
+	bool has_reservation;
+	struct list_head prof_obj_entry;
+};
+
 int dbg_unbind_single_channel_gk20a(struct dbg_session_gk20a *dbg_s,
 			struct dbg_session_channel_data *ch_data);
 
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 5bf1e02d..7c836b3c 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -966,6 +966,11 @@ struct gk20a {
 	struct nvgpu_dbg_gpu_reg_op *dbg_regops_tmp_buf;
 	u32 dbg_regops_tmp_buf_ops;
 
+	/* For profiler reservations */
+	struct list_head profiler_objects;
+	bool global_profiler_reservation_held;
+	int profiler_reservation_count;
+
 	void (*remove_support)(struct device *);
 
 	u64 pg_ingating_time_us;
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c
index a97c179f..2d7417f0 100644
--- a/drivers/gpu/nvgpu/vgpu/vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.c
@@ -251,6 +251,8 @@ static int vgpu_init_support(struct platform_device *pdev)
 	nvgpu_mutex_init(&g->dbg_sessions_lock);
 	nvgpu_mutex_init(&g->client_lock);
 
+	INIT_LIST_HEAD(&g->profiler_objects);
+
 	g->dbg_regops_tmp_buf = kzalloc(SZ_4K, GFP_KERNEL);
 	if (!g->dbg_regops_tmp_buf) {
 		dev_err(g->dev, "couldn't allocate regops tmp buf");
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 400d430b..75011998 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -1264,9 +1264,27 @@ struct nvgpu_dbg_gpu_access_fb_memory_args {
 #define NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY \
 	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 19, struct nvgpu_dbg_gpu_access_fb_memory_args)
 
+struct nvgpu_dbg_gpu_profiler_obj_mgt_args {
+	__u32 profiler_handle;
+	__u32 reserved;
+};
+
+#define NVGPU_DBG_GPU_IOCTL_PROFILER_ALLOCATE \
+	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 20, struct nvgpu_dbg_gpu_profiler_obj_mgt_args)
+
+#define NVGPU_DBG_GPU_IOCTL_PROFILER_FREE \
+	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 21, struct nvgpu_dbg_gpu_profiler_obj_mgt_args)
+
+struct nvgpu_dbg_gpu_profiler_reserve_args {
+	__u32 profiler_handle;
+	__u32 acquire;
+};
+
+#define NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE \
+	_IOWR(NVGPU_DBG_GPU_IOCTL_MAGIC, 22, struct nvgpu_dbg_gpu_profiler_reserve_args)
+
 #define NVGPU_DBG_GPU_IOCTL_LAST		\
-	_IOC_NR(NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY)
+	_IOC_NR(NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE)
 
 #define NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE	\
 	sizeof(struct nvgpu_dbg_gpu_access_fb_memory_args)
-- 
cgit v1.2.2
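
For reference, below is a minimal userspace sketch of the allocate/reserve/free flow this change adds. It is an illustration only, not part of the patch: the device-node path /dev/nvhost-prof-gpu and the surrounding program structure are assumptions, while the struct names and ioctl numbers come from the uapi additions above.

/*
 * Hypothetical usage sketch for the profiler-object ioctls introduced above.
 * Assumptions (not part of this patch): the profiler device node is
 * /dev/nvhost-prof-gpu and the updated <linux/nvgpu.h> uapi header is
 * installed.
 */
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

int main(void)
{
	struct nvgpu_dbg_gpu_profiler_obj_mgt_args obj = { 0 };
	struct nvgpu_dbg_gpu_profiler_reserve_args rsv = { 0 };
	int fd = open("/dev/nvhost-prof-gpu", O_RDWR);	/* assumed node */

	if (fd < 0)
		return 1;

	/* Create a profiler object; the driver returns a unique handle. */
	if (ioctl(fd, NVGPU_DBG_GPU_IOCTL_PROFILER_ALLOCATE, &obj))
		goto out_close;

	/* Acquire the hwpm reservation for that object.  With no channel
	 * bound to the session this is a global reservation, so it fails
	 * with EBUSY if any other reservation is already held. */
	rsv.profiler_handle = obj.profiler_handle;
	rsv.acquire = 1;
	if (ioctl(fd, NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE, &rsv))
		goto out_free;

	/* ... set hwpm ctxsw mode / program perfmons here ... */

	/* Drop the reservation (acquire = 0), then free the object. */
	rsv.acquire = 0;
	ioctl(fd, NVGPU_DBG_GPU_IOCTL_PROFILER_RESERVE, &rsv);
out_free:
	ioctl(fd, NVGPU_DBG_GPU_IOCTL_PROFILER_FREE, &obj);
out_close:
	close(fd);
	return 0;
}

Per nvgpu_profiler_reserve_acquire()/_release() in the patch, acquiring a reservation the session already holds succeeds immediately, while releasing a reservation it does not hold returns -EINVAL.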