author     Sachit Kadle <skadle@nvidia.com>                      2016-08-22 21:06:30 -0400
committer  mobile promotions <svcmobile_promotions@nvidia.com>   2016-10-20 11:14:04 -0400
commit     63e8592e06939e20c7b9e56b430353ebbee31ad6 (patch)
tree       b91247eebf886f4e987d38eb4069aceace284ecf /drivers/gpu/nvgpu/gk20a/fence_gk20a.c
parent     3c2656c8c6ebf7cef7376d3a28451249643121c4 (diff)
gpu: nvgpu: use inplace allocation in sync framework
This change is the first of a series of changes to
support the usage of pre-allocated job tracking resources
in the submit path. With this change, we still maintain a
dynamically-allocated joblist, but make the necessary changes
in the channel_sync & fence framework to use in-place
allocations. Specifically, we:
1) Update channel sync framework routines to take in
pre-allocated priv_cmd_entry(s) & gk20a_fence(s) rather
than allocating them dynamically themselves
2) Move allocation of priv_cmd_entry(s) & gk20a_fence(s)
to gk20a_submit_prepare_syncs
3) Modify fence framework to have separate allocation
and init APIs. We expose allocation as a separate API, so
the client can allocate the object before passing it into
the channel sync framework (see the sketch after this list).
4) Fix clean_up logic in channel sync framework
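As a rough illustration of the intended caller-side flow: the wrapper
function below is only a sketch and is not part of this patch, and
gk20a_fence_put() is assumed to be the existing kref-based release
helper in fence_gk20a.h.

	/* Sketch only: assumes the post-patch fence_gk20a.h declarations. */
	static int example_prepare_fence(struct channel_gk20a *c,
					 struct platform_device *host1x_pdev,
					 u32 id, u32 value)
	{
		struct gk20a_fence *fence;
		int err;

		/* allocate the bare fence object up front, e.g. together
		 * with the pre-allocated job tracking resources */
		fence = gk20a_alloc_fence(c);
		if (!fence)
			return -ENOMEM;

		/* later, the fence framework initializes it in place
		 * instead of allocating internally */
		err = gk20a_fence_from_syncpt(fence, host1x_pdev, id, value,
					      false /* wfi */,
					      true /* need_sync_fence */);
		if (err) {
			gk20a_fence_put(fence); /* assumed release helper */
			return err;
		}
		return 0;
	}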
Bug 1795076
Change-Id: I96db457683cd207fd029c31c45f548f98055e844
Signed-off-by: Sachit Kadle <skadle@nvidia.com>
Reviewed-on: http://git-master/r/1206725
(cherry picked from commit 9d196fd10db6c2f934c2a53b1fc0500eb4626624)
Reviewed-on: http://git-master/r/1223933
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/fence_gk20a.c')
-rw-r--r--   drivers/gpu/nvgpu/gk20a/fence_gk20a.c | 88
1 file changed, 61 insertions(+), 27 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
index 596dc549..f788829f 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
@@ -63,16 +63,27 @@ struct gk20a_fence *gk20a_fence_get(struct gk20a_fence *f)
 	return f;
 }
 
+static inline bool gk20a_fence_is_valid(struct gk20a_fence *f)
+{
+	bool valid = f->valid;
+
+	rmb();
+	return valid;
+}
+
 int gk20a_fence_wait(struct gk20a_fence *f, int timeout)
 {
-	if (!tegra_platform_is_silicon())
-		timeout = (u32)MAX_SCHEDULE_TIMEOUT;
-	return f->ops->wait(f, timeout);
+	if (f && gk20a_fence_is_valid(f)) {
+		if (!tegra_platform_is_silicon())
+			timeout = (u32)MAX_SCHEDULE_TIMEOUT;
+		return f->ops->wait(f, timeout);
+	}
+	return 0;
 }
 
 bool gk20a_fence_is_expired(struct gk20a_fence *f)
 {
-	if (f && f->ops)
+	if (f && gk20a_fence_is_valid(f) && f->ops)
 		return f->ops->is_expired(f);
 	else
 		return true;
@@ -83,7 +94,7 @@ int gk20a_fence_install_fd(struct gk20a_fence *f)
 #ifdef CONFIG_SYNC
 	int fd;
 
-	if (!f->sync_fence)
+	if (!f || !gk20a_fence_is_valid(f) || !f->sync_fence)
 		return -EINVAL;
 
 	fd = get_unused_fd_flags(O_RDWR);
@@ -98,18 +109,28 @@ int gk20a_fence_install_fd(struct gk20a_fence *f)
 #endif
 }
 
-struct gk20a_fence *gk20a_alloc_fence(const struct gk20a_fence_ops *ops,
-				      struct sync_fence *sync_fence, bool wfi)
+struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c)
 {
-	struct gk20a_fence *f = kzalloc(sizeof(*f), GFP_KERNEL);
-	if (!f)
+	struct gk20a_fence *fence;
+
+	fence = kzalloc(sizeof(struct gk20a_fence), GFP_KERNEL);
+	if (!fence)
 		return NULL;
-	kref_init(&f->ref);
+
+	kref_init(&fence->ref);
+	return fence;
+}
+
+void gk20a_init_fence(struct gk20a_fence *f,
+		const struct gk20a_fence_ops *ops,
+		struct sync_fence *sync_fence, bool wfi)
+{
+	if (!f)
+		return;
 	f->ops = ops;
 	f->sync_fence = sync_fence;
 	f->wfi = wfi;
 	f->syncpt_id = -1;
-	return f;
 }
 
 /* Fences that are backed by GPU semaphores: */
@@ -143,14 +164,15 @@ static const struct gk20a_fence_ops gk20a_semaphore_fence_ops = {
 };
 
 /* This function takes ownership of the semaphore */
-struct gk20a_fence *gk20a_fence_from_semaphore(
+int gk20a_fence_from_semaphore(
+		struct gk20a_fence *fence_out,
 		struct sync_timeline *timeline,
 		struct gk20a_semaphore *semaphore,
 		wait_queue_head_t *semaphore_wq,
 		struct sync_fence *dependency,
 		bool wfi, bool need_sync_fence)
 {
-	struct gk20a_fence *f;
+	struct gk20a_fence *f = fence_out;
 	struct sync_fence *sync_fence = NULL;
 
 #ifdef CONFIG_SYNC
@@ -159,21 +181,26 @@ struct gk20a_fence *gk20a_fence_from_semaphore(
 			dependency, "f-gk20a-0x%04x",
 			gk20a_semaphore_gpu_ro_va(semaphore));
 		if (!sync_fence)
-			return NULL;
+			return -1;
 	}
 #endif
 
-	f = gk20a_alloc_fence(&gk20a_semaphore_fence_ops, sync_fence, wfi);
+	gk20a_init_fence(f, &gk20a_semaphore_fence_ops, sync_fence, wfi);
 	if (!f) {
 #ifdef CONFIG_SYNC
 		sync_fence_put(sync_fence);
 #endif
-		return NULL;
+		return -EINVAL;
 	}
 
 	f->semaphore = semaphore;
 	f->semaphore_wq = semaphore_wq;
-	return f;
+
+	/* commit previous writes before setting the valid flag */
+	wmb();
+	f->valid = true;
+
+	return 0;
 }
 
 #ifdef CONFIG_TEGRA_GK20A
@@ -197,11 +224,13 @@ static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = {
 	.is_expired = &gk20a_syncpt_fence_is_expired,
 };
 
-struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
-					    u32 id, u32 value, bool wfi,
-					    bool need_sync_fence)
+int gk20a_fence_from_syncpt(
+		struct gk20a_fence *fence_out,
+		struct platform_device *host1x_pdev,
+		u32 id, u32 value, bool wfi,
+		bool need_sync_fence)
 {
-	struct gk20a_fence *f;
+	struct gk20a_fence *f = fence_out;
 	struct sync_fence *sync_fence = NULL;
 
 #ifdef CONFIG_SYNC
@@ -214,27 +243,32 @@ struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
 		sync_fence = nvhost_sync_create_fence(host1x_pdev, &pt, 1,
 						      "fence");
 		if (IS_ERR(sync_fence))
-			return NULL;
+			return -1;
 	}
 #endif
 
-	f = gk20a_alloc_fence(&gk20a_syncpt_fence_ops, sync_fence, wfi);
+	gk20a_init_fence(f, &gk20a_syncpt_fence_ops, sync_fence, wfi);
 	if (!f) {
 #ifdef CONFIG_SYNC
 		if (sync_fence)
 			sync_fence_put(sync_fence);
 #endif
-		return NULL;
+		return -EINVAL;
 	}
 	f->host1x_pdev = host1x_pdev;
 	f->syncpt_id = id;
 	f->syncpt_value = value;
-	return f;
+
+	/* commit previous writes before setting the valid flag */
+	wmb();
+	f->valid = true;
+
+	return 0;
 }
 #else
-struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
+int gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
 			u32 id, u32 value, bool wfi)
 {
-	return NULL;
+	return -EINVAL;
 }
 #endif