summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu
diff options
context:
space:
mode:
author: Konsta Holtta <kholtta@nvidia.com>, 2017-04-06 03:48:35 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com>, 2017-04-11 12:57:21 -0400
commit: 1a4647272f4fe50137c79583b698c1ef6f5def12 (patch)
tree: 0eee9698405e4467a7644c74c3d41d4bca933a8f /drivers/gpu
parent: 6de456f8407ba7230410175f1aff90541137d19b (diff)
gpu: nvgpu: remove fence dependency tracking
In preparation for better abstraction in job synchronization, drop support for the dependency fences tracked via submit pre-fences in semaphore-based syncs. This has only ever worked for semaphores, not nvhost syncpoints, and hasn't really been used. The dependency was printed in the sync framework's sync pt value string.

Also remove the userspace-visible gk20a_sync_pt_info, which is unused and depends on this feature (it provided a duration since the dependency fence's timestamp).

Jira NVGPU-43

Change-Id: Ia2b26502a9dc8f5bef5470f94b1475001f621da1
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1456880
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c  25
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fence_gk20a.c  3
-rw-r--r--  drivers/gpu/nvgpu/gk20a/fence_gk20a.h  1
-rw-r--r--  drivers/gpu/nvgpu/gk20a/sync_gk20a.c  96
-rw-r--r--  drivers/gpu/nvgpu/gk20a/sync_gk20a.h  1
5 files changed, 12 insertions, 114 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index fbeb1e4a..6d0f0854 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -678,7 +678,7 @@ static int gk20a_channel_semaphore_wait_fd(
678 sema->timeline, 678 sema->timeline,
679 fp_sema, 679 fp_sema,
680 &c->semaphore_wq, 680 &c->semaphore_wq,
681 NULL, false, false); 681 false, false);
682 if (err) { 682 if (err) {
683 nvgpu_semaphore_put(fp_sema); 683 nvgpu_semaphore_put(fp_sema);
684 goto clean_up_priv_cmd; 684 goto clean_up_priv_cmd;
@@ -742,7 +742,7 @@ static int gk20a_channel_semaphore_wait_fd(
742 * already signaled 742 * already signaled
743 */ 743 */
744 err = gk20a_fence_from_semaphore(c->g, fence, sema->timeline, w->sema, 744 err = gk20a_fence_from_semaphore(c->g, fence, sema->timeline, w->sema,
745 &c->semaphore_wq, NULL, false, false); 745 &c->semaphore_wq, false, false);
746 if (err) 746 if (err)
747 goto clean_up_sema; 747 goto clean_up_sema;
748 748
@@ -787,7 +787,6 @@ clean_up_sync_fence:
787 787
788static int __gk20a_channel_semaphore_incr( 788static int __gk20a_channel_semaphore_incr(
789 struct gk20a_channel_sync *s, bool wfi_cmd, 789 struct gk20a_channel_sync *s, bool wfi_cmd,
790 struct sync_fence *dependency,
791 struct priv_cmd_entry *incr_cmd, 790 struct priv_cmd_entry *incr_cmd,
792 struct gk20a_fence *fence, 791 struct gk20a_fence *fence,
793 bool need_sync_fence) 792 bool need_sync_fence)
@@ -820,7 +819,7 @@ static int __gk20a_channel_semaphore_incr(
820 err = gk20a_fence_from_semaphore(c->g, fence, 819 err = gk20a_fence_from_semaphore(c->g, fence,
821 sp->timeline, semaphore, 820 sp->timeline, semaphore,
822 &c->semaphore_wq, 821 &c->semaphore_wq,
823 dependency, wfi_cmd, 822 wfi_cmd,
824 need_sync_fence); 823 need_sync_fence);
825 if (err) 824 if (err)
826 goto clean_up_sema; 825 goto clean_up_sema;
@@ -839,7 +838,6 @@ static int gk20a_channel_semaphore_incr_wfi(
839{ 838{
840 return __gk20a_channel_semaphore_incr(s, 839 return __gk20a_channel_semaphore_incr(s,
841 true /* wfi */, 840 true /* wfi */,
842 NULL,
843 entry, fence, true); 841 entry, fence, true);
844} 842}
845 843
@@ -854,7 +852,6 @@ static int gk20a_channel_semaphore_incr(
854 * a fence to user space. */ 852 * a fence to user space. */
855 return __gk20a_channel_semaphore_incr(s, 853 return __gk20a_channel_semaphore_incr(s,
856 false /* no wfi */, 854 false /* no wfi */,
857 NULL,
858 entry, fence, need_sync_fence); 855 entry, fence, need_sync_fence);
859} 856}
860 857
@@ -868,22 +865,12 @@ static int gk20a_channel_semaphore_incr_user(
868 bool register_irq) 865 bool register_irq)
869{ 866{
870#ifdef CONFIG_SYNC 867#ifdef CONFIG_SYNC
871 struct sync_fence *dependency = NULL;
872 int err; 868 int err;
873 869
874 if (wait_fence_fd >= 0) { 870 err = __gk20a_channel_semaphore_incr(s, wfi, entry, fence,
875 dependency = gk20a_sync_fence_fdget(wait_fence_fd); 871 need_sync_fence);
876 if (!dependency) 872 if (err)
877 return -EINVAL;
878 }
879
880 err = __gk20a_channel_semaphore_incr(s, wfi, dependency,
881 entry, fence, need_sync_fence);
882 if (err) {
883 if (dependency)
884 sync_fence_put(dependency);
885 return err; 873 return err;
886 }
887 874
888 return 0; 875 return 0;
889#else 876#else
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
index 63da0959..a5aeae08 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
@@ -233,7 +233,6 @@ int gk20a_fence_from_semaphore(
233 struct sync_timeline *timeline, 233 struct sync_timeline *timeline,
234 struct nvgpu_semaphore *semaphore, 234 struct nvgpu_semaphore *semaphore,
235 wait_queue_head_t *semaphore_wq, 235 wait_queue_head_t *semaphore_wq,
236 struct sync_fence *dependency,
237 bool wfi, bool need_sync_fence) 236 bool wfi, bool need_sync_fence)
238{ 237{
239 struct gk20a_fence *f = fence_out; 238 struct gk20a_fence *f = fence_out;
@@ -242,7 +241,7 @@ int gk20a_fence_from_semaphore(
242#ifdef CONFIG_SYNC 241#ifdef CONFIG_SYNC
243 if (need_sync_fence) { 242 if (need_sync_fence) {
244 sync_fence = gk20a_sync_fence_create(g, timeline, semaphore, 243 sync_fence = gk20a_sync_fence_create(g, timeline, semaphore,
245 dependency, "f-gk20a-0x%04x", 244 "f-gk20a-0x%04x",
246 nvgpu_semaphore_gpu_ro_va(semaphore)); 245 nvgpu_semaphore_gpu_ro_va(semaphore));
247 if (!sync_fence) 246 if (!sync_fence)
248 return -1; 247 return -1;
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
index fccf0c27..426556cc 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
@@ -58,7 +58,6 @@ int gk20a_fence_from_semaphore(
58 struct sync_timeline *timeline, 58 struct sync_timeline *timeline,
59 struct nvgpu_semaphore *semaphore, 59 struct nvgpu_semaphore *semaphore,
60 wait_queue_head_t *semaphore_wq, 60 wait_queue_head_t *semaphore_wq,
61 struct sync_fence *dependency,
62 bool wfi, bool need_sync_fence); 61 bool wfi, bool need_sync_fence);
63 62
64int gk20a_fence_from_syncpt( 63int gk20a_fence_from_syncpt(
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
index 5b888299..ecc96a7b 100644
--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
@@ -47,8 +47,6 @@ struct gk20a_sync_pt {
47 u32 thresh; 47 u32 thresh;
48 struct nvgpu_semaphore *sema; 48 struct nvgpu_semaphore *sema;
49 struct gk20a_sync_timeline *obj; 49 struct gk20a_sync_timeline *obj;
50 struct sync_fence *dep;
51 ktime_t dep_timestamp;
52 50
53 /* 51 /*
54 * Use a spin lock here since it will have better performance 52 * Use a spin lock here since it will have better performance
@@ -206,8 +204,6 @@ static void gk20a_sync_pt_free_shared(struct kref *ref)
206 container_of(ref, struct gk20a_sync_pt, refcount); 204 container_of(ref, struct gk20a_sync_pt, refcount);
207 struct gk20a *g = pt->g; 205 struct gk20a *g = pt->g;
208 206
209 if (pt->dep)
210 sync_fence_put(pt->dep);
211 if (pt->sema) 207 if (pt->sema)
212 nvgpu_semaphore_put(pt->sema); 208 nvgpu_semaphore_put(pt->sema);
213 nvgpu_kfree(g, pt); 209 nvgpu_kfree(g, pt);
@@ -216,8 +212,7 @@ static void gk20a_sync_pt_free_shared(struct kref *ref)
216static struct gk20a_sync_pt *gk20a_sync_pt_create_shared( 212static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
217 struct gk20a *g, 213 struct gk20a *g,
218 struct gk20a_sync_timeline *obj, 214 struct gk20a_sync_timeline *obj,
219 struct nvgpu_semaphore *sema, 215 struct nvgpu_semaphore *sema)
220 struct sync_fence *dependency)
221{ 216{
222 struct gk20a_sync_pt *shared; 217 struct gk20a_sync_pt *shared;
223 218
@@ -231,20 +226,6 @@ static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
231 shared->sema = sema; 226 shared->sema = sema;
232 shared->thresh = ++obj->max; /* sync framework has a lock */ 227 shared->thresh = ++obj->max; /* sync framework has a lock */
233 228
234 /* Store the dependency fence for this pt. */
235 if (dependency) {
236#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
237 if (dependency->status == 0)
238#else
239 if (!atomic_read(&dependency->status))
240#endif
241 shared->dep = dependency;
242 else {
243 shared->dep_timestamp = ktime_get();
244 sync_fence_put(dependency);
245 }
246 }
247
248 nvgpu_spinlock_init(&shared->lock); 229 nvgpu_spinlock_init(&shared->lock);
249 230
250 nvgpu_semaphore_get(sema); 231 nvgpu_semaphore_get(sema);
@@ -255,8 +236,7 @@ static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
255static struct sync_pt *gk20a_sync_pt_create_inst( 236static struct sync_pt *gk20a_sync_pt_create_inst(
256 struct gk20a *g, 237 struct gk20a *g,
257 struct gk20a_sync_timeline *obj, 238 struct gk20a_sync_timeline *obj,
258 struct nvgpu_semaphore *sema, 239 struct nvgpu_semaphore *sema)
259 struct sync_fence *dependency)
260{ 240{
261 struct gk20a_sync_pt_inst *pti; 241 struct gk20a_sync_pt_inst *pti;
262 242
@@ -265,7 +245,7 @@ static struct sync_pt *gk20a_sync_pt_create_inst(
265 if (!pti) 245 if (!pti)
266 return NULL; 246 return NULL;
267 247
268 pti->shared = gk20a_sync_pt_create_shared(g, obj, sema, dependency); 248 pti->shared = gk20a_sync_pt_create_shared(g, obj, sema);
269 if (!pti->shared) { 249 if (!pti->shared) {
270 sync_pt_free(&pti->pt); 250 sync_pt_free(&pti->pt);
271 return NULL; 251 return NULL;
@@ -303,9 +283,6 @@ static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt)
303{ 283{
304 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); 284 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
305 struct gk20a_sync_timeline *obj = pt->obj; 285 struct gk20a_sync_timeline *obj = pt->obj;
306#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
307 struct sync_pt *pos;
308#endif
309 bool signaled = true; 286 bool signaled = true;
310 287
311 nvgpu_spinlock_acquire(&pt->lock); 288 nvgpu_spinlock_acquire(&pt->lock);
@@ -321,29 +298,6 @@ static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt)
321 obj->min) == 1) 298 obj->min) == 1)
322 obj->min = pt->thresh; 299 obj->min = pt->thresh;
323 300
324 /* Release the dependency fence, but get its timestamp
325 * first.*/
326 if (pt->dep) {
327 s64 ns = 0;
328#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
329 struct list_head *dep_pts = &pt->dep->pt_list_head;
330 list_for_each_entry(pos, dep_pts, pt_list) {
331 ns = max(ns, ktime_to_ns(pos->timestamp));
332 }
333#else
334 struct fence *fence;
335 int i;
336
337 for (i = 0; i < pt->dep->num_fences; i++) {
338 fence = pt->dep->cbs[i].sync_pt;
339 ns = max(ns, ktime_to_ns(fence->timestamp));
340 }
341#endif
342 pt->dep_timestamp = ns_to_ktime(ns);
343 sync_fence_put(pt->dep);
344 pt->dep = NULL;
345 }
346
347 /* Release the semaphore to the pool. */ 301 /* Release the semaphore to the pool. */
348 nvgpu_semaphore_put(pt->sema); 302 nvgpu_semaphore_put(pt->sema);
349 pt->sema = NULL; 303 pt->sema = NULL;
@@ -354,18 +308,6 @@ done:
354 return signaled; 308 return signaled;
355} 309}
356 310
357static inline ktime_t gk20a_sync_pt_duration(struct sync_pt *sync_pt)
358{
359 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
360 if (!gk20a_sync_pt_has_signaled(sync_pt) || !pt->dep_timestamp.tv64)
361 return ns_to_ktime(0);
362#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
363 return ktime_sub(sync_pt->timestamp, pt->dep_timestamp);
364#else
365 return ktime_sub(sync_pt->base.timestamp, pt->dep_timestamp);
366#endif
367}
368
369static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b) 311static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b)
370{ 312{
371 bool a_expired; 313 bool a_expired;
@@ -426,39 +368,13 @@ static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str,
426 int size) 368 int size)
427{ 369{
428 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); 370 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
429 ktime_t dur = gk20a_sync_pt_duration(sync_pt);
430 371
431 if (pt->sema) { 372 if (pt->sema) {
432 gk20a_sync_pt_value_str_for_sema(pt, str, size); 373 gk20a_sync_pt_value_str_for_sema(pt, str, size);
433 return; 374 return;
434 } 375 }
435 376
436 if (pt->dep) { 377 snprintf(str, size, "%d", pt->thresh);
437 snprintf(str, size, "(dep: [%p] %s) %d",
438 pt->dep, pt->dep->name, pt->thresh);
439 } else if (dur.tv64) {
440 struct timeval tv = ktime_to_timeval(dur);
441 snprintf(str, size, "(took %ld.%03ld ms) %d",
442 tv.tv_sec * 1000 + tv.tv_usec / 1000,
443 tv.tv_usec % 1000,
444 pt->thresh);
445 } else {
446 snprintf(str, size, "%d", pt->thresh);
447 }
448}
449
450static int gk20a_sync_fill_driver_data(struct sync_pt *sync_pt,
451 void *data, int size)
452{
453 struct gk20a_sync_pt_info info;
454
455 if (size < (int)sizeof(info))
456 return -ENOMEM;
457
458 info.hw_op_ns = ktime_to_ns(gk20a_sync_pt_duration(sync_pt));
459 memcpy(data, &info, sizeof(info));
460
461 return sizeof(info);
462} 378}
463 379
464static const struct sync_timeline_ops gk20a_sync_timeline_ops = { 380static const struct sync_timeline_ops gk20a_sync_timeline_ops = {
@@ -467,7 +383,6 @@ static const struct sync_timeline_ops gk20a_sync_timeline_ops = {
467 .has_signaled = gk20a_sync_pt_has_signaled, 383 .has_signaled = gk20a_sync_pt_has_signaled,
468 .compare = gk20a_sync_pt_compare, 384 .compare = gk20a_sync_pt_compare,
469 .free_pt = gk20a_sync_pt_free_inst, 385 .free_pt = gk20a_sync_pt_free_inst,
470 .fill_driver_data = gk20a_sync_fill_driver_data,
471 .timeline_value_str = gk20a_sync_timeline_value_str, 386 .timeline_value_str = gk20a_sync_timeline_value_str,
472 .pt_value_str = gk20a_sync_pt_value_str, 387 .pt_value_str = gk20a_sync_pt_value_str,
473}; 388};
@@ -515,7 +430,6 @@ struct sync_fence *gk20a_sync_fence_create(
515 struct gk20a *g, 430 struct gk20a *g,
516 struct sync_timeline *obj, 431 struct sync_timeline *obj,
517 struct nvgpu_semaphore *sema, 432 struct nvgpu_semaphore *sema,
518 struct sync_fence *dependency,
519 const char *fmt, ...) 433 const char *fmt, ...)
520{ 434{
521 char name[30]; 435 char name[30];
@@ -524,7 +438,7 @@ struct sync_fence *gk20a_sync_fence_create(
524 struct sync_fence *fence; 438 struct sync_fence *fence;
525 struct gk20a_sync_timeline *timeline = to_gk20a_timeline(obj); 439 struct gk20a_sync_timeline *timeline = to_gk20a_timeline(obj);
526 440
527 pt = gk20a_sync_pt_create_inst(g, timeline, sema, dependency); 441 pt = gk20a_sync_pt_create_inst(g, timeline, sema);
528 if (pt == NULL) 442 if (pt == NULL)
529 return NULL; 443 return NULL;
530 444
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h
index 33112bb3..a422377c 100644
--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h
@@ -37,7 +37,6 @@ struct sync_fence *gk20a_sync_fence_create(
37 struct gk20a *g, 37 struct gk20a *g,
38 struct sync_timeline *, 38 struct sync_timeline *,
39 struct nvgpu_semaphore *, 39 struct nvgpu_semaphore *,
40 struct sync_fence *dependency,
41 const char *fmt, ...); 40 const char *fmt, ...);
42struct sync_fence *gk20a_sync_fence_fdget(int fd); 41struct sync_fence *gk20a_sync_fence_fdget(int fd);
43#else 42#else