author	Konsta Holtta <kholtta@nvidia.com>	2018-03-13 10:58:01 -0400
committer	mobile promotions <svcmobile_promotions@nvidia.com>	2018-03-16 10:34:01 -0400
commit	34323b559590ed8f1c64ecbb7ffbd838a6478594 (patch)
tree	c6258e44413a2f15ded4cf63e4a16f5118921703 /drivers/gpu/nvgpu/gk20a/sync_gk20a.c
parent	fb40f2a80739985abac273bc493e07341aa003af (diff)
gpu: nvgpu: wait for all prefence semas on gpu
The pre-fence wait for semaphores in the submit path has supported a
fast path for fences that have only one underlying semaphore. The fast
path just inserts the wait on this sema into the pushbuffer directly.
For other fences, the path has been using a CPU wait indirection,
signaling another semaphore when we get the CPU-side callback.

Instead of only supporting prefences with one sema, unroll all the
individual semaphores and insert waits for each to a pushbuffer, like
we've already been doing with syncpoints. Now all sema-backed syncs get
the fast path.

This simplifies the logic and makes it more explicit that only foreign
fences need the CPU wait.

There is no need to hold references to the sync fence or the semas
inside: this submitted job only needs the global read-only sema mapping
that is guaranteed to stay alive while the VM of this channel stays
alive, and the job does not outlive this channel.

Jira NVGPU-43
Jira NVGPU-66
Jira NVGPU-513

Change-Id: I7cfbb510001d998a864aed8d6afd1582b9adb80d
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1636345
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
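To make the unroll described above concrete, the sketch below shows roughly how a submit path could walk every sync_pt of a gk20a-backed fence and queue one HW semaphore acquire per point, mirroring the existing syncpoint handling. This is an illustration, not the code added by this change: push_sema_waits() and emit_sema_acquire() are hypothetical placeholder names, while sync_pt_from_fence(), gk20a_sync_pt_sema() and the fence->num_fences / fence->cbs[] fields are the interfaces actually used in this patch; nvgpu_semaphore_put() is assumed to exist as the counterpart of the nvgpu_semaphore_get() taken inside gk20a_sync_pt_sema().

/*
 * Illustrative sketch only, not part of this change. Walk each sync_pt in a
 * gk20a sync_fence and insert one HW semaphore acquire per point.
 * emit_sema_acquire() and this function name are hypothetical.
 */
static int push_sema_waits(struct channel_gk20a *c, struct sync_fence *fence)
{
	int i;

	for (i = 0; i < fence->num_fences; i++) {
		struct sync_pt *spt = sync_pt_from_fence(fence->cbs[i].sync_pt);
		struct nvgpu_semaphore *sema = gk20a_sync_pt_sema(spt);

		/* A NULL sema presumably means the point has already signaled. */
		if (!sema)
			continue;

		/* Queue the acquire in this job's wait pushbuffer. */
		emit_sema_acquire(c, sema);

		/*
		 * The job keeps no reference of its own: the read-only sema
		 * mapping lives as long as the channel's VM, and the job does
		 * not outlive the channel, so the temporary ref taken by
		 * gk20a_sync_pt_sema() can be dropped immediately
		 * (nvgpu_semaphore_put() assumed).
		 */
		nvgpu_semaphore_put(sema);
	}

	return 0;
}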
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/sync_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/sync_gk20a.c	90
1 file changed, 39 insertions(+), 51 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
index f6d16b90..a8600bce 100644
--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
@@ -1,7 +1,7 @@
 /*
  * GK20A Sync Framework Integration
  *
- * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -70,55 +70,6 @@ struct gk20a_sync_pt_inst {
 };
 
 /**
- * Check if the passed sync_fence is backed by a single GPU semaphore. In such
- * cases we can short circuit a lot of SW involved in signaling pre-fences and
- * post fences.
- *
- * For now reject multi-sync_pt fences. This could be changed in future. It
- * would require that the sema fast path push a sema acquire for each semaphore
- * in the fence.
- */
-int gk20a_is_sema_backed_sync_fence(struct sync_fence *fence)
-{
-	struct sync_timeline *t;
-
-	struct fence *pt = fence->cbs[0].sync_pt;
-	struct sync_pt *spt = sync_pt_from_fence(pt);
-
-	if (fence->num_fences != 1)
-		return 0;
-
-	if (spt == NULL)
-		return 0;
-
-	t = sync_pt_parent(spt);
-
-	if (t->ops == &gk20a_sync_timeline_ops)
-		return 1;
-	return 0;
-}
-
-struct nvgpu_semaphore *gk20a_sync_fence_get_sema(struct sync_fence *f)
-{
-	struct sync_pt *spt;
-	struct gk20a_sync_pt_inst *pti;
-
-	struct fence *pt;
-
-	if (!f)
-		return NULL;
-
-	if (!gk20a_is_sema_backed_sync_fence(f))
-		return NULL;
-
-	pt = f->cbs[0].sync_pt;
-	spt = sync_pt_from_fence(pt);
-	pti = container_of(spt, struct gk20a_sync_pt_inst, pt);
-
-	return pti->shared->sema;
-}
-
-/**
  * Compares sync pt values a and b, both of which will trigger either before
  * or after ref (i.e. a and b trigger before ref, or a and b trigger after
  * ref). Supplying ref allows us to handle wrapping correctly.
@@ -371,7 +322,44 @@ static const struct sync_timeline_ops gk20a_sync_timeline_ops = {
 
 struct sync_fence *gk20a_sync_fence_fdget(int fd)
 {
-	return sync_fence_fdget(fd);
+	struct sync_fence *fence = sync_fence_fdget(fd);
+	int i;
+
+	if (!fence)
+		return NULL;
+
+	for (i = 0; i < fence->num_fences; i++) {
+		struct fence *pt = fence->cbs[i].sync_pt;
+		struct sync_pt *spt = sync_pt_from_fence(pt);
+		struct sync_timeline *t;
+
+		if (spt == NULL) {
+			sync_fence_put(fence);
+			return NULL;
+		}
+
+		t = sync_pt_parent(spt);
+		if (t->ops != &gk20a_sync_timeline_ops) {
+			sync_fence_put(fence);
+			return NULL;
+		}
+	}
+
+	return fence;
+}
+
+struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt)
+{
+	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(spt);
+	struct nvgpu_semaphore *sema;
+
+	nvgpu_spinlock_acquire(&pt->lock);
+	sema = pt->sema;
+	if (sema)
+		nvgpu_semaphore_get(sema);
+	nvgpu_spinlock_release(&pt->lock);
+
+	return sema;
 }
 
 void gk20a_sync_timeline_signal(struct sync_timeline *timeline)
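A note on the rewritten gk20a_sync_fence_fdget() above: it now validates every point of the fence and returns NULL (after sync_fence_put()) if any point is not on a gk20a timeline, so a non-NULL return guarantees the whole fence is sema-backed, which is what lets the old single-sema check gk20a_is_sema_backed_sync_fence() and its CPU-wait indirection go away. A hedged caller-side sketch of that contract follows; wait_on_prefence_fd(), push_sema_waits() (from the earlier sketch) and handle_foreign_fence() are hypothetical names, with the last standing in for whatever CPU-wait fallback the submit path uses for foreign fences.

/* Illustration only; not code from this change. */
static int wait_on_prefence_fd(struct channel_gk20a *c, int fd)
{
	struct sync_fence *f = gk20a_sync_fence_fdget(fd);
	int err;

	if (!f) {
		/*
		 * Bad fd or a foreign fence: only this case still needs the
		 * CPU-wait indirection (handle_foreign_fence() is a
		 * hypothetical stand-in for that fallback).
		 */
		return handle_foreign_fence(c, fd);
	}

	err = push_sema_waits(c, f);	/* unroll semas as sketched earlier */
	sync_fence_put(f);
	return err;
}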