diff options
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 25 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fence_gk20a.c | 3 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/fence_gk20a.h | 1 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/sync_gk20a.c | 96 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/sync_gk20a.h | 1 | ||||
-rw-r--r-- | include/uapi/linux/nvgpu.h | 4 |
6 files changed, 12 insertions, 118 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index fbeb1e4a..6d0f0854 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | |||
@@ -678,7 +678,7 @@ static int gk20a_channel_semaphore_wait_fd( | |||
678 | sema->timeline, | 678 | sema->timeline, |
679 | fp_sema, | 679 | fp_sema, |
680 | &c->semaphore_wq, | 680 | &c->semaphore_wq, |
681 | NULL, false, false); | 681 | false, false); |
682 | if (err) { | 682 | if (err) { |
683 | nvgpu_semaphore_put(fp_sema); | 683 | nvgpu_semaphore_put(fp_sema); |
684 | goto clean_up_priv_cmd; | 684 | goto clean_up_priv_cmd; |
@@ -742,7 +742,7 @@ static int gk20a_channel_semaphore_wait_fd( | |||
742 | * already signaled | 742 | * already signaled |
743 | */ | 743 | */ |
744 | err = gk20a_fence_from_semaphore(c->g, fence, sema->timeline, w->sema, | 744 | err = gk20a_fence_from_semaphore(c->g, fence, sema->timeline, w->sema, |
745 | &c->semaphore_wq, NULL, false, false); | 745 | &c->semaphore_wq, false, false); |
746 | if (err) | 746 | if (err) |
747 | goto clean_up_sema; | 747 | goto clean_up_sema; |
748 | 748 | ||
@@ -787,7 +787,6 @@ clean_up_sync_fence: | |||
787 | 787 | ||
788 | static int __gk20a_channel_semaphore_incr( | 788 | static int __gk20a_channel_semaphore_incr( |
789 | struct gk20a_channel_sync *s, bool wfi_cmd, | 789 | struct gk20a_channel_sync *s, bool wfi_cmd, |
790 | struct sync_fence *dependency, | ||
791 | struct priv_cmd_entry *incr_cmd, | 790 | struct priv_cmd_entry *incr_cmd, |
792 | struct gk20a_fence *fence, | 791 | struct gk20a_fence *fence, |
793 | bool need_sync_fence) | 792 | bool need_sync_fence) |
@@ -820,7 +819,7 @@ static int __gk20a_channel_semaphore_incr( | |||
820 | err = gk20a_fence_from_semaphore(c->g, fence, | 819 | err = gk20a_fence_from_semaphore(c->g, fence, |
821 | sp->timeline, semaphore, | 820 | sp->timeline, semaphore, |
822 | &c->semaphore_wq, | 821 | &c->semaphore_wq, |
823 | dependency, wfi_cmd, | 822 | wfi_cmd, |
824 | need_sync_fence); | 823 | need_sync_fence); |
825 | if (err) | 824 | if (err) |
826 | goto clean_up_sema; | 825 | goto clean_up_sema; |
@@ -839,7 +838,6 @@ static int gk20a_channel_semaphore_incr_wfi( | |||
839 | { | 838 | { |
840 | return __gk20a_channel_semaphore_incr(s, | 839 | return __gk20a_channel_semaphore_incr(s, |
841 | true /* wfi */, | 840 | true /* wfi */, |
842 | NULL, | ||
843 | entry, fence, true); | 841 | entry, fence, true); |
844 | } | 842 | } |
845 | 843 | ||
@@ -854,7 +852,6 @@ static int gk20a_channel_semaphore_incr( | |||
854 | * a fence to user space. */ | 852 | * a fence to user space. */ |
855 | return __gk20a_channel_semaphore_incr(s, | 853 | return __gk20a_channel_semaphore_incr(s, |
856 | false /* no wfi */, | 854 | false /* no wfi */, |
857 | NULL, | ||
858 | entry, fence, need_sync_fence); | 855 | entry, fence, need_sync_fence); |
859 | } | 856 | } |
860 | 857 | ||
@@ -868,22 +865,12 @@ static int gk20a_channel_semaphore_incr_user( | |||
868 | bool register_irq) | 865 | bool register_irq) |
869 | { | 866 | { |
870 | #ifdef CONFIG_SYNC | 867 | #ifdef CONFIG_SYNC |
871 | struct sync_fence *dependency = NULL; | ||
872 | int err; | 868 | int err; |
873 | 869 | ||
874 | if (wait_fence_fd >= 0) { | 870 | err = __gk20a_channel_semaphore_incr(s, wfi, entry, fence, |
875 | dependency = gk20a_sync_fence_fdget(wait_fence_fd); | 871 | need_sync_fence); |
876 | if (!dependency) | 872 | if (err) |
877 | return -EINVAL; | ||
878 | } | ||
879 | |||
880 | err = __gk20a_channel_semaphore_incr(s, wfi, dependency, | ||
881 | entry, fence, need_sync_fence); | ||
882 | if (err) { | ||
883 | if (dependency) | ||
884 | sync_fence_put(dependency); | ||
885 | return err; | 873 | return err; |
886 | } | ||
887 | 874 | ||
888 | return 0; | 875 | return 0; |
889 | #else | 876 | #else |
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c index 63da0959..a5aeae08 100644 --- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c | |||
@@ -233,7 +233,6 @@ int gk20a_fence_from_semaphore( | |||
233 | struct sync_timeline *timeline, | 233 | struct sync_timeline *timeline, |
234 | struct nvgpu_semaphore *semaphore, | 234 | struct nvgpu_semaphore *semaphore, |
235 | wait_queue_head_t *semaphore_wq, | 235 | wait_queue_head_t *semaphore_wq, |
236 | struct sync_fence *dependency, | ||
237 | bool wfi, bool need_sync_fence) | 236 | bool wfi, bool need_sync_fence) |
238 | { | 237 | { |
239 | struct gk20a_fence *f = fence_out; | 238 | struct gk20a_fence *f = fence_out; |
@@ -242,7 +241,7 @@ int gk20a_fence_from_semaphore( | |||
242 | #ifdef CONFIG_SYNC | 241 | #ifdef CONFIG_SYNC |
243 | if (need_sync_fence) { | 242 | if (need_sync_fence) { |
244 | sync_fence = gk20a_sync_fence_create(g, timeline, semaphore, | 243 | sync_fence = gk20a_sync_fence_create(g, timeline, semaphore, |
245 | dependency, "f-gk20a-0x%04x", | 244 | "f-gk20a-0x%04x", |
246 | nvgpu_semaphore_gpu_ro_va(semaphore)); | 245 | nvgpu_semaphore_gpu_ro_va(semaphore)); |
247 | if (!sync_fence) | 246 | if (!sync_fence) |
248 | return -1; | 247 | return -1; |
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h index fccf0c27..426556cc 100644 --- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h | |||
@@ -58,7 +58,6 @@ int gk20a_fence_from_semaphore( | |||
58 | struct sync_timeline *timeline, | 58 | struct sync_timeline *timeline, |
59 | struct nvgpu_semaphore *semaphore, | 59 | struct nvgpu_semaphore *semaphore, |
60 | wait_queue_head_t *semaphore_wq, | 60 | wait_queue_head_t *semaphore_wq, |
61 | struct sync_fence *dependency, | ||
62 | bool wfi, bool need_sync_fence); | 61 | bool wfi, bool need_sync_fence); |
63 | 62 | ||
64 | int gk20a_fence_from_syncpt( | 63 | int gk20a_fence_from_syncpt( |
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c index 5b888299..ecc96a7b 100644 --- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c | |||
@@ -47,8 +47,6 @@ struct gk20a_sync_pt { | |||
47 | u32 thresh; | 47 | u32 thresh; |
48 | struct nvgpu_semaphore *sema; | 48 | struct nvgpu_semaphore *sema; |
49 | struct gk20a_sync_timeline *obj; | 49 | struct gk20a_sync_timeline *obj; |
50 | struct sync_fence *dep; | ||
51 | ktime_t dep_timestamp; | ||
52 | 50 | ||
53 | /* | 51 | /* |
54 | * Use a spin lock here since it will have better performance | 52 | * Use a spin lock here since it will have better performance |
@@ -206,8 +204,6 @@ static void gk20a_sync_pt_free_shared(struct kref *ref) | |||
206 | container_of(ref, struct gk20a_sync_pt, refcount); | 204 | container_of(ref, struct gk20a_sync_pt, refcount); |
207 | struct gk20a *g = pt->g; | 205 | struct gk20a *g = pt->g; |
208 | 206 | ||
209 | if (pt->dep) | ||
210 | sync_fence_put(pt->dep); | ||
211 | if (pt->sema) | 207 | if (pt->sema) |
212 | nvgpu_semaphore_put(pt->sema); | 208 | nvgpu_semaphore_put(pt->sema); |
213 | nvgpu_kfree(g, pt); | 209 | nvgpu_kfree(g, pt); |
@@ -216,8 +212,7 @@ static void gk20a_sync_pt_free_shared(struct kref *ref) | |||
216 | static struct gk20a_sync_pt *gk20a_sync_pt_create_shared( | 212 | static struct gk20a_sync_pt *gk20a_sync_pt_create_shared( |
217 | struct gk20a *g, | 213 | struct gk20a *g, |
218 | struct gk20a_sync_timeline *obj, | 214 | struct gk20a_sync_timeline *obj, |
219 | struct nvgpu_semaphore *sema, | 215 | struct nvgpu_semaphore *sema) |
220 | struct sync_fence *dependency) | ||
221 | { | 216 | { |
222 | struct gk20a_sync_pt *shared; | 217 | struct gk20a_sync_pt *shared; |
223 | 218 | ||
@@ -231,20 +226,6 @@ static struct gk20a_sync_pt *gk20a_sync_pt_create_shared( | |||
231 | shared->sema = sema; | 226 | shared->sema = sema; |
232 | shared->thresh = ++obj->max; /* sync framework has a lock */ | 227 | shared->thresh = ++obj->max; /* sync framework has a lock */ |
233 | 228 | ||
234 | /* Store the dependency fence for this pt. */ | ||
235 | if (dependency) { | ||
236 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) | ||
237 | if (dependency->status == 0) | ||
238 | #else | ||
239 | if (!atomic_read(&dependency->status)) | ||
240 | #endif | ||
241 | shared->dep = dependency; | ||
242 | else { | ||
243 | shared->dep_timestamp = ktime_get(); | ||
244 | sync_fence_put(dependency); | ||
245 | } | ||
246 | } | ||
247 | |||
248 | nvgpu_spinlock_init(&shared->lock); | 229 | nvgpu_spinlock_init(&shared->lock); |
249 | 230 | ||
250 | nvgpu_semaphore_get(sema); | 231 | nvgpu_semaphore_get(sema); |
@@ -255,8 +236,7 @@ static struct gk20a_sync_pt *gk20a_sync_pt_create_shared( | |||
255 | static struct sync_pt *gk20a_sync_pt_create_inst( | 236 | static struct sync_pt *gk20a_sync_pt_create_inst( |
256 | struct gk20a *g, | 237 | struct gk20a *g, |
257 | struct gk20a_sync_timeline *obj, | 238 | struct gk20a_sync_timeline *obj, |
258 | struct nvgpu_semaphore *sema, | 239 | struct nvgpu_semaphore *sema) |
259 | struct sync_fence *dependency) | ||
260 | { | 240 | { |
261 | struct gk20a_sync_pt_inst *pti; | 241 | struct gk20a_sync_pt_inst *pti; |
262 | 242 | ||
@@ -265,7 +245,7 @@ static struct sync_pt *gk20a_sync_pt_create_inst( | |||
265 | if (!pti) | 245 | if (!pti) |
266 | return NULL; | 246 | return NULL; |
267 | 247 | ||
268 | pti->shared = gk20a_sync_pt_create_shared(g, obj, sema, dependency); | 248 | pti->shared = gk20a_sync_pt_create_shared(g, obj, sema); |
269 | if (!pti->shared) { | 249 | if (!pti->shared) { |
270 | sync_pt_free(&pti->pt); | 250 | sync_pt_free(&pti->pt); |
271 | return NULL; | 251 | return NULL; |
@@ -303,9 +283,6 @@ static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt) | |||
303 | { | 283 | { |
304 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | 284 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); |
305 | struct gk20a_sync_timeline *obj = pt->obj; | 285 | struct gk20a_sync_timeline *obj = pt->obj; |
306 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) | ||
307 | struct sync_pt *pos; | ||
308 | #endif | ||
309 | bool signaled = true; | 286 | bool signaled = true; |
310 | 287 | ||
311 | nvgpu_spinlock_acquire(&pt->lock); | 288 | nvgpu_spinlock_acquire(&pt->lock); |
@@ -321,29 +298,6 @@ static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt) | |||
321 | obj->min) == 1) | 298 | obj->min) == 1) |
322 | obj->min = pt->thresh; | 299 | obj->min = pt->thresh; |
323 | 300 | ||
324 | /* Release the dependency fence, but get its timestamp | ||
325 | * first.*/ | ||
326 | if (pt->dep) { | ||
327 | s64 ns = 0; | ||
328 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) | ||
329 | struct list_head *dep_pts = &pt->dep->pt_list_head; | ||
330 | list_for_each_entry(pos, dep_pts, pt_list) { | ||
331 | ns = max(ns, ktime_to_ns(pos->timestamp)); | ||
332 | } | ||
333 | #else | ||
334 | struct fence *fence; | ||
335 | int i; | ||
336 | |||
337 | for (i = 0; i < pt->dep->num_fences; i++) { | ||
338 | fence = pt->dep->cbs[i].sync_pt; | ||
339 | ns = max(ns, ktime_to_ns(fence->timestamp)); | ||
340 | } | ||
341 | #endif | ||
342 | pt->dep_timestamp = ns_to_ktime(ns); | ||
343 | sync_fence_put(pt->dep); | ||
344 | pt->dep = NULL; | ||
345 | } | ||
346 | |||
347 | /* Release the semaphore to the pool. */ | 301 | /* Release the semaphore to the pool. */ |
348 | nvgpu_semaphore_put(pt->sema); | 302 | nvgpu_semaphore_put(pt->sema); |
349 | pt->sema = NULL; | 303 | pt->sema = NULL; |
@@ -354,18 +308,6 @@ done: | |||
354 | return signaled; | 308 | return signaled; |
355 | } | 309 | } |
356 | 310 | ||
357 | static inline ktime_t gk20a_sync_pt_duration(struct sync_pt *sync_pt) | ||
358 | { | ||
359 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
360 | if (!gk20a_sync_pt_has_signaled(sync_pt) || !pt->dep_timestamp.tv64) | ||
361 | return ns_to_ktime(0); | ||
362 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) | ||
363 | return ktime_sub(sync_pt->timestamp, pt->dep_timestamp); | ||
364 | #else | ||
365 | return ktime_sub(sync_pt->base.timestamp, pt->dep_timestamp); | ||
366 | #endif | ||
367 | } | ||
368 | |||
369 | static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b) | 311 | static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b) |
370 | { | 312 | { |
371 | bool a_expired; | 313 | bool a_expired; |
@@ -426,39 +368,13 @@ static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str, | |||
426 | int size) | 368 | int size) |
427 | { | 369 | { |
428 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | 370 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); |
429 | ktime_t dur = gk20a_sync_pt_duration(sync_pt); | ||
430 | 371 | ||
431 | if (pt->sema) { | 372 | if (pt->sema) { |
432 | gk20a_sync_pt_value_str_for_sema(pt, str, size); | 373 | gk20a_sync_pt_value_str_for_sema(pt, str, size); |
433 | return; | 374 | return; |
434 | } | 375 | } |
435 | 376 | ||
436 | if (pt->dep) { | 377 | snprintf(str, size, "%d", pt->thresh); |
437 | snprintf(str, size, "(dep: [%p] %s) %d", | ||
438 | pt->dep, pt->dep->name, pt->thresh); | ||
439 | } else if (dur.tv64) { | ||
440 | struct timeval tv = ktime_to_timeval(dur); | ||
441 | snprintf(str, size, "(took %ld.%03ld ms) %d", | ||
442 | tv.tv_sec * 1000 + tv.tv_usec / 1000, | ||
443 | tv.tv_usec % 1000, | ||
444 | pt->thresh); | ||
445 | } else { | ||
446 | snprintf(str, size, "%d", pt->thresh); | ||
447 | } | ||
448 | } | ||
449 | |||
450 | static int gk20a_sync_fill_driver_data(struct sync_pt *sync_pt, | ||
451 | void *data, int size) | ||
452 | { | ||
453 | struct gk20a_sync_pt_info info; | ||
454 | |||
455 | if (size < (int)sizeof(info)) | ||
456 | return -ENOMEM; | ||
457 | |||
458 | info.hw_op_ns = ktime_to_ns(gk20a_sync_pt_duration(sync_pt)); | ||
459 | memcpy(data, &info, sizeof(info)); | ||
460 | |||
461 | return sizeof(info); | ||
462 | } | 378 | } |
463 | 379 | ||
464 | static const struct sync_timeline_ops gk20a_sync_timeline_ops = { | 380 | static const struct sync_timeline_ops gk20a_sync_timeline_ops = { |
@@ -467,7 +383,6 @@ static const struct sync_timeline_ops gk20a_sync_timeline_ops = { | |||
467 | .has_signaled = gk20a_sync_pt_has_signaled, | 383 | .has_signaled = gk20a_sync_pt_has_signaled, |
468 | .compare = gk20a_sync_pt_compare, | 384 | .compare = gk20a_sync_pt_compare, |
469 | .free_pt = gk20a_sync_pt_free_inst, | 385 | .free_pt = gk20a_sync_pt_free_inst, |
470 | .fill_driver_data = gk20a_sync_fill_driver_data, | ||
471 | .timeline_value_str = gk20a_sync_timeline_value_str, | 386 | .timeline_value_str = gk20a_sync_timeline_value_str, |
472 | .pt_value_str = gk20a_sync_pt_value_str, | 387 | .pt_value_str = gk20a_sync_pt_value_str, |
473 | }; | 388 | }; |
@@ -515,7 +430,6 @@ struct sync_fence *gk20a_sync_fence_create( | |||
515 | struct gk20a *g, | 430 | struct gk20a *g, |
516 | struct sync_timeline *obj, | 431 | struct sync_timeline *obj, |
517 | struct nvgpu_semaphore *sema, | 432 | struct nvgpu_semaphore *sema, |
518 | struct sync_fence *dependency, | ||
519 | const char *fmt, ...) | 433 | const char *fmt, ...) |
520 | { | 434 | { |
521 | char name[30]; | 435 | char name[30]; |
@@ -524,7 +438,7 @@ struct sync_fence *gk20a_sync_fence_create( | |||
524 | struct sync_fence *fence; | 438 | struct sync_fence *fence; |
525 | struct gk20a_sync_timeline *timeline = to_gk20a_timeline(obj); | 439 | struct gk20a_sync_timeline *timeline = to_gk20a_timeline(obj); |
526 | 440 | ||
527 | pt = gk20a_sync_pt_create_inst(g, timeline, sema, dependency); | 441 | pt = gk20a_sync_pt_create_inst(g, timeline, sema); |
528 | if (pt == NULL) | 442 | if (pt == NULL) |
529 | return NULL; | 443 | return NULL; |
530 | 444 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h index 33112bb3..a422377c 100644 --- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h | |||
@@ -37,7 +37,6 @@ struct sync_fence *gk20a_sync_fence_create( | |||
37 | struct gk20a *g, | 37 | struct gk20a *g, |
38 | struct sync_timeline *, | 38 | struct sync_timeline *, |
39 | struct nvgpu_semaphore *, | 39 | struct nvgpu_semaphore *, |
40 | struct sync_fence *dependency, | ||
41 | const char *fmt, ...); | 40 | const char *fmt, ...); |
42 | struct sync_fence *gk20a_sync_fence_fdget(int fd); | 41 | struct sync_fence *gk20a_sync_fence_fdget(int fd); |
43 | #else | 42 | #else |
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index ca9b49e6..e25667cb 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h | |||
@@ -1354,10 +1354,6 @@ struct nvgpu_alloc_gpfifo_ex_args { | |||
1354 | __u32 reserved[5]; | 1354 | __u32 reserved[5]; |
1355 | }; | 1355 | }; |
1356 | 1356 | ||
1357 | struct gk20a_sync_pt_info { | ||
1358 | __u64 hw_op_ns; | ||
1359 | }; | ||
1360 | |||
1361 | struct nvgpu_fence { | 1357 | struct nvgpu_fence { |
1362 | __u32 id; /* syncpoint id or sync fence fd */ | 1358 | __u32 id; /* syncpoint id or sync fence fd */ |
1363 | __u32 value; /* syncpoint value (discarded when using sync fence) */ | 1359 | __u32 value; /* syncpoint value (discarded when using sync fence) */ |