summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c25
-rw-r--r--drivers/gpu/nvgpu/gk20a/fence_gk20a.c3
-rw-r--r--drivers/gpu/nvgpu/gk20a/fence_gk20a.h1
-rw-r--r--drivers/gpu/nvgpu/gk20a/sync_gk20a.c96
-rw-r--r--drivers/gpu/nvgpu/gk20a/sync_gk20a.h1
-rw-r--r--include/uapi/linux/nvgpu.h4
6 files changed, 12 insertions, 118 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index fbeb1e4a..6d0f0854 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -678,7 +678,7 @@ static int gk20a_channel_semaphore_wait_fd(
678 sema->timeline, 678 sema->timeline,
679 fp_sema, 679 fp_sema,
680 &c->semaphore_wq, 680 &c->semaphore_wq,
681 NULL, false, false); 681 false, false);
682 if (err) { 682 if (err) {
683 nvgpu_semaphore_put(fp_sema); 683 nvgpu_semaphore_put(fp_sema);
684 goto clean_up_priv_cmd; 684 goto clean_up_priv_cmd;
@@ -742,7 +742,7 @@ static int gk20a_channel_semaphore_wait_fd(
742 * already signaled 742 * already signaled
743 */ 743 */
744 err = gk20a_fence_from_semaphore(c->g, fence, sema->timeline, w->sema, 744 err = gk20a_fence_from_semaphore(c->g, fence, sema->timeline, w->sema,
745 &c->semaphore_wq, NULL, false, false); 745 &c->semaphore_wq, false, false);
746 if (err) 746 if (err)
747 goto clean_up_sema; 747 goto clean_up_sema;
748 748
@@ -787,7 +787,6 @@ clean_up_sync_fence:
787 787
788static int __gk20a_channel_semaphore_incr( 788static int __gk20a_channel_semaphore_incr(
789 struct gk20a_channel_sync *s, bool wfi_cmd, 789 struct gk20a_channel_sync *s, bool wfi_cmd,
790 struct sync_fence *dependency,
791 struct priv_cmd_entry *incr_cmd, 790 struct priv_cmd_entry *incr_cmd,
792 struct gk20a_fence *fence, 791 struct gk20a_fence *fence,
793 bool need_sync_fence) 792 bool need_sync_fence)
@@ -820,7 +819,7 @@ static int __gk20a_channel_semaphore_incr(
820 err = gk20a_fence_from_semaphore(c->g, fence, 819 err = gk20a_fence_from_semaphore(c->g, fence,
821 sp->timeline, semaphore, 820 sp->timeline, semaphore,
822 &c->semaphore_wq, 821 &c->semaphore_wq,
823 dependency, wfi_cmd, 822 wfi_cmd,
824 need_sync_fence); 823 need_sync_fence);
825 if (err) 824 if (err)
826 goto clean_up_sema; 825 goto clean_up_sema;
@@ -839,7 +838,6 @@ static int gk20a_channel_semaphore_incr_wfi(
839{ 838{
840 return __gk20a_channel_semaphore_incr(s, 839 return __gk20a_channel_semaphore_incr(s,
841 true /* wfi */, 840 true /* wfi */,
842 NULL,
843 entry, fence, true); 841 entry, fence, true);
844} 842}
845 843
@@ -854,7 +852,6 @@ static int gk20a_channel_semaphore_incr(
854 * a fence to user space. */ 852 * a fence to user space. */
855 return __gk20a_channel_semaphore_incr(s, 853 return __gk20a_channel_semaphore_incr(s,
856 false /* no wfi */, 854 false /* no wfi */,
857 NULL,
858 entry, fence, need_sync_fence); 855 entry, fence, need_sync_fence);
859} 856}
860 857
@@ -868,22 +865,12 @@ static int gk20a_channel_semaphore_incr_user(
868 bool register_irq) 865 bool register_irq)
869{ 866{
870#ifdef CONFIG_SYNC 867#ifdef CONFIG_SYNC
871 struct sync_fence *dependency = NULL;
872 int err; 868 int err;
873 869
874 if (wait_fence_fd >= 0) { 870 err = __gk20a_channel_semaphore_incr(s, wfi, entry, fence,
875 dependency = gk20a_sync_fence_fdget(wait_fence_fd); 871 need_sync_fence);
876 if (!dependency) 872 if (err)
877 return -EINVAL;
878 }
879
880 err = __gk20a_channel_semaphore_incr(s, wfi, dependency,
881 entry, fence, need_sync_fence);
882 if (err) {
883 if (dependency)
884 sync_fence_put(dependency);
885 return err; 873 return err;
886 }
887 874
888 return 0; 875 return 0;
889#else 876#else
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
index 63da0959..a5aeae08 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
@@ -233,7 +233,6 @@ int gk20a_fence_from_semaphore(
233 struct sync_timeline *timeline, 233 struct sync_timeline *timeline,
234 struct nvgpu_semaphore *semaphore, 234 struct nvgpu_semaphore *semaphore,
235 wait_queue_head_t *semaphore_wq, 235 wait_queue_head_t *semaphore_wq,
236 struct sync_fence *dependency,
237 bool wfi, bool need_sync_fence) 236 bool wfi, bool need_sync_fence)
238{ 237{
239 struct gk20a_fence *f = fence_out; 238 struct gk20a_fence *f = fence_out;
@@ -242,7 +241,7 @@ int gk20a_fence_from_semaphore(
242#ifdef CONFIG_SYNC 241#ifdef CONFIG_SYNC
243 if (need_sync_fence) { 242 if (need_sync_fence) {
244 sync_fence = gk20a_sync_fence_create(g, timeline, semaphore, 243 sync_fence = gk20a_sync_fence_create(g, timeline, semaphore,
245 dependency, "f-gk20a-0x%04x", 244 "f-gk20a-0x%04x",
246 nvgpu_semaphore_gpu_ro_va(semaphore)); 245 nvgpu_semaphore_gpu_ro_va(semaphore));
247 if (!sync_fence) 246 if (!sync_fence)
248 return -1; 247 return -1;
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
index fccf0c27..426556cc 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
@@ -58,7 +58,6 @@ int gk20a_fence_from_semaphore(
58 struct sync_timeline *timeline, 58 struct sync_timeline *timeline,
59 struct nvgpu_semaphore *semaphore, 59 struct nvgpu_semaphore *semaphore,
60 wait_queue_head_t *semaphore_wq, 60 wait_queue_head_t *semaphore_wq,
61 struct sync_fence *dependency,
62 bool wfi, bool need_sync_fence); 61 bool wfi, bool need_sync_fence);
63 62
64int gk20a_fence_from_syncpt( 63int gk20a_fence_from_syncpt(
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
index 5b888299..ecc96a7b 100644
--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
@@ -47,8 +47,6 @@ struct gk20a_sync_pt {
47 u32 thresh; 47 u32 thresh;
48 struct nvgpu_semaphore *sema; 48 struct nvgpu_semaphore *sema;
49 struct gk20a_sync_timeline *obj; 49 struct gk20a_sync_timeline *obj;
50 struct sync_fence *dep;
51 ktime_t dep_timestamp;
52 50
53 /* 51 /*
54 * Use a spin lock here since it will have better performance 52 * Use a spin lock here since it will have better performance
@@ -206,8 +204,6 @@ static void gk20a_sync_pt_free_shared(struct kref *ref)
206 container_of(ref, struct gk20a_sync_pt, refcount); 204 container_of(ref, struct gk20a_sync_pt, refcount);
207 struct gk20a *g = pt->g; 205 struct gk20a *g = pt->g;
208 206
209 if (pt->dep)
210 sync_fence_put(pt->dep);
211 if (pt->sema) 207 if (pt->sema)
212 nvgpu_semaphore_put(pt->sema); 208 nvgpu_semaphore_put(pt->sema);
213 nvgpu_kfree(g, pt); 209 nvgpu_kfree(g, pt);
@@ -216,8 +212,7 @@ static void gk20a_sync_pt_free_shared(struct kref *ref)
216static struct gk20a_sync_pt *gk20a_sync_pt_create_shared( 212static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
217 struct gk20a *g, 213 struct gk20a *g,
218 struct gk20a_sync_timeline *obj, 214 struct gk20a_sync_timeline *obj,
219 struct nvgpu_semaphore *sema, 215 struct nvgpu_semaphore *sema)
220 struct sync_fence *dependency)
221{ 216{
222 struct gk20a_sync_pt *shared; 217 struct gk20a_sync_pt *shared;
223 218
@@ -231,20 +226,6 @@ static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
231 shared->sema = sema; 226 shared->sema = sema;
232 shared->thresh = ++obj->max; /* sync framework has a lock */ 227 shared->thresh = ++obj->max; /* sync framework has a lock */
233 228
234 /* Store the dependency fence for this pt. */
235 if (dependency) {
236#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
237 if (dependency->status == 0)
238#else
239 if (!atomic_read(&dependency->status))
240#endif
241 shared->dep = dependency;
242 else {
243 shared->dep_timestamp = ktime_get();
244 sync_fence_put(dependency);
245 }
246 }
247
248 nvgpu_spinlock_init(&shared->lock); 229 nvgpu_spinlock_init(&shared->lock);
249 230
250 nvgpu_semaphore_get(sema); 231 nvgpu_semaphore_get(sema);
@@ -255,8 +236,7 @@ static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
255static struct sync_pt *gk20a_sync_pt_create_inst( 236static struct sync_pt *gk20a_sync_pt_create_inst(
256 struct gk20a *g, 237 struct gk20a *g,
257 struct gk20a_sync_timeline *obj, 238 struct gk20a_sync_timeline *obj,
258 struct nvgpu_semaphore *sema, 239 struct nvgpu_semaphore *sema)
259 struct sync_fence *dependency)
260{ 240{
261 struct gk20a_sync_pt_inst *pti; 241 struct gk20a_sync_pt_inst *pti;
262 242
@@ -265,7 +245,7 @@ static struct sync_pt *gk20a_sync_pt_create_inst(
265 if (!pti) 245 if (!pti)
266 return NULL; 246 return NULL;
267 247
268 pti->shared = gk20a_sync_pt_create_shared(g, obj, sema, dependency); 248 pti->shared = gk20a_sync_pt_create_shared(g, obj, sema);
269 if (!pti->shared) { 249 if (!pti->shared) {
270 sync_pt_free(&pti->pt); 250 sync_pt_free(&pti->pt);
271 return NULL; 251 return NULL;
@@ -303,9 +283,6 @@ static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt)
303{ 283{
304 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); 284 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
305 struct gk20a_sync_timeline *obj = pt->obj; 285 struct gk20a_sync_timeline *obj = pt->obj;
306#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
307 struct sync_pt *pos;
308#endif
309 bool signaled = true; 286 bool signaled = true;
310 287
311 nvgpu_spinlock_acquire(&pt->lock); 288 nvgpu_spinlock_acquire(&pt->lock);
@@ -321,29 +298,6 @@ static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt)
321 obj->min) == 1) 298 obj->min) == 1)
322 obj->min = pt->thresh; 299 obj->min = pt->thresh;
323 300
324 /* Release the dependency fence, but get its timestamp
325 * first.*/
326 if (pt->dep) {
327 s64 ns = 0;
328#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
329 struct list_head *dep_pts = &pt->dep->pt_list_head;
330 list_for_each_entry(pos, dep_pts, pt_list) {
331 ns = max(ns, ktime_to_ns(pos->timestamp));
332 }
333#else
334 struct fence *fence;
335 int i;
336
337 for (i = 0; i < pt->dep->num_fences; i++) {
338 fence = pt->dep->cbs[i].sync_pt;
339 ns = max(ns, ktime_to_ns(fence->timestamp));
340 }
341#endif
342 pt->dep_timestamp = ns_to_ktime(ns);
343 sync_fence_put(pt->dep);
344 pt->dep = NULL;
345 }
346
347 /* Release the semaphore to the pool. */ 301 /* Release the semaphore to the pool. */
348 nvgpu_semaphore_put(pt->sema); 302 nvgpu_semaphore_put(pt->sema);
349 pt->sema = NULL; 303 pt->sema = NULL;
@@ -354,18 +308,6 @@ done:
354 return signaled; 308 return signaled;
355} 309}
356 310
357static inline ktime_t gk20a_sync_pt_duration(struct sync_pt *sync_pt)
358{
359 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
360 if (!gk20a_sync_pt_has_signaled(sync_pt) || !pt->dep_timestamp.tv64)
361 return ns_to_ktime(0);
362#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
363 return ktime_sub(sync_pt->timestamp, pt->dep_timestamp);
364#else
365 return ktime_sub(sync_pt->base.timestamp, pt->dep_timestamp);
366#endif
367}
368
369static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b) 311static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b)
370{ 312{
371 bool a_expired; 313 bool a_expired;
@@ -426,39 +368,13 @@ static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str,
426 int size) 368 int size)
427{ 369{
428 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); 370 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
429 ktime_t dur = gk20a_sync_pt_duration(sync_pt);
430 371
431 if (pt->sema) { 372 if (pt->sema) {
432 gk20a_sync_pt_value_str_for_sema(pt, str, size); 373 gk20a_sync_pt_value_str_for_sema(pt, str, size);
433 return; 374 return;
434 } 375 }
435 376
436 if (pt->dep) { 377 snprintf(str, size, "%d", pt->thresh);
437 snprintf(str, size, "(dep: [%p] %s) %d",
438 pt->dep, pt->dep->name, pt->thresh);
439 } else if (dur.tv64) {
440 struct timeval tv = ktime_to_timeval(dur);
441 snprintf(str, size, "(took %ld.%03ld ms) %d",
442 tv.tv_sec * 1000 + tv.tv_usec / 1000,
443 tv.tv_usec % 1000,
444 pt->thresh);
445 } else {
446 snprintf(str, size, "%d", pt->thresh);
447 }
448}
449
450static int gk20a_sync_fill_driver_data(struct sync_pt *sync_pt,
451 void *data, int size)
452{
453 struct gk20a_sync_pt_info info;
454
455 if (size < (int)sizeof(info))
456 return -ENOMEM;
457
458 info.hw_op_ns = ktime_to_ns(gk20a_sync_pt_duration(sync_pt));
459 memcpy(data, &info, sizeof(info));
460
461 return sizeof(info);
462} 378}
463 379
464static const struct sync_timeline_ops gk20a_sync_timeline_ops = { 380static const struct sync_timeline_ops gk20a_sync_timeline_ops = {
@@ -467,7 +383,6 @@ static const struct sync_timeline_ops gk20a_sync_timeline_ops = {
467 .has_signaled = gk20a_sync_pt_has_signaled, 383 .has_signaled = gk20a_sync_pt_has_signaled,
468 .compare = gk20a_sync_pt_compare, 384 .compare = gk20a_sync_pt_compare,
469 .free_pt = gk20a_sync_pt_free_inst, 385 .free_pt = gk20a_sync_pt_free_inst,
470 .fill_driver_data = gk20a_sync_fill_driver_data,
471 .timeline_value_str = gk20a_sync_timeline_value_str, 386 .timeline_value_str = gk20a_sync_timeline_value_str,
472 .pt_value_str = gk20a_sync_pt_value_str, 387 .pt_value_str = gk20a_sync_pt_value_str,
473}; 388};
@@ -515,7 +430,6 @@ struct sync_fence *gk20a_sync_fence_create(
515 struct gk20a *g, 430 struct gk20a *g,
516 struct sync_timeline *obj, 431 struct sync_timeline *obj,
517 struct nvgpu_semaphore *sema, 432 struct nvgpu_semaphore *sema,
518 struct sync_fence *dependency,
519 const char *fmt, ...) 433 const char *fmt, ...)
520{ 434{
521 char name[30]; 435 char name[30];
@@ -524,7 +438,7 @@ struct sync_fence *gk20a_sync_fence_create(
524 struct sync_fence *fence; 438 struct sync_fence *fence;
525 struct gk20a_sync_timeline *timeline = to_gk20a_timeline(obj); 439 struct gk20a_sync_timeline *timeline = to_gk20a_timeline(obj);
526 440
527 pt = gk20a_sync_pt_create_inst(g, timeline, sema, dependency); 441 pt = gk20a_sync_pt_create_inst(g, timeline, sema);
528 if (pt == NULL) 442 if (pt == NULL)
529 return NULL; 443 return NULL;
530 444
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h
index 33112bb3..a422377c 100644
--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h
@@ -37,7 +37,6 @@ struct sync_fence *gk20a_sync_fence_create(
37 struct gk20a *g, 37 struct gk20a *g,
38 struct sync_timeline *, 38 struct sync_timeline *,
39 struct nvgpu_semaphore *, 39 struct nvgpu_semaphore *,
40 struct sync_fence *dependency,
41 const char *fmt, ...); 40 const char *fmt, ...);
42struct sync_fence *gk20a_sync_fence_fdget(int fd); 41struct sync_fence *gk20a_sync_fence_fdget(int fd);
43#else 42#else
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index ca9b49e6..e25667cb 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -1354,10 +1354,6 @@ struct nvgpu_alloc_gpfifo_ex_args {
1354 __u32 reserved[5]; 1354 __u32 reserved[5];
1355}; 1355};
1356 1356
1357struct gk20a_sync_pt_info {
1358 __u64 hw_op_ns;
1359};
1360
1361struct nvgpu_fence { 1357struct nvgpu_fence {
1362 __u32 id; /* syncpoint id or sync fence fd */ 1358 __u32 id; /* syncpoint id or sync fence fd */
1363 __u32 value; /* syncpoint value (discarded when using sync fence) */ 1359 __u32 value; /* syncpoint value (discarded when using sync fence) */