summary | refs | log | tree | commit | diff | stats
path: root/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
diff options
context:
space:
mode:
author: Konsta Holtta <kholtta@nvidia.com> 2017-04-06 03:48:35 -0400
committer: mobile promotions <svcmobile_promotions@nvidia.com> 2017-04-11 12:57:21 -0400
commit: 1a4647272f4fe50137c79583b698c1ef6f5def12 (patch)
tree: 0eee9698405e4467a7644c74c3d41d4bca933a8f /drivers/gpu/nvgpu/gk20a/sync_gk20a.c
parent: 6de456f8407ba7230410175f1aff90541137d19b (diff)
gpu: nvgpu: remove fence dependency tracking
In preparation for better abstraction in job synchronization, drop support for the dependency fences tracked via submit pre-fences in semaphore-based syncs. This has only worked for semaphores, not nvhost syncpoints, and hasn't really been used. The dependency was printed in the sync framework's sync pt value string. Also remove the userspace-visible gk20a_sync_pt_info, which is unused and depends on this feature (it provided a duration measured from the dependency fence's timestamp).
Jira NVGPU-43
Change-Id: Ia2b26502a9dc8f5bef5470f94b1475001f621da1
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1456880
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/sync_gk20a.c')
-rw-r--r-- drivers/gpu/nvgpu/gk20a/sync_gk20a.c | 96
1 files changed, 5 insertions, 91 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
index 5b888299..ecc96a7b 100644
--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
@@ -47,8 +47,6 @@ struct gk20a_sync_pt {
47 u32 thresh; 47 u32 thresh;
48 struct nvgpu_semaphore *sema; 48 struct nvgpu_semaphore *sema;
49 struct gk20a_sync_timeline *obj; 49 struct gk20a_sync_timeline *obj;
50 struct sync_fence *dep;
51 ktime_t dep_timestamp;
52 50
53 /* 51 /*
54 * Use a spin lock here since it will have better performance 52 * Use a spin lock here since it will have better performance
@@ -206,8 +204,6 @@ static void gk20a_sync_pt_free_shared(struct kref *ref)
206 container_of(ref, struct gk20a_sync_pt, refcount); 204 container_of(ref, struct gk20a_sync_pt, refcount);
207 struct gk20a *g = pt->g; 205 struct gk20a *g = pt->g;
208 206
209 if (pt->dep)
210 sync_fence_put(pt->dep);
211 if (pt->sema) 207 if (pt->sema)
212 nvgpu_semaphore_put(pt->sema); 208 nvgpu_semaphore_put(pt->sema);
213 nvgpu_kfree(g, pt); 209 nvgpu_kfree(g, pt);
@@ -216,8 +212,7 @@ static void gk20a_sync_pt_free_shared(struct kref *ref)
216static struct gk20a_sync_pt *gk20a_sync_pt_create_shared( 212static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
217 struct gk20a *g, 213 struct gk20a *g,
218 struct gk20a_sync_timeline *obj, 214 struct gk20a_sync_timeline *obj,
219 struct nvgpu_semaphore *sema, 215 struct nvgpu_semaphore *sema)
220 struct sync_fence *dependency)
221{ 216{
222 struct gk20a_sync_pt *shared; 217 struct gk20a_sync_pt *shared;
223 218
@@ -231,20 +226,6 @@ static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
231 shared->sema = sema; 226 shared->sema = sema;
232 shared->thresh = ++obj->max; /* sync framework has a lock */ 227 shared->thresh = ++obj->max; /* sync framework has a lock */
233 228
234 /* Store the dependency fence for this pt. */
235 if (dependency) {
236#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
237 if (dependency->status == 0)
238#else
239 if (!atomic_read(&dependency->status))
240#endif
241 shared->dep = dependency;
242 else {
243 shared->dep_timestamp = ktime_get();
244 sync_fence_put(dependency);
245 }
246 }
247
248 nvgpu_spinlock_init(&shared->lock); 229 nvgpu_spinlock_init(&shared->lock);
249 230
250 nvgpu_semaphore_get(sema); 231 nvgpu_semaphore_get(sema);
@@ -255,8 +236,7 @@ static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
255static struct sync_pt *gk20a_sync_pt_create_inst( 236static struct sync_pt *gk20a_sync_pt_create_inst(
256 struct gk20a *g, 237 struct gk20a *g,
257 struct gk20a_sync_timeline *obj, 238 struct gk20a_sync_timeline *obj,
258 struct nvgpu_semaphore *sema, 239 struct nvgpu_semaphore *sema)
259 struct sync_fence *dependency)
260{ 240{
261 struct gk20a_sync_pt_inst *pti; 241 struct gk20a_sync_pt_inst *pti;
262 242
@@ -265,7 +245,7 @@ static struct sync_pt *gk20a_sync_pt_create_inst(
265 if (!pti) 245 if (!pti)
266 return NULL; 246 return NULL;
267 247
268 pti->shared = gk20a_sync_pt_create_shared(g, obj, sema, dependency); 248 pti->shared = gk20a_sync_pt_create_shared(g, obj, sema);
269 if (!pti->shared) { 249 if (!pti->shared) {
270 sync_pt_free(&pti->pt); 250 sync_pt_free(&pti->pt);
271 return NULL; 251 return NULL;
@@ -303,9 +283,6 @@ static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt)
303{ 283{
304 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); 284 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
305 struct gk20a_sync_timeline *obj = pt->obj; 285 struct gk20a_sync_timeline *obj = pt->obj;
306#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
307 struct sync_pt *pos;
308#endif
309 bool signaled = true; 286 bool signaled = true;
310 287
311 nvgpu_spinlock_acquire(&pt->lock); 288 nvgpu_spinlock_acquire(&pt->lock);
@@ -321,29 +298,6 @@ static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt)
321 obj->min) == 1) 298 obj->min) == 1)
322 obj->min = pt->thresh; 299 obj->min = pt->thresh;
323 300
324 /* Release the dependency fence, but get its timestamp
325 * first.*/
326 if (pt->dep) {
327 s64 ns = 0;
328#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
329 struct list_head *dep_pts = &pt->dep->pt_list_head;
330 list_for_each_entry(pos, dep_pts, pt_list) {
331 ns = max(ns, ktime_to_ns(pos->timestamp));
332 }
333#else
334 struct fence *fence;
335 int i;
336
337 for (i = 0; i < pt->dep->num_fences; i++) {
338 fence = pt->dep->cbs[i].sync_pt;
339 ns = max(ns, ktime_to_ns(fence->timestamp));
340 }
341#endif
342 pt->dep_timestamp = ns_to_ktime(ns);
343 sync_fence_put(pt->dep);
344 pt->dep = NULL;
345 }
346
347 /* Release the semaphore to the pool. */ 301 /* Release the semaphore to the pool. */
348 nvgpu_semaphore_put(pt->sema); 302 nvgpu_semaphore_put(pt->sema);
349 pt->sema = NULL; 303 pt->sema = NULL;
@@ -354,18 +308,6 @@ done:
354 return signaled; 308 return signaled;
355} 309}
356 310
357static inline ktime_t gk20a_sync_pt_duration(struct sync_pt *sync_pt)
358{
359 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
360 if (!gk20a_sync_pt_has_signaled(sync_pt) || !pt->dep_timestamp.tv64)
361 return ns_to_ktime(0);
362#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
363 return ktime_sub(sync_pt->timestamp, pt->dep_timestamp);
364#else
365 return ktime_sub(sync_pt->base.timestamp, pt->dep_timestamp);
366#endif
367}
368
369static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b) 311static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b)
370{ 312{
371 bool a_expired; 313 bool a_expired;
@@ -426,39 +368,13 @@ static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str,
426 int size) 368 int size)
427{ 369{
428 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); 370 struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
429 ktime_t dur = gk20a_sync_pt_duration(sync_pt);
430 371
431 if (pt->sema) { 372 if (pt->sema) {
432 gk20a_sync_pt_value_str_for_sema(pt, str, size); 373 gk20a_sync_pt_value_str_for_sema(pt, str, size);
433 return; 374 return;
434 } 375 }
435 376
436 if (pt->dep) { 377 snprintf(str, size, "%d", pt->thresh);
437 snprintf(str, size, "(dep: [%p] %s) %d",
438 pt->dep, pt->dep->name, pt->thresh);
439 } else if (dur.tv64) {
440 struct timeval tv = ktime_to_timeval(dur);
441 snprintf(str, size, "(took %ld.%03ld ms) %d",
442 tv.tv_sec * 1000 + tv.tv_usec / 1000,
443 tv.tv_usec % 1000,
444 pt->thresh);
445 } else {
446 snprintf(str, size, "%d", pt->thresh);
447 }
448}
449
450static int gk20a_sync_fill_driver_data(struct sync_pt *sync_pt,
451 void *data, int size)
452{
453 struct gk20a_sync_pt_info info;
454
455 if (size < (int)sizeof(info))
456 return -ENOMEM;
457
458 info.hw_op_ns = ktime_to_ns(gk20a_sync_pt_duration(sync_pt));
459 memcpy(data, &info, sizeof(info));
460
461 return sizeof(info);
462} 378}
463 379
464static const struct sync_timeline_ops gk20a_sync_timeline_ops = { 380static const struct sync_timeline_ops gk20a_sync_timeline_ops = {
@@ -467,7 +383,6 @@ static const struct sync_timeline_ops gk20a_sync_timeline_ops = {
467 .has_signaled = gk20a_sync_pt_has_signaled, 383 .has_signaled = gk20a_sync_pt_has_signaled,
468 .compare = gk20a_sync_pt_compare, 384 .compare = gk20a_sync_pt_compare,
469 .free_pt = gk20a_sync_pt_free_inst, 385 .free_pt = gk20a_sync_pt_free_inst,
470 .fill_driver_data = gk20a_sync_fill_driver_data,
471 .timeline_value_str = gk20a_sync_timeline_value_str, 386 .timeline_value_str = gk20a_sync_timeline_value_str,
472 .pt_value_str = gk20a_sync_pt_value_str, 387 .pt_value_str = gk20a_sync_pt_value_str,
473}; 388};
@@ -515,7 +430,6 @@ struct sync_fence *gk20a_sync_fence_create(
515 struct gk20a *g, 430 struct gk20a *g,
516 struct sync_timeline *obj, 431 struct sync_timeline *obj,
517 struct nvgpu_semaphore *sema, 432 struct nvgpu_semaphore *sema,
518 struct sync_fence *dependency,
519 const char *fmt, ...) 433 const char *fmt, ...)
520{ 434{
521 char name[30]; 435 char name[30];
@@ -524,7 +438,7 @@ struct sync_fence *gk20a_sync_fence_create(
524 struct sync_fence *fence; 438 struct sync_fence *fence;
525 struct gk20a_sync_timeline *timeline = to_gk20a_timeline(obj); 439 struct gk20a_sync_timeline *timeline = to_gk20a_timeline(obj);
526 440
527 pt = gk20a_sync_pt_create_inst(g, timeline, sema, dependency); 441 pt = gk20a_sync_pt_create_inst(g, timeline, sema);
528 if (pt == NULL) 442 if (pt == NULL)
529 return NULL; 443 return NULL;
530 444