diff options
Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/sync_gk20a.c')
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/sync_gk20a.c | 96 |
1 files changed, 5 insertions, 91 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c index 5b888299..ecc96a7b 100644 --- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c | |||
@@ -47,8 +47,6 @@ struct gk20a_sync_pt { | |||
47 | u32 thresh; | 47 | u32 thresh; |
48 | struct nvgpu_semaphore *sema; | 48 | struct nvgpu_semaphore *sema; |
49 | struct gk20a_sync_timeline *obj; | 49 | struct gk20a_sync_timeline *obj; |
50 | struct sync_fence *dep; | ||
51 | ktime_t dep_timestamp; | ||
52 | 50 | ||
53 | /* | 51 | /* |
54 | * Use a spin lock here since it will have better performance | 52 | * Use a spin lock here since it will have better performance |
@@ -206,8 +204,6 @@ static void gk20a_sync_pt_free_shared(struct kref *ref) | |||
206 | container_of(ref, struct gk20a_sync_pt, refcount); | 204 | container_of(ref, struct gk20a_sync_pt, refcount); |
207 | struct gk20a *g = pt->g; | 205 | struct gk20a *g = pt->g; |
208 | 206 | ||
209 | if (pt->dep) | ||
210 | sync_fence_put(pt->dep); | ||
211 | if (pt->sema) | 207 | if (pt->sema) |
212 | nvgpu_semaphore_put(pt->sema); | 208 | nvgpu_semaphore_put(pt->sema); |
213 | nvgpu_kfree(g, pt); | 209 | nvgpu_kfree(g, pt); |
@@ -216,8 +212,7 @@ static void gk20a_sync_pt_free_shared(struct kref *ref) | |||
216 | static struct gk20a_sync_pt *gk20a_sync_pt_create_shared( | 212 | static struct gk20a_sync_pt *gk20a_sync_pt_create_shared( |
217 | struct gk20a *g, | 213 | struct gk20a *g, |
218 | struct gk20a_sync_timeline *obj, | 214 | struct gk20a_sync_timeline *obj, |
219 | struct nvgpu_semaphore *sema, | 215 | struct nvgpu_semaphore *sema) |
220 | struct sync_fence *dependency) | ||
221 | { | 216 | { |
222 | struct gk20a_sync_pt *shared; | 217 | struct gk20a_sync_pt *shared; |
223 | 218 | ||
@@ -231,20 +226,6 @@ static struct gk20a_sync_pt *gk20a_sync_pt_create_shared( | |||
231 | shared->sema = sema; | 226 | shared->sema = sema; |
232 | shared->thresh = ++obj->max; /* sync framework has a lock */ | 227 | shared->thresh = ++obj->max; /* sync framework has a lock */ |
233 | 228 | ||
234 | /* Store the dependency fence for this pt. */ | ||
235 | if (dependency) { | ||
236 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) | ||
237 | if (dependency->status == 0) | ||
238 | #else | ||
239 | if (!atomic_read(&dependency->status)) | ||
240 | #endif | ||
241 | shared->dep = dependency; | ||
242 | else { | ||
243 | shared->dep_timestamp = ktime_get(); | ||
244 | sync_fence_put(dependency); | ||
245 | } | ||
246 | } | ||
247 | |||
248 | nvgpu_spinlock_init(&shared->lock); | 229 | nvgpu_spinlock_init(&shared->lock); |
249 | 230 | ||
250 | nvgpu_semaphore_get(sema); | 231 | nvgpu_semaphore_get(sema); |
@@ -255,8 +236,7 @@ static struct gk20a_sync_pt *gk20a_sync_pt_create_shared( | |||
255 | static struct sync_pt *gk20a_sync_pt_create_inst( | 236 | static struct sync_pt *gk20a_sync_pt_create_inst( |
256 | struct gk20a *g, | 237 | struct gk20a *g, |
257 | struct gk20a_sync_timeline *obj, | 238 | struct gk20a_sync_timeline *obj, |
258 | struct nvgpu_semaphore *sema, | 239 | struct nvgpu_semaphore *sema) |
259 | struct sync_fence *dependency) | ||
260 | { | 240 | { |
261 | struct gk20a_sync_pt_inst *pti; | 241 | struct gk20a_sync_pt_inst *pti; |
262 | 242 | ||
@@ -265,7 +245,7 @@ static struct sync_pt *gk20a_sync_pt_create_inst( | |||
265 | if (!pti) | 245 | if (!pti) |
266 | return NULL; | 246 | return NULL; |
267 | 247 | ||
268 | pti->shared = gk20a_sync_pt_create_shared(g, obj, sema, dependency); | 248 | pti->shared = gk20a_sync_pt_create_shared(g, obj, sema); |
269 | if (!pti->shared) { | 249 | if (!pti->shared) { |
270 | sync_pt_free(&pti->pt); | 250 | sync_pt_free(&pti->pt); |
271 | return NULL; | 251 | return NULL; |
@@ -303,9 +283,6 @@ static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt) | |||
303 | { | 283 | { |
304 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | 284 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); |
305 | struct gk20a_sync_timeline *obj = pt->obj; | 285 | struct gk20a_sync_timeline *obj = pt->obj; |
306 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) | ||
307 | struct sync_pt *pos; | ||
308 | #endif | ||
309 | bool signaled = true; | 286 | bool signaled = true; |
310 | 287 | ||
311 | nvgpu_spinlock_acquire(&pt->lock); | 288 | nvgpu_spinlock_acquire(&pt->lock); |
@@ -321,29 +298,6 @@ static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt) | |||
321 | obj->min) == 1) | 298 | obj->min) == 1) |
322 | obj->min = pt->thresh; | 299 | obj->min = pt->thresh; |
323 | 300 | ||
324 | /* Release the dependency fence, but get its timestamp | ||
325 | * first.*/ | ||
326 | if (pt->dep) { | ||
327 | s64 ns = 0; | ||
328 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) | ||
329 | struct list_head *dep_pts = &pt->dep->pt_list_head; | ||
330 | list_for_each_entry(pos, dep_pts, pt_list) { | ||
331 | ns = max(ns, ktime_to_ns(pos->timestamp)); | ||
332 | } | ||
333 | #else | ||
334 | struct fence *fence; | ||
335 | int i; | ||
336 | |||
337 | for (i = 0; i < pt->dep->num_fences; i++) { | ||
338 | fence = pt->dep->cbs[i].sync_pt; | ||
339 | ns = max(ns, ktime_to_ns(fence->timestamp)); | ||
340 | } | ||
341 | #endif | ||
342 | pt->dep_timestamp = ns_to_ktime(ns); | ||
343 | sync_fence_put(pt->dep); | ||
344 | pt->dep = NULL; | ||
345 | } | ||
346 | |||
347 | /* Release the semaphore to the pool. */ | 301 | /* Release the semaphore to the pool. */ |
348 | nvgpu_semaphore_put(pt->sema); | 302 | nvgpu_semaphore_put(pt->sema); |
349 | pt->sema = NULL; | 303 | pt->sema = NULL; |
@@ -354,18 +308,6 @@ done: | |||
354 | return signaled; | 308 | return signaled; |
355 | } | 309 | } |
356 | 310 | ||
357 | static inline ktime_t gk20a_sync_pt_duration(struct sync_pt *sync_pt) | ||
358 | { | ||
359 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | ||
360 | if (!gk20a_sync_pt_has_signaled(sync_pt) || !pt->dep_timestamp.tv64) | ||
361 | return ns_to_ktime(0); | ||
362 | #if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) | ||
363 | return ktime_sub(sync_pt->timestamp, pt->dep_timestamp); | ||
364 | #else | ||
365 | return ktime_sub(sync_pt->base.timestamp, pt->dep_timestamp); | ||
366 | #endif | ||
367 | } | ||
368 | |||
369 | static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b) | 311 | static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b) |
370 | { | 312 | { |
371 | bool a_expired; | 313 | bool a_expired; |
@@ -426,39 +368,13 @@ static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str, | |||
426 | int size) | 368 | int size) |
427 | { | 369 | { |
428 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); | 370 | struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt); |
429 | ktime_t dur = gk20a_sync_pt_duration(sync_pt); | ||
430 | 371 | ||
431 | if (pt->sema) { | 372 | if (pt->sema) { |
432 | gk20a_sync_pt_value_str_for_sema(pt, str, size); | 373 | gk20a_sync_pt_value_str_for_sema(pt, str, size); |
433 | return; | 374 | return; |
434 | } | 375 | } |
435 | 376 | ||
436 | if (pt->dep) { | 377 | snprintf(str, size, "%d", pt->thresh); |
437 | snprintf(str, size, "(dep: [%p] %s) %d", | ||
438 | pt->dep, pt->dep->name, pt->thresh); | ||
439 | } else if (dur.tv64) { | ||
440 | struct timeval tv = ktime_to_timeval(dur); | ||
441 | snprintf(str, size, "(took %ld.%03ld ms) %d", | ||
442 | tv.tv_sec * 1000 + tv.tv_usec / 1000, | ||
443 | tv.tv_usec % 1000, | ||
444 | pt->thresh); | ||
445 | } else { | ||
446 | snprintf(str, size, "%d", pt->thresh); | ||
447 | } | ||
448 | } | ||
449 | |||
450 | static int gk20a_sync_fill_driver_data(struct sync_pt *sync_pt, | ||
451 | void *data, int size) | ||
452 | { | ||
453 | struct gk20a_sync_pt_info info; | ||
454 | |||
455 | if (size < (int)sizeof(info)) | ||
456 | return -ENOMEM; | ||
457 | |||
458 | info.hw_op_ns = ktime_to_ns(gk20a_sync_pt_duration(sync_pt)); | ||
459 | memcpy(data, &info, sizeof(info)); | ||
460 | |||
461 | return sizeof(info); | ||
462 | } | 378 | } |
463 | 379 | ||
464 | static const struct sync_timeline_ops gk20a_sync_timeline_ops = { | 380 | static const struct sync_timeline_ops gk20a_sync_timeline_ops = { |
@@ -467,7 +383,6 @@ static const struct sync_timeline_ops gk20a_sync_timeline_ops = { | |||
467 | .has_signaled = gk20a_sync_pt_has_signaled, | 383 | .has_signaled = gk20a_sync_pt_has_signaled, |
468 | .compare = gk20a_sync_pt_compare, | 384 | .compare = gk20a_sync_pt_compare, |
469 | .free_pt = gk20a_sync_pt_free_inst, | 385 | .free_pt = gk20a_sync_pt_free_inst, |
470 | .fill_driver_data = gk20a_sync_fill_driver_data, | ||
471 | .timeline_value_str = gk20a_sync_timeline_value_str, | 386 | .timeline_value_str = gk20a_sync_timeline_value_str, |
472 | .pt_value_str = gk20a_sync_pt_value_str, | 387 | .pt_value_str = gk20a_sync_pt_value_str, |
473 | }; | 388 | }; |
@@ -515,7 +430,6 @@ struct sync_fence *gk20a_sync_fence_create( | |||
515 | struct gk20a *g, | 430 | struct gk20a *g, |
516 | struct sync_timeline *obj, | 431 | struct sync_timeline *obj, |
517 | struct nvgpu_semaphore *sema, | 432 | struct nvgpu_semaphore *sema, |
518 | struct sync_fence *dependency, | ||
519 | const char *fmt, ...) | 433 | const char *fmt, ...) |
520 | { | 434 | { |
521 | char name[30]; | 435 | char name[30]; |
@@ -524,7 +438,7 @@ struct sync_fence *gk20a_sync_fence_create( | |||
524 | struct sync_fence *fence; | 438 | struct sync_fence *fence; |
525 | struct gk20a_sync_timeline *timeline = to_gk20a_timeline(obj); | 439 | struct gk20a_sync_timeline *timeline = to_gk20a_timeline(obj); |
526 | 440 | ||
527 | pt = gk20a_sync_pt_create_inst(g, timeline, sema, dependency); | 441 | pt = gk20a_sync_pt_create_inst(g, timeline, sema); |
528 | if (pt == NULL) | 442 | if (pt == NULL) |
529 | return NULL; | 443 | return NULL; |
530 | 444 | ||