summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--  drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 19
-rw-r--r--  drivers/gpu/nvgpu/gk20a/sync_gk20a.c         | 124
-rw-r--r--  drivers/gpu/nvgpu/gk20a/sync_gk20a.h         | 9
3 files changed, 51 insertions, 101 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 98363c88..0eff4e2f 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -510,24 +510,7 @@ static int gk20a_channel_semaphore_wait_syncpt(
510 return -ENODEV; 510 return -ENODEV;
511} 511}
512 512
513/*
514 * UGHHH - the sync_fence underlying implementation changes from 3.10 to 3.18.
515 * But since there's no API for getting the underlying sync_pts we have to do
516 * some conditional compilation.
517 */
518#ifdef CONFIG_SYNC 513#ifdef CONFIG_SYNC
519static struct gk20a_semaphore *sema_from_sync_fence(struct sync_fence *f)
520{
521#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
522 struct sync_pt *pt;
523
524 pt = list_first_entry(&f->pt_list_head, struct sync_pt, pt_list);
525 return gk20a_sync_pt_inst_get_sema(pt);
526#else
527 return gk20a_sync_pt_inst_get_sema(f->cbs[0].sync_pt);
528#endif
529}
530
531/* 514/*
532 * Attempt a fast path for waiting on a sync_fence. Basically if the passed 515 * Attempt a fast path for waiting on a sync_fence. Basically if the passed
533 * sync_fence is backed by a gk20a_semaphore then there's no reason to go 516 * sync_fence is backed by a gk20a_semaphore then there's no reason to go
@@ -551,7 +534,7 @@ static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c,
551 if (!gk20a_is_sema_backed_sync_fence(fence)) 534 if (!gk20a_is_sema_backed_sync_fence(fence))
552 return -ENODEV; 535 return -ENODEV;
553 536
554 sema = sema_from_sync_fence(fence); 537 sema = gk20a_sync_fence_get_sema(fence);
555 538
556 /* 539 /*
557 * If there's no underlying sema then that means the underlying sema has 540 * If there's no underlying sema then that means the underlying sema has
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
index de30eed4..af6af70e 100644
--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
@@ -57,110 +57,85 @@ struct gk20a_sync_pt {
57 57
58struct gk20a_sync_pt_inst { 58struct gk20a_sync_pt_inst {
59 struct sync_pt pt; 59 struct sync_pt pt;
60
61 /*
62 * Magic number to identify a gk20a_sync_pt_inst from either a struct
63 * fence or a struct sync_pt.
64 */
65#define GK20A_SYNC_PT_INST_MAGIC 0xb333eeef;
66 u32 magic;
67
68 struct gk20a_sync_pt *shared; 60 struct gk20a_sync_pt *shared;
69}; 61};
70 62
71/** 63/**
72 * Check if a sync_pt is a gk20a_sync_pt_inst.
73 */
74int __gk20a_is_gk20a_sync_pt_inst(struct sync_pt *pt)
75{
76 struct gk20a_sync_pt_inst *pti =
77 container_of(pt, struct gk20a_sync_pt_inst, pt);
78
79 return pti->magic == GK20A_SYNC_PT_INST_MAGIC;
80}
81
82#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,18,0)
83/**
84 * Check if a fence is a gk20a_sync_pt_inst.
85 */
86int gk20a_is_gk20a_sync_pt_inst(struct fence *f)
87{
88 struct sync_pt *pt = container_of(f, struct sync_pt, base);
89
90 return __gk20a_is_gk20a_sync_pt_inst(pt);
91}
92
93/**
94 * Get the underlying semaphore from a gk20a_sync_pt_inst. This assumes the
95 * passed fence is in fact a gk20a_sync_pt_inst - use
96 * gk20a_is_gk20a_sync_pt_inst() to verify this before using this function.
97 */
98struct gk20a_semaphore *gk20a_sync_pt_inst_get_sema(struct fence *f)
99{
100 struct sync_pt *pt = container_of(f, struct sync_pt, base);
101 struct gk20a_sync_pt_inst *pti =
102 container_of(pt, struct gk20a_sync_pt_inst, pt);
103
104 BUG_ON(!gk20a_is_gk20a_sync_pt_inst(f));
105
106 return pti->shared->sema;
107}
108#else
109/**
110 * Get the underlying semaphore from a gk20a_sync_pt_inst. This assumes the
111 * passed sync_pt is in fact a gk20a_sync_pt_inst - use
112 * gk20a_is_gk20a_sync_pt_inst() to verify this before using this function.
113 */
114struct gk20a_semaphore *gk20a_sync_pt_inst_get_sema(struct sync_pt *pt)
115{
116 struct gk20a_sync_pt_inst *pti;
117
118 BUG_ON(!__gk20a_is_gk20a_sync_pt_inst(pt));
119 pti = container_of(pt, struct gk20a_sync_pt_inst, pt);
120
121 return pti->shared->sema;
122}
123#endif
124
125/**
126 * Check if the passed sync_fence is backed by a single GPU semaphore. In such 64 * Check if the passed sync_fence is backed by a single GPU semaphore. In such
127 * cases we can short circuit a lot of SW involved in signaling pre-fences and 65 * cases we can short circuit a lot of SW involved in signaling pre-fences and
128 * post fences. 66 * post fences.
67 *
68 * For now reject multi-sync_pt fences. This could be changed in future. It
69 * would require that the sema fast path push a sema acquire for each semaphore
70 * in the fence.
129 */ 71 */
130int gk20a_is_sema_backed_sync_fence(struct sync_fence *fence) 72int gk20a_is_sema_backed_sync_fence(struct sync_fence *fence)
131{ 73{
74 struct sync_timeline *t;
75
132#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0) 76#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
133 struct sync_pt *pt; 77 struct sync_pt *spt;
134 int i = 0; 78 int i = 0;
135 79
136 if (list_empty(&fence->pt_list_head)) 80 if (list_empty(&fence->pt_list_head))
137 return 0; 81 return 0;
138 82
139 /* 83 list_for_each_entry(spt, &fence->pt_list_head, pt_list) {
140 * For now reject multi-sync_pt fences. This could be changed in
141 * future. It would require that the sema fast path push a sema
142 * acquire for each semaphore in the fence.
143 */
144 list_for_each_entry(pt, &fence->pt_list_head, pt_list) {
145 i++; 84 i++;
146 85
147 if (i >= 2) 86 if (i >= 2)
148 return 0; 87 return 0;
149 } 88 }
150 89
151 pt = list_first_entry(&fence->pt_list_head, struct sync_pt, pt_list); 90 spt = list_first_entry(&fence->pt_list_head, struct sync_pt, pt_list);
152 return __gk20a_is_gk20a_sync_pt_inst(pt); 91 t = spt->parent;
153
154#else 92#else
155 struct sync_fence_cb *cb0 = &fence->cbs[0]; 93 struct fence *pt = fence->cbs[0].sync_pt;
94 struct sync_pt *spt = sync_pt_from_fence(pt);
156 95
157 if (fence->num_fences != 1) 96 if (fence->num_fences != 1)
158 return 0; 97 return 0;
159 98
160 return gk20a_is_gk20a_sync_pt_inst(cb0->sync_pt); 99 if (spt == NULL)
100 return 0;
101
102 t = sync_pt_parent(spt);
161#endif 103#endif
104
105 if (t->ops == &gk20a_sync_timeline_ops)
106 return 1;
107 return 0;
162} 108}
163 109
110struct gk20a_semaphore *gk20a_sync_fence_get_sema(struct sync_fence *f)
111{
112 struct sync_pt *spt;
113 struct gk20a_sync_pt_inst *pti;
114
115#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
116 if (!f)
117 return NULL;
118
119 if (!gk20a_is_sema_backed_sync_fence(f))
120 return NULL;
121
122 spt = list_first_entry(&f->pt_list_head, struct sync_pt, pt_list);
123#else
124 struct fence *pt;
125
126 if (!f)
127 return NULL;
128
129 if (!gk20a_is_sema_backed_sync_fence(f))
130 return NULL;
131
132 pt = f->cbs[0].sync_pt;
133 spt = sync_pt_from_fence(pt);
134#endif
135 pti = container_of(spt, struct gk20a_sync_pt_inst, pt);
136
137 return pti->shared->sema;
138}
164 139
165/** 140/**
166 * Compares sync pt values a and b, both of which will trigger either before 141 * Compares sync pt values a and b, both of which will trigger either before
@@ -283,7 +258,6 @@ static struct sync_pt *gk20a_sync_pt_create_inst(
283 if (!pti) 258 if (!pti)
284 return NULL; 259 return NULL;
285 260
286 pti->magic = GK20A_SYNC_PT_INST_MAGIC;
287 pti->shared = gk20a_sync_pt_create_shared(obj, sema, dependency); 261 pti->shared = gk20a_sync_pt_create_shared(obj, sema, dependency);
288 if (!pti->shared) { 262 if (!pti->shared) {
289 sync_pt_free(&pti->pt); 263 sync_pt_free(&pti->pt);
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h
index f885febd..e5b31471 100644
--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.h
@@ -27,15 +27,8 @@ struct sync_pt;
27struct gk20a_semaphore; 27struct gk20a_semaphore;
28struct fence; 28struct fence;
29 29
30int __gk20a_is_gk20a_sync_pt_inst(struct sync_pt *pt);
31int gk20a_is_gk20a_sync_pt_inst(struct fence *f);
32int gk20a_is_sema_backed_sync_fence(struct sync_fence *fence); 30int gk20a_is_sema_backed_sync_fence(struct sync_fence *fence);
33 31struct gk20a_semaphore *gk20a_sync_fence_get_sema(struct sync_fence *f);
34#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,18,0)
35struct gk20a_semaphore *gk20a_sync_pt_inst_get_sema(struct fence *f);
36#else
37struct gk20a_semaphore *gk20a_sync_pt_inst_get_sema(struct sync_pt *pt);
38#endif
39 32
40#ifdef CONFIG_SYNC 33#ifdef CONFIG_SYNC
41struct sync_timeline *gk20a_sync_timeline_create(const char *fmt, ...); 34struct sync_timeline *gk20a_sync_timeline_create(const char *fmt, ...);