Diffstat (limited to 'drivers/gpu/nvgpu/gk20a/fence_gk20a.c')
-rw-r--r--	drivers/gpu/nvgpu/gk20a/fence_gk20a.c	353
1 file changed, 353 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
new file mode 100644
index 00000000..d0df8857
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
@@ -0,0 +1,353 @@
/*
 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
22 | |||
23 | #include "fence_gk20a.h" | ||
24 | |||
25 | #include <linux/file.h> | ||
26 | #include <linux/fs.h> | ||
27 | |||
28 | #include <nvgpu/semaphore.h> | ||
29 | #include <nvgpu/kmem.h> | ||
30 | #include <nvgpu/soc.h> | ||
31 | #include <nvgpu/nvhost.h> | ||
32 | #include <nvgpu/barrier.h> | ||
33 | |||
34 | #include "gk20a.h" | ||
35 | #include "channel_gk20a.h" | ||
36 | #include "sync_gk20a.h" | ||
37 | |||
38 | #ifdef CONFIG_SYNC | ||
39 | #include "../drivers/staging/android/sync.h" | ||
40 | #endif | ||
41 | |||
42 | struct gk20a_fence_ops { | ||
43 | int (*wait)(struct gk20a_fence *, long timeout); | ||
44 | bool (*is_expired)(struct gk20a_fence *); | ||
45 | void *(*free)(struct nvgpu_ref *); | ||
46 | }; | ||
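
/*
 * Note: neither ops instance in this file sets the free() hook; release
 * always goes through gk20a_fence_free() below, passed directly to
 * nvgpu_ref_put() by gk20a_fence_put().
 */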

static void gk20a_fence_free(struct nvgpu_ref *ref)
{
	struct gk20a_fence *f =
		container_of(ref, struct gk20a_fence, ref);
	struct gk20a *g = f->g;

#ifdef CONFIG_SYNC
	if (f->sync_fence)
		sync_fence_put(f->sync_fence);
#endif
	if (f->semaphore)
		nvgpu_semaphore_put(f->semaphore);

	if (f->allocator) {
		if (nvgpu_alloc_initialized(f->allocator))
			nvgpu_free(f->allocator, (u64)(uintptr_t)f);
	} else
		nvgpu_kfree(g, f);
}

void gk20a_fence_put(struct gk20a_fence *f)
{
	if (f)
		nvgpu_ref_put(&f->ref, gk20a_fence_free);
}

struct gk20a_fence *gk20a_fence_get(struct gk20a_fence *f)
{
	if (f)
		nvgpu_ref_get(&f->ref);
	return f;
}
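
/*
 * Reference-counting sketch (illustrative only; "fence" is a hypothetical
 * caller variable, not part of this file). A holder takes a reference
 * before stashing the fence and drops it when done:
 *
 *	struct gk20a_fence *ref = gk20a_fence_get(fence);
 *	... use ref ...
 *	gk20a_fence_put(ref);
 *
 * The final put frees the fence through gk20a_fence_free().
 */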

static inline bool gk20a_fence_is_valid(struct gk20a_fence *f)
{
	bool valid = f->valid;

	nvgpu_smp_rmb();
	return valid;
}
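
/*
 * The read barrier above pairs with the nvgpu_smp_wmb() issued before
 * f->valid is set in gk20a_fence_from_semaphore() and
 * gk20a_fence_from_syncpt(), so a reader that observes valid == true
 * also observes the fence fields written before publication.
 */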

int gk20a_fence_wait(struct gk20a *g, struct gk20a_fence *f,
		unsigned long timeout)
{
	if (f && gk20a_fence_is_valid(f)) {
		if (!nvgpu_platform_is_silicon(g))
			timeout = MAX_SCHEDULE_TIMEOUT;
		return f->ops->wait(f, timeout);
	}
	return 0;
}
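
/*
 * Wait sketch (illustrative only; "g", "fence" and "timeout" are
 * hypothetical caller variables, with timeout units as expected by the
 * backend's wait primitive):
 *
 *	int err = gk20a_fence_wait(g, fence, timeout);
 *	if (err)
 *		... timed out or interrupted ...
 *
 * A NULL or not-yet-published fence returns 0 immediately, and
 * pre-silicon platforms override the caller's timeout with
 * MAX_SCHEDULE_TIMEOUT.
 */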

bool gk20a_fence_is_expired(struct gk20a_fence *f)
{
	if (f && gk20a_fence_is_valid(f) && f->ops)
		return f->ops->is_expired(f);
	else
		return true;
}

int gk20a_fence_install_fd(struct gk20a_fence *f)
{
#ifdef CONFIG_SYNC
	int fd;

	if (!f || !gk20a_fence_is_valid(f) || !f->sync_fence)
		return -EINVAL;

	fd = get_unused_fd_flags(O_RDWR);
	if (fd < 0)
		return fd;

	sync_fence_get(f->sync_fence);
	sync_fence_install(f->sync_fence, fd);
	return fd;
#else
	return -ENODEV;
#endif
}
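
/*
 * The returned fd is typically handed back to userspace (for example in
 * a submit ioctl reply) so it can poll on the sync fence. Sketch
 * (illustrative only; the args struct is hypothetical):
 *
 *	int fd = gk20a_fence_install_fd(fence);
 *	if (fd < 0)
 *		... no sync fence available ...
 *	args->fence_fd = fd;
 *
 * Note the sync_fence_get() above: the installed fd holds its own
 * reference, released when the fd is closed.
 */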

int gk20a_alloc_fence_pool(struct channel_gk20a *c, unsigned int count)
{
	int err;
	size_t size;
	struct gk20a_fence *fence_pool = NULL;

	size = sizeof(struct gk20a_fence);
	if (count <= UINT_MAX / size) {
		size = count * size;
		fence_pool = nvgpu_vzalloc(c->g, size);
	}

	if (!fence_pool)
		return -ENOMEM;

	err = nvgpu_lockless_allocator_init(c->g, &c->fence_allocator,
				"fence_pool", (size_t)fence_pool, size,
				sizeof(struct gk20a_fence), 0);
	if (err)
		goto fail;

	return 0;

fail:
	nvgpu_vfree(c->g, fence_pool);
	return err;
}

void gk20a_free_fence_pool(struct channel_gk20a *c)
{
	if (nvgpu_alloc_initialized(&c->fence_allocator)) {
		void *base = (void *)(uintptr_t)
				nvgpu_alloc_base(&c->fence_allocator);

		nvgpu_alloc_destroy(&c->fence_allocator);
		nvgpu_vfree(c->g, base);
	}
}
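
/*
 * Pre-allocation sketch (illustrative only; "ch" and the count of 32
 * are hypothetical). A channel that enables pre-allocated resources
 * sizes the pool once up front and tears it down symmetrically:
 *
 *	err = gk20a_alloc_fence_pool(ch, 32);
 *	...
 *	gk20a_free_fence_pool(ch);
 *
 * The overflow guard above caps count so that count * sizeof(struct
 * gk20a_fence) cannot wrap.
 */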

struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c)
{
	struct gk20a_fence *fence = NULL;

	if (channel_gk20a_is_prealloc_enabled(c)) {
		if (nvgpu_alloc_initialized(&c->fence_allocator)) {
			fence = (struct gk20a_fence *)(uintptr_t)
				nvgpu_alloc(&c->fence_allocator,
					sizeof(struct gk20a_fence));

			/* clear the node and reset the allocator pointer */
			if (fence) {
				memset(fence, 0, sizeof(*fence));
				fence->allocator = &c->fence_allocator;
			}
		}
	} else
		fence = nvgpu_kzalloc(c->g, sizeof(struct gk20a_fence));

	if (fence) {
		nvgpu_ref_init(&fence->ref);
		fence->g = c->g;
	}

	return fence;
}
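
/*
 * Two allocation paths: channels with pre-allocated resources carve
 * fences out of the lockless fence_allocator set up by
 * gk20a_alloc_fence_pool(); everything else falls back to kzalloc.
 * fence->allocator doubles as the marker that tells gk20a_fence_free()
 * which path to unwind.
 */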

void gk20a_init_fence(struct gk20a_fence *f,
		const struct gk20a_fence_ops *ops,
		struct sync_fence *sync_fence, bool wfi)
{
	if (!f)
		return;
	f->ops = ops;
	f->sync_fence = sync_fence;
	f->wfi = wfi;
	f->syncpt_id = -1;
}

/* Fences that are backed by GPU semaphores: */

static int nvgpu_semaphore_fence_wait(struct gk20a_fence *f, long timeout)
{
	if (!nvgpu_semaphore_is_acquired(f->semaphore))
		return 0;

	return NVGPU_COND_WAIT_INTERRUPTIBLE(
		f->semaphore_wq,
		!nvgpu_semaphore_is_acquired(f->semaphore),
		timeout);
}

static bool nvgpu_semaphore_fence_is_expired(struct gk20a_fence *f)
{
	return !nvgpu_semaphore_is_acquired(f->semaphore);
}

static const struct gk20a_fence_ops nvgpu_semaphore_fence_ops = {
	.wait = &nvgpu_semaphore_fence_wait,
	.is_expired = &nvgpu_semaphore_fence_is_expired,
};

/* This function takes ownership of the semaphore */
int gk20a_fence_from_semaphore(
		struct gk20a *g,
		struct gk20a_fence *fence_out,
		struct sync_timeline *timeline,
		struct nvgpu_semaphore *semaphore,
		struct nvgpu_cond *semaphore_wq,
		bool wfi, bool need_sync_fence)
{
	struct gk20a_fence *f = fence_out;
	struct sync_fence *sync_fence = NULL;

#ifdef CONFIG_SYNC
	if (need_sync_fence) {
		sync_fence = gk20a_sync_fence_create(g, timeline, semaphore,
						"f-gk20a-0x%04x",
						nvgpu_semaphore_gpu_ro_va(semaphore));
		if (!sync_fence)
			return -1;
	}
#endif

	gk20a_init_fence(f, &nvgpu_semaphore_fence_ops, sync_fence, wfi);
	if (!f) {
#ifdef CONFIG_SYNC
		if (sync_fence)
			sync_fence_put(sync_fence);
#endif
		return -EINVAL;
	}

	f->semaphore = semaphore;
	f->semaphore_wq = semaphore_wq;

	/* commit previous writes before setting the valid flag */
	nvgpu_smp_wmb();
	f->valid = true;

	return 0;
}
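
/*
 * Construction sketch (illustrative only; the caller variables are
 * hypothetical). The submit path typically allocates the fence first
 * and publishes it once the semaphore is ready:
 *
 *	fence = gk20a_alloc_fence(ch);
 *	err = gk20a_fence_from_semaphore(g, fence, timeline, sema,
 *			sema_wq, wfi, need_sync_fence);
 *
 * On success the fence owns the semaphore reference, dropped in
 * gk20a_fence_free(). gk20a_init_fence() tolerates a NULL fence, which
 * is why the !f check can safely come after the init call.
 */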

#ifdef CONFIG_TEGRA_GK20A_NVHOST
/* Fences that are backed by host1x syncpoints: */

static int gk20a_syncpt_fence_wait(struct gk20a_fence *f, long timeout)
{
	return nvgpu_nvhost_syncpt_wait_timeout_ext(
		f->nvhost_dev, f->syncpt_id, f->syncpt_value,
		(u32)timeout, NULL, NULL);
}

static bool gk20a_syncpt_fence_is_expired(struct gk20a_fence *f)
{
	/*
	 * In cases we don't register a notifier, we can't expect the
	 * syncpt value to be updated. For this case, we force a read
	 * of the value from HW, and then check for expiration.
	 */
	if (!nvgpu_nvhost_syncpt_is_expired_ext(f->nvhost_dev, f->syncpt_id,
			f->syncpt_value)) {
		u32 val;

		if (!nvgpu_nvhost_syncpt_read_ext_check(f->nvhost_dev,
				f->syncpt_id, &val)) {
			return nvgpu_nvhost_syncpt_is_expired_ext(
				f->nvhost_dev,
				f->syncpt_id, f->syncpt_value);
		}
	}

	return true;
}

static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = {
	.wait = &gk20a_syncpt_fence_wait,
	.is_expired = &gk20a_syncpt_fence_is_expired,
};

int gk20a_fence_from_syncpt(
		struct gk20a_fence *fence_out,
		struct nvgpu_nvhost_dev *nvhost_dev,
		u32 id, u32 value, bool wfi,
		bool need_sync_fence)
{
	struct gk20a_fence *f = fence_out;
	struct sync_fence *sync_fence = NULL;

#ifdef CONFIG_SYNC
	if (need_sync_fence) {
		sync_fence = nvgpu_nvhost_sync_create_fence(nvhost_dev,
					id, value, 1, "fence");
		if (IS_ERR(sync_fence))
			return -1;
	}
#endif

	gk20a_init_fence(f, &gk20a_syncpt_fence_ops, sync_fence, wfi);
	if (!f) {
#ifdef CONFIG_SYNC
		if (sync_fence)
			sync_fence_put(sync_fence);
#endif
		return -EINVAL;
	}
	f->nvhost_dev = nvhost_dev;
	f->syncpt_id = id;
	f->syncpt_value = value;

	/* commit previous writes before setting the valid flag */
	nvgpu_smp_wmb();
	f->valid = true;

	return 0;
}
#else
int gk20a_fence_from_syncpt(
		struct gk20a_fence *fence_out,
		struct nvgpu_nvhost_dev *nvhost_dev,
		u32 id, u32 value, bool wfi,
		bool need_sync_fence)
{
	return -EINVAL;
}
#endif
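
/*
 * End-to-end sketch (illustrative only; all caller variables are
 * hypothetical). A syncpt-backed job fence handed to userspace:
 *
 *	fence = gk20a_alloc_fence(ch);
 *	err = gk20a_fence_from_syncpt(fence, nvhost_dev, id, value,
 *			false, true);
 *	if (!err)
 *		args->fence_fd = gk20a_fence_install_fd(fence);
 *	gk20a_fence_put(fence);
 *
 * The fd keeps its own reference to the underlying sync_fence, so the
 * gk20a_fence itself can be dropped once the fd is installed.
 */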