diff options
-rw-r--r-- | drivers/gpu/nvgpu/common/semaphore.c | 139 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/semaphore.h | 148 |
2 files changed, 150 insertions, 137 deletions
diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c index 25bd3be3..5814a737 100644 --- a/drivers/gpu/nvgpu/common/semaphore.c +++ b/drivers/gpu/nvgpu/common/semaphore.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <nvgpu/kmem.h> | 28 | #include <nvgpu/kmem.h> |
29 | #include <nvgpu/bug.h> | 29 | #include <nvgpu/bug.h> |
30 | #include <nvgpu/sizes.h> | 30 | #include <nvgpu/sizes.h> |
31 | #include <nvgpu/channel.h> | ||
31 | 32 | ||
32 | #include "gk20a/gk20a.h" | 33 | #include "gk20a/gk20a.h" |
33 | #include "gk20a/mm_gk20a.h" | 34 | #include "gk20a/mm_gk20a.h" |
@@ -485,3 +486,141 @@ void nvgpu_semaphore_get(struct nvgpu_semaphore *s) | |||
485 | { | 486 | { |
486 | nvgpu_ref_get(&s->ref); | 487 | nvgpu_ref_get(&s->ref); |
487 | } | 488 | } |
489 | |||
490 | /* | ||
491 | * Return the address of a specific semaphore. | ||
492 | * | ||
493 | * Don't call this on a semaphore you don't own - the VA returned will make no | ||
494 | * sense in your specific channel's VM. | ||
495 | */ | ||
496 | u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s) | ||
497 | { | ||
498 | return __nvgpu_semaphore_pool_gpu_va(s->location.pool, false) + | ||
499 | s->location.offset; | ||
500 | } | ||
501 | |||
502 | /* | ||
503 | * Get the global RO address for the semaphore. Can be called on any semaphore | ||
504 | * regardless of whether you own it. | ||
505 | */ | ||
506 | u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s) | ||
507 | { | ||
508 | return __nvgpu_semaphore_pool_gpu_va(s->location.pool, true) + | ||
509 | s->location.offset; | ||
510 | } | ||
511 | |||
512 | u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema) | ||
513 | { | ||
514 | return __nvgpu_semaphore_pool_gpu_va(hw_sema->location.pool, true) + | ||
515 | hw_sema->location.offset; | ||
516 | } | ||
517 | |||
518 | u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema) | ||
519 | { | ||
520 | return nvgpu_mem_rd(hw_sema->ch->g, &hw_sema->location.pool->rw_mem, | ||
521 | hw_sema->location.offset); | ||
522 | } | ||
523 | |||
524 | /* | ||
525 | * Read the underlying value from a semaphore. | ||
526 | */ | ||
527 | u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s) | ||
528 | { | ||
529 | return nvgpu_mem_rd(s->g, &s->location.pool->rw_mem, | ||
530 | s->location.offset); | ||
531 | } | ||
532 | |||
/*
 * Check if "racer" has reached or passed "goal", with wraparound handling.
 *
 * Returns true when racer lies in [goal, goal + 0x7fffffff] (modulo 2^32),
 * false otherwise.
 */
static bool __nvgpu_semaphore_value_released(u32 goal, u32 racer)
{
	/*
	 * Same heuristic as the hardware: the counter will eventually wrap,
	 * so a sema counts as released against a threshold once its value has
	 * reached that threshold but has not run more than half of the u32
	 * range beyond it; wrapping that far is unlikely to happen during a
	 * sema lifetime.
	 *
	 * [goal, goal + 0x7fffffff] is precisely half of the 32-bit space and
	 * is the signaled half. If racer == goal + 0x80000000, then it needs
	 * another 0x80000000 increments to wrap around and signal.
	 *
	 * The distance is computed in unsigned arithmetic because modular
	 * wraparound is well-defined there. The test is equivalent to
	 * (signed)(racer - goal) >= 0; note that racer == goal is released.
	 */
	u32 distance = racer - goal;

	return distance < 0x80000000;
}
555 | |||
556 | u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s) | ||
557 | { | ||
558 | return (u32)nvgpu_atomic_read(&s->value); | ||
559 | } | ||
560 | |||
561 | bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s) | ||
562 | { | ||
563 | u32 sema_val = nvgpu_semaphore_read(s); | ||
564 | u32 wait_payload = nvgpu_semaphore_get_value(s); | ||
565 | |||
566 | return __nvgpu_semaphore_value_released(wait_payload, sema_val); | ||
567 | } | ||
568 | |||
/*
 * Report whether semaphore "s" is still acquired; this is exactly the
 * negation of nvgpu_semaphore_is_released().
 */
bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
{
	bool released = nvgpu_semaphore_is_released(s);

	return !released;
}
573 | |||
574 | /* | ||
575 | * Fast-forward the hw sema to its tracked max value. | ||
576 | * | ||
577 | * Return true if the sema wasn't at the max value and needed updating, false | ||
578 | * otherwise. | ||
579 | */ | ||
580 | bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema) | ||
581 | { | ||
582 | u32 threshold = (u32)nvgpu_atomic_read(&hw_sema->next_value); | ||
583 | u32 current_val = __nvgpu_semaphore_read(hw_sema); | ||
584 | |||
585 | /* | ||
586 | * If the semaphore has already reached the value we would write then | ||
587 | * this is really just a NO-OP. However, the sema value shouldn't be | ||
588 | * more than what we expect to be the max. | ||
589 | */ | ||
590 | |||
591 | if (WARN_ON(__nvgpu_semaphore_value_released(threshold + 1, | ||
592 | current_val))) | ||
593 | return false; | ||
594 | |||
595 | if (current_val == threshold) | ||
596 | return false; | ||
597 | |||
598 | nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem, | ||
599 | hw_sema->location.offset, threshold); | ||
600 | |||
601 | gpu_sema_verbose_dbg(hw_sema->ch->g, "(c=%d) RESET %u -> %u", | ||
602 | hw_sema->ch->chid, current_val, threshold); | ||
603 | |||
604 | return true; | ||
605 | } | ||
606 | |||
607 | /* | ||
608 | * Update nvgpu-tracked shadow of the value in "hw_sema" and mark the threshold | ||
609 | * value to "s" which represents the increment that the caller must write in a | ||
610 | * pushbuf. The same nvgpu_semaphore will also represent an output fence; when | ||
611 | * nvgpu_semaphore_is_released(s) == true, the gpu is done with this increment. | ||
612 | */ | ||
613 | void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s, | ||
614 | struct nvgpu_semaphore_int *hw_sema) | ||
615 | { | ||
616 | int next = nvgpu_atomic_add_return(1, &hw_sema->next_value); | ||
617 | |||
618 | /* "s" should be an uninitialized sema. */ | ||
619 | WARN_ON(s->incremented); | ||
620 | |||
621 | nvgpu_atomic_set(&s->value, next); | ||
622 | s->incremented = 1; | ||
623 | |||
624 | gpu_sema_verbose_dbg(s->g, "INCR sema for c=%d (%u)", | ||
625 | hw_sema->ch->chid, next); | ||
626 | } | ||
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h index 9ab6cc67..a7bd79cb 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h +++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h | |||
@@ -24,16 +24,13 @@ | |||
24 | #define SEMAPHORE_GK20A_H | 24 | #define SEMAPHORE_GK20A_H |
25 | 25 | ||
26 | #include <nvgpu/log.h> | 26 | #include <nvgpu/log.h> |
27 | #include <nvgpu/timers.h> | ||
28 | #include <nvgpu/atomic.h> | 27 | #include <nvgpu/atomic.h> |
29 | #include <nvgpu/bug.h> | ||
30 | #include <nvgpu/kref.h> | 28 | #include <nvgpu/kref.h> |
31 | #include <nvgpu/list.h> | 29 | #include <nvgpu/list.h> |
32 | #include <nvgpu/nvgpu_mem.h> | 30 | #include <nvgpu/nvgpu_mem.h> |
33 | 31 | ||
34 | #include "gk20a/gk20a.h" | 32 | #include "gk20a/gk20a.h" |
35 | #include "gk20a/mm_gk20a.h" | 33 | #include "gk20a/mm_gk20a.h" |
36 | #include "gk20a/channel_gk20a.h" | ||
37 | 34 | ||
38 | #define gpu_sema_dbg(g, fmt, args...) \ | 35 | #define gpu_sema_dbg(g, fmt, args...) \ |
39 | nvgpu_log(g, gpu_dbg_sema, fmt, ##args) | 36 | nvgpu_log(g, gpu_dbg_sema, fmt, ##args) |
@@ -191,141 +188,18 @@ void nvgpu_semaphore_put(struct nvgpu_semaphore *s); | |||
191 | void nvgpu_semaphore_get(struct nvgpu_semaphore *s); | 188 | void nvgpu_semaphore_get(struct nvgpu_semaphore *s); |
192 | void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch); | 189 | void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch); |
193 | 190 | ||
194 | /* | 191 | u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s); |
195 | * Return the address of a specific semaphore. | 192 | u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s); |
196 | * | 193 | u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema); |
197 | * Don't call this on a semaphore you don't own - the VA returned will make no | ||
198 | * sense in your specific channel's VM. | ||
199 | */ | ||
200 | static inline u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s) | ||
201 | { | ||
202 | return __nvgpu_semaphore_pool_gpu_va(s->location.pool, false) + | ||
203 | s->location.offset; | ||
204 | } | ||
205 | |||
206 | /* | ||
207 | * Get the global RO address for the semaphore. Can be called on any semaphore | ||
208 | * regardless of whether you own it. | ||
209 | */ | ||
210 | static inline u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s) | ||
211 | { | ||
212 | return __nvgpu_semaphore_pool_gpu_va(s->location.pool, true) + | ||
213 | s->location.offset; | ||
214 | } | ||
215 | |||
216 | static inline u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema) | ||
217 | { | ||
218 | return __nvgpu_semaphore_pool_gpu_va(hw_sema->location.pool, true) + | ||
219 | hw_sema->location.offset; | ||
220 | } | ||
221 | |||
222 | static inline u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema) | ||
223 | { | ||
224 | return nvgpu_mem_rd(hw_sema->ch->g, &hw_sema->location.pool->rw_mem, | ||
225 | hw_sema->location.offset); | ||
226 | } | ||
227 | |||
228 | /* | ||
229 | * Read the underlying value from a semaphore. | ||
230 | */ | ||
231 | static inline u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s) | ||
232 | { | ||
233 | return nvgpu_mem_rd(s->g, &s->location.pool->rw_mem, | ||
234 | s->location.offset); | ||
235 | } | ||
236 | |||
237 | /* | ||
238 | * Check if "racer" is over "goal" with wraparound handling. | ||
239 | */ | ||
240 | static inline bool __nvgpu_semaphore_value_released(u32 goal, u32 racer) | ||
241 | { | ||
242 | /* | ||
243 | * Handle wraparound with the same heuristic as the hardware does: | ||
244 | * although the integer will eventually wrap around, consider a sema | ||
245 | * released against a threshold if its value has passed that threshold | ||
246 | * but has not wrapped over half of the u32 range over that threshold; | ||
247 | * such wrapping is unlikely to happen during a sema lifetime. | ||
248 | * | ||
249 | * Values for [goal, goal + 0x7fffffff] are considered signaled; that's | ||
250 | * precisely half of the 32-bit space. If racer == goal + 0x80000000, | ||
251 | * then it needs 0x80000000 increments to wrap again and signal. | ||
252 | * | ||
253 | * Unsigned arithmetic is used because it's well-defined. This is | ||
254 | * effectively the same as: signed_racer - signed_goal > 0. | ||
255 | */ | ||
256 | |||
257 | return racer - goal < 0x80000000; | ||
258 | } | ||
259 | |||
260 | static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s) | ||
261 | { | ||
262 | return (u32)nvgpu_atomic_read(&s->value); | ||
263 | } | ||
264 | |||
265 | static inline bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s) | ||
266 | { | ||
267 | u32 sema_val = nvgpu_semaphore_read(s); | ||
268 | u32 wait_payload = nvgpu_semaphore_get_value(s); | ||
269 | |||
270 | return __nvgpu_semaphore_value_released(wait_payload, sema_val); | ||
271 | } | ||
272 | |||
273 | static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s) | ||
274 | { | ||
275 | return !nvgpu_semaphore_is_released(s); | ||
276 | } | ||
277 | |||
278 | /* | ||
279 | * Fast-forward the hw sema to its tracked max value. | ||
280 | * | ||
281 | * Return true if the sema wasn't at the max value and needed updating, false | ||
282 | * otherwise. | ||
283 | */ | ||
284 | static inline bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema) | ||
285 | { | ||
286 | u32 threshold = (u32)nvgpu_atomic_read(&hw_sema->next_value); | ||
287 | u32 current_val = __nvgpu_semaphore_read(hw_sema); | ||
288 | |||
289 | /* | ||
290 | * If the semaphore has already reached the value we would write then | ||
291 | * this is really just a NO-OP. However, the sema value shouldn't be | ||
292 | * more than what we expect to be the max. | ||
293 | */ | ||
294 | |||
295 | if (WARN_ON(__nvgpu_semaphore_value_released(threshold + 1, | ||
296 | current_val))) | ||
297 | return false; | ||
298 | |||
299 | if (current_val == threshold) | ||
300 | return false; | ||
301 | |||
302 | nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem, | ||
303 | hw_sema->location.offset, threshold); | ||
304 | |||
305 | gpu_sema_verbose_dbg(hw_sema->ch->g, "(c=%d) RESET %u -> %u", | ||
306 | hw_sema->ch->chid, current_val, threshold); | ||
307 | |||
308 | return true; | ||
309 | } | ||
310 | |||
311 | /* | ||
312 | * Update nvgpu-tracked shadow of the value in "hw_sema" and mark the threshold | ||
313 | * value to "s" which represents the increment that the caller must write in a | ||
314 | * pushbuf. The same nvgpu_semaphore will also represent an output fence; when | ||
315 | * nvgpu_semaphore_is_released(s) == true, the gpu is done with this increment. | ||
316 | */ | ||
317 | static inline void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s, | ||
318 | struct nvgpu_semaphore_int *hw_sema) | ||
319 | { | ||
320 | int next = nvgpu_atomic_add_return(1, &hw_sema->next_value); | ||
321 | 194 | ||
322 | /* "s" should be an uninitialized sema. */ | 195 | u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema); |
323 | WARN_ON(s->incremented); | 196 | u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s); |
197 | u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s); | ||
198 | bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s); | ||
199 | bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s); | ||
324 | 200 | ||
325 | nvgpu_atomic_set(&s->value, next); | 201 | bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema); |
326 | s->incremented = 1; | 202 | void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s, |
203 | struct nvgpu_semaphore_int *hw_sema); | ||
327 | 204 | ||
328 | gpu_sema_verbose_dbg(s->g, "INCR sema for c=%d (%u)", | ||
329 | hw_sema->ch->chid, next); | ||
330 | } | ||
331 | #endif | 205 | #endif |