diff options
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/semaphore.h | 54 |
1 files changed, 36 insertions, 18 deletions
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h index a4af1ca3..9c74d300 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h +++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h | |||
@@ -230,23 +230,26 @@ static inline u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s) | |||
230 | } | 230 | } |
231 | 231 | ||
232 | /* | 232 | /* |
233 | * TODO: handle wrap around... Hmm, how to do this? | 233 | * Check if "racer" is over "goal" with wraparound handling. |
234 | */ | 234 | */ |
235 | static inline bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s) | 235 | static inline bool __nvgpu_semaphore_value_released(u32 goal, u32 racer) |
236 | { | 236 | { |
237 | u32 sema_val = nvgpu_semaphore_read(s); | ||
238 | |||
239 | /* | 237 | /* |
240 | * If the underlying semaphore value is greater than or equal to | 238 | * Handle wraparound with the same heuristic as the hardware does: |
241 | * the value of the semaphore then the semaphore has been signaled | 239 | * although the integer will eventually wrap around, consider a sema |
242 | * (a.k.a. released). | 240 | * released against a threshold if its value has passed that threshold |
241 | * but has not wrapped over half of the u32 range over that threshold; | ||
242 | * such wrapping is unlikely to happen during a sema lifetime. | ||
243 | * | ||
244 | * Values for [goal, goal + 0x7fffffff] are considered signaled; that's | ||
245 | * precisely half of the 32-bit space. If racer == goal + 0x80000000, | ||
246 | * then it needs 0x80000000 increments to wrap again and signal. | ||
247 | * | ||
248 | * Unsigned arithmetic is used because it's well-defined. This is | ||
249 | * effectively the same as: signed_racer - signed_goal >= 0.
243 | */ | 250 | */ |
244 | return (int)sema_val >= nvgpu_atomic_read(&s->value); | ||
245 | } | ||
246 | 251 | ||
247 | static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s) | 252 | return racer - goal < 0x80000000; |
248 | { | ||
249 | return !nvgpu_semaphore_is_released(s); | ||
250 | } | 253 | } |
251 | 254 | ||
252 | static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s) | 255 | static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s) |
@@ -254,6 +257,19 @@ static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s) | |||
254 | return (u32)nvgpu_atomic_read(&s->value); | 257 | return (u32)nvgpu_atomic_read(&s->value); |
255 | } | 258 | } |
256 | 259 | ||
260 | static inline bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s) | ||
261 | { | ||
262 | u32 sema_val = nvgpu_semaphore_read(s); | ||
263 | u32 wait_payload = nvgpu_semaphore_get_value(s); | ||
264 | |||
265 | return __nvgpu_semaphore_value_released(wait_payload, sema_val); | ||
266 | } | ||
267 | |||
268 | static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s) | ||
269 | { | ||
270 | return !nvgpu_semaphore_is_released(s); | ||
271 | } | ||
272 | |||
257 | static inline u32 nvgpu_semaphore_next_value(struct nvgpu_semaphore *s) | 273 | static inline u32 nvgpu_semaphore_next_value(struct nvgpu_semaphore *s) |
258 | { | 274 | { |
259 | return (u32)nvgpu_atomic_read(&s->hw_sema->next_value); | 275 | return (u32)nvgpu_atomic_read(&s->hw_sema->next_value); |
@@ -261,14 +277,14 @@ static inline u32 nvgpu_semaphore_next_value(struct nvgpu_semaphore *s) | |||
261 | 277 | ||
262 | /* | 278 | /* |
263 | * If @force is set then this will not wait for the underlying semaphore to | 279 | * If @force is set then this will not wait for the underlying semaphore to |
264 | * catch up to the passed semaphore. | 280 | * catch up to the passed semaphore threshold. |
265 | */ | 281 | */ |
266 | static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s, | 282 | static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s, |
267 | bool force) | 283 | bool force) |
268 | { | 284 | { |
269 | struct nvgpu_semaphore_int *hw_sema = s->hw_sema; | 285 | struct nvgpu_semaphore_int *hw_sema = s->hw_sema; |
270 | u32 current_val; | 286 | u32 current_val; |
271 | u32 val = nvgpu_semaphore_get_value(s); | 287 | u32 threshold = nvgpu_semaphore_get_value(s); |
272 | int attempts = 0; | 288 | int attempts = 0; |
273 | 289 | ||
274 | /* | 290 | /* |
@@ -277,7 +293,8 @@ static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s, | |||
277 | * | 293 | * |
278 | * TODO: tune the wait a little better. | 294 | * TODO: tune the wait a little better. |
279 | */ | 295 | */ |
280 | while ((current_val = nvgpu_semaphore_read(s)) < (val - 1)) { | 296 | while (!__nvgpu_semaphore_value_released(threshold - 1, |
297 | current_val = nvgpu_semaphore_read(s))) { | ||
281 | if (force) | 298 | if (force) |
282 | break; | 299 | break; |
283 | nvgpu_msleep(100); | 300 | nvgpu_msleep(100); |
@@ -292,13 +309,14 @@ static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s, | |||
292 | * If the semaphore has already passed the value we would write then | 309 | * If the semaphore has already passed the value we would write then |
293 | * this is really just a NO-OP. | 310 | * this is really just a NO-OP. |
294 | */ | 311 | */ |
295 | if (current_val >= val) | 312 | if (__nvgpu_semaphore_value_released(threshold, current_val)) |
296 | return; | 313 | return; |
297 | 314 | ||
298 | nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->p->rw_mem, hw_sema->offset, val); | 315 | nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->p->rw_mem, hw_sema->offset, |
316 | threshold); | ||
299 | 317 | ||
300 | gpu_sema_verbose_dbg(hw_sema->p->sema_sea->gk20a, | 318 | gpu_sema_verbose_dbg(hw_sema->p->sema_sea->gk20a, |
301 | "(c=%d) WRITE %u", hw_sema->ch->chid, val); | 319 | "(c=%d) WRITE %u", hw_sema->ch->chid, threshold); |
302 | } | 320 | } |
303 | 321 | ||
304 | static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s) | 322 | static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s) |