diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2018-02-01 10:10:55 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-03-08 07:27:10 -0500 |
commit | 4320877eb49ef16a5620536427f77de44bee12b7 (patch) | |
tree | bef4e7b948ea132fe3cd695217b9e7693f04275c /drivers | |
parent | b94770dc4d2d96b80b14b9942595d3e7fc2bbf6c (diff) |
gpu: nvgpu: handle semaphore wraparound
Compare gpu semaphores in the kernel in the same way as the hardware
does: a semaphore is considered released if its value is at or above the
threshold, but by at most half of the u32 range above it (wraparound is
handled by unsigned subtraction). This makes it possible to skip zeroing
the sema values when semas are allocated, so that they'd be just
monotonically increasing numbers like syncpoints are.
Jira NVGPU-514
Change-Id: I3bae352fbacfe9690666765b9ecdeae6f0813ea1
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1652086
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/semaphore.h | 54 |
1 file changed, 36 insertions, 18 deletions
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h index a4af1ca3..9c74d300 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h +++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h | |||
@@ -230,23 +230,26 @@ static inline u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s) | |||
230 | } | 230 | } |
231 | 231 | ||
232 | /* | 232 | /* |
233 | * TODO: handle wrap around... Hmm, how to do this? | 233 | * Check if "racer" is over "goal" with wraparound handling. |
234 | */ | 234 | */ |
235 | static inline bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s) | 235 | static inline bool __nvgpu_semaphore_value_released(u32 goal, u32 racer) |
236 | { | 236 | { |
237 | u32 sema_val = nvgpu_semaphore_read(s); | ||
238 | |||
239 | /* | 237 | /* |
240 | * If the underlying semaphore value is greater than or equal to | 238 | * Handle wraparound with the same heuristic as the hardware does: |
241 | * the value of the semaphore then the semaphore has been signaled | 239 | * although the integer will eventually wrap around, consider a sema |
242 | * (a.k.a. released). | 240 | * released against a threshold if its value has passed that threshold |
241 | * but has not wrapped over half of the u32 range over that threshold; | ||
242 | * such wrapping is unlikely to happen during a sema lifetime. | ||
243 | * | ||
244 | * Values for [goal, goal + 0x7fffffff] are considered signaled; that's | ||
245 | * precisely half of the 32-bit space. If racer == goal + 0x80000000, | ||
246 | * then it needs 0x80000000 increments to wrap again and signal. | ||
247 | * | ||
248 | * Unsigned arithmetic is used because it's well-defined. This is | ||
249 | * effectively the same as: signed_racer - signed_goal > 0. | ||
243 | */ | 250 | */ |
244 | return (int)sema_val >= nvgpu_atomic_read(&s->value); | ||
245 | } | ||
246 | 251 | ||
247 | static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s) | 252 | return racer - goal < 0x80000000; |
248 | { | ||
249 | return !nvgpu_semaphore_is_released(s); | ||
250 | } | 253 | } |
251 | 254 | ||
252 | static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s) | 255 | static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s) |
@@ -254,6 +257,19 @@ static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s) | |||
254 | return (u32)nvgpu_atomic_read(&s->value); | 257 | return (u32)nvgpu_atomic_read(&s->value); |
255 | } | 258 | } |
256 | 259 | ||
260 | static inline bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s) | ||
261 | { | ||
262 | u32 sema_val = nvgpu_semaphore_read(s); | ||
263 | u32 wait_payload = nvgpu_semaphore_get_value(s); | ||
264 | |||
265 | return __nvgpu_semaphore_value_released(wait_payload, sema_val); | ||
266 | } | ||
267 | |||
268 | static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s) | ||
269 | { | ||
270 | return !nvgpu_semaphore_is_released(s); | ||
271 | } | ||
272 | |||
257 | static inline u32 nvgpu_semaphore_next_value(struct nvgpu_semaphore *s) | 273 | static inline u32 nvgpu_semaphore_next_value(struct nvgpu_semaphore *s) |
258 | { | 274 | { |
259 | return (u32)nvgpu_atomic_read(&s->hw_sema->next_value); | 275 | return (u32)nvgpu_atomic_read(&s->hw_sema->next_value); |
@@ -261,14 +277,14 @@ static inline u32 nvgpu_semaphore_next_value(struct nvgpu_semaphore *s) | |||
261 | 277 | ||
262 | /* | 278 | /* |
263 | * If @force is set then this will not wait for the underlying semaphore to | 279 | * If @force is set then this will not wait for the underlying semaphore to |
264 | * catch up to the passed semaphore. | 280 | * catch up to the passed semaphore threshold. |
265 | */ | 281 | */ |
266 | static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s, | 282 | static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s, |
267 | bool force) | 283 | bool force) |
268 | { | 284 | { |
269 | struct nvgpu_semaphore_int *hw_sema = s->hw_sema; | 285 | struct nvgpu_semaphore_int *hw_sema = s->hw_sema; |
270 | u32 current_val; | 286 | u32 current_val; |
271 | u32 val = nvgpu_semaphore_get_value(s); | 287 | u32 threshold = nvgpu_semaphore_get_value(s); |
272 | int attempts = 0; | 288 | int attempts = 0; |
273 | 289 | ||
274 | /* | 290 | /* |
@@ -277,7 +293,8 @@ static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s, | |||
277 | * | 293 | * |
278 | * TODO: tune the wait a little better. | 294 | * TODO: tune the wait a little better. |
279 | */ | 295 | */ |
280 | while ((current_val = nvgpu_semaphore_read(s)) < (val - 1)) { | 296 | while (!__nvgpu_semaphore_value_released(threshold - 1, |
297 | current_val = nvgpu_semaphore_read(s))) { | ||
281 | if (force) | 298 | if (force) |
282 | break; | 299 | break; |
283 | nvgpu_msleep(100); | 300 | nvgpu_msleep(100); |
@@ -292,13 +309,14 @@ static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s, | |||
292 | * If the semaphore has already passed the value we would write then | 309 | * If the semaphore has already passed the value we would write then |
293 | * this is really just a NO-OP. | 310 | * this is really just a NO-OP. |
294 | */ | 311 | */ |
295 | if (current_val >= val) | 312 | if (__nvgpu_semaphore_value_released(threshold, current_val)) |
296 | return; | 313 | return; |
297 | 314 | ||
298 | nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->p->rw_mem, hw_sema->offset, val); | 315 | nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->p->rw_mem, hw_sema->offset, |
316 | threshold); | ||
299 | 317 | ||
300 | gpu_sema_verbose_dbg(hw_sema->p->sema_sea->gk20a, | 318 | gpu_sema_verbose_dbg(hw_sema->p->sema_sea->gk20a, |
301 | "(c=%d) WRITE %u", hw_sema->ch->chid, val); | 319 | "(c=%d) WRITE %u", hw_sema->ch->chid, threshold); |
302 | } | 320 | } |
303 | 321 | ||
304 | static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s) | 322 | static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s) |