summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- drivers/gpu/nvgpu/include/nvgpu/semaphore.h 54
1 files changed, 36 insertions, 18 deletions
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
index a4af1ca3..9c74d300 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
@@ -230,23 +230,26 @@ static inline u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s)
230} 230}
231 231
232/* 232/*
233 * TODO: handle wrap around... Hmm, how to do this? 233 * Check if "racer" is over "goal" with wraparound handling.
234 */ 234 */
235static inline bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s) 235static inline bool __nvgpu_semaphore_value_released(u32 goal, u32 racer)
236{ 236{
237 u32 sema_val = nvgpu_semaphore_read(s);
238
239 /* 237 /*
240 * If the underlying semaphore value is greater than or equal to 238 * Handle wraparound with the same heuristic as the hardware does:
241 * the value of the semaphore then the semaphore has been signaled 239 * although the integer will eventually wrap around, consider a sema
242 * (a.k.a. released). 240 * released against a threshold if its value has passed that threshold
241 * but has not wrapped over half of the u32 range over that threshold;
242 * such wrapping is unlikely to happen during a sema lifetime.
243 *
244 * Values for [goal, goal + 0x7fffffff] are considered signaled; that's
245 * precisely half of the 32-bit space. If racer == goal + 0x80000000,
246 * then it needs 0x80000000 increments to wrap again and signal.
247 *
248 * Unsigned arithmetic is used because it's well-defined. This is
249 * effectively the same as: signed_racer - signed_goal >= 0.
243 */ 250 */
244 return (int)sema_val >= nvgpu_atomic_read(&s->value);
245}
246 251
247static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s) 252 return racer - goal < 0x80000000;
248{
249 return !nvgpu_semaphore_is_released(s);
250} 253}
251 254
252static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s) 255static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s)
@@ -254,6 +257,19 @@ static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s)
254 return (u32)nvgpu_atomic_read(&s->value); 257 return (u32)nvgpu_atomic_read(&s->value);
255} 258}
256 259
260static inline bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s)
261{
262 u32 sema_val = nvgpu_semaphore_read(s);
263 u32 wait_payload = nvgpu_semaphore_get_value(s);
264
265 return __nvgpu_semaphore_value_released(wait_payload, sema_val);
266}
267
268static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
269{
270 return !nvgpu_semaphore_is_released(s);
271}
272
257static inline u32 nvgpu_semaphore_next_value(struct nvgpu_semaphore *s) 273static inline u32 nvgpu_semaphore_next_value(struct nvgpu_semaphore *s)
258{ 274{
259 return (u32)nvgpu_atomic_read(&s->hw_sema->next_value); 275 return (u32)nvgpu_atomic_read(&s->hw_sema->next_value);
@@ -261,14 +277,14 @@ static inline u32 nvgpu_semaphore_next_value(struct nvgpu_semaphore *s)
261 277
262/* 278/*
263 * If @force is set then this will not wait for the underlying semaphore to 279 * If @force is set then this will not wait for the underlying semaphore to
264 * catch up to the passed semaphore. 280 * catch up to the passed semaphore threshold.
265 */ 281 */
266static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s, 282static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s,
267 bool force) 283 bool force)
268{ 284{
269 struct nvgpu_semaphore_int *hw_sema = s->hw_sema; 285 struct nvgpu_semaphore_int *hw_sema = s->hw_sema;
270 u32 current_val; 286 u32 current_val;
271 u32 val = nvgpu_semaphore_get_value(s); 287 u32 threshold = nvgpu_semaphore_get_value(s);
272 int attempts = 0; 288 int attempts = 0;
273 289
274 /* 290 /*
@@ -277,7 +293,8 @@ static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s,
277 * 293 *
278 * TODO: tune the wait a little better. 294 * TODO: tune the wait a little better.
279 */ 295 */
280 while ((current_val = nvgpu_semaphore_read(s)) < (val - 1)) { 296 while (!__nvgpu_semaphore_value_released(threshold - 1,
297 current_val = nvgpu_semaphore_read(s))) {
281 if (force) 298 if (force)
282 break; 299 break;
283 nvgpu_msleep(100); 300 nvgpu_msleep(100);
@@ -292,13 +309,14 @@ static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s,
292 * If the semaphore has already passed the value we would write then 309 * If the semaphore has already passed the value we would write then
293 * this is really just a NO-OP. 310 * this is really just a NO-OP.
294 */ 311 */
295 if (current_val >= val) 312 if (__nvgpu_semaphore_value_released(threshold, current_val))
296 return; 313 return;
297 314
298 nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->p->rw_mem, hw_sema->offset, val); 315 nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->p->rw_mem, hw_sema->offset,
316 threshold);
299 317
300 gpu_sema_verbose_dbg(hw_sema->p->sema_sea->gk20a, 318 gpu_sema_verbose_dbg(hw_sema->p->sema_sea->gk20a,
301 "(c=%d) WRITE %u", hw_sema->ch->chid, val); 319 "(c=%d) WRITE %u", hw_sema->ch->chid, threshold);
302} 320}
303 321
304static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s) 322static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s)