summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/nvgpu/include
diff options
context:
space:
mode:
authorKonsta Holtta <kholtta@nvidia.com>2018-02-01 10:10:55 -0500
committermobile promotions <svcmobile_promotions@nvidia.com>2018-03-08 07:27:10 -0500
commit4320877eb49ef16a5620536427f77de44bee12b7 (patch)
treebef4e7b948ea132fe3cd695217b9e7693f04275c /drivers/gpu/nvgpu/include
parentb94770dc4d2d96b80b14b9942595d3e7fc2bbf6c (diff)
gpu: nvgpu: handle semaphore wraparound
Compare gpu semaphores in the kernel in the same way as the hardware does: released if value is over threshold, but at most half of u32's range. This makes it possible to skip zeroing the sema values when semas are allocated, so that they'd be just monotonically increasing numbers like syncpoints are. Jira NVGPU-514 Change-Id: I3bae352fbacfe9690666765b9ecdeae6f0813ea1 Signed-off-by: Konsta Holtta <kholtta@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1652086 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Alex Waterman <alexw@nvidia.com> Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'drivers/gpu/nvgpu/include')
-rw-r--r--drivers/gpu/nvgpu/include/nvgpu/semaphore.h54
1 file changed, 36 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
index a4af1ca3..9c74d300 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
@@ -230,23 +230,26 @@ static inline u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s)
230} 230}
231 231
232/* 232/*
233 * TODO: handle wrap around... Hmm, how to do this? 233 * Check if "racer" is over "goal" with wraparound handling.
234 */ 234 */
235static inline bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s) 235static inline bool __nvgpu_semaphore_value_released(u32 goal, u32 racer)
236{ 236{
237 u32 sema_val = nvgpu_semaphore_read(s);
238
239 /* 237 /*
240 * If the underlying semaphore value is greater than or equal to 238 * Handle wraparound with the same heuristic as the hardware does:
241 * the value of the semaphore then the semaphore has been signaled 239 * although the integer will eventually wrap around, consider a sema
242 * (a.k.a. released). 240 * released against a threshold if its value has passed that threshold
241 * but has not wrapped over half of the u32 range over that threshold;
242 * such wrapping is unlikely to happen during a sema lifetime.
243 *
244 * Values for [goal, goal + 0x7fffffff] are considered signaled; that's
245 * precisely half of the 32-bit space. If racer == goal + 0x80000000,
246 * then it needs 0x80000000 increments to wrap again and signal.
247 *
248 * Unsigned arithmetic is used because it's well-defined. This is
249 * effectively the same as: signed_racer - signed_goal > 0.
243 */ 250 */
244 return (int)sema_val >= nvgpu_atomic_read(&s->value);
245}
246 251
247static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s) 252 return racer - goal < 0x80000000;
248{
249 return !nvgpu_semaphore_is_released(s);
250} 253}
251 254
252static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s) 255static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s)
@@ -254,6 +257,19 @@ static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s)
254 return (u32)nvgpu_atomic_read(&s->value); 257 return (u32)nvgpu_atomic_read(&s->value);
255} 258}
256 259
260static inline bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s)
261{
262 u32 sema_val = nvgpu_semaphore_read(s);
263 u32 wait_payload = nvgpu_semaphore_get_value(s);
264
265 return __nvgpu_semaphore_value_released(wait_payload, sema_val);
266}
267
268static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
269{
270 return !nvgpu_semaphore_is_released(s);
271}
272
257static inline u32 nvgpu_semaphore_next_value(struct nvgpu_semaphore *s) 273static inline u32 nvgpu_semaphore_next_value(struct nvgpu_semaphore *s)
258{ 274{
259 return (u32)nvgpu_atomic_read(&s->hw_sema->next_value); 275 return (u32)nvgpu_atomic_read(&s->hw_sema->next_value);
@@ -261,14 +277,14 @@ static inline u32 nvgpu_semaphore_next_value(struct nvgpu_semaphore *s)
261 277
262/* 278/*
263 * If @force is set then this will not wait for the underlying semaphore to 279 * If @force is set then this will not wait for the underlying semaphore to
264 * catch up to the passed semaphore. 280 * catch up to the passed semaphore threshold.
265 */ 281 */
266static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s, 282static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s,
267 bool force) 283 bool force)
268{ 284{
269 struct nvgpu_semaphore_int *hw_sema = s->hw_sema; 285 struct nvgpu_semaphore_int *hw_sema = s->hw_sema;
270 u32 current_val; 286 u32 current_val;
271 u32 val = nvgpu_semaphore_get_value(s); 287 u32 threshold = nvgpu_semaphore_get_value(s);
272 int attempts = 0; 288 int attempts = 0;
273 289
274 /* 290 /*
@@ -277,7 +293,8 @@ static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s,
277 * 293 *
278 * TODO: tune the wait a little better. 294 * TODO: tune the wait a little better.
279 */ 295 */
280 while ((current_val = nvgpu_semaphore_read(s)) < (val - 1)) { 296 while (!__nvgpu_semaphore_value_released(threshold - 1,
297 current_val = nvgpu_semaphore_read(s))) {
281 if (force) 298 if (force)
282 break; 299 break;
283 nvgpu_msleep(100); 300 nvgpu_msleep(100);
@@ -292,13 +309,14 @@ static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s,
292 * If the semaphore has already passed the value we would write then 309 * If the semaphore has already passed the value we would write then
293 * this is really just a NO-OP. 310 * this is really just a NO-OP.
294 */ 311 */
295 if (current_val >= val) 312 if (__nvgpu_semaphore_value_released(threshold, current_val))
296 return; 313 return;
297 314
298 nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->p->rw_mem, hw_sema->offset, val); 315 nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->p->rw_mem, hw_sema->offset,
316 threshold);
299 317
300 gpu_sema_verbose_dbg(hw_sema->p->sema_sea->gk20a, 318 gpu_sema_verbose_dbg(hw_sema->p->sema_sea->gk20a,
301 "(c=%d) WRITE %u", hw_sema->ch->chid, val); 319 "(c=%d) WRITE %u", hw_sema->ch->chid, threshold);
302} 320}
303 321
304static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s) 322static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s)