diff options
author | David Li <davli@nvidia.com> | 2018-04-26 05:00:01 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2018-05-18 02:34:20 -0400 |
commit | a807cf20419af737a79a3d0c7fcc1068ac6b724a (patch) | |
tree | 4efc94d09217bd5e7fdad973b8dacfdee9bab8dd /include/trace/events/gk20a.h | |
parent | 8ac538e1b16c68ef4a5b9d85a82bbfc2b3fabd72 (diff) |
gpu: nvgpu: add NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST
Add NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST ioctl to reschedule runlist,
and optionally check host and FECS status to preempt pending load of
context not belonging to the calling channel on GR engine during context
switch.
This should be called immediately after a submit to decrease the worst-case
submit-to-start latency for a high-interleave channel.
There is a less than 0.002% chance that the ioctl blocks for up to a couple of
milliseconds due to a race condition of the FECS status changing while being read.
For GV11B it will always preempt the pending load of an unwanted context since
there is no chance that the ioctl blocks due to a race condition.
Also fix a bug with host reschedule for multiple runlists, which needs to
write both runlist registers.
Bug 1987640
Bug 1924808
Change-Id: I0b7e2f91bd18b0b20928e5a3311b9426b1bf1848
Signed-off-by: David Li <davli@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1549050
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'include/trace/events/gk20a.h')
-rw-r--r-- | include/trace/events/gk20a.h | 47 |
1 files changed, 46 insertions, 1 deletions
diff --git a/include/trace/events/gk20a.h b/include/trace/events/gk20a.h index 99726e4c..ef51451a 100644 --- a/include/trace/events/gk20a.h +++ b/include/trace/events/gk20a.h | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * gk20a event logging to ftrace. | 2 | * gk20a event logging to ftrace. |
3 | * | 3 | * |
4 | * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved. | 4 | * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. |
5 | * | 5 | * |
6 | * This program is free software; you can redistribute it and/or modify it | 6 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms and conditions of the GNU General Public License, | 7 | * under the terms and conditions of the GNU General Public License, |
@@ -346,6 +346,51 @@ TRACE_EVENT(gk20a_channel_submitted_gpfifo, | |||
346 | __entry->flags, __entry->incr_id, __entry->incr_value) | 346 | __entry->flags, __entry->incr_id, __entry->incr_value) |
347 | ); | 347 | ); |
348 | 348 | ||
349 | TRACE_EVENT(gk20a_reschedule_preempt_next, | ||
350 | TP_PROTO(u32 chid, u32 fecs0, u32 engstat, u32 fecs1, u32 fecs2, | ||
351 | u32 preempt), | ||
352 | |||
353 | TP_ARGS(chid, fecs0, engstat, fecs1, fecs2, preempt), | ||
354 | |||
355 | TP_STRUCT__entry( | ||
356 | __field(u32, chid) | ||
357 | __field(u32, fecs0) | ||
358 | __field(u32, engstat) | ||
359 | __field(u32, fecs1) | ||
360 | __field(u32, fecs2) | ||
361 | __field(u32, preempt) | ||
362 | ), | ||
363 | |||
364 | TP_fast_assign( | ||
365 | __entry->chid = chid; | ||
366 | __entry->fecs0 = fecs0; | ||
367 | __entry->engstat = engstat; | ||
368 | __entry->fecs1 = fecs1; | ||
369 | __entry->fecs2 = fecs2; | ||
370 | __entry->preempt = preempt; | ||
371 | ), | ||
372 | |||
373 | TP_printk("chid=%d, fecs0=%#x, engstat=%#x, fecs1=%#x, fecs2=%#x," | ||
374 | " preempt=%#x", __entry->chid, __entry->fecs0, __entry->engstat, | ||
375 | __entry->fecs1, __entry->fecs2, __entry->preempt) | ||
376 | ); | ||
377 | |||
378 | TRACE_EVENT(gk20a_reschedule_preempted_next, | ||
379 | TP_PROTO(u32 chid), | ||
380 | |||
381 | TP_ARGS(chid), | ||
382 | |||
383 | TP_STRUCT__entry( | ||
384 | __field(u32, chid) | ||
385 | ), | ||
386 | |||
387 | TP_fast_assign( | ||
388 | __entry->chid = chid; | ||
389 | ), | ||
390 | |||
391 | TP_printk("chid=%d", __entry->chid) | ||
392 | ); | ||
393 | |||
349 | TRACE_EVENT(gk20a_channel_reset, | 394 | TRACE_EVENT(gk20a_channel_reset, |
350 | TP_PROTO(u32 chid, u32 tsgid), | 395 | TP_PROTO(u32 chid, u32 tsgid), |
351 | 396 | ||