summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
author David Li <davli@nvidia.com> 2018-04-26 05:00:01 -0400
committer mobile promotions <svcmobile_promotions@nvidia.com> 2018-05-18 02:34:20 -0400
commit a807cf20419af737a79a3d0c7fcc1068ac6b724a (patch)
tree 4efc94d09217bd5e7fdad973b8dacfdee9bab8dd /include
parent 8ac538e1b16c68ef4a5b9d85a82bbfc2b3fabd72 (diff)
gpu: nvgpu: add NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST
Add the NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST ioctl to reschedule the runlist, and optionally check host and FECS status to preempt a pending load of a context not belonging to the calling channel on the GR engine during a context switch. This should be called immediately after a submit to decrease the worst-case submit-to-start latency for a high-interleave channel. There is a less than 0.002% chance that the ioctl blocks for up to a couple of milliseconds due to a race condition in which the FECS status changes while being read. For GV11B it will always preempt the pending load of an unwanted context, since there is no chance that the ioctl blocks due to the race condition. Also fix a bug with host reschedule for multiple runlists, which needs to write both runlist registers. Bug 1987640 Bug 1924808 Change-Id: I0b7e2f91bd18b0b20928e5a3311b9426b1bf1848 Signed-off-by: David Li <davli@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1549050 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Diffstat (limited to 'include')
-rw-r--r--include/trace/events/gk20a.h47
-rw-r--r--include/uapi/linux/nvgpu.h13
2 files changed, 55 insertions, 5 deletions
diff --git a/include/trace/events/gk20a.h b/include/trace/events/gk20a.h
index 99726e4c..ef51451a 100644
--- a/include/trace/events/gk20a.h
+++ b/include/trace/events/gk20a.h
@@ -1,7 +1,7 @@
1/* 1/*
2 * gk20a event logging to ftrace. 2 * gk20a event logging to ftrace.
3 * 3 *
4 * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved. 4 * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
5 * 5 *
6 * This program is free software; you can redistribute it and/or modify it 6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License, 7 * under the terms and conditions of the GNU General Public License,
@@ -346,6 +346,51 @@ TRACE_EVENT(gk20a_channel_submitted_gpfifo,
346 __entry->flags, __entry->incr_id, __entry->incr_value) 346 __entry->flags, __entry->incr_id, __entry->incr_value)
347); 347);
348 348
349TRACE_EVENT(gk20a_reschedule_preempt_next,
350 TP_PROTO(u32 chid, u32 fecs0, u32 engstat, u32 fecs1, u32 fecs2,
351 u32 preempt),
352
353 TP_ARGS(chid, fecs0, engstat, fecs1, fecs2, preempt),
354
355 TP_STRUCT__entry(
356 __field(u32, chid)
357 __field(u32, fecs0)
358 __field(u32, engstat)
359 __field(u32, fecs1)
360 __field(u32, fecs2)
361 __field(u32, preempt)
362 ),
363
364 TP_fast_assign(
365 __entry->chid = chid;
366 __entry->fecs0 = fecs0;
367 __entry->engstat = engstat;
368 __entry->fecs1 = fecs1;
369 __entry->fecs2 = fecs2;
370 __entry->preempt = preempt;
371 ),
372
373 TP_printk("chid=%d, fecs0=%#x, engstat=%#x, fecs1=%#x, fecs2=%#x,"
374 " preempt=%#x", __entry->chid, __entry->fecs0, __entry->engstat,
375 __entry->fecs1, __entry->fecs2, __entry->preempt)
376);
377
378TRACE_EVENT(gk20a_reschedule_preempted_next,
379 TP_PROTO(u32 chid),
380
381 TP_ARGS(chid),
382
383 TP_STRUCT__entry(
384 __field(u32, chid)
385 ),
386
387 TP_fast_assign(
388 __entry->chid = chid;
389 ),
390
391 TP_printk("chid=%d", __entry->chid)
392);
393
349TRACE_EVENT(gk20a_channel_reset, 394TRACE_EVENT(gk20a_channel_reset,
350 TP_PROTO(u32 chid, u32 tsgid), 395 TP_PROTO(u32 chid, u32 tsgid),
351 396
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index d97f8fb6..b14610bd 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -141,7 +141,7 @@ struct nvgpu_gpu_zbc_query_table_args {
141#define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL (1ULL << 19) 141#define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL (1ULL << 19)
142/* IO coherence support is available */ 142/* IO coherence support is available */
143#define NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE (1ULL << 20) 143#define NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE (1ULL << 20)
144/* NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST is available */ 144/* NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST is available */
145#define NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST (1ULL << 21) 145#define NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST (1ULL << 21)
146/* subcontexts are available */ 146/* subcontexts are available */
147#define NVGPU_GPU_FLAGS_SUPPORT_TSG_SUBCONTEXTS (1ULL << 22) 147#define NVGPU_GPU_FLAGS_SUPPORT_TSG_SUBCONTEXTS (1ULL << 22)
@@ -1477,8 +1477,6 @@ struct nvgpu_fence {
1477#define NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI (1 << 4) 1477#define NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI (1 << 4)
1478/* skip buffer refcounting during submit */ 1478/* skip buffer refcounting during submit */
1479#define NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING (1 << 5) 1479#define NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING (1 << 5)
1480/* expire current timeslice and reschedule runlist from front */
1481#define NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST (1 << 6)
1482 1480
1483struct nvgpu_submit_gpfifo_args { 1481struct nvgpu_submit_gpfifo_args {
1484 __u64 gpfifo; 1482 __u64 gpfifo;
@@ -1659,6 +1657,11 @@ struct nvgpu_get_user_syncpoint_args {
1659 __u32 syncpoint_max; /* out */ 1657 __u32 syncpoint_max; /* out */
1660}; 1658};
1661 1659
1660struct nvgpu_reschedule_runlist_args {
1661#define NVGPU_RESCHEDULE_RUNLIST_PREEMPT_NEXT (1 << 0)
1662 __u32 flags;
1663};
1664
1662#define NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD \ 1665#define NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD \
1663 _IOW(NVGPU_IOCTL_MAGIC, 5, struct nvgpu_set_nvmap_fd_args) 1666 _IOW(NVGPU_IOCTL_MAGIC, 5, struct nvgpu_set_nvmap_fd_args)
1664#define NVGPU_IOCTL_CHANNEL_SET_TIMEOUT \ 1667#define NVGPU_IOCTL_CHANNEL_SET_TIMEOUT \
@@ -1711,9 +1714,11 @@ struct nvgpu_get_user_syncpoint_args {
1711 _IOW(NVGPU_IOCTL_MAGIC, 125, struct nvgpu_timeslice_args) 1714 _IOW(NVGPU_IOCTL_MAGIC, 125, struct nvgpu_timeslice_args)
1712#define NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT \ 1715#define NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT \
1713 _IOR(NVGPU_IOCTL_MAGIC, 126, struct nvgpu_get_user_syncpoint_args) 1716 _IOR(NVGPU_IOCTL_MAGIC, 126, struct nvgpu_get_user_syncpoint_args)
1717#define NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST \
1718 _IOW(NVGPU_IOCTL_MAGIC, 127, struct nvgpu_reschedule_runlist_args)
1714 1719
1715#define NVGPU_IOCTL_CHANNEL_LAST \ 1720#define NVGPU_IOCTL_CHANNEL_LAST \
1716 _IOC_NR(NVGPU_IOCTL_CHANNEL_GET_USER_SYNCPOINT) 1721 _IOC_NR(NVGPU_IOCTL_CHANNEL_RESCHEDULE_RUNLIST)
1717#define NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE sizeof(struct nvgpu_alloc_gpfifo_ex_args) 1722#define NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE sizeof(struct nvgpu_alloc_gpfifo_ex_args)
1718 1723
1719/* 1724/*