From ee9733e587d977610975435a84e5af7cabba8870 Mon Sep 17 00:00:00 2001 From: Konsta Holtta Date: Tue, 2 May 2017 16:01:51 +0300 Subject: gpu: nvgpu: expose deterministic submit support Add these bits in the gpu characteristics flags: NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING - fast submits with no in-kernel job tracking are supported. NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL - deterministic submits also with job tracking and num_inflight_jobs set are supported. Either of these may get disabled if the particular channel or submit still requires features that block these. Make gk20a_channel_sync_needs_sync_framework() take a gk20a pointer instead of a channel pointer so that it can be called without a channel. It does not need any per-channel data. Bug 200291300 Change-Id: I5f82510b6d39b53bcf6f1006dd83bdd9053963a0 Signed-off-by: Konsta Holtta Reviewed-on: http://git-master/r/1456845 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 2 +- drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 6 ++---- drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | 2 +- drivers/gpu/nvgpu/gk20a/gk20a.c | 19 +++++++++++++++++++ include/uapi/linux/nvgpu.h | 22 ++++++++++++++++++++-- 5 files changed, 43 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 9902cb5e..db44d11b 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -2468,7 +2468,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, return -EINVAL; need_sync_framework = force_need_sync_fence || - gk20a_channel_sync_needs_sync_framework(c) || + gk20a_channel_sync_needs_sync_framework(g) || (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE && (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT || flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)); diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c 
b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index ddd94a2d..ed83663d 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c @@ -972,9 +972,7 @@ struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c) return gk20a_channel_semaphore_create(c); } -bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c) +bool gk20a_channel_sync_needs_sync_framework(struct gk20a *g) { - if (gk20a_platform_has_syncpoints(c->g)) - return false; - return true; + return !gk20a_platform_has_syncpoints(g); } diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h index 8b1f85a1..4efd1b76 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h @@ -99,7 +99,7 @@ struct gk20a_channel_sync { void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync); struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c); -bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c); +bool gk20a_channel_sync_needs_sync_framework(struct gk20a *g); #ifdef CONFIG_SYNC void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 8ea2929f..7f110821 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -461,6 +461,25 @@ int gk20a_init_gpu_characteristics(struct gk20a *g) if (gk20a_platform_has_syncpoints(g)) gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS; + /* + * Railgating needs job tracking which prevents fast submits. They're + * supported otherwise, provided that the user doesn't request anything + * that depends on job tracking. (Here, fast means strictly no + * metadata, just the gpfifo contents are copied and gp_put updated). 
+ */ + if (!platform->can_railgate) + gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING; + + /* + * Railgating and sync framework require deferred job cleanup which + * prevents deterministic submits. They're supported otherwise, + * provided that the user doesn't request anything that depends on + * deferred cleanup. + */ + if (!platform->can_railgate + && !gk20a_channel_sync_needs_sync_framework(g)) + gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL; + gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS; gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_TSG; gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_MAP_COMPBITS; diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index e25667cb..401722b1 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h @@ -137,6 +137,10 @@ struct nvgpu_gpu_zbc_query_table_args { #define NVGPU_GPU_FLAGS_SUPPORT_FECS_CTXSW_TRACE (1ULL << 16) /* NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS is available */ #define NVGPU_GPU_FLAGS_SUPPORT_MAP_COMPBITS (1ULL << 17) +/* Fast deterministic submits with no job tracking are supported */ +#define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING (1ULL << 18) +/* Deterministic submits are supported even with job tracking */ +#define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL (1ULL << 19) struct nvgpu_gpu_characteristics { __u32 arch; @@ -1348,8 +1352,22 @@ struct nvgpu_alloc_gpfifo_args { struct nvgpu_alloc_gpfifo_ex_args { __u32 num_entries; __u32 num_inflight_jobs; -#define NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED (1 << 0) /* set owner channel of this gpfifo as a vpr channel */ -#define NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC (1 << 1) /* channel shall exhibit deterministic behavior in the submit path */ +/* Set owner channel of this gpfifo as a vpr channel. */ +#define NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED (1 << 0) +/* + * Channel shall exhibit deterministic behavior in the submit path. 
+ * + * With this flag, any submits with in-kernel job tracking also require that + * num_inflight_jobs is nonzero, and additionally that + * NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL is set in gpu + * characteristics.flags. + * + * Note that fast submits (with no in-kernel job tracking) are also + * deterministic and are supported if the characteristics flags contain + * NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING; neither + * this flag nor num_inflight_jobs is necessary in that case. + */ +#define NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC (1 << 1) __u32 flags; __u32 reserved[5]; }; -- cgit v1.2.2