summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/gpu/nvgpu/gk20a/channel_gk20a.c 2
-rw-r--r-- drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c 6
-rw-r--r-- drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h 2
-rw-r--r-- drivers/gpu/nvgpu/gk20a/gk20a.c 19
-rw-r--r-- include/uapi/linux/nvgpu.h 22
5 files changed, 43 insertions, 8 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 9902cb5e..db44d11b 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -2468,7 +2468,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
2468 return -EINVAL; 2468 return -EINVAL;
2469 2469
2470 need_sync_framework = force_need_sync_fence || 2470 need_sync_framework = force_need_sync_fence ||
2471 gk20a_channel_sync_needs_sync_framework(c) || 2471 gk20a_channel_sync_needs_sync_framework(g) ||
2472 (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE && 2472 (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE &&
2473 (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT || 2473 (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT ||
2474 flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)); 2474 flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET));
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index ddd94a2d..ed83663d 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -972,9 +972,7 @@ struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c)
972 return gk20a_channel_semaphore_create(c); 972 return gk20a_channel_semaphore_create(c);
973} 973}
974 974
975bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c) 975bool gk20a_channel_sync_needs_sync_framework(struct gk20a *g)
976{ 976{
977 if (gk20a_platform_has_syncpoints(c->g)) 977 return !gk20a_platform_has_syncpoints(g);
978 return false;
979 return true;
980} 978}
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
index 8b1f85a1..4efd1b76 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h
@@ -99,7 +99,7 @@ struct gk20a_channel_sync {
99 99
100void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync); 100void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync);
101struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c); 101struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c);
102bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c); 102bool gk20a_channel_sync_needs_sync_framework(struct gk20a *g);
103 103
104#ifdef CONFIG_SYNC 104#ifdef CONFIG_SYNC
105void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g); 105void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 8ea2929f..7f110821 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -461,6 +461,25 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
461 if (gk20a_platform_has_syncpoints(g)) 461 if (gk20a_platform_has_syncpoints(g))
462 gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS; 462 gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS;
463 463
464 /*
465 * Railgating needs job tracking which prevents fast submits. They're
466 * supported otherwise, provided that the user doesn't request anything
467 * that depends on job tracking. (Here, fast means strictly no
468 * metadata, just the gpfifo contents are copied and gp_put updated).
469 */
470 if (!platform->can_railgate)
471 gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING;
472
473 /*
474 * Railgating and sync framework require deferred job cleanup which
475 * prevents deterministic submits. They're supported otherwise,
476 * provided that the user doesn't request anything that depends on
477 * deferred cleanup.
478 */
479 if (!platform->can_railgate
480 && !gk20a_channel_sync_needs_sync_framework(g))
481 gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL;
482
464 gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS; 483 gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS;
465 gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_TSG; 484 gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_TSG;
466 gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_MAP_COMPBITS; 485 gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_MAP_COMPBITS;
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index e25667cb..401722b1 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -137,6 +137,10 @@ struct nvgpu_gpu_zbc_query_table_args {
137#define NVGPU_GPU_FLAGS_SUPPORT_FECS_CTXSW_TRACE (1ULL << 16) 137#define NVGPU_GPU_FLAGS_SUPPORT_FECS_CTXSW_TRACE (1ULL << 16)
138/* NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS is available */ 138/* NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS is available */
139#define NVGPU_GPU_FLAGS_SUPPORT_MAP_COMPBITS (1ULL << 17) 139#define NVGPU_GPU_FLAGS_SUPPORT_MAP_COMPBITS (1ULL << 17)
140/* Fast deterministic submits with no job tracking are supported */
141#define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING (1ULL << 18)
142/* Deterministic submits are supported even with job tracking */
143#define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL (1ULL << 19)
140 144
141struct nvgpu_gpu_characteristics { 145struct nvgpu_gpu_characteristics {
142 __u32 arch; 146 __u32 arch;
@@ -1348,8 +1352,22 @@ struct nvgpu_alloc_gpfifo_args {
1348struct nvgpu_alloc_gpfifo_ex_args { 1352struct nvgpu_alloc_gpfifo_ex_args {
1349 __u32 num_entries; 1353 __u32 num_entries;
1350 __u32 num_inflight_jobs; 1354 __u32 num_inflight_jobs;
1351#define NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED (1 << 0) /* set owner channel of this gpfifo as a vpr channel */ 1355/* Set owner channel of this gpfifo as a vpr channel. */
1352#define NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC (1 << 1) /* channel shall exhibit deterministic behavior in the submit path */ 1356#define NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED (1 << 0)
1357/*
1358 * Channel shall exhibit deterministic behavior in the submit path.
1359 *
1360 * With this flag, any submits with in-kernel job tracking also require that
1361 * num_inflight_jobs is nonzero, and additionally that
1362 * NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL is found in gpu
1363 * characteristics.flags.
1364 *
1365 * Note that fast submits (with no in-kernel job tracking) are also
1366 * deterministic and are supported if the characteristics flags contain
1367 * NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING; neither
1368 * this flag nor num_inflight_jobs is necessary in that case.
1369 */
1370#define NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC (1 << 1)
1353 __u32 flags; 1371 __u32 flags;
1354 __u32 reserved[5]; 1372 __u32 reserved[5];
1355}; 1373};