diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2017-05-02 09:01:51 -0400 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-05-05 10:54:18 -0400 |
commit | ee9733e587d977610975435a84e5af7cabba8870 (patch) | |
tree | 6515a93b3fd6267d1e9626469816747359b197e8 | |
parent | 744e2d202e2d38b0d8ff8b55e1e84daf75e39b48 (diff) |
gpu: nvgpu: expose deterministic submit support
Add these bits in the gpu characteristics flags:
NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING - fast
submits with no in-kernel job tracking are supported.
NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL - deterministic
submits are also supported with job tracking and num_inflight_jobs set.
Either kind of support may still be disabled for a particular channel or
submit that requires features which block it.
Make gk20a_channel_sync_needs_sync_framework() take a gk20a pointer
instead of a channel pointer so that it can be called without a channel.
It does not need any per-channel data.
Bug 200291300
Change-Id: I5f82510b6d39b53bcf6f1006dd83bdd9053963a0
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1456845
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 6 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c | 19 | ||||
-rw-r--r-- | include/uapi/linux/nvgpu.h | 22 |
5 files changed, 43 insertions, 8 deletions
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 9902cb5e..db44d11b 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -2468,7 +2468,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
2468 | return -EINVAL; | 2468 | return -EINVAL; |
2469 | 2469 | ||
2470 | need_sync_framework = force_need_sync_fence || | 2470 | need_sync_framework = force_need_sync_fence || |
2471 | gk20a_channel_sync_needs_sync_framework(c) || | 2471 | gk20a_channel_sync_needs_sync_framework(g) || |
2472 | (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE && | 2472 | (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE && |
2473 | (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT || | 2473 | (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT || |
2474 | flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)); | 2474 | flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)); |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index ddd94a2d..ed83663d 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | |||
@@ -972,9 +972,7 @@ struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c) | |||
972 | return gk20a_channel_semaphore_create(c); | 972 | return gk20a_channel_semaphore_create(c); |
973 | } | 973 | } |
974 | 974 | ||
975 | bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c) | 975 | bool gk20a_channel_sync_needs_sync_framework(struct gk20a *g) |
976 | { | 976 | { |
977 | if (gk20a_platform_has_syncpoints(c->g)) | 977 | return !gk20a_platform_has_syncpoints(g); |
978 | return false; | ||
979 | return true; | ||
980 | } | 978 | } |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h index 8b1f85a1..4efd1b76 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | |||
@@ -99,7 +99,7 @@ struct gk20a_channel_sync { | |||
99 | 99 | ||
100 | void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync); | 100 | void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync); |
101 | struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c); | 101 | struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c); |
102 | bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c); | 102 | bool gk20a_channel_sync_needs_sync_framework(struct gk20a *g); |
103 | 103 | ||
104 | #ifdef CONFIG_SYNC | 104 | #ifdef CONFIG_SYNC |
105 | void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g); | 105 | void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g); |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 8ea2929f..7f110821 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -461,6 +461,25 @@ int gk20a_init_gpu_characteristics(struct gk20a *g) | |||
461 | if (gk20a_platform_has_syncpoints(g)) | 461 | if (gk20a_platform_has_syncpoints(g)) |
462 | gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS; | 462 | gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS; |
463 | 463 | ||
464 | /* | ||
465 | * Railgating needs job tracking which prevents fast submits. They're | ||
466 | * supported otherwise, provided that the user doesn't request anything | ||
467 | * that depends on job tracking. (Here, fast means strictly no | ||
468 | * metadata, just the gpfifo contents are copied and gp_put updated). | ||
469 | */ | ||
470 | if (!platform->can_railgate) | ||
471 | gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING; | ||
472 | |||
473 | /* | ||
474 | * Railgating and sync framework require deferred job cleanup which | ||
475 | * prevents deterministic submits. They're supported otherwise, | ||
476 | * provided that the user doesn't request anything that depends on | ||
477 | * deferred cleanup. | ||
478 | */ | ||
479 | if (!platform->can_railgate | ||
480 | && !gk20a_channel_sync_needs_sync_framework(g)) | ||
481 | gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL; | ||
482 | |||
464 | gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS; | 483 | gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS; |
465 | gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_TSG; | 484 | gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_TSG; |
466 | gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_MAP_COMPBITS; | 485 | gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_MAP_COMPBITS; |
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index e25667cb..401722b1 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h | |||
@@ -137,6 +137,10 @@ struct nvgpu_gpu_zbc_query_table_args { | |||
137 | #define NVGPU_GPU_FLAGS_SUPPORT_FECS_CTXSW_TRACE (1ULL << 16) | 137 | #define NVGPU_GPU_FLAGS_SUPPORT_FECS_CTXSW_TRACE (1ULL << 16) |
138 | /* NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS is available */ | 138 | /* NVGPU_AS_IOCTL_MAP_BUFFER_COMPBITS is available */ |
139 | #define NVGPU_GPU_FLAGS_SUPPORT_MAP_COMPBITS (1ULL << 17) | 139 | #define NVGPU_GPU_FLAGS_SUPPORT_MAP_COMPBITS (1ULL << 17) |
140 | /* Fast deterministic submits with no job tracking are supported */ | ||
141 | #define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING (1ULL << 18) | ||
142 | /* Deterministic submits are supported even with job tracking */ | ||
143 | #define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL (1ULL << 19) | ||
140 | 144 | ||
141 | struct nvgpu_gpu_characteristics { | 145 | struct nvgpu_gpu_characteristics { |
142 | __u32 arch; | 146 | __u32 arch; |
@@ -1348,8 +1352,22 @@ struct nvgpu_alloc_gpfifo_args { | |||
1348 | struct nvgpu_alloc_gpfifo_ex_args { | 1352 | struct nvgpu_alloc_gpfifo_ex_args { |
1349 | __u32 num_entries; | 1353 | __u32 num_entries; |
1350 | __u32 num_inflight_jobs; | 1354 | __u32 num_inflight_jobs; |
1351 | #define NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED (1 << 0) /* set owner channel of this gpfifo as a vpr channel */ | 1355 | /* Set owner channel of this gpfifo as a vpr channel. */ |
1352 | #define NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC (1 << 1) /* channel shall exhibit deterministic behavior in the submit path */ | 1356 | #define NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED (1 << 0) |
1357 | /* | ||
1358 | * Channel shall exhibit deterministic behavior in the submit path. | ||
1359 | * | ||
1360 | * With this flag, any submits with in-kernel job tracking also require that | ||
1361 | * num_inflight_jobs is nonzero, and additionally that | ||
1362 | * NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL is found in gpu | ||
1363 | * characteristics.flags. | ||
1364 | * | ||
1365 | * Note that fast submits (with no in-kernel job tracking) are also | ||
1366 | * deterministic and are supported if the characteristics flags contain | ||
1367 | * NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING; this flag or | ||
1368 | * num_inflight_jobs are not necessary in that case. | ||
1369 | */ | ||
1370 | #define NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC (1 << 1) | ||
1353 | __u32 flags; | 1371 | __u32 flags; |
1354 | __u32 reserved[5]; | 1372 | __u32 reserved[5]; |
1355 | }; | 1373 | }; |