diff options
author | Konsta Holtta <kholtta@nvidia.com> | 2017-11-06 07:25:47 -0500 |
---|---|---|
committer | mobile promotions <svcmobile_promotions@nvidia.com> | 2017-11-06 15:27:35 -0500 |
commit | 8bdce5337ee5f4d1e1f6d4c7b2dc0abe4a532893 (patch) | |
tree | 26274011c5223478744127fe0edf60ca99bd1a35 | |
parent | 1480afeb013decec1d5451fd0d3eeaffa8e17bb6 (diff) |
gpu: nvgpu: support tuning per-ch deterministic opts
Add a new ioctl NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS to adjust
deterministic options on a per-channel basis. Currently, the only
supported option is to relax the no-railgating requirement on open
deterministic channels. This also disallows submits on such channels,
until the railgate option is reset.
Bug 200327089
Change-Id: If4f0f51fd1d40ad7407d13638150d7402479aff0
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1554563
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/channel.c | 11 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c | 115 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 5 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/gk20a/gk20a.c | 2 | ||||
-rw-r--r-- | drivers/gpu/nvgpu/include/nvgpu/enabled.h | 3 | ||||
-rw-r--r-- | include/uapi/linux/nvgpu.h | 44 |
7 files changed, 179 insertions, 3 deletions
diff --git a/drivers/gpu/nvgpu/common/linux/channel.c b/drivers/gpu/nvgpu/common/linux/channel.c index 716c5820..c295336f 100644 --- a/drivers/gpu/nvgpu/common/linux/channel.c +++ b/drivers/gpu/nvgpu/common/linux/channel.c | |||
@@ -537,6 +537,17 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, | |||
537 | if (c->deterministic) | 537 | if (c->deterministic) |
538 | nvgpu_rwsem_down_read(&g->deterministic_busy); | 538 | nvgpu_rwsem_down_read(&g->deterministic_busy); |
539 | 539 | ||
540 | if (c->deterministic && c->deterministic_railgate_allowed) { | ||
541 | /* | ||
542 | * Nope - this channel has dropped its own power ref. As | ||
543 | * deterministic submits don't hold power on for each submitted | ||
544 | * job like normal ones do, the GPU might railgate any time now | ||
545 | * and thus submit is disallowed. | ||
546 | */ | ||
547 | err = -EINVAL; | ||
548 | goto clean_up; | ||
549 | } | ||
550 | |||
540 | trace_gk20a_channel_submit_gpfifo(g->name, | 551 | trace_gk20a_channel_submit_gpfifo(g->name, |
541 | c->chid, | 552 | c->chid, |
542 | num_entries, | 553 | num_entries, |
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c index 73911717..08a831b9 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c +++ b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c | |||
@@ -161,6 +161,8 @@ static struct nvgpu_flags_mapping flags_mapping[] = { | |||
161 | NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING}, | 161 | NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_NO_JOBTRACKING}, |
162 | {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL, | 162 | {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_SUBMIT_FULL, |
163 | NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL}, | 163 | NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL}, |
164 | {NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_OPTS, | ||
165 | NVGPU_SUPPORT_DETERMINISTIC_OPTS}, | ||
164 | {NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE, | 166 | {NVGPU_GPU_FLAGS_SUPPORT_IO_COHERENCE, |
165 | NVGPU_SUPPORT_IO_COHERENCE}, | 167 | NVGPU_SUPPORT_IO_COHERENCE}, |
166 | {NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST, | 168 | {NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST, |
@@ -1319,6 +1321,114 @@ static int nvgpu_gpu_set_therm_alert_limit(struct gk20a *g, | |||
1319 | return err; | 1321 | return err; |
1320 | } | 1322 | } |
1321 | 1323 | ||
1324 | static int nvgpu_gpu_set_deterministic_ch_railgate(struct channel_gk20a *ch, | ||
1325 | u32 flags) | ||
1326 | { | ||
1327 | int err = 0; | ||
1328 | bool allow; | ||
1329 | bool disallow; | ||
1330 | |||
1331 | allow = flags & | ||
1332 | NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_ALLOW_RAILGATING; | ||
1333 | |||
1334 | disallow = flags & | ||
1335 | NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_DISALLOW_RAILGATING; | ||
1336 | |||
1337 | /* Can't be both at the same time */ | ||
1338 | if (allow && disallow) | ||
1339 | return -EINVAL; | ||
1340 | |||
1341 | /* Nothing to do */ | ||
1342 | if (!allow && !disallow) | ||
1343 | return 0; | ||
1344 | |||
1345 | /* | ||
1346 | * Moving into explicit idle or back from it? A call that doesn't | ||
1347 | * change the status is a no-op. | ||
1348 | */ | ||
1349 | if (!ch->deterministic_railgate_allowed && | ||
1350 | allow) { | ||
1351 | gk20a_idle(ch->g); | ||
1352 | } else if (ch->deterministic_railgate_allowed && | ||
1353 | !allow) { | ||
1354 | err = gk20a_busy(ch->g); | ||
1355 | if (err) { | ||
1356 | nvgpu_warn(ch->g, | ||
1357 | "cannot busy to restore deterministic ch"); | ||
1358 | return err; | ||
1359 | } | ||
1360 | } | ||
1361 | ch->deterministic_railgate_allowed = allow; | ||
1362 | |||
1363 | return err; | ||
1364 | } | ||
1365 | |||
1366 | static int nvgpu_gpu_set_deterministic_ch(struct channel_gk20a *ch, u32 flags) | ||
1367 | { | ||
1368 | if (!ch->deterministic) | ||
1369 | return -EINVAL; | ||
1370 | |||
1371 | return nvgpu_gpu_set_deterministic_ch_railgate(ch, flags); | ||
1372 | } | ||
1373 | |||
1374 | static int nvgpu_gpu_set_deterministic_opts(struct gk20a *g, | ||
1375 | struct nvgpu_gpu_set_deterministic_opts_args *args) | ||
1376 | { | ||
1377 | int __user *user_channels; | ||
1378 | u32 i = 0; | ||
1379 | int err = 0; | ||
1380 | |||
1381 | gk20a_dbg_fn(""); | ||
1382 | |||
1383 | user_channels = (int __user *)(uintptr_t)args->channels; | ||
1384 | |||
1385 | /* Upper limit; prevent holding deterministic_busy for long */ | ||
1386 | if (args->num_channels > g->fifo.num_channels) { | ||
1387 | err = -EINVAL; | ||
1388 | goto out; | ||
1389 | } | ||
1390 | |||
1391 | /* Trivial sanity check first */ | ||
1392 | if (!access_ok(VERIFY_READ, user_channels, | ||
1393 | args->num_channels * sizeof(int))) { | ||
1394 | err = -EFAULT; | ||
1395 | goto out; | ||
1396 | } | ||
1397 | |||
1398 | nvgpu_rwsem_down_read(&g->deterministic_busy); | ||
1399 | |||
1400 | /* note: we exit at the first failure */ | ||
1401 | for (; i < args->num_channels; i++) { | ||
1402 | int ch_fd = 0; | ||
1403 | struct channel_gk20a *ch; | ||
1404 | |||
1405 | if (copy_from_user(&ch_fd, &user_channels[i], sizeof(int))) { | ||
1406 | /* User raced with above access_ok */ | ||
1407 | err = -EFAULT; | ||
1408 | break; | ||
1409 | } | ||
1410 | |||
1411 | ch = gk20a_get_channel_from_file(ch_fd); | ||
1412 | if (!ch) { | ||
1413 | err = -EINVAL; | ||
1414 | break; | ||
1415 | } | ||
1416 | |||
1417 | err = nvgpu_gpu_set_deterministic_ch(ch, args->flags); | ||
1418 | |||
1419 | gk20a_channel_put(ch); | ||
1420 | |||
1421 | if (err) | ||
1422 | break; | ||
1423 | } | ||
1424 | |||
1425 | nvgpu_rwsem_up_read(&g->deterministic_busy); | ||
1426 | |||
1427 | out: | ||
1428 | args->num_channels = i; | ||
1429 | return err; | ||
1430 | } | ||
1431 | |||
1322 | long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 1432 | long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
1323 | { | 1433 | { |
1324 | struct gk20a_ctrl_priv *priv = filp->private_data; | 1434 | struct gk20a_ctrl_priv *priv = filp->private_data; |
@@ -1633,6 +1743,11 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg | |||
1633 | (struct nvgpu_gpu_set_therm_alert_limit_args *)buf); | 1743 | (struct nvgpu_gpu_set_therm_alert_limit_args *)buf); |
1634 | break; | 1744 | break; |
1635 | 1745 | ||
1746 | case NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS: | ||
1747 | err = nvgpu_gpu_set_deterministic_opts(g, | ||
1748 | (struct nvgpu_gpu_set_deterministic_opts_args *)buf); | ||
1749 | break; | ||
1750 | |||
1636 | default: | 1751 | default: |
1637 | gk20a_dbg_info("unrecognized gpu ioctl cmd: 0x%x", cmd); | 1752 | gk20a_dbg_info("unrecognized gpu ioctl cmd: 0x%x", cmd); |
1638 | err = -ENOTTY; | 1753 | err = -ENOTTY; |
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 805902eb..6c607ae2 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c | |||
@@ -578,7 +578,10 @@ unbind: | |||
578 | if (ch->deterministic) { | 578 | if (ch->deterministic) { |
579 | nvgpu_rwsem_down_read(&g->deterministic_busy); | 579 | nvgpu_rwsem_down_read(&g->deterministic_busy); |
580 | ch->deterministic = false; | 580 | ch->deterministic = false; |
581 | gk20a_idle(g); | 581 | if (!ch->deterministic_railgate_allowed) |
582 | gk20a_idle(g); | ||
583 | ch->deterministic_railgate_allowed = false; | ||
584 | |||
582 | nvgpu_rwsem_up_read(&g->deterministic_busy); | 585 | nvgpu_rwsem_up_read(&g->deterministic_busy); |
583 | } | 586 | } |
584 | 587 | ||
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index 762daea9..0cb60200 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h | |||
@@ -208,6 +208,8 @@ struct channel_gk20a { | |||
208 | bool first_init; | 208 | bool first_init; |
209 | bool vpr; | 209 | bool vpr; |
210 | bool deterministic; | 210 | bool deterministic; |
211 | /* deterministic, but explicitly idle and submits disallowed */ | ||
212 | bool deterministic_railgate_allowed; | ||
211 | bool cde; | 213 | bool cde; |
212 | pid_t pid; | 214 | pid_t pid; |
213 | pid_t tgid; | 215 | pid_t tgid; |
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index e8db9d2c..756db415 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c | |||
@@ -439,6 +439,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g) | |||
439 | NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL, | 439 | NVGPU_SUPPORT_DETERMINISTIC_SUBMIT_FULL, |
440 | true); | 440 | true); |
441 | 441 | ||
442 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_DETERMINISTIC_OPTS, true); | ||
443 | |||
442 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_USERSPACE_MANAGED_AS, true); | 444 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_USERSPACE_MANAGED_AS, true); |
443 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_TSG, true); | 445 | __nvgpu_set_enabled(g, NVGPU_SUPPORT_TSG, true); |
444 | 446 | ||
diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h index 14595497..82f9e6d2 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h +++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h | |||
@@ -91,6 +91,9 @@ struct gk20a; | |||
91 | /* FECS context switch tracing is available */ | 91 | /* FECS context switch tracing is available */ |
92 | #define NVGPU_SUPPORT_FECS_CTXSW_TRACE 38 | 92 | #define NVGPU_SUPPORT_FECS_CTXSW_TRACE 38 |
93 | 93 | ||
94 | /* NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS is available */ | ||
95 | #define NVGPU_SUPPORT_DETERMINISTIC_OPTS 39 | ||
96 | |||
94 | /* | 97 | /* |
95 | * Security flags | 98 | * Security flags |
96 | */ | 99 | */ |
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h index 29541031..b8e1e71c 100644 --- a/include/uapi/linux/nvgpu.h +++ b/include/uapi/linux/nvgpu.h | |||
@@ -148,7 +148,8 @@ struct nvgpu_gpu_zbc_query_table_args { | |||
148 | #define NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST (1ULL << 21) | 148 | #define NVGPU_GPU_FLAGS_SUPPORT_RESCHEDULE_RUNLIST (1ULL << 21) |
149 | /* Direct PTE kind control is supported (map_buffer_ex) */ | 149 | /* Direct PTE kind control is supported (map_buffer_ex) */ |
150 | #define NVGPU_GPU_FLAGS_SUPPORT_MAP_DIRECT_KIND_CTRL (1ULL << 23) | 150 | #define NVGPU_GPU_FLAGS_SUPPORT_MAP_DIRECT_KIND_CTRL (1ULL << 23) |
151 | 151 | /* NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS is available */ | |
152 | #define NVGPU_GPU_FLAGS_SUPPORT_DETERMINISTIC_OPTS (1ULL << 24) | ||
152 | 153 | ||
153 | struct nvgpu_gpu_characteristics { | 154 | struct nvgpu_gpu_characteristics { |
154 | __u32 arch; | 155 | __u32 arch; |
@@ -801,6 +802,42 @@ struct nvgpu_gpu_set_therm_alert_limit_args { | |||
801 | __s32 temp_f24_8; | 802 | __s32 temp_f24_8; |
802 | }; | 803 | }; |
803 | 804 | ||
805 | /* | ||
806 | * Adjust options of deterministic channels in channel batches. | ||
807 | * | ||
808 | * This supports only one option currently: relax railgate blocking by | ||
809 | * "disabling" the channel. | ||
810 | * | ||
811 | * Open deterministic channels do not allow the GPU to railgate by default. It | ||
812 | * may be preferable to hold preopened channel contexts open and idle and still | ||
813 | * railgate the GPU, taking the channels back into use dynamically in userspace | ||
814 | * as an optimization. This ioctl allows dropping or reacquiring the requirement | ||
815 | * to hold GPU power on for individual channels. While railgating is allowed on | ||
816 | * a channel (the ALLOW_RAILGATING flag), no work can be submitted to it. | ||
817 | * | ||
818 | * num_channels is updated to signify how many channels were updated | ||
819 | * successfully. It can be used to test which was the first update to fail. | ||
820 | */ | ||
821 | struct nvgpu_gpu_set_deterministic_opts_args { | ||
822 | __u32 num_channels; /* in/out */ | ||
823 | /* | ||
824 | * Set or unset the railgating reference held by deterministic channels. If | ||
825 | * the channel status is already the same as the flag, this is a no-op. Both | ||
826 | * of these flags cannot be set at the same time. If none are set, the state | ||
827 | * is left as is. | ||
828 | */ | ||
829 | #define NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_ALLOW_RAILGATING (1 << 0) | ||
830 | #define NVGPU_GPU_SET_DETERMINISTIC_OPTS_FLAGS_DISALLOW_RAILGATING (1 << 1) | ||
831 | __u32 flags; /* in */ | ||
832 | /* | ||
833 | * This is a pointer to an array of size num_channels. | ||
834 | * | ||
835 | * The channels have to be valid fds and be previously set as | ||
836 | * deterministic. | ||
837 | */ | ||
838 | __u64 channels; /* in */ | ||
839 | }; | ||
840 | |||
804 | #define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \ | 841 | #define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \ |
805 | _IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args) | 842 | _IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args) |
806 | #define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \ | 843 | #define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \ |
@@ -885,8 +922,11 @@ struct nvgpu_gpu_set_therm_alert_limit_args { | |||
885 | #define NVGPU_GPU_IOCTL_SET_THERM_ALERT_LIMIT \ | 922 | #define NVGPU_GPU_IOCTL_SET_THERM_ALERT_LIMIT \ |
886 | _IOWR(NVGPU_GPU_IOCTL_MAGIC, 39, \ | 923 | _IOWR(NVGPU_GPU_IOCTL_MAGIC, 39, \ |
887 | struct nvgpu_gpu_set_therm_alert_limit_args) | 924 | struct nvgpu_gpu_set_therm_alert_limit_args) |
925 | #define NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS \ | ||
926 | _IOWR(NVGPU_GPU_IOCTL_MAGIC, 40, \ | ||
927 | struct nvgpu_gpu_set_deterministic_opts_args) | ||
888 | #define NVGPU_GPU_IOCTL_LAST \ | 928 | #define NVGPU_GPU_IOCTL_LAST \ |
889 | _IOC_NR(NVGPU_GPU_IOCTL_SET_THERM_ALERT_LIMIT) | 929 | _IOC_NR(NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS) |
890 | #define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \ | 930 | #define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \ |
891 | sizeof(struct nvgpu_gpu_get_cpu_time_correlation_info_args) | 931 | sizeof(struct nvgpu_gpu_get_cpu_time_correlation_info_args) |
892 | 932 | ||