diff options
author | Felix Kuehling <Felix.Kuehling@amd.com> | 2018-03-15 17:27:51 -0400 |
---|---|---|
committer | Oded Gabbay <oded.gabbay@gmail.com> | 2018-03-15 17:27:51 -0400 |
commit | 5ec7e02854b3b9b55936c3b44b8acfb85e333f49 (patch) | |
tree | 6b0af8e325673361dbeb172f667ceaac88afb868 /drivers | |
parent | 552764b680a65d6069ad651b356d5465082939d0 (diff) |
drm/amdkfd: Add ioctls for GPUVM memory management
v2:
* Fix error handling after kfd_bind_process_to_device in
kfd_ioctl_map_memory_to_gpu
v3:
* Add ioctl to acquire VM from a DRM FD
v4:
* Return number of successful map/unmap operations in failure cases
* Facilitate partial retry after failed map/unmap
* Added comments with parameter descriptions to new APIs
* Defined AMDKFD_IOC_FREE_MEMORY_OF_GPU write-only
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 377 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 8 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 2 |
3 files changed, 387 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 7d4009418ec3..a563ff2ca7dd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/export.h> | 24 | #include <linux/export.h> |
25 | #include <linux/err.h> | 25 | #include <linux/err.h> |
26 | #include <linux/fs.h> | 26 | #include <linux/fs.h> |
27 | #include <linux/file.h> | ||
27 | #include <linux/sched.h> | 28 | #include <linux/sched.h> |
28 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
29 | #include <linux/uaccess.h> | 30 | #include <linux/uaccess.h> |
@@ -1046,6 +1047,366 @@ static int kfd_ioctl_get_tile_config(struct file *filep, | |||
1046 | return 0; | 1047 | return 0; |
1047 | } | 1048 | } |
1048 | 1049 | ||
1050 | static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p, | ||
1051 | void *data) | ||
1052 | { | ||
1053 | struct kfd_ioctl_acquire_vm_args *args = data; | ||
1054 | struct kfd_process_device *pdd; | ||
1055 | struct kfd_dev *dev; | ||
1056 | struct file *drm_file; | ||
1057 | int ret; | ||
1058 | |||
1059 | dev = kfd_device_by_id(args->gpu_id); | ||
1060 | if (!dev) | ||
1061 | return -EINVAL; | ||
1062 | |||
1063 | drm_file = fget(args->drm_fd); | ||
1064 | if (!drm_file) | ||
1065 | return -EINVAL; | ||
1066 | |||
1067 | mutex_lock(&p->mutex); | ||
1068 | |||
1069 | pdd = kfd_get_process_device_data(dev, p); | ||
1070 | if (!pdd) { | ||
1071 | ret = -EINVAL; | ||
1072 | goto err_unlock; | ||
1073 | } | ||
1074 | |||
1075 | if (pdd->drm_file) { | ||
1076 | ret = pdd->drm_file == drm_file ? 0 : -EBUSY; | ||
1077 | goto err_unlock; | ||
1078 | } | ||
1079 | |||
1080 | ret = kfd_process_device_init_vm(pdd, drm_file); | ||
1081 | if (ret) | ||
1082 | goto err_unlock; | ||
1083 | /* On success, the PDD keeps the drm_file reference */ | ||
1084 | mutex_unlock(&p->mutex); | ||
1085 | |||
1086 | return 0; | ||
1087 | |||
1088 | err_unlock: | ||
1089 | mutex_unlock(&p->mutex); | ||
1090 | fput(drm_file); | ||
1091 | return ret; | ||
1092 | } | ||
1093 | |||
1094 | bool kfd_dev_is_large_bar(struct kfd_dev *dev) | ||
1095 | { | ||
1096 | struct kfd_local_mem_info mem_info; | ||
1097 | |||
1098 | if (dev->device_info->needs_iommu_device) | ||
1099 | return false; | ||
1100 | |||
1101 | dev->kfd2kgd->get_local_mem_info(dev->kgd, &mem_info); | ||
1102 | if (mem_info.local_mem_size_private == 0 && | ||
1103 | mem_info.local_mem_size_public > 0) | ||
1104 | return true; | ||
1105 | return false; | ||
1106 | } | ||
1107 | |||
1108 | static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, | ||
1109 | struct kfd_process *p, void *data) | ||
1110 | { | ||
1111 | struct kfd_ioctl_alloc_memory_of_gpu_args *args = data; | ||
1112 | struct kfd_process_device *pdd; | ||
1113 | void *mem; | ||
1114 | struct kfd_dev *dev; | ||
1115 | int idr_handle; | ||
1116 | long err; | ||
1117 | uint64_t offset = args->mmap_offset; | ||
1118 | uint32_t flags = args->flags; | ||
1119 | |||
1120 | if (args->size == 0) | ||
1121 | return -EINVAL; | ||
1122 | |||
1123 | dev = kfd_device_by_id(args->gpu_id); | ||
1124 | if (!dev) | ||
1125 | return -EINVAL; | ||
1126 | |||
1127 | if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) && | ||
1128 | (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) && | ||
1129 | !kfd_dev_is_large_bar(dev)) { | ||
1130 | pr_err("Alloc host visible vram on small bar is not allowed\n"); | ||
1131 | return -EINVAL; | ||
1132 | } | ||
1133 | |||
1134 | mutex_lock(&p->mutex); | ||
1135 | |||
1136 | pdd = kfd_bind_process_to_device(dev, p); | ||
1137 | if (IS_ERR(pdd)) { | ||
1138 | err = PTR_ERR(pdd); | ||
1139 | goto err_unlock; | ||
1140 | } | ||
1141 | |||
1142 | err = dev->kfd2kgd->alloc_memory_of_gpu( | ||
1143 | dev->kgd, args->va_addr, args->size, | ||
1144 | pdd->vm, (struct kgd_mem **) &mem, &offset, | ||
1145 | flags); | ||
1146 | |||
1147 | if (err) | ||
1148 | goto err_unlock; | ||
1149 | |||
1150 | idr_handle = kfd_process_device_create_obj_handle(pdd, mem); | ||
1151 | if (idr_handle < 0) { | ||
1152 | err = -EFAULT; | ||
1153 | goto err_free; | ||
1154 | } | ||
1155 | |||
1156 | mutex_unlock(&p->mutex); | ||
1157 | |||
1158 | args->handle = MAKE_HANDLE(args->gpu_id, idr_handle); | ||
1159 | args->mmap_offset = offset; | ||
1160 | |||
1161 | return 0; | ||
1162 | |||
1163 | err_free: | ||
1164 | dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem); | ||
1165 | err_unlock: | ||
1166 | mutex_unlock(&p->mutex); | ||
1167 | return err; | ||
1168 | } | ||
1169 | |||
1170 | static int kfd_ioctl_free_memory_of_gpu(struct file *filep, | ||
1171 | struct kfd_process *p, void *data) | ||
1172 | { | ||
1173 | struct kfd_ioctl_free_memory_of_gpu_args *args = data; | ||
1174 | struct kfd_process_device *pdd; | ||
1175 | void *mem; | ||
1176 | struct kfd_dev *dev; | ||
1177 | int ret; | ||
1178 | |||
1179 | dev = kfd_device_by_id(GET_GPU_ID(args->handle)); | ||
1180 | if (!dev) | ||
1181 | return -EINVAL; | ||
1182 | |||
1183 | mutex_lock(&p->mutex); | ||
1184 | |||
1185 | pdd = kfd_get_process_device_data(dev, p); | ||
1186 | if (!pdd) { | ||
1187 | pr_err("Process device data doesn't exist\n"); | ||
1188 | ret = -EINVAL; | ||
1189 | goto err_unlock; | ||
1190 | } | ||
1191 | |||
1192 | mem = kfd_process_device_translate_handle( | ||
1193 | pdd, GET_IDR_HANDLE(args->handle)); | ||
1194 | if (!mem) { | ||
1195 | ret = -EINVAL; | ||
1196 | goto err_unlock; | ||
1197 | } | ||
1198 | |||
1199 | ret = dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem); | ||
1200 | |||
1201 | /* If freeing the buffer failed, leave the handle in place for | ||
1202 | * clean-up during process tear-down. | ||
1203 | */ | ||
1204 | if (!ret) | ||
1205 | kfd_process_device_remove_obj_handle( | ||
1206 | pdd, GET_IDR_HANDLE(args->handle)); | ||
1207 | |||
1208 | err_unlock: | ||
1209 | mutex_unlock(&p->mutex); | ||
1210 | return ret; | ||
1211 | } | ||
1212 | |||
1213 | static int kfd_ioctl_map_memory_to_gpu(struct file *filep, | ||
1214 | struct kfd_process *p, void *data) | ||
1215 | { | ||
1216 | struct kfd_ioctl_map_memory_to_gpu_args *args = data; | ||
1217 | struct kfd_process_device *pdd, *peer_pdd; | ||
1218 | void *mem; | ||
1219 | struct kfd_dev *dev, *peer; | ||
1220 | long err = 0; | ||
1221 | int i; | ||
1222 | uint32_t *devices_arr = NULL; | ||
1223 | |||
1224 | dev = kfd_device_by_id(GET_GPU_ID(args->handle)); | ||
1225 | if (!dev) | ||
1226 | return -EINVAL; | ||
1227 | |||
1228 | if (!args->n_devices) { | ||
1229 | pr_debug("Device IDs array empty\n"); | ||
1230 | return -EINVAL; | ||
1231 | } | ||
1232 | if (args->n_success > args->n_devices) { | ||
1233 | pr_debug("n_success exceeds n_devices\n"); | ||
1234 | return -EINVAL; | ||
1235 | } | ||
1236 | |||
1237 | devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr), | ||
1238 | GFP_KERNEL); | ||
1239 | if (!devices_arr) | ||
1240 | return -ENOMEM; | ||
1241 | |||
1242 | err = copy_from_user(devices_arr, | ||
1243 | (void __user *)args->device_ids_array_ptr, | ||
1244 | args->n_devices * sizeof(*devices_arr)); | ||
1245 | if (err != 0) { | ||
1246 | err = -EFAULT; | ||
1247 | goto copy_from_user_failed; | ||
1248 | } | ||
1249 | |||
1250 | mutex_lock(&p->mutex); | ||
1251 | |||
1252 | pdd = kfd_bind_process_to_device(dev, p); | ||
1253 | if (IS_ERR(pdd)) { | ||
1254 | err = PTR_ERR(pdd); | ||
1255 | goto bind_process_to_device_failed; | ||
1256 | } | ||
1257 | |||
1258 | mem = kfd_process_device_translate_handle(pdd, | ||
1259 | GET_IDR_HANDLE(args->handle)); | ||
1260 | if (!mem) { | ||
1261 | err = -ENOMEM; | ||
1262 | goto get_mem_obj_from_handle_failed; | ||
1263 | } | ||
1264 | |||
1265 | for (i = args->n_success; i < args->n_devices; i++) { | ||
1266 | peer = kfd_device_by_id(devices_arr[i]); | ||
1267 | if (!peer) { | ||
1268 | pr_debug("Getting device by id failed for 0x%x\n", | ||
1269 | devices_arr[i]); | ||
1270 | err = -EINVAL; | ||
1271 | goto get_mem_obj_from_handle_failed; | ||
1272 | } | ||
1273 | |||
1274 | peer_pdd = kfd_bind_process_to_device(peer, p); | ||
1275 | if (IS_ERR(peer_pdd)) { | ||
1276 | err = PTR_ERR(peer_pdd); | ||
1277 | goto get_mem_obj_from_handle_failed; | ||
1278 | } | ||
1279 | err = peer->kfd2kgd->map_memory_to_gpu( | ||
1280 | peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm); | ||
1281 | if (err) { | ||
1282 | pr_err("Failed to map to gpu %d/%d\n", | ||
1283 | i, args->n_devices); | ||
1284 | goto map_memory_to_gpu_failed; | ||
1285 | } | ||
1286 | args->n_success = i+1; | ||
1287 | } | ||
1288 | |||
1289 | mutex_unlock(&p->mutex); | ||
1290 | |||
1291 | err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true); | ||
1292 | if (err) { | ||
1293 | pr_debug("Sync memory failed, wait interrupted by user signal\n"); | ||
1294 | goto sync_memory_failed; | ||
1295 | } | ||
1296 | |||
1297 | /* Flush TLBs after waiting for the page table updates to complete */ | ||
1298 | for (i = 0; i < args->n_devices; i++) { | ||
1299 | peer = kfd_device_by_id(devices_arr[i]); | ||
1300 | if (WARN_ON_ONCE(!peer)) | ||
1301 | continue; | ||
1302 | peer_pdd = kfd_get_process_device_data(peer, p); | ||
1303 | if (WARN_ON_ONCE(!peer_pdd)) | ||
1304 | continue; | ||
1305 | kfd_flush_tlb(peer_pdd); | ||
1306 | } | ||
1307 | |||
1308 | kfree(devices_arr); | ||
1309 | |||
1310 | return err; | ||
1311 | |||
1312 | bind_process_to_device_failed: | ||
1313 | get_mem_obj_from_handle_failed: | ||
1314 | map_memory_to_gpu_failed: | ||
1315 | mutex_unlock(&p->mutex); | ||
1316 | copy_from_user_failed: | ||
1317 | sync_memory_failed: | ||
1318 | kfree(devices_arr); | ||
1319 | |||
1320 | return err; | ||
1321 | } | ||
1322 | |||
1323 | static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, | ||
1324 | struct kfd_process *p, void *data) | ||
1325 | { | ||
1326 | struct kfd_ioctl_unmap_memory_from_gpu_args *args = data; | ||
1327 | struct kfd_process_device *pdd, *peer_pdd; | ||
1328 | void *mem; | ||
1329 | struct kfd_dev *dev, *peer; | ||
1330 | long err = 0; | ||
1331 | uint32_t *devices_arr = NULL, i; | ||
1332 | |||
1333 | dev = kfd_device_by_id(GET_GPU_ID(args->handle)); | ||
1334 | if (!dev) | ||
1335 | return -EINVAL; | ||
1336 | |||
1337 | if (!args->n_devices) { | ||
1338 | pr_debug("Device IDs array empty\n"); | ||
1339 | return -EINVAL; | ||
1340 | } | ||
1341 | if (args->n_success > args->n_devices) { | ||
1342 | pr_debug("n_success exceeds n_devices\n"); | ||
1343 | return -EINVAL; | ||
1344 | } | ||
1345 | |||
1346 | devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr), | ||
1347 | GFP_KERNEL); | ||
1348 | if (!devices_arr) | ||
1349 | return -ENOMEM; | ||
1350 | |||
1351 | err = copy_from_user(devices_arr, | ||
1352 | (void __user *)args->device_ids_array_ptr, | ||
1353 | args->n_devices * sizeof(*devices_arr)); | ||
1354 | if (err != 0) { | ||
1355 | err = -EFAULT; | ||
1356 | goto copy_from_user_failed; | ||
1357 | } | ||
1358 | |||
1359 | mutex_lock(&p->mutex); | ||
1360 | |||
1361 | pdd = kfd_get_process_device_data(dev, p); | ||
1362 | if (!pdd) { | ||
1363 | err = PTR_ERR(pdd); | ||
1364 | goto bind_process_to_device_failed; | ||
1365 | } | ||
1366 | |||
1367 | mem = kfd_process_device_translate_handle(pdd, | ||
1368 | GET_IDR_HANDLE(args->handle)); | ||
1369 | if (!mem) { | ||
1370 | err = -ENOMEM; | ||
1371 | goto get_mem_obj_from_handle_failed; | ||
1372 | } | ||
1373 | |||
1374 | for (i = args->n_success; i < args->n_devices; i++) { | ||
1375 | peer = kfd_device_by_id(devices_arr[i]); | ||
1376 | if (!peer) { | ||
1377 | err = -EINVAL; | ||
1378 | goto get_mem_obj_from_handle_failed; | ||
1379 | } | ||
1380 | |||
1381 | peer_pdd = kfd_get_process_device_data(peer, p); | ||
1382 | if (!peer_pdd) { | ||
1383 | err = -ENODEV; | ||
1384 | goto get_mem_obj_from_handle_failed; | ||
1385 | } | ||
1386 | err = dev->kfd2kgd->unmap_memory_to_gpu( | ||
1387 | peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm); | ||
1388 | if (err) { | ||
1389 | pr_err("Failed to unmap from gpu %d/%d\n", | ||
1390 | i, args->n_devices); | ||
1391 | goto unmap_memory_from_gpu_failed; | ||
1392 | } | ||
1393 | args->n_success = i+1; | ||
1394 | } | ||
1395 | kfree(devices_arr); | ||
1396 | |||
1397 | mutex_unlock(&p->mutex); | ||
1398 | |||
1399 | return 0; | ||
1400 | |||
1401 | bind_process_to_device_failed: | ||
1402 | get_mem_obj_from_handle_failed: | ||
1403 | unmap_memory_from_gpu_failed: | ||
1404 | mutex_unlock(&p->mutex); | ||
1405 | copy_from_user_failed: | ||
1406 | kfree(devices_arr); | ||
1407 | return err; | ||
1408 | } | ||
1409 | |||
1049 | #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ | 1410 | #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ |
1050 | [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ | 1411 | [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ |
1051 | .cmd_drv = 0, .name = #ioctl} | 1412 | .cmd_drv = 0, .name = #ioctl} |
@@ -1111,6 +1472,22 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { | |||
1111 | 1472 | ||
1112 | AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, | 1473 | AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, |
1113 | kfd_ioctl_get_process_apertures_new, 0), | 1474 | kfd_ioctl_get_process_apertures_new, 0), |
1475 | |||
1476 | AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM, | ||
1477 | kfd_ioctl_acquire_vm, 0), | ||
1478 | |||
1479 | AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, | ||
1480 | kfd_ioctl_alloc_memory_of_gpu, 0), | ||
1481 | |||
1482 | AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU, | ||
1483 | kfd_ioctl_free_memory_of_gpu, 0), | ||
1484 | |||
1485 | AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU, | ||
1486 | kfd_ioctl_map_memory_to_gpu, 0), | ||
1487 | |||
1488 | AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU, | ||
1489 | kfd_ioctl_unmap_memory_from_gpu, 0), | ||
1490 | |||
1114 | }; | 1491 | }; |
1115 | 1492 | ||
1116 | #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) | 1493 | #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index aaed005ce1f5..1542807373d7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h | |||
@@ -509,6 +509,14 @@ struct qcm_process_device { | |||
509 | int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, | 509 | int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, |
510 | struct dma_fence *fence); | 510 | struct dma_fence *fence); |
511 | 511 | ||
/* 8 byte handle containing GPU ID in the most significant 4 bytes and
 * idr_handle in the least significant 4 bytes.
 * Arguments are parenthesized in the expansions so that expression
 * arguments (e.g. "a | b") are not broken by operator precedence.
 */
#define MAKE_HANDLE(gpu_id, idr_handle) \
	(((uint64_t)(gpu_id) << 32) + (idr_handle))
#define GET_GPU_ID(handle) ((handle) >> 32)
#define GET_IDR_HANDLE(handle) ((handle) & 0xFFFFFFFF)
519 | |||
512 | enum kfd_pdd_bound { | 520 | enum kfd_pdd_bound { |
513 | PDD_UNBOUND = 0, | 521 | PDD_UNBOUND = 0, |
514 | PDD_BOUND, | 522 | PDD_BOUND, |
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index b1f35c8be2cf..237289a72bb7 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h | |||
@@ -130,6 +130,7 @@ struct tile_config { | |||
130 | 130 | ||
131 | /* | 131 | /* |
132 | * Allocation flag domains | 132 | * Allocation flag domains |
133 | * NOTE: This must match the corresponding definitions in kfd_ioctl.h. | ||
133 | */ | 134 | */ |
134 | #define ALLOC_MEM_FLAGS_VRAM (1 << 0) | 135 | #define ALLOC_MEM_FLAGS_VRAM (1 << 0) |
135 | #define ALLOC_MEM_FLAGS_GTT (1 << 1) | 136 | #define ALLOC_MEM_FLAGS_GTT (1 << 1) |
@@ -138,6 +139,7 @@ struct tile_config { | |||
138 | 139 | ||
139 | /* | 140 | /* |
140 | * Allocation flags attributes/access options. | 141 | * Allocation flags attributes/access options. |
142 | * NOTE: This must match the corresponding definitions in kfd_ioctl.h. | ||
141 | */ | 143 | */ |
142 | #define ALLOC_MEM_FLAGS_WRITABLE (1 << 31) | 144 | #define ALLOC_MEM_FLAGS_WRITABLE (1 << 31) |
143 | #define ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30) | 145 | #define ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30) |