author    Felix Kuehling <Felix.Kuehling@amd.com>  2018-03-15 17:27:51 -0400
committer Oded Gabbay <oded.gabbay@gmail.com>  2018-03-15 17:27:51 -0400
commit    5ec7e02854b3b9b55936c3b44b8acfb85e333f49 (patch)
tree      6b0af8e325673361dbeb172f667ceaac88afb868 /drivers
parent    552764b680a65d6069ad651b356d5465082939d0 (diff)
drm/amdkfd: Add ioctls for GPUVM memory management
v2: * Fix error handling after kfd_bind_process_to_device in
      kfd_ioctl_map_memory_to_gpu
v3: * Add ioctl to acquire VM from a DRM FD
v4: * Return number of successful map/unmap operations in failure cases
    * Facilitate partial retry after failed map/unmap
    * Added comments with parameter descriptions to new APIs
    * Defined AMDKFD_IOC_FREE_MEMORY_OF_GPU write-only

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
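[Editor's note] The intended call sequence for the new ioctls -- acquire a VM from a DRM render node, allocate GPU memory, then map it -- can be sketched from userspace. This is a hedged sketch, assuming the uapi struct and flag names added by the companion kfd_ioctl.h patch; error handling is abbreviated:

/* Hedged userspace sketch (not part of this patch): exercising the new
 * ioctls in the order the driver expects.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include "kfd_ioctl.h"	/* uapi header from the companion patch */

int example(int kfd_fd, int drm_render_fd, uint32_t gpu_id)
{
	/* 1. Hand the DRM render-node FD to KFD so it can take over the VM */
	struct kfd_ioctl_acquire_vm_args acquire = {
		.drm_fd = drm_render_fd,
		.gpu_id = gpu_id,
	};
	if (ioctl(kfd_fd, AMDKFD_IOC_ACQUIRE_VM, &acquire))
		return -1;

	/* 2. Allocate VRAM at a chosen GPU virtual address (example VA) */
	struct kfd_ioctl_alloc_memory_of_gpu_args alloc = {
		.va_addr = 0x200000000ull,
		.size = 2 * 1024 * 1024,
		.gpu_id = gpu_id,
		.flags = KFD_IOC_ALLOC_MEM_FLAGS_VRAM |
			 KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE,
	};
	if (ioctl(kfd_fd, AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, &alloc))
		return -1;

	/* 3. Map the allocation; the returned handle encodes gpu_id + IDR */
	uint32_t devices[] = { gpu_id };
	struct kfd_ioctl_map_memory_to_gpu_args map = {
		.handle = alloc.handle,
		.device_ids_array_ptr = (uintptr_t)devices,
		.n_devices = 1,
		.n_success = 0,
	};
	return ioctl(kfd_fd, AMDKFD_IOC_MAP_MEMORY_TO_GPU, &map);
}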
Diffstat (limited to 'drivers')
-rw-r--r--	drivers/gpu/drm/amd/amdkfd/kfd_chardev.c	377
-rw-r--r--	drivers/gpu/drm/amd/amdkfd/kfd_priv.h	8
-rw-r--r--	drivers/gpu/drm/amd/include/kgd_kfd_interface.h	2
3 files changed, 387 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 7d4009418ec3..a563ff2ca7dd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -24,6 +24,7 @@
 #include <linux/export.h>
 #include <linux/err.h>
 #include <linux/fs.h>
+#include <linux/file.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
@@ -1046,6 +1047,366 @@ static int kfd_ioctl_get_tile_config(struct file *filep,
 	return 0;
 }
 
+static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
+				void *data)
+{
+	struct kfd_ioctl_acquire_vm_args *args = data;
+	struct kfd_process_device *pdd;
+	struct kfd_dev *dev;
+	struct file *drm_file;
+	int ret;
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (!dev)
+		return -EINVAL;
+
+	drm_file = fget(args->drm_fd);
+	if (!drm_file)
+		return -EINVAL;
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_get_process_device_data(dev, p);
+	if (!pdd) {
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	if (pdd->drm_file) {
+		ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
+		goto err_unlock;
+	}
+
+	ret = kfd_process_device_init_vm(pdd, drm_file);
+	if (ret)
+		goto err_unlock;
+	/* On success, the PDD keeps the drm_file reference */
+	mutex_unlock(&p->mutex);
+
+	return 0;
+
+err_unlock:
+	mutex_unlock(&p->mutex);
+	fput(drm_file);
+	return ret;
+}
+
+bool kfd_dev_is_large_bar(struct kfd_dev *dev)
+{
+	struct kfd_local_mem_info mem_info;
+
+	if (dev->device_info->needs_iommu_device)
+		return false;
+
+	dev->kfd2kgd->get_local_mem_info(dev->kgd, &mem_info);
+	if (mem_info.local_mem_size_private == 0 &&
+	    mem_info.local_mem_size_public > 0)
+		return true;
+	return false;
+}
+
+static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
+	struct kfd_process_device *pdd;
+	void *mem;
+	struct kfd_dev *dev;
+	int idr_handle;
+	long err;
+	uint64_t offset = args->mmap_offset;
+	uint32_t flags = args->flags;
+
+	if (args->size == 0)
+		return -EINVAL;
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (!dev)
+		return -EINVAL;
+
+	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
+		(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
+		!kfd_dev_is_large_bar(dev)) {
+		pr_err("Alloc host visible vram on small bar is not allowed\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd)) {
+		err = PTR_ERR(pdd);
+		goto err_unlock;
+	}
+
+	err = dev->kfd2kgd->alloc_memory_of_gpu(
+		dev->kgd, args->va_addr, args->size,
+		pdd->vm, (struct kgd_mem **) &mem, &offset,
+		flags);
+
+	if (err)
+		goto err_unlock;
+
+	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
+	if (idr_handle < 0) {
+		err = -EFAULT;
+		goto err_free;
+	}
+
+	mutex_unlock(&p->mutex);
+
+	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
+	args->mmap_offset = offset;
+
+	return 0;
+
+err_free:
+	dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
+err_unlock:
+	mutex_unlock(&p->mutex);
+	return err;
+}
+
+static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_free_memory_of_gpu_args *args = data;
+	struct kfd_process_device *pdd;
+	void *mem;
+	struct kfd_dev *dev;
+	int ret;
+
+	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
+	if (!dev)
+		return -EINVAL;
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_get_process_device_data(dev, p);
+	if (!pdd) {
+		pr_err("Process device data doesn't exist\n");
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	mem = kfd_process_device_translate_handle(
+		pdd, GET_IDR_HANDLE(args->handle));
+	if (!mem) {
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	ret = dev->kfd2kgd->free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
+
+	/* If freeing the buffer failed, leave the handle in place for
+	 * clean-up during process tear-down.
+	 */
+	if (!ret)
+		kfd_process_device_remove_obj_handle(
+			pdd, GET_IDR_HANDLE(args->handle));
+
+err_unlock:
+	mutex_unlock(&p->mutex);
+	return ret;
+}
+
+static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_map_memory_to_gpu_args *args = data;
+	struct kfd_process_device *pdd, *peer_pdd;
+	void *mem;
+	struct kfd_dev *dev, *peer;
+	long err = 0;
+	int i;
+	uint32_t *devices_arr = NULL;
+
+	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
+	if (!dev)
+		return -EINVAL;
+
+	if (!args->n_devices) {
+		pr_debug("Device IDs array empty\n");
+		return -EINVAL;
+	}
+	if (args->n_success > args->n_devices) {
+		pr_debug("n_success exceeds n_devices\n");
+		return -EINVAL;
+	}
+
+	devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
+			      GFP_KERNEL);
+	if (!devices_arr)
+		return -ENOMEM;
+
+	err = copy_from_user(devices_arr,
+			     (void __user *)args->device_ids_array_ptr,
+			     args->n_devices * sizeof(*devices_arr));
+	if (err != 0) {
+		err = -EFAULT;
+		goto copy_from_user_failed;
+	}
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd)) {
+		err = PTR_ERR(pdd);
+		goto bind_process_to_device_failed;
+	}
+
+	mem = kfd_process_device_translate_handle(pdd,
+						  GET_IDR_HANDLE(args->handle));
+	if (!mem) {
+		err = -ENOMEM;
+		goto get_mem_obj_from_handle_failed;
+	}
+
+	for (i = args->n_success; i < args->n_devices; i++) {
+		peer = kfd_device_by_id(devices_arr[i]);
+		if (!peer) {
+			pr_debug("Getting device by id failed for 0x%x\n",
+				 devices_arr[i]);
+			err = -EINVAL;
+			goto get_mem_obj_from_handle_failed;
+		}
+
+		peer_pdd = kfd_bind_process_to_device(peer, p);
+		if (IS_ERR(peer_pdd)) {
+			err = PTR_ERR(peer_pdd);
+			goto get_mem_obj_from_handle_failed;
+		}
+		err = peer->kfd2kgd->map_memory_to_gpu(
+			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
+		if (err) {
+			pr_err("Failed to map to gpu %d/%d\n",
+			       i, args->n_devices);
+			goto map_memory_to_gpu_failed;
+		}
+		args->n_success = i+1;
+	}
+
+	mutex_unlock(&p->mutex);
+
+	err = dev->kfd2kgd->sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
+	if (err) {
+		pr_debug("Sync memory failed, wait interrupted by user signal\n");
+		goto sync_memory_failed;
+	}
+
+	/* Flush TLBs after waiting for the page table updates to complete */
+	for (i = 0; i < args->n_devices; i++) {
+		peer = kfd_device_by_id(devices_arr[i]);
+		if (WARN_ON_ONCE(!peer))
+			continue;
+		peer_pdd = kfd_get_process_device_data(peer, p);
+		if (WARN_ON_ONCE(!peer_pdd))
+			continue;
+		kfd_flush_tlb(peer_pdd);
+	}
+
+	kfree(devices_arr);
+
+	return err;
+
+bind_process_to_device_failed:
+get_mem_obj_from_handle_failed:
+map_memory_to_gpu_failed:
+	mutex_unlock(&p->mutex);
+copy_from_user_failed:
+sync_memory_failed:
+	kfree(devices_arr);
+
+	return err;
+}
+
+static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
+	struct kfd_process_device *pdd, *peer_pdd;
+	void *mem;
+	struct kfd_dev *dev, *peer;
+	long err = 0;
+	uint32_t *devices_arr = NULL, i;
+
+	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
+	if (!dev)
+		return -EINVAL;
+
+	if (!args->n_devices) {
+		pr_debug("Device IDs array empty\n");
+		return -EINVAL;
+	}
+	if (args->n_success > args->n_devices) {
+		pr_debug("n_success exceeds n_devices\n");
+		return -EINVAL;
+	}
+
+	devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
+			      GFP_KERNEL);
+	if (!devices_arr)
+		return -ENOMEM;
+
+	err = copy_from_user(devices_arr,
+			     (void __user *)args->device_ids_array_ptr,
+			     args->n_devices * sizeof(*devices_arr));
+	if (err != 0) {
+		err = -EFAULT;
+		goto copy_from_user_failed;
+	}
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_get_process_device_data(dev, p);
+	if (!pdd) {
+		err = PTR_ERR(pdd);
+		goto bind_process_to_device_failed;
+	}
+
+	mem = kfd_process_device_translate_handle(pdd,
+						  GET_IDR_HANDLE(args->handle));
+	if (!mem) {
+		err = -ENOMEM;
+		goto get_mem_obj_from_handle_failed;
+	}
+
+	for (i = args->n_success; i < args->n_devices; i++) {
+		peer = kfd_device_by_id(devices_arr[i]);
+		if (!peer) {
+			err = -EINVAL;
+			goto get_mem_obj_from_handle_failed;
+		}
+
+		peer_pdd = kfd_get_process_device_data(peer, p);
+		if (!peer_pdd) {
+			err = -ENODEV;
+			goto get_mem_obj_from_handle_failed;
+		}
+		err = dev->kfd2kgd->unmap_memory_to_gpu(
+			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
+		if (err) {
+			pr_err("Failed to unmap from gpu %d/%d\n",
+			       i, args->n_devices);
+			goto unmap_memory_from_gpu_failed;
+		}
+		args->n_success = i+1;
+	}
+	kfree(devices_arr);
+
+	mutex_unlock(&p->mutex);
+
+	return 0;
+
+bind_process_to_device_failed:
+get_mem_obj_from_handle_failed:
+unmap_memory_from_gpu_failed:
+	mutex_unlock(&p->mutex);
+copy_from_user_failed:
+	kfree(devices_arr);
+	return err;
+}
+
 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
 	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
 			    .cmd_drv = 0, .name = #ioctl}
@@ -1111,6 +1472,22 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 
 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
 			kfd_ioctl_get_process_apertures_new, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
+			kfd_ioctl_acquire_vm, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
+			kfd_ioctl_alloc_memory_of_gpu, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
+			kfd_ioctl_free_memory_of_gpu, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
+			kfd_ioctl_map_memory_to_gpu, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
+			kfd_ioctl_unmap_memory_from_gpu, 0),
+
 };
 
 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
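
[Editor's note] Because the map/unmap ioctls update args->n_success as each device succeeds, userspace can resume a partially failed operation instead of restarting it. A hedged retry sketch, assuming the companion kfd_ioctl.h uapi (MAX_RETRIES is an illustrative bound, not part of the API, and whether a given errno is worth retrying is a policy choice):

/* Hedged sketch: retry a multi-GPU mapping, resuming at n_success.
 * The kernel leaves n_success at the count of devices already mapped,
 * so re-issuing the same ioctl skips the completed ones.
 */
#include <errno.h>
#include <sys/ioctl.h>
#include "kfd_ioctl.h"

#define MAX_RETRIES 3	/* illustrative bound */

static int map_with_retry(int kfd_fd,
			  struct kfd_ioctl_map_memory_to_gpu_args *args)
{
	int tries, ret = 0;

	for (tries = 0; tries < MAX_RETRIES; tries++) {
		ret = ioctl(kfd_fd, AMDKFD_IOC_MAP_MEMORY_TO_GPU, args);
		if (!ret || errno != EINTR)
			break;	/* success, or a non-retryable error */
		/* args->n_success already reflects the mapped devices */
	}
	return ret;
}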
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index aaed005ce1f5..1542807373d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -509,6 +509,14 @@ struct qcm_process_device {
 int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
 					       struct dma_fence *fence);
 
+/* 8 byte handle containing GPU ID in the most significant 4 bytes and
+ * idr_handle in the least significant 4 bytes
+ */
+#define MAKE_HANDLE(gpu_id, idr_handle) \
+	(((uint64_t)(gpu_id) << 32) + idr_handle)
+#define GET_GPU_ID(handle) (handle >> 32)
+#define GET_IDR_HANDLE(handle) (handle & 0xFFFFFFFF)
+
 enum kfd_pdd_bound {
 	PDD_UNBOUND = 0,
 	PDD_BOUND,
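
[Editor's note] The handle packing added above behaves as in this small standalone sketch (userspace types substituted for kernel ones; the assertion values are arbitrary examples):

/* Standalone illustration of the 64-bit handle layout: GPU ID in the
 * high 32 bits, IDR handle in the low 32 bits.
 */
#include <assert.h>
#include <stdint.h>

#define MAKE_HANDLE(gpu_id, idr_handle) \
	(((uint64_t)(gpu_id) << 32) + (idr_handle))
#define GET_GPU_ID(handle)     ((handle) >> 32)
#define GET_IDR_HANDLE(handle) ((handle) & 0xFFFFFFFF)

int main(void)
{
	uint64_t handle = MAKE_HANDLE(0x2af3u, 7u);

	assert(GET_GPU_ID(handle) == 0x2af3u);
	assert(GET_IDR_HANDLE(handle) == 7u);
	return 0;
}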
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index b1f35c8be2cf..237289a72bb7 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -130,6 +130,7 @@ struct tile_config {
 
 /*
  * Allocation flag domains
+ * NOTE: This must match the corresponding definitions in kfd_ioctl.h.
  */
 #define ALLOC_MEM_FLAGS_VRAM		(1 << 0)
 #define ALLOC_MEM_FLAGS_GTT		(1 << 1)
@@ -138,6 +139,7 @@ struct tile_config {
 
 /*
  * Allocation flags attributes/access options.
+ * NOTE: This must match the corresponding definitions in kfd_ioctl.h.
  */
 #define ALLOC_MEM_FLAGS_WRITABLE	(1 << 31)
 #define ALLOC_MEM_FLAGS_EXECUTABLE	(1 << 30)
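
[Editor's note] The "must match" requirement in the NOTE comments could be enforced at compile time rather than by convention. A hedged sketch, not part of this patch, restricted to the flags shown in the diff and assuming the KFD_IOC_* values from the companion kfd_ioctl.h change:

/* Hedged sketch: compile-time check that the kgd_kfd_interface.h flag
 * values stay in sync with the uapi flags in kfd_ioctl.h, as the NOTE
 * requires. Shown only to illustrate the invariant.
 */
#include <linux/build_bug.h>

static inline void kfd_check_alloc_flags(void)
{
	BUILD_BUG_ON(ALLOC_MEM_FLAGS_VRAM !=
		     KFD_IOC_ALLOC_MEM_FLAGS_VRAM);
	BUILD_BUG_ON(ALLOC_MEM_FLAGS_GTT !=
		     KFD_IOC_ALLOC_MEM_FLAGS_GTT);
	BUILD_BUG_ON(ALLOC_MEM_FLAGS_WRITABLE !=
		     KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE);
	BUILD_BUG_ON(ALLOC_MEM_FLAGS_EXECUTABLE !=
		     KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE);
}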