 drivers/gpu/drm/i915/i915_gem.c | 166 ++++++++++++++++++++++++++++++--------
 1 file changed, 139 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 37427e4016cb..35f8c7bd0d32 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -223,29 +223,34 @@ fast_user_write(struct io_mapping *mapping,
  */
 
 static inline int
-slow_user_write(struct io_mapping *mapping,
-		loff_t page_base, int page_offset,
-		char __user *user_data,
-		int length)
+slow_kernel_write(struct io_mapping *mapping,
+		  loff_t gtt_base, int gtt_offset,
+		  struct page *user_page, int user_offset,
+		  int length)
 {
-	char __iomem *vaddr;
+	char *src_vaddr, *dst_vaddr;
 	unsigned long unwritten;
 
-	vaddr = io_mapping_map_wc(mapping, page_base);
-	if (vaddr == NULL)
-		return -EFAULT;
-	unwritten = __copy_from_user(vaddr + page_offset,
-				     user_data, length);
-	io_mapping_unmap(vaddr);
+	dst_vaddr = io_mapping_map_atomic_wc(mapping, gtt_base);
+	src_vaddr = kmap_atomic(user_page, KM_USER1);
+	unwritten = __copy_from_user_inatomic_nocache(dst_vaddr + gtt_offset,
+						      src_vaddr + user_offset,
+						      length);
+	kunmap_atomic(src_vaddr, KM_USER1);
+	io_mapping_unmap_atomic(dst_vaddr);
 	if (unwritten)
 		return -EFAULT;
 	return 0;
 }
 
+/**
+ * This is the fast pwrite path, where we copy the data directly from the
+ * user into the GTT, uncached.
+ */
 static int
-i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
-		    struct drm_i915_gem_pwrite *args,
-		    struct drm_file *file_priv)
+i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
+			 struct drm_i915_gem_pwrite *args,
+			 struct drm_file *file_priv)
 {
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
 	drm_i915_private_t *dev_priv = dev->dev_private;
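
The rewritten slow path differs from the old slow_user_write() in one key way: it copies from a struct page that the caller has already pinned with get_user_pages(), not from a raw user pointer, so the copy cannot fault and both sides can use the non-sleeping atomic mapping variants. A minimal standalone sketch of that pattern follows; the function name copy_pinned_page() is hypothetical, while the mapping and copy calls mirror the ones the patch itself uses (including the two-argument kmap_atomic()/KM_USER1 API of this kernel era):

	/*
	 * Copy len bytes from a page previously pinned with get_user_pages()
	 * into a write-combining GTT mapping.  The source page is pinned and
	 * kmapped, so the copy cannot take a page fault, which makes the
	 * atomic (non-sleeping) mappings safe here.
	 */
	static int copy_pinned_page(struct io_mapping *gtt, loff_t gtt_base,
				    struct page *src_page, int len)
	{
		char *src, *dst;
		unsigned long uncopied;

		dst = io_mapping_map_atomic_wc(gtt, gtt_base);
		src = kmap_atomic(src_page, KM_USER1);
		uncopied = __copy_from_user_inatomic_nocache(dst, src, len);
		kunmap_atomic(src, KM_USER1);
		io_mapping_unmap_atomic(dst);

		return uncopied ? -EFAULT : 0;
	}
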
@@ -273,7 +278,6 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 
 	obj_priv = obj->driver_private;
 	offset = obj_priv->gtt_offset + args->offset;
-	obj_priv->dirty = 1;
 
 	while (remain > 0) {
 		/* Operation in this page
@@ -292,16 +296,11 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 			       page_offset, user_data, page_length);
 
 		/* If we get a fault while copying data, then (presumably) our
-		 * source page isn't available.  In this case, use the
-		 * non-atomic function
+		 * source page isn't available.  Return the error and we'll
+		 * retry in the slow path.
 		 */
-		if (ret) {
-			ret = slow_user_write (dev_priv->mm.gtt_mapping,
-					       page_base, page_offset,
-					       user_data, page_length);
-			if (ret)
-				goto fail;
-		}
+		if (ret)
+			goto fail;
 
 		remain -= page_length;
 		user_data += page_length;
@@ -315,6 +314,115 @@ fail:
 	return ret;
 }
 
+/**
+ * This is the fallback GTT pwrite path, which uses get_user_pages to pin
+ * the memory and maps it using kmap_atomic for copying.
+ *
+ * This code resulted in x11perf -rgb10text consuming about 10% more CPU
+ * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
+ */
+static int
+i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
+			 struct drm_i915_gem_pwrite *args,
+			 struct drm_file *file_priv)
+{
+	struct drm_i915_gem_object *obj_priv = obj->driver_private;
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	ssize_t remain;
+	loff_t gtt_page_base, offset;
+	loff_t first_data_page, last_data_page, num_pages;
+	loff_t pinned_pages, i;
+	struct page **user_pages;
+	struct mm_struct *mm = current->mm;
+	int gtt_page_offset, data_page_offset, data_page_index, page_length;
+	int ret;
+	uint64_t data_ptr = args->data_ptr;
+
+	remain = args->size;
+
+	/* Pin the user pages containing the data.  We can't fault while
+	 * holding the struct mutex, and all of the pwrite implementations
+	 * want to hold it while dereferencing the user data.
+	 */
+	first_data_page = data_ptr / PAGE_SIZE;
+	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
+	num_pages = last_data_page - first_data_page + 1;
+
+	user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
+	if (user_pages == NULL)
+		return -ENOMEM;
+
+	down_read(&mm->mmap_sem);
+	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
+				      num_pages, 0, 0, user_pages, NULL);
+	up_read(&mm->mmap_sem);
+	if (pinned_pages < num_pages) {
+		ret = -EFAULT;
+		goto out_unpin_pages;
+	}
+
+	mutex_lock(&dev->struct_mutex);
+	ret = i915_gem_object_pin(obj, 0);
+	if (ret)
+		goto out_unlock;
+
+	ret = i915_gem_object_set_to_gtt_domain(obj, 1);
+	if (ret)
+		goto out_unpin_object;
+
+	obj_priv = obj->driver_private;
+	offset = obj_priv->gtt_offset + args->offset;
+
+	while (remain > 0) {
+		/* Operation in this page
+		 *
+		 * gtt_page_base = page offset within aperture
+		 * gtt_page_offset = offset within page in aperture
+		 * data_page_index = page number in get_user_pages return
+		 * data_page_offset = offset within data_page_index page.
+		 * page_length = bytes to copy for this page
+		 */
+		gtt_page_base = offset & PAGE_MASK;
+		gtt_page_offset = offset & ~PAGE_MASK;
+		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
+		data_page_offset = data_ptr & ~PAGE_MASK;
+
+		page_length = remain;
+		if ((gtt_page_offset + page_length) > PAGE_SIZE)
+			page_length = PAGE_SIZE - gtt_page_offset;
+		if ((data_page_offset + page_length) > PAGE_SIZE)
+			page_length = PAGE_SIZE - data_page_offset;
+
+		ret = slow_kernel_write(dev_priv->mm.gtt_mapping,
+					gtt_page_base, gtt_page_offset,
+					user_pages[data_page_index],
+					data_page_offset,
+					page_length);
+
+		/* If we get a fault while copying data, then (presumably) our
+		 * source page isn't available.  Since the pages were pinned
+		 * up front, there is no further fallback; return the error.
+		 */
+		if (ret)
+			goto out_unpin_object;
+
+		remain -= page_length;
+		offset += page_length;
+		data_ptr += page_length;
+	}
+
+out_unpin_object:
+	i915_gem_object_unpin(obj);
+out_unlock:
+	mutex_unlock(&dev->struct_mutex);
+out_unpin_pages:
+	for (i = 0; i < pinned_pages; i++)
+		page_cache_release(user_pages[i]);
+	kfree(user_pages);
+
+	return ret;
+}
+
 static int
 i915_gem_shmem_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 		      struct drm_i915_gem_pwrite *args,
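
The index arithmetic in i915_gem_gtt_pwrite_slow() is easiest to verify with concrete numbers (illustrative values, assuming PAGE_SIZE is 4096/0x1000). Suppose data_ptr = 0x1ff0 and args->size = 0x30: then first_data_page = 0x1ff0 / 0x1000 = 1, last_data_page = 0x201f / 0x1000 = 2, and num_pages = 2, because the 0x30-byte write straddles a page boundary. On the first loop iteration data_page_index = 0 and data_page_offset = 0xff0, so page_length is clamped from 0x30 down to 0x1000 - 0xff0 = 0x10; the remaining 0x20 bytes come from user_pages[1] on the second iteration. The same clamp is applied independently to gtt_page_offset, so each slow_kernel_write() call touches at most one page on each side of the copy.
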
@@ -388,9 +496,13 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	if (obj_priv->phys_obj)
 		ret = i915_gem_phys_pwrite(dev, obj, args, file_priv);
 	else if (obj_priv->tiling_mode == I915_TILING_NONE &&
-		 dev->gtt_total != 0)
-		ret = i915_gem_gtt_pwrite(dev, obj, args, file_priv);
-	else
+		 dev->gtt_total != 0) {
+		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file_priv);
+		if (ret == -EFAULT) {
+			ret = i915_gem_gtt_pwrite_slow(dev, obj, args,
+						       file_priv);
+		}
+	} else
 		ret = i915_gem_shmem_pwrite(dev, obj, args, file_priv);
 
 #if WATCH_PWRITE
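
The dispatch in i915_gem_pwrite_ioctl() is the heart of the change: the fast path copies with the non-faulting inatomic helpers while holding struct_mutex, and a fault is reported as -EFAULT rather than serviced, since servicing it would acquire mmap_sem under struct_mutex, the reverse of the order used when faulting in a GTT mapping. Only on -EFAULT does the ioctl pay for the slow path, which takes mmap_sem first to pin the user pages and only then takes struct_mutex. A condensed sketch of this optimistic/pessimistic split, with hypothetical function names standing in for the two implementations:

	/* Try the cheap, non-faulting copy first; fall back to pinning
	 * the user pages only if the source was not resident.
	 */
	ret = pwrite_fast_nofault(dev, obj, args);	/* must not fault */
	if (ret == -EFAULT)
		ret = pwrite_slow_pinned(dev, obj, args);	/* get_user_pages() first */
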
