| author | Eric Anholt <eric@anholt.net> | 2009-03-09 12:42:23 -0400 |
|---|---|---|
| committer | Eric Anholt <eric@anholt.net> | 2009-03-27 17:45:52 -0400 |
| commit | 3de09aa3b38910d366f4710ffdf430c9d387d1a3 (patch) | |
| tree | 9434d262168de074b76cce179562c3bd3afcb449 /drivers/gpu | |
| parent | 13520b051e8888dd3af9bda639d83e7df76613d1 (diff) | |
drm/i915: Fix lock order reversal in GTT pwrite path.
Since the pagefault path determines that the lock order we use has to be
mmap_sem -> struct_mutex, we can't allow page faults to occur while the
struct_mutex is held. To fix this in pwrite, we first try optimistically to
see if we can copy from user without faulting. If that fails, we fall back to
using get_user_pages to pin the user's memory, and map those pages
atomically when copying them to the GPU.
Signed-off-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
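
The pattern the patch adopts is worth spelling out: first attempt the copy with a variant that reports a fault instead of taking one, and only on failure pin the user pages up front, so the retried copy is guaranteed not to fault while the contended lock is held. Below is a minimal sketch of that shape, using the same era-appropriate kernel APIs the patch itself relies on (get_user_pages, kmap_atomic with a KM slot, page_cache_release); the helper name and parameters are hypothetical, not part of the patch, and the copy is assumed not to cross a page boundary:

```c
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/string.h>
#include <linux/uaccess.h>

/* Hypothetical helper (not in the patch): copy 'len' user bytes into
 * 'dst' without ever faulting while 'lock' is held.  Assumes the copy
 * does not cross a page boundary. */
static int copy_in_no_fault(struct mutex *lock, void *dst,
                            const char __user *user_data, size_t len)
{
        unsigned long off = (unsigned long)user_data & ~PAGE_MASK;
        unsigned long unwritten;
        struct page *page;
        char *vaddr;
        int ret;

        /* Fast path: with page faults disabled, the inatomic copy
         * returns a nonzero remainder instead of sleeping on a fault. */
        mutex_lock(lock);
        pagefault_disable();
        unwritten = __copy_from_user_inatomic(dst, user_data, len);
        pagefault_enable();
        mutex_unlock(lock);
        if (unwritten == 0)
                return 0;

        /* Slow path: fault the page in while holding only mmap_sem,
         * preserving the mmap_sem -> lock ordering. */
        down_read(&current->mm->mmap_sem);
        ret = get_user_pages(current, current->mm,
                             (unsigned long)user_data & PAGE_MASK,
                             1, 0, 0, &page, NULL);
        up_read(&current->mm->mmap_sem);
        if (ret < 1)
                return -EFAULT;

        /* The pinned page cannot fault away, so copying from a
         * kmap_atomic mapping under the lock is safe. */
        mutex_lock(lock);
        vaddr = kmap_atomic(page, KM_USER0);
        memcpy(dst, vaddr + off, len);
        kunmap_atomic(vaddr, KM_USER0);
        mutex_unlock(lock);

        page_cache_release(page);
        return 0;
}
```

The patch applies this shape at function granularity rather than per copy: i915_gem_gtt_pwrite_fast returns -EFAULT instead of handling the fault itself, and the ioctl retries the whole write with i915_gem_gtt_pwrite_slow, as the diff below shows.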
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- drivers/gpu/drm/i915/i915_gem.c | 166
1 file changed, 139 insertions(+), 27 deletions(-)
```diff
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 37427e4016cb..35f8c7bd0d32 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -223,29 +223,34 @@ fast_user_write(struct io_mapping *mapping,
  */
 
 static inline int
-slow_user_write(struct io_mapping *mapping,
-                loff_t page_base, int page_offset,
-                char __user *user_data,
-                int length)
+slow_kernel_write(struct io_mapping *mapping,
+                  loff_t gtt_base, int gtt_offset,
+                  struct page *user_page, int user_offset,
+                  int length)
 {
-        char __iomem *vaddr;
+        char *src_vaddr, *dst_vaddr;
         unsigned long unwritten;
 
-        vaddr = io_mapping_map_wc(mapping, page_base);
-        if (vaddr == NULL)
-                return -EFAULT;
-        unwritten = __copy_from_user(vaddr + page_offset,
-                                     user_data, length);
-        io_mapping_unmap(vaddr);
+        dst_vaddr = io_mapping_map_atomic_wc(mapping, gtt_base);
+        src_vaddr = kmap_atomic(user_page, KM_USER1);
+        unwritten = __copy_from_user_inatomic_nocache(dst_vaddr + gtt_offset,
+                                                      src_vaddr + user_offset,
+                                                      length);
+        kunmap_atomic(src_vaddr, KM_USER1);
+        io_mapping_unmap_atomic(dst_vaddr);
         if (unwritten)
                 return -EFAULT;
         return 0;
 }
 
+/**
+ * This is the fast pwrite path, where we copy the data directly from the
+ * user into the GTT, uncached.
+ */
 static int
-i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
-                    struct drm_i915_gem_pwrite *args,
-                    struct drm_file *file_priv)
+i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
+                         struct drm_i915_gem_pwrite *args,
+                         struct drm_file *file_priv)
 {
         struct drm_i915_gem_object *obj_priv = obj->driver_private;
         drm_i915_private_t *dev_priv = dev->dev_private;
@@ -273,7 +278,6 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 
         obj_priv = obj->driver_private;
         offset = obj_priv->gtt_offset + args->offset;
-        obj_priv->dirty = 1;
 
         while (remain > 0) {
                 /* Operation in this page
@@ -292,16 +296,11 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
                               page_offset, user_data, page_length);
 
                 /* If we get a fault while copying data, then (presumably) our
-                 * source page isn't available.  In this case, use the
-                 * non-atomic function
+                 * source page isn't available.  Return the error and we'll
+                 * retry in the slow path.
                  */
-                if (ret) {
-                        ret = slow_user_write (dev_priv->mm.gtt_mapping,
-                                               page_base, page_offset,
-                                               user_data, page_length);
-                        if (ret)
-                                goto fail;
-                }
+                if (ret)
+                        goto fail;
 
                 remain -= page_length;
                 user_data += page_length;
@@ -315,6 +314,115 @@ fail:
         return ret;
 }
 
+/**
+ * This is the fallback GTT pwrite path, which uses get_user_pages to pin
+ * the memory and maps it using kmap_atomic for copying.
+ *
+ * This code resulted in x11perf -rgb10text consuming about 10% more CPU
+ * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
+ */
+static int
+i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
+                         struct drm_i915_gem_pwrite *args,
+                         struct drm_file *file_priv)
+{
+        struct drm_i915_gem_object *obj_priv = obj->driver_private;
+        drm_i915_private_t *dev_priv = dev->dev_private;
+        ssize_t remain;
+        loff_t gtt_page_base, offset;
+        loff_t first_data_page, last_data_page, num_pages;
+        loff_t pinned_pages, i;
+        struct page **user_pages;
+        struct mm_struct *mm = current->mm;
+        int gtt_page_offset, data_page_offset, data_page_index, page_length;
+        int ret;
+        uint64_t data_ptr = args->data_ptr;
+
+        remain = args->size;
+
+        /* Pin the user pages containing the data.  We can't fault while
+         * holding the struct mutex, and all of the pwrite implementations
+         * want to hold it while dereferencing the user data.
+         */
+        first_data_page = data_ptr / PAGE_SIZE;
+        last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
+        num_pages = last_data_page - first_data_page + 1;
+
+        user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
+        if (user_pages == NULL)
+                return -ENOMEM;
+
+        down_read(&mm->mmap_sem);
+        pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
+                                      num_pages, 0, 0, user_pages, NULL);
+        up_read(&mm->mmap_sem);
+        if (pinned_pages < num_pages) {
+                ret = -EFAULT;
+                goto out_unpin_pages;
+        }
+
+        mutex_lock(&dev->struct_mutex);
+        ret = i915_gem_object_pin(obj, 0);
+        if (ret)
+                goto out_unlock;
+
+        ret = i915_gem_object_set_to_gtt_domain(obj, 1);
+        if (ret)
+                goto out_unpin_object;
+
+        obj_priv = obj->driver_private;
+        offset = obj_priv->gtt_offset + args->offset;
+
+        while (remain > 0) {
+                /* Operation in this page
+                 *
+                 * gtt_page_base = page offset within aperture
+                 * gtt_page_offset = offset within page in aperture
+                 * data_page_index = page number in get_user_pages return
+                 * data_page_offset = offset with data_page_index page.
+                 * page_length = bytes to copy for this page
+                 */
+                gtt_page_base = offset & PAGE_MASK;
+                gtt_page_offset = offset & ~PAGE_MASK;
+                data_page_index = data_ptr / PAGE_SIZE - first_data_page;
+                data_page_offset = data_ptr & ~PAGE_MASK;
+
+                page_length = remain;
+                if ((gtt_page_offset + page_length) > PAGE_SIZE)
+                        page_length = PAGE_SIZE - gtt_page_offset;
+                if ((data_page_offset + page_length) > PAGE_SIZE)
+                        page_length = PAGE_SIZE - data_page_offset;
+
+                ret = slow_kernel_write(dev_priv->mm.gtt_mapping,
+                                        gtt_page_base, gtt_page_offset,
+                                        user_pages[data_page_index],
+                                        data_page_offset,
+                                        page_length);
+
+                /* If we get a fault while copying data, then (presumably) our
+                 * source page isn't available.  Return the error and we'll
+                 * retry in the slow path.
+                 */
+                if (ret)
+                        goto out_unpin_object;
+
+                remain -= page_length;
+                offset += page_length;
+                data_ptr += page_length;
+        }
+
+out_unpin_object:
+        i915_gem_object_unpin(obj);
+out_unlock:
+        mutex_unlock(&dev->struct_mutex);
+out_unpin_pages:
+        for (i = 0; i < pinned_pages; i++)
+                page_cache_release(user_pages[i]);
+        kfree(user_pages);
+
+        return ret;
+}
+
 static int
 i915_gem_shmem_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
                       struct drm_i915_gem_pwrite *args,
@@ -388,9 +496,13 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
         if (obj_priv->phys_obj)
                 ret = i915_gem_phys_pwrite(dev, obj, args, file_priv);
         else if (obj_priv->tiling_mode == I915_TILING_NONE &&
-                 dev->gtt_total != 0)
-                ret = i915_gem_gtt_pwrite(dev, obj, args, file_priv);
-        else
+                 dev->gtt_total != 0) {
+                ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file_priv);
+                if (ret == -EFAULT) {
+                        ret = i915_gem_gtt_pwrite_slow(dev, obj, args,
+                                                       file_priv);
+                }
+        } else
                 ret = i915_gem_shmem_pwrite(dev, obj, args, file_priv);
 
 #if WATCH_PWRITE
```
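
One detail of i915_gem_gtt_pwrite_slow worth noting: because the offset into the aperture page and the offset into the pinned user page generally differ, each loop iteration clamps the copy length against both pages. Restated as a standalone helper for clarity (the function name is ours, not the kernel's; the arithmetic is the same as in the loop above):

```c
#include <linux/mm.h>   /* PAGE_SIZE */

/* Hypothetical restatement of the loop's clamping: the largest copy
 * that stays inside both the destination (GTT) page and the source
 * (user) page.  As in the patch, each per-iteration length is assumed
 * to fit in an int. */
static int bounded_page_length(int gtt_page_offset, int data_page_offset,
                               ssize_t remain)
{
        int page_length = remain;

        if ((gtt_page_offset + page_length) > PAGE_SIZE)
                page_length = PAGE_SIZE - gtt_page_offset;
        if ((data_page_offset + page_length) > PAGE_SIZE)
                page_length = PAGE_SIZE - data_page_offset;
        return page_length;
}
```

Both clamps can apply in the same iteration, which is why the slow path may copy less than a full page per pass even when more than a page of data remains.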