author    Eric Anholt <eric@anholt.net>  2009-03-09 12:42:23 -0400
committer Eric Anholt <eric@anholt.net>  2009-03-27 17:45:52 -0400
commit    3de09aa3b38910d366f4710ffdf430c9d387d1a3
tree      9434d262168de074b76cce179562c3bd3afcb449 /drivers/gpu/drm/i915/i915_gem.c
parent    13520b051e8888dd3af9bda639d83e7df76613d1
drm/i915: Fix lock order reversal in GTT pwrite path.
Since the pagefault path determines that the lock order we use has to be
mmap_sem -> struct_mutex, we can't allow page faults to occur while the
struct_mutex is held.

To fix this in pwrite, we first try optimistically to see if we can copy
from user without faulting.  If it fails, fall back to using
get_user_pages to pin the user's memory, and map those pages atomically
when copying it to the GPU.

Signed-off-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c  166
1 file changed, 139 insertions(+), 27 deletions(-)
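
The pattern of the fix, reduced to its skeleton, is sketched below. This is an illustrative reconstruction, not code from the patch: do_pwrite(), do_fast_pwrite(), and do_slow_pwrite() are hypothetical stand-ins for the dispatch in i915_gem_pwrite_ioctl() and for i915_gem_gtt_pwrite_fast()/i915_gem_gtt_pwrite_slow(), and error handling is trimmed.

/* Illustrative sketch only; do_fast_pwrite()/do_slow_pwrite() are
 * hypothetical stand-ins.  Lock order is mmap_sem -> struct_mutex, so a
 * page fault (which takes mmap_sem) must never occur while struct_mutex
 * is held.
 */
static int do_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
	int ret;

	/* Fast path: runs with struct_mutex held and faults disabled.
	 * __copy_from_user_inatomic_nocache() returns a nonzero count
	 * instead of faulting, so the lock order cannot invert.
	 */
	ret = do_fast_pwrite(dev, obj, args, file_priv);
	if (ret != -EFAULT)
		return ret;

	/* Slow path: take mmap_sem first by pinning the user pages with
	 * get_user_pages(), then take struct_mutex and copy through
	 * kmap_atomic() mappings, which cannot fault.
	 */
	return do_slow_pwrite(dev, obj, args, file_priv);
}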
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 37427e4016cb..35f8c7bd0d32 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -223,29 +223,34 @@ fast_user_write(struct io_mapping *mapping,
  */
 
 static inline int
-slow_user_write(struct io_mapping *mapping,
-		loff_t page_base, int page_offset,
-		char __user *user_data,
-		int length)
+slow_kernel_write(struct io_mapping *mapping,
+		  loff_t gtt_base, int gtt_offset,
+		  struct page *user_page, int user_offset,
+		  int length)
 {
-	char __iomem *vaddr;
+	char *src_vaddr, *dst_vaddr;
 	unsigned long unwritten;
 
-	vaddr = io_mapping_map_wc(mapping, page_base);
-	if (vaddr == NULL)
-		return -EFAULT;
-	unwritten = __copy_from_user(vaddr + page_offset,
-				     user_data, length);
-	io_mapping_unmap(vaddr);
+	dst_vaddr = io_mapping_map_atomic_wc(mapping, gtt_base);
+	src_vaddr = kmap_atomic(user_page, KM_USER1);
+	unwritten = __copy_from_user_inatomic_nocache(dst_vaddr + gtt_offset,
+						      src_vaddr + user_offset,
+						      length);
+	kunmap_atomic(src_vaddr, KM_USER1);
+	io_mapping_unmap_atomic(dst_vaddr);
 	if (unwritten)
 		return -EFAULT;
 	return 0;
 }
 
+/**
+ * This is the fast pwrite path, where we copy the data directly from the
+ * user into the GTT, uncached.
+ */
 static int
-i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
-		    struct drm_i915_gem_pwrite *args,
-		    struct drm_file *file_priv)
+i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
+			 struct drm_i915_gem_pwrite *args,
+			 struct drm_file *file_priv)
 {
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
 	drm_i915_private_t *dev_priv = dev->dev_private;
@@ -273,7 +278,6 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 
 	obj_priv = obj->driver_private;
 	offset = obj_priv->gtt_offset + args->offset;
-	obj_priv->dirty = 1;
 
 	while (remain > 0) {
 		/* Operation in this page
@@ -292,16 +296,11 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 					       page_offset, user_data, page_length);
 
 		/* If we get a fault while copying data, then (presumably) our
-		 * source page isn't available.  In this case, use the
-		 * non-atomic function
+		 * source page isn't available.  Return the error and we'll
+		 * retry in the slow path.
 		 */
-		if (ret) {
-			ret = slow_user_write (dev_priv->mm.gtt_mapping,
-					       page_base, page_offset,
-					       user_data, page_length);
-			if (ret)
-				goto fail;
-		}
+		if (ret)
+			goto fail;
 
 		remain -= page_length;
 		user_data += page_length;
@@ -315,6 +314,115 @@ fail:
 	return ret;
 }
 
+/**
+ * This is the fallback GTT pwrite path, which uses get_user_pages to pin
+ * the memory and maps it using kmap_atomic for copying.
+ *
+ * This code resulted in x11perf -rgb10text consuming about 10% more CPU
+ * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
+ */
+static int
+i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
+			 struct drm_i915_gem_pwrite *args,
+			 struct drm_file *file_priv)
+{
+	struct drm_i915_gem_object *obj_priv = obj->driver_private;
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	ssize_t remain;
+	loff_t gtt_page_base, offset;
+	loff_t first_data_page, last_data_page, num_pages;
+	loff_t pinned_pages, i;
+	struct page **user_pages;
+	struct mm_struct *mm = current->mm;
+	int gtt_page_offset, data_page_offset, data_page_index, page_length;
+	int ret;
+	uint64_t data_ptr = args->data_ptr;
+
+	remain = args->size;
+
+	/* Pin the user pages containing the data.  We can't fault while
+	 * holding the struct mutex, and all of the pwrite implementations
+	 * want to hold it while dereferencing the user data.
+	 */
+	first_data_page = data_ptr / PAGE_SIZE;
+	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
+	num_pages = last_data_page - first_data_page + 1;
+
+	user_pages = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
+	if (user_pages == NULL)
+		return -ENOMEM;
+
+	down_read(&mm->mmap_sem);
+	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
+				      num_pages, 0, 0, user_pages, NULL);
+	up_read(&mm->mmap_sem);
+	if (pinned_pages < num_pages) {
+		ret = -EFAULT;
+		goto out_unpin_pages;
+	}
+
+	mutex_lock(&dev->struct_mutex);
+	ret = i915_gem_object_pin(obj, 0);
+	if (ret)
+		goto out_unlock;
+
+	ret = i915_gem_object_set_to_gtt_domain(obj, 1);
+	if (ret)
+		goto out_unpin_object;
+
+	obj_priv = obj->driver_private;
+	offset = obj_priv->gtt_offset + args->offset;
+
+	while (remain > 0) {
+		/* Operation in this page
+		 *
+		 * gtt_page_base = page offset within aperture
+		 * gtt_page_offset = offset within page in aperture
+		 * data_page_index = page number in get_user_pages return
+		 * data_page_offset = offset with data_page_index page.
+		 * page_length = bytes to copy for this page
+		 */
+		gtt_page_base = offset & PAGE_MASK;
+		gtt_page_offset = offset & ~PAGE_MASK;
+		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
+		data_page_offset = data_ptr & ~PAGE_MASK;
+
+		page_length = remain;
+		if ((gtt_page_offset + page_length) > PAGE_SIZE)
+			page_length = PAGE_SIZE - gtt_page_offset;
+		if ((data_page_offset + page_length) > PAGE_SIZE)
+			page_length = PAGE_SIZE - data_page_offset;
+
+		ret = slow_kernel_write(dev_priv->mm.gtt_mapping,
+					gtt_page_base, gtt_page_offset,
+					user_pages[data_page_index],
+					data_page_offset,
+					page_length);
+
+		/* If we get a fault while copying data, then (presumably) our
+		 * source page isn't available.  Return the error and we'll
+		 * retry in the slow path.
+		 */
+		if (ret)
+			goto out_unpin_object;
+
+		remain -= page_length;
+		offset += page_length;
+		data_ptr += page_length;
+	}
+
+out_unpin_object:
+	i915_gem_object_unpin(obj);
+out_unlock:
+	mutex_unlock(&dev->struct_mutex);
+out_unpin_pages:
+	for (i = 0; i < pinned_pages; i++)
+		page_cache_release(user_pages[i]);
+	kfree(user_pages);
+
+	return ret;
+}
+
 static int
 i915_gem_shmem_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 		      struct drm_i915_gem_pwrite *args,
@@ -388,9 +496,13 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	if (obj_priv->phys_obj)
 		ret = i915_gem_phys_pwrite(dev, obj, args, file_priv);
 	else if (obj_priv->tiling_mode == I915_TILING_NONE &&
-		 dev->gtt_total != 0)
-		ret = i915_gem_gtt_pwrite(dev, obj, args, file_priv);
-	else
+		 dev->gtt_total != 0) {
+		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file_priv);
+		if (ret == -EFAULT) {
+			ret = i915_gem_gtt_pwrite_slow(dev, obj, args,
+						       file_priv);
+		}
+	} else
 		ret = i915_gem_shmem_pwrite(dev, obj, args, file_priv);
 
 #if WATCH_PWRITE
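
For reference, a userspace caller reaches these paths through the pwrite ioctl. A rough sketch of such a caller follows; gem_pwrite() here is a hypothetical helper, not a libdrm API, and it assumes an open DRM fd and an existing untiled GEM handle so that the GTT path is taken.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Hypothetical helper: write size bytes from data into a GEM object at
 * offset.  Untiled objects on a device with a nonzero GTT go through
 * i915_gem_gtt_pwrite_fast(); after this commit, a fault during the
 * optimistic copy falls back to i915_gem_gtt_pwrite_slow() instead of
 * taking the old in-mutex slow_user_write() path. */
static int gem_pwrite(int fd, uint32_t handle, uint64_t offset,
		      const void *data, uint64_t size)
{
	struct drm_i915_gem_pwrite pwrite;

	memset(&pwrite, 0, sizeof(pwrite));
	pwrite.handle = handle;
	pwrite.offset = offset;
	pwrite.size = size;
	pwrite.data_ptr = (uintptr_t)data;

	return ioctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
}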