Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem.c')
 -rw-r--r--  drivers/gpu/drm/i915/i915_gem.c | 204
 1 file changed, 105 insertions(+), 99 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 17ae330ff26..6b4a2bd2064 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -79,6 +79,28 @@ i915_gem_init_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+int
+i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *file_priv)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_i915_gem_get_aperture *args = data;
+	struct drm_i915_gem_object *obj_priv;
+
+	if (!(dev->driver->driver_features & DRIVER_GEM))
+		return -ENODEV;
+
+	args->aper_size = dev->gtt_total;
+	args->aper_available_size = args->aper_size;
+
+	list_for_each_entry(obj_priv, &dev_priv->mm.active_list, list) {
+		if (obj_priv->pin_count > 0)
+			args->aper_available_size -= obj_priv->obj->size;
+	}
+
+	return 0;
+}
+
 
 /**
  * Creates a new mm object and returns a handle to it.
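
For reference, a minimal user-space sketch of querying the new ioctl (illustrative only; it assumes the struct drm_i915_gem_get_aperture and DRM_IOCTL_I915_GEM_GET_APERTURE definitions that accompany this interface in i915_drm.h, and /dev/dri/card0 as the device node):

	#include <stdio.h>
	#include <string.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	int main(void)
	{
		struct drm_i915_gem_get_aperture aper;
		int fd = open("/dev/dri/card0", O_RDWR);	/* assumed node */

		if (fd < 0)
			return 1;
		memset(&aper, 0, sizeof(aper));
		if (ioctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aper) == 0)
			printf("aperture: %llu bytes total, %llu available\n",
			       (unsigned long long)aper.aper_size,
			       (unsigned long long)aper.aper_available_size);
		close(fd);
		return 0;
	}

Since only pinned objects on the active list are subtracted, aper_available_size is an estimate, not a reservation.
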
@@ -171,35 +193,50 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
-/*
- * Try to write quickly with an atomic kmap. Return true on success.
- *
- * If this fails (which includes a partial write), we'll redo the whole
- * thing with the slow version.
- *
- * This is a workaround for the low performance of iounmap (approximate
- * 10% cpu cost on normal 3D workloads). kmap_atomic on HIGHMEM kernels
- * happens to let us map card memory without taking IPIs. When the vmap
- * rework lands we should be able to dump this hack.
+/* This is the fast write path which cannot handle
+ * page faults in the source data
  */
-static inline int fast_user_write(unsigned long pfn, char __user *user_data,
-				  int l, int o)
+
+static inline int
+fast_user_write(struct io_mapping *mapping,
+		loff_t page_base, int page_offset,
+		char __user *user_data,
+		int length)
 {
-#ifdef CONFIG_HIGHMEM
-	unsigned long unwritten;
 	char *vaddr_atomic;
+	unsigned long unwritten;
 
-	vaddr_atomic = kmap_atomic_pfn(pfn, KM_USER0);
-#if WATCH_PWRITE
-	DRM_INFO("pwrite i %d o %d l %d pfn %ld vaddr %p\n",
-		 i, o, l, pfn, vaddr_atomic);
-#endif
-	unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + o, user_data, l);
-	kunmap_atomic(vaddr_atomic, KM_USER0);
-	return !unwritten;
-#else
+	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
+	unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
+						      user_data, length);
+	io_mapping_unmap_atomic(vaddr_atomic);
+	if (unwritten)
+		return -EFAULT;
+	return 0;
+}
+
+/* Here's the write path which can sleep for
+ * page faults
+ */
+
+static inline int
+slow_user_write(struct io_mapping *mapping,
+		loff_t page_base, int page_offset,
+		char __user *user_data,
+		int length)
+{
+	char __iomem *vaddr;
+	unsigned long unwritten;
+
+	vaddr = io_mapping_map_wc(mapping, page_base);
+	if (vaddr == NULL)
+		return -EFAULT;
+	unwritten = __copy_from_user(vaddr + page_offset,
+				     user_data, length);
+	io_mapping_unmap(vaddr);
+	if (unwritten)
+		return -EFAULT;
 	return 0;
-#endif
 }
 
 static int
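
The split above encodes a common kernel discipline: io_mapping_map_atomic_wc is cheap, but between map and unmap the code must not sleep, so a page fault on the user source cannot be serviced and __copy_from_user_inatomic_nocache simply reports the bytes left uncopied. Callers try the atomic path first and retry through the sleeping path, roughly (condensed sketch; the real caller follows in the next hunks):

	ret = fast_user_write(mapping, page_base, page_offset,
			      user_data, length);
	if (ret)	/* source page absent; fall back and let it fault in */
		ret = slow_user_write(mapping, page_base, page_offset,
				      user_data, length);
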
@@ -208,10 +245,12 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 		   struct drm_file *file_priv)
 {
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
+	drm_i915_private_t *dev_priv = dev->dev_private;
 	ssize_t remain;
-	loff_t offset;
+	loff_t offset, page_base;
 	char __user *user_data;
-	int ret = 0;
+	int page_offset, page_length;
+	int ret;
 
 	user_data = (char __user *) (uintptr_t) args->data_ptr;
 	remain = args->size;
@@ -235,57 +274,37 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 	obj_priv->dirty = 1;
 
 	while (remain > 0) {
-		unsigned long pfn;
-		int i, o, l;
-
 		/* Operation in this page
 		 *
-		 * i = page number
-		 * o = offset within page
-		 * l = bytes to copy
+		 * page_base = page offset within aperture
+		 * page_offset = offset within page
+		 * page_length = bytes to copy for this page
 		 */
-		i = offset >> PAGE_SHIFT;
-		o = offset & (PAGE_SIZE-1);
-		l = remain;
-		if ((o + l) > PAGE_SIZE)
-			l = PAGE_SIZE - o;
+		page_base = (offset & ~(PAGE_SIZE-1));
+		page_offset = offset & (PAGE_SIZE-1);
+		page_length = remain;
+		if ((page_offset + remain) > PAGE_SIZE)
+			page_length = PAGE_SIZE - page_offset;
 
-		pfn = (dev->agp->base >> PAGE_SHIFT) + i;
-
-		if (!fast_user_write(pfn, user_data, l, o)) {
-			unsigned long unwritten;
-			char __iomem *vaddr;
-
-			vaddr = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE);
-#if WATCH_PWRITE
-			DRM_INFO("pwrite slow i %d o %d l %d "
-				 "pfn %ld vaddr %p\n",
-				 i, o, l, pfn, vaddr);
-#endif
-			if (vaddr == NULL) {
-				ret = -EFAULT;
-				goto fail;
-			}
-			unwritten = __copy_from_user(vaddr + o, user_data, l);
-#if WATCH_PWRITE
-			DRM_INFO("unwritten %ld\n", unwritten);
-#endif
-			iounmap(vaddr);
-			if (unwritten) {
-				ret = -EFAULT;
+		ret = fast_user_write (dev_priv->mm.gtt_mapping, page_base,
+				       page_offset, user_data, page_length);
+
+		/* If we get a fault while copying data, then (presumably) our
+		 * source page isn't available. In this case, use the
+		 * non-atomic function
+		 */
+		if (ret) {
+			ret = slow_user_write (dev_priv->mm.gtt_mapping,
+					       page_base, page_offset,
+					       user_data, page_length);
+			if (ret)
 				goto fail;
-			}
 		}
 
-		remain -= l;
-		user_data += l;
-		offset += l;
+		remain -= page_length;
+		user_data += page_length;
+		offset += page_length;
 	}
-#if WATCH_PWRITE && 1
-	i915_gem_clflush_object(obj);
-	i915_gem_dump_object(obj, args->offset + args->size, __func__, ~0);
-	i915_gem_clflush_object(obj);
-#endif
 
 fail:
 	i915_gem_object_unpin(obj);
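
The page_base/page_offset/page_length arithmetic is easiest to check with concrete numbers. A stand-alone demonstration (hypothetical values; 4 KiB pages assumed):

	#include <stdio.h>

	#define PAGE_SIZE 4096ul

	int main(void)
	{
		unsigned long offset = 0x1f80;	/* 128 bytes before the end of page 1 */
		unsigned long remain = 512;	/* so the write crosses one page boundary */

		while (remain > 0) {
			unsigned long page_base = offset & ~(PAGE_SIZE - 1);
			unsigned long page_offset = offset & (PAGE_SIZE - 1);
			unsigned long page_length = remain;

			if (page_offset + remain > PAGE_SIZE)
				page_length = PAGE_SIZE - page_offset;

			/* prints: base 0x1000 off 0xf80 len 128
			 *         base 0x2000 off 0x0 len 384 */
			printf("base 0x%lx off 0x%lx len %lu\n",
			       page_base, page_offset, page_length);

			remain -= page_length;
			offset += page_length;
		}
		return 0;
	}
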
@@ -1436,11 +1455,9 @@ i915_gem_object_set_domain_range(struct drm_gem_object *obj,
 			 read_domains, write_domain);
 
 	/* Wait on any GPU rendering to the object to be flushed. */
-	if (obj->write_domain & ~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT)) {
-		ret = i915_gem_object_wait_rendering(obj);
-		if (ret)
-			return ret;
-	}
+	ret = i915_gem_object_wait_rendering(obj);
+	if (ret)
+		return ret;
 
 	if (obj_priv->page_cpu_valid == NULL) {
 		obj_priv->page_cpu_valid = drm_calloc(1, obj->size / PAGE_SIZE,
@@ -1503,12 +1520,12 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
 		   struct drm_i915_gem_exec_object *entry)
 {
 	struct drm_device *dev = obj->dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_relocation_entry reloc;
 	struct drm_i915_gem_relocation_entry __user *relocs;
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
 	int i, ret;
-	uint32_t last_reloc_offset = -1;
-	void __iomem *reloc_page = NULL;
+	void __iomem *reloc_page;
 
 	/* Choose the GTT offset for our buffer and put it there. */
 	ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment);
@@ -1631,26 +1648,11 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
 		 * perform.
 		 */
 		reloc_offset = obj_priv->gtt_offset + reloc.offset;
-		if (reloc_page == NULL ||
-		    (last_reloc_offset & ~(PAGE_SIZE - 1)) !=
-		    (reloc_offset & ~(PAGE_SIZE - 1))) {
-			if (reloc_page != NULL)
-				iounmap(reloc_page);
-
-			reloc_page = ioremap_wc(dev->agp->base +
-						(reloc_offset &
-						 ~(PAGE_SIZE - 1)),
-						PAGE_SIZE);
-			last_reloc_offset = reloc_offset;
-			if (reloc_page == NULL) {
-				drm_gem_object_unreference(target_obj);
-				i915_gem_object_unpin(obj);
-				return -ENOMEM;
-			}
-		}
-
+		reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
+						      (reloc_offset &
+						       ~(PAGE_SIZE - 1)));
 		reloc_entry = (uint32_t __iomem *)(reloc_page +
 						   (reloc_offset & (PAGE_SIZE - 1)));
 		reloc_val = target_obj_priv->gtt_offset + reloc.delta;
 
 #if WATCH_BUF
@@ -1659,6 +1661,7 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
 			  readl(reloc_entry), reloc_val);
 #endif
 		writel(reloc_val, reloc_entry);
+		io_mapping_unmap_atomic(reloc_page);
 
 		/* Write the updated presumed offset for this entry back out
 		 * to the user.
@@ -1674,9 +1677,6 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
 		drm_gem_object_unreference(target_obj);
 	}
 
-	if (reloc_page != NULL)
-		iounmap(reloc_page);
-
 #if WATCH_BUF
 	if (0)
 		i915_gem_dump_object(obj, 128, __func__, ~0);
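
With one long-lived io_mapping covering the whole aperture, each relocation can atomically map just the page holding its target dword, poke it, and unmap, which is why the cached last_reloc_offset page, the iounmap calls, and the -ENOMEM cleanup path all disappear. Condensed, the new per-relocation pattern is (an illustrative recap of the hunks above, not new code):

	reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
					      reloc_offset & ~(PAGE_SIZE - 1));
	reloc_entry = (uint32_t __iomem *)(reloc_page +
					   (reloc_offset & (PAGE_SIZE - 1)));
	writel(target_obj_priv->gtt_offset + reloc.delta, reloc_entry);
	io_mapping_unmap_atomic(reloc_page);
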
@@ -2518,6 +2518,10 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
 	if (ret != 0)
 		return ret;
 
+	dev_priv->mm.gtt_mapping = io_mapping_create_wc(dev->agp->base,
+							dev->agp->agp_info.aper_size
+							* 1024 * 1024);
+
 	mutex_lock(&dev->struct_mutex);
 	BUG_ON(!list_empty(&dev_priv->mm.active_list));
 	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
@@ -2535,11 +2539,13 @@ int
 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
 		       struct drm_file *file_priv)
 {
+	drm_i915_private_t *dev_priv = dev->dev_private;
 	int ret;
 
 	ret = i915_gem_idle(dev);
 	drm_irq_uninstall(dev);
 
+	io_mapping_free(dev_priv->mm.gtt_mapping);
 	return ret;
 }
 
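
The mapping's lifetime now brackets the VT session; agp_info.aper_size is reported in megabytes, hence the * 1024 * 1024 conversion to bytes. Condensed, the paired setup and teardown (illustrative recap):

	/* entervt: one write-combining mapping covers the whole aperture */
	dev_priv->mm.gtt_mapping =
		io_mapping_create_wc(dev->agp->base,
				     dev->agp->agp_info.aper_size * 1024 * 1024);

	/* ... pwrite fast/slow paths and relocations use it ... */

	/* leavevt: release it once GEM is idled */
	io_mapping_free(dev_priv->mm.gtt_mapping);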
