diff options
author | Keith Packard <keithp@keithp.com> | 2008-10-30 22:38:48 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-10-31 05:12:40 -0400 |
commit | 0839ccb8ac6a9e2d5e175a4ae9c82b5c574d510d (patch) | |
tree | c231d0492e4c03ae8d33f2101f21ff2928e56710 | |
parent | 9663f2e6a6cf3f82b06d8fb699b11b80f92553ba (diff) |
i915: use io-mapping interfaces instead of a variety of mapping kludges

Impact: optimize/clean-up the IO mapping implementation of the i915 DRM driver

Switch the i915 device aperture mapping to the io-mapping interface, taking
advantage of the cleaner API to extend it across all of the mapping uses,
including both pwrite and relocation updates.

This dramatically improves performance on 64-bit kernels which were using
the same slow path as 32-bit non-HIGHMEM kernels prior to this patch.

Signed-off-by: Keith Packard <keithp@keithp.com>
Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | drivers/gpu/drm/i915/i915_drv.h | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 174 |
2 files changed, 83 insertions, 94 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f20ffe17df71..126b2f13258c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -31,6 +31,7 @@ | |||
31 | #define _I915_DRV_H_ | 31 | #define _I915_DRV_H_ |
32 | 32 | ||
33 | #include "i915_reg.h" | 33 | #include "i915_reg.h" |
34 | #include <linux/io-mapping.h> | ||
34 | 35 | ||
35 | /* General customization: | 36 | /* General customization: |
36 | */ | 37 | */ |
@@ -246,6 +247,8 @@ typedef struct drm_i915_private { | |||
246 | struct { | 247 | struct { |
247 | struct drm_mm gtt_space; | 248 | struct drm_mm gtt_space; |
248 | 249 | ||
250 | struct io_mapping *gtt_mapping; | ||
251 | |||
249 | /** | 252 | /** |
250 | * List of objects currently involved in rendering from the | 253 | * List of objects currently involved in rendering from the |
251 | * ringbuffer. | 254 | * ringbuffer. |
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 17ae330ff269..61183b95b108 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -171,35 +171,50 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, | |||
171 | return 0; | 171 | return 0; |
172 | } | 172 | } |
173 | 173 | ||
174 | /* | 174 | /* This is the fast write path which cannot handle |
175 | * Try to write quickly with an atomic kmap. Return true on success. | 175 | * page faults in the source data |
176 | * | ||
177 | * If this fails (which includes a partial write), we'll redo the whole | ||
178 | * thing with the slow version. | ||
179 | * | ||
180 | * This is a workaround for the low performance of iounmap (approximate | ||
181 | * 10% cpu cost on normal 3D workloads). kmap_atomic on HIGHMEM kernels | ||
182 | * happens to let us map card memory without taking IPIs. When the vmap | ||
183 | * rework lands we should be able to dump this hack. | ||
184 | */ | 176 | */ |
185 | static inline int fast_user_write(unsigned long pfn, char __user *user_data, | 177 | |
186 | int l, int o) | 178 | static inline int |
179 | fast_user_write(struct io_mapping *mapping, | ||
180 | loff_t page_base, int page_offset, | ||
181 | char __user *user_data, | ||
182 | int length) | ||
187 | { | 183 | { |
188 | #ifdef CONFIG_HIGHMEM | ||
189 | unsigned long unwritten; | ||
190 | char *vaddr_atomic; | 184 | char *vaddr_atomic; |
185 | unsigned long unwritten; | ||
191 | 186 | ||
192 | vaddr_atomic = kmap_atomic_pfn(pfn, KM_USER0); | 187 | vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); |
193 | #if WATCH_PWRITE | 188 | unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset, |
194 | DRM_INFO("pwrite i %d o %d l %d pfn %ld vaddr %p\n", | 189 | user_data, length); |
195 | i, o, l, pfn, vaddr_atomic); | 190 | io_mapping_unmap_atomic(vaddr_atomic); |
196 | #endif | 191 | if (unwritten) |
197 | unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + o, user_data, l); | 192 | return -EFAULT; |
198 | kunmap_atomic(vaddr_atomic, KM_USER0); | 193 | return 0; |
199 | return !unwritten; | 194 | } |
200 | #else | 195 | |
196 | /* Here's the write path which can sleep for | ||
197 | * page faults | ||
198 | */ | ||
199 | |||
200 | static inline int | ||
201 | slow_user_write(struct io_mapping *mapping, | ||
202 | loff_t page_base, int page_offset, | ||
203 | char __user *user_data, | ||
204 | int length) | ||
205 | { | ||
206 | char __iomem *vaddr; | ||
207 | unsigned long unwritten; | ||
208 | |||
209 | vaddr = io_mapping_map_wc(mapping, page_base); | ||
210 | if (vaddr == NULL) | ||
211 | return -EFAULT; | ||
212 | unwritten = __copy_from_user(vaddr + page_offset, | ||
213 | user_data, length); | ||
214 | io_mapping_unmap(vaddr); | ||
215 | if (unwritten) | ||
216 | return -EFAULT; | ||
201 | return 0; | 217 | return 0; |
202 | #endif | ||
203 | } | 218 | } |
204 | 219 | ||
205 | static int | 220 | static int |
@@ -208,10 +223,12 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj, | |||
208 | struct drm_file *file_priv) | 223 | struct drm_file *file_priv) |
209 | { | 224 | { |
210 | struct drm_i915_gem_object *obj_priv = obj->driver_private; | 225 | struct drm_i915_gem_object *obj_priv = obj->driver_private; |
226 | drm_i915_private_t *dev_priv = dev->dev_private; | ||
211 | ssize_t remain; | 227 | ssize_t remain; |
212 | loff_t offset; | 228 | loff_t offset, page_base; |
213 | char __user *user_data; | 229 | char __user *user_data; |
214 | int ret = 0; | 230 | int page_offset, page_length; |
231 | int ret; | ||
215 | 232 | ||
216 | user_data = (char __user *) (uintptr_t) args->data_ptr; | 233 | user_data = (char __user *) (uintptr_t) args->data_ptr; |
217 | remain = args->size; | 234 | remain = args->size; |
@@ -235,57 +252,37 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj, | |||
235 | obj_priv->dirty = 1; | 252 | obj_priv->dirty = 1; |
236 | 253 | ||
237 | while (remain > 0) { | 254 | while (remain > 0) { |
238 | unsigned long pfn; | ||
239 | int i, o, l; | ||
240 | |||
241 | /* Operation in this page | 255 | /* Operation in this page |
242 | * | 256 | * |
243 | * i = page number | 257 | * page_base = page offset within aperture |
244 | * o = offset within page | 258 | * page_offset = offset within page |
245 | * l = bytes to copy | 259 | * page_length = bytes to copy for this page |
246 | */ | 260 | */ |
247 | i = offset >> PAGE_SHIFT; | 261 | page_base = (offset & ~(PAGE_SIZE-1)); |
248 | o = offset & (PAGE_SIZE-1); | 262 | page_offset = offset & (PAGE_SIZE-1); |
249 | l = remain; | 263 | page_length = remain; |
250 | if ((o + l) > PAGE_SIZE) | 264 | if ((page_offset + remain) > PAGE_SIZE) |
251 | l = PAGE_SIZE - o; | 265 | page_length = PAGE_SIZE - page_offset; |
252 | 266 | ||
253 | pfn = (dev->agp->base >> PAGE_SHIFT) + i; | 267 | ret = fast_user_write (dev_priv->mm.gtt_mapping, page_base, |
254 | 268 | page_offset, user_data, page_length); | |
255 | if (!fast_user_write(pfn, user_data, l, o)) { | 269 | |
256 | unsigned long unwritten; | 270 | /* If we get a fault while copying data, then (presumably) our |
257 | char __iomem *vaddr; | 271 | * source page isn't available. In this case, use the |
258 | 272 | * non-atomic function | |
259 | vaddr = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE); | 273 | */ |
260 | #if WATCH_PWRITE | 274 | if (ret) { |
261 | DRM_INFO("pwrite slow i %d o %d l %d " | 275 | ret = slow_user_write (dev_priv->mm.gtt_mapping, |
262 | "pfn %ld vaddr %p\n", | 276 | page_base, page_offset, |
263 | i, o, l, pfn, vaddr); | 277 | user_data, page_length); |
264 | #endif | 278 | if (ret) |
265 | if (vaddr == NULL) { | ||
266 | ret = -EFAULT; | ||
267 | goto fail; | ||
268 | } | ||
269 | unwritten = __copy_from_user(vaddr + o, user_data, l); | ||
270 | #if WATCH_PWRITE | ||
271 | DRM_INFO("unwritten %ld\n", unwritten); | ||
272 | #endif | ||
273 | iounmap(vaddr); | ||
274 | if (unwritten) { | ||
275 | ret = -EFAULT; | ||
276 | goto fail; | 279 | goto fail; |
277 | } | ||
278 | } | 280 | } |
279 | 281 | ||
280 | remain -= l; | 282 | remain -= page_length; |
281 | user_data += l; | 283 | user_data += page_length; |
282 | offset += l; | 284 | offset += page_length; |
283 | } | 285 | } |
284 | #if WATCH_PWRITE && 1 | ||
285 | i915_gem_clflush_object(obj); | ||
286 | i915_gem_dump_object(obj, args->offset + args->size, __func__, ~0); | ||
287 | i915_gem_clflush_object(obj); | ||
288 | #endif | ||
289 | 286 | ||
290 | fail: | 287 | fail: |
291 | i915_gem_object_unpin(obj); | 288 | i915_gem_object_unpin(obj); |
@@ -1503,12 +1500,12 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, | |||
1503 | struct drm_i915_gem_exec_object *entry) | 1500 | struct drm_i915_gem_exec_object *entry) |
1504 | { | 1501 | { |
1505 | struct drm_device *dev = obj->dev; | 1502 | struct drm_device *dev = obj->dev; |
1503 | drm_i915_private_t *dev_priv = dev->dev_private; | ||
1506 | struct drm_i915_gem_relocation_entry reloc; | 1504 | struct drm_i915_gem_relocation_entry reloc; |
1507 | struct drm_i915_gem_relocation_entry __user *relocs; | 1505 | struct drm_i915_gem_relocation_entry __user *relocs; |
1508 | struct drm_i915_gem_object *obj_priv = obj->driver_private; | 1506 | struct drm_i915_gem_object *obj_priv = obj->driver_private; |
1509 | int i, ret; | 1507 | int i, ret; |
1510 | uint32_t last_reloc_offset = -1; | 1508 | void __iomem *reloc_page; |
1511 | void __iomem *reloc_page = NULL; | ||
1512 | 1509 | ||
1513 | /* Choose the GTT offset for our buffer and put it there. */ | 1510 | /* Choose the GTT offset for our buffer and put it there. */ |
1514 | ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment); | 1511 | ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment); |
@@ -1631,26 +1628,11 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, | |||
1631 | * perform. | 1628 | * perform. |
1632 | */ | 1629 | */ |
1633 | reloc_offset = obj_priv->gtt_offset + reloc.offset; | 1630 | reloc_offset = obj_priv->gtt_offset + reloc.offset; |
1634 | if (reloc_page == NULL || | 1631 | reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping, |
1635 | (last_reloc_offset & ~(PAGE_SIZE - 1)) != | 1632 | (reloc_offset & |
1636 | (reloc_offset & ~(PAGE_SIZE - 1))) { | 1633 | ~(PAGE_SIZE - 1))); |
1637 | if (reloc_page != NULL) | ||
1638 | iounmap(reloc_page); | ||
1639 | |||
1640 | reloc_page = ioremap_wc(dev->agp->base + | ||
1641 | (reloc_offset & | ||
1642 | ~(PAGE_SIZE - 1)), | ||
1643 | PAGE_SIZE); | ||
1644 | last_reloc_offset = reloc_offset; | ||
1645 | if (reloc_page == NULL) { | ||
1646 | drm_gem_object_unreference(target_obj); | ||
1647 | i915_gem_object_unpin(obj); | ||
1648 | return -ENOMEM; | ||
1649 | } | ||
1650 | } | ||
1651 | |||
1652 | reloc_entry = (uint32_t __iomem *)(reloc_page + | 1634 | reloc_entry = (uint32_t __iomem *)(reloc_page + |
1653 | (reloc_offset & (PAGE_SIZE - 1))); | 1635 | (reloc_offset & (PAGE_SIZE - 1))); |
1654 | reloc_val = target_obj_priv->gtt_offset + reloc.delta; | 1636 | reloc_val = target_obj_priv->gtt_offset + reloc.delta; |
1655 | 1637 | ||
1656 | #if WATCH_BUF | 1638 | #if WATCH_BUF |
@@ -1659,6 +1641,7 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, | |||
1659 | readl(reloc_entry), reloc_val); | 1641 | readl(reloc_entry), reloc_val); |
1660 | #endif | 1642 | #endif |
1661 | writel(reloc_val, reloc_entry); | 1643 | writel(reloc_val, reloc_entry); |
1644 | io_mapping_unmap_atomic(reloc_page); | ||
1662 | 1645 | ||
1663 | /* Write the updated presumed offset for this entry back out | 1646 | /* Write the updated presumed offset for this entry back out |
1664 | * to the user. | 1647 | * to the user. |
@@ -1674,9 +1657,6 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, | |||
1674 | drm_gem_object_unreference(target_obj); | 1657 | drm_gem_object_unreference(target_obj); |
1675 | } | 1658 | } |
1676 | 1659 | ||
1677 | if (reloc_page != NULL) | ||
1678 | iounmap(reloc_page); | ||
1679 | |||
1680 | #if WATCH_BUF | 1660 | #if WATCH_BUF |
1681 | if (0) | 1661 | if (0) |
1682 | i915_gem_dump_object(obj, 128, __func__, ~0); | 1662 | i915_gem_dump_object(obj, 128, __func__, ~0); |
@@ -2518,6 +2498,10 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data, | |||
2518 | if (ret != 0) | 2498 | if (ret != 0) |
2519 | return ret; | 2499 | return ret; |
2520 | 2500 | ||
2501 | dev_priv->mm.gtt_mapping = io_mapping_create_wc(dev->agp->base, | ||
2502 | dev->agp->agp_info.aper_size | ||
2503 | * 1024 * 1024); | ||
2504 | |||
2521 | mutex_lock(&dev->struct_mutex); | 2505 | mutex_lock(&dev->struct_mutex); |
2522 | BUG_ON(!list_empty(&dev_priv->mm.active_list)); | 2506 | BUG_ON(!list_empty(&dev_priv->mm.active_list)); |
2523 | BUG_ON(!list_empty(&dev_priv->mm.flushing_list)); | 2507 | BUG_ON(!list_empty(&dev_priv->mm.flushing_list)); |
@@ -2535,11 +2519,13 @@ int | |||
2535 | i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, | 2519 | i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, |
2536 | struct drm_file *file_priv) | 2520 | struct drm_file *file_priv) |
2537 | { | 2521 | { |
2522 | drm_i915_private_t *dev_priv = dev->dev_private; | ||
2538 | int ret; | 2523 | int ret; |
2539 | 2524 | ||
2540 | ret = i915_gem_idle(dev); | 2525 | ret = i915_gem_idle(dev); |
2541 | drm_irq_uninstall(dev); | 2526 | drm_irq_uninstall(dev); |
2542 | 2527 | ||
2528 | io_mapping_free(dev_priv->mm.gtt_mapping); | ||
2543 | return ret; | 2529 | return ret; |
2544 | } | 2530 | } |
2545 | 2531 | ||