about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Keith Packard <keithp@keithp.com> 2008-10-30 22:38:48 -0400
committer Ingo Molnar <mingo@elte.hu> 2008-10-31 05:12:40 -0400
commit 0839ccb8ac6a9e2d5e175a4ae9c82b5c574d510d (patch)
tree c231d0492e4c03ae8d33f2101f21ff2928e56710
parent 9663f2e6a6cf3f82b06d8fb699b11b80f92553ba (diff)
i915: use io-mapping interfaces instead of a variety of mapping kludges
Impact: optimize/clean-up the IO mapping implementation of the i915 DRM driver

Switch the i915 device aperture mapping to the io-mapping interface, taking advantage of the cleaner API to extend it across all of the mapping uses, including both pwrite and relocation updates.

This dramatically improves performance on 64-bit kernels, which were using the same slow path as 32-bit non-HIGHMEM kernels prior to this patch.

Signed-off-by: Keith Packard <keithp@keithp.com>
Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- drivers/gpu/drm/i915/i915_drv.h 3
-rw-r--r-- drivers/gpu/drm/i915/i915_gem.c 174
2 files changed, 83 insertions, 94 deletions
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f20ffe17df71..126b2f13258c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -31,6 +31,7 @@
31#define _I915_DRV_H_ 31#define _I915_DRV_H_
32 32
33#include "i915_reg.h" 33#include "i915_reg.h"
34#include <linux/io-mapping.h>
34 35
35/* General customization: 36/* General customization:
36 */ 37 */
@@ -246,6 +247,8 @@ typedef struct drm_i915_private {
246 struct { 247 struct {
247 struct drm_mm gtt_space; 248 struct drm_mm gtt_space;
248 249
250 struct io_mapping *gtt_mapping;
251
249 /** 252 /**
250 * List of objects currently involved in rendering from the 253 * List of objects currently involved in rendering from the
251 * ringbuffer. 254 * ringbuffer.
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 17ae330ff269..61183b95b108 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -171,35 +171,50 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
171 return 0; 171 return 0;
172} 172}
173 173
174/* 174/* This is the fast write path which cannot handle
175 * Try to write quickly with an atomic kmap. Return true on success. 175 * page faults in the source data
176 *
177 * If this fails (which includes a partial write), we'll redo the whole
178 * thing with the slow version.
179 *
180 * This is a workaround for the low performance of iounmap (approximate
181 * 10% cpu cost on normal 3D workloads). kmap_atomic on HIGHMEM kernels
182 * happens to let us map card memory without taking IPIs. When the vmap
183 * rework lands we should be able to dump this hack.
184 */ 176 */
185static inline int fast_user_write(unsigned long pfn, char __user *user_data, 177
186 int l, int o) 178static inline int
179fast_user_write(struct io_mapping *mapping,
180 loff_t page_base, int page_offset,
181 char __user *user_data,
182 int length)
187{ 183{
188#ifdef CONFIG_HIGHMEM
189 unsigned long unwritten;
190 char *vaddr_atomic; 184 char *vaddr_atomic;
185 unsigned long unwritten;
191 186
192 vaddr_atomic = kmap_atomic_pfn(pfn, KM_USER0); 187 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
193#if WATCH_PWRITE 188 unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
194 DRM_INFO("pwrite i %d o %d l %d pfn %ld vaddr %p\n", 189 user_data, length);
195 i, o, l, pfn, vaddr_atomic); 190 io_mapping_unmap_atomic(vaddr_atomic);
196#endif 191 if (unwritten)
197 unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + o, user_data, l); 192 return -EFAULT;
198 kunmap_atomic(vaddr_atomic, KM_USER0); 193 return 0;
199 return !unwritten; 194}
200#else 195
196/* Here's the write path which can sleep for
197 * page faults
198 */
199
200static inline int
201slow_user_write(struct io_mapping *mapping,
202 loff_t page_base, int page_offset,
203 char __user *user_data,
204 int length)
205{
206 char __iomem *vaddr;
207 unsigned long unwritten;
208
209 vaddr = io_mapping_map_wc(mapping, page_base);
210 if (vaddr == NULL)
211 return -EFAULT;
212 unwritten = __copy_from_user(vaddr + page_offset,
213 user_data, length);
214 io_mapping_unmap(vaddr);
215 if (unwritten)
216 return -EFAULT;
201 return 0; 217 return 0;
202#endif
203} 218}
204 219
205static int 220static int
@@ -208,10 +223,12 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
208 struct drm_file *file_priv) 223 struct drm_file *file_priv)
209{ 224{
210 struct drm_i915_gem_object *obj_priv = obj->driver_private; 225 struct drm_i915_gem_object *obj_priv = obj->driver_private;
226 drm_i915_private_t *dev_priv = dev->dev_private;
211 ssize_t remain; 227 ssize_t remain;
212 loff_t offset; 228 loff_t offset, page_base;
213 char __user *user_data; 229 char __user *user_data;
214 int ret = 0; 230 int page_offset, page_length;
231 int ret;
215 232
216 user_data = (char __user *) (uintptr_t) args->data_ptr; 233 user_data = (char __user *) (uintptr_t) args->data_ptr;
217 remain = args->size; 234 remain = args->size;
@@ -235,57 +252,37 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
235 obj_priv->dirty = 1; 252 obj_priv->dirty = 1;
236 253
237 while (remain > 0) { 254 while (remain > 0) {
238 unsigned long pfn;
239 int i, o, l;
240
241 /* Operation in this page 255 /* Operation in this page
242 * 256 *
243 * i = page number 257 * page_base = page offset within aperture
244 * o = offset within page 258 * page_offset = offset within page
245 * l = bytes to copy 259 * page_length = bytes to copy for this page
246 */ 260 */
247 i = offset >> PAGE_SHIFT; 261 page_base = (offset & ~(PAGE_SIZE-1));
248 o = offset & (PAGE_SIZE-1); 262 page_offset = offset & (PAGE_SIZE-1);
249 l = remain; 263 page_length = remain;
250 if ((o + l) > PAGE_SIZE) 264 if ((page_offset + remain) > PAGE_SIZE)
251 l = PAGE_SIZE - o; 265 page_length = PAGE_SIZE - page_offset;
252 266
253 pfn = (dev->agp->base >> PAGE_SHIFT) + i; 267 ret = fast_user_write (dev_priv->mm.gtt_mapping, page_base,
254 268 page_offset, user_data, page_length);
255 if (!fast_user_write(pfn, user_data, l, o)) { 269
256 unsigned long unwritten; 270 /* If we get a fault while copying data, then (presumably) our
257 char __iomem *vaddr; 271 * source page isn't available. In this case, use the
258 272 * non-atomic function
259 vaddr = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE); 273 */
260#if WATCH_PWRITE 274 if (ret) {
261 DRM_INFO("pwrite slow i %d o %d l %d " 275 ret = slow_user_write (dev_priv->mm.gtt_mapping,
262 "pfn %ld vaddr %p\n", 276 page_base, page_offset,
263 i, o, l, pfn, vaddr); 277 user_data, page_length);
264#endif 278 if (ret)
265 if (vaddr == NULL) {
266 ret = -EFAULT;
267 goto fail;
268 }
269 unwritten = __copy_from_user(vaddr + o, user_data, l);
270#if WATCH_PWRITE
271 DRM_INFO("unwritten %ld\n", unwritten);
272#endif
273 iounmap(vaddr);
274 if (unwritten) {
275 ret = -EFAULT;
276 goto fail; 279 goto fail;
277 }
278 } 280 }
279 281
280 remain -= l; 282 remain -= page_length;
281 user_data += l; 283 user_data += page_length;
282 offset += l; 284 offset += page_length;
283 } 285 }
284#if WATCH_PWRITE && 1
285 i915_gem_clflush_object(obj);
286 i915_gem_dump_object(obj, args->offset + args->size, __func__, ~0);
287 i915_gem_clflush_object(obj);
288#endif
289 286
290fail: 287fail:
291 i915_gem_object_unpin(obj); 288 i915_gem_object_unpin(obj);
@@ -1503,12 +1500,12 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
1503 struct drm_i915_gem_exec_object *entry) 1500 struct drm_i915_gem_exec_object *entry)
1504{ 1501{
1505 struct drm_device *dev = obj->dev; 1502 struct drm_device *dev = obj->dev;
1503 drm_i915_private_t *dev_priv = dev->dev_private;
1506 struct drm_i915_gem_relocation_entry reloc; 1504 struct drm_i915_gem_relocation_entry reloc;
1507 struct drm_i915_gem_relocation_entry __user *relocs; 1505 struct drm_i915_gem_relocation_entry __user *relocs;
1508 struct drm_i915_gem_object *obj_priv = obj->driver_private; 1506 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1509 int i, ret; 1507 int i, ret;
1510 uint32_t last_reloc_offset = -1; 1508 void __iomem *reloc_page;
1511 void __iomem *reloc_page = NULL;
1512 1509
1513 /* Choose the GTT offset for our buffer and put it there. */ 1510 /* Choose the GTT offset for our buffer and put it there. */
1514 ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment); 1511 ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment);
@@ -1631,26 +1628,11 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
1631 * perform. 1628 * perform.
1632 */ 1629 */
1633 reloc_offset = obj_priv->gtt_offset + reloc.offset; 1630 reloc_offset = obj_priv->gtt_offset + reloc.offset;
1634 if (reloc_page == NULL || 1631 reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
1635 (last_reloc_offset & ~(PAGE_SIZE - 1)) != 1632 (reloc_offset &
1636 (reloc_offset & ~(PAGE_SIZE - 1))) { 1633 ~(PAGE_SIZE - 1)));
1637 if (reloc_page != NULL)
1638 iounmap(reloc_page);
1639
1640 reloc_page = ioremap_wc(dev->agp->base +
1641 (reloc_offset &
1642 ~(PAGE_SIZE - 1)),
1643 PAGE_SIZE);
1644 last_reloc_offset = reloc_offset;
1645 if (reloc_page == NULL) {
1646 drm_gem_object_unreference(target_obj);
1647 i915_gem_object_unpin(obj);
1648 return -ENOMEM;
1649 }
1650 }
1651
1652 reloc_entry = (uint32_t __iomem *)(reloc_page + 1634 reloc_entry = (uint32_t __iomem *)(reloc_page +
1653 (reloc_offset & (PAGE_SIZE - 1))); 1635 (reloc_offset & (PAGE_SIZE - 1)));
1654 reloc_val = target_obj_priv->gtt_offset + reloc.delta; 1636 reloc_val = target_obj_priv->gtt_offset + reloc.delta;
1655 1637
1656#if WATCH_BUF 1638#if WATCH_BUF
@@ -1659,6 +1641,7 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
1659 readl(reloc_entry), reloc_val); 1641 readl(reloc_entry), reloc_val);
1660#endif 1642#endif
1661 writel(reloc_val, reloc_entry); 1643 writel(reloc_val, reloc_entry);
1644 io_mapping_unmap_atomic(reloc_page);
1662 1645
1663 /* Write the updated presumed offset for this entry back out 1646 /* Write the updated presumed offset for this entry back out
1664 * to the user. 1647 * to the user.
@@ -1674,9 +1657,6 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
1674 drm_gem_object_unreference(target_obj); 1657 drm_gem_object_unreference(target_obj);
1675 } 1658 }
1676 1659
1677 if (reloc_page != NULL)
1678 iounmap(reloc_page);
1679
1680#if WATCH_BUF 1660#if WATCH_BUF
1681 if (0) 1661 if (0)
1682 i915_gem_dump_object(obj, 128, __func__, ~0); 1662 i915_gem_dump_object(obj, 128, __func__, ~0);
@@ -2518,6 +2498,10 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
2518 if (ret != 0) 2498 if (ret != 0)
2519 return ret; 2499 return ret;
2520 2500
2501 dev_priv->mm.gtt_mapping = io_mapping_create_wc(dev->agp->base,
2502 dev->agp->agp_info.aper_size
2503 * 1024 * 1024);
2504
2521 mutex_lock(&dev->struct_mutex); 2505 mutex_lock(&dev->struct_mutex);
2522 BUG_ON(!list_empty(&dev_priv->mm.active_list)); 2506 BUG_ON(!list_empty(&dev_priv->mm.active_list));
2523 BUG_ON(!list_empty(&dev_priv->mm.flushing_list)); 2507 BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
@@ -2535,11 +2519,13 @@ int
2535i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, 2519i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
2536 struct drm_file *file_priv) 2520 struct drm_file *file_priv)
2537{ 2521{
2522 drm_i915_private_t *dev_priv = dev->dev_private;
2538 int ret; 2523 int ret;
2539 2524
2540 ret = i915_gem_idle(dev); 2525 ret = i915_gem_idle(dev);
2541 drm_irq_uninstall(dev); 2526 drm_irq_uninstall(dev);
2542 2527
2528 io_mapping_free(dev_priv->mm.gtt_mapping);
2543 return ret; 2529 return ret;
2544} 2530}
2545 2531