aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThomas Schlichter <thomas.schlichter@web.de>2010-11-27 08:17:55 -0500
committerPaul Mundt <lethal@linux-sh.org>2011-03-22 03:20:44 -0400
commit803a4e14a7dceaf01dbc0e02c0fdea2eba4c81b3 (patch)
treeb48aedf2155d355f13cf1607e2ff794244c9050d
parent7b0e278519eba8050f2b37d7c408958745f30ecd (diff)
uvesafb,vesafb: create WC or WB PAT-entries
with an PAT-enabled kernel, when using uvesafb or vesafb, these drivers will create uncached-minus PAT entries for the framebuffer memory because they use ioremap() (not the *_cache or *_wc variants). When the framebuffer memory intersects with the video RAM used by Xorg, the complete video RAM will be mapped uncached-minus what results in a serve performance penalty. Here are the correct MTRR entries created by uvesafb: schlicht@netbook:~$ cat /proc/mtrr reg00: base=0x000000000 ( 0MB), size= 2048MB, count=1: write-back reg01: base=0x06ff00000 ( 1791MB), size= 1MB, count=1: uncachable reg02: base=0x070000000 ( 1792MB), size= 256MB, count=1: uncachable reg03: base=0x0d0000000 ( 3328MB), size= 16MB, count=1: write-combining And here are the problematic PAT entries: schlicht@netbook:~$ sudo cat /sys/kernel/debug/x86/pat_memtype_list PAT memtype list: write-back @ 0x0-0x1000 uncached-minus @ 0x6fedd000-0x6fee3000 uncached-minus @ 0x6fee2000-0x6fee3000 uncached-minus @ 0x6fee2000-0x6fee3000 uncached-minus @ 0x6fee2000-0x6fee3000 uncached-minus @ 0x6fee2000-0x6fee3000 uncached-minus @ 0x6fee2000-0x6fee3000 uncached-minus @ 0x6fee2000-0x6fee3000 uncached-minus @ 0x6fee2000-0x6fee3000 uncached-minus @ 0x6fee3000-0x6fee4000 uncached-minus @ 0x6fee3000-0x6fee4000 uncached-minus @ 0x6fee3000-0x6fee4000 uncached-minus @ 0xd0000000-0xe0000000 <-- created by xserver-xorg uncached-minus @ 0xd0000000-0xd1194000 <-- created by uvesafb uncached-minus @ 0xf4000000-0xf4009000 uncached-minus @ 0xf4200000-0xf4400000 uncached-minus @ 0xf5000000-0xf5010000 uncached-minus @ 0xf5100000-0xf5104000 uncached-minus @ 0xf5400000-0xf5404000 uncached-minus @ 0xf5404000-0xf5405000 uncached-minus @ 0xf5404000-0xf5405000 uncached-minus @ 0xfed00000-0xfed01000 Therefore I created the attached patch for uvesafb which uses ioremap_wc() to create the correct PAT entries, as shown below: schlicht@netbook:~$ sudo cat /sys/kernel/debug/x86/pat_memtype_list PAT memtype list: write-back @ 0x0-0x1000 uncached-minus @ 0x6fedd000-0x6fee3000 uncached-minus @ 0x6fee2000-0x6fee3000 uncached-minus @ 0x6fee2000-0x6fee3000 uncached-minus @ 0x6fee2000-0x6fee3000 uncached-minus @ 0x6fee2000-0x6fee3000 uncached-minus @ 0x6fee2000-0x6fee3000 uncached-minus @ 0x6fee2000-0x6fee3000 uncached-minus @ 0x6fee2000-0x6fee3000 uncached-minus @ 0x6fee3000-0x6fee4000 uncached-minus @ 0x6fee3000-0x6fee4000 uncached-minus @ 0x6fee3000-0x6fee4000 write-combining @ 0xd0000000-0xe0000000 write-combining @ 0xd0000000-0xd1194000 uncached-minus @ 0xf4000000-0xf4009000 uncached-minus @ 0xf4200000-0xf4400000 uncached-minus @ 0xf5000000-0xf5010000 uncached-minus @ 0xf5100000-0xf5104000 uncached-minus @ 0xf5400000-0xf5404000 uncached-minus @ 0xf5404000-0xf5405000 uncached-minus @ 0xf5404000-0xf5405000 uncached-minus @ 0xfed00000-0xfed01000 This results in a performance gain, objectively measurable with e.g. x11perf -comppixwin10 -comppixwin100 -comppixwin500: 1: x11perf_xaa.log 2: x11perf_xaa_patched.log 1 2 Operation -------- ---------------- ----------------- 124000.0 202000.0 ( 1.63) Composite 10x10 from pixmap to window 3340.0 24400.0 ( 7.31) Composite 100x100 from pixmap to window 131.0 1150.0 ( 8.78) Composite 500x500 from pixmap to window You can see the serve performance gain when composing larger pixmaps to window. The patches replace the ioremap() function with the variant matching the mtrr- parameter. To create "write-back" PAT entries, the ioremap_cache() function must be called after creating the MTRR entries, and the ioremap_cache() region must completely fit into the MTRR region, this is why the MTRR region size is now rounded up to the next power-of-two. Signed-off-by: Thomas Schlichter <thomas.schlichter@web.de> Signed-off-by: Paul Mundt <lethal@linux-sh.org>
-rw-r--r--drivers/video/uvesafb.c49
-rw-r--r--drivers/video/vesafb.c44
2 files changed, 64 insertions, 29 deletions
diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c
index 5180a215d781..7f8472cc993b 100644
--- a/drivers/video/uvesafb.c
+++ b/drivers/video/uvesafb.c
@@ -1552,8 +1552,7 @@ static void __devinit uvesafb_init_mtrr(struct fb_info *info)
1552 int rc; 1552 int rc;
1553 1553
1554 /* Find the largest power-of-two */ 1554 /* Find the largest power-of-two */
1555 while (temp_size & (temp_size - 1)) 1555 temp_size = roundup_pow_of_two(temp_size);
1556 temp_size &= (temp_size - 1);
1557 1556
1558 /* Try and find a power of two to add */ 1557 /* Try and find a power of two to add */
1559 do { 1558 do {
@@ -1566,6 +1565,28 @@ static void __devinit uvesafb_init_mtrr(struct fb_info *info)
1566#endif /* CONFIG_MTRR */ 1565#endif /* CONFIG_MTRR */
1567} 1566}
1568 1567
1568static void __devinit uvesafb_ioremap(struct fb_info *info)
1569{
1570#ifdef CONFIG_X86
1571 switch (mtrr) {
1572 case 1: /* uncachable */
1573 info->screen_base = ioremap_nocache(info->fix.smem_start, info->fix.smem_len);
1574 break;
1575 case 2: /* write-back */
1576 info->screen_base = ioremap_cache(info->fix.smem_start, info->fix.smem_len);
1577 break;
1578 case 3: /* write-combining */
1579 info->screen_base = ioremap_wc(info->fix.smem_start, info->fix.smem_len);
1580 break;
1581 case 4: /* write-through */
1582 default:
1583 info->screen_base = ioremap(info->fix.smem_start, info->fix.smem_len);
1584 break;
1585 }
1586#else
1587 info->screen_base = ioremap(info->fix.smem_start, info->fix.smem_len);
1588#endif /* CONFIG_X86 */
1589}
1569 1590
1570static ssize_t uvesafb_show_vbe_ver(struct device *dev, 1591static ssize_t uvesafb_show_vbe_ver(struct device *dev,
1571 struct device_attribute *attr, char *buf) 1592 struct device_attribute *attr, char *buf)
@@ -1736,15 +1757,22 @@ static int __devinit uvesafb_probe(struct platform_device *dev)
1736 1757
1737 uvesafb_init_info(info, mode); 1758 uvesafb_init_info(info, mode);
1738 1759
1760 if (!request_region(0x3c0, 32, "uvesafb")) {
1761 printk(KERN_ERR "uvesafb: request region 0x3c0-0x3e0 failed\n");
1762 err = -EIO;
1763 goto out_mode;
1764 }
1765
1739 if (!request_mem_region(info->fix.smem_start, info->fix.smem_len, 1766 if (!request_mem_region(info->fix.smem_start, info->fix.smem_len,
1740 "uvesafb")) { 1767 "uvesafb")) {
1741 printk(KERN_ERR "uvesafb: cannot reserve video memory at " 1768 printk(KERN_ERR "uvesafb: cannot reserve video memory at "
1742 "0x%lx\n", info->fix.smem_start); 1769 "0x%lx\n", info->fix.smem_start);
1743 err = -EIO; 1770 err = -EIO;
1744 goto out_mode; 1771 goto out_reg;
1745 } 1772 }
1746 1773
1747 info->screen_base = ioremap(info->fix.smem_start, info->fix.smem_len); 1774 uvesafb_init_mtrr(info);
1775 uvesafb_ioremap(info);
1748 1776
1749 if (!info->screen_base) { 1777 if (!info->screen_base) {
1750 printk(KERN_ERR 1778 printk(KERN_ERR
@@ -1755,20 +1783,13 @@ static int __devinit uvesafb_probe(struct platform_device *dev)
1755 goto out_mem; 1783 goto out_mem;
1756 } 1784 }
1757 1785
1758 if (!request_region(0x3c0, 32, "uvesafb")) {
1759 printk(KERN_ERR "uvesafb: request region 0x3c0-0x3e0 failed\n");
1760 err = -EIO;
1761 goto out_unmap;
1762 }
1763
1764 uvesafb_init_mtrr(info);
1765 platform_set_drvdata(dev, info); 1786 platform_set_drvdata(dev, info);
1766 1787
1767 if (register_framebuffer(info) < 0) { 1788 if (register_framebuffer(info) < 0) {
1768 printk(KERN_ERR 1789 printk(KERN_ERR
1769 "uvesafb: failed to register framebuffer device\n"); 1790 "uvesafb: failed to register framebuffer device\n");
1770 err = -EINVAL; 1791 err = -EINVAL;
1771 goto out_reg; 1792 goto out_unmap;
1772 } 1793 }
1773 1794
1774 printk(KERN_INFO "uvesafb: framebuffer at 0x%lx, mapped to 0x%p, " 1795 printk(KERN_INFO "uvesafb: framebuffer at 0x%lx, mapped to 0x%p, "
@@ -1785,12 +1806,12 @@ static int __devinit uvesafb_probe(struct platform_device *dev)
1785 1806
1786 return 0; 1807 return 0;
1787 1808
1788out_reg:
1789 release_region(0x3c0, 32);
1790out_unmap: 1809out_unmap:
1791 iounmap(info->screen_base); 1810 iounmap(info->screen_base);
1792out_mem: 1811out_mem:
1793 release_mem_region(info->fix.smem_start, info->fix.smem_len); 1812 release_mem_region(info->fix.smem_start, info->fix.smem_len);
1813out_reg:
1814 release_region(0x3c0, 32);
1794out_mode: 1815out_mode:
1795 if (!list_empty(&info->modelist)) 1816 if (!list_empty(&info->modelist))
1796 fb_destroy_modelist(&info->modelist); 1817 fb_destroy_modelist(&info->modelist);
diff --git a/drivers/video/vesafb.c b/drivers/video/vesafb.c
index 6a069d047914..a99bbe86db13 100644
--- a/drivers/video/vesafb.c
+++ b/drivers/video/vesafb.c
@@ -303,19 +303,6 @@ static int __init vesafb_probe(struct platform_device *dev)
303 info->apertures->ranges[0].base = screen_info.lfb_base; 303 info->apertures->ranges[0].base = screen_info.lfb_base;
304 info->apertures->ranges[0].size = size_total; 304 info->apertures->ranges[0].size = size_total;
305 305
306 info->screen_base = ioremap(vesafb_fix.smem_start, vesafb_fix.smem_len);
307 if (!info->screen_base) {
308 printk(KERN_ERR
309 "vesafb: abort, cannot ioremap video memory 0x%x @ 0x%lx\n",
310 vesafb_fix.smem_len, vesafb_fix.smem_start);
311 err = -EIO;
312 goto err;
313 }
314
315 printk(KERN_INFO "vesafb: framebuffer at 0x%lx, mapped to 0x%p, "
316 "using %dk, total %dk\n",
317 vesafb_fix.smem_start, info->screen_base,
318 size_remap/1024, size_total/1024);
319 printk(KERN_INFO "vesafb: mode is %dx%dx%d, linelength=%d, pages=%d\n", 306 printk(KERN_INFO "vesafb: mode is %dx%dx%d, linelength=%d, pages=%d\n",
320 vesafb_defined.xres, vesafb_defined.yres, vesafb_defined.bits_per_pixel, vesafb_fix.line_length, screen_info.pages); 307 vesafb_defined.xres, vesafb_defined.yres, vesafb_defined.bits_per_pixel, vesafb_fix.line_length, screen_info.pages);
321 308
@@ -438,8 +425,7 @@ static int __init vesafb_probe(struct platform_device *dev)
438 int rc; 425 int rc;
439 426
440 /* Find the largest power-of-two */ 427 /* Find the largest power-of-two */
441 while (temp_size & (temp_size - 1)) 428 temp_size = roundup_pow_of_two(temp_size);
442 temp_size &= (temp_size - 1);
443 429
444 /* Try and find a power of two to add */ 430 /* Try and find a power of two to add */
445 do { 431 do {
@@ -451,6 +437,34 @@ static int __init vesafb_probe(struct platform_device *dev)
451 } 437 }
452#endif 438#endif
453 439
440 switch (mtrr) {
441 case 1: /* uncachable */
442 info->screen_base = ioremap_nocache(vesafb_fix.smem_start, vesafb_fix.smem_len);
443 break;
444 case 2: /* write-back */
445 info->screen_base = ioremap_cache(vesafb_fix.smem_start, vesafb_fix.smem_len);
446 break;
447 case 3: /* write-combining */
448 info->screen_base = ioremap_wc(vesafb_fix.smem_start, vesafb_fix.smem_len);
449 break;
450 case 4: /* write-through */
451 default:
452 info->screen_base = ioremap(vesafb_fix.smem_start, vesafb_fix.smem_len);
453 break;
454 }
455 if (!info->screen_base) {
456 printk(KERN_ERR
457 "vesafb: abort, cannot ioremap video memory 0x%x @ 0x%lx\n",
458 vesafb_fix.smem_len, vesafb_fix.smem_start);
459 err = -EIO;
460 goto err;
461 }
462
463 printk(KERN_INFO "vesafb: framebuffer at 0x%lx, mapped to 0x%p, "
464 "using %dk, total %dk\n",
465 vesafb_fix.smem_start, info->screen_base,
466 size_remap/1024, size_total/1024);
467
454 info->fbops = &vesafb_ops; 468 info->fbops = &vesafb_ops;
455 info->var = vesafb_defined; 469 info->var = vesafb_defined;
456 info->fix = vesafb_fix; 470 info->fix = vesafb_fix;