diff options
author | Kirill A. Shutemov <kirill.shutemov@linux.intel.com> | 2017-09-08 17:56:03 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2017-09-13 05:26:52 -0400 |
commit | 5b65c4677a57a1d4414212f9995aa0e46a21ff80 (patch) | |
tree | 0b47579035adebb1af14f009abf355e447ced9b0 | |
parent | 9e52fc2b50de3a1c08b44f94c610fbe998c0031a (diff) |
mm, x86/mm: Fix performance regression in get_user_pages_fast()
The 0-day test bot found a performance regression that was tracked down to
switching x86 to the generic get_user_pages_fast() implementation:
http://lkml.kernel.org/r/20170710024020.GA26389@yexl-desktop
The regression was caused by the fact that we now use local_irq_save() +
local_irq_restore() in get_user_pages_fast() to disable interrupts.
In x86 implementation local_irq_disable() + local_irq_enable() was used.
The fix is to make get_user_pages_fast() use local_irq_disable(),
leaving local_irq_save() for __get_user_pages_fast() that can be called
with interrupts disabled.
Numbers for pinning a gigabyte of memory, one page at a time, 20 repeats:
Before: Average: 14.91 ms, stddev: 0.45 ms
After: Average: 10.76 ms, stddev: 0.18 ms
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thorsten Leemhuis <regressions@leemhuis.info>
Cc: linux-mm@kvack.org
Fixes: e585513b76f7 ("x86/mm/gup: Switch GUP to the generic get_user_page_fast() implementation")
Link: http://lkml.kernel.org/r/20170908215603.9189-3-kirill.shutemov@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- | mm/gup.c | 97 |
1 file changed, 58 insertions, 39 deletions
@@ -1618,6 +1618,47 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end, | |||
1618 | return 1; | 1618 | return 1; |
1619 | } | 1619 | } |
1620 | 1620 | ||
1621 | static void gup_pgd_range(unsigned long addr, unsigned long end, | ||
1622 | int write, struct page **pages, int *nr) | ||
1623 | { | ||
1624 | unsigned long next; | ||
1625 | pgd_t *pgdp; | ||
1626 | |||
1627 | pgdp = pgd_offset(current->mm, addr); | ||
1628 | do { | ||
1629 | pgd_t pgd = READ_ONCE(*pgdp); | ||
1630 | |||
1631 | next = pgd_addr_end(addr, end); | ||
1632 | if (pgd_none(pgd)) | ||
1633 | return; | ||
1634 | if (unlikely(pgd_huge(pgd))) { | ||
1635 | if (!gup_huge_pgd(pgd, pgdp, addr, next, write, | ||
1636 | pages, nr)) | ||
1637 | return; | ||
1638 | } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) { | ||
1639 | if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr, | ||
1640 | PGDIR_SHIFT, next, write, pages, nr)) | ||
1641 | return; | ||
1642 | } else if (!gup_p4d_range(pgd, addr, next, write, pages, nr)) | ||
1643 | return; | ||
1644 | } while (pgdp++, addr = next, addr != end); | ||
1645 | } | ||
1646 | |||
1647 | #ifndef gup_fast_permitted | ||
1648 | /* | ||
1649 | * Check if it's allowed to use __get_user_pages_fast() for the range, or | ||
1650 | * we need to fall back to the slow version: | ||
1651 | */ | ||
1652 | bool gup_fast_permitted(unsigned long start, int nr_pages, int write) | ||
1653 | { | ||
1654 | unsigned long len, end; | ||
1655 | |||
1656 | len = (unsigned long) nr_pages << PAGE_SHIFT; | ||
1657 | end = start + len; | ||
1658 | return end >= start; | ||
1659 | } | ||
1660 | #endif | ||
1661 | |||
1621 | /* | 1662 | /* |
1622 | * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to | 1663 | * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to |
1623 | * the regular GUP. It will only return non-negative values. | 1664 | * the regular GUP. It will only return non-negative values. |
@@ -1625,10 +1666,8 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end, | |||
1625 | int __get_user_pages_fast(unsigned long start, int nr_pages, int write, | 1666 | int __get_user_pages_fast(unsigned long start, int nr_pages, int write, |
1626 | struct page **pages) | 1667 | struct page **pages) |
1627 | { | 1668 | { |
1628 | struct mm_struct *mm = current->mm; | ||
1629 | unsigned long addr, len, end; | 1669 | unsigned long addr, len, end; |
1630 | unsigned long next, flags; | 1670 | unsigned long flags; |
1631 | pgd_t *pgdp; | ||
1632 | int nr = 0; | 1671 | int nr = 0; |
1633 | 1672 | ||
1634 | start &= PAGE_MASK; | 1673 | start &= PAGE_MASK; |
@@ -1652,45 +1691,15 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write, | |||
1652 | * block IPIs that come from THPs splitting. | 1691 | * block IPIs that come from THPs splitting. |
1653 | */ | 1692 | */ |
1654 | 1693 | ||
1655 | local_irq_save(flags); | 1694 | if (gup_fast_permitted(start, nr_pages, write)) { |
1656 | pgdp = pgd_offset(mm, addr); | 1695 | local_irq_save(flags); |
1657 | do { | 1696 | gup_pgd_range(addr, end, write, pages, &nr); |
1658 | pgd_t pgd = READ_ONCE(*pgdp); | 1697 | local_irq_restore(flags); |
1659 | 1698 | } | |
1660 | next = pgd_addr_end(addr, end); | ||
1661 | if (pgd_none(pgd)) | ||
1662 | break; | ||
1663 | if (unlikely(pgd_huge(pgd))) { | ||
1664 | if (!gup_huge_pgd(pgd, pgdp, addr, next, write, | ||
1665 | pages, &nr)) | ||
1666 | break; | ||
1667 | } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) { | ||
1668 | if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr, | ||
1669 | PGDIR_SHIFT, next, write, pages, &nr)) | ||
1670 | break; | ||
1671 | } else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr)) | ||
1672 | break; | ||
1673 | } while (pgdp++, addr = next, addr != end); | ||
1674 | local_irq_restore(flags); | ||
1675 | 1699 | ||
1676 | return nr; | 1700 | return nr; |
1677 | } | 1701 | } |
1678 | 1702 | ||
1679 | #ifndef gup_fast_permitted | ||
1680 | /* | ||
1681 | * Check if it's allowed to use __get_user_pages_fast() for the range, or | ||
1682 | * we need to fall back to the slow version: | ||
1683 | */ | ||
1684 | bool gup_fast_permitted(unsigned long start, int nr_pages, int write) | ||
1685 | { | ||
1686 | unsigned long len, end; | ||
1687 | |||
1688 | len = (unsigned long) nr_pages << PAGE_SHIFT; | ||
1689 | end = start + len; | ||
1690 | return end >= start; | ||
1691 | } | ||
1692 | #endif | ||
1693 | |||
1694 | /** | 1703 | /** |
1695 | * get_user_pages_fast() - pin user pages in memory | 1704 | * get_user_pages_fast() - pin user pages in memory |
1696 | * @start: starting user address | 1705 | * @start: starting user address |
@@ -1710,12 +1719,22 @@ bool gup_fast_permitted(unsigned long start, int nr_pages, int write) | |||
1710 | int get_user_pages_fast(unsigned long start, int nr_pages, int write, | 1719 | int get_user_pages_fast(unsigned long start, int nr_pages, int write, |
1711 | struct page **pages) | 1720 | struct page **pages) |
1712 | { | 1721 | { |
1722 | unsigned long addr, len, end; | ||
1713 | int nr = 0, ret = 0; | 1723 | int nr = 0, ret = 0; |
1714 | 1724 | ||
1715 | start &= PAGE_MASK; | 1725 | start &= PAGE_MASK; |
1726 | addr = start; | ||
1727 | len = (unsigned long) nr_pages << PAGE_SHIFT; | ||
1728 | end = start + len; | ||
1729 | |||
1730 | if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, | ||
1731 | (void __user *)start, len))) | ||
1732 | return 0; | ||
1716 | 1733 | ||
1717 | if (gup_fast_permitted(start, nr_pages, write)) { | 1734 | if (gup_fast_permitted(start, nr_pages, write)) { |
1718 | nr = __get_user_pages_fast(start, nr_pages, write, pages); | 1735 | local_irq_disable(); |
1736 | gup_pgd_range(addr, end, write, pages, &nr); | ||
1737 | local_irq_enable(); | ||
1719 | ret = nr; | 1738 | ret = nr; |
1720 | } | 1739 | } |
1721 | 1740 | ||