author	Andrea Arcangeli <aarcange@redhat.com>	2014-01-21 18:48:54 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-01-21 19:19:43 -0500
commit	44518d2b32646e37b4b7a0813bbbe98dc21c7f8f
tree	9ab4e057cb7ce8ed5cb6a5f6085c902b8ba6f109
parent	ca641514f4056deee1fb2eb356e2c99b98718ade
mm: tail page refcounting optimization for slab and hugetlbfs
This skips the _mapcount mangling for slab and hugetlbfs pages.

The main difficulty in doing this is to guarantee that PageSlab and PageHeadHuge remain constant for every get_page/put_page run on the tail of a slab or hugetlbfs compound page. Otherwise, if they are set during get_page but clear during put_page, the _mapcount of the tail page would underflow.

PageHeadHuge will remain true until the compound page is released and enters the buddy allocator, so it cannot change even if the tail pin is the last reference left on the page.

PG_slab instead is cleared before the slab frees the head page with put_page, so if a tail pin were released after the slab freed the page, we would have a problem. But in the slab case the tail pin cannot be the last reference left on the page: the slab code is free to reuse the compound page after a kfree/kmem_cache_free without having to check whether any tail pin is left. In turn, all tail pins must always be released while the head is still pinned by the slab code, and so we know PG_slab will still be set too.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Khalid Aziz <khalid.aziz@oracle.com>
Cc: Pravin Shelar <pshelar@nicira.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ben Hutchings <bhutchings@solarflare.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Johannes Weiner <jweiner@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
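To see the lifetime rule in one place, below is a minimal, self-contained C model of the two fast paths. This is a sketch, not kernel code: struct fake_page, tail_get() and tail_put() are hypothetical stand-ins for struct page, get_page()/put_page() on a tail, PG_slab and PageHeadHuge(). It compiles on its own and shows why the put side may assert that a slab head is never freed through a tail pin.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for struct page: only what the invariant
 * needs. "count" plays the role of _count on the head, "mapcount"
 * the role of _mapcount on the tail, and the two bools model
 * PG_slab and PageHeadHuge() on the head. */
struct fake_page {
	int count;
	int mapcount;
	bool slab;
	bool head_huge;
};

/* The predicate the patch introduces: tail pins are tracked in the
 * tail's _mapcount only when the compound page is neither slab nor
 * hugetlbfs. */
static bool tail_refcounted(const struct fake_page *head)
{
	return !head->slab && !head->head_huge;
}

/* get side (models get_page() on a tail): the head is always
 * pinned; the tail's mapcount is mangled only when required. */
static void tail_get(struct fake_page *head, struct fake_page *tail)
{
	head->count++;
	if (tail_refcounted(head))
		tail->mapcount++;
}

/* put side (models put_compound_page()): for slab/hugetlbfs the
 * tail mapcount was never touched, and a slab head can never reach
 * zero through a tail pin, because the slab code still holds its
 * own head reference while any tail pin exists. */
static void tail_put(struct fake_page *head, struct fake_page *tail)
{
	if (!tail_refcounted(head)) {
		assert(tail->mapcount == 0);	/* never mangled on get */
		if (--head->count == 0)
			assert(!head->slab);	/* slab head outlives tail pins */
		return;
	}
	tail->mapcount--;
	head->count--;
}

int main(void)
{
	struct fake_page head = { .count = 1, .slab = true };
	struct fake_page tail = { 0 };

	tail_get(&head, &tail);	/* e.g. a temporary gup pin */
	tail_put(&head, &tail);	/* released before kfree() */
	printf("tail _mapcount untouched: %d\n", tail.mapcount);
	return 0;
}

A slab user that dropped its last head reference (kfree) while a tail pin was still outstanding would trip the assertion above; that ordering never happening is exactly the contract the patch relies on.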
-rw-r--r--	include/linux/hugetlb.h	6
-rw-r--r--	include/linux/mm.h	32
-rw-r--r--	mm/internal.h	3
-rw-r--r--	mm/swap.c	33
4 files changed, 60 insertions, 14 deletions
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 251233c1494d..d01cc972a1d9 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -31,7 +31,6 @@ struct hugepage_subpool *hugepage_new_subpool(long nr_blocks);
 void hugepage_put_subpool(struct hugepage_subpool *spool);
 
 int PageHuge(struct page *page);
-int PageHeadHuge(struct page *page_head);
 
 void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
 int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
@@ -104,11 +103,6 @@ static inline int PageHuge(struct page *page)
 	return 0;
 }
 
-static inline int PageHeadHuge(struct page *page_head)
-{
-	return 0;
-}
-
 static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
 {
 }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9fac6dd69b11..f95c71b7c1fd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -414,15 +414,45 @@ static inline int page_count(struct page *page)
 	return atomic_read(&compound_head(page)->_count);
 }
 
+#ifdef CONFIG_HUGETLB_PAGE
+extern int PageHeadHuge(struct page *page_head);
+#else /* CONFIG_HUGETLB_PAGE */
+static inline int PageHeadHuge(struct page *page_head)
+{
+	return 0;
+}
+#endif /* CONFIG_HUGETLB_PAGE */
+
+static inline bool __compound_tail_refcounted(struct page *page)
+{
+	return !PageSlab(page) && !PageHeadHuge(page);
+}
+
+/*
+ * This takes a head page as parameter and tells if the
+ * tail page reference counting can be skipped.
+ *
+ * For this to be safe, PageSlab and PageHeadHuge must remain true on
+ * any given page where they return true here, until all tail pins
+ * have been released.
+ */
+static inline bool compound_tail_refcounted(struct page *page)
+{
+	VM_BUG_ON(!PageHead(page));
+	return __compound_tail_refcounted(page);
+}
+
 static inline void get_huge_page_tail(struct page *page)
 {
 	/*
 	 * __split_huge_page_refcount() cannot run
 	 * from under us.
+	 * In turn no need of compound_trans_head here.
 	 */
 	VM_BUG_ON(page_mapcount(page) < 0);
 	VM_BUG_ON(atomic_read(&page->_count) != 0);
-	atomic_inc(&page->_mapcount);
+	if (compound_tail_refcounted(compound_head(page)))
+		atomic_inc(&page->_mapcount);
 }
 
 extern bool __get_page_tail(struct page *page);
diff --git a/mm/internal.h b/mm/internal.h
index 684f7aa9692a..a85a3ab1f7ef 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -51,7 +51,8 @@ static inline void __get_page_tail_foll(struct page *page,
 	VM_BUG_ON(page_mapcount(page) < 0);
 	if (get_page_head)
 		atomic_inc(&page->first_page->_count);
-	atomic_inc(&page->_mapcount);
+	if (compound_tail_refcounted(page->first_page))
+		atomic_inc(&page->_mapcount);
 }
 
 /*
diff --git a/mm/swap.c b/mm/swap.c
index e2757fbb04ea..bba4aa5bf686 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -88,8 +88,9 @@ static void put_compound_page(struct page *page)
 
 			/*
 			 * THP can not break up slab pages so avoid taking
-			 * compound_lock(). Slab performs non-atomic bit ops
-			 * on page->flags for better performance. In
+			 * compound_lock() and skip the tail page refcounting
+			 * (in _mapcount) too. Slab performs non-atomic bit
+			 * ops on page->flags for better performance. In
 			 * particular slab_unlock() in slub used to be a hot
 			 * path. It is still hot on arches that do not support
 			 * this_cpu_cmpxchg_double().
@@ -102,7 +103,7 @@ static void put_compound_page(struct page *page)
 			 * PageTail clear after smp_rmb() and we'll treat it
 			 * as a single page.
 			 */
-			if (PageSlab(page_head) || PageHeadHuge(page_head)) {
+			if (!__compound_tail_refcounted(page_head)) {
 				/*
 				 * If "page" is a THP tail, we must read the tail page
 				 * flags after the head page flags. The
@@ -117,10 +118,30 @@ static void put_compound_page(struct page *page)
 					 * cannot race here.
 					 */
 					VM_BUG_ON(!PageHead(page_head));
-					VM_BUG_ON(page_mapcount(page) <= 0);
-					atomic_dec(&page->_mapcount);
-					if (put_page_testzero(page_head))
+					VM_BUG_ON(page_mapcount(page) != 0);
+					if (put_page_testzero(page_head)) {
+						/*
+						 * If this is the tail of a
+						 * slab compound page, the
+						 * tail pin must not be the
+						 * last reference held on the
+						 * page, because the PG_slab
+						 * cannot be cleared before
+						 * all tail pins (which skips
+						 * the _mapcount tail
+						 * refcounting) have been
+						 * released. For hugetlbfs the
+						 * tail pin may be the last
+						 * reference on the page
+						 * instead, because
+						 * PageHeadHuge will not go
+						 * away until the compound
+						 * page enters the buddy
+						 * allocator.
+						 */
+						VM_BUG_ON(PageSlab(page_head));
 						__put_compound_page(page_head);
+					}
 					return;
 				} else
 					/*