aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorKhalid Aziz <khalid.aziz@oracle.com>2013-09-11 17:22:20 -0400
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2013-10-01 12:17:48 -0400
commit09642082b35034719f6916ee83b0a6251620ba74 (patch)
tree12307edce98647d926c18f14024545f5e3380e59 /mm
parent3ed3690eac0c839c6216fd7f7ce0add6a1f593b2 (diff)
mm: fix aio performance regression for database caused by THP
commit 7cb2ef56e6a8b7b368b2e883a0a47d02fed66911 upstream. I am working with a tool that simulates oracle database I/O workload. This tool (orion to be specific - <http://docs.oracle.com/cd/E11882_01/server.112/e16638/iodesign.htm#autoId24>) allocates hugetlbfs pages using shmget() with SHM_HUGETLB flag. It then does aio into these pages from flash disks using various common block sizes used by database. I am looking at performance with two of the most common block sizes - 1M and 64K. aio performance with these two block sizes plunged after Transparent HugePages was introduced in the kernel. Here are performance numbers:

                pre-THP         2.6.39          3.11-rc5
  1M read       8384 MB/s       5629 MB/s       6501 MB/s
  64K read      7867 MB/s       4576 MB/s       4251 MB/s

I have narrowed the performance impact down to the overheads introduced by THP in __get_page_tail() and put_compound_page() routines. perf top shows >40% of cycles being spent in these two routines. Every time direct I/O to hugetlbfs pages starts, kernel calls get_page() to grab a reference to the pages and calls put_page() when I/O completes to put the reference away. THP introduced significant amount of locking overhead to get_page() and put_page() when dealing with compound pages because hugepages can be split underneath get_page() and put_page(). It added this overhead irrespective of whether it is dealing with hugetlbfs pages or transparent hugepages. This resulted in 20%-45% drop in aio performance when using hugetlbfs pages. Since hugetlbfs pages can not be split, there is no reason to go through all the locking overhead for these pages from what I can see. I added code to __get_page_tail() and put_compound_page() to bypass all the locking code when working with hugetlbfs pages. This improved performance significantly. 
Performance numbers with this patch:

                pre-THP         3.11-rc5        3.11-rc5 + Patch
  1M read       8384 MB/s       6501 MB/s       8371 MB/s
  64K read      7867 MB/s       4251 MB/s       6510 MB/s

Performance with 64K read is still lower than what it was before THP, but still a 53% improvement. It does mean there is more work to be done but I will take a 53% improvement for now. Please take a look at the following patch and let me know if it looks reasonable. [akpm@linux-foundation.org: tweak comments] Signed-off-by: Khalid Aziz <khalid.aziz@oracle.com> Cc: Pravin B Shelar <pshelar@nicira.com> Cc: Christoph Lameter <cl@linux.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Rik van Riel <riel@redhat.com> Cc: Minchan Kim <minchan@kernel.org> Cc: Andi Kleen <andi@firstfloor.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/swap.c77
1 file changed, 52 insertions, 25 deletions
diff --git a/mm/swap.c b/mm/swap.c
index dfd7d71d6841..9f2225f2b5b0 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -31,6 +31,7 @@
31#include <linux/memcontrol.h> 31#include <linux/memcontrol.h>
32#include <linux/gfp.h> 32#include <linux/gfp.h>
33#include <linux/uio.h> 33#include <linux/uio.h>
34#include <linux/hugetlb.h>
34 35
35#include "internal.h" 36#include "internal.h"
36 37
@@ -78,6 +79,19 @@ static void __put_compound_page(struct page *page)
78 79
79static void put_compound_page(struct page *page) 80static void put_compound_page(struct page *page)
80{ 81{
82 /*
83 * hugetlbfs pages cannot be split from under us. If this is a
84 * hugetlbfs page, check refcount on head page and release the page if
85 * the refcount becomes zero.
86 */
87 if (PageHuge(page)) {
88 page = compound_head(page);
89 if (put_page_testzero(page))
90 __put_compound_page(page);
91
92 return;
93 }
94
81 if (unlikely(PageTail(page))) { 95 if (unlikely(PageTail(page))) {
82 /* __split_huge_page_refcount can run under us */ 96 /* __split_huge_page_refcount can run under us */
83 struct page *page_head = compound_trans_head(page); 97 struct page *page_head = compound_trans_head(page);
@@ -181,38 +195,51 @@ bool __get_page_tail(struct page *page)
181 * proper PT lock that already serializes against 195 * proper PT lock that already serializes against
182 * split_huge_page(). 196 * split_huge_page().
183 */ 197 */
184 unsigned long flags;
185 bool got = false; 198 bool got = false;
186 struct page *page_head = compound_trans_head(page); 199 struct page *page_head;
187 200
188 if (likely(page != page_head && get_page_unless_zero(page_head))) { 201 /*
202 * If this is a hugetlbfs page it cannot be split under us. Simply
203 * increment refcount for the head page.
204 */
205 if (PageHuge(page)) {
206 page_head = compound_head(page);
207 atomic_inc(&page_head->_count);
208 got = true;
209 } else {
210 unsigned long flags;
211
212 page_head = compound_trans_head(page);
213 if (likely(page != page_head &&
214 get_page_unless_zero(page_head))) {
215
216 /* Ref to put_compound_page() comment. */
217 if (PageSlab(page_head)) {
218 if (likely(PageTail(page))) {
219 __get_page_tail_foll(page, false);
220 return true;
221 } else {
222 put_page(page_head);
223 return false;
224 }
225 }
189 226
190 /* Ref to put_compound_page() comment. */ 227 /*
191 if (PageSlab(page_head)) { 228 * page_head wasn't a dangling pointer but it
229 * may not be a head page anymore by the time
230 * we obtain the lock. That is ok as long as it
231 * can't be freed from under us.
232 */
233 flags = compound_lock_irqsave(page_head);
234 /* here __split_huge_page_refcount won't run anymore */
192 if (likely(PageTail(page))) { 235 if (likely(PageTail(page))) {
193 __get_page_tail_foll(page, false); 236 __get_page_tail_foll(page, false);
194 return true; 237 got = true;
195 } else {
196 put_page(page_head);
197 return false;
198 } 238 }
239 compound_unlock_irqrestore(page_head, flags);
240 if (unlikely(!got))
241 put_page(page_head);
199 } 242 }
200
201 /*
202 * page_head wasn't a dangling pointer but it
203 * may not be a head page anymore by the time
204 * we obtain the lock. That is ok as long as it
205 * can't be freed from under us.
206 */
207 flags = compound_lock_irqsave(page_head);
208 /* here __split_huge_page_refcount won't run anymore */
209 if (likely(PageTail(page))) {
210 __get_page_tail_foll(page, false);
211 got = true;
212 }
213 compound_unlock_irqrestore(page_head, flags);
214 if (unlikely(!got))
215 put_page(page_head);
216 } 243 }
217 return got; 244 return got;
218} 245}