aboutsummaryrefslogtreecommitdiffstats
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
authorWaiman Long <longman@redhat.com>2018-12-28 03:38:51 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2018-12-28 15:11:51 -0500
commit3c0c12cc8f00ca5f81acb010023b8eb13e9a7004 (patch)
treed4b287cf806869f491db8ea42a3ff666394b9add /mm/page_alloc.c
parent3cfd22be0ad663248fadfc8f6ffa3e255c394552 (diff)
mm/page_alloc.c: don't call kasan_free_pages() at deferred mem init
When CONFIG_KASAN is enabled on large memory SMP systems, the deferred pages initialization can take a long time. Below were the reported init times on an 8-socket 96-core 4TB IvyBridge system. 1) Non-debug kernel without CONFIG_KASAN [ 8.764222] node 1 initialised, 132086516 pages in 7027ms 2) Debug kernel with CONFIG_KASAN [ 146.288115] node 1 initialised, 132075466 pages in 143052ms So the page init time in a debug kernel was 20X that of the non-debug kernel. The long init time can be problematic as the page initialization is done with interrupts disabled. In this particular case, it caused the appearance of the following warning messages as well as NMI backtraces of all the cores that were doing the initialization. [ 68.240049] rcu: INFO: rcu_sched detected stalls on CPUs/tasks: [ 68.241000] rcu: 25-...0: (100 ticks this GP) idle=b72/1/0x4000000000000000 softirq=915/915 fqs=16252 [ 68.241000] rcu: 44-...0: (95 ticks this GP) idle=49a/1/0x4000000000000000 softirq=788/788 fqs=16253 [ 68.241000] rcu: 54-...0: (104 ticks this GP) idle=03a/1/0x4000000000000000 softirq=721/825 fqs=16253 [ 68.241000] rcu: 60-...0: (103 ticks this GP) idle=cbe/1/0x4000000000000000 softirq=637/740 fqs=16253 [ 68.241000] rcu: 72-...0: (105 ticks this GP) idle=786/1/0x4000000000000000 softirq=536/641 fqs=16253 [ 68.241000] rcu: 84-...0: (99 ticks this GP) idle=292/1/0x4000000000000000 softirq=537/537 fqs=16253 [ 68.241000] rcu: 111-...0: (104 ticks this GP) idle=bde/1/0x4000000000000000 softirq=474/476 fqs=16253 [ 68.241000] rcu: (detected by 13, t=65018 jiffies, g=249, q=2) The long init time was mainly caused by the call to kasan_free_pages() to poison the newly initialized pages. On a 4TB system, we are talking about almost 500GB of memory probably on the same node. In reality, we may not need to poison the newly initialized pages before they are ever allocated. So KASAN poisoning of freed pages before the completion of deferred memory initialization is now disabled. 
Those pages will be properly poisoned when they are allocated or freed after deferred pages initialization is done. With this change, the new page initialization time became: [ 21.948010] node 1 initialised, 132075466 pages in 18702ms This was still about double the non-debug kernel time, but was much better than before. Link: http://lkml.kernel.org/r/1544459388-8736-1-git-send-email-longman@redhat.com Signed-off-by: Waiman Long <longman@redhat.com> Reviewed-by: Andrew Morton <akpm@linux-foundation.org> Cc: Andrey Ryabinin <aryabinin@virtuozzo.com> Cc: Alexander Potapenko <glider@google.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Michal Hocko <mhocko@suse.com> Cc: Pasha Tatashin <Pavel.Tatashin@microsoft.com> Cc: Oscar Salvador <osalvador@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c37
1 files changed, 29 insertions, 8 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cd1c9d32ef9a..2cd1f9bb1b52 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -294,6 +294,32 @@ EXPORT_SYMBOL(nr_online_nodes);
294int page_group_by_mobility_disabled __read_mostly; 294int page_group_by_mobility_disabled __read_mostly;
295 295
296#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT 296#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
297/*
298 * During boot we initialize deferred pages on-demand, as needed, but once
299 * page_alloc_init_late() has finished, the deferred pages are all initialized,
300 * and we can permanently disable that path.
301 */
302static DEFINE_STATIC_KEY_TRUE(deferred_pages);
303
304/*
305 * Calling kasan_free_pages() only after deferred memory initialization
306 * has completed. Poisoning pages during deferred memory init will greatly
307 * lengthen the process and cause problem in large memory systems as the
308 * deferred pages initialization is done with interrupt disabled.
309 *
310 * Assuming that there will be no reference to those newly initialized
311 * pages before they are ever allocated, this should have no effect on
312 * KASAN memory tracking as the poison will be properly inserted at page
313 * allocation time. The only corner case is when pages are allocated by
314 * on-demand allocation and then freed again before the deferred pages
315 * initialization is done, but this is not likely to happen.
316 */
317static inline void kasan_free_nondeferred_pages(struct page *page, int order)
318{
319 if (!static_branch_unlikely(&deferred_pages))
320 kasan_free_pages(page, order);
321}
322
297/* Returns true if the struct page for the pfn is uninitialised */ 323/* Returns true if the struct page for the pfn is uninitialised */
298static inline bool __meminit early_page_uninitialised(unsigned long pfn) 324static inline bool __meminit early_page_uninitialised(unsigned long pfn)
299{ 325{
@@ -340,6 +366,8 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
340 return false; 366 return false;
341} 367}
342#else 368#else
369#define kasan_free_nondeferred_pages(p, o) kasan_free_pages(p, o)
370
343static inline bool early_page_uninitialised(unsigned long pfn) 371static inline bool early_page_uninitialised(unsigned long pfn)
344{ 372{
345 return false; 373 return false;
@@ -1043,7 +1071,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
1043 arch_free_page(page, order); 1071 arch_free_page(page, order);
1044 kernel_poison_pages(page, 1 << order, 0); 1072 kernel_poison_pages(page, 1 << order, 0);
1045 kernel_map_pages(page, 1 << order, 0); 1073 kernel_map_pages(page, 1 << order, 0);
1046 kasan_free_pages(page, order); 1074 kasan_free_nondeferred_pages(page, order);
1047 1075
1048 return true; 1076 return true;
1049} 1077}
@@ -1613,13 +1641,6 @@ static int __init deferred_init_memmap(void *data)
1613} 1641}
1614 1642
1615/* 1643/*
1616 * During boot we initialize deferred pages on-demand, as needed, but once
1617 * page_alloc_init_late() has finished, the deferred pages are all initialized,
1618 * and we can permanently disable that path.
1619 */
1620static DEFINE_STATIC_KEY_TRUE(deferred_pages);
1621
1622/*
1623 * If this zone has deferred pages, try to grow it by initializing enough 1644 * If this zone has deferred pages, try to grow it by initializing enough
1624 * deferred pages to satisfy the allocation specified by order, rounded up to 1645 * deferred pages to satisfy the allocation specified by order, rounded up to
1625 * the nearest PAGES_PER_SECTION boundary. So we're adding memory in increments 1646 * the nearest PAGES_PER_SECTION boundary. So we're adding memory in increments