author:    Chen Yu <yu.c.chen@intel.com>  2017-08-25 18:55:30 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org>  2017-08-25 19:12:46 -0400
commit:    556b969a1cfe2686aae149137fa1dfcac0eefe54 (patch)
tree:      89b530e83cad7ad1d59a2e14aa4746f63a52bcb1 /mm/page_alloc.c
parent:    90a6cd503982bfd33ce8c70eb49bd2dd33bc6325 (diff)
PM/hibernate: touch NMI watchdog when creating snapshot
Counting the pages for the hibernation snapshot can take a significant amount of time, especially on systems with a large amount of memory. Since the counting is performed with interrupts disabled, it can trigger the NMI watchdog (hard-lockup detector). The following warning was observed on a system with 1.5TB of DRAM:

  Freezing user space processes ... (elapsed 0.002 seconds) done.
  OOM killer disabled.
  PM: Preallocating image memory...
  NMI watchdog: Watchdog detected hard LOCKUP on cpu 27
  CPU: 27 PID: 3128 Comm: systemd-sleep Not tainted 4.13.0-0.rc2.git0.1.fc27.x86_64 #1
  task: ffff9f01971ac000 task.stack: ffffb1a3f325c000
  RIP: 0010:memory_bm_find_bit+0xf4/0x100
  Call Trace:
   swsusp_set_page_free+0x2b/0x30
   mark_free_pages+0x147/0x1c0
   count_data_pages+0x41/0xa0
   hibernate_preallocate_memory+0x80/0x450
   hibernation_snapshot+0x58/0x410
   hibernate+0x17c/0x310
   state_store+0xdf/0xf0
   kobj_attr_store+0xf/0x20
   sysfs_kf_write+0x37/0x40
   kernfs_fop_write+0x11c/0x1a0
   __vfs_write+0x37/0x170
   vfs_write+0xb1/0x1a0
   SyS_write+0x55/0xc0
   entry_SYSCALL_64_fastpath+0x1a/0xa5
  ...
  done (allocated 6590003 pages)
  PM: Allocated 26360012 kbytes in 19.89 seconds (1325.28 MB/s)

Preallocation took nearly 20 seconds (on a 2.10GHz CPU), which is what triggered the NMI lockup. With the NMI watchdog timeout set to 1 second, a safe feeding interval would be about 6590003/20 = 320k pages in theory. However, some platforms run at lower frequencies, so feed the watchdog every 100k pages instead.

[yu.c.chen@intel.com: simplification]
  Link: http://lkml.kernel.org/r/1503460079-29721-1-git-send-email-yu.c.chen@intel.com
[yu.c.chen@intel.com: use interval of 128k instead of 100k to avoid modulus]
  Link: http://lkml.kernel.org/r/1503328098-5120-1-git-send-email-yu.c.chen@intel.com
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Reported-by: Jan Filipcewicz <jan.filipcewicz@intel.com>
Suggested-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Michal Hocko <mhocko@suse.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Len Brown <lenb@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
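For illustration, the fix boils down to the following pattern. This is a minimal userspace sketch distilled from the diff below, not the kernel code itself: touch_nmi_watchdog() (normally declared in <linux/nmi.h>) is stubbed out so the sketch compiles standalone, and walk_pages() is a hypothetical stand-in for the loops in mark_free_pages().

    #define WD_PAGE_COUNT	(128 * 1024)

    /* Stub: in the kernel this resets the per-CPU hard-lockup detector. */
    static void touch_nmi_watchdog(void)
    {
    }

    static void walk_pages(unsigned long nr_pages)
    {
    	unsigned long pfn, page_count = WD_PAGE_COUNT;

    	for (pfn = 0; pfn < nr_pages; pfn++) {
    		/* Countdown-and-reset: a decrement and a zero test per
    		 * page, rather than a per-page modulus. */
    		if (!--page_count) {
    			touch_nmi_watchdog();
    			page_count = WD_PAGE_COUNT;
    		}
    		/* ... per-page work such as swsusp_set_page_free() ... */
    	}
    }

    int main(void)
    {
    	walk_pages(6590003UL);	/* page count from the 1.5TB system above */
    	return 0;
    }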
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  20
1 file changed, 18 insertions(+), 2 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1bad301820c7..7a58eb5757e3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -66,6 +66,7 @@
 #include <linux/kthread.h>
 #include <linux/memcontrol.h>
 #include <linux/ftrace.h>
+#include <linux/nmi.h>
 
 #include <asm/sections.h>
 #include <asm/tlbflush.h>
@@ -2535,9 +2536,14 @@ void drain_all_pages(struct zone *zone)
 
 #ifdef CONFIG_HIBERNATION
 
+/*
+ * Touch the watchdog for every WD_PAGE_COUNT pages.
+ */
+#define WD_PAGE_COUNT	(128*1024)
+
 void mark_free_pages(struct zone *zone)
 {
-	unsigned long pfn, max_zone_pfn;
+	unsigned long pfn, max_zone_pfn, page_count = WD_PAGE_COUNT;
 	unsigned long flags;
 	unsigned int order, t;
 	struct page *page;
@@ -2552,6 +2558,11 @@ void mark_free_pages(struct zone *zone)
 		if (pfn_valid(pfn)) {
 			page = pfn_to_page(pfn);
 
+			if (!--page_count) {
+				touch_nmi_watchdog();
+				page_count = WD_PAGE_COUNT;
+			}
+
 			if (page_zone(page) != zone)
 				continue;
 
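Placement note (an editorial observation, not from the changelog): the countdown sits before the page_zone() check, so it is decremented for every valid pfn in the zone's span, including pages the loop skips via the subsequent continue. The watchdog therefore keeps getting fed even across long runs of skipped pages.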
@@ -2565,8 +2576,13 @@ void mark_free_pages(struct zone *zone)
 			unsigned long i;
 
 			pfn = page_to_pfn(page);
-			for (i = 0; i < (1UL << order); i++)
+			for (i = 0; i < (1UL << order); i++) {
+				if (!--page_count) {
+					touch_nmi_watchdog();
+					page_count = WD_PAGE_COUNT;
+				}
 				swsusp_set_page_free(pfn_to_page(pfn + i));
+			}
 		}
 	}
 	spin_unlock_irqrestore(&zone->lock, flags);
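Design note on the interval, using the changelog's own figures: the reported system processed 6590003 pages in 19.89 seconds, roughly 330k pages per second, so feeding the watchdog every 128*1024 = 131072 pages leaves better than a 2x margin against a 1-second hard-lockup timeout. The countdown form (if (!--page_count)) also keeps the per-page cost to a decrement and a zero test, with no division at all; that is the cost the "use interval of 128k instead of 100k to avoid modulus" note refers to, since a per-page pfn % 100000 check would require one.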