author	Pavel Tatashin <pasha.tatashin@oracle.com>	2018-04-05 19:22:27 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-04-06 00:36:24 -0400
commit	3a2d7fa8a3d5ae740bd0c21d933acc6220857ed0 (patch)
tree	30eb570bb5050346cb4f5315743faebcdb6e389c /mm/page_alloc.c
parent	8e7a0c9100cade3bdbf851206b892b6f98eb39c9 (diff)
mm: disable interrupts while initializing deferred pages
Vlastimil Babka reported a window during which, while deferred pages are
being initialized and the current round of on-demand initialization has
finished, allocations may fail.  While this is a highly unlikely scenario,
since this kind of allocation request must be large and must come from an
interrupt handler, we still want to cover it.

We solve this by initializing deferred pages with interrupts disabled, and
holding the node_size_lock spin lock while pages in the node are being
initialized.  The on-demand deferred page initialization that comes later
will use the same lock, and thus synchronize with deferred_init_memmap().

It is unlikely for threads that initialize deferred pages to be
interrupted.  They run soon after smp_init(), but before modules are
initialized, and long before user space programs.  This is why there is no
adverse effect of having these threads running with interrupts disabled.

[pasha.tatashin@oracle.com: v6]
  Link: http://lkml.kernel.org/r/20180313182355.17669-2-pasha.tatashin@oracle.com
Link: http://lkml.kernel.org/r/20180309220807.24961-2-pasha.tatashin@oracle.com
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Steven Sistare <steven.sistare@oracle.com>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: AKASHI Takahiro <takahiro.akashi@linaro.org>
Cc: Gioh Kim <gi-oh.kim@profitbricks.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Yaowei Bai <baiyaowei@cmss.chinamobile.com>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Miles Chen <miles.chen@mediatek.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
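To make the scheme concrete, here is a minimal sketch (not the patch itself; the real diff follows below).  The function name deferred_init_sketch is hypothetical, but pgdat_resize_lock()/pgdat_resize_unlock() are the actual helpers from include/linux/memory_hotplug.h: they wrap spin_lock_irqsave()/spin_unlock_irqrestore() on pgdat->node_size_lock, so the whole critical section runs with interrupts disabled.

/*
 * Minimal sketch of the scheme described above -- not the actual patch.
 * pgdat_resize_lock()/pgdat_resize_unlock() take/release
 * pgdat->node_size_lock with spin_lock_irqsave()/spin_unlock_irqrestore().
 */
static int __init deferred_init_sketch(pg_data_t *pgdat)
{
	unsigned long flags, first_init_pfn;

	pgdat_resize_lock(pgdat, &flags);	/* IRQs off from here on */
	first_init_pfn = pgdat->first_deferred_pfn;
	if (first_init_pfn == ULONG_MAX) {
		/* Nothing was deferred on this node. */
		pgdat_resize_unlock(pgdat, &flags);
		return 0;
	}

	/* ... initialize and free all deferred page ranges of the node ... */

	pgdat_resize_unlock(pgdat, &flags);	/* IRQs back on */
	return 0;
}

Because the later on-demand initializer takes the same lock, it either runs before this thread touches the node or after it has finished with it; an allocation can never observe a half-initialized range.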
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	19
1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 531d6acb0106..cf5555df78bd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1506,7 +1506,7 @@ static void __init deferred_free_pages(int nid, int zid, unsigned long pfn,
 	} else if (!(pfn & nr_pgmask)) {
 		deferred_free_range(pfn - nr_free, nr_free);
 		nr_free = 1;
-		cond_resched();
+		touch_nmi_watchdog();
 	} else {
 		nr_free++;
 	}
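(The cond_resched() -> touch_nmi_watchdog() swap above, repeated in the next hunk, is a consequence of the locking change further down: these loops now run under pgdat_resize_lock() with interrupts disabled, where cond_resched() is not allowed because it may schedule.  Touching the NMI watchdog instead keeps the long-running loops from triggering hard-lockup reports.)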
@@ -1535,7 +1535,7 @@ static unsigned long __init deferred_init_pages(int nid, int zid,
 		continue;
 	} else if (!page || !(pfn & nr_pgmask)) {
 		page = pfn_to_page(pfn);
-		cond_resched();
+		touch_nmi_watchdog();
 	} else {
 		page++;
 	}
@@ -1552,23 +1552,25 @@ static int __init deferred_init_memmap(void *data)
 	int nid = pgdat->node_id;
 	unsigned long start = jiffies;
 	unsigned long nr_pages = 0;
-	unsigned long spfn, epfn;
+	unsigned long spfn, epfn, first_init_pfn, flags;
 	phys_addr_t spa, epa;
 	int zid;
 	struct zone *zone;
-	unsigned long first_init_pfn = pgdat->first_deferred_pfn;
 	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
 	u64 i;
 
+	/* Bind memory initialisation thread to a local node if possible */
+	if (!cpumask_empty(cpumask))
+		set_cpus_allowed_ptr(current, cpumask);
+
+	pgdat_resize_lock(pgdat, &flags);
+	first_init_pfn = pgdat->first_deferred_pfn;
 	if (first_init_pfn == ULONG_MAX) {
+		pgdat_resize_unlock(pgdat, &flags);
 		pgdat_init_report_one_done();
 		return 0;
 	}
 
-	/* Bind memory initialisation thread to a local node if possible */
-	if (!cpumask_empty(cpumask))
-		set_cpus_allowed_ptr(current, cpumask);
-
 	/* Sanity check boundaries */
 	BUG_ON(pgdat->first_deferred_pfn < pgdat->node_start_pfn);
 	BUG_ON(pgdat->first_deferred_pfn > pgdat_end_pfn(pgdat));
@@ -1598,6 +1600,7 @@ static int __init deferred_init_memmap(void *data)
 		epfn = min_t(unsigned long, zone_end_pfn(zone), PFN_DOWN(epa));
 		deferred_free_pages(nid, zid, spfn, epfn);
 	}
+	pgdat_resize_unlock(pgdat, &flags);
 
 	/* Sanity check that the next zone really is unpopulated */
 	WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
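For completeness, a hedged sketch of the consumer side the commit message mentions ("the on-demand deferred page initialization that comes later").  The name deferred_grow_zone_sketch and the body are illustrative, loosely modeled on the follow-up on-demand initialization patch in this series, not verbatim kernel code.

/*
 * Illustrative consumer side.  Because it takes the same
 * node_size_lock, it either runs before deferred_init_memmap() has
 * started on this node or after it has finished -- never in between.
 */
static bool deferred_grow_zone_sketch(struct zone *zone, unsigned int order)
{
	pg_data_t *pgdat = zone->zone_pgdat;
	unsigned long flags;
	bool grown = false;

	pgdat_resize_lock(pgdat, &flags);
	if (pgdat->first_deferred_pfn != ULONG_MAX) {
		/* ... initialize enough deferred pages to satisfy 'order' ... */
		grown = true;
	}
	pgdat_resize_unlock(pgdat, &flags);
	return grown;
}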