Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  848
1 file changed, 339 insertions, 509 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9dd443d89d8b..d2186ecb36f7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -57,6 +57,7 @@
57#include <linux/ftrace_event.h> 57#include <linux/ftrace_event.h>
58#include <linux/memcontrol.h> 58#include <linux/memcontrol.h>
59#include <linux/prefetch.h> 59#include <linux/prefetch.h>
60#include <linux/page-debug-flags.h>
60 61
61#include <asm/tlbflush.h> 62#include <asm/tlbflush.h>
62#include <asm/div64.h> 63#include <asm/div64.h>
@@ -96,6 +97,14 @@ EXPORT_SYMBOL(node_states);
96 97
97unsigned long totalram_pages __read_mostly; 98unsigned long totalram_pages __read_mostly;
98unsigned long totalreserve_pages __read_mostly; 99unsigned long totalreserve_pages __read_mostly;
100/*
101 * When calculating the number of globally allowed dirty pages, there
102 * is a certain number of per-zone reserves that should not be
103 * considered dirtyable memory. This is the sum of those reserves
104 * over all existing zones that contribute dirtyable memory.
105 */
106unsigned long dirty_balance_reserve __read_mostly;
107
99int percpu_pagelist_fraction; 108int percpu_pagelist_fraction;
100gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK; 109gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
101 110
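The reserve introduced above is meant to be discounted wherever the amount of dirtyable memory is computed. A minimal sketch of such a consumer on the writeback side; the helper name and placement are assumptions for illustration, not part of this patch:

/* Sketch only: discount the per-zone reserves from dirtyable memory. */
static unsigned long sketch_dirtyable_memory(void)
{
        unsigned long x;

        x = global_page_state(NR_FREE_PAGES) + global_reclaimable_pages();
        x -= min(x, dirty_balance_reserve);
        return x;
}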
@@ -127,6 +136,13 @@ void pm_restrict_gfp_mask(void)
127 saved_gfp_mask = gfp_allowed_mask; 136 saved_gfp_mask = gfp_allowed_mask;
128 gfp_allowed_mask &= ~GFP_IOFS; 137 gfp_allowed_mask &= ~GFP_IOFS;
129} 138}
139
140bool pm_suspended_storage(void)
141{
142 if ((gfp_allowed_mask & GFP_IOFS) == GFP_IOFS)
143 return false;
144 return true;
145}
130#endif /* CONFIG_PM_SLEEP */ 146#endif /* CONFIG_PM_SLEEP */
131 147
132#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE 148#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
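pm_suspended_storage() reports whether pm_restrict_gfp_mask() has stripped GFP_IOFS, i.e. whether block devices may already be suspended for hibernation. The intended calling pattern, roughly as used by should_alloc_retry() later in this patch:

        /* Illustration of the caller added further down: */
        if (!did_some_progress && pm_suspended_storage())
                return 0;       /* storage is suspended, retrying cannot help */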
@@ -181,39 +197,17 @@ static unsigned long __meminitdata nr_kernel_pages;
181static unsigned long __meminitdata nr_all_pages; 197static unsigned long __meminitdata nr_all_pages;
182static unsigned long __meminitdata dma_reserve; 198static unsigned long __meminitdata dma_reserve;
183 199
184#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 200#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
185 /* 201static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
186 * MAX_ACTIVE_REGIONS determines the maximum number of distinct 202static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
187 * ranges of memory (RAM) that may be registered with add_active_range(). 203static unsigned long __initdata required_kernelcore;
188 * Ranges passed to add_active_range() will be merged if possible 204static unsigned long __initdata required_movablecore;
189 * so the number of times add_active_range() can be called is 205static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
190 * related to the number of nodes and the number of holes 206
191 */ 207/* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
192 #ifdef CONFIG_MAX_ACTIVE_REGIONS 208int movable_zone;
193 /* Allow an architecture to set MAX_ACTIVE_REGIONS to save memory */ 209EXPORT_SYMBOL(movable_zone);
194 #define MAX_ACTIVE_REGIONS CONFIG_MAX_ACTIVE_REGIONS 210#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
195 #else
196 #if MAX_NUMNODES >= 32
197 /* If there can be many nodes, allow up to 50 holes per node */
198 #define MAX_ACTIVE_REGIONS (MAX_NUMNODES*50)
199 #else
200 /* By default, allow up to 256 distinct regions */
201 #define MAX_ACTIVE_REGIONS 256
202 #endif
203 #endif
204
205 static struct node_active_region __meminitdata early_node_map[MAX_ACTIVE_REGIONS];
206 static int __meminitdata nr_nodemap_entries;
207 static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
208 static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
209 static unsigned long __initdata required_kernelcore;
210 static unsigned long __initdata required_movablecore;
211 static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
212
213 /* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
214 int movable_zone;
215 EXPORT_SYMBOL(movable_zone);
216#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
217 211
218#if MAX_NUMNODES > 1 212#if MAX_NUMNODES > 1
219int nr_node_ids __read_mostly = MAX_NUMNODES; 213int nr_node_ids __read_mostly = MAX_NUMNODES;
@@ -333,8 +327,8 @@ out:
333 * 327 *
334 * The remaining PAGE_SIZE pages are called "tail pages". 328 * The remaining PAGE_SIZE pages are called "tail pages".
335 * 329 *
336 * All pages have PG_compound set. All pages have their ->private pointing at 330 * All pages have PG_compound set. All tail pages have their ->first_page
337 * the head page (even the head page has this). 331 * pointing at the head page.
338 * 332 *
339 * The first tail page's ->lru.next holds the address of the compound page's 333 * The first tail page's ->lru.next holds the address of the compound page's
340 * put_page() function. Its ->lru.prev holds the order of allocation. 334 * put_page() function. Its ->lru.prev holds the order of allocation.
@@ -356,8 +350,8 @@ void prep_compound_page(struct page *page, unsigned long order)
356 __SetPageHead(page); 350 __SetPageHead(page);
357 for (i = 1; i < nr_pages; i++) { 351 for (i = 1; i < nr_pages; i++) {
358 struct page *p = page + i; 352 struct page *p = page + i;
359
360 __SetPageTail(p); 353 __SetPageTail(p);
354 set_page_count(p, 0);
361 p->first_page = page; 355 p->first_page = page;
362 } 356 }
363} 357}
@@ -403,6 +397,37 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
403 clear_highpage(page + i); 397 clear_highpage(page + i);
404} 398}
405 399
400#ifdef CONFIG_DEBUG_PAGEALLOC
401unsigned int _debug_guardpage_minorder;
402
403static int __init debug_guardpage_minorder_setup(char *buf)
404{
405 unsigned long res;
406
407 if (kstrtoul(buf, 10, &res) < 0 || res > MAX_ORDER / 2) {
408 printk(KERN_ERR "Bad debug_guardpage_minorder value\n");
409 return 0;
410 }
411 _debug_guardpage_minorder = res;
412 printk(KERN_INFO "Setting debug_guardpage_minorder to %lu\n", res);
413 return 0;
414}
415__setup("debug_guardpage_minorder=", debug_guardpage_minorder_setup);
416
417static inline void set_page_guard_flag(struct page *page)
418{
419 __set_bit(PAGE_DEBUG_FLAG_GUARD, &page->debug_flags);
420}
421
422static inline void clear_page_guard_flag(struct page *page)
423{
424 __clear_bit(PAGE_DEBUG_FLAG_GUARD, &page->debug_flags);
425}
426#else
427static inline void set_page_guard_flag(struct page *page) { }
428static inline void clear_page_guard_flag(struct page *page) { }
429#endif
430
406static inline void set_page_order(struct page *page, int order) 431static inline void set_page_order(struct page *page, int order)
407{ 432{
408 set_page_private(page, order); 433 set_page_private(page, order);
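The flag helpers above pair with two accessors, debug_guardpage_minorder() and page_is_guard(), which later hunks rely on but which are defined outside this file. A sketch of what they are assumed to look like:

#ifdef CONFIG_DEBUG_PAGEALLOC
/* Sketch only: accessors assumed by the hunks below. */
static inline unsigned int debug_guardpage_minorder(void)
{
        return _debug_guardpage_minorder;
}

static inline bool page_is_guard(struct page *page)
{
        return test_bit(PAGE_DEBUG_FLAG_GUARD, &page->debug_flags);
}
#else
static inline unsigned int debug_guardpage_minorder(void) { return 0; }
static inline bool page_is_guard(struct page *page) { return false; }
#endif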
@@ -460,6 +485,11 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
460 if (page_zone_id(page) != page_zone_id(buddy)) 485 if (page_zone_id(page) != page_zone_id(buddy))
461 return 0; 486 return 0;
462 487
488 if (page_is_guard(buddy) && page_order(buddy) == order) {
489 VM_BUG_ON(page_count(buddy) != 0);
490 return 1;
491 }
492
463 if (PageBuddy(buddy) && page_order(buddy) == order) { 493 if (PageBuddy(buddy) && page_order(buddy) == order) {
464 VM_BUG_ON(page_count(buddy) != 0); 494 VM_BUG_ON(page_count(buddy) != 0);
465 return 1; 495 return 1;
@@ -516,11 +546,19 @@ static inline void __free_one_page(struct page *page,
516 buddy = page + (buddy_idx - page_idx); 546 buddy = page + (buddy_idx - page_idx);
517 if (!page_is_buddy(page, buddy, order)) 547 if (!page_is_buddy(page, buddy, order))
518 break; 548 break;
519 549 /*
520 /* Our buddy is free, merge with it and move up one order. */ 550 * Our buddy is free or it is CONFIG_DEBUG_PAGEALLOC guard page,
521 list_del(&buddy->lru); 551 * merge with it and move up one order.
522 zone->free_area[order].nr_free--; 552 */
523 rmv_page_order(buddy); 553 if (page_is_guard(buddy)) {
554 clear_page_guard_flag(buddy);
555 set_page_private(page, 0);
556 __mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
557 } else {
558 list_del(&buddy->lru);
559 zone->free_area[order].nr_free--;
560 rmv_page_order(buddy);
561 }
524 combined_idx = buddy_idx & page_idx; 562 combined_idx = buddy_idx & page_idx;
525 page = page + (combined_idx - page_idx); 563 page = page + (combined_idx - page_idx);
526 page_idx = combined_idx; 564 page_idx = combined_idx;
@@ -654,7 +692,7 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
654 int i; 692 int i;
655 int bad = 0; 693 int bad = 0;
656 694
657 trace_mm_page_free_direct(page, order); 695 trace_mm_page_free(page, order);
658 kmemcheck_free_shadow(page, order); 696 kmemcheck_free_shadow(page, order);
659 697
660 if (PageAnon(page)) 698 if (PageAnon(page))
@@ -692,32 +730,23 @@ static void __free_pages_ok(struct page *page, unsigned int order)
692 local_irq_restore(flags); 730 local_irq_restore(flags);
693} 731}
694 732
695/*
696 * permit the bootmem allocator to evade page validation on high-order frees
697 */
698void __meminit __free_pages_bootmem(struct page *page, unsigned int order) 733void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
699{ 734{
700 if (order == 0) { 735 unsigned int nr_pages = 1 << order;
701 __ClearPageReserved(page); 736 unsigned int loop;
702 set_page_count(page, 0);
703 set_page_refcounted(page);
704 __free_page(page);
705 } else {
706 int loop;
707 737
708 prefetchw(page); 738 prefetchw(page);
709 for (loop = 0; loop < BITS_PER_LONG; loop++) { 739 for (loop = 0; loop < nr_pages; loop++) {
710 struct page *p = &page[loop]; 740 struct page *p = &page[loop];
711 741
712 if (loop + 1 < BITS_PER_LONG) 742 if (loop + 1 < nr_pages)
713 prefetchw(p + 1); 743 prefetchw(p + 1);
714 __ClearPageReserved(p); 744 __ClearPageReserved(p);
715 set_page_count(p, 0); 745 set_page_count(p, 0);
716 }
717
718 set_page_refcounted(page);
719 __free_pages(page, order);
720 } 746 }
747
748 set_page_refcounted(page);
749 __free_pages(page, order);
721} 750}
722 751
723 752
@@ -746,6 +775,23 @@ static inline void expand(struct zone *zone, struct page *page,
746 high--; 775 high--;
747 size >>= 1; 776 size >>= 1;
748 VM_BUG_ON(bad_range(zone, &page[size])); 777 VM_BUG_ON(bad_range(zone, &page[size]));
778
779#ifdef CONFIG_DEBUG_PAGEALLOC
780 if (high < debug_guardpage_minorder()) {
781 /*
782 * Mark the split-off chunk as guard pages so that it can be
783 * merged back into the allocator when its buddy is freed.
784 * The corresponding page table entries are not touched; the
785 * pages remain not-present in the virtual address space.
786 */
787 INIT_LIST_HEAD(&page[size].lru);
788 set_page_guard_flag(&page[size]);
789 set_page_private(&page[size], high);
790 /* Guard pages are not available for any usage */
791 __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << high));
792 continue;
793 }
794#endif
749 list_add(&page[size].lru, &area->free_list[migratetype]); 795 list_add(&page[size].lru, &area->free_list[migratetype]);
750 area->nr_free++; 796 area->nr_free++;
751 set_page_order(&page[size], high); 797 set_page_order(&page[size], high);
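Together with the setup hook earlier, the guard mechanism is driven from the kernel command line; an illustrative (not prescriptive) setting:

        debug_guardpage_minorder=2

With CONFIG_DEBUG_PAGEALLOC enabled, every chunk split off below order 2 is then marked as a guard range instead of going back onto a free list: it is subtracted from NR_FREE_PAGES here and only handed back when its buddy is freed, as the __free_one_page() hunk above shows.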
@@ -1211,6 +1257,19 @@ out:
1211} 1257}
1212 1258
1213/* 1259/*
1260 * Free a list of 0-order pages
1261 */
1262void free_hot_cold_page_list(struct list_head *list, int cold)
1263{
1264 struct page *page, *next;
1265
1266 list_for_each_entry_safe(page, next, list, lru) {
1267 trace_mm_page_free_batched(page, cold);
1268 free_hot_cold_page(page, cold);
1269 }
1270}
1271
1272/*
1214 * split_page takes a non-compound higher-order page, and splits it into 1273 * split_page takes a non-compound higher-order page, and splits it into
1215 * n (1<<order) sub-pages: page[0..n] 1274 * n (1<<order) sub-pages: page[0..n]
1216 * Each sub-page must be freed individually. 1275 * Each sub-page must be freed individually.
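free_hot_cold_page_list() takes over the batched freeing previously done through __pagevec_free(), which is removed further down. A hedged usage sketch, with the page source left hypothetical:

        LIST_HEAD(pages_to_free);
        struct page *page;

        while ((page = next_victim_page()) != NULL) /* hypothetical source */
                list_add(&page->lru, &pages_to_free);

        /* second argument: 0 == hot pages, 1 == cold pages */
        free_hot_cold_page_list(&pages_to_free, 1);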
@@ -1408,7 +1467,7 @@ static int should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
1408 1467
1409static int __init fail_page_alloc_debugfs(void) 1468static int __init fail_page_alloc_debugfs(void)
1410{ 1469{
1411 mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; 1470 umode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
1412 struct dentry *dir; 1471 struct dentry *dir;
1413 1472
1414 dir = fault_create_debugfs_attr("fail_page_alloc", NULL, 1473 dir = fault_create_debugfs_attr("fail_page_alloc", NULL,
@@ -1457,7 +1516,7 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
1457 long min = mark; 1516 long min = mark;
1458 int o; 1517 int o;
1459 1518
1460 free_pages -= (1 << order) + 1; 1519 free_pages -= (1 << order) - 1;
1461 if (alloc_flags & ALLOC_HIGH) 1520 if (alloc_flags & ALLOC_HIGH)
1462 min -= min / 2; 1521 min -= min / 2;
1463 if (alloc_flags & ALLOC_HARDER) 1522 if (alloc_flags & ALLOC_HARDER)
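The sign change alters how many of the requested pages are deducted before the per-order watermark loop runs; a quick worked example:

        /* order-3 request: the old code deducted (1 << 3) + 1 = 9 pages
         * from free_pages before the checks, the corrected code deducts
         * (1 << 3) - 1 = 7. */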
@@ -1667,6 +1726,35 @@ zonelist_scan:
1667 if ((alloc_flags & ALLOC_CPUSET) && 1726 if ((alloc_flags & ALLOC_CPUSET) &&
1668 !cpuset_zone_allowed_softwall(zone, gfp_mask)) 1727 !cpuset_zone_allowed_softwall(zone, gfp_mask))
1669 continue; 1728 continue;
1729 /*
1730 * When allocating a page cache page for writing, we
1731 * want to get it from a zone that is within its dirty
1732 * limit, such that no single zone holds more than its
1733 * proportional share of globally allowed dirty pages.
1734 * The dirty limits take into account the zone's
1735 * lowmem reserves and high watermark so that kswapd
1736 * should be able to balance it without having to
1737 * write pages from its LRU list.
1738 *
1739 * This may look like it could increase pressure on
1740 * lower zones by failing allocations in higher zones
1741 * before they are full. But the pages that do spill
1742 * over are limited as the lower zones are protected
1743 * by this very same mechanism. It should not become
1744 * a practical burden to them.
1745 *
1746 * XXX: For now, allow allocations to potentially
1747 * exceed the per-zone dirty limit in the slowpath
1748 * (ALLOC_WMARK_LOW unset) before going into reclaim,
1749 * which is important when on a NUMA setup the allowed
1750 * zones are together not big enough to reach the
1751 * global limit. The proper fix for these situations
1752 * will require awareness of zones in the
1753 * dirty-throttling and the flusher threads.
1754 */
1755 if ((alloc_flags & ALLOC_WMARK_LOW) &&
1756 (gfp_mask & __GFP_WRITE) && !zone_dirty_ok(zone))
1757 goto this_zone_full;
1670 1758
1671 BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK); 1759 BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
1672 if (!(alloc_flags & ALLOC_NO_WATERMARKS)) { 1760 if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
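zone_dirty_ok() itself lives on the writeback side; a hedged sketch of the check the comment above describes, with the limit helper and the exact counters treated as assumptions:

/* Sketch only: a zone is "dirty ok" while its dirty and writeback pages
 * stay below its proportional share of the global dirty limit. */
static bool sketch_zone_dirty_ok(struct zone *zone)
{
        unsigned long limit = sketch_zone_dirty_limit(zone); /* hypothetical */

        return zone_page_state(zone, NR_FILE_DIRTY) +
               zone_page_state(zone, NR_UNSTABLE_NFS) +
               zone_page_state(zone, NR_WRITEBACK) <= limit;
}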
@@ -1756,7 +1844,8 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
1756{ 1844{
1757 unsigned int filter = SHOW_MEM_FILTER_NODES; 1845 unsigned int filter = SHOW_MEM_FILTER_NODES;
1758 1846
1759 if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs)) 1847 if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs) ||
1848 debug_guardpage_minorder() > 0)
1760 return; 1849 return;
1761 1850
1762 /* 1851 /*
@@ -1795,12 +1884,25 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
1795 1884
1796static inline int 1885static inline int
1797should_alloc_retry(gfp_t gfp_mask, unsigned int order, 1886should_alloc_retry(gfp_t gfp_mask, unsigned int order,
1887 unsigned long did_some_progress,
1798 unsigned long pages_reclaimed) 1888 unsigned long pages_reclaimed)
1799{ 1889{
1800 /* Do not loop if specifically requested */ 1890 /* Do not loop if specifically requested */
1801 if (gfp_mask & __GFP_NORETRY) 1891 if (gfp_mask & __GFP_NORETRY)
1802 return 0; 1892 return 0;
1803 1893
1894 /* Always retry if specifically requested */
1895 if (gfp_mask & __GFP_NOFAIL)
1896 return 1;
1897
1898 /*
1899 * Suspend converts GFP_KERNEL to __GFP_WAIT which can prevent reclaim
1900 * making forward progress without invoking OOM. Suspend also disables
1901 * storage devices so kswapd will not help. Bail if we are suspending.
1902 */
1903 if (!did_some_progress && pm_suspended_storage())
1904 return 0;
1905
1804 /* 1906 /*
1805 * In this implementation, order <= PAGE_ALLOC_COSTLY_ORDER 1907 * In this implementation, order <= PAGE_ALLOC_COSTLY_ORDER
1806 * means __GFP_NOFAIL, but that may not be true in other 1908 * means __GFP_NOFAIL, but that may not be true in other
@@ -1819,13 +1921,6 @@ should_alloc_retry(gfp_t gfp_mask, unsigned int order,
1819 if (gfp_mask & __GFP_REPEAT && pages_reclaimed < (1 << order)) 1921 if (gfp_mask & __GFP_REPEAT && pages_reclaimed < (1 << order))
1820 return 1; 1922 return 1;
1821 1923
1822 /*
1823 * Don't let big-order allocations loop unless the caller
1824 * explicitly requests that.
1825 */
1826 if (gfp_mask & __GFP_NOFAIL)
1827 return 1;
1828
1829 return 0; 1924 return 0;
1830} 1925}
1831 1926
@@ -1886,14 +1981,20 @@ static struct page *
1886__alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, 1981__alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
1887 struct zonelist *zonelist, enum zone_type high_zoneidx, 1982 struct zonelist *zonelist, enum zone_type high_zoneidx,
1888 nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, 1983 nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
1889 int migratetype, unsigned long *did_some_progress, 1984 int migratetype, bool sync_migration,
1890 bool sync_migration) 1985 bool *deferred_compaction,
1986 unsigned long *did_some_progress)
1891{ 1987{
1892 struct page *page; 1988 struct page *page;
1893 1989
1894 if (!order || compaction_deferred(preferred_zone)) 1990 if (!order)
1895 return NULL; 1991 return NULL;
1896 1992
1993 if (compaction_deferred(preferred_zone)) {
1994 *deferred_compaction = true;
1995 return NULL;
1996 }
1997
1897 current->flags |= PF_MEMALLOC; 1998 current->flags |= PF_MEMALLOC;
1898 *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask, 1999 *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
1899 nodemask, sync_migration); 2000 nodemask, sync_migration);
@@ -1921,7 +2022,13 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
1921 * but not enough to satisfy watermarks. 2022 * but not enough to satisfy watermarks.
1922 */ 2023 */
1923 count_vm_event(COMPACTFAIL); 2024 count_vm_event(COMPACTFAIL);
1924 defer_compaction(preferred_zone); 2025
2026 /*
2027 * As async compaction considers a subset of pageblocks, only
2028 * defer if the failure was a sync compaction failure.
2029 */
2030 if (sync_migration)
2031 defer_compaction(preferred_zone);
1925 2032
1926 cond_resched(); 2033 cond_resched();
1927 } 2034 }
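compaction_deferred() and defer_compaction() implement an exponential backoff maintained by the compaction code; a rough sketch of the bookkeeping this hunk depends on, with the field names and shift cap treated as assumptions:

/* Sketch only: each sync-compaction failure widens the window of
 * allocation attempts during which further compaction is skipped. */
static void sketch_defer_compaction(struct zone *zone)
{
        zone->compact_considered = 0;
        if (++zone->compact_defer_shift > 6)
                zone->compact_defer_shift = 6;
}

static bool sketch_compaction_deferred(struct zone *zone)
{
        return ++zone->compact_considered <=
               (1UL << zone->compact_defer_shift);
}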
@@ -1933,8 +2040,9 @@ static inline struct page *
1933__alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, 2040__alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
1934 struct zonelist *zonelist, enum zone_type high_zoneidx, 2041 struct zonelist *zonelist, enum zone_type high_zoneidx,
1935 nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, 2042 nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
1936 int migratetype, unsigned long *did_some_progress, 2043 int migratetype, bool sync_migration,
1937 bool sync_migration) 2044 bool *deferred_compaction,
2045 unsigned long *did_some_progress)
1938{ 2046{
1939 return NULL; 2047 return NULL;
1940} 2048}
@@ -2084,6 +2192,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
2084 unsigned long pages_reclaimed = 0; 2192 unsigned long pages_reclaimed = 0;
2085 unsigned long did_some_progress; 2193 unsigned long did_some_progress;
2086 bool sync_migration = false; 2194 bool sync_migration = false;
2195 bool deferred_compaction = false;
2087 2196
2088 /* 2197 /*
2089 * In the slowpath, we sanity check order to avoid ever trying to 2198 * In the slowpath, we sanity check order to avoid ever trying to
@@ -2164,12 +2273,22 @@ rebalance:
2164 zonelist, high_zoneidx, 2273 zonelist, high_zoneidx,
2165 nodemask, 2274 nodemask,
2166 alloc_flags, preferred_zone, 2275 alloc_flags, preferred_zone,
2167 migratetype, &did_some_progress, 2276 migratetype, sync_migration,
2168 sync_migration); 2277 &deferred_compaction,
2278 &did_some_progress);
2169 if (page) 2279 if (page)
2170 goto got_pg; 2280 goto got_pg;
2171 sync_migration = true; 2281 sync_migration = true;
2172 2282
2283 /*
2284 * If compaction is deferred for high-order allocations, it is because
2285 * sync compaction recently failed. If this is the case and the caller
2286 * has requested the system not be heavily disrupted, fail the
2287 * allocation now instead of entering direct reclaim.
2288 */
2289 if (deferred_compaction && (gfp_mask & __GFP_NO_KSWAPD))
2290 goto nopage;
2291
2173 /* Try direct reclaim and then allocating */ 2292 /* Try direct reclaim and then allocating */
2174 page = __alloc_pages_direct_reclaim(gfp_mask, order, 2293 page = __alloc_pages_direct_reclaim(gfp_mask, order,
2175 zonelist, high_zoneidx, 2294 zonelist, high_zoneidx,
@@ -2218,7 +2337,8 @@ rebalance:
2218 2337
2219 /* Check if we should retry the allocation */ 2338 /* Check if we should retry the allocation */
2220 pages_reclaimed += did_some_progress; 2339 pages_reclaimed += did_some_progress;
2221 if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) { 2340 if (should_alloc_retry(gfp_mask, order, did_some_progress,
2341 pages_reclaimed)) {
2222 /* Wait for some write requests to complete then retry */ 2342 /* Wait for some write requests to complete then retry */
2223 wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50); 2343 wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
2224 goto rebalance; 2344 goto rebalance;
@@ -2232,8 +2352,9 @@ rebalance:
2232 zonelist, high_zoneidx, 2352 zonelist, high_zoneidx,
2233 nodemask, 2353 nodemask,
2234 alloc_flags, preferred_zone, 2354 alloc_flags, preferred_zone,
2235 migratetype, &did_some_progress, 2355 migratetype, sync_migration,
2236 sync_migration); 2356 &deferred_compaction,
2357 &did_some_progress);
2237 if (page) 2358 if (page)
2238 goto got_pg; 2359 goto got_pg;
2239 } 2360 }
@@ -2328,16 +2449,6 @@ unsigned long get_zeroed_page(gfp_t gfp_mask)
2328} 2449}
2329EXPORT_SYMBOL(get_zeroed_page); 2450EXPORT_SYMBOL(get_zeroed_page);
2330 2451
2331void __pagevec_free(struct pagevec *pvec)
2332{
2333 int i = pagevec_count(pvec);
2334
2335 while (--i >= 0) {
2336 trace_mm_pagevec_free(pvec->pages[i], pvec->cold);
2337 free_hot_cold_page(pvec->pages[i], pvec->cold);
2338 }
2339}
2340
2341void __free_pages(struct page *page, unsigned int order) 2452void __free_pages(struct page *page, unsigned int order)
2342{ 2453{
2343 if (put_page_testzero(page)) { 2454 if (put_page_testzero(page)) {
@@ -3377,9 +3488,15 @@ static void setup_zone_migrate_reserve(struct zone *zone)
3377 unsigned long block_migratetype; 3488 unsigned long block_migratetype;
3378 int reserve; 3489 int reserve;
3379 3490
3380 /* Get the start pfn, end pfn and the number of blocks to reserve */ 3491 /*
3492 * Get the start pfn, end pfn and the number of blocks to reserve.
3493 * We have to be careful to be aligned to pageblock_nr_pages to
3494 * make sure that we always check pfn_valid for the first page in
3495 * the block.
3496 */
3381 start_pfn = zone->zone_start_pfn; 3497 start_pfn = zone->zone_start_pfn;
3382 end_pfn = start_pfn + zone->spanned_pages; 3498 end_pfn = start_pfn + zone->spanned_pages;
3499 start_pfn = roundup(start_pfn, pageblock_nr_pages);
3383 reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >> 3500 reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
3384 pageblock_order; 3501 pageblock_order;
3385 3502
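The added roundup() guarantees that the scan below only ever inspects the first pfn of a pageblock; a small worked example, assuming pageblock_nr_pages == 512:

        /* zone_start_pfn = 1000  ->  roundup(1000, 512) = 1024, so the
         * loop starts on a pageblock boundary and pfn_valid() is always
         * applied to the first page of each block. */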
@@ -3401,25 +3518,33 @@ static void setup_zone_migrate_reserve(struct zone *zone)
3401 if (page_to_nid(page) != zone_to_nid(zone)) 3518 if (page_to_nid(page) != zone_to_nid(zone))
3402 continue; 3519 continue;
3403 3520
3404 /* Blocks with reserved pages will never free, skip them. */
3405 block_end_pfn = min(pfn + pageblock_nr_pages, end_pfn);
3406 if (pageblock_is_reserved(pfn, block_end_pfn))
3407 continue;
3408
3409 block_migratetype = get_pageblock_migratetype(page); 3521 block_migratetype = get_pageblock_migratetype(page);
3410 3522
3411 /* If this block is reserved, account for it */ 3523 /* Only test what is necessary when the reserves are not met */
3412 if (reserve > 0 && block_migratetype == MIGRATE_RESERVE) { 3524 if (reserve > 0) {
3413 reserve--; 3525 /*
3414 continue; 3526 * Blocks with reserved pages will never free, skip
3415 } 3527 * them.
3528 */
3529 block_end_pfn = min(pfn + pageblock_nr_pages, end_pfn);
3530 if (pageblock_is_reserved(pfn, block_end_pfn))
3531 continue;
3416 3532
3417 /* Suitable for reserving if this block is movable */ 3533 /* If this block is reserved, account for it */
3418 if (reserve > 0 && block_migratetype == MIGRATE_MOVABLE) { 3534 if (block_migratetype == MIGRATE_RESERVE) {
3419 set_pageblock_migratetype(page, MIGRATE_RESERVE); 3535 reserve--;
3420 move_freepages_block(zone, page, MIGRATE_RESERVE); 3536 continue;
3421 reserve--; 3537 }
3422 continue; 3538
3539 /* Suitable for reserving if this block is movable */
3540 if (block_migratetype == MIGRATE_MOVABLE) {
3541 set_pageblock_migratetype(page,
3542 MIGRATE_RESERVE);
3543 move_freepages_block(zone, page,
3544 MIGRATE_RESERVE);
3545 reserve--;
3546 continue;
3547 }
3423 } 3548 }
3424 3549
3425 /* 3550 /*
@@ -3731,35 +3856,7 @@ __meminit int init_currently_empty_zone(struct zone *zone,
3731 return 0; 3856 return 0;
3732} 3857}
3733 3858
3734#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 3859#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
3735/*
3736 * Basic iterator support. Return the first range of PFNs for a node
3737 * Note: nid == MAX_NUMNODES returns first region regardless of node
3738 */
3739static int __meminit first_active_region_index_in_nid(int nid)
3740{
3741 int i;
3742
3743 for (i = 0; i < nr_nodemap_entries; i++)
3744 if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
3745 return i;
3746
3747 return -1;
3748}
3749
3750/*
3751 * Basic iterator support. Return the next active range of PFNs for a node
3752 * Note: nid == MAX_NUMNODES returns next region regardless of node
3753 */
3754static int __meminit next_active_region_index_in_nid(int index, int nid)
3755{
3756 for (index = index + 1; index < nr_nodemap_entries; index++)
3757 if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
3758 return index;
3759
3760 return -1;
3761}
3762
3763#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID 3860#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
3764/* 3861/*
3765 * Required by SPARSEMEM. Given a PFN, return what node the PFN is on. 3862 * Required by SPARSEMEM. Given a PFN, return what node the PFN is on.
@@ -3769,15 +3866,12 @@ static int __meminit next_active_region_index_in_nid(int index, int nid)
3769 */ 3866 */
3770int __meminit __early_pfn_to_nid(unsigned long pfn) 3867int __meminit __early_pfn_to_nid(unsigned long pfn)
3771{ 3868{
3772 int i; 3869 unsigned long start_pfn, end_pfn;
3773 3870 int i, nid;
3774 for (i = 0; i < nr_nodemap_entries; i++) {
3775 unsigned long start_pfn = early_node_map[i].start_pfn;
3776 unsigned long end_pfn = early_node_map[i].end_pfn;
3777 3871
3872 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
3778 if (start_pfn <= pfn && pfn < end_pfn) 3873 if (start_pfn <= pfn && pfn < end_pfn)
3779 return early_node_map[i].nid; 3874 return nid;
3780 }
3781 /* This is a memory hole */ 3875 /* This is a memory hole */
3782 return -1; 3876 return -1;
3783} 3877}
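The conversion pattern used throughout the rest of this patch replaces early_node_map[] walks with memblock's pfn-range iterator; in isolation it looks roughly like this (the printout is illustrative only):

        unsigned long start_pfn, end_pfn;
        int i, nid;

        /* Pass a node id instead of MAX_NUMNODES to restrict the walk. */
        for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
                printk(KERN_DEBUG "node %d: pfn %#lx -> %#lx\n",
                       nid, start_pfn, end_pfn);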
@@ -3806,11 +3900,6 @@ bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
3806} 3900}
3807#endif 3901#endif
3808 3902
3809/* Basic iterator support to walk early_node_map[] */
3810#define for_each_active_range_index_in_nid(i, nid) \
3811 for (i = first_active_region_index_in_nid(nid); i != -1; \
3812 i = next_active_region_index_in_nid(i, nid))
3813
3814/** 3903/**
3815 * free_bootmem_with_active_regions - Call free_bootmem_node for each active range 3904 * free_bootmem_with_active_regions - Call free_bootmem_node for each active range
3816 * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed. 3905 * @nid: The node to free memory on. If MAX_NUMNODES, all nodes are freed.
@@ -3820,122 +3909,34 @@ bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
3820 * add_active_ranges() contain no holes and may be freed, this 3909 * add_active_ranges() contain no holes and may be freed, this
3821 * this function may be used instead of calling free_bootmem() manually. 3910 * this function may be used instead of calling free_bootmem() manually.
3822 */ 3911 */
3823void __init free_bootmem_with_active_regions(int nid, 3912void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn)
3824 unsigned long max_low_pfn)
3825{
3826 int i;
3827
3828 for_each_active_range_index_in_nid(i, nid) {
3829 unsigned long size_pages = 0;
3830 unsigned long end_pfn = early_node_map[i].end_pfn;
3831
3832 if (early_node_map[i].start_pfn >= max_low_pfn)
3833 continue;
3834
3835 if (end_pfn > max_low_pfn)
3836 end_pfn = max_low_pfn;
3837
3838 size_pages = end_pfn - early_node_map[i].start_pfn;
3839 free_bootmem_node(NODE_DATA(early_node_map[i].nid),
3840 PFN_PHYS(early_node_map[i].start_pfn),
3841 size_pages << PAGE_SHIFT);
3842 }
3843}
3844
3845#ifdef CONFIG_HAVE_MEMBLOCK
3846/*
3847 * Basic iterator support. Return the last range of PFNs for a node
3848 * Note: nid == MAX_NUMNODES returns last region regardless of node
3849 */
3850static int __meminit last_active_region_index_in_nid(int nid)
3851{
3852 int i;
3853
3854 for (i = nr_nodemap_entries - 1; i >= 0; i--)
3855 if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
3856 return i;
3857
3858 return -1;
3859}
3860
3861/*
3862 * Basic iterator support. Return the previous active range of PFNs for a node
3863 * Note: nid == MAX_NUMNODES returns next region regardless of node
3864 */
3865static int __meminit previous_active_region_index_in_nid(int index, int nid)
3866{
3867 for (index = index - 1; index >= 0; index--)
3868 if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
3869 return index;
3870
3871 return -1;
3872}
3873
3874#define for_each_active_range_index_in_nid_reverse(i, nid) \
3875 for (i = last_active_region_index_in_nid(nid); i != -1; \
3876 i = previous_active_region_index_in_nid(i, nid))
3877
3878u64 __init find_memory_core_early(int nid, u64 size, u64 align,
3879 u64 goal, u64 limit)
3880{ 3913{
3881 int i; 3914 unsigned long start_pfn, end_pfn;
3882 3915 int i, this_nid;
3883 /* Need to go over early_node_map to find out good range for node */
3884 for_each_active_range_index_in_nid_reverse(i, nid) {
3885 u64 addr;
3886 u64 ei_start, ei_last;
3887 u64 final_start, final_end;
3888
3889 ei_last = early_node_map[i].end_pfn;
3890 ei_last <<= PAGE_SHIFT;
3891 ei_start = early_node_map[i].start_pfn;
3892 ei_start <<= PAGE_SHIFT;
3893
3894 final_start = max(ei_start, goal);
3895 final_end = min(ei_last, limit);
3896
3897 if (final_start >= final_end)
3898 continue;
3899
3900 addr = memblock_find_in_range(final_start, final_end, size, align);
3901 3916
3902 if (addr == MEMBLOCK_ERROR) 3917 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid) {
3903 continue; 3918 start_pfn = min(start_pfn, max_low_pfn);
3919 end_pfn = min(end_pfn, max_low_pfn);
3904 3920
3905 return addr; 3921 if (start_pfn < end_pfn)
3922 free_bootmem_node(NODE_DATA(this_nid),
3923 PFN_PHYS(start_pfn),
3924 (end_pfn - start_pfn) << PAGE_SHIFT);
3906 } 3925 }
3907
3908 return MEMBLOCK_ERROR;
3909} 3926}
3910#endif
3911 3927
3912int __init add_from_early_node_map(struct range *range, int az, 3928int __init add_from_early_node_map(struct range *range, int az,
3913 int nr_range, int nid) 3929 int nr_range, int nid)
3914{ 3930{
3931 unsigned long start_pfn, end_pfn;
3915 int i; 3932 int i;
3916 u64 start, end;
3917 3933
3918 /* need to go over early_node_map to find out good range for node */ 3934 /* need to go over early_node_map to find out good range for node */
3919 for_each_active_range_index_in_nid(i, nid) { 3935 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL)
3920 start = early_node_map[i].start_pfn; 3936 nr_range = add_range(range, az, nr_range, start_pfn, end_pfn);
3921 end = early_node_map[i].end_pfn;
3922 nr_range = add_range(range, az, nr_range, start, end);
3923 }
3924 return nr_range; 3937 return nr_range;
3925} 3938}
3926 3939
3927void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
3928{
3929 int i;
3930 int ret;
3931
3932 for_each_active_range_index_in_nid(i, nid) {
3933 ret = work_fn(early_node_map[i].start_pfn,
3934 early_node_map[i].end_pfn, data);
3935 if (ret)
3936 break;
3937 }
3938}
3939/** 3940/**
3940 * sparse_memory_present_with_active_regions - Call memory_present for each active range 3941 * sparse_memory_present_with_active_regions - Call memory_present for each active range
3941 * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used. 3942 * @nid: The node to call memory_present for. If MAX_NUMNODES, all nodes will be used.
@@ -3946,12 +3947,11 @@ void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
3946 */ 3947 */
3947void __init sparse_memory_present_with_active_regions(int nid) 3948void __init sparse_memory_present_with_active_regions(int nid)
3948{ 3949{
3949 int i; 3950 unsigned long start_pfn, end_pfn;
3951 int i, this_nid;
3950 3952
3951 for_each_active_range_index_in_nid(i, nid) 3953 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid)
3952 memory_present(early_node_map[i].nid, 3954 memory_present(this_nid, start_pfn, end_pfn);
3953 early_node_map[i].start_pfn,
3954 early_node_map[i].end_pfn);
3955} 3955}
3956 3956
3957/** 3957/**
@@ -3968,13 +3968,15 @@ void __init sparse_memory_present_with_active_regions(int nid)
3968void __meminit get_pfn_range_for_nid(unsigned int nid, 3968void __meminit get_pfn_range_for_nid(unsigned int nid,
3969 unsigned long *start_pfn, unsigned long *end_pfn) 3969 unsigned long *start_pfn, unsigned long *end_pfn)
3970{ 3970{
3971 unsigned long this_start_pfn, this_end_pfn;
3971 int i; 3972 int i;
3973
3972 *start_pfn = -1UL; 3974 *start_pfn = -1UL;
3973 *end_pfn = 0; 3975 *end_pfn = 0;
3974 3976
3975 for_each_active_range_index_in_nid(i, nid) { 3977 for_each_mem_pfn_range(i, nid, &this_start_pfn, &this_end_pfn, NULL) {
3976 *start_pfn = min(*start_pfn, early_node_map[i].start_pfn); 3978 *start_pfn = min(*start_pfn, this_start_pfn);
3977 *end_pfn = max(*end_pfn, early_node_map[i].end_pfn); 3979 *end_pfn = max(*end_pfn, this_end_pfn);
3978 } 3980 }
3979 3981
3980 if (*start_pfn == -1UL) 3982 if (*start_pfn == -1UL)
@@ -4077,46 +4079,16 @@ unsigned long __meminit __absent_pages_in_range(int nid,
4077 unsigned long range_start_pfn, 4079 unsigned long range_start_pfn,
4078 unsigned long range_end_pfn) 4080 unsigned long range_end_pfn)
4079{ 4081{
4080 int i = 0; 4082 unsigned long nr_absent = range_end_pfn - range_start_pfn;
4081 unsigned long prev_end_pfn = 0, hole_pages = 0; 4083 unsigned long start_pfn, end_pfn;
4082 unsigned long start_pfn; 4084 int i;
4083
4084 /* Find the end_pfn of the first active range of pfns in the node */
4085 i = first_active_region_index_in_nid(nid);
4086 if (i == -1)
4087 return 0;
4088
4089 prev_end_pfn = min(early_node_map[i].start_pfn, range_end_pfn);
4090
4091 /* Account for ranges before physical memory on this node */
4092 if (early_node_map[i].start_pfn > range_start_pfn)
4093 hole_pages = prev_end_pfn - range_start_pfn;
4094
4095 /* Find all holes for the zone within the node */
4096 for (; i != -1; i = next_active_region_index_in_nid(i, nid)) {
4097
4098 /* No need to continue if prev_end_pfn is outside the zone */
4099 if (prev_end_pfn >= range_end_pfn)
4100 break;
4101
4102 /* Make sure the end of the zone is not within the hole */
4103 start_pfn = min(early_node_map[i].start_pfn, range_end_pfn);
4104 prev_end_pfn = max(prev_end_pfn, range_start_pfn);
4105 4085
4106 /* Update the hole size cound and move on */ 4086 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
4107 if (start_pfn > range_start_pfn) { 4087 start_pfn = clamp(start_pfn, range_start_pfn, range_end_pfn);
4108 BUG_ON(prev_end_pfn > start_pfn); 4088 end_pfn = clamp(end_pfn, range_start_pfn, range_end_pfn);
4109 hole_pages += start_pfn - prev_end_pfn; 4089 nr_absent -= end_pfn - start_pfn;
4110 }
4111 prev_end_pfn = early_node_map[i].end_pfn;
4112 } 4090 }
4113 4091 return nr_absent;
4114 /* Account for ranges past physical memory on this node */
4115 if (range_end_pfn > prev_end_pfn)
4116 hole_pages += range_end_pfn -
4117 max(range_start_pfn, prev_end_pfn);
4118
4119 return hole_pages;
4120} 4092}
4121 4093
4122/** 4094/**
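The rewritten hole accounting starts from the full span and subtracts every present range; a worked example with hypothetical numbers:

        /* range [0, 1000) with memory at [0, 600) and [700, 1000):
         *   nr_absent = 1000
         *   nr_absent -= 600 - 0     ->  400
         *   nr_absent -= 1000 - 700  ->  100 pages of holes, i.e. [600, 700) */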
@@ -4137,14 +4109,14 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
4137 unsigned long zone_type, 4109 unsigned long zone_type,
4138 unsigned long *ignored) 4110 unsigned long *ignored)
4139{ 4111{
4112 unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type];
4113 unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type];
4140 unsigned long node_start_pfn, node_end_pfn; 4114 unsigned long node_start_pfn, node_end_pfn;
4141 unsigned long zone_start_pfn, zone_end_pfn; 4115 unsigned long zone_start_pfn, zone_end_pfn;
4142 4116
4143 get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn); 4117 get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
4144 zone_start_pfn = max(arch_zone_lowest_possible_pfn[zone_type], 4118 zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high);
4145 node_start_pfn); 4119 zone_end_pfn = clamp(node_end_pfn, zone_low, zone_high);
4146 zone_end_pfn = min(arch_zone_highest_possible_pfn[zone_type],
4147 node_end_pfn);
4148 4120
4149 adjust_zone_range_for_zone_movable(nid, zone_type, 4121 adjust_zone_range_for_zone_movable(nid, zone_type,
4150 node_start_pfn, node_end_pfn, 4122 node_start_pfn, node_end_pfn,
@@ -4152,7 +4124,7 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
4152 return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn); 4124 return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
4153} 4125}
4154 4126
4155#else 4127#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
4156static inline unsigned long __meminit zone_spanned_pages_in_node(int nid, 4128static inline unsigned long __meminit zone_spanned_pages_in_node(int nid,
4157 unsigned long zone_type, 4129 unsigned long zone_type,
4158 unsigned long *zones_size) 4130 unsigned long *zones_size)
@@ -4170,7 +4142,7 @@ static inline unsigned long __meminit zone_absent_pages_in_node(int nid,
4170 return zholes_size[zone_type]; 4142 return zholes_size[zone_type];
4171} 4143}
4172 4144
4173#endif 4145#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
4174 4146
4175static void __meminit calculate_node_totalpages(struct pglist_data *pgdat, 4147static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
4176 unsigned long *zones_size, unsigned long *zholes_size) 4148 unsigned long *zones_size, unsigned long *zholes_size)
@@ -4290,7 +4262,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
4290 for (j = 0; j < MAX_NR_ZONES; j++) { 4262 for (j = 0; j < MAX_NR_ZONES; j++) {
4291 struct zone *zone = pgdat->node_zones + j; 4263 struct zone *zone = pgdat->node_zones + j;
4292 unsigned long size, realsize, memmap_pages; 4264 unsigned long size, realsize, memmap_pages;
4293 enum lru_list l; 4265 enum lru_list lru;
4294 4266
4295 size = zone_spanned_pages_in_node(nid, j, zones_size); 4267 size = zone_spanned_pages_in_node(nid, j, zones_size);
4296 realsize = size - zone_absent_pages_in_node(nid, j, 4268 realsize = size - zone_absent_pages_in_node(nid, j,
@@ -4340,8 +4312,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
4340 zone->zone_pgdat = pgdat; 4312 zone->zone_pgdat = pgdat;
4341 4313
4342 zone_pcp_init(zone); 4314 zone_pcp_init(zone);
4343 for_each_lru(l) 4315 for_each_lru(lru)
4344 INIT_LIST_HEAD(&zone->lru[l].list); 4316 INIT_LIST_HEAD(&zone->lruvec.lists[lru]);
4345 zone->reclaim_stat.recent_rotated[0] = 0; 4317 zone->reclaim_stat.recent_rotated[0] = 0;
4346 zone->reclaim_stat.recent_rotated[1] = 0; 4318 zone->reclaim_stat.recent_rotated[1] = 0;
4347 zone->reclaim_stat.recent_scanned[0] = 0; 4319 zone->reclaim_stat.recent_scanned[0] = 0;
@@ -4393,10 +4365,10 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat)
4393 */ 4365 */
4394 if (pgdat == NODE_DATA(0)) { 4366 if (pgdat == NODE_DATA(0)) {
4395 mem_map = NODE_DATA(0)->node_mem_map; 4367 mem_map = NODE_DATA(0)->node_mem_map;
4396#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 4368#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
4397 if (page_to_pfn(mem_map) != pgdat->node_start_pfn) 4369 if (page_to_pfn(mem_map) != pgdat->node_start_pfn)
4398 mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET); 4370 mem_map -= (pgdat->node_start_pfn - ARCH_PFN_OFFSET);
4399#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ 4371#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
4400 } 4372 }
4401#endif 4373#endif
4402#endif /* CONFIG_FLAT_NODE_MEM_MAP */ 4374#endif /* CONFIG_FLAT_NODE_MEM_MAP */
@@ -4421,7 +4393,7 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
4421 free_area_init_core(pgdat, zones_size, zholes_size); 4393 free_area_init_core(pgdat, zones_size, zholes_size);
4422} 4394}
4423 4395
4424#ifdef CONFIG_ARCH_POPULATES_NODE_MAP 4396#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
4425 4397
4426#if MAX_NUMNODES > 1 4398#if MAX_NUMNODES > 1
4427/* 4399/*
@@ -4443,170 +4415,6 @@ static inline void setup_nr_node_ids(void)
4443#endif 4415#endif
4444 4416
4445/** 4417/**
4446 * add_active_range - Register a range of PFNs backed by physical memory
4447 * @nid: The node ID the range resides on
4448 * @start_pfn: The start PFN of the available physical memory
4449 * @end_pfn: The end PFN of the available physical memory
4450 *
4451 * These ranges are stored in an early_node_map[] and later used by
4452 * free_area_init_nodes() to calculate zone sizes and holes. If the
4453 * range spans a memory hole, it is up to the architecture to ensure
4454 * the memory is not freed by the bootmem allocator. If possible
4455 * the range being registered will be merged with existing ranges.
4456 */
4457void __init add_active_range(unsigned int nid, unsigned long start_pfn,
4458 unsigned long end_pfn)
4459{
4460 int i;
4461
4462 mminit_dprintk(MMINIT_TRACE, "memory_register",
4463 "Entering add_active_range(%d, %#lx, %#lx) "
4464 "%d entries of %d used\n",
4465 nid, start_pfn, end_pfn,
4466 nr_nodemap_entries, MAX_ACTIVE_REGIONS);
4467
4468 mminit_validate_memmodel_limits(&start_pfn, &end_pfn);
4469
4470 /* Merge with existing active regions if possible */
4471 for (i = 0; i < nr_nodemap_entries; i++) {
4472 if (early_node_map[i].nid != nid)
4473 continue;
4474
4475 /* Skip if an existing region covers this new one */
4476 if (start_pfn >= early_node_map[i].start_pfn &&
4477 end_pfn <= early_node_map[i].end_pfn)
4478 return;
4479
4480 /* Merge forward if suitable */
4481 if (start_pfn <= early_node_map[i].end_pfn &&
4482 end_pfn > early_node_map[i].end_pfn) {
4483 early_node_map[i].end_pfn = end_pfn;
4484 return;
4485 }
4486
4487 /* Merge backward if suitable */
4488 if (start_pfn < early_node_map[i].start_pfn &&
4489 end_pfn >= early_node_map[i].start_pfn) {
4490 early_node_map[i].start_pfn = start_pfn;
4491 return;
4492 }
4493 }
4494
4495 /* Check that early_node_map is large enough */
4496 if (i >= MAX_ACTIVE_REGIONS) {
4497 printk(KERN_CRIT "More than %d memory regions, truncating\n",
4498 MAX_ACTIVE_REGIONS);
4499 return;
4500 }
4501
4502 early_node_map[i].nid = nid;
4503 early_node_map[i].start_pfn = start_pfn;
4504 early_node_map[i].end_pfn = end_pfn;
4505 nr_nodemap_entries = i + 1;
4506}
4507
4508/**
4509 * remove_active_range - Shrink an existing registered range of PFNs
4510 * @nid: The node id the range is on that should be shrunk
4511 * @start_pfn: The new PFN of the range
4512 * @end_pfn: The new PFN of the range
4513 *
4514 * i386 with NUMA use alloc_remap() to store a node_mem_map on a local node.
4515 * The map is kept near the end physical page range that has already been
4516 * registered. This function allows an arch to shrink an existing registered
4517 * range.
4518 */
4519void __init remove_active_range(unsigned int nid, unsigned long start_pfn,
4520 unsigned long end_pfn)
4521{
4522 int i, j;
4523 int removed = 0;
4524
4525 printk(KERN_DEBUG "remove_active_range (%d, %lu, %lu)\n",
4526 nid, start_pfn, end_pfn);
4527
4528 /* Find the old active region end and shrink */
4529 for_each_active_range_index_in_nid(i, nid) {
4530 if (early_node_map[i].start_pfn >= start_pfn &&
4531 early_node_map[i].end_pfn <= end_pfn) {
4532 /* clear it */
4533 early_node_map[i].start_pfn = 0;
4534 early_node_map[i].end_pfn = 0;
4535 removed = 1;
4536 continue;
4537 }
4538 if (early_node_map[i].start_pfn < start_pfn &&
4539 early_node_map[i].end_pfn > start_pfn) {
4540 unsigned long temp_end_pfn = early_node_map[i].end_pfn;
4541 early_node_map[i].end_pfn = start_pfn;
4542 if (temp_end_pfn > end_pfn)
4543 add_active_range(nid, end_pfn, temp_end_pfn);
4544 continue;
4545 }
4546 if (early_node_map[i].start_pfn >= start_pfn &&
4547 early_node_map[i].end_pfn > end_pfn &&
4548 early_node_map[i].start_pfn < end_pfn) {
4549 early_node_map[i].start_pfn = end_pfn;
4550 continue;
4551 }
4552 }
4553
4554 if (!removed)
4555 return;
4556
4557 /* remove the blank ones */
4558 for (i = nr_nodemap_entries - 1; i > 0; i--) {
4559 if (early_node_map[i].nid != nid)
4560 continue;
4561 if (early_node_map[i].end_pfn)
4562 continue;
4563 /* we found it, get rid of it */
4564 for (j = i; j < nr_nodemap_entries - 1; j++)
4565 memcpy(&early_node_map[j], &early_node_map[j+1],
4566 sizeof(early_node_map[j]));
4567 j = nr_nodemap_entries - 1;
4568 memset(&early_node_map[j], 0, sizeof(early_node_map[j]));
4569 nr_nodemap_entries--;
4570 }
4571}
4572
4573/**
4574 * remove_all_active_ranges - Remove all currently registered regions
4575 *
4576 * During discovery, it may be found that a table like SRAT is invalid
4577 * and an alternative discovery method must be used. This function removes
4578 * all currently registered regions.
4579 */
4580void __init remove_all_active_ranges(void)
4581{
4582 memset(early_node_map, 0, sizeof(early_node_map));
4583 nr_nodemap_entries = 0;
4584}
4585
4586/* Compare two active node_active_regions */
4587static int __init cmp_node_active_region(const void *a, const void *b)
4588{
4589 struct node_active_region *arange = (struct node_active_region *)a;
4590 struct node_active_region *brange = (struct node_active_region *)b;
4591
4592 /* Done this way to avoid overflows */
4593 if (arange->start_pfn > brange->start_pfn)
4594 return 1;
4595 if (arange->start_pfn < brange->start_pfn)
4596 return -1;
4597
4598 return 0;
4599}
4600
4601/* sort the node_map by start_pfn */
4602void __init sort_node_map(void)
4603{
4604 sort(early_node_map, (size_t)nr_nodemap_entries,
4605 sizeof(struct node_active_region),
4606 cmp_node_active_region, NULL);
4607}
4608
4609/**
4610 * node_map_pfn_alignment - determine the maximum internode alignment 4418 * node_map_pfn_alignment - determine the maximum internode alignment
4611 * 4419 *
4612 * This function should be called after node map is populated and sorted. 4420 * This function should be called after node map is populated and sorted.
@@ -4628,15 +4436,11 @@ void __init sort_node_map(void)
4628unsigned long __init node_map_pfn_alignment(void) 4436unsigned long __init node_map_pfn_alignment(void)
4629{ 4437{
4630 unsigned long accl_mask = 0, last_end = 0; 4438 unsigned long accl_mask = 0, last_end = 0;
4439 unsigned long start, end, mask;
4631 int last_nid = -1; 4440 int last_nid = -1;
4632 int i; 4441 int i, nid;
4633
4634 for_each_active_range_index_in_nid(i, MAX_NUMNODES) {
4635 int nid = early_node_map[i].nid;
4636 unsigned long start = early_node_map[i].start_pfn;
4637 unsigned long end = early_node_map[i].end_pfn;
4638 unsigned long mask;
4639 4442
4443 for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid) {
4640 if (!start || last_nid < 0 || last_nid == nid) { 4444 if (!start || last_nid < 0 || last_nid == nid) {
4641 last_nid = nid; 4445 last_nid = nid;
4642 last_end = end; 4446 last_end = end;
@@ -4663,12 +4467,12 @@ unsigned long __init node_map_pfn_alignment(void)
4663/* Find the lowest pfn for a node */ 4467/* Find the lowest pfn for a node */
4664static unsigned long __init find_min_pfn_for_node(int nid) 4468static unsigned long __init find_min_pfn_for_node(int nid)
4665{ 4469{
4666 int i;
4667 unsigned long min_pfn = ULONG_MAX; 4470 unsigned long min_pfn = ULONG_MAX;
4471 unsigned long start_pfn;
4472 int i;
4668 4473
4669 /* Assuming a sorted map, the first range found has the starting pfn */ 4474 for_each_mem_pfn_range(i, nid, &start_pfn, NULL, NULL)
4670 for_each_active_range_index_in_nid(i, nid) 4475 min_pfn = min(min_pfn, start_pfn);
4671 min_pfn = min(min_pfn, early_node_map[i].start_pfn);
4672 4476
4673 if (min_pfn == ULONG_MAX) { 4477 if (min_pfn == ULONG_MAX) {
4674 printk(KERN_WARNING 4478 printk(KERN_WARNING
@@ -4697,15 +4501,16 @@ unsigned long __init find_min_pfn_with_active_regions(void)
4697 */ 4501 */
4698static unsigned long __init early_calculate_totalpages(void) 4502static unsigned long __init early_calculate_totalpages(void)
4699{ 4503{
4700 int i;
4701 unsigned long totalpages = 0; 4504 unsigned long totalpages = 0;
4505 unsigned long start_pfn, end_pfn;
4506 int i, nid;
4507
4508 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
4509 unsigned long pages = end_pfn - start_pfn;
4702 4510
4703 for (i = 0; i < nr_nodemap_entries; i++) {
4704 unsigned long pages = early_node_map[i].end_pfn -
4705 early_node_map[i].start_pfn;
4706 totalpages += pages; 4511 totalpages += pages;
4707 if (pages) 4512 if (pages)
4708 node_set_state(early_node_map[i].nid, N_HIGH_MEMORY); 4513 node_set_state(nid, N_HIGH_MEMORY);
4709 } 4514 }
4710 return totalpages; 4515 return totalpages;
4711} 4516}
@@ -4760,6 +4565,8 @@ restart:
4760 /* Spread kernelcore memory as evenly as possible throughout nodes */ 4565 /* Spread kernelcore memory as evenly as possible throughout nodes */
4761 kernelcore_node = required_kernelcore / usable_nodes; 4566 kernelcore_node = required_kernelcore / usable_nodes;
4762 for_each_node_state(nid, N_HIGH_MEMORY) { 4567 for_each_node_state(nid, N_HIGH_MEMORY) {
4568 unsigned long start_pfn, end_pfn;
4569
4763 /* 4570 /*
4764 * Recalculate kernelcore_node if the division per node 4571 * Recalculate kernelcore_node if the division per node
4765 * now exceeds what is necessary to satisfy the requested 4572 * now exceeds what is necessary to satisfy the requested
@@ -4776,13 +4583,10 @@ restart:
4776 kernelcore_remaining = kernelcore_node; 4583 kernelcore_remaining = kernelcore_node;
4777 4584
4778 /* Go through each range of PFNs within this node */ 4585 /* Go through each range of PFNs within this node */
4779 for_each_active_range_index_in_nid(i, nid) { 4586 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
4780 unsigned long start_pfn, end_pfn;
4781 unsigned long size_pages; 4587 unsigned long size_pages;
4782 4588
4783 start_pfn = max(early_node_map[i].start_pfn, 4589 start_pfn = max(start_pfn, zone_movable_pfn[nid]);
4784 zone_movable_pfn[nid]);
4785 end_pfn = early_node_map[i].end_pfn;
4786 if (start_pfn >= end_pfn) 4590 if (start_pfn >= end_pfn)
4787 continue; 4591 continue;
4788 4592
@@ -4863,8 +4667,10 @@ static void check_for_regular_memory(pg_data_t *pgdat)
4863 4667
4864 for (zone_type = 0; zone_type <= ZONE_NORMAL; zone_type++) { 4668 for (zone_type = 0; zone_type <= ZONE_NORMAL; zone_type++) {
4865 struct zone *zone = &pgdat->node_zones[zone_type]; 4669 struct zone *zone = &pgdat->node_zones[zone_type];
4866 if (zone->present_pages) 4670 if (zone->present_pages) {
4867 node_set_state(zone_to_nid(zone), N_NORMAL_MEMORY); 4671 node_set_state(zone_to_nid(zone), N_NORMAL_MEMORY);
4672 break;
4673 }
4868 } 4674 }
4869#endif 4675#endif
4870} 4676}
@@ -4884,11 +4690,8 @@ static void check_for_regular_memory(pg_data_t *pgdat)
4884 */ 4690 */
4885void __init free_area_init_nodes(unsigned long *max_zone_pfn) 4691void __init free_area_init_nodes(unsigned long *max_zone_pfn)
4886{ 4692{
4887 unsigned long nid; 4693 unsigned long start_pfn, end_pfn;
4888 int i; 4694 int i, nid;
4889
4890 /* Sort early_node_map as initialisation assumes it is sorted */
4891 sort_node_map();
4892 4695
4893 /* Record where the zone boundaries are */ 4696 /* Record where the zone boundaries are */
4894 memset(arch_zone_lowest_possible_pfn, 0, 4697 memset(arch_zone_lowest_possible_pfn, 0,
@@ -4935,11 +4738,9 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
4935 } 4738 }
4936 4739
4937 /* Print out the early_node_map[] */ 4740 /* Print out the early_node_map[] */
4938 printk("early_node_map[%d] active PFN ranges\n", nr_nodemap_entries); 4741 printk("Early memory PFN ranges\n");
4939 for (i = 0; i < nr_nodemap_entries; i++) 4742 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
4940 printk(" %3d: %0#10lx -> %0#10lx\n", early_node_map[i].nid, 4743 printk(" %3d: %0#10lx -> %0#10lx\n", nid, start_pfn, end_pfn);
4941 early_node_map[i].start_pfn,
4942 early_node_map[i].end_pfn);
4943 4744
4944 /* Initialise every node */ 4745 /* Initialise every node */
4945 mminit_verify_pageflags_layout(); 4746 mminit_verify_pageflags_layout();
@@ -4992,7 +4793,7 @@ static int __init cmdline_parse_movablecore(char *p)
4992early_param("kernelcore", cmdline_parse_kernelcore); 4793early_param("kernelcore", cmdline_parse_kernelcore);
4993early_param("movablecore", cmdline_parse_movablecore); 4794early_param("movablecore", cmdline_parse_movablecore);
4994 4795
4995#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ 4796#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
4996 4797
4997/** 4798/**
4998 * set_dma_reserve - set the specified number of pages reserved in the first zone 4799 * set_dma_reserve - set the specified number of pages reserved in the first zone
@@ -5076,8 +4877,19 @@ static void calculate_totalreserve_pages(void)
5076 if (max > zone->present_pages) 4877 if (max > zone->present_pages)
5077 max = zone->present_pages; 4878 max = zone->present_pages;
5078 reserve_pages += max; 4879 reserve_pages += max;
4880 /*
4881 * Lowmem reserves are not available to
4882 * GFP_HIGHUSER page cache allocations and
4883 * kswapd tries to balance zones to their high
4884 * watermark. As a result, neither should be
4885 * regarded as dirtyable memory, to prevent a
4886 * situation where reclaim has to clean pages
4887 * in order to balance the zones.
4888 */
4889 zone->dirty_balance_reserve = max;
5079 } 4890 }
5080 } 4891 }
4892 dirty_balance_reserve = reserve_pages;
5081 totalreserve_pages = reserve_pages; 4893 totalreserve_pages = reserve_pages;
5082} 4894}
5083 4895
@@ -5601,7 +5413,25 @@ __count_immobile_pages(struct zone *zone, struct page *page, int count)
5601 5413
5602bool is_pageblock_removable_nolock(struct page *page) 5414bool is_pageblock_removable_nolock(struct page *page)
5603{ 5415{
5604 struct zone *zone = page_zone(page); 5416 struct zone *zone;
5417 unsigned long pfn;
5418
5419 /*
5420 * We have to be careful here because we are iterating over memory
5421 * sections which are not zone aware so we might end up outside of
5422 * the zone but still within the section.
5423 * We have to take care about the node as well. If the node is offline
5424 * its NODE_DATA will be NULL - see page_zone.
5425 */
5426 if (!node_online(page_to_nid(page)))
5427 return false;
5428
5429 zone = page_zone(page);
5430 pfn = page_to_pfn(page);
5431 if (zone->zone_start_pfn > pfn ||
5432 zone->zone_start_pfn + zone->spanned_pages <= pfn)
5433 return false;
5434
5605 return __count_immobile_pages(zone, page, 0); 5435 return __count_immobile_pages(zone, page, 0);
5606} 5436}
5607 5437