Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--	mm/vmscan.c	397
1 files changed, 213 insertions, 184 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 196709f5ee58..88c5fed8b9a4 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -128,7 +128,7 @@ struct scan_control {
  * From 0 .. 100. Higher means more swappy.
  */
 int vm_swappiness = 60;
-long vm_total_pages;	/* The total number of pages which the VM controls */
+unsigned long vm_total_pages;	/* The total number of pages which the VM controls */
 
 static LIST_HEAD(shrinker_list);
 static DECLARE_RWSEM(shrinker_rwsem);
@@ -1579,16 +1579,6 @@ static inline int inactive_anon_is_low(struct lruvec *lruvec)
 }
 #endif
 
-static int inactive_file_is_low_global(struct zone *zone)
-{
-	unsigned long active, inactive;
-
-	active = zone_page_state(zone, NR_ACTIVE_FILE);
-	inactive = zone_page_state(zone, NR_INACTIVE_FILE);
-
-	return (active > inactive);
-}
-
 /**
  * inactive_file_is_low - check if file pages need to be deactivated
  * @lruvec: LRU vector to check
@@ -1605,10 +1595,13 @@ static int inactive_file_is_low_global(struct zone *zone)
  */
 static int inactive_file_is_low(struct lruvec *lruvec)
 {
-	if (!mem_cgroup_disabled())
-		return mem_cgroup_inactive_file_is_low(lruvec);
+	unsigned long inactive;
+	unsigned long active;
+
+	inactive = get_lru_size(lruvec, LRU_INACTIVE_FILE);
+	active = get_lru_size(lruvec, LRU_ACTIVE_FILE);
 
-	return inactive_file_is_low_global(lruvec_zone(lruvec));
+	return active > inactive;
 }
 
 static int inactive_list_is_low(struct lruvec *lruvec, enum lru_list lru)
@@ -1638,6 +1631,13 @@ static int vmscan_swappiness(struct scan_control *sc)
 	return mem_cgroup_swappiness(sc->target_mem_cgroup);
 }
 
+enum scan_balance {
+	SCAN_EQUAL,
+	SCAN_FRACT,
+	SCAN_ANON,
+	SCAN_FILE,
+};
+
 /*
  * Determine how aggressively the anon and file LRU lists should be
  * scanned. The relative value of each set of LRU lists is determined
@@ -1650,15 +1650,16 @@ static int vmscan_swappiness(struct scan_control *sc)
 static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 			   unsigned long *nr)
 {
-	unsigned long anon, file, free;
+	struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
+	u64 fraction[2];
+	u64 denominator = 0;	/* gcc */
+	struct zone *zone = lruvec_zone(lruvec);
 	unsigned long anon_prio, file_prio;
+	enum scan_balance scan_balance;
+	unsigned long anon, file, free;
+	bool force_scan = false;
 	unsigned long ap, fp;
-	struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
-	u64 fraction[2], denominator;
 	enum lru_list lru;
-	int noswap = 0;
-	bool force_scan = false;
-	struct zone *zone = lruvec_zone(lruvec);
 
 	/*
 	 * If the zone or memcg is small, nr[l] can be 0. This
@@ -1676,11 +1677,30 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 		force_scan = true;
 
 	/* If we have no swap space, do not bother scanning anon pages. */
-	if (!sc->may_swap || (nr_swap_pages <= 0)) {
-		noswap = 1;
-		fraction[0] = 0;
-		fraction[1] = 1;
-		denominator = 1;
+	if (!sc->may_swap || (get_nr_swap_pages() <= 0)) {
+		scan_balance = SCAN_FILE;
+		goto out;
+	}
+
+	/*
+	 * Global reclaim will swap to prevent OOM even with no
+	 * swappiness, but memcg users want to use this knob to
+	 * disable swapping for individual groups completely when
+	 * using the memory controller's swap limit feature would be
+	 * too expensive.
+	 */
+	if (!global_reclaim(sc) && !vmscan_swappiness(sc)) {
+		scan_balance = SCAN_FILE;
+		goto out;
+	}
+
+	/*
+	 * Do not apply any pressure balancing cleverness when the
+	 * system is close to OOM, scan both anon and file equally
+	 * (unless the swappiness setting disagrees with swapping).
+	 */
+	if (!sc->priority && vmscan_swappiness(sc)) {
+		scan_balance = SCAN_EQUAL;
 		goto out;
 	}
 
@@ -1689,30 +1709,32 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 	file = get_lru_size(lruvec, LRU_ACTIVE_FILE) +
 		get_lru_size(lruvec, LRU_INACTIVE_FILE);
 
+	/*
+	 * If it's foreseeable that reclaiming the file cache won't be
+	 * enough to get the zone back into a desirable shape, we have
+	 * to swap. Better start now and leave the - probably heavily
+	 * thrashing - remaining file pages alone.
+	 */
 	if (global_reclaim(sc)) {
 		free = zone_page_state(zone, NR_FREE_PAGES);
 		if (unlikely(file + free <= high_wmark_pages(zone))) {
-			/*
-			 * If we have very few page cache pages, force-scan
-			 * anon pages.
-			 */
-			fraction[0] = 1;
-			fraction[1] = 0;
-			denominator = 1;
-			goto out;
-		} else if (!inactive_file_is_low_global(zone)) {
-			/*
-			 * There is enough inactive page cache, do not
-			 * reclaim anything from the working set right now.
-			 */
-			fraction[0] = 0;
-			fraction[1] = 1;
-			denominator = 1;
+			scan_balance = SCAN_ANON;
 			goto out;
 		}
 	}
 
 	/*
+	 * There is enough inactive page cache, do not reclaim
+	 * anything from the anonymous working set right now.
+	 */
+	if (!inactive_file_is_low(lruvec)) {
+		scan_balance = SCAN_FILE;
+		goto out;
+	}
+
+	scan_balance = SCAN_FRACT;
+
+	/*
 	 * With swappiness at 100, anonymous and file have the same priority.
 	 * This scanning priority is essentially the inverse of IO cost.
 	 */
@@ -1759,19 +1781,92 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 out:
 	for_each_evictable_lru(lru) {
 		int file = is_file_lru(lru);
+		unsigned long size;
 		unsigned long scan;
 
-		scan = get_lru_size(lruvec, lru);
-		if (sc->priority || noswap || !vmscan_swappiness(sc)) {
-			scan >>= sc->priority;
+		size = get_lru_size(lruvec, lru);
+		scan = size >> sc->priority;
+
 		if (!scan && force_scan)
-				scan = SWAP_CLUSTER_MAX;
+			scan = min(size, SWAP_CLUSTER_MAX);
+
+		switch (scan_balance) {
+		case SCAN_EQUAL:
+			/* Scan lists relative to size */
+			break;
+		case SCAN_FRACT:
+			/*
+			 * Scan types proportional to swappiness and
+			 * their relative recent reclaim efficiency.
+			 */
 			scan = div64_u64(scan * fraction[file], denominator);
+			break;
+		case SCAN_FILE:
+		case SCAN_ANON:
+			/* Scan one type exclusively */
+			if ((scan_balance == SCAN_FILE) != file)
+				scan = 0;
+			break;
+		default:
+			/* Look ma, no brain */
+			BUG();
 		}
 		nr[lru] = scan;
 	}
 }
 
+/*
+ * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
+ */
+static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+{
+	unsigned long nr[NR_LRU_LISTS];
+	unsigned long nr_to_scan;
+	enum lru_list lru;
+	unsigned long nr_reclaimed = 0;
+	unsigned long nr_to_reclaim = sc->nr_to_reclaim;
+	struct blk_plug plug;
+
+	get_scan_count(lruvec, sc, nr);
+
+	blk_start_plug(&plug);
+	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
+					nr[LRU_INACTIVE_FILE]) {
+		for_each_evictable_lru(lru) {
+			if (nr[lru]) {
+				nr_to_scan = min(nr[lru], SWAP_CLUSTER_MAX);
+				nr[lru] -= nr_to_scan;
+
+				nr_reclaimed += shrink_list(lru, nr_to_scan,
+							    lruvec, sc);
+			}
+		}
+		/*
+		 * On large memory systems, scan >> priority can become
+		 * really large. This is fine for the starting priority;
+		 * we want to put equal scanning pressure on each zone.
+		 * However, if the VM has a harder time of freeing pages,
+		 * with multiple processes reclaiming pages, the total
+		 * freeing target can get unreasonably large.
+		 */
+		if (nr_reclaimed >= nr_to_reclaim &&
+		    sc->priority < DEF_PRIORITY)
+			break;
+	}
+	blk_finish_plug(&plug);
+	sc->nr_reclaimed += nr_reclaimed;
+
+	/*
+	 * Even if we did not try to evict anon pages at all, we want to
+	 * rebalance the anon lru active/inactive ratio.
+	 */
+	if (inactive_anon_is_low(lruvec))
+		shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
+				   sc, LRU_ACTIVE_ANON);
+
+	throttle_vm_writeout(sc->gfp_mask);
+}
+
 /* Use reclaim/compaction for costly allocs or under memory pressure */
 static bool in_reclaim_compaction(struct scan_control *sc)
 {
@@ -1790,7 +1885,7 @@ static bool in_reclaim_compaction(struct scan_control *sc)
  * calls try_to_compact_zone() that it will have enough free pages to succeed.
  * It will give up earlier than that if there is difficulty reclaiming pages.
  */
-static inline bool should_continue_reclaim(struct lruvec *lruvec,
+static inline bool should_continue_reclaim(struct zone *zone,
 					    unsigned long nr_reclaimed,
 					    unsigned long nr_scanned,
 					    struct scan_control *sc)
@@ -1830,15 +1925,15 @@ static inline bool should_continue_reclaim(struct lruvec *lruvec,
 	 * inactive lists are large enough, continue reclaiming
 	 */
 	pages_for_compaction = (2UL << sc->order);
-	inactive_lru_pages = get_lru_size(lruvec, LRU_INACTIVE_FILE);
-	if (nr_swap_pages > 0)
-		inactive_lru_pages += get_lru_size(lruvec, LRU_INACTIVE_ANON);
+	inactive_lru_pages = zone_page_state(zone, NR_INACTIVE_FILE);
+	if (get_nr_swap_pages() > 0)
+		inactive_lru_pages += zone_page_state(zone, NR_INACTIVE_ANON);
 	if (sc->nr_reclaimed < pages_for_compaction &&
 			inactive_lru_pages > pages_for_compaction)
 		return true;
 
 	/* If compaction would go ahead or the allocation would succeed, stop */
-	switch (compaction_suitable(lruvec_zone(lruvec), sc->order)) {
+	switch (compaction_suitable(zone, sc->order)) {
 	case COMPACT_PARTIAL:
 	case COMPACT_CONTINUE:
 		return false;
@@ -1847,98 +1942,48 @@ static inline bool should_continue_reclaim(struct lruvec *lruvec,
 	}
 }
 
-/*
- * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
- */
-static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+static void shrink_zone(struct zone *zone, struct scan_control *sc)
 {
-	unsigned long nr[NR_LRU_LISTS];
-	unsigned long nr_to_scan;
-	enum lru_list lru;
 	unsigned long nr_reclaimed, nr_scanned;
-	unsigned long nr_to_reclaim = sc->nr_to_reclaim;
-	struct blk_plug plug;
-
-restart:
-	nr_reclaimed = 0;
-	nr_scanned = sc->nr_scanned;
-	get_scan_count(lruvec, sc, nr);
-
-	blk_start_plug(&plug);
-	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
-					nr[LRU_INACTIVE_FILE]) {
-		for_each_evictable_lru(lru) {
-			if (nr[lru]) {
-				nr_to_scan = min_t(unsigned long,
-						   nr[lru], SWAP_CLUSTER_MAX);
-				nr[lru] -= nr_to_scan;
-
-				nr_reclaimed += shrink_list(lru, nr_to_scan,
-							    lruvec, sc);
-			}
-		}
-		/*
-		 * On large memory systems, scan >> priority can become
-		 * really large. This is fine for the starting priority;
-		 * we want to put equal scanning pressure on each zone.
-		 * However, if the VM has a harder time of freeing pages,
-		 * with multiple processes reclaiming pages, the total
-		 * freeing target can get unreasonably large.
-		 */
-		if (nr_reclaimed >= nr_to_reclaim &&
-		    sc->priority < DEF_PRIORITY)
-			break;
-	}
-	blk_finish_plug(&plug);
-	sc->nr_reclaimed += nr_reclaimed;
 
-	/*
-	 * Even if we did not try to evict anon pages at all, we want to
-	 * rebalance the anon lru active/inactive ratio.
-	 */
-	if (inactive_anon_is_low(lruvec))
-		shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
-				   sc, LRU_ACTIVE_ANON);
-
-	/* reclaim/compaction might need reclaim to continue */
-	if (should_continue_reclaim(lruvec, nr_reclaimed,
-				    sc->nr_scanned - nr_scanned, sc))
-		goto restart;
+	do {
+		struct mem_cgroup *root = sc->target_mem_cgroup;
+		struct mem_cgroup_reclaim_cookie reclaim = {
+			.zone = zone,
+			.priority = sc->priority,
+		};
+		struct mem_cgroup *memcg;
 
-	throttle_vm_writeout(sc->gfp_mask);
-}
+		nr_reclaimed = sc->nr_reclaimed;
+		nr_scanned = sc->nr_scanned;
 
-static void shrink_zone(struct zone *zone, struct scan_control *sc)
-{
-	struct mem_cgroup *root = sc->target_mem_cgroup;
-	struct mem_cgroup_reclaim_cookie reclaim = {
-		.zone = zone,
-		.priority = sc->priority,
-	};
-	struct mem_cgroup *memcg;
+		memcg = mem_cgroup_iter(root, NULL, &reclaim);
+		do {
+			struct lruvec *lruvec;
 
-	memcg = mem_cgroup_iter(root, NULL, &reclaim);
-	do {
-		struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+			lruvec = mem_cgroup_zone_lruvec(zone, memcg);
 
 			shrink_lruvec(lruvec, sc);
 
 			/*
-		 * Limit reclaim has historically picked one memcg and
-		 * scanned it with decreasing priority levels until
-		 * nr_to_reclaim had been reclaimed. This priority
-		 * cycle is thus over after a single memcg.
-		 *
-		 * Direct reclaim and kswapd, on the other hand, have
-		 * to scan all memory cgroups to fulfill the overall
-		 * scan target for the zone.
+			 * Direct reclaim and kswapd have to scan all memory
+			 * cgroups to fulfill the overall scan target for the
+			 * zone.
+			 *
+			 * Limit reclaim, on the other hand, only cares about
+			 * nr_to_reclaim pages to be reclaimed and it will
+			 * retry with decreasing priority if one round over the
+			 * whole hierarchy is not sufficient.
 			 */
-		if (!global_reclaim(sc)) {
-			mem_cgroup_iter_break(root, memcg);
-			break;
-		}
-		memcg = mem_cgroup_iter(root, memcg, &reclaim);
-	} while (memcg);
+			if (!global_reclaim(sc) &&
+					sc->nr_reclaimed >= sc->nr_to_reclaim) {
+				mem_cgroup_iter_break(root, memcg);
+				break;
+			}
+			memcg = mem_cgroup_iter(root, memcg, &reclaim);
+		} while (memcg);
+	} while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
+					 sc->nr_scanned - nr_scanned, sc));
 }
 
 /* Returns true if compaction should go ahead for a high-order request */
@@ -1958,7 +2003,7 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
 	 * a reasonable chance of completing and allocating the page
 	 */
 	balance_gap = min(low_wmark_pages(zone),
-		(zone->present_pages + KSWAPD_ZONE_BALANCE_GAP_RATIO-1) /
+		(zone->managed_pages + KSWAPD_ZONE_BALANCE_GAP_RATIO-1) /
 			KSWAPD_ZONE_BALANCE_GAP_RATIO);
 	watermark = high_wmark_pages(zone) + balance_gap + (2UL << sc->order);
 	watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0);
@@ -2150,6 +2195,13 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 			goto out;
 
 		/*
+		 * If we're getting trouble reclaiming, start doing
+		 * writepage even in laptop mode.
+		 */
+		if (sc->priority < DEF_PRIORITY - 2)
+			sc->may_writepage = 1;
+
+		/*
 		 * Try to write back as many pages as we just scanned. This
 		 * tends to cause slow streaming writers to write data to the
 		 * disk smoothly, at the dirtying rate, which is nice. But
@@ -2300,7 +2352,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 {
 	unsigned long nr_reclaimed;
 	struct scan_control sc = {
-		.gfp_mask = gfp_mask,
+		.gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
 		.may_writepage = !laptop_mode,
 		.nr_to_reclaim = SWAP_CLUSTER_MAX,
 		.may_unmap = 1,
@@ -2473,7 +2525,7 @@ static bool zone_balanced(struct zone *zone, int order,
  */
 static bool pgdat_balanced(pg_data_t *pgdat, int order, int classzone_idx)
 {
-	unsigned long present_pages = 0;
+	unsigned long managed_pages = 0;
 	unsigned long balanced_pages = 0;
 	int i;
 
@@ -2484,7 +2536,7 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int classzone_idx)
 		if (!populated_zone(zone))
 			continue;
 
-		present_pages += zone->present_pages;
+		managed_pages += zone->managed_pages;
 
 		/*
 		 * A special case here:
@@ -2494,18 +2546,18 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int classzone_idx)
 		 * they must be considered balanced here as well!
 		 */
 		if (zone->all_unreclaimable) {
-			balanced_pages += zone->present_pages;
+			balanced_pages += zone->managed_pages;
 			continue;
 		}
 
 		if (zone_balanced(zone, order, 0, i))
-			balanced_pages += zone->present_pages;
+			balanced_pages += zone->managed_pages;
 		else if (!order)
 			return false;
 	}
 
 	if (order)
-		return balanced_pages >= (present_pages >> 2);
+		return balanced_pages >= (managed_pages >> 2);
 	else
 		return true;
 }
@@ -2564,7 +2616,7 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining,
 static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 							int *classzone_idx)
 {
-	struct zone *unbalanced_zone;
+	bool pgdat_is_balanced = false;
 	int i;
 	int end_zone = 0;	/* Inclusive. 0 = ZONE_DMA */
 	unsigned long total_scanned;
@@ -2595,9 +2647,6 @@ loop_again:
 
 	do {
 		unsigned long lru_pages = 0;
-		int has_under_min_watermark_zone = 0;
-
-		unbalanced_zone = NULL;
 
 		/*
 		 * Scan in the highmem->dma direction for the highest
@@ -2638,8 +2687,11 @@ loop_again:
 				zone_clear_flag(zone, ZONE_CONGESTED);
 			}
 		}
-		if (i < 0)
+
+		if (i < 0) {
+			pgdat_is_balanced = true;
 			goto out;
+		}
 
 		for (i = 0; i <= end_zone; i++) {
 			struct zone *zone = pgdat->node_zones + i;
@@ -2689,7 +2741,7 @@ loop_again:
 			 * of the zone, whichever is smaller.
 			 */
 			balance_gap = min(low_wmark_pages(zone),
-				(zone->present_pages +
+				(zone->managed_pages +
 					KSWAPD_ZONE_BALANCE_GAP_RATIO-1) /
 				KSWAPD_ZONE_BALANCE_GAP_RATIO);
 			/*
@@ -2720,12 +2772,10 @@ loop_again:
 			}
 
 			/*
-			 * If we've done a decent amount of scanning and
-			 * the reclaim ratio is low, start doing writepage
-			 * even in laptop mode
+			 * If we're getting trouble reclaiming, start doing
+			 * writepage even in laptop mode.
 			 */
-			if (total_scanned > SWAP_CLUSTER_MAX * 2 &&
-			    total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
+			if (sc.priority < DEF_PRIORITY - 2)
 				sc.may_writepage = 1;
 
 			if (zone->all_unreclaimable) {
@@ -2734,17 +2784,7 @@ loop_again:
 				continue;
 			}
 
-			if (!zone_balanced(zone, testorder, 0, end_zone)) {
-				unbalanced_zone = zone;
-				/*
-				 * We are still under min water mark. This
-				 * means that we have a GFP_ATOMIC allocation
-				 * failure risk. Hurry up!
-				 */
-				if (!zone_watermark_ok_safe(zone, order,
-					    min_wmark_pages(zone), end_zone, 0))
-					has_under_min_watermark_zone = 1;
-			} else {
+			if (zone_balanced(zone, testorder, 0, end_zone))
 				/*
 				 * If a zone reaches its high watermark,
 				 * consider it to be no longer congested. It's
@@ -2753,8 +2793,6 @@ loop_again:
 				 * speculatively avoid congestion waits
 				 */
 				zone_clear_flag(zone, ZONE_CONGESTED);
-			}
-
 		}
 
 		/*
@@ -2766,17 +2804,9 @@ loop_again:
 		    pfmemalloc_watermark_ok(pgdat))
 			wake_up(&pgdat->pfmemalloc_wait);
 
-		if (pgdat_balanced(pgdat, order, *classzone_idx))
+		if (pgdat_balanced(pgdat, order, *classzone_idx)) {
+			pgdat_is_balanced = true;
 			break;		/* kswapd: all done */
-		/*
-		 * OK, kswapd is getting into trouble. Take a nap, then take
-		 * another pass across the zones.
-		 */
-		if (total_scanned && (sc.priority < DEF_PRIORITY - 2)) {
-			if (has_under_min_watermark_zone)
-				count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT);
-			else if (unbalanced_zone)
-				wait_iff_congested(unbalanced_zone, BLK_RW_ASYNC, HZ/10);
 		}
 
 		/*
@@ -2788,9 +2818,9 @@ loop_again:
 		if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX)
 			break;
 	} while (--sc.priority >= 0);
-out:
 
-	if (!pgdat_balanced(pgdat, order, *classzone_idx)) {
+out:
+	if (!pgdat_is_balanced) {
 		cond_resched();
 
 		try_to_freeze();
@@ -3053,7 +3083,7 @@ unsigned long global_reclaimable_pages(void)
 	nr = global_page_state(NR_ACTIVE_FILE) +
 	     global_page_state(NR_INACTIVE_FILE);
 
-	if (nr_swap_pages > 0)
+	if (get_nr_swap_pages() > 0)
 		nr += global_page_state(NR_ACTIVE_ANON) +
 		      global_page_state(NR_INACTIVE_ANON);
 
@@ -3067,7 +3097,7 @@ unsigned long zone_reclaimable_pages(struct zone *zone)
 	nr = zone_page_state(zone, NR_ACTIVE_FILE) +
 	     zone_page_state(zone, NR_INACTIVE_FILE);
 
-	if (nr_swap_pages > 0)
+	if (get_nr_swap_pages() > 0)
 		nr += zone_page_state(zone, NR_ACTIVE_ANON) +
 		      zone_page_state(zone, NR_INACTIVE_ANON);
 
@@ -3280,9 +3310,8 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 		.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
 		.may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
 		.may_swap = 1,
-		.nr_to_reclaim = max_t(unsigned long, nr_pages,
-				       SWAP_CLUSTER_MAX),
-		.gfp_mask = gfp_mask,
+		.nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
+		.gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
 		.order = order,
 		.priority = ZONE_RECLAIM_PRIORITY,
 	};