author     Dan Williams <dan.j.williams@intel.com>    2009-09-08 20:55:21 -0400
committer  Dan Williams <dan.j.williams@intel.com>    2009-09-08 20:55:21 -0400
commit     bbb20089a3275a19e475dbc21320c3742e3ca423
tree       216fdc1cbef450ca688135c5b8969169482d9a48 /mm/vmscan.c
parent     3e48e656903e9fd8bc805c6a2c4264d7808d315b
parent     657a77fa7284d8ae28dfa48f1dc5d919bf5b2843
Merge branch 'dmaengine' into async-tx-next
Conflicts:
	crypto/async_tx/async_xor.c
	drivers/dma/ioat/dma_v2.h
	drivers/dma/ioat/pci.c
	drivers/md/raid5.c
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--	mm/vmscan.c	380
1 file changed, 240 insertions, 140 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d254306562cd..54155268dfca 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -470,8 +470,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page)
 		swp_entry_t swap = { .val = page_private(page) };
 		__delete_from_swap_cache(page);
 		spin_unlock_irq(&mapping->tree_lock);
-		mem_cgroup_uncharge_swapcache(page, swap);
-		swap_free(swap);
+		swapcache_free(swap, page);
 	} else {
 		__remove_from_page_cache(page);
 		spin_unlock_irq(&mapping->tree_lock);
@@ -514,7 +513,6 @@ int remove_mapping(struct address_space *mapping, struct page *page)
  *
  * lru_lock must not be held, interrupts must be enabled.
  */
-#ifdef CONFIG_UNEVICTABLE_LRU
 void putback_lru_page(struct page *page)
 {
 	int lru;
@@ -568,20 +566,6 @@ redo:
 	put_page(page);		/* drop ref from isolate */
 }
 
-#else /* CONFIG_UNEVICTABLE_LRU */
-
-void putback_lru_page(struct page *page)
-{
-	int lru;
-	VM_BUG_ON(PageLRU(page));
-
-	lru = !!TestClearPageActive(page) + page_is_file_cache(page);
-	lru_cache_add_lru(page, lru);
-	put_page(page);
-}
-#endif /* CONFIG_UNEVICTABLE_LRU */
-
-
 /*
  * shrink_page_list() returns the number of reclaimed pages
  */
@@ -593,6 +577,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 	struct pagevec freed_pvec;
 	int pgactivate = 0;
 	unsigned long nr_reclaimed = 0;
+	unsigned long vm_flags;
 
 	cond_resched();
 
@@ -643,7 +628,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 				goto keep_locked;
 		}
 
-		referenced = page_referenced(page, 1, sc->mem_cgroup);
+		referenced = page_referenced(page, 1,
+						sc->mem_cgroup, &vm_flags);
 		/* In active use or really unfreeable? Activate it. */
 		if (sc->order <= PAGE_ALLOC_COSTLY_ORDER &&
 					referenced && page_mapping_inuse(page))
@@ -851,7 +837,6 @@ int __isolate_lru_page(struct page *page, int mode, int file)
 		 */
 		ClearPageLRU(page);
 		ret = 0;
-		mem_cgroup_del_lru(page);
 	}
 
 	return ret;
@@ -899,12 +884,14 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 		switch (__isolate_lru_page(page, mode, file)) {
 		case 0:
 			list_move(&page->lru, dst);
+			mem_cgroup_del_lru(page);
 			nr_taken++;
 			break;
 
 		case -EBUSY:
 			/* else it is being freed elsewhere */
 			list_move(&page->lru, src);
+			mem_cgroup_rotate_lru_list(page, page_lru(page));
 			continue;
 
 		default:
@@ -943,18 +930,11 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 			/* Check that we have not crossed a zone boundary. */
 			if (unlikely(page_zone_id(cursor_page) != zone_id))
 				continue;
-			switch (__isolate_lru_page(cursor_page, mode, file)) {
-			case 0:
+			if (__isolate_lru_page(cursor_page, mode, file) == 0) {
 				list_move(&cursor_page->lru, dst);
+				mem_cgroup_del_lru(cursor_page);
 				nr_taken++;
 				scan++;
-				break;
-
-			case -EBUSY:
-				/* else it is being freed elsewhere */
-				list_move(&cursor_page->lru, src);
-			default:
-				break;	/* ! on LRU or wrong list */
 			}
 		}
 	}
@@ -1061,6 +1041,19 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 	unsigned long nr_scanned = 0;
 	unsigned long nr_reclaimed = 0;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+	int lumpy_reclaim = 0;
+
+	/*
+	 * If we need a large contiguous chunk of memory, or have
+	 * trouble getting a small set of contiguous pages, we
+	 * will reclaim both active and inactive pages.
+	 *
+	 * We use the same threshold as pageout congestion_wait below.
+	 */
+	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
+		lumpy_reclaim = 1;
+	else if (sc->order && priority < DEF_PRIORITY - 2)
+		lumpy_reclaim = 1;
 
 	pagevec_init(&pvec, 1);
 
@@ -1073,19 +1066,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		unsigned long nr_freed;
 		unsigned long nr_active;
 		unsigned int count[NR_LRU_LISTS] = { 0, };
-		int mode = ISOLATE_INACTIVE;
-
-		/*
-		 * If we need a large contiguous chunk of memory, or have
-		 * trouble getting a small set of contiguous pages, we
-		 * will reclaim both active and inactive pages.
-		 *
-		 * We use the same threshold as pageout congestion_wait below.
-		 */
-		if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
-			mode = ISOLATE_BOTH;
-		else if (sc->order && priority < DEF_PRIORITY - 2)
-			mode = ISOLATE_BOTH;
+		int mode = lumpy_reclaim ? ISOLATE_BOTH : ISOLATE_INACTIVE;
 
 		nr_taken = sc->isolate_pages(sc->swap_cluster_max,
 			     &page_list, &nr_scan, sc->order, mode,
@@ -1122,7 +1103,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		 * but that should be acceptable to the caller
 		 */
 		if (nr_freed < nr_taken && !current_is_kswapd() &&
-					sc->order > PAGE_ALLOC_COSTLY_ORDER) {
+		    lumpy_reclaim) {
 			congestion_wait(WRITE, HZ/10);
 
 			/*
@@ -1217,18 +1198,54 @@ static inline void note_zone_scanning_priority(struct zone *zone, int priority)
  * But we had to alter page->flags anyway.
  */
 
+static void move_active_pages_to_lru(struct zone *zone,
+				     struct list_head *list,
+				     enum lru_list lru)
+{
+	unsigned long pgmoved = 0;
+	struct pagevec pvec;
+	struct page *page;
+
+	pagevec_init(&pvec, 1);
+
+	while (!list_empty(list)) {
+		page = lru_to_page(list);
+		prefetchw_prev_lru_page(page, list, flags);
+
+		VM_BUG_ON(PageLRU(page));
+		SetPageLRU(page);
+
+		VM_BUG_ON(!PageActive(page));
+		if (!is_active_lru(lru))
+			ClearPageActive(page);	/* we are de-activating */
+
+		list_move(&page->lru, &zone->lru[lru].list);
+		mem_cgroup_add_lru_list(page, lru);
+		pgmoved++;
+
+		if (!pagevec_add(&pvec, page) || list_empty(list)) {
+			spin_unlock_irq(&zone->lru_lock);
+			if (buffer_heads_over_limit)
+				pagevec_strip(&pvec);
+			__pagevec_release(&pvec);
+			spin_lock_irq(&zone->lru_lock);
+		}
+	}
+	__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
+	if (!is_active_lru(lru))
+		__count_vm_events(PGDEACTIVATE, pgmoved);
+}
 
 static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 			struct scan_control *sc, int priority, int file)
 {
 	unsigned long pgmoved;
-	int pgdeactivate = 0;
 	unsigned long pgscanned;
+	unsigned long vm_flags;
 	LIST_HEAD(l_hold);	/* The pages which were snipped off */
+	LIST_HEAD(l_active);
 	LIST_HEAD(l_inactive);
 	struct page *page;
-	struct pagevec pvec;
-	enum lru_list lru;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 
 	lru_add_drain();
@@ -1245,13 +1262,14 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	}
 	reclaim_stat->recent_scanned[!!file] += pgmoved;
 
+	__count_zone_vm_events(PGREFILL, zone, pgscanned);
 	if (file)
 		__mod_zone_page_state(zone, NR_ACTIVE_FILE, -pgmoved);
 	else
 		__mod_zone_page_state(zone, NR_ACTIVE_ANON, -pgmoved);
 	spin_unlock_irq(&zone->lru_lock);
 
-	pgmoved = 0;
+	pgmoved = 0;  /* count referenced (mapping) mapped pages */
 	while (!list_empty(&l_hold)) {
 		cond_resched();
 		page = lru_to_page(&l_hold);
@@ -1264,58 +1282,44 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 
 		/* page_referenced clears PageReferenced */
 		if (page_mapping_inuse(page) &&
-		    page_referenced(page, 0, sc->mem_cgroup))
+		    page_referenced(page, 0, sc->mem_cgroup, &vm_flags)) {
 			pgmoved++;
+			/*
+			 * Identify referenced, file-backed active pages and
+			 * give them one more trip around the active list. So
+			 * that executable code get better chances to stay in
+			 * memory under moderate memory pressure. Anon pages
+			 * are not likely to be evicted by use-once streaming
+			 * IO, plus JVM can create lots of anon VM_EXEC pages,
+			 * so we ignore them here.
+			 */
+			if ((vm_flags & VM_EXEC) && !PageAnon(page)) {
+				list_add(&page->lru, &l_active);
+				continue;
+			}
+		}
 
 		list_add(&page->lru, &l_inactive);
 	}
 
 	/*
-	 * Move the pages to the [file or anon] inactive list.
+	 * Move pages back to the lru list.
 	 */
-	pagevec_init(&pvec, 1);
-	lru = LRU_BASE + file * LRU_FILE;
-
 	spin_lock_irq(&zone->lru_lock);
 	/*
-	 * Count referenced pages from currently used mappings as
-	 * rotated, even though they are moved to the inactive list.
-	 * This helps balance scan pressure between file and anonymous
-	 * pages in get_scan_ratio.
+	 * Count referenced pages from currently used mappings as rotated,
+	 * even though only some of them are actually re-activated. This
+	 * helps balance scan pressure between file and anonymous pages in
+	 * get_scan_ratio.
 	 */
 	reclaim_stat->recent_rotated[!!file] += pgmoved;
 
-	pgmoved = 0;
-	while (!list_empty(&l_inactive)) {
-		page = lru_to_page(&l_inactive);
-		prefetchw_prev_lru_page(page, &l_inactive, flags);
-		VM_BUG_ON(PageLRU(page));
-		SetPageLRU(page);
-		VM_BUG_ON(!PageActive(page));
-		ClearPageActive(page);
+	move_active_pages_to_lru(zone, &l_active,
+						LRU_ACTIVE + file * LRU_FILE);
+	move_active_pages_to_lru(zone, &l_inactive,
+						LRU_BASE + file * LRU_FILE);
 
-		list_move(&page->lru, &zone->lru[lru].list);
-		mem_cgroup_add_lru_list(page, lru);
-		pgmoved++;
-		if (!pagevec_add(&pvec, page)) {
-			__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
-			spin_unlock_irq(&zone->lru_lock);
-			pgdeactivate += pgmoved;
-			pgmoved = 0;
-			if (buffer_heads_over_limit)
-				pagevec_strip(&pvec);
-			__pagevec_release(&pvec);
-			spin_lock_irq(&zone->lru_lock);
-		}
-	}
-	__mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
-	pgdeactivate += pgmoved;
-	__count_zone_vm_events(PGREFILL, zone, pgscanned);
-	__count_vm_events(PGDEACTIVATE, pgdeactivate);
 	spin_unlock_irq(&zone->lru_lock);
-	if (buffer_heads_over_limit)
-		pagevec_strip(&pvec);
-	pagevec_release(&pvec);
 }
 
 static int inactive_anon_is_low_global(struct zone *zone)
@@ -1350,12 +1354,48 @@ static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
 	return low;
 }
 
+static int inactive_file_is_low_global(struct zone *zone)
+{
+	unsigned long active, inactive;
+
+	active = zone_page_state(zone, NR_ACTIVE_FILE);
+	inactive = zone_page_state(zone, NR_INACTIVE_FILE);
+
+	return (active > inactive);
+}
+
+/**
+ * inactive_file_is_low - check if file pages need to be deactivated
+ * @zone: zone to check
+ * @sc:   scan control of this context
+ *
+ * When the system is doing streaming IO, memory pressure here
+ * ensures that active file pages get deactivated, until more
+ * than half of the file pages are on the inactive list.
+ *
+ * Once we get to that situation, protect the system's working
+ * set from being evicted by disabling active file page aging.
+ *
+ * This uses a different ratio than the anonymous pages, because
+ * the page cache uses a use-once replacement algorithm.
+ */
+static int inactive_file_is_low(struct zone *zone, struct scan_control *sc)
+{
+	int low;
+
+	if (scanning_global_lru(sc))
+		low = inactive_file_is_low_global(zone);
+	else
+		low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup);
+	return low;
+}
+
 static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
 	struct zone *zone, struct scan_control *sc, int priority)
 {
 	int file = is_file_lru(lru);
 
-	if (lru == LRU_ACTIVE_FILE) {
+	if (lru == LRU_ACTIVE_FILE && inactive_file_is_low(zone, sc)) {
 		shrink_active_list(nr_to_scan, zone, sc, priority, file);
 		return 0;
 	}
@@ -1384,13 +1424,6 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 	unsigned long ap, fp;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 
-	/* If we have no swap space, do not bother scanning anon pages. */
-	if (!sc->may_swap || (nr_swap_pages <= 0)) {
-		percent[0] = 0;
-		percent[1] = 100;
-		return;
-	}
-
 	anon = zone_nr_pages(zone, sc, LRU_ACTIVE_ANON) +
 		zone_nr_pages(zone, sc, LRU_INACTIVE_ANON);
 	file = zone_nr_pages(zone, sc, LRU_ACTIVE_FILE) +
@@ -1400,7 +1433,7 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 	free = zone_page_state(zone, NR_FREE_PAGES);
 	/* If we have very few page cache pages,
 	   force-scan anon pages. */
-	if (unlikely(file + free <= zone->pages_high)) {
+	if (unlikely(file + free <= high_wmark_pages(zone))) {
 		percent[0] = 100;
 		percent[1] = 0;
 		return;
@@ -1455,6 +1488,26 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 	percent[1] = 100 - percent[0];
 }
 
+/*
+ * Smallish @nr_to_scan's are deposited in @nr_saved_scan,
+ * until we collected @swap_cluster_max pages to scan.
+ */
+static unsigned long nr_scan_try_batch(unsigned long nr_to_scan,
+				       unsigned long *nr_saved_scan,
+				       unsigned long swap_cluster_max)
+{
+	unsigned long nr;
+
+	*nr_saved_scan += nr_to_scan;
+	nr = *nr_saved_scan;
+
+	if (nr >= swap_cluster_max)
+		*nr_saved_scan = 0;
+	else
+		nr = 0;
+
+	return nr;
+}
 
 /*
  * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
@@ -1468,26 +1521,30 @@ static void shrink_zone(int priority, struct zone *zone,
 	enum lru_list l;
 	unsigned long nr_reclaimed = sc->nr_reclaimed;
 	unsigned long swap_cluster_max = sc->swap_cluster_max;
+	int noswap = 0;
 
-	get_scan_ratio(zone, sc, percent);
+	/* If we have no swap space, do not bother scanning anon pages. */
+	if (!sc->may_swap || (nr_swap_pages <= 0)) {
+		noswap = 1;
+		percent[0] = 0;
+		percent[1] = 100;
+	} else
+		get_scan_ratio(zone, sc, percent);
 
 	for_each_evictable_lru(l) {
 		int file = is_file_lru(l);
 		unsigned long scan;
 
 		scan = zone_nr_pages(zone, sc, l);
-		if (priority) {
+		if (priority || noswap) {
 			scan >>= priority;
 			scan = (scan * percent[file]) / 100;
 		}
-		if (scanning_global_lru(sc)) {
-			zone->lru[l].nr_scan += scan;
-			nr[l] = zone->lru[l].nr_scan;
-			if (nr[l] >= swap_cluster_max)
-				zone->lru[l].nr_scan = 0;
-			else
-				nr[l] = 0;
-		} else
+		if (scanning_global_lru(sc))
+			nr[l] = nr_scan_try_batch(scan,
+						  &zone->lru[l].nr_saved_scan,
+						  swap_cluster_max);
+		else
 			nr[l] = scan;
 	}
 
@@ -1521,7 +1578,7 @@ static void shrink_zone(int priority, struct zone *zone,
 	 * Even if we did not try to evict anon pages at all, we want to
 	 * rebalance the anon lru active/inactive ratio.
 	 */
-	if (inactive_anon_is_low(zone, sc))
+	if (inactive_anon_is_low(zone, sc) && nr_swap_pages > 0)
 		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
 
 	throttle_vm_writeout(sc->gfp_mask);
@@ -1532,11 +1589,13 @@ static void shrink_zone(int priority, struct zone *zone,
  * try to reclaim pages from zones which will satisfy the caller's allocation
  * request.
  *
- * We reclaim from a zone even if that zone is over pages_high. Because:
+ * We reclaim from a zone even if that zone is over high_wmark_pages(zone).
+ * Because:
  * a) The caller may be trying to free *extra* pages to satisfy a higher-order
  *    allocation or
- * b) The zones may be over pages_high but they must go *over* pages_high to
- *    satisfy the `incremental min' zone defense algorithm.
+ * b) The target zone may be at high_wmark_pages(zone) but the lower zones
+ *    must go *over* high_wmark_pages(zone) to satisfy the `incremental min'
+ *    zone defense algorithm.
  *
  * If a zone is deemed to be full of pinned pages then just give it a light
  * scan then give up on it.
@@ -1742,7 +1801,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 
 /*
  * For kswapd, balance_pgdat() will work across all this node's zones until
- * they are all at pages_high.
+ * they are all at high_wmark_pages(zone).
  *
  * Returns the number of pages which were actually freed.
  *
@@ -1755,11 +1814,11 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
  * the zone for when the problem goes away.
  *
  * kswapd scans the zones in the highmem->normal->dma direction. It skips
- * zones which have free_pages > pages_high, but once a zone is found to have
- * free_pages <= pages_high, we scan that zone and the lower zones regardless
- * of the number of free pages in the lower zones. This interoperates with
- * the page allocator fallback scheme to ensure that aging of pages is balanced
- * across the zones.
+ * zones which have free_pages > high_wmark_pages(zone), but once a zone is
+ * found to have free_pages <= high_wmark_pages(zone), we scan that zone and the
+ * lower zones regardless of the number of free pages in the lower zones. This
+ * interoperates with the page allocator fallback scheme to ensure that aging
+ * of pages is balanced across the zones.
  */
 static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 {
@@ -1780,7 +1839,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 	};
 	/*
 	 * temp_priority is used to remember the scanning priority at which
-	 * this zone was successfully refilled to free_pages == pages_high.
+	 * this zone was successfully refilled to
+	 * free_pages == high_wmark_pages(zone).
 	 */
 	int temp_priority[MAX_NR_ZONES];
 
@@ -1825,8 +1885,8 @@ loop_again:
 				shrink_active_list(SWAP_CLUSTER_MAX, zone,
 							&sc, priority, 0);
 
-			if (!zone_watermark_ok(zone, order, zone->pages_high,
-					       0, 0)) {
+			if (!zone_watermark_ok(zone, order,
+					high_wmark_pages(zone), 0, 0)) {
 				end_zone = i;
 				break;
 			}
@@ -1860,8 +1920,8 @@ loop_again:
 					priority != DEF_PRIORITY)
 				continue;
 
-			if (!zone_watermark_ok(zone, order, zone->pages_high,
-					       end_zone, 0))
+			if (!zone_watermark_ok(zone, order,
+					high_wmark_pages(zone), end_zone, 0))
 				all_zones_ok = 0;
 			temp_priority[i] = priority;
 			sc.nr_scanned = 0;
@@ -1870,8 +1930,8 @@ loop_again:
 			 * We put equal pressure on every zone, unless one
 			 * zone has way too many pages free already.
 			 */
-			if (!zone_watermark_ok(zone, order, 8*zone->pages_high,
-						end_zone, 0))
+			if (!zone_watermark_ok(zone, order,
+					8*high_wmark_pages(zone), end_zone, 0))
 				shrink_zone(priority, zone, &sc);
 			reclaim_state->reclaimed_slab = 0;
 			nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
@@ -2037,7 +2097,7 @@ void wakeup_kswapd(struct zone *zone, int order)
 		return;
 
 	pgdat = zone->zone_pgdat;
-	if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0))
+	if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
 		return;
 	if (pgdat->kswapd_max_order < order)
 		pgdat->kswapd_max_order = order;
@@ -2056,7 +2116,7 @@ unsigned long global_lru_pages(void)
 		+ global_page_state(NR_INACTIVE_FILE);
 }
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_HIBERNATION
 /*
  * Helper function for shrink_all_memory(). Tries to reclaim 'nr_pages' pages
  * from LRU lists system-wide, for given pass and priority.
@@ -2084,11 +2144,11 @@ static void shrink_all_zones(unsigned long nr_pages, int prio,
 						l == LRU_ACTIVE_FILE))
 				continue;
 
-			zone->lru[l].nr_scan += (lru_pages >> prio) + 1;
-			if (zone->lru[l].nr_scan >= nr_pages || pass > 3) {
+			zone->lru[l].nr_saved_scan += (lru_pages >> prio) + 1;
+			if (zone->lru[l].nr_saved_scan >= nr_pages || pass > 3) {
 				unsigned long nr_to_scan;
 
-				zone->lru[l].nr_scan = 0;
+				zone->lru[l].nr_saved_scan = 0;
 				nr_to_scan = min(nr_pages, lru_pages);
 				nr_reclaimed += shrink_list(l, nr_to_scan, zone,
 								sc, prio);
@@ -2196,7 +2256,7 @@ out:
 
 	return sc.nr_reclaimed;
 }
-#endif
+#endif /* CONFIG_HIBERNATION */
 
 /* It's optimal to keep kswapds on the same CPUs as their memory, but
    not required for correctness. So if the last cpu in a node goes
@@ -2290,6 +2350,48 @@ int sysctl_min_unmapped_ratio = 1;
  */
 int sysctl_min_slab_ratio = 5;
 
+static inline unsigned long zone_unmapped_file_pages(struct zone *zone)
+{
+	unsigned long file_mapped = zone_page_state(zone, NR_FILE_MAPPED);
+	unsigned long file_lru = zone_page_state(zone, NR_INACTIVE_FILE) +
+		zone_page_state(zone, NR_ACTIVE_FILE);
+
+	/*
+	 * It's possible for there to be more file mapped pages than
+	 * accounted for by the pages on the file LRU lists because
+	 * tmpfs pages accounted for as ANON can also be FILE_MAPPED
+	 */
+	return (file_lru > file_mapped) ? (file_lru - file_mapped) : 0;
+}
+
+/* Work out how many page cache pages we can reclaim in this reclaim_mode */
+static long zone_pagecache_reclaimable(struct zone *zone)
+{
+	long nr_pagecache_reclaimable;
+	long delta = 0;
+
+	/*
+	 * If RECLAIM_SWAP is set, then all file pages are considered
+	 * potentially reclaimable. Otherwise, we have to worry about
+	 * pages like swapcache and zone_unmapped_file_pages() provides
+	 * a better estimate
+	 */
+	if (zone_reclaim_mode & RECLAIM_SWAP)
+		nr_pagecache_reclaimable = zone_page_state(zone, NR_FILE_PAGES);
+	else
+		nr_pagecache_reclaimable = zone_unmapped_file_pages(zone);
+
+	/* If we can't clean pages, remove dirty pages from consideration */
+	if (!(zone_reclaim_mode & RECLAIM_WRITE))
+		delta += zone_page_state(zone, NR_FILE_DIRTY);
+
+	/* Watch for any possible underflows due to delta */
+	if (unlikely(delta > nr_pagecache_reclaimable))
+		delta = nr_pagecache_reclaimable;
+
+	return nr_pagecache_reclaimable - delta;
+}
+
 /*
  * Try to free up some pages from this zone through reclaim.
  */
@@ -2324,9 +2426,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
-	if (zone_page_state(zone, NR_FILE_PAGES) -
-		zone_page_state(zone, NR_FILE_MAPPED) >
-		zone->min_unmapped_pages) {
+	if (zone_pagecache_reclaimable(zone) > zone->min_unmapped_pages) {
 		/*
 		 * Free memory by calling shrink zone with increasing
 		 * priorities until we have enough memory freed.
@@ -2384,20 +2484,18 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	 * if less than a specified percentage of the zone is used by
 	 * unmapped file backed pages.
 	 */
-	if (zone_page_state(zone, NR_FILE_PAGES) -
-	    zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_pages
-	    && zone_page_state(zone, NR_SLAB_RECLAIMABLE)
-			<= zone->min_slab_pages)
-		return 0;
+	if (zone_pagecache_reclaimable(zone) <= zone->min_unmapped_pages &&
+	    zone_page_state(zone, NR_SLAB_RECLAIMABLE) <= zone->min_slab_pages)
+		return ZONE_RECLAIM_FULL;
 
 	if (zone_is_all_unreclaimable(zone))
-		return 0;
+		return ZONE_RECLAIM_FULL;
 
 	/*
 	 * Do not scan if the allocation should not be delayed.
 	 */
 	if (!(gfp_mask & __GFP_WAIT) || (current->flags & PF_MEMALLOC))
-		return 0;
+		return ZONE_RECLAIM_NOSCAN;
 
 	/*
 	 * Only run zone reclaim on the local zone or on zones that do not
@@ -2407,18 +2505,21 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	 */
 	node_id = zone_to_nid(zone);
 	if (node_state(node_id, N_CPU) && node_id != numa_node_id())
-		return 0;
+		return ZONE_RECLAIM_NOSCAN;
 
 	if (zone_test_and_set_flag(zone, ZONE_RECLAIM_LOCKED))
-		return 0;
+		return ZONE_RECLAIM_NOSCAN;
+
 	ret = __zone_reclaim(zone, gfp_mask, order);
 	zone_clear_flag(zone, ZONE_RECLAIM_LOCKED);
 
+	if (!ret)
+		count_vm_event(PGSCAN_ZONE_RECLAIM_FAILED);
+
 	return ret;
 }
 #endif
 
-#ifdef CONFIG_UNEVICTABLE_LRU
 /*
  * page_evictable - test whether a page is evictable
  * @page: the page to test
@@ -2665,4 +2766,3 @@ void scan_unevictable_unregister_node(struct node *node)
 	sysdev_remove_file(&node->sysdev, &attr_scan_unevictable_pages);
 }
 
-#endif