author:    Andi Kleen <andi@basil.nowhere.org>  2006-11-21 04:22:09 -0500
committer: Andi Kleen <andi@basil.nowhere.org>  2006-11-21 04:22:09 -0500
commit:    1b7f6a626f0ff511c3840678466cbfe1d62c0b29
tree:      415e8c838c0067bff384afb8a2c91e5f7c6d11d3 /mm
parent:    b3edc9cec07ade41aaf1804f7c9e876afa90c862
parent:    3f5a6ca31c334011fd929501a078424c0d3f71be

Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'mm')

 mm/filemap.c    | 24
 mm/hugetlb.c    |  3
 mm/migrate.c    |  3
 mm/mmap.c       |  5
 mm/page_alloc.c |  6
 mm/readahead.c  |  2
 mm/slab.c       |  2
 mm/sparse.c     |  2
 mm/vmalloc.c    | 47
 mm/vmscan.c     | 63
 mm/vmstat.c     |  2

 11 files changed, 92 insertions(+), 67 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index cb26e33fd0ff..7b84dc814347 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -467,25 +467,15 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 }
 
 #ifdef CONFIG_NUMA
-struct page *page_cache_alloc(struct address_space *x)
+struct page *__page_cache_alloc(gfp_t gfp)
 {
 	if (cpuset_do_page_mem_spread()) {
 		int n = cpuset_mem_spread_node();
-		return alloc_pages_node(n, mapping_gfp_mask(x), 0);
+		return alloc_pages_node(n, gfp, 0);
 	}
-	return alloc_pages(mapping_gfp_mask(x), 0);
+	return alloc_pages(gfp, 0);
 }
-EXPORT_SYMBOL(page_cache_alloc);
-
-struct page *page_cache_alloc_cold(struct address_space *x)
-{
-	if (cpuset_do_page_mem_spread()) {
-		int n = cpuset_mem_spread_node();
-		return alloc_pages_node(n, mapping_gfp_mask(x)|__GFP_COLD, 0);
-	}
-	return alloc_pages(mapping_gfp_mask(x)|__GFP_COLD, 0);
-}
-EXPORT_SYMBOL(page_cache_alloc_cold);
+EXPORT_SYMBOL(__page_cache_alloc);
 #endif
 
 static int __sleep_on_page_lock(void *word)
@@ -826,7 +816,6 @@ struct page *
 grab_cache_page_nowait(struct address_space *mapping, unsigned long index)
 {
 	struct page *page = find_get_page(mapping, index);
-	gfp_t gfp_mask;
 
 	if (page) {
 		if (!TestSetPageLocked(page))
@@ -834,9 +823,8 @@ grab_cache_page_nowait(struct address_space *mapping, unsigned long index)
 		page_cache_release(page);
 		return NULL;
 	}
-	gfp_mask = mapping_gfp_mask(mapping) & ~__GFP_FS;
-	page = alloc_pages(gfp_mask, 0);
-	if (page && add_to_page_cache_lru(page, mapping, index, gfp_mask)) {
+	page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~__GFP_FS);
+	if (page && add_to_page_cache_lru(page, mapping, index, GFP_KERNEL)) {
 		page_cache_release(page);
 		page = NULL;
 	}
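
Only mm/ appears in this diffstat, so the replacements for the two removed exports are not visible here. Presumably page_cache_alloc() and page_cache_alloc_cold() live on as inline wrappers around the new __page_cache_alloc() in include/linux/pagemap.h; a minimal sketch, assuming that layout:

	/*
	 * Sketch (assumed, not part of this mm/-limited diff): inline
	 * wrappers standing in for the two EXPORT_SYMBOL'd functions
	 * removed from mm/filemap.c.  Each derives the gfp mask from the
	 * mapping and delegates to the one remaining allocator.
	 */
	static inline struct page *page_cache_alloc(struct address_space *x)
	{
		return __page_cache_alloc(mapping_gfp_mask(x));
	}

	static inline struct page *page_cache_alloc_cold(struct address_space *x)
	{
		/* __GFP_COLD asks for a cache-cold page, as the old variant did */
		return __page_cache_alloc(mapping_gfp_mask(x) | __GFP_COLD);
	}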
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 2dbec90dc3ba..a088f593a807 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -478,6 +478,9 @@ int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 retry:
 	page = find_lock_page(mapping, idx);
 	if (!page) {
+		size = i_size_read(mapping->host) >> HPAGE_SHIFT;
+		if (idx >= size)
+			goto out;
 		if (hugetlb_get_quota(mapping))
 			goto out;
 		page = alloc_huge_page(vma, address);
diff --git a/mm/migrate.c b/mm/migrate.c
index ba2453f9483d..b4979d423d2b 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -952,7 +952,8 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
 			goto out;
 
 		pm[i].node = node;
-	}
+	} else
+		pm[i].node = 0; /* anything to not match MAX_NUMNODES */
 	}
 	/* End marker */
 	pm[nr_pages].node = MAX_NUMNODES;
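
The new else branch matters because pm[] is terminated by a MAX_NUMNODES sentinel rather than a length; a sketch of the kind of consumer walk this protects (the loop shape and helper name are assumptions, not code from this diff):

	/*
	 * Sketch (assumed consumer shape): entries are processed up to the
	 * MAX_NUMNODES end marker.  If an error path left pm[i].node holding
	 * a value that happened to equal MAX_NUMNODES, the walk would stop
	 * early and silently skip the remaining pages; storing 0 in the
	 * error case guarantees no entry ever matches the sentinel.
	 */
	for (i = 0; pm[i].node != MAX_NUMNODES; i++)
		handle_page_to_move(&pm[i]);	/* hypothetical helper */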
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1379,7 +1379,7 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 		 * Check if the given range is hugepage aligned, and
 		 * can be made suitable for hugepages.
 		 */
-		ret = prepare_hugepage_range(addr, len);
+		ret = prepare_hugepage_range(addr, len, pgoff);
 	} else {
 		/*
 		 * Ensure that a normal request is not falling in a
@@ -1880,6 +1880,9 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 	if ((addr + len) > TASK_SIZE || (addr + len) < addr)
 		return -EINVAL;
 
+	if (is_hugepage_only_range(mm, addr, len))
+		return -EINVAL;
+
 	flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
 
 	error = arch_mmap_check(addr, len, flags);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f5fc45472d5c..bf2f6cff1d6a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -853,7 +853,7 @@ again:
 		pcp = &zone_pcp(zone, cpu)->pcp[cold];
 		local_irq_save(flags);
 		if (!pcp->count) {
-			pcp->count += rmqueue_bulk(zone, 0,
+			pcp->count = rmqueue_bulk(zone, 0,
 					pcp->batch, &pcp->list);
 			if (unlikely(!pcp->count))
 				goto failed;
@@ -2261,7 +2261,7 @@ unsigned long __init __absent_pages_in_range(int nid,
 
 	/* Account for ranges past physical memory on this node */
 	if (range_end_pfn > prev_end_pfn)
-		hole_pages = range_end_pfn -
+		hole_pages += range_end_pfn -
 			max(range_start_pfn, prev_end_pfn);
 
 	return hole_pages;
@@ -2407,7 +2407,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat,
 		zone->zone_pgdat = pgdat;
 		zone->free_pages = 0;
 
-		zone->temp_priority = zone->prev_priority = DEF_PRIORITY;
+		zone->prev_priority = DEF_PRIORITY;
 
 		zone_pcp_init(zone);
 		INIT_LIST_HEAD(&zone->active_list);
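
The __absent_pages_in_range() hunk fixes an accumulation bug: the final adjustment for a range reaching past the node's physical memory used '=', clobbering hole pages the function had already accumulated. A worked illustration with hypothetical pfn numbers:

	unsigned long hole_pages = 0;

	/* hole between two registered early-node ranges (hypothetical) */
	hole_pages += 0x180 - 0x100;	/* 0x80 pages */

	/* final adjustment: requested range ends past physical memory;
	 * with '=' this discarded the 0x80 above, with '+=' it adds up */
	hole_pages += 0x300 - 0x280;	/* 0x80 pages, correct total 0x100 */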
diff --git a/mm/readahead.c b/mm/readahead.c
index 1ba736ac0367..23cb61a01c6e 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -173,6 +173,8 @@ static int read_pages(struct address_space *mapping, struct file *filp,
 
 	if (mapping->a_ops->readpages) {
 		ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
+		/* Clean up the remaining pages */
+		put_pages_list(pages);
 		goto out;
 	}
 
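
put_pages_list() releases whatever ->readpages() left on the list, closing a leak when the filesystem consumed only some of the pages. A minimal sketch of what such a helper is expected to do (the actual implementation lives outside this diff):

	/*
	 * Sketch (assumed): drop the reference on every page still chained
	 * on the LRU list after ->readpages() returns.
	 */
	static void put_pages_list_sketch(struct list_head *pages)
	{
		while (!list_empty(pages)) {
			struct page *victim;

			victim = list_entry(pages->prev, struct page, lru);
			list_del(&victim->lru);
			page_cache_release(victim);
		}
	}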
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -883,7 +883,7 @@ static void init_reap_node(int cpu)
 	if (node == MAX_NUMNODES)
 		node = first_node(node_online_map);
 
-	__get_cpu_var(reap_node) = node;
+	per_cpu(reap_node, cpu) = node;
 }
 
 static void next_reap_node(void)
diff --git a/mm/sparse.c b/mm/sparse.c
index 86c52ab80878..b3c82ba30012 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -211,7 +211,7 @@ static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
 	struct page *page, *ret;
 	unsigned long memmap_size = sizeof(struct page) * nr_pages;
 
-	page = alloc_pages(GFP_KERNEL, get_order(memmap_size));
+	page = alloc_pages(GFP_KERNEL|__GFP_NOWARN, get_order(memmap_size));
 	if (page)
 		goto got_map_page;
 
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 1133dd3aafcf..86897ee792d6 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -160,13 +160,15 @@ int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
 	return err;
 }
 
-struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long flags,
-				unsigned long start, unsigned long end, int node)
+static struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long flags,
+				unsigned long start, unsigned long end,
+				int node, gfp_t gfp_mask)
 {
 	struct vm_struct **p, *tmp, *area;
 	unsigned long align = 1;
 	unsigned long addr;
 
+	BUG_ON(in_interrupt());
 	if (flags & VM_IOREMAP) {
 		int bit = fls(size);
 
@@ -179,16 +181,13 @@ struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long flags,
 	}
 	addr = ALIGN(start, align);
 	size = PAGE_ALIGN(size);
+	if (unlikely(!size))
+		return NULL;
 
-	area = kmalloc_node(sizeof(*area), GFP_KERNEL, node);
+	area = kmalloc_node(sizeof(*area), gfp_mask & GFP_LEVEL_MASK, node);
 	if (unlikely(!area))
 		return NULL;
 
-	if (unlikely(!size)) {
-		kfree (area);
-		return NULL;
-	}
-
 	/*
 	 * We always allocate a guard page.
 	 */
@@ -236,7 +235,7 @@ out:
 struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags,
 				unsigned long start, unsigned long end)
 {
-	return __get_vm_area_node(size, flags, start, end, -1);
+	return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL);
 }
 
 /**
@@ -253,9 +252,11 @@ struct vm_struct *get_vm_area(unsigned long size, unsigned long flags)
 	return __get_vm_area(size, flags, VMALLOC_START, VMALLOC_END);
 }
 
-struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags, int node)
+struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags,
+					int node, gfp_t gfp_mask)
 {
-	return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END, node);
+	return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END, node,
+				  gfp_mask);
 }
 
 /* Caller must hold vmlist_lock */
@@ -487,7 +488,7 @@ static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot,
 	if (!size || (size >> PAGE_SHIFT) > num_physpages)
 		return NULL;
 
-	area = get_vm_area_node(size, VM_ALLOC, node);
+	area = get_vm_area_node(size, VM_ALLOC, node, gfp_mask);
 	if (!area)
 		return NULL;
 
@@ -528,11 +529,12 @@ void *vmalloc_user(unsigned long size)
 	void *ret;
 
 	ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
-	write_lock(&vmlist_lock);
-	area = __find_vm_area(ret);
-	area->flags |= VM_USERMAP;
-	write_unlock(&vmlist_lock);
-
+	if (ret) {
+		write_lock(&vmlist_lock);
+		area = __find_vm_area(ret);
+		area->flags |= VM_USERMAP;
+		write_unlock(&vmlist_lock);
+	}
 	return ret;
 }
 EXPORT_SYMBOL(vmalloc_user);
@@ -601,11 +603,12 @@ void *vmalloc_32_user(unsigned long size)
 	void *ret;
 
 	ret = __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
-	write_lock(&vmlist_lock);
-	area = __find_vm_area(ret);
-	area->flags |= VM_USERMAP;
-	write_unlock(&vmlist_lock);
-
+	if (ret) {
+		write_lock(&vmlist_lock);
+		area = __find_vm_area(ret);
+		area->flags |= VM_USERMAP;
+		write_unlock(&vmlist_lock);
+	}
 	return ret;
 }
 EXPORT_SYMBOL(vmalloc_32_user);
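
Threading gfp_mask down into __get_vm_area_node() means the struct vm_struct header is no longer allocated with an unconditional GFP_KERNEL, so a caller's allocation constraints now reach that kmalloc_node() too (filtered through GFP_LEVEL_MASK). A usage sketch with a hypothetical caller:

	/*
	 * Hypothetical caller: before this change, the vm_struct header was
	 * always allocated with GFP_KERNEL, so even a caller passing more
	 * restrictive flags could trigger an allocation it did not ask for.
	 * Now the caller's mask governs the header allocation as well.
	 */
	void *buf = __vmalloc(16 * PAGE_SIZE,
			      GFP_NOFS | __GFP_HIGHMEM,	/* caller's constraints */
			      PAGE_KERNEL);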
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f05527bf792b..518540a4a2a6 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -723,6 +723,20 @@ done:
 	return nr_reclaimed;
 }
 
+/*
+ * We are about to scan this zone at a certain priority level.  If that priority
+ * level is smaller (ie: more urgent) than the previous priority, then note
+ * that priority level within the zone.  This is done so that when the next
+ * process comes in to scan this zone, it will immediately start out at this
+ * priority level rather than having to build up its own scanning priority.
+ * Here, this priority affects only the reclaim-mapped threshold.
+ */
+static inline void note_zone_scanning_priority(struct zone *zone, int priority)
+{
+	if (priority < zone->prev_priority)
+		zone->prev_priority = priority;
+}
+
 static inline int zone_is_near_oom(struct zone *zone)
 {
 	return zone->pages_scanned >= (zone->nr_active + zone->nr_inactive)*3;
@@ -746,7 +760,7 @@ static inline int zone_is_near_oom(struct zone *zone)
  * But we had to alter page->flags anyway.
  */
 static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
-				struct scan_control *sc)
+				struct scan_control *sc, int priority)
 {
 	unsigned long pgmoved;
 	int pgdeactivate = 0;
@@ -770,7 +784,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	 * `distress' is a measure of how much trouble we're having
 	 * reclaiming pages. 0 -> no problems. 100 -> great trouble.
 	 */
-	distress = 100 >> zone->prev_priority;
+	distress = 100 >> min(zone->prev_priority, priority);
 
 	/*
 	 * The point of this algorithm is to decide when to start
@@ -922,7 +936,7 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
 			nr_to_scan = min(nr_active,
 					(unsigned long)sc->swap_cluster_max);
 			nr_active -= nr_to_scan;
-			shrink_active_list(nr_to_scan, zone, sc);
+			shrink_active_list(nr_to_scan, zone, sc, priority);
 		}
 
 		if (nr_inactive) {
@@ -972,9 +986,7 @@ static unsigned long shrink_zones(int priority, struct zone **zones,
 		if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
 			continue;
 
-		zone->temp_priority = priority;
-		if (zone->prev_priority > priority)
-			zone->prev_priority = priority;
+		note_zone_scanning_priority(zone, priority);
 
 		if (zone->all_unreclaimable && priority != DEF_PRIORITY)
 			continue;	/* Let kswapd poll it */
@@ -1024,7 +1036,6 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
 		if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
 			continue;
 
-		zone->temp_priority = DEF_PRIORITY;
 		lru_pages += zone->nr_active + zone->nr_inactive;
 	}
 
@@ -1065,13 +1076,22 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
 	if (!sc.all_unreclaimable)
 		ret = 1;
 out:
+	/*
+	 * Now that we've scanned all the zones at this priority level, note
+	 * that level within the zone so that the next thread which performs
+	 * scanning of this zone will immediately start out at this priority
+	 * level.  This affects only the decision whether or not to bring
+	 * mapped pages onto the inactive list.
+	 */
+	if (priority < 0)
+		priority = 0;
 	for (i = 0; zones[i] != 0; i++) {
 		struct zone *zone = zones[i];
 
 		if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
 			continue;
 
-		zone->prev_priority = zone->temp_priority;
+		zone->prev_priority = priority;
 	}
 	return ret;
 }
@@ -1111,6 +1131,11 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 		.swap_cluster_max = SWAP_CLUSTER_MAX,
 		.swappiness = vm_swappiness,
 	};
+	/*
+	 * temp_priority is used to remember the scanning priority at which
+	 * this zone was successfully refilled to free_pages == pages_high.
+	 */
+	int temp_priority[MAX_NR_ZONES];
 
 loop_again:
 	total_scanned = 0;
@@ -1118,11 +1143,8 @@ loop_again:
 	sc.may_writepage = !laptop_mode;
 	count_vm_event(PAGEOUTRUN);
 
-	for (i = 0; i < pgdat->nr_zones; i++) {
-		struct zone *zone = pgdat->node_zones + i;
-
-		zone->temp_priority = DEF_PRIORITY;
-	}
+	for (i = 0; i < pgdat->nr_zones; i++)
+		temp_priority[i] = DEF_PRIORITY;
 
 	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
 		int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
@@ -1183,10 +1205,9 @@ scan:
 			if (!zone_watermark_ok(zone, order, zone->pages_high,
 					       end_zone, 0))
 				all_zones_ok = 0;
-			zone->temp_priority = priority;
-			if (zone->prev_priority > priority)
-				zone->prev_priority = priority;
+			temp_priority[i] = priority;
 			sc.nr_scanned = 0;
+			note_zone_scanning_priority(zone, priority);
 			nr_reclaimed += shrink_zone(priority, zone, &sc);
 			reclaim_state->reclaimed_slab = 0;
 			nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
@@ -1226,10 +1247,15 @@ scan:
 			break;
 	}
 out:
+	/*
+	 * Note within each zone the priority level at which this zone was
+	 * brought into a happy state.  So that the next thread which scans this
+	 * zone will start out at that priority level.
+	 */
 	for (i = 0; i < pgdat->nr_zones; i++) {
 		struct zone *zone = pgdat->node_zones + i;
 
-		zone->prev_priority = zone->temp_priority;
+		zone->prev_priority = temp_priority[i];
 	}
 	if (!all_zones_ok) {
 		cond_resched();
@@ -1358,7 +1384,7 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int pass,
 		if (zone->nr_scan_active >= nr_pages || pass > 3) {
 			zone->nr_scan_active = 0;
 			nr_to_scan = min(nr_pages, zone->nr_active);
-			shrink_active_list(nr_to_scan, zone, sc);
+			shrink_active_list(nr_to_scan, zone, sc, prio);
 		}
 	}
 
@@ -1614,6 +1640,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	 */
 	priority = ZONE_RECLAIM_PRIORITY;
 	do {
+		note_zone_scanning_priority(zone, priority);
 		nr_reclaimed += shrink_zone(priority, zone, &sc);
 		priority--;
 	} while (priority >= 0 && nr_reclaimed < nr_pages);
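
With shrink_active_list() now taking the current priority and clamping against zone->prev_priority, the distress figure reacts to this scan's urgency immediately instead of trusting a possibly stale prev_priority. The arithmetic, worked through (DEF_PRIORITY is 12 here):

	int distress = 100 >> min(zone->prev_priority, priority);

	/*
	 * priority falls from DEF_PRIORITY (12) toward 0 as reclaim gets
	 * more desperate:
	 *
	 *   min == 12  ->  distress = 100 >> 12 = 0	(no trouble)
	 *   min ==  6  ->  distress = 100 >>  6 = 1
	 *   min ==  3  ->  distress = 100 >>  3 = 12
	 *   min ==  0  ->  distress = 100 >>  0 = 100	(great trouble)
	 *
	 * Taking the min means a thread already scanning at high urgency is
	 * not diluted by a calmer value another thread recorded earlier.
	 */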
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 45b124e012f5..8614e8f6743b 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -587,11 +587,9 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
 		seq_printf(m,
 			   "\n  all_unreclaimable: %u"
 			   "\n  prev_priority:     %i"
-			   "\n  temp_priority:     %i"
 			   "\n  start_pfn:         %lu",
 			   zone->all_unreclaimable,
 			   zone->prev_priority,
-			   zone->temp_priority,
 			   zone->zone_start_pfn);
 		spin_unlock_irqrestore(&zone->lock, flags);
 		seq_putc(m, '\n');