Diffstat (limited to 'mm')
-rw-r--r--   mm/mempolicy.c | 132
-rw-r--r--   mm/nommu.c     |   8
-rw-r--r--   mm/oom_kill.c  |   5
-rw-r--r--   mm/rmap.c      |  18
-rw-r--r--   mm/slab.c      |  57
-rw-r--r--   mm/swap.c      |  25
6 files changed, 182 insertions, 63 deletions
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 67af4cea1e23..954981b14303 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -197,7 +197,7 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
         return policy;
 }
 
-static void gather_stats(struct page *, void *);
+static void gather_stats(struct page *, void *, int pte_dirty);
 static void migrate_page_add(struct page *page, struct list_head *pagelist,
                 unsigned long flags);
 
@@ -239,7 +239,7 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                 continue;
 
                 if (flags & MPOL_MF_STATS)
-                        gather_stats(page, private);
+                        gather_stats(page, private, pte_dirty(*pte));
                 else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
                         migrate_page_add(page, private, flags);
                 else
@@ -954,7 +954,8 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
                 goto out;
         }
 
-        err = do_migrate_pages(mm, &old, &new, MPOL_MF_MOVE);
+        err = do_migrate_pages(mm, &old, &new,
+                capable(CAP_SYS_ADMIN) ? MPOL_MF_MOVE_ALL : MPOL_MF_MOVE);
 out:
         mmput(mm);
         return err;
@@ -1752,66 +1753,145 @@ static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 struct numa_maps {
         unsigned long pages;
         unsigned long anon;
-        unsigned long mapped;
+        unsigned long active;
+        unsigned long writeback;
         unsigned long mapcount_max;
+        unsigned long dirty;
+        unsigned long swapcache;
         unsigned long node[MAX_NUMNODES];
 };
 
-static void gather_stats(struct page *page, void *private)
+static void gather_stats(struct page *page, void *private, int pte_dirty)
 {
         struct numa_maps *md = private;
         int count = page_mapcount(page);
 
-        if (count)
-                md->mapped++;
+        md->pages++;
+        if (pte_dirty || PageDirty(page))
+                md->dirty++;
 
-        if (count > md->mapcount_max)
-                md->mapcount_max = count;
+        if (PageSwapCache(page))
+                md->swapcache++;
 
-        md->pages++;
+        if (PageActive(page))
+                md->active++;
+
+        if (PageWriteback(page))
+                md->writeback++;
 
         if (PageAnon(page))
                 md->anon++;
 
+        if (count > md->mapcount_max)
+                md->mapcount_max = count;
+
         md->node[page_to_nid(page)]++;
         cond_resched();
 }
 
+#ifdef CONFIG_HUGETLB_PAGE
+static void check_huge_range(struct vm_area_struct *vma,
+                unsigned long start, unsigned long end,
+                struct numa_maps *md)
+{
+        unsigned long addr;
+        struct page *page;
+
+        for (addr = start; addr < end; addr += HPAGE_SIZE) {
+                pte_t *ptep = huge_pte_offset(vma->vm_mm, addr & HPAGE_MASK);
+                pte_t pte;
+
+                if (!ptep)
+                        continue;
+
+                pte = *ptep;
+                if (pte_none(pte))
+                        continue;
+
+                page = pte_page(pte);
+                if (!page)
+                        continue;
+
+                gather_stats(page, md, pte_dirty(*ptep));
+        }
+}
+#else
+static inline void check_huge_range(struct vm_area_struct *vma,
+                unsigned long start, unsigned long end,
+                struct numa_maps *md)
+{
+}
+#endif
+
 int show_numa_map(struct seq_file *m, void *v)
 {
         struct task_struct *task = m->private;
         struct vm_area_struct *vma = v;
         struct numa_maps *md;
+        struct file *file = vma->vm_file;
+        struct mm_struct *mm = vma->vm_mm;
         int n;
         char buffer[50];
 
-        if (!vma->vm_mm)
+        if (!mm)
                 return 0;
 
         md = kzalloc(sizeof(struct numa_maps), GFP_KERNEL);
         if (!md)
                 return 0;
 
-        check_pgd_range(vma, vma->vm_start, vma->vm_end,
-                        &node_online_map, MPOL_MF_STATS, md);
+        mpol_to_str(buffer, sizeof(buffer),
+                        get_vma_policy(task, vma, vma->vm_start));
 
-        if (md->pages) {
-                mpol_to_str(buffer, sizeof(buffer),
-                        get_vma_policy(task, vma, vma->vm_start));
+        seq_printf(m, "%08lx %s", vma->vm_start, buffer);
 
-                seq_printf(m, "%08lx %s pages=%lu mapped=%lu maxref=%lu",
-                        vma->vm_start, buffer, md->pages,
-                        md->mapped, md->mapcount_max);
+        if (file) {
+                seq_printf(m, " file=");
+                seq_path(m, file->f_vfsmnt, file->f_dentry, "\n\t= ");
+        } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
+                seq_printf(m, " heap");
+        } else if (vma->vm_start <= mm->start_stack &&
+                        vma->vm_end >= mm->start_stack) {
+                seq_printf(m, " stack");
+        }
 
-                if (md->anon)
-                        seq_printf(m," anon=%lu",md->anon);
+        if (is_vm_hugetlb_page(vma)) {
+                check_huge_range(vma, vma->vm_start, vma->vm_end, md);
+                seq_printf(m, " huge");
+        } else {
+                check_pgd_range(vma, vma->vm_start, vma->vm_end,
+                        &node_online_map, MPOL_MF_STATS, md);
+        }
 
-                for_each_online_node(n)
-                        if (md->node[n])
-                                seq_printf(m, " N%d=%lu", n, md->node[n]);
+        if (!md->pages)
+                goto out;
 
-                seq_putc(m, '\n');
-        }
+        if (md->anon)
+                seq_printf(m," anon=%lu",md->anon);
+
+        if (md->dirty)
+                seq_printf(m," dirty=%lu",md->dirty);
+
+        if (md->pages != md->anon && md->pages != md->dirty)
+                seq_printf(m, " mapped=%lu", md->pages);
+
+        if (md->mapcount_max > 1)
+                seq_printf(m, " mapmax=%lu", md->mapcount_max);
+
+        if (md->swapcache)
+                seq_printf(m," swapcache=%lu", md->swapcache);
+
+        if (md->active < md->pages && !is_vm_hugetlb_page(vma))
+                seq_printf(m," active=%lu", md->active);
+
+        if (md->writeback)
+                seq_printf(m," writeback=%lu", md->writeback);
+
+        for_each_online_node(n)
+                if (md->node[n])
+                        seq_printf(m, " N%d=%lu", n, md->node[n]);
+out:
+        seq_putc(m, '\n');
         kfree(md);
 
         if (m->count < m->size)
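
Note: the reworked show_numa_map() above emits one line per VMA: start address, policy string, an optional file/heap/stack/huge tag, the non-zero counters, and per-node page counts. A minimal user-space sketch for reading the result follows; the file path handling is standard procfs, but the sample line in the comment is only illustrative, not taken from the patch.

/*
 * Dump /proc/<pid>/numa_maps as produced by the reworked show_numa_map().
 * A line might look like (illustrative only):
 *   00400000 default file=/bin/cat mapped=3 mapmax=2 N0=3
 */
#include <stdio.h>

int main(int argc, char **argv)
{
        char path[64], line[1024];
        FILE *f;

        snprintf(path, sizeof(path), "/proc/%s/numa_maps",
                 argc > 1 ? argv[1] : "self");
        f = fopen(path, "r");
        if (!f) {
                perror(path);
                return 1;
        }
        while (fgets(line, sizeof(line), f))
                fputs(line, stdout);    /* each line: one VMA's NUMA statistics */
        fclose(f);
        return 0;
}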
diff --git a/mm/nommu.c b/mm/nommu.c
index 99d21020ec9d..4951f4786f28 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -53,7 +53,6 @@ DECLARE_RWSEM(nommu_vma_sem);
 struct vm_operations_struct generic_file_vm_ops = {
 };
 
-EXPORT_SYMBOL(vmalloc);
 EXPORT_SYMBOL(vfree);
 EXPORT_SYMBOL(vmalloc_to_page);
 EXPORT_SYMBOL(vmalloc_32);
@@ -205,6 +204,13 @@ void *vmalloc(unsigned long size)
 {
         return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL);
 }
+EXPORT_SYMBOL(vmalloc);
+
+void *vmalloc_node(unsigned long size, int node)
+{
+        return vmalloc(size);
+}
+EXPORT_SYMBOL(vmalloc_node);
 
 /*
  * vmalloc_32 - allocate virtually continguos memory (32bit addressable)
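
Note: with the hunk above, !MMU kernels gain a vmalloc_node() that ignores the node hint and falls back to vmalloc(), so node-aware callers compile unchanged. A user-space analogue of that fallback pattern is sketched below; the function names are hypothetical and not part of the patch.

#include <stdio.h>
#include <stdlib.h>

/*
 * Hypothetical analogue of the nommu fallback: the node-aware entry
 * point always exists, but without NUMA support it simply forwards
 * to the plain allocator, as vmalloc_node() forwards to vmalloc().
 */
static void *buf_alloc(size_t size)
{
        return malloc(size);
}

static void *buf_alloc_node(size_t size, int node)
{
        (void)node;                     /* placement hint ignored, as on nommu */
        return buf_alloc(size);
}

int main(void)
{
        void *p = buf_alloc_node(4096, 0);

        if (!p)
                return 1;
        printf("buf_alloc_node(4096, 0) -> %p\n", p);
        free(p);
        return 0;
}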
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 8123fad5a485..78747afad6b0 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -302,7 +302,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
 {
         struct mm_struct *mm = NULL;
         task_t *p;
-        unsigned long points;
+        unsigned long points = 0;
 
         if (printk_ratelimit()) {
                 printk("oom-killer: gfp_mask=0x%x, order=%d\n",
@@ -355,6 +355,7 @@ retry:
         }
 
 out:
+        read_unlock(&tasklist_lock);
         cpuset_unlock();
         if (mm)
                 mmput(mm);
@@ -364,5 +365,5 @@ out:
          * retry to allocate memory unless "p" is current
          */
         if (!test_thread_flag(TIF_MEMDIE))
-                schedule_timeout_interruptible(1);
+                schedule_timeout_uninterruptible(1);
 }
diff --git a/mm/rmap.c b/mm/rmap.c
index df2c41c2a9a2..d8ce5ff61454 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -212,25 +212,33 @@ out:
  * through real pte's pointing to valid pages and then releasing
  * the page from the swap cache.
  *
- * Must hold page lock on page.
+ * Must hold page lock on page and mmap_sem of one vma that contains
+ * the page.
  */
 void remove_from_swap(struct page *page)
 {
         struct anon_vma *anon_vma;
         struct vm_area_struct *vma;
+        unsigned long mapping;
 
-        if (!PageAnon(page) || !PageSwapCache(page))
+        if (!PageSwapCache(page))
                 return;
 
-        anon_vma = page_lock_anon_vma(page);
-        if (!anon_vma)
+        mapping = (unsigned long)page->mapping;
+
+        if (!mapping || (mapping & PAGE_MAPPING_ANON) == 0)
                 return;
 
+        /*
+         * We hold the mmap_sem lock. So no need to call page_lock_anon_vma.
+         */
+        anon_vma = (struct anon_vma *) (mapping - PAGE_MAPPING_ANON);
+        spin_lock(&anon_vma->lock);
+
         list_for_each_entry(vma, &anon_vma->head, anon_vma_node)
                 remove_vma_swap(vma, page);
 
         spin_unlock(&anon_vma->lock);
-
         delete_from_swap_cache(page);
 }
 EXPORT_SYMBOL(remove_from_swap);
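
Note: the new remove_from_swap() above decodes page->mapping by hand: for anonymous pages the kernel stores the anon_vma pointer with the low PAGE_MAPPING_ANON bit set, so checking and subtracting that bit recovers the pointer without page_lock_anon_vma(). A small stand-alone sketch of that pointer-tagging scheme follows; the struct name and test harness are illustrative, not kernel code.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define MAPPING_ANON 1UL        /* low tag bit, like PAGE_MAPPING_ANON */

struct anon_region { int id; }; /* stand-in for struct anon_vma */

/* Tag a pointer by setting the low bit; alignment guarantees it is free. */
static uintptr_t tag_anon(struct anon_region *ar)
{
        return (uintptr_t)ar + MAPPING_ANON;
}

/* Recover the pointer exactly as the hunk does: subtract the tag bit. */
static struct anon_region *untag_anon(uintptr_t mapping)
{
        if (!mapping || (mapping & MAPPING_ANON) == 0)
                return NULL;            /* not an anonymous mapping */
        return (struct anon_region *)(mapping - MAPPING_ANON);
}

int main(void)
{
        struct anon_region ar = { .id = 42 };
        uintptr_t mapping = tag_anon(&ar);

        assert(untag_anon(mapping) == &ar);
        assert(untag_anon(0) == NULL);
        printf("decoded anon_region id=%d\n", untag_anon(mapping)->id);
        return 0;
}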
diff --git a/mm/slab.c b/mm/slab.c
index add05d808a4a..61800b88e241 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1124,6 +1124,7 @@ void __init kmem_cache_init(void)
         struct cache_sizes *sizes;
         struct cache_names *names;
         int i;
+        int order;
 
         for (i = 0; i < NUM_INIT_LISTS; i++) {
                 kmem_list3_init(&initkmem_list3[i]);
@@ -1167,11 +1168,15 @@ void __init kmem_cache_init(void)
 
         cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, cache_line_size());
 
-        cache_estimate(0, cache_cache.buffer_size, cache_line_size(), 0,
-                        &left_over, &cache_cache.num);
+        for (order = 0; order < MAX_ORDER; order++) {
+                cache_estimate(order, cache_cache.buffer_size,
+                        cache_line_size(), 0, &left_over, &cache_cache.num);
+                if (cache_cache.num)
+                        break;
+        }
         if (!cache_cache.num)
                 BUG();
-
+        cache_cache.gfporder = order;
         cache_cache.colour = left_over / cache_cache.colour_off;
         cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
                         sizeof(struct slab), cache_line_size());
@@ -1628,36 +1633,44 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep,
                         size_t size, size_t align, unsigned long flags)
 {
         size_t left_over = 0;
+        int gfporder;
 
-        for (;; cachep->gfporder++) {
+        for (gfporder = 0 ; gfporder <= MAX_GFP_ORDER; gfporder++) {
                 unsigned int num;
                 size_t remainder;
 
-                if (cachep->gfporder > MAX_GFP_ORDER) {
-                        cachep->num = 0;
-                        break;
-                }
-
-                cache_estimate(cachep->gfporder, size, align, flags,
-                                &remainder, &num);
+                cache_estimate(gfporder, size, align, flags, &remainder, &num);
                 if (!num)
                         continue;
+
                 /* More than offslab_limit objects will cause problems */
-                if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit)
+                if ((flags & CFLGS_OFF_SLAB) && num > offslab_limit)
                         break;
 
+                /* Found something acceptable - save it away */
                 cachep->num = num;
+                cachep->gfporder = gfporder;
                 left_over = remainder;
 
                 /*
+                 * A VFS-reclaimable slab tends to have most allocations
+                 * as GFP_NOFS and we really don't want to have to be allocating
+                 * higher-order pages when we are unable to shrink dcache.
+                 */
+                if (flags & SLAB_RECLAIM_ACCOUNT)
+                        break;
+
+                /*
                  * Large number of objects is good, but very large slabs are
                  * currently bad for the gfp()s.
                  */
-                if (cachep->gfporder >= slab_break_gfp_order)
+                if (gfporder >= slab_break_gfp_order)
                         break;
 
-                if ((left_over * 8) <= (PAGE_SIZE << cachep->gfporder))
-                        /* Acceptable internal fragmentation */
+                /*
+                 * Acceptable internal fragmentation?
+                 */
+                if ((left_over * 8) <= (PAGE_SIZE << gfporder))
                         break;
         }
         return left_over;
@@ -1869,17 +1882,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 
         size = ALIGN(size, align);
 
-        if ((flags & SLAB_RECLAIM_ACCOUNT) && size <= PAGE_SIZE) {
-                /*
-                 * A VFS-reclaimable slab tends to have most allocations
-                 * as GFP_NOFS and we really don't want to have to be allocating
-                 * higher-order pages when we are unable to shrink dcache.
-                 */
-                cachep->gfporder = 0;
-                cache_estimate(cachep->gfporder, size, align, flags,
-                                &left_over, &cachep->num);
-        } else
-                left_over = calculate_slab_order(cachep, size, align, flags);
+        left_over = calculate_slab_order(cachep, size, align, flags);
 
         if (!cachep->num) {
                 printk("kmem_cache_create: couldn't create cache %s.\n", name);
@@ -2554,7 +2557,7 @@ static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
2554 "slab: Internal list corruption detected in cache '%s'(%d), slabp %p(%d). Hexdump:\n", 2557 "slab: Internal list corruption detected in cache '%s'(%d), slabp %p(%d). Hexdump:\n",
2555 cachep->name, cachep->num, slabp, slabp->inuse); 2558 cachep->name, cachep->num, slabp, slabp->inuse);
2556 for (i = 0; 2559 for (i = 0;
2557 i < sizeof(slabp) + cachep->num * sizeof(kmem_bufctl_t); 2560 i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t);
2558 i++) { 2561 i++) {
2559 if ((i % 16) == 0) 2562 if ((i % 16) == 0)
2560 printk("\n%03x:", i); 2563 printk("\n%03x:", i);
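
Note: calculate_slab_order() now walks the page orders itself: for each order it asks cache_estimate() how many objects fit, records the first acceptable fit, and stops early for SLAB_RECLAIM_ACCOUNT caches, at slab_break_gfp_order, or once wasted space drops to 1/8 of the slab. Below is a simplified stand-alone model of that search; it ignores alignment, colouring, off-slab overhead and the two early-exit heuristics, so the numbers are only illustrative.

#include <stddef.h>
#include <stdio.h>

#define MODEL_PAGE_SIZE 4096UL
#define MODEL_MAX_ORDER 5       /* assumed cap for this sketch */

/* Crude stand-in for cache_estimate(): objects per slab and leftover bytes. */
static void estimate(int order, size_t size, unsigned int *num, size_t *left)
{
        size_t slab_bytes = MODEL_PAGE_SIZE << order;

        *num = slab_bytes / size;
        *left = slab_bytes - *num * size;
}

/* Mirror of the order-search loop in calculate_slab_order(). */
static int pick_order(size_t size, unsigned int *num_out)
{
        int order;

        for (order = 0; order <= MODEL_MAX_ORDER; order++) {
                unsigned int num;
                size_t left;

                estimate(order, size, &num, &left);
                if (!num)
                        continue;       /* object does not fit yet */

                *num_out = num;
                /* Acceptable internal fragmentation: waste <= 1/8 of slab. */
                if (left * 8 <= (MODEL_PAGE_SIZE << order))
                        return order;
        }
        return MODEL_MAX_ORDER;
}

int main(void)
{
        size_t sizes[] = { 32, 1500, 5000, 9000 };
        unsigned int i;

        for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
                unsigned int num = 0;
                int order = pick_order(sizes[i], &num);

                printf("object %5zu bytes -> order %d, %u objects per slab\n",
                       sizes[i], order, num);
        }
        return 0;
}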
diff --git a/mm/swap.c b/mm/swap.c
index cce3dda59c59..e9ec06d845e8 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -489,13 +489,34 @@ void percpu_counter_mod(struct percpu_counter *fbc, long amount)
         if (count >= FBC_BATCH || count <= -FBC_BATCH) {
                 spin_lock(&fbc->lock);
                 fbc->count += count;
+                *pcount = 0;
                 spin_unlock(&fbc->lock);
-                count = 0;
+        } else {
+                *pcount = count;
         }
-        *pcount = count;
         put_cpu();
 }
 EXPORT_SYMBOL(percpu_counter_mod);
+
+/*
+ * Add up all the per-cpu counts, return the result. This is a more accurate
+ * but much slower version of percpu_counter_read_positive()
+ */
+long percpu_counter_sum(struct percpu_counter *fbc)
+{
+        long ret;
+        int cpu;
+
+        spin_lock(&fbc->lock);
+        ret = fbc->count;
+        for_each_cpu(cpu) {
+                long *pcount = per_cpu_ptr(fbc->counters, cpu);
+                ret += *pcount;
+        }
+        spin_unlock(&fbc->lock);
+        return ret < 0 ? 0 : ret;
+}
+EXPORT_SYMBOL(percpu_counter_sum);
 #endif
 
 /*