author		Robert Richter <robert.richter@amd.com>	2008-09-24 05:25:31 -0400
committer	Robert Richter <robert.richter@amd.com>	2008-09-24 05:25:31 -0400
commit		f78e80209cf143be49f268c340431ae9fa3abb74 (patch)
tree		820fa64b688099dfdd93d27ba03252738ca5c7e2 /mm
parent		4c168eaf7ea39f25a45a3d8c7eebc3fedb633a1d (diff)
parent		24342c34a022ee90839873d91396045e12ef1090 (diff)
Merge commit 'v2.6.27-rc5' into tip/oprofile

Conflicts:
	arch/x86/oprofile/nmi_int.c
Diffstat (limited to 'mm')
-rw-r--r--	mm/bootmem.c	35
-rw-r--r--	mm/filemap_xip.c	65
-rw-r--r--	mm/mm_init.c	2
-rw-r--r--	mm/rmap.c	39
-rw-r--r--	mm/slub.c	4
-rw-r--r--	mm/swap_state.c	2
-rw-r--r--	mm/vmstat.c	19
7 files changed, 126 insertions(+), 40 deletions(-)
diff --git a/mm/bootmem.c b/mm/bootmem.c
index e023c68b0255..ad8eec6e44a8 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -405,6 +405,29 @@ int __init reserve_bootmem(unsigned long addr, unsigned long size,
 }
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 
+static unsigned long align_idx(struct bootmem_data *bdata, unsigned long idx,
+			unsigned long step)
+{
+	unsigned long base = bdata->node_min_pfn;
+
+	/*
+	 * Align the index with respect to the node start so that the
+	 * combination of both satisfies the requested alignment.
+	 */
+
+	return ALIGN(base + idx, step) - base;
+}
+
+static unsigned long align_off(struct bootmem_data *bdata, unsigned long off,
+			unsigned long align)
+{
+	unsigned long base = PFN_PHYS(bdata->node_min_pfn);
+
+	/* Same as align_idx for byte offsets */
+
+	return ALIGN(base + off, align) - base;
+}
+
 static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
 				unsigned long size, unsigned long align,
 				unsigned long goal, unsigned long limit)
@@ -441,7 +464,7 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
 	else
 		start = ALIGN(min, step);
 
-	sidx = start - bdata->node_min_pfn;;
+	sidx = start - bdata->node_min_pfn;
 	midx = max - bdata->node_min_pfn;
 
 	if (bdata->hint_idx > sidx) {
@@ -450,7 +473,7 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
 		 * catch the fallback below.
 		 */
 		fallback = sidx + 1;
-		sidx = ALIGN(bdata->hint_idx, step);
+		sidx = align_idx(bdata, bdata->hint_idx, step);
 	}
 
 	while (1) {
@@ -459,7 +482,7 @@ static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
 		unsigned long eidx, i, start_off, end_off;
 find_block:
 		sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx);
-		sidx = ALIGN(sidx, step);
+		sidx = align_idx(bdata, sidx, step);
 		eidx = sidx + PFN_UP(size);
 
 		if (sidx >= midx || eidx > midx)
@@ -467,7 +490,7 @@ find_block:
 
 		for (i = sidx; i < eidx; i++)
 			if (test_bit(i, bdata->node_bootmem_map)) {
-				sidx = ALIGN(i, step);
+				sidx = align_idx(bdata, i, step);
 				if (sidx == i)
 					sidx += step;
 				goto find_block;
@@ -475,7 +498,7 @@ find_block:
 
 		if (bdata->last_end_off & (PAGE_SIZE - 1) &&
 				PFN_DOWN(bdata->last_end_off) + 1 == sidx)
-			start_off = ALIGN(bdata->last_end_off, align);
+			start_off = align_off(bdata, bdata->last_end_off, align);
 		else
 			start_off = PFN_PHYS(sidx);
 
@@ -499,7 +522,7 @@ find_block:
 	}
 
 	if (fallback) {
-		sidx = align_idx(bdata, fallback - 1, step);
+		sidx = align_idx(bdata, fallback - 1, step);
 		fallback = 0;
 		goto find_block;
 	}
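
The point of the new align_idx()/align_off() helpers: sidx and start_off are
relative to the node's first PFN, so a plain ALIGN() on them only yields an
aligned absolute address when node_min_pfn itself happens to be aligned to
the requested step. A minimal userspace sketch of the difference (not kernel
code; ALIGN() is re-implemented here and the base value is hypothetical):

	#include <stdio.h>

	#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

	/* mirror of the patch: align relative to the node base */
	static unsigned long align_idx(unsigned long base, unsigned long idx,
				unsigned long step)
	{
		return ALIGN(base + idx, step) - base;
	}

	int main(void)
	{
		unsigned long base = 3;	/* hypothetical unaligned node_min_pfn */

		/* old code: ALIGN(5, 4) = 8, absolute PFN 11 - not 4-aligned */
		printf("old: absolute pfn %lu\n", base + ALIGN(5, 4));
		/* new code: align_idx() = 5, absolute PFN 8 - 4-aligned */
		printf("new: absolute pfn %lu\n", base + align_idx(base, 5, 4));
		return 0;
	}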
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 380ab402d711..b5167dfb2f2d 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -15,6 +15,8 @@
 #include <linux/rmap.h>
 #include <linux/mmu_notifier.h>
 #include <linux/sched.h>
+#include <linux/seqlock.h>
+#include <linux/mutex.h>
 #include <asm/tlbflush.h>
 #include <asm/io.h>
 
@@ -22,22 +24,18 @@
  * We do use our own empty page to avoid interference with other users
  * of ZERO_PAGE(), such as /dev/zero
  */
+static DEFINE_MUTEX(xip_sparse_mutex);
+static seqcount_t xip_sparse_seq = SEQCNT_ZERO;
 static struct page *__xip_sparse_page;
 
+/* called under xip_sparse_mutex */
 static struct page *xip_sparse_page(void)
 {
 	if (!__xip_sparse_page) {
 		struct page *page = alloc_page(GFP_HIGHUSER | __GFP_ZERO);
 
-		if (page) {
-			static DEFINE_SPINLOCK(xip_alloc_lock);
-			spin_lock(&xip_alloc_lock);
-			if (!__xip_sparse_page)
-				__xip_sparse_page = page;
-			else
-				__free_page(page);
-			spin_unlock(&xip_alloc_lock);
-		}
+		if (page)
+			__xip_sparse_page = page;
 	}
 	return __xip_sparse_page;
 }
@@ -174,18 +172,23 @@ __xip_unmap (struct address_space * mapping,
 	pte_t pteval;
 	spinlock_t *ptl;
 	struct page *page;
+	unsigned count;
+	int locked = 0;
+
+	count = read_seqcount_begin(&xip_sparse_seq);
 
 	page = __xip_sparse_page;
 	if (!page)
 		return;
 
+retry:
 	spin_lock(&mapping->i_mmap_lock);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
 		mm = vma->vm_mm;
 		address = vma->vm_start +
 			((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
 		BUG_ON(address < vma->vm_start || address >= vma->vm_end);
-		pte = page_check_address(page, mm, address, &ptl);
+		pte = page_check_address(page, mm, address, &ptl, 1);
 		if (pte) {
 			/* Nuke the page table entry. */
 			flush_cache_page(vma, address, pte_pfn(*pte));
@@ -198,6 +201,14 @@ __xip_unmap (struct address_space * mapping,
 		}
 	}
 	spin_unlock(&mapping->i_mmap_lock);
+
+	if (locked) {
+		mutex_unlock(&xip_sparse_mutex);
+	} else if (read_seqcount_retry(&xip_sparse_seq, count)) {
+		mutex_lock(&xip_sparse_mutex);
+		locked = 1;
+		goto retry;
+	}
 }
 
 /*
@@ -218,7 +229,7 @@ static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	int error;
 
 	/* XXX: are VM_FAULT_ codes OK? */
-
+again:
 	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	if (vmf->pgoff >= size)
 		return VM_FAULT_SIGBUS;
@@ -237,8 +248,10 @@ static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		int err;
 
 		/* maybe shared writable, allocate new block */
+		mutex_lock(&xip_sparse_mutex);
 		error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 1,
 							&xip_mem, &xip_pfn);
+		mutex_unlock(&xip_sparse_mutex);
 		if (error)
 			return VM_FAULT_SIGBUS;
 		/* unmap sparse mappings at pgoff from all other vmas */
@@ -252,14 +265,34 @@ found:
 		BUG_ON(err);
 		return VM_FAULT_NOPAGE;
 	} else {
+		int err, ret = VM_FAULT_OOM;
+
+		mutex_lock(&xip_sparse_mutex);
+		write_seqcount_begin(&xip_sparse_seq);
+		error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 0,
+							&xip_mem, &xip_pfn);
+		if (unlikely(!error)) {
+			write_seqcount_end(&xip_sparse_seq);
+			mutex_unlock(&xip_sparse_mutex);
+			goto again;
+		}
+		if (error != -ENODATA)
+			goto out;
 		/* not shared and writable, use xip_sparse_page() */
 		page = xip_sparse_page();
 		if (!page)
-			return VM_FAULT_OOM;
+			goto out;
+		err = vm_insert_page(vma, (unsigned long)vmf->virtual_address,
+							page);
+		if (err == -ENOMEM)
+			goto out;
 
-		page_cache_get(page);
-		vmf->page = page;
-		return 0;
+		ret = VM_FAULT_NOPAGE;
+out:
+		write_seqcount_end(&xip_sparse_seq);
+		mutex_unlock(&xip_sparse_mutex);
+
+		return ret;
 	}
 }
 
@@ -308,8 +341,10 @@ __xip_file_write(struct file *filp, const char __user *buf,
 						&xip_mem, &xip_pfn);
 		if (status == -ENODATA) {
 			/* we allocate a new page unmap it */
+			mutex_lock(&xip_sparse_mutex);
 			status = a_ops->get_xip_mem(mapping, index, 1,
 							&xip_mem, &xip_pfn);
+			mutex_unlock(&xip_sparse_mutex);
 			if (!status)
 				/* unmap page at pgoff from all other vmas */
 				__xip_unmap(mapping, index);
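
The filemap_xip.c changes implement a lockless-reader pattern: paths that
instantiate blocks take xip_sparse_mutex and bump xip_sparse_seq, while
__xip_unmap() first walks the mappings unlocked and repeats the walk under
the mutex only if the seqcount shows a writer raced with it. A compact
userspace analogue of the pattern (pthread/C11 stand-ins, not the kernel
seqlock API):

	#include <pthread.h>
	#include <stdatomic.h>

	static pthread_mutex_t sparse_mutex = PTHREAD_MUTEX_INITIALIZER;
	static atomic_uint sparse_seq;	/* even = stable, odd = writer active */

	static void writer_side(void)
	{
		pthread_mutex_lock(&sparse_mutex);
		atomic_fetch_add(&sparse_seq, 1);	/* begin: seq goes odd */
		/* ... instantiate the block, as get_xip_mem(..., 1, ...) ... */
		atomic_fetch_add(&sparse_seq, 1);	/* end: seq goes even */
		pthread_mutex_unlock(&sparse_mutex);
	}

	static void unmap_side(void)
	{
		unsigned count = atomic_load(&sparse_seq);
		int locked = 0;
	retry:
		/* ... walk and clear the mappings, as __xip_unmap() does ... */
		if (locked) {
			pthread_mutex_unlock(&sparse_mutex);
		} else if (atomic_load(&sparse_seq) != count || (count & 1)) {
			/* a writer raced with us: redo the walk serialized */
			pthread_mutex_lock(&sparse_mutex);
			locked = 1;
			goto retry;
		}
	}

	int main(void)
	{
		writer_side();
		unmap_side();
		return 0;
	}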
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 936ef2efd892..4e0e26591dfa 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -12,7 +12,7 @@
 #include "internal.h"
 
 #ifdef CONFIG_DEBUG_MEMORY_INIT
-int __meminitdata mminit_loglevel;
+int mminit_loglevel;
 
 #ifndef SECTIONS_SHIFT
 #define SECTIONS_SHIFT 0
diff --git a/mm/rmap.c b/mm/rmap.c
index 1ea4e6fcee77..0383acfcb068 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -224,10 +224,14 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
 /*
  * Check that @page is mapped at @address into @mm.
  *
+ * If @sync is false, page_check_address may perform a racy check to avoid
+ * the page table lock when the pte is not present (helpful when reclaiming
+ * highly shared pages).
+ *
  * On success returns with pte mapped and locked.
  */
 pte_t *page_check_address(struct page *page, struct mm_struct *mm,
-			  unsigned long address, spinlock_t **ptlp)
+			  unsigned long address, spinlock_t **ptlp, int sync)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -249,7 +253,7 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
 
 	pte = pte_offset_map(pmd, address);
 	/* Make a quick check before getting the lock */
-	if (!pte_present(*pte)) {
+	if (!sync && !pte_present(*pte)) {
 		pte_unmap(pte);
 		return NULL;
 	}
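
The new sync flag splits the callers into two classes: heuristic ones
(page_referenced_one, try_to_unmap_one) pass 0 and keep the cheap racy
pte_present() bail-out, while callers that must not miss a live pte
(page_mkclean_one, __xip_unmap) pass 1 and always proceed to the locked
recheck. A toy illustration of the shape of that choice (hypothetical demo
types, not the kernel interfaces):

	#include <stdio.h>

	struct demo_pte { int present; };

	static struct demo_pte *check_address(struct demo_pte *pte, int sync)
	{
		/* quick racy check: acceptable for heuristics only */
		if (!sync && !pte->present)
			return NULL;
		/* (the real code takes the pte lock here and rechecks) */
		return pte->present ? pte : NULL;
	}

	int main(void)
	{
		struct demo_pte pte = { 0 };	/* not (yet) present */

		/* sync=0 may legitimately miss it; sync=1 must recheck */
		printf("sync=0: %p\n", (void *)check_address(&pte, 0));
		printf("sync=1: %p\n", (void *)check_address(&pte, 1));
		return 0;
	}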
@@ -281,7 +285,7 @@ static int page_referenced_one(struct page *page,
 	if (address == -EFAULT)
 		goto out;
 
-	pte = page_check_address(page, mm, address, &ptl);
+	pte = page_check_address(page, mm, address, &ptl, 0);
 	if (!pte)
 		goto out;
 
@@ -450,7 +454,7 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
 	if (address == -EFAULT)
 		goto out;
 
-	pte = page_check_address(page, mm, address, &ptl);
+	pte = page_check_address(page, mm, address, &ptl, 1);
 	if (!pte)
 		goto out;
 
@@ -659,23 +663,30 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
 		}
 
 		/*
-		 * It would be tidy to reset the PageAnon mapping here,
-		 * but that might overwrite a racing page_add_anon_rmap
-		 * which increments mapcount after us but sets mapping
-		 * before us: so leave the reset to free_hot_cold_page,
-		 * and remember that it's only reliable while mapped.
-		 * Leaving it set also helps swapoff to reinstate ptes
-		 * faster for those pages still in swapcache.
+		 * Now that the last pte has gone, s390 must transfer dirty
+		 * flag from storage key to struct page. We can usually skip
+		 * this if the page is anon, so about to be freed; but perhaps
+		 * not if it's in swapcache - there might be another pte slot
+		 * containing the swap entry, but page not yet written to swap.
 		 */
 		if ((!PageAnon(page) || PageSwapCache(page)) &&
 		    page_test_dirty(page)) {
 			page_clear_dirty(page);
 			set_page_dirty(page);
 		}
-		mem_cgroup_uncharge_page(page);
 
+		mem_cgroup_uncharge_page(page);
 		__dec_zone_page_state(page,
 			PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
+		/*
+		 * It would be tidy to reset the PageAnon mapping here,
+		 * but that might overwrite a racing page_add_anon_rmap
+		 * which increments mapcount after us but sets mapping
+		 * before us: so leave the reset to free_hot_cold_page,
+		 * and remember that it's only reliable while mapped.
+		 * Leaving it set also helps swapoff to reinstate ptes
+		 * faster for those pages still in swapcache.
+		 */
 	}
 }
 
@@ -697,7 +708,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	if (address == -EFAULT)
 		goto out;
 
-	pte = page_check_address(page, mm, address, &ptl);
+	pte = page_check_address(page, mm, address, &ptl, 0);
 	if (!pte)
 		goto out;
 
diff --git a/mm/slub.c b/mm/slub.c
index 4f5b96149458..fb486d5540f8 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2312,7 +2312,7 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
 
 	s->refcount = 1;
 #ifdef CONFIG_NUMA
-	s->remote_node_defrag_ratio = 100;
+	s->remote_node_defrag_ratio = 1000;
 #endif
 	if (!init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA))
 		goto error;
@@ -4058,7 +4058,7 @@ static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
 	if (err)
 		return err;
 
-	if (ratio < 100)
+	if (ratio <= 100)
 		s->remote_node_defrag_ratio = ratio * 10;
 
 	return length;
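
Both slub.c hunks follow from one unit change: remote_node_defrag_ratio is
now stored in tenths of a percent (0-1000, default 1000) while the sysfs
file keeps accepting a percentage (0-100), so the store must accept the
boundary value 100. A sketch of the store-side check in isolation
(simplified, without the sysfs plumbing):

	#include <stdio.h>

	static unsigned long remote_node_defrag_ratio = 1000;	/* new default */

	static void store_ratio(unsigned long ratio)
	{
		if (ratio <= 100)	/* was "< 100", which rejected 100 */
			remote_node_defrag_ratio = ratio * 10;
	}

	int main(void)
	{
		store_ratio(100);
		printf("%lu\n", remote_node_defrag_ratio);	/* prints 1000 */
		return 0;
	}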
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 167cf2dc8a03..797c3831cbec 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -60,7 +60,7 @@ void show_swap_cache_info(void)
 	printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n",
 		swap_cache_info.add_total, swap_cache_info.del_total,
 		swap_cache_info.find_success, swap_cache_info.find_total);
-	printk("Free swap  = %lukB\n", nr_swap_pages << (PAGE_SHIFT - 10));
+	printk("Free swap  = %ldkB\n", nr_swap_pages << (PAGE_SHIFT - 10));
 	printk("Total swap = %lukB\n", total_swap_pages << (PAGE_SHIFT - 10));
 }
 
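
The format-string fix matters because nr_swap_pages is a signed long; if it
ever holds a negative value, %lu renders it as a huge unsigned number
instead of a small deficit. Demonstrable in plain C (standalone sketch,
printk swapped for printf, value hypothetical):

	#include <stdio.h>

	int main(void)
	{
		long nr_swap_pages = -8;	/* hypothetical negative count */

		printf("with %%lu: %lukB\n", (unsigned long)nr_swap_pages);
		printf("with %%ld: %ldkB\n", nr_swap_pages);
		return 0;
	}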
diff --git a/mm/vmstat.c b/mm/vmstat.c
index b0d08e667ece..d7826af2fb07 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -516,9 +516,26 @@ static void pagetypeinfo_showblockcount_print(struct seq_file *m,
 			continue;
 
 		page = pfn_to_page(pfn);
+#ifdef CONFIG_ARCH_FLATMEM_HAS_HOLES
+		/*
+		 * Ordinarily, memory holes in flatmem still have a valid
+		 * memmap for the PFN range. However, an architecture for
+		 * embedded systems (e.g. ARM) can free up the memmap backing
+		 * holes to save memory on the assumption the memmap is
+		 * never used. The page_zone linkages are then broken even
+		 * though pfn_valid() returns true. Skip the page if the
+		 * linkages are broken. Even if this test passed, the impact
+		 * is that the counters for the movable type are off but
+		 * fragmentation monitoring is likely meaningless on small
+		 * systems.
+		 */
+		if (page_zone(page) != zone)
+			continue;
+#endif
 		mtype = get_pageblock_migratetype(page);
 
-		count[mtype]++;
+		if (mtype < MIGRATE_TYPES)
+			count[mtype]++;
 	}
 
 	/* Print counts */
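
The MIGRATE_TYPES guard is the defensive half of the same change: when the
memmap linkage is stale, get_pageblock_migratetype() can return a value
outside the counter array, so the index is bounds-checked before use. The
pattern in isolation (toy values, not kernel data):

	#include <stdio.h>

	#define MIGRATE_TYPES 5

	int main(void)
	{
		unsigned long count[MIGRATE_TYPES] = { 0 };
		int samples[] = { 0, 2, 7, 4 };	/* 7 = bogus out-of-range type */

		for (unsigned i = 0; i < sizeof(samples) / sizeof(*samples); i++) {
			int mtype = samples[i];

			if (mtype < MIGRATE_TYPES)	/* skip corrupt values */
				count[mtype]++;
		}
		for (int t = 0; t < MIGRATE_TYPES; t++)
			printf("type %d: %lu\n", t, count[t]);
		return 0;
	}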