remove ZERO_PAGE

The commit b5810039a54e5babf428e9a1e89fc1940fabff11 contains the note:

  "A last caveat: the ZERO_PAGE is now refcounted and managed with rmap (and thus mapcounted and count towards shared rss). These writes to the struct page could cause excessive cacheline bouncing on big systems. There are a number of ways this could be addressed if it is an issue."

And indeed this cacheline bouncing has shown up on large SGI systems. There was a situation where an Altix system was essentially livelocked tearing down ZERO_PAGE pagetables when an HPC app aborted during startup. This situation can be avoided in userspace, but it does highlight the potential scalability problem with refcounting ZERO_PAGE, and corner cases where it can really hurt (we don't want the system to livelock!).

There are several broad ways to fix this problem:
1. add back some special casing to avoid refcounting ZERO_PAGE
2. per-node or per-cpu ZERO_PAGES
3. remove the ZERO_PAGE completely

I will argue for 3. The others should also fix the problem, but they result in more complex code than does 3, with little or no real benefit that I can see.

Why? Inserting a ZERO_PAGE for anonymous read faults appears to be a false optimisation: if an application is performance critical, it would not be doing many read faults of new memory, or at least it could be expected to write to that memory soon afterwards. If cache or memory use is critical, it should not be working with a significant number of ZERO_PAGEs anyway (a more compact representation of zeroes should be used).

As a sanity check -- measuring on my desktop system, there are never many mappings to the ZERO_PAGE (e.g. 2 or 3), thus memory usage here should not increase much without it. When running a make -j4 kernel compile on my dual core system, there are about 1,000 mappings to the ZERO_PAGE created per second, but about 1,000 ZERO_PAGE COW faults per second (less than 1 ZERO_PAGE mapping per second is torn down without being COWed).
So removing ZERO_PAGE will save 1,000 page faults per second when running kbuild, while keeping it only saves fewer than one page-clearing operation per second. 1 page clear is cheaper than a thousand faults, presumably, so there isn't an obvious loss.

Neither the logical argument nor these basic tests give a guarantee of no regressions. However, this is a reasonable opportunity to try to remove the ZERO_PAGE from the pagefault path. If it is found to cause regressions, we can reintroduce it and just avoid refcounting it.

The /dev/zero ZERO_PAGE usage and TLB tricks also get nuked. I don't see much use to them except on benchmarks.

All other users of ZERO_PAGE are converted just to use ZERO_PAGE(0) for simplicity. We can look at replacing them all and maybe ripping out ZERO_PAGE completely when we are more satisfied with this solution.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus "snif" Torvalds <torvalds@linux-foundation.org>
author: Nick Piggin <npiggin@suse.de> 2007-10-16 04:24:40 -0400
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-10-16 12:42:53 -0400
commit: 557ed1fa2620dc119adb86b34c614e152a629a80 (patch)
tree: d00b31a7f197583c2bd8fffa1fd135fbbb5d6abc
parent: aadb4bc4a1f9108c1d0fbd121827c936c2ed4217 (diff)
6 files changed, 42 insertions, 244 deletions
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index bbee97ff355f..64551ab6be03 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -625,65 +625,10 @@ static ssize_t splice_write_null(struct pipe_inode_info *pipe,struct file *out,
        return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_null);
 }
-#ifdef CONFIG_MMU
-/*
- * For fun, we are using the MMU for this.
- */
-static inline size_t read_zero_pagealigned(char __user * buf, size_t size)
-{
-        struct mm_struct *mm;
-        struct vm_area_struct * vma;
-        unsigned long addr=(unsigned long)buf;
-        mm = current->mm;
-        /* Oops, this was forgotten before. -ben */
-        down_read(&mm->mmap_sem);
-        /* For private mappings, just map in zero pages. */
-        for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
-                unsigned long count;
-                if (vma->vm_start > addr || (vma->vm_flags & VM_WRITE) == 0)
-                        goto out_up;
-                if (vma->vm_flags & (VM_SHARED | VM_HUGETLB))
-                        break;
-                count = vma->vm_end - addr;
-                if (count > size)
-                        count = size;
-                zap_page_range(vma, addr, count, NULL);
-                if (zeromap_page_range(vma, addr, count, PAGE_COPY))
-                        break;
-                size -= count;
-                buf += count;
-                addr += count;
-                if (size == 0)
-                        goto out_up;
-        }
-        up_read(&mm->mmap_sem);
-        
-        /* The shared case is hard. Let's do the conventional zeroing. */ 
-        do {
-                unsigned long unwritten = clear_user(buf, PAGE_SIZE);
-                if (unwritten)
-                        return size + unwritten - PAGE_SIZE;
-                cond_resched();
-                buf += PAGE_SIZE;
-                size -= PAGE_SIZE;
-        } while (size);
-        return size;
-out_up:
-        up_read(&mm->mmap_sem);
-        return size;
-}
 static ssize_t read_zero(struct file * file, char __user * buf, 
                         size_t count, loff_t *ppos)
 {
-        unsigned long left, unwritten, written = 0;
+        size_t written;
        if (!count)
                return 0;
@@ -691,69 +636,33 @@ static ssize_t read_zero(struct file * file, char __user * buf,
        if (!access_ok(VERIFY_WRITE, buf, count))
                return -EFAULT;
-        left = count;
+        written = 0;
+        while (count) {
-        /* do we want to be clever? Arbitrary cut-off */
+                unsigned long unwritten;
-        if (count >= PAGE_SIZE*4) {
+                size_t chunk = count;
-                unsigned long partial;
-                /* How much left of the page? */
+                if (chunk > PAGE_SIZE)
-                partial = (PAGE_SIZE-1) & -(unsigned long) buf;
+                        chunk = PAGE_SIZE;      /* Just for latency reasons */
-                unwritten = clear_user(buf, partial);
+                unwritten = clear_user(buf, chunk);
-                written = partial - unwritten;
+                written += chunk - unwritten;
-                if (unwritten)
-                        goto out;
-                left -= partial;
-                buf += partial;
-                unwritten = read_zero_pagealigned(buf, left & PAGE_MASK);
-                written += (left & PAGE_MASK) - unwritten;
                if (unwritten)
-                        goto out;
+                        break;
-                buf += left & PAGE_MASK;
-                left &= ~PAGE_MASK;
-        }
-        unwritten = clear_user(buf, left);
-        written += left - unwritten;
-out:
-        return written ? written : -EFAULT;
-}
-static int mmap_zero(struct file * file, struct vm_area_struct * vma)
-{
-        int err;
-        if (vma->vm_flags & VM_SHARED)
-                return shmem_zero_setup(vma);
-        err = zeromap_page_range(vma, vma->vm_start,
-                        vma->vm_end - vma->vm_start, vma->vm_page_prot);
-        BUG_ON(err == -EEXIST);
-        return err;
-}
-#else /* CONFIG_MMU */
-static ssize_t read_zero(struct file * file, char * buf, 
-                         size_t count, loff_t *ppos)
-{
-        size_t todo = count;
-        while (todo) {
-                size_t chunk = todo;
-                if (chunk > 4096)
-                        chunk = 4096;   /* Just for latency reasons */
-                if (clear_user(buf, chunk))
-                        return -EFAULT;
                buf += chunk;
-                todo -= chunk;
+                count -= chunk;
                cond_resched();
        }
-        return count;
+        return written ? written : -EFAULT;
 }
 static int mmap_zero(struct file * file, struct vm_area_struct * vma)
 {
+#ifndef CONFIG_MMU
        return -ENOSYS;
+#endif
+        if (vma->vm_flags & VM_SHARED)
+                return shmem_zero_setup(vma);
+        return 0;
 }
-#endif /* CONFIG_MMU */
 static ssize_t write_full(struct file * file, const char __user * buf,
                          size_t count, loff_t *ppos)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index b1013f34085d..f3037c645ca9 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1725,7 +1725,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
                                                &page, &vma) <= 0) {
                                DUMP_SEEK(PAGE_SIZE);
                        } else {
-                                if (page == ZERO_PAGE(addr)) {
+                                if (page == ZERO_PAGE(0)) {
                                        if (!dump_seek(file, PAGE_SIZE)) {
                                                page_cache_release(page);
                                                goto end_coredump;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 2f5d8dbe676d..c5ca2f0aca7f 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1488,7 +1488,7 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size,
                                           &page, &vma) <= 0) {
                                DUMP_SEEK(file->f_pos + PAGE_SIZE);
                        }
-                        else if (page == ZERO_PAGE(addr)) {
+                        else if (page == ZERO_PAGE(0)) {
                                page_cache_release(page);
                                DUMP_SEEK(file->f_pos + PAGE_SIZE);
                        }
diff --git a/fs/direct-io.c b/fs/direct-io.c
index b5928a7b6a5a..acf0da1bd257 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -163,7 +163,7 @@ static int dio_refill_pages(struct dio *dio)
        up_read(&current->mm->mmap_sem);
        if (ret < 0 && dio->blocks_available && (dio->rw & WRITE)) {
-                struct page *page = ZERO_PAGE(dio->curr_user_address);
+                struct page *page = ZERO_PAGE(0);
                /*
                 * A memory fault, but the filesystem has some outstanding
                 * mapped blocks.  We need to use those blocks up to avoid
@@ -763,7 +763,7 @@ static void dio_zero_block(struct dio *dio, int end)
        this_chunk_bytes = this_chunk_blocks << dio->blkbits;
-        page = ZERO_PAGE(dio->curr_user_address);
+        page = ZERO_PAGE(0);
        if (submit_page_section(dio, page, 0, this_chunk_bytes, 
                                dio->next_block_for_io))
                return;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 291c4cc06ea7..fbbc29a29dff 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -779,8 +779,6 @@ void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma,
                unsigned long floor, unsigned long ceiling);
 int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
                        struct vm_area_struct *vma);
-int zeromap_page_range(struct vm_area_struct *vma, unsigned long from,
-                        unsigned long size, pgprot_t prot);
 void unmap_mapping_range(struct address_space *mapping,
                loff_t const holebegin, loff_t const holelen, int even_cows);
diff --git a/mm/memory.c b/mm/memory.c
index f82b359b2745..2a8430844b6d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -966,7 +966,7 @@ no_page_table:
         * has touched so far, we don't want to allocate page tables.
         */
        if (flags & FOLL_ANON) {
-                page = ZERO_PAGE(address);
+                page = ZERO_PAGE(0);
                if (flags & FOLL_GET)
                        get_page(page);
                BUG_ON(flags & FOLL_WRITE);
@@ -1111,95 +1111,6 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 }
 EXPORT_SYMBOL(get_user_pages);
-static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
-                        unsigned long addr, unsigned long end, pgprot_t prot)
-{
-        pte_t *pte;
-        spinlock_t *ptl;
-        int err = 0;
-        pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
-        if (!pte)
-                return -EAGAIN;
-        arch_enter_lazy_mmu_mode();
-        do {
-                struct page *page = ZERO_PAGE(addr);
-                pte_t zero_pte = pte_wrprotect(mk_pte(page, prot));
-                if (unlikely(!pte_none(*pte))) {
-                        err = -EEXIST;
-                        pte++;
-                        break;
-                }
-                page_cache_get(page);
-                page_add_file_rmap(page);
-                inc_mm_counter(mm, file_rss);
-                set_pte_at(mm, addr, pte, zero_pte);
-        } while (pte++, addr += PAGE_SIZE, addr != end);
-        arch_leave_lazy_mmu_mode();
-        pte_unmap_unlock(pte - 1, ptl);
-        return err;
-}
-static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
-                        unsigned long addr, unsigned long end, pgprot_t prot)
-{
-        pmd_t *pmd;
-        unsigned long next;
-        int err;
-        pmd = pmd_alloc(mm, pud, addr);
-        if (!pmd)
-                return -EAGAIN;
-        do {
-                next = pmd_addr_end(addr, end);
-                err = zeromap_pte_range(mm, pmd, addr, next, prot);
-                if (err)
-                        break;
-        } while (pmd++, addr = next, addr != end);
-        return err;
-}
-static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
-                        unsigned long addr, unsigned long end, pgprot_t prot)
-{
-        pud_t *pud;
-        unsigned long next;
-        int err;
-        pud = pud_alloc(mm, pgd, addr);
-        if (!pud)
-                return -EAGAIN;
-        do {
-                next = pud_addr_end(addr, end);
-                err = zeromap_pmd_range(mm, pud, addr, next, prot);
-                if (err)
-                        break;
-        } while (pud++, addr = next, addr != end);
-        return err;
-}
-int zeromap_page_range(struct vm_area_struct *vma,
-                        unsigned long addr, unsigned long size, pgprot_t prot)
-{
-        pgd_t *pgd;
-        unsigned long next;
-        unsigned long end = addr + size;
-        struct mm_struct *mm = vma->vm_mm;
-        int err;
-        BUG_ON(addr >= end);
-        pgd = pgd_offset(mm, addr);
-        flush_cache_range(vma, addr, end);
-        do {
-                next = pgd_addr_end(addr, end);
-                err = zeromap_pud_range(mm, pgd, addr, next, prot);
-                if (err)
-                        break;
-        } while (pgd++, addr = next, addr != end);
-        return err;
-}
 pte_t * fastcall get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl)
 {
        pgd_t * pgd = pgd_offset(mm, addr);
@@ -1717,16 +1628,11 @@ gotten:
        if (unlikely(anon_vma_prepare(vma)))
                goto oom;
-        if (old_page == ZERO_PAGE(address)) {
+        VM_BUG_ON(old_page == ZERO_PAGE(0));
-                new_page = alloc_zeroed_user_highpage_movable(vma, address);
+        new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
-                if (!new_page)
+        if (!new_page)
-                        goto oom;
+                goto oom;
-        } else {
+        cow_user_page(new_page, old_page, address, vma);
-                new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
-                if (!new_page)
-                        goto oom;
-                cow_user_page(new_page, old_page, address, vma);
-        }
        /*
         * Re-check the pte - we dropped the lock
@@ -2252,39 +2158,24 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
        spinlock_t *ptl;
        pte_t entry;
-        if (write_access) {
+        /* Allocate our own private page. */
-                /* Allocate our own private page. */
+        pte_unmap(page_table);
-                pte_unmap(page_table);
-                if (unlikely(anon_vma_prepare(vma)))
-                        goto oom;
-                page = alloc_zeroed_user_highpage_movable(vma, address);
-                if (!page)
-                        goto oom;
-                entry = mk_pte(page, vma->vm_page_prot);
-                entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-                page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+        if (unlikely(anon_vma_prepare(vma)))
-                if (!pte_none(*page_table))
+                goto oom;
-                        goto release;
+        page = alloc_zeroed_user_highpage_movable(vma, address);
-                inc_mm_counter(mm, anon_rss);
+        if (!page)
-                lru_cache_add_active(page);
+                goto oom;
-                page_add_new_anon_rmap(page, vma, address);
-        } else {
-                /* Map the ZERO_PAGE - vm_page_prot is readonly */
-                page = ZERO_PAGE(address);
-                page_cache_get(page);
-                entry = mk_pte(page, vma->vm_page_prot);
-                ptl = pte_lockptr(mm, pmd);
+        entry = mk_pte(page, vma->vm_page_prot);
-                spin_lock(ptl);
+        entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-                if (!pte_none(*page_table))
-                        goto release;
-                inc_mm_counter(mm, file_rss);
-                page_add_file_rmap(page);
-        }
+        page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+        if (!pte_none(*page_table))
+                goto release;
+        inc_mm_counter(mm, anon_rss);
+        lru_cache_add_active(page);
+        page_add_new_anon_rmap(page, vma, address);
        set_pte_at(mm, address, page_table, entry);
        /* No need to invalidate - it was non-present before */
author	Nick Piggin <npiggin@suse.de>	2007-10-16 04:24:40 -0400
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-10-16 12:42:53 -0400
commit	557ed1fa2620dc119adb86b34c614e152a629a80 (patch)
tree	d00b31a7f197583c2bd8fffa1fd135fbbb5d6abc
parent	aadb4bc4a1f9108c1d0fbd121827c936c2ed4217 (diff)

diff --git a/drivers/char/mem.c b/drivers/char/mem.c index bbee97ff355f..64551ab6be03 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c
@@ -625,65 +625,10 @@ static ssize_t splice_write_null(struct pipe_inode_info *pipe,struct file *out,
625	return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_null);	625	return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_null);
626	}	626	}
627		627
628	#ifdef CONFIG_MMU
629	/*
630	* For fun, we are using the MMU for this.
631	*/
632	static inline size_t read_zero_pagealigned(char __user * buf, size_t size)
633	{
634	struct mm_struct *mm;
635	struct vm_area_struct * vma;
636	unsigned long addr=(unsigned long)buf;
637
638	mm = current->mm;
639	/* Oops, this was forgotten before. -ben */
640	down_read(&mm->mmap_sem);
641
642	/* For private mappings, just map in zero pages. */
643	for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
644	unsigned long count;
645
646	if (vma->vm_start > addr || (vma->vm_flags & VM_WRITE) == 0)
647	goto out_up;
648	if (vma->vm_flags & (VM_SHARED | VM_HUGETLB))
649	break;
650	count = vma->vm_end - addr;
651	if (count > size)
652	count = size;
653
654	zap_page_range(vma, addr, count, NULL);
655	if (zeromap_page_range(vma, addr, count, PAGE_COPY))
656	break;
657
658	size -= count;
659	buf += count;
660	addr += count;
661	if (size == 0)
662	goto out_up;
663	}
664
665	up_read(&mm->mmap_sem);
666
667	/* The shared case is hard. Let's do the conventional zeroing. */
668	do {
669	unsigned long unwritten = clear_user(buf, PAGE_SIZE);
670	if (unwritten)
671	return size + unwritten - PAGE_SIZE;
672	cond_resched();
673	buf += PAGE_SIZE;
674	size -= PAGE_SIZE;
675	} while (size);
676
677	return size;
678	out_up:
679	up_read(&mm->mmap_sem);
680	return size;
681	}
682
683	static ssize_t read_zero(struct file * file, char __user * buf,	628	static ssize_t read_zero(struct file * file, char __user * buf,
684	size_t count, loff_t *ppos)	629	size_t count, loff_t *ppos)
685	{	630	{
686	unsigned long left, unwritten, written = 0;	631	size_t written;
687		632
688	if (!count)	633	if (!count)
689	return 0;	634	return 0;
@@ -691,69 +636,33 @@ static ssize_t read_zero(struct file * file, char __user * buf,
691	if (!access_ok(VERIFY_WRITE, buf, count))	636	if (!access_ok(VERIFY_WRITE, buf, count))
692	return -EFAULT;	637	return -EFAULT;
693		638
694	left = count;	639	written = 0;
695		640	while (count) {
696	/* do we want to be clever? Arbitrary cut-off */	641	unsigned long unwritten;
697	if (count >= PAGE_SIZE*4) {	642	size_t chunk = count;
698	unsigned long partial;
699		643
700	/* How much left of the page? */	644	if (chunk > PAGE_SIZE)
701	partial = (PAGE_SIZE-1) & -(unsigned long) buf;	645	chunk = PAGE_SIZE; /* Just for latency reasons */
702	unwritten = clear_user(buf, partial);	646	unwritten = clear_user(buf, chunk);
703	written = partial - unwritten;	647	written += chunk - unwritten;
704	if (unwritten)
705	goto out;
706	left -= partial;
707	buf += partial;
708	unwritten = read_zero_pagealigned(buf, left & PAGE_MASK);
709	written += (left & PAGE_MASK) - unwritten;
710	if (unwritten)	648	if (unwritten)
711	goto out;	649	break;
712	buf += left & PAGE_MASK;
713	left &= ~PAGE_MASK;
714	}
715	unwritten = clear_user(buf, left);
716	written += left - unwritten;
717	out:
718	return written ? written : -EFAULT;
719	}
720
721	static int mmap_zero(struct file * file, struct vm_area_struct * vma)
722	{
723	int err;
724
725	if (vma->vm_flags & VM_SHARED)
726	return shmem_zero_setup(vma);
727	err = zeromap_page_range(vma, vma->vm_start,
728	vma->vm_end - vma->vm_start, vma->vm_page_prot);
729	BUG_ON(err == -EEXIST);
730	return err;
731	}
732	#else /* CONFIG_MMU */
733	static ssize_t read_zero(struct file * file, char * buf,
734	size_t count, loff_t *ppos)
735	{
736	size_t todo = count;
737
738	while (todo) {
739	size_t chunk = todo;
740
741	if (chunk > 4096)
742	chunk = 4096; /* Just for latency reasons */
743	if (clear_user(buf, chunk))
744	return -EFAULT;
745	buf += chunk;	650	buf += chunk;
746	todo -= chunk;	651	count -= chunk;
747	cond_resched();	652	cond_resched();
748	}	653	}
749	return count;	654	return written ? written : -EFAULT;
750	}	655	}
751		656
752	static int mmap_zero(struct file * file, struct vm_area_struct * vma)	657	static int mmap_zero(struct file * file, struct vm_area_struct * vma)
753	{	658	{
		659	#ifndef CONFIG_MMU
754	return -ENOSYS;	660	return -ENOSYS;
		661	#endif
		662	if (vma->vm_flags & VM_SHARED)
		663	return shmem_zero_setup(vma);
		664	return 0;
755	}	665	}
756	#endif /* CONFIG_MMU */
757		666
758	static ssize_t write_full(struct file * file, const char __user * buf,	667	static ssize_t write_full(struct file * file, const char __user * buf,
759	size_t count, loff_t *ppos)	668	size_t count, loff_t *ppos)


diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index b1013f34085d..f3037c645ca9 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c
@@ -1725,7 +1725,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
1725	&page, &vma) <= 0) {	1725	&page, &vma) <= 0) {
1726	DUMP_SEEK(PAGE_SIZE);	1726	DUMP_SEEK(PAGE_SIZE);
1727	} else {	1727	} else {
1728	if (page == ZERO_PAGE(addr)) {	1728	if (page == ZERO_PAGE(0)) {
1729	if (!dump_seek(file, PAGE_SIZE)) {	1729	if (!dump_seek(file, PAGE_SIZE)) {
1730	page_cache_release(page);	1730	page_cache_release(page);
1731	goto end_coredump;	1731	goto end_coredump;


diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 2f5d8dbe676d..c5ca2f0aca7f 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c
@@ -1488,7 +1488,7 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size,
1488	&page, &vma) <= 0) {	1488	&page, &vma) <= 0) {
1489	DUMP_SEEK(file->f_pos + PAGE_SIZE);	1489	DUMP_SEEK(file->f_pos + PAGE_SIZE);
1490	}	1490	}
1491	else if (page == ZERO_PAGE(addr)) {	1491	else if (page == ZERO_PAGE(0)) {
1492	page_cache_release(page);	1492	page_cache_release(page);
1493	DUMP_SEEK(file->f_pos + PAGE_SIZE);	1493	DUMP_SEEK(file->f_pos + PAGE_SIZE);
1494	}	1494	}


diff --git a/fs/direct-io.c b/fs/direct-io.c index b5928a7b6a5a..acf0da1bd257 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c
@@ -163,7 +163,7 @@ static int dio_refill_pages(struct dio *dio)
163	up_read(&current->mm->mmap_sem);	163	up_read(&current->mm->mmap_sem);
164		164
165	if (ret < 0 && dio->blocks_available && (dio->rw & WRITE)) {	165	if (ret < 0 && dio->blocks_available && (dio->rw & WRITE)) {
166	struct page *page = ZERO_PAGE(dio->curr_user_address);	166	struct page *page = ZERO_PAGE(0);
167	/*	167	/*
168	* A memory fault, but the filesystem has some outstanding	168	* A memory fault, but the filesystem has some outstanding
169	* mapped blocks. We need to use those blocks up to avoid	169	* mapped blocks. We need to use those blocks up to avoid
@@ -763,7 +763,7 @@ static void dio_zero_block(struct dio *dio, int end)
763		763
764	this_chunk_bytes = this_chunk_blocks << dio->blkbits;	764	this_chunk_bytes = this_chunk_blocks << dio->blkbits;
765		765
766	page = ZERO_PAGE(dio->curr_user_address);	766	page = ZERO_PAGE(0);
767	if (submit_page_section(dio, page, 0, this_chunk_bytes,	767	if (submit_page_section(dio, page, 0, this_chunk_bytes,
768	dio->next_block_for_io))	768	dio->next_block_for_io))
769	return;	769	return;


diff --git a/include/linux/mm.h b/include/linux/mm.h index 291c4cc06ea7..fbbc29a29dff 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h
@@ -779,8 +779,6 @@ void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma,
779	unsigned long floor, unsigned long ceiling);	779	unsigned long floor, unsigned long ceiling);
780	int copy_page_range(struct mm_struct *dst, struct mm_struct *src,	780	int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
781	struct vm_area_struct *vma);	781	struct vm_area_struct *vma);
782	int zeromap_page_range(struct vm_area_struct *vma, unsigned long from,
783	unsigned long size, pgprot_t prot);
784	void unmap_mapping_range(struct address_space *mapping,	782	void unmap_mapping_range(struct address_space *mapping,
785	loff_t const holebegin, loff_t const holelen, int even_cows);	783	loff_t const holebegin, loff_t const holelen, int even_cows);
786		784


diff --git a/mm/memory.c b/mm/memory.c index f82b359b2745..2a8430844b6d 100644 --- a/mm/memory.c +++ b/mm/memory.c
@@ -966,7 +966,7 @@ no_page_table:
966	* has touched so far, we don't want to allocate page tables.	966	* has touched so far, we don't want to allocate page tables.
967	*/	967	*/
968	if (flags & FOLL_ANON) {	968	if (flags & FOLL_ANON) {
969	page = ZERO_PAGE(address);	969	page = ZERO_PAGE(0);
970	if (flags & FOLL_GET)	970	if (flags & FOLL_GET)
971	get_page(page);	971	get_page(page);
972	BUG_ON(flags & FOLL_WRITE);	972	BUG_ON(flags & FOLL_WRITE);
@@ -1111,95 +1111,6 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1111	}	1111	}
1112	EXPORT_SYMBOL(get_user_pages);	1112	EXPORT_SYMBOL(get_user_pages);
1113		1113
1114	static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
1115	unsigned long addr, unsigned long end, pgprot_t prot)
1116	{
1117	pte_t *pte;
1118	spinlock_t *ptl;
1119	int err = 0;
1120
1121	pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
1122	if (!pte)
1123	return -EAGAIN;
1124	arch_enter_lazy_mmu_mode();
1125	do {
1126	struct page *page = ZERO_PAGE(addr);
1127	pte_t zero_pte = pte_wrprotect(mk_pte(page, prot));
1128
1129	if (unlikely(!pte_none(*pte))) {
1130	err = -EEXIST;
1131	pte++;
1132	break;
1133	}
1134	page_cache_get(page);
1135	page_add_file_rmap(page);
1136	inc_mm_counter(mm, file_rss);
1137	set_pte_at(mm, addr, pte, zero_pte);
1138	} while (pte++, addr += PAGE_SIZE, addr != end);
1139	arch_leave_lazy_mmu_mode();
1140	pte_unmap_unlock(pte - 1, ptl);
1141	return err;
1142	}
1143
1144	static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
1145	unsigned long addr, unsigned long end, pgprot_t prot)
1146	{
1147	pmd_t *pmd;
1148	unsigned long next;
1149	int err;
1150
1151	pmd = pmd_alloc(mm, pud, addr);
1152	if (!pmd)
1153	return -EAGAIN;
1154	do {
1155	next = pmd_addr_end(addr, end);
1156	err = zeromap_pte_range(mm, pmd, addr, next, prot);
1157	if (err)
1158	break;
1159	} while (pmd++, addr = next, addr != end);
1160	return err;
1161	}
1162
1163	static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
1164	unsigned long addr, unsigned long end, pgprot_t prot)
1165	{
1166	pud_t *pud;
1167	unsigned long next;
1168	int err;
1169
1170	pud = pud_alloc(mm, pgd, addr);
1171	if (!pud)
1172	return -EAGAIN;
1173	do {
1174	next = pud_addr_end(addr, end);
1175	err = zeromap_pmd_range(mm, pud, addr, next, prot);
1176	if (err)
1177	break;
1178	} while (pud++, addr = next, addr != end);
1179	return err;
1180	}
1181
1182	int zeromap_page_range(struct vm_area_struct *vma,
1183	unsigned long addr, unsigned long size, pgprot_t prot)
1184	{
1185	pgd_t *pgd;
1186	unsigned long next;
1187	unsigned long end = addr + size;
1188	struct mm_struct *mm = vma->vm_mm;
1189	int err;
1190
1191	BUG_ON(addr >= end);
1192	pgd = pgd_offset(mm, addr);
1193	flush_cache_range(vma, addr, end);
1194	do {
1195	next = pgd_addr_end(addr, end);
1196	err = zeromap_pud_range(mm, pgd, addr, next, prot);
1197	if (err)
1198	break;
1199	} while (pgd++, addr = next, addr != end);
1200	return err;
1201	}
1202
1203	pte_t * fastcall get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl)	1114	pte_t * fastcall get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl)
1204	{	1115	{
1205	pgd_t * pgd = pgd_offset(mm, addr);	1116	pgd_t * pgd = pgd_offset(mm, addr);
@@ -1717,16 +1628,11 @@ gotten:
1717		1628
1718	if (unlikely(anon_vma_prepare(vma)))	1629	if (unlikely(anon_vma_prepare(vma)))
1719	goto oom;	1630	goto oom;
1720	if (old_page == ZERO_PAGE(address)) {	1631	VM_BUG_ON(old_page == ZERO_PAGE(0));
1721	new_page = alloc_zeroed_user_highpage_movable(vma, address);	1632	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
1722	if (!new_page)	1633	if (!new_page)
1723	goto oom;	1634	goto oom;
1724	} else {	1635	cow_user_page(new_page, old_page, address, vma);
1725	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
1726	if (!new_page)
1727	goto oom;
1728	cow_user_page(new_page, old_page, address, vma);
1729	}
1730		1636
1731	/*	1637	/*
1732	* Re-check the pte - we dropped the lock	1638	* Re-check the pte - we dropped the lock
@@ -2252,39 +2158,24 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
2252	spinlock_t *ptl;	2158	spinlock_t *ptl;
2253	pte_t entry;	2159	pte_t entry;
2254		2160
2255	if (write_access) {	2161	/* Allocate our own private page. */
2256	/* Allocate our own private page. */	2162	pte_unmap(page_table);
2257	pte_unmap(page_table);
2258
2259	if (unlikely(anon_vma_prepare(vma)))
2260	goto oom;
2261	page = alloc_zeroed_user_highpage_movable(vma, address);
2262	if (!page)
2263	goto oom;
2264
2265	entry = mk_pte(page, vma->vm_page_prot);
2266	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
2267		2163
2268	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);	2164	if (unlikely(anon_vma_prepare(vma)))
2269	if (!pte_none(*page_table))	2165	goto oom;
2270	goto release;	2166	page = alloc_zeroed_user_highpage_movable(vma, address);
2271	inc_mm_counter(mm, anon_rss);	2167	if (!page)
2272	lru_cache_add_active(page);	2168	goto oom;
2273	page_add_new_anon_rmap(page, vma, address);
2274	} else {
2275	/* Map the ZERO_PAGE - vm_page_prot is readonly */
2276	page = ZERO_PAGE(address);
2277	page_cache_get(page);
2278	entry = mk_pte(page, vma->vm_page_prot);
2279		2169
2280	ptl = pte_lockptr(mm, pmd);	2170	entry = mk_pte(page, vma->vm_page_prot);
2281	spin_lock(ptl);	2171	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
2282	if (!pte_none(*page_table))
2283	goto release;
2284	inc_mm_counter(mm, file_rss);
2285	page_add_file_rmap(page);
2286	}
2287		2172
		2173	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
		2174	if (!pte_none(*page_table))
		2175	goto release;
		2176	inc_mm_counter(mm, anon_rss);
		2177	lru_cache_add_active(page);
		2178	page_add_new_anon_rmap(page, vma, address);
2288	set_pte_at(mm, address, page_table, entry);	2179	set_pte_at(mm, address, page_table, entry);
2289		2180
2290	/* No need to invalidate - it was non-present before */	2181	/* No need to invalidate - it was non-present before */