-rw-r--r--  arch/ppc64/kernel/vdso.c   |  12
-rw-r--r--  arch/sparc/mm/generic.c    |   3
-rw-r--r--  arch/sparc64/mm/generic.c  |   3
-rw-r--r--  drivers/scsi/sg.c          |  12
-rw-r--r--  drivers/scsi/st.c          |  10
-rw-r--r--  fs/direct-io.c             |   4
-rw-r--r--  include/linux/mm.h         |   5
-rw-r--r--  kernel/power/swsusp.c      |  25
-rw-r--r--  mm/bootmem.c               |   1
-rw-r--r--  mm/filemap_xip.c           |  11
-rw-r--r--  mm/fremap.c                |  23
-rw-r--r--  mm/madvise.c               |   2
-rw-r--r--  mm/memory.c                | 131
-rw-r--r--  mm/mempolicy.c             |  29
-rw-r--r--  mm/mmap.c                  |  11
-rw-r--r--  mm/mprotect.c              |   8
-rw-r--r--  mm/msync.c                 |  17
-rw-r--r--  mm/page_alloc.c            |  14
-rw-r--r--  mm/rmap.c                  |  14
-rw-r--r--  mm/shmem.c                 |   4
-rw-r--r--  mm/swap.c                  |   4
-rw-r--r--  sound/core/pcm_native.c    |   9
22 files changed, 218 insertions, 134 deletions
diff --git a/arch/ppc64/kernel/vdso.c b/arch/ppc64/kernel/vdso.c
index efa985f05aca..4aacf521e3e4 100644
--- a/arch/ppc64/kernel/vdso.c
+++ b/arch/ppc64/kernel/vdso.c
@@ -176,13 +176,13 @@ static struct page * vdso_vma_nopage(struct vm_area_struct * vma,
         return NOPAGE_SIGBUS;
 
     /*
-     * Last page is systemcfg, special handling here, no get_page() a
-     * this is a reserved page
+     * Last page is systemcfg.
     */
     if ((vma->vm_end - address) <= PAGE_SIZE)
-        return virt_to_page(systemcfg);
+        pg = virt_to_page(systemcfg);
+    else
+        pg = virt_to_page(vbase + offset);
 
-    pg = virt_to_page(vbase + offset);
     get_page(pg);
     DBG(" ->page count: %d\n", page_count(pg));
 
@@ -259,7 +259,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack)
     * gettimeofday will be totally dead. It's fine to use that for setting
     * breakpoints in the vDSO code pages though
     */
-    vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
+    vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | VM_RESERVED;
     vma->vm_flags |= mm->def_flags;
     vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
     vma->vm_ops = &vdso_vmops;
@@ -603,6 +603,8 @@ void __init vdso_init(void)
         ClearPageReserved(pg);
         get_page(pg);
     }
+
+    get_page(virt_to_page(systemcfg));
 }
 
 int in_gate_area_no_task(unsigned long addr)
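The nopage rewrite in the first hunk exists because put_page() (see the include/linux/mm.h hunk below) no longer ignores reserved pages: every page a nopage handler returns must carry a real reference. Condensed from the hunks above (not the full function), the resulting logic is:

    if ((vma->vm_end - address) <= PAGE_SIZE)
            pg = virt_to_page(systemcfg);   /* reserved page: refcounted too now */
    else
            pg = virt_to_page(vbase + offset);

    get_page(pg);                           /* unconditional reference for the caller */
    return pg;

The new get_page(virt_to_page(systemcfg)) in vdso_init() pins a permanent reference on the systemcfg page so its count can never drop to zero while it is being handed out this way.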
diff --git a/arch/sparc/mm/generic.c b/arch/sparc/mm/generic.c
index 20ccb957fb77..659c9a71f867 100644
--- a/arch/sparc/mm/generic.c
+++ b/arch/sparc/mm/generic.c
@@ -73,6 +73,9 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
     int space = GET_IOSPACE(pfn);
     unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT;
 
+    /* See comment in mm/memory.c remap_pfn_range */
+    vma->vm_flags |= VM_IO | VM_RESERVED;
+
     prot = __pgprot(pg_iobits);
     offset -= from;
     dir = pgd_offset(mm, from);
diff --git a/arch/sparc64/mm/generic.c b/arch/sparc64/mm/generic.c
index c954d91f01d0..afc01cec701f 100644
--- a/arch/sparc64/mm/generic.c
+++ b/arch/sparc64/mm/generic.c
@@ -127,6 +127,9 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
     int space = GET_IOSPACE(pfn);
     unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT;
 
+    /* See comment in mm/memory.c remap_pfn_range */
+    vma->vm_flags |= VM_IO | VM_RESERVED;
+
     prot = __pgprot(pg_iobits);
     offset -= from;
     dir = pgd_offset(mm, from);
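Both sparc variants now tag the vma exactly as the generic remap_pfn_range() does (see the mm/memory.c hunk below for the comment being referenced). From a driver's point of view the pattern looks roughly like this sketch; the device name and mydev_phys_addr are hypothetical:

    #include <linux/fs.h>
    #include <linux/mm.h>

    static unsigned long mydev_phys_addr;  /* hypothetical device base address */

    static int mydev_mmap(struct file *file, struct vm_area_struct *vma)
    {
            unsigned long size = vma->vm_end - vma->vm_start;

            /* io_remap_pfn_range() now sets VM_IO | VM_RESERVED itself,
             * so the core VM will neither refcount nor rmap these ptes. */
            return io_remap_pfn_range(vma, vma->vm_start,
                                      mydev_phys_addr >> PAGE_SHIFT,
                                      size, vma->vm_page_prot);
    }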
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 861e51375d70..2d30b46806bf 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1886,13 +1886,17 @@ st_unmap_user_pages(struct scatterlist *sgl, const unsigned int nr_pages,
     int i;
 
     for (i=0; i < nr_pages; i++) {
-        if (dirtied && !PageReserved(sgl[i].page))
-            SetPageDirty(sgl[i].page);
-        /* unlock_page(sgl[i].page); */
+        struct page *page = sgl[i].page;
+
+        /* XXX: just for debug. Remove when PageReserved is removed */
+        BUG_ON(PageReserved(page));
+        if (dirtied)
+            SetPageDirty(page);
+        /* unlock_page(page); */
         /* FIXME: cache flush missing for rw==READ
         * FIXME: call the correct reference counting function
         */
-        page_cache_release(sgl[i].page);
+        page_cache_release(page);
     }
 
     return 0;
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 5eb54d8019b4..da9766283bd7 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -4526,12 +4526,16 @@ static int sgl_unmap_user_pages(struct scatterlist *sgl, const unsigned int nr_p
     int i;
 
     for (i=0; i < nr_pages; i++) {
-        if (dirtied && !PageReserved(sgl[i].page))
-            SetPageDirty(sgl[i].page);
+        struct page *page = sgl[i].page;
+
+        /* XXX: just for debug. Remove when PageReserved is removed */
+        BUG_ON(PageReserved(page));
+        if (dirtied)
+            SetPageDirty(page);
         /* FIXME: cache flush missing for rw==READ
         * FIXME: call the correct reference counting function
         */
-        page_cache_release(sgl[i].page);
+        page_cache_release(page);
     }
 
     return 0;
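Both SCSI hunks follow one template: get_user_pages() (see the mm/memory.c hunk below) now takes a reference on every page it returns, reserved or not, so the unmap side dirties and releases unconditionally. Condensed from the hunks above:

    for (i = 0; i < nr_pages; i++) {
            struct page *page = sgl[i].page;

            if (dirtied)
                    SetPageDirty(page);     /* no PageReserved test anymore */
            page_cache_release(page);       /* balances get_user_pages()    */
    }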
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 0d06097bc995..3931e7f1e6bf 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -162,6 +162,7 @@ static int dio_refill_pages(struct dio *dio)
     up_read(&current->mm->mmap_sem);
 
     if (ret < 0 && dio->blocks_available && (dio->rw == WRITE)) {
+        struct page *page = ZERO_PAGE(dio->curr_user_address);
         /*
         * A memory fault, but the filesystem has some outstanding
         * mapped blocks. We need to use those blocks up to avoid
@@ -169,7 +170,8 @@ static int dio_refill_pages(struct dio *dio)
         */
         if (dio->page_errors == 0)
             dio->page_errors = ret;
-        dio->pages[0] = ZERO_PAGE(dio->curr_user_address);
+        page_cache_get(page);
+        dio->pages[0] = page;
         dio->head = 0;
         dio->tail = 1;
         ret = 0;
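With the PageReserved escape hatch gone from put_page(), parking ZERO_PAGE in dio->pages[0] without taking a reference would let the dio completion path drop a reference that was never acquired. The rule the hunk enforces, in miniature (names outside the hunk are illustrative):

    struct page *page = ZERO_PAGE(user_addr);   /* user_addr: any user address */

    page_cache_get(page);           /* one reference per pages[] slot ... */
    dio->pages[0] = page;
    /* ... later, in the generic release path: */
    page_cache_release(page);       /* ... dropped unconditionally */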
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0c64484d8ae0..da42093250c3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -157,7 +157,7 @@ extern unsigned int kobjsize(const void *objp);
 
 #define VM_DONTCOPY	0x00020000	/* Do not copy this vma on fork */
 #define VM_DONTEXPAND	0x00040000	/* Cannot expand with mremap() */
-#define VM_RESERVED	0x00080000	/* Don't unmap it from swap_out */
+#define VM_RESERVED	0x00080000	/* Pages managed in a special way */
 #define VM_ACCOUNT	0x00100000	/* Is a VM accounted object */
 #define VM_HUGETLB	0x00400000	/* Huge TLB Page VM */
 #define VM_NONLINEAR	0x00800000	/* Is non-linear (remap_file_pages) */
@@ -338,7 +338,7 @@ static inline void get_page(struct page *page)
 
 static inline void put_page(struct page *page)
 {
-    if (!PageReserved(page) && put_page_testzero(page))
+    if (put_page_testzero(page))
         __page_cache_release(page);
 }
 
@@ -723,6 +723,7 @@ void install_arg_page(struct vm_area_struct *, struct page *, unsigned long);
 
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
         int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
+void print_bad_pte(struct vm_area_struct *, pte_t, unsigned long);
 
 int __set_page_dirty_buffers(struct page *page);
 int __set_page_dirty_nobuffers(struct page *page);
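This put_page() change is the heart of the patch: page lifetime is now governed purely by the refcount, and PG_reserved stops being a license to skip accounting. Everything else in the diff is callers being brought in line with one rule, sketched here (the page pointer is illustrative):

    struct page *page = virt_to_page(addr);    /* any page, reserved or not */

    get_page(page);     /* every get ...                              */
    /* ... use the page ... */
    put_page(page);     /* ... must now be balanced by a put; the
                         * page is freed when the count reaches 0     */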
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 10bc5ec496d7..016504ccfccf 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -578,15 +578,23 @@ static int save_highmem_zone(struct zone *zone)
             continue;
         page = pfn_to_page(pfn);
         /*
-         * This condition results from rvmalloc() sans vmalloc_32()
-         * and architectural memory reservations. This should be
-         * corrected eventually when the cases giving rise to this
-         * are better understood.
+         * PageReserved results from rvmalloc() sans vmalloc_32()
+         * and architectural memory reservations.
+         *
+         * rvmalloc should not cause this, because all implementations
+         * appear to always be using vmalloc_32 on architectures with
+         * highmem. This is a good thing, because we would like to save
+         * rvmalloc pages.
+         *
+         * It appears to be triggered by pages which do not point to
+         * valid memory (see arch/i386/mm/init.c:one_highpage_init(),
+         * which sets PageReserved if the page does not point to valid
+         * RAM.
+         *
+         * XXX: must remove usage of PageReserved!
         */
-        if (PageReserved(page)) {
-            printk("highmem reserved page?!\n");
+        if (PageReserved(page))
             continue;
-        }
         BUG_ON(PageNosave(page));
         if (PageNosaveFree(page))
             continue;
@@ -672,10 +680,9 @@ static int saveable(struct zone * zone, unsigned long * zone_pfn)
         return 0;
 
     page = pfn_to_page(pfn);
-    BUG_ON(PageReserved(page) && PageNosave(page));
     if (PageNosave(page))
         return 0;
-    if (PageReserved(page) && pfn_is_nosave(pfn)) {
+    if (pfn_is_nosave(pfn)) {
         pr_debug("[nosave pfn 0x%lx]", pfn);
         return 0;
     }
diff --git a/mm/bootmem.c b/mm/bootmem.c
index a58699b6579e..e8c567177dcf 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -305,6 +305,7 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
             if (j + 16 < BITS_PER_LONG)
                 prefetchw(page + j + 16);
             __ClearPageReserved(page + j);
+            set_page_count(page + j, 0);
         }
         __free_pages(page, order);
         i += BITS_PER_LONG;
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 8c199f537732..9354ee279b13 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -174,6 +174,7 @@ __xip_unmap (struct address_space * mapping,
     unsigned long address;
     pte_t *pte;
     pte_t pteval;
+    struct page *page = ZERO_PAGE(address);
 
     spin_lock(&mapping->i_mmap_lock);
     vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
@@ -185,15 +186,17 @@ __xip_unmap (struct address_space * mapping,
         * We need the page_table_lock to protect us from page faults,
         * munmap, fork, etc...
         */
-        pte = page_check_address(ZERO_PAGE(address), mm,
-                     address);
+        pte = page_check_address(page, mm, address);
         if (!IS_ERR(pte)) {
             /* Nuke the page table entry. */
             flush_cache_page(vma, address, pte_pfn(*pte));
             pteval = ptep_clear_flush(vma, address, pte);
+            page_remove_rmap(page);
+            dec_mm_counter(mm, file_rss);
             BUG_ON(pte_dirty(pteval));
             pte_unmap(pte);
             spin_unlock(&mm->page_table_lock);
+            page_cache_release(page);
         }
     }
     spin_unlock(&mapping->i_mmap_lock);
@@ -228,7 +231,7 @@ xip_file_nopage(struct vm_area_struct * area,
 
     page = mapping->a_ops->get_xip_page(mapping, pgoff*(PAGE_SIZE/512), 0);
     if (!IS_ERR(page)) {
-        return page;
+        goto out;
     }
     if (PTR_ERR(page) != -ENODATA)
         return NULL;
@@ -249,6 +252,8 @@ xip_file_nopage(struct vm_area_struct * area,
         page = ZERO_PAGE(address);
     }
 
+out:
+    page_cache_get(page);
     return page;
 }
 
diff --git a/mm/fremap.c b/mm/fremap.c
index fd7f2a17ff3e..224cc1598b35 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -29,19 +29,20 @@ static inline void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
         return;
     if (pte_present(pte)) {
         unsigned long pfn = pte_pfn(pte);
+        struct page *page;
 
         flush_cache_page(vma, addr, pfn);
         pte = ptep_clear_flush(vma, addr, ptep);
-        if (pfn_valid(pfn)) {
-            struct page *page = pfn_to_page(pfn);
-            if (!PageReserved(page)) {
-                if (pte_dirty(pte))
-                    set_page_dirty(page);
-                page_remove_rmap(page);
-                page_cache_release(page);
-                dec_mm_counter(mm, file_rss);
-            }
+        if (unlikely(!pfn_valid(pfn))) {
+            print_bad_pte(vma, pte, addr);
+            return;
         }
+        page = pfn_to_page(pfn);
+        if (pte_dirty(pte))
+            set_page_dirty(page);
+        page_remove_rmap(page);
+        page_cache_release(page);
+        dec_mm_counter(mm, file_rss);
     } else {
         if (!pte_file(pte))
             free_swap_and_cache(pte_to_swp_entry(pte));
@@ -65,6 +66,8 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
     pgd_t *pgd;
     pte_t pte_val;
 
+    BUG_ON(vma->vm_flags & VM_RESERVED);
+
     pgd = pgd_offset(mm, addr);
     spin_lock(&mm->page_table_lock);
 
@@ -125,6 +128,8 @@ int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
     pgd_t *pgd;
     pte_t pte_val;
 
+    BUG_ON(vma->vm_flags & VM_RESERVED);
+
     pgd = pgd_offset(mm, addr);
     spin_lock(&mm->page_table_lock);
 
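With VM_RESERVED vmas rejected up front by the two BUG_ONs, zap_pte() may assume every present pte in a nonlinear mapping points at a normal, rmap-managed page, so an invalid pfn is now reported instead of silently skipped. The new present-pte path, condensed from the first hunk:

    if (unlikely(!pfn_valid(pfn))) {
            print_bad_pte(vma, pte, addr);  /* corrupt pte: complain loudly */
            return;
    }
    page = pfn_to_page(pfn);
    if (pte_dirty(pte))
            set_page_dirty(page);
    page_remove_rmap(page);
    page_cache_release(page);
    dec_mm_counter(mm, file_rss);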
diff --git a/mm/madvise.c b/mm/madvise.c
index 20e075d1c64c..17aaf3e16449 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -126,7 +126,7 @@ static long madvise_dontneed(struct vm_area_struct * vma,
                 unsigned long start, unsigned long end)
 {
     *prev = vma;
-    if ((vma->vm_flags & VM_LOCKED) || is_vm_hugetlb_page(vma))
+    if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_RESERVED))
         return -EINVAL;
 
     if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
diff --git a/mm/memory.c b/mm/memory.c
index da642b5528fa..e83f9440bb66 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -343,6 +343,23 @@ static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
 #define NO_RSS 2	/* Increment neither file_rss nor anon_rss */
 
 /*
+ * This function is called to print an error when a pte in a
+ * !VM_RESERVED region is found pointing to an invalid pfn (which
+ * is an error.
+ *
+ * The calling function must still handle the error.
+ */
+void print_bad_pte(struct vm_area_struct *vma, pte_t pte, unsigned long vaddr)
+{
+    printk(KERN_ERR "Bad pte = %08llx, process = %s, "
+            "vm_flags = %lx, vaddr = %lx\n",
+        (long long)pte_val(pte),
+        (vma->vm_mm == current->mm ? current->comm : "???"),
+        vma->vm_flags, vaddr);
+    dump_stack();
+}
+
+/*
 * copy one vm_area from one task to the other. Assumes the page tables
 * already present in the new task to be cleared in the whole range
 * covered by this vma.
@@ -353,9 +370,10 @@ static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
 
 static inline int
 copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-        pte_t *dst_pte, pte_t *src_pte, unsigned long vm_flags,
+        pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
         unsigned long addr)
 {
+    unsigned long vm_flags = vma->vm_flags;
     pte_t pte = *src_pte;
     struct page *page;
     unsigned long pfn;
@@ -375,18 +393,22 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
         goto out_set_pte;
     }
 
+    /* If the region is VM_RESERVED, the mapping is not
+     * mapped via rmap - duplicate the pte as is.
+     */
+    if (vm_flags & VM_RESERVED)
+        goto out_set_pte;
+
     pfn = pte_pfn(pte);
-    /* the pte points outside of valid memory, the
-     * mapping is assumed to be good, meaningful
-     * and not mapped via rmap - duplicate the
-     * mapping as is.
+    /* If the pte points outside of valid memory but
+     * the region is not VM_RESERVED, we have a problem.
     */
-    page = NULL;
-    if (pfn_valid(pfn))
-        page = pfn_to_page(pfn);
+    if (unlikely(!pfn_valid(pfn))) {
+        print_bad_pte(vma, pte, addr);
+        goto out_set_pte; /* try to do something sane */
+    }
 
-    if (!page || PageReserved(page))
-        goto out_set_pte;
+    page = pfn_to_page(pfn);
 
     /*
     * If it's a COW mapping, write protect it both
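Condensed, the fork-time copy path now distinguishes three cases for a present pte, where the old code had to guess from pfn_valid() and PageReserved():

    if (vm_flags & VM_RESERVED)         /* driver-managed vma: copy pte as is  */
            goto out_set_pte;
    if (unlikely(!pfn_valid(pfn))) {    /* bad pte in a normal vma: report it  */
            print_bad_pte(vma, pte, addr);
            goto out_set_pte;
    }
    page = pfn_to_page(pfn);            /* normal page: refcount + rmap below  */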
@@ -418,7 +440,6 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
         unsigned long addr, unsigned long end)
 {
     pte_t *src_pte, *dst_pte;
-    unsigned long vm_flags = vma->vm_flags;
     int progress = 0;
     int rss[NO_RSS+1], anon;
 
@@ -446,8 +467,7 @@ again:
             progress++;
             continue;
         }
-        anon = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
-                        vm_flags, addr);
+        anon = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma,addr);
         rss[anon]++;
         progress += 8;
     } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
@@ -541,10 +561,12 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
     return 0;
 }
 
-static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
+static void zap_pte_range(struct mmu_gather *tlb,
+                struct vm_area_struct *vma, pmd_t *pmd,
                 unsigned long addr, unsigned long end,
                 struct zap_details *details)
 {
+    struct mm_struct *mm = tlb->mm;
     pte_t *pte;
     int file_rss = 0;
     int anon_rss = 0;
@@ -556,11 +578,12 @@ static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
             continue;
         if (pte_present(ptent)) {
             struct page *page = NULL;
-            unsigned long pfn = pte_pfn(ptent);
-            if (pfn_valid(pfn)) {
-                page = pfn_to_page(pfn);
-                if (PageReserved(page))
-                    page = NULL;
+            if (!(vma->vm_flags & VM_RESERVED)) {
+                unsigned long pfn = pte_pfn(ptent);
+                if (unlikely(!pfn_valid(pfn)))
+                    print_bad_pte(vma, ptent, addr);
+                else
+                    page = pfn_to_page(pfn);
             }
             if (unlikely(details) && page) {
                 /*
@@ -580,7 +603,7 @@ static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
                     page->index > details->last_index))
                     continue;
             }
-            ptent = ptep_get_and_clear_full(tlb->mm, addr, pte,
+            ptent = ptep_get_and_clear_full(mm, addr, pte,
                             tlb->fullmm);
             tlb_remove_tlb_entry(tlb, pte, addr);
             if (unlikely(!page))
@@ -588,7 +611,7 @@ static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
             if (unlikely(details) && details->nonlinear_vma
                 && linear_page_index(details->nonlinear_vma,
                         addr) != page->index)
-                set_pte_at(tlb->mm, addr, pte,
+                set_pte_at(mm, addr, pte,
                        pgoff_to_pte(page->index));
             if (PageAnon(page))
                 anon_rss++;
@@ -611,14 +634,15 @@ static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
             continue;
         if (!pte_file(ptent))
             free_swap_and_cache(pte_to_swp_entry(ptent));
-        pte_clear_full(tlb->mm, addr, pte, tlb->fullmm);
+        pte_clear_full(mm, addr, pte, tlb->fullmm);
     } while (pte++, addr += PAGE_SIZE, addr != end);
 
-    add_mm_rss(tlb->mm, -file_rss, -anon_rss);
+    add_mm_rss(mm, -file_rss, -anon_rss);
     pte_unmap(pte - 1);
 }
 
-static inline void zap_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+static inline void zap_pmd_range(struct mmu_gather *tlb,
+                struct vm_area_struct *vma, pud_t *pud,
                 unsigned long addr, unsigned long end,
                 struct zap_details *details)
 {
@@ -630,11 +654,12 @@ static inline void zap_pmd_range(struct mmu_gather *tlb, pud_t *pud,
         next = pmd_addr_end(addr, end);
         if (pmd_none_or_clear_bad(pmd))
             continue;
-        zap_pte_range(tlb, pmd, addr, next, details);
+        zap_pte_range(tlb, vma, pmd, addr, next, details);
     } while (pmd++, addr = next, addr != end);
 }
 
-static inline void zap_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+static inline void zap_pud_range(struct mmu_gather *tlb,
+                struct vm_area_struct *vma, pgd_t *pgd,
                 unsigned long addr, unsigned long end,
                 struct zap_details *details)
 {
@@ -646,7 +671,7 @@ static inline void zap_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
         next = pud_addr_end(addr, end);
         if (pud_none_or_clear_bad(pud))
             continue;
-        zap_pmd_range(tlb, pud, addr, next, details);
+        zap_pmd_range(tlb, vma, pud, addr, next, details);
     } while (pud++, addr = next, addr != end);
 }
 
@@ -667,7 +692,7 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
         next = pgd_addr_end(addr, end);
         if (pgd_none_or_clear_bad(pgd))
             continue;
-        zap_pud_range(tlb, pgd, addr, next, details);
+        zap_pud_range(tlb, vma, pgd, addr, next, details);
     } while (pgd++, addr = next, addr != end);
     tlb_end_vma(tlb, vma);
 }
@@ -967,7 +992,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
             continue;
         }
 
-        if (!vma || (vma->vm_flags & VM_IO)
+        if (!vma || (vma->vm_flags & (VM_IO | VM_RESERVED))
                 || !(flags & vma->vm_flags))
             return i ? : -EFAULT;
 
@@ -1027,8 +1052,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
             if (pages) {
                 pages[i] = page;
                 flush_dcache_page(page);
-                if (!PageReserved(page))
-                    page_cache_get(page);
+                page_cache_get(page);
             }
             if (vmas)
                 vmas[i] = vma;
@@ -1051,7 +1075,11 @@ static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
     if (!pte)
         return -ENOMEM;
     do {
-        pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(addr), prot));
+        struct page *page = ZERO_PAGE(addr);
+        pte_t zero_pte = pte_wrprotect(mk_pte(page, prot));
+        page_cache_get(page);
+        page_add_file_rmap(page);
+        inc_mm_counter(mm, file_rss);
         BUG_ON(!pte_none(*pte));
         set_pte_at(mm, addr, pte, zero_pte);
     } while (pte++, addr += PAGE_SIZE, addr != end);
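ZERO_PAGE mappings are now accounted like any other file-backed pte: one reference, one file-rmap count, and one file_rss tick per pte. The teardown paths (__xip_unmap above, zap_pte_range) undo exactly the same three. The pairing, side by side:

    /* install (zeromap_pte_range, do_anonymous_page read fault) */
    page_cache_get(page);
    page_add_file_rmap(page);
    inc_mm_counter(mm, file_rss);

    /* teardown (__xip_unmap, zap_pte_range) */
    page_remove_rmap(page);
    dec_mm_counter(mm, file_rss);
    page_cache_release(page);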
@@ -1132,8 +1160,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
         return -ENOMEM;
     do {
         BUG_ON(!pte_none(*pte));
-        if (!pfn_valid(pfn) || PageReserved(pfn_to_page(pfn)))
-            set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
+        set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
         pfn++;
     } while (pte++, addr += PAGE_SIZE, addr != end);
     pte_unmap(pte - 1);
@@ -1195,8 +1222,8 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
     * rest of the world about it:
     *   VM_IO tells people not to look at these pages
     *	(accesses can have side effects).
-    *   VM_RESERVED tells swapout not to try to touch
-    *	this region.
+    *   VM_RESERVED tells the core MM not to "manage" these pages
+    *	(e.g. refcount, mapcount, try to swap them out).
     */
     vma->vm_flags |= VM_IO | VM_RESERVED;
 
@@ -1256,11 +1283,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
     pte_t entry;
     int ret = VM_FAULT_MINOR;
 
+    BUG_ON(vma->vm_flags & VM_RESERVED);
+
     if (unlikely(!pfn_valid(pfn))) {
         /*
         * Page table corrupted: show pte and kill process.
         */
-        pte_ERROR(orig_pte);
+        print_bad_pte(vma, orig_pte, address);
         ret = VM_FAULT_OOM;
         goto unlock;
     }
@@ -1284,8 +1313,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
     /*
     * Ok, we need to copy. Oh, well..
     */
-    if (!PageReserved(old_page))
-        page_cache_get(old_page);
+    page_cache_get(old_page);
     pte_unmap(page_table);
     spin_unlock(&mm->page_table_lock);
 
@@ -1308,14 +1336,10 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
     spin_lock(&mm->page_table_lock);
     page_table = pte_offset_map(pmd, address);
     if (likely(pte_same(*page_table, orig_pte))) {
-        if (PageReserved(old_page))
+        page_remove_rmap(old_page);
+        if (!PageAnon(old_page)) {
             inc_mm_counter(mm, anon_rss);
-        else {
-            page_remove_rmap(old_page);
-            if (!PageAnon(old_page)) {
-                inc_mm_counter(mm, anon_rss);
-                dec_mm_counter(mm, file_rss);
-            }
+            dec_mm_counter(mm, file_rss);
         }
         flush_cache_page(vma, address, pfn);
         entry = mk_pte(new_page, vma->vm_page_prot);
@@ -1769,14 +1793,13 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
         unsigned long address, pte_t *page_table, pmd_t *pmd,
         int write_access)
 {
+    struct page *page = ZERO_PAGE(addr);
     pte_t entry;
 
     /* Mapping of ZERO_PAGE - vm_page_prot is readonly */
-    entry = mk_pte(ZERO_PAGE(addr), vma->vm_page_prot);
+    entry = mk_pte(page, vma->vm_page_prot);
 
     if (write_access) {
-        struct page *page;
-
         /* Allocate our own private page. */
         pte_unmap(page_table);
         spin_unlock(&mm->page_table_lock);
@@ -1800,6 +1823,10 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
         lru_cache_add_active(page);
         SetPageReferenced(page);
         page_add_anon_rmap(page, vma, address);
+    } else {
+        inc_mm_counter(mm, file_rss);
+        page_add_file_rmap(page);
+        page_cache_get(page);
     }
 
     set_pte_at(mm, address, page_table, entry);
@@ -1916,7 +1943,7 @@ retry:
             inc_mm_counter(mm, anon_rss);
             lru_cache_add_active(new_page);
             page_add_anon_rmap(new_page, vma, address);
-        } else if (!PageReserved(new_page)) {
+        } else if (!(vma->vm_flags & VM_RESERVED)) {
             inc_mm_counter(mm, file_rss);
             page_add_file_rmap(new_page);
         }
@@ -1957,7 +1984,7 @@ static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma,
         /*
         * Page table corrupted: show pte and kill process.
         */
-        pte_ERROR(orig_pte);
+        print_bad_pte(vma, orig_pte, address);
         return VM_FAULT_OOM;
     }
     /* We can then assume vm->vm_ops && vma->vm_ops->populate */
@@ -2232,7 +2259,7 @@ static int __init gate_vma_init(void)
     gate_vma.vm_start = FIXADDR_USER_START;
     gate_vma.vm_end = FIXADDR_USER_END;
     gate_vma.vm_page_prot = PAGE_READONLY;
-    gate_vma.vm_flags = 0;
+    gate_vma.vm_flags = VM_RESERVED;
     return 0;
 }
 __initcall(gate_vma_init);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 43b1199af591..11d824f282f1 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -223,13 +223,13 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
 }
 
 /* Ensure all existing pages follow the policy. */
-static int check_pte_range(struct mm_struct *mm, pmd_t *pmd,
+static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
         unsigned long addr, unsigned long end, nodemask_t *nodes)
 {
     pte_t *orig_pte;
     pte_t *pte;
 
-    spin_lock(&mm->page_table_lock);
+    spin_lock(&vma->vm_mm->page_table_lock);
     orig_pte = pte = pte_offset_map(pmd, addr);
     do {
         unsigned long pfn;
@@ -238,18 +238,20 @@ static int check_pte_range(struct mm_struct *mm, pmd_t *pmd,
         if (!pte_present(*pte))
             continue;
         pfn = pte_pfn(*pte);
-        if (!pfn_valid(pfn))
+        if (!pfn_valid(pfn)) {
+            print_bad_pte(vma, *pte, addr);
             continue;
+        }
         nid = pfn_to_nid(pfn);
         if (!node_isset(nid, *nodes))
             break;
     } while (pte++, addr += PAGE_SIZE, addr != end);
     pte_unmap(orig_pte);
-    spin_unlock(&mm->page_table_lock);
+    spin_unlock(&vma->vm_mm->page_table_lock);
     return addr != end;
 }
 
-static inline int check_pmd_range(struct mm_struct *mm, pud_t *pud,
+static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
         unsigned long addr, unsigned long end, nodemask_t *nodes)
 {
     pmd_t *pmd;
@@ -260,13 +262,13 @@ static inline int check_pmd_range(struct mm_struct *mm, pud_t *pud,
         next = pmd_addr_end(addr, end);
         if (pmd_none_or_clear_bad(pmd))
             continue;
-        if (check_pte_range(mm, pmd, addr, next, nodes))
+        if (check_pte_range(vma, pmd, addr, next, nodes))
             return -EIO;
     } while (pmd++, addr = next, addr != end);
     return 0;
 }
 
-static inline int check_pud_range(struct mm_struct *mm, pgd_t *pgd,
+static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
         unsigned long addr, unsigned long end, nodemask_t *nodes)
 {
     pud_t *pud;
@@ -277,24 +279,24 @@ static inline int check_pud_range(struct mm_struct *mm, pgd_t *pgd,
         next = pud_addr_end(addr, end);
         if (pud_none_or_clear_bad(pud))
             continue;
-        if (check_pmd_range(mm, pud, addr, next, nodes))
+        if (check_pmd_range(vma, pud, addr, next, nodes))
             return -EIO;
     } while (pud++, addr = next, addr != end);
     return 0;
 }
 
-static inline int check_pgd_range(struct mm_struct *mm,
+static inline int check_pgd_range(struct vm_area_struct *vma,
         unsigned long addr, unsigned long end, nodemask_t *nodes)
 {
     pgd_t *pgd;
     unsigned long next;
 
-    pgd = pgd_offset(mm, addr);
+    pgd = pgd_offset(vma->vm_mm, addr);
     do {
         next = pgd_addr_end(addr, end);
         if (pgd_none_or_clear_bad(pgd))
             continue;
-        if (check_pud_range(mm, pgd, addr, next, nodes))
+        if (check_pud_range(vma, pgd, addr, next, nodes))
             return -EIO;
     } while (pgd++, addr = next, addr != end);
     return 0;
@@ -311,6 +313,8 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
     first = find_vma(mm, start);
     if (!first)
         return ERR_PTR(-EFAULT);
+    if (first->vm_flags & VM_RESERVED)
+        return ERR_PTR(-EACCES);
     prev = NULL;
     for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) {
         if (!vma->vm_next && vma->vm_end < end)
@@ -323,8 +327,7 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
             endvma = end;
         if (vma->vm_start > start)
             start = vma->vm_start;
-        err = check_pgd_range(vma->vm_mm,
-                start, endvma, nodes);
+        err = check_pgd_range(vma, start, endvma, nodes);
         if (err) {
             first = ERR_PTR(err);
             break;
diff --git a/mm/mmap.c b/mm/mmap.c
index 459b9f068ad7..8a111792b8db 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1088,6 +1088,17 @@ munmap_back:
         error = file->f_op->mmap(file, vma);
         if (error)
             goto unmap_and_free_vma;
+        if ((vma->vm_flags & (VM_SHARED | VM_WRITE | VM_RESERVED))
+                    == (VM_WRITE | VM_RESERVED)) {
+            printk(KERN_WARNING "program %s is using MAP_PRIVATE, "
+                    "PROT_WRITE mmap of VM_RESERVED memory, which "
+                    "is deprecated. Please report this to "
+                    "linux-kernel@vger.kernel.org\n",current->comm);
+            if (vma->vm_ops && vma->vm_ops->close)
+                vma->vm_ops->close(vma);
+            error = -EACCES;
+            goto unmap_and_free_vma;
+        }
     } else if (vm_flags & VM_SHARED) {
         error = shmem_zero_setup(vma);
         if (error)
diff --git a/mm/mprotect.c b/mm/mprotect.c
index b426f01c5e9c..672a76fddd5e 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -125,6 +125,14 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
     * a MAP_NORESERVE private mapping to writable will now reserve.
     */
     if (newflags & VM_WRITE) {
+        if (oldflags & VM_RESERVED) {
+            BUG_ON(oldflags & VM_WRITE);
+            printk(KERN_WARNING "program %s is using MAP_PRIVATE, "
+                "PROT_WRITE mprotect of VM_RESERVED memory, "
+                "which is deprecated. Please report this to "
+                "linux-kernel@vger.kernel.org\n",current->comm);
+            return -EACCES;
+        }
         if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED|VM_HUGETLB))) {
             charged = nrpages;
             if (security_vm_enough_memory(charged))
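From userspace, the mmap.c and mprotect.c checks above surface as EACCES plus the kernel-log warning. A hedged sketch of the now-rejected sequence; /dev/mydev is a placeholder for any device whose driver marks its mappings VM_RESERVED (for example via remap_pfn_range()):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/dev/mydev", O_RDWR);    /* hypothetical device */
            void *p;

            if (fd < 0)
                    return 1;
            p = mmap(NULL, 4096, PROT_READ, MAP_PRIVATE, fd, 0);
            if (p == MAP_FAILED)
                    return 1;

            /* Upgrading a MAP_PRIVATE mapping of VM_RESERVED memory to
             * writable is now refused (and logged as deprecated). */
            if (mprotect(p, 4096, PROT_READ | PROT_WRITE) == -1)
                    perror("mprotect");     /* expected: Permission denied */

            munmap(p, 4096);
            close(fd);
            return 0;
    }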
diff --git a/mm/msync.c b/mm/msync.c
index 3b5f1c521d4b..860395486060 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -25,6 +25,7 @@
 static void msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                 unsigned long addr, unsigned long end)
 {
+    struct mm_struct *mm = vma->vm_mm;
     pte_t *pte;
     int progress = 0;
 
@@ -37,7 +38,7 @@ again:
         if (progress >= 64) {
             progress = 0;
             if (need_resched() ||
-                    need_lockbreak(&vma->vm_mm->page_table_lock))
+                    need_lockbreak(&mm->page_table_lock))
                 break;
         }
         progress++;
@@ -46,11 +47,11 @@ again:
         if (!pte_maybe_dirty(*pte))
             continue;
         pfn = pte_pfn(*pte);
-        if (!pfn_valid(pfn))
+        if (unlikely(!pfn_valid(pfn))) {
+            print_bad_pte(vma, *pte, addr);
             continue;
+        }
         page = pfn_to_page(pfn);
-        if (PageReserved(page))
-            continue;
 
         if (ptep_clear_flush_dirty(vma, addr, pte) ||
             page_test_and_clear_dirty(page))
@@ -58,7 +59,7 @@ again:
             progress += 3;
     } while (pte++, addr += PAGE_SIZE, addr != end);
     pte_unmap(pte - 1);
-    cond_resched_lock(&vma->vm_mm->page_table_lock);
+    cond_resched_lock(&mm->page_table_lock);
     if (addr != end)
         goto again;
 }
@@ -102,8 +103,10 @@ static void msync_page_range(struct vm_area_struct *vma,
 
     /* For hugepages we can't go walking the page table normally,
     * but that's ok, hugetlbfs is memory based, so we don't need
-    * to do anything more on an msync() */
-    if (is_vm_hugetlb_page(vma))
+    * to do anything more on an msync().
+    * Can't do anything with VM_RESERVED regions either.
+    */
+    if (vma->vm_flags & (VM_HUGETLB|VM_RESERVED))
         return;
 
     BUG_ON(addr >= end);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 60663232fbb2..0541288ebf4b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -114,7 +114,8 @@ static void bad_page(const char *function, struct page *page)
             1 << PG_reclaim |
             1 << PG_slab |
             1 << PG_swapcache |
-            1 << PG_writeback);
+            1 << PG_writeback |
+            1 << PG_reserved );
     set_page_count(page, 0);
     reset_page_mapcount(page);
     page->mapping = NULL;
@@ -244,7 +245,6 @@ static inline int page_is_buddy(struct page *page, int order)
 {
     if (PagePrivate(page) &&
         (page_order(page) == order) &&
-        !PageReserved(page) &&
         page_count(page) == 0)
         return 1;
     return 0;
@@ -327,7 +327,8 @@ static inline void free_pages_check(const char *function, struct page *page)
             1 << PG_reclaim |
             1 << PG_slab |
             1 << PG_swapcache |
-            1 << PG_writeback )))
+            1 << PG_writeback |
+            1 << PG_reserved )))
         bad_page(function, page);
     if (PageDirty(page))
         __ClearPageDirty(page);
@@ -455,7 +456,8 @@ static void prep_new_page(struct page *page, int order)
             1 << PG_reclaim |
             1 << PG_slab |
             1 << PG_swapcache |
-            1 << PG_writeback )))
+            1 << PG_writeback |
+            1 << PG_reserved )))
         bad_page(__FUNCTION__, page);
 
     page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
@@ -1016,7 +1018,7 @@ void __pagevec_free(struct pagevec *pvec)
 
 fastcall void __free_pages(struct page *page, unsigned int order)
 {
-    if (!PageReserved(page) && put_page_testzero(page)) {
+    if (put_page_testzero(page)) {
         if (order == 0)
             free_hot_page(page);
         else
@@ -1674,7 +1676,7 @@ void __init memmap_init_zone(unsigned long size, int nid, unsigned long zone,
         continue;
         page = pfn_to_page(pfn);
         set_page_links(page, zone, nid, pfn);
-        set_page_count(page, 0);
+        set_page_count(page, 1);
         reset_page_mapcount(page);
         SetPageReserved(page);
         INIT_LIST_HEAD(&page->lru);
diff --git a/mm/rmap.c b/mm/rmap.c
index 504757624cce..f69d5342ce7f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -443,8 +443,6 @@ int page_referenced(struct page *page, int is_locked, int ignore_token)
 void page_add_anon_rmap(struct page *page,
     struct vm_area_struct *vma, unsigned long address)
 {
-    BUG_ON(PageReserved(page));
-
     if (atomic_inc_and_test(&page->_mapcount)) {
         struct anon_vma *anon_vma = vma->anon_vma;
 
@@ -468,8 +466,7 @@ void page_add_anon_rmap(struct page *page,
 void page_add_file_rmap(struct page *page)
 {
     BUG_ON(PageAnon(page));
-    if (!pfn_valid(page_to_pfn(page)) || PageReserved(page))
-        return;
+    BUG_ON(!pfn_valid(page_to_pfn(page)));
 
     if (atomic_inc_and_test(&page->_mapcount))
         inc_page_state(nr_mapped);
@@ -483,8 +480,6 @@ void page_add_file_rmap(struct page *page)
 */
 void page_remove_rmap(struct page *page)
 {
-    BUG_ON(PageReserved(page));
-
     if (atomic_add_negative(-1, &page->_mapcount)) {
         BUG_ON(page_mapcount(page) < 0);
         /*
@@ -640,13 +635,13 @@ static void try_to_unmap_cluster(unsigned long cursor,
             continue;
 
         pfn = pte_pfn(*pte);
-        if (!pfn_valid(pfn))
+        if (unlikely(!pfn_valid(pfn))) {
+            print_bad_pte(vma, *pte, address);
             continue;
+        }
 
         page = pfn_to_page(pfn);
         BUG_ON(PageAnon(page));
-        if (PageReserved(page))
-            continue;
 
         if (ptep_clear_flush_young(vma, address, pte))
             continue;
@@ -808,7 +803,6 @@ int try_to_unmap(struct page *page)
 {
     int ret;
 
-    BUG_ON(PageReserved(page));
     BUG_ON(!PageLocked(page));
 
     if (PageAnon(page))
diff --git a/mm/shmem.c b/mm/shmem.c
index 6796311a23ef..37777f4c11f8 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1506,8 +1506,10 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
         */
         if (!offset)
             mark_page_accessed(page);
-    } else
+    } else {
         page = ZERO_PAGE(0);
+        page_cache_get(page);
+    }
 
     /*
     * Ok, we have the page, and it's up-to-date, so
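The added get balances the release on the function's common exit path (an assumption from the surrounding code, since that release is not shown in the hunk): after this branch, both arms hold exactly one reference whether the page came from shmem or is ZERO_PAGE. The shape of the pattern:

    /* both arms leave 'page' holding one reference */
    if (!page) {                    /* nothing found in the file     */
            page = ZERO_PAGE(0);
            page_cache_get(page);   /* match the common release      */
    }
    /* ... copy data out to userspace ... */
    page_cache_release(page);       /* uniform, no PageReserved test */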
diff --git a/mm/swap.c b/mm/swap.c
index 7771d2803f62..21d15f99805c 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -48,7 +48,7 @@ void put_page(struct page *page)
         }
         return;
     }
-    if (!PageReserved(page) && put_page_testzero(page))
+    if (put_page_testzero(page))
         __page_cache_release(page);
 }
 EXPORT_SYMBOL(put_page);
@@ -215,7 +215,7 @@ void release_pages(struct page **pages, int nr, int cold)
         struct page *page = pages[i];
         struct zone *pagezone;
 
-        if (PageReserved(page) || !put_page_testzero(page))
+        if (!put_page_testzero(page))
             continue;
 
         pagezone = page_zone(page);
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 67abebabf83e..e97b2d162cc7 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -2949,8 +2949,7 @@ static struct page * snd_pcm_mmap_status_nopage(struct vm_area_struct *area, uns
         return NOPAGE_OOM;
     runtime = substream->runtime;
     page = virt_to_page(runtime->status);
-    if (!PageReserved(page))
-        get_page(page);
+    get_page(page);
     if (type)
         *type = VM_FAULT_MINOR;
     return page;
@@ -2992,8 +2991,7 @@ static struct page * snd_pcm_mmap_control_nopage(struct vm_area_struct *area, un
         return NOPAGE_OOM;
     runtime = substream->runtime;
     page = virt_to_page(runtime->control);
-    if (!PageReserved(page))
-        get_page(page);
+    get_page(page);
     if (type)
         *type = VM_FAULT_MINOR;
     return page;
@@ -3066,8 +3064,7 @@ static struct page *snd_pcm_mmap_data_nopage(struct vm_area_struct *area, unsign
         vaddr = runtime->dma_area + offset;
         page = virt_to_page(vaddr);
     }
-    if (!PageReserved(page))
-        get_page(page);
+    get_page(page);
     if (type)
         *type = VM_FAULT_MINOR;
     return page;
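These three ALSA hunks are the template for any driver nopage handler under the new rules: take the reference unconditionally, because even a SetPageReserved'd buffer page is now freed strictly by refcount. A minimal hypothetical handler (mydev_buf is an assumed driver buffer; offset handling elided):

    static void *mydev_buf;         /* hypothetical driver buffer */

    static struct page *mydev_nopage(struct vm_area_struct *area,
                                     unsigned long address, int *type)
    {
            struct page *page = virt_to_page(mydev_buf);

            get_page(page);         /* unconditional: no PageReserved test */
            if (type)
                    *type = VM_FAULT_MINOR;
            return page;
    }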