aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Hugh Dickins <hugh@veritas.com> 2005-11-22 00:32:15 -0500
committer: Linus Torvalds <torvalds@g5.osdl.org> 2005-11-22 12:13:42 -0500
commit: 0b14c179a483e71ea41df2aa4a661760063115bd (patch)
tree: 075fc303a3d2fd33f66c0af8f64064cff2b72b79
parent: 664beed0190fae687ac51295694004902ddeb18e (diff)
[PATCH] unpaged: VM_UNPAGED
Although we tend to associate VM_RESERVED with remap_pfn_range, quite a few drivers set VM_RESERVED on areas which are then populated by nopage. The PageReserved removal in 2.6.15-rc1 changed VM_RESERVED not to free pages in zap_pte_range, without changing those drivers not to set it: so their pages just leak away.

Let's not change miscellaneous drivers now: introduce VM_UNPAGED at the core, to flag the special areas where the ptes may have no struct page, or if they have then it's not to be touched. Replace most instances of VM_RESERVED in core mm by VM_UNPAGED. Force it on in remap_pfn_range, and the sparc and sparc64 io_remap_pfn_range.

Revert addition of VM_RESERVED to powerpc vdso, it's not needed there. Is it needed anywhere? It still governs the mm->reserved_vm statistic, and special vmas not to be merged, and areas not to be core dumped; but could probably be eliminated later (the drivers are probably specifying it because in 2.4 it kept swapout off the vma, but in 2.6 we work from the LRU, which these pages don't get on).

Use the VM_SHM slot for VM_UNPAGED, and define VM_SHM to 0: it serves no purpose whatsoever, and should be removed from drivers when we clean up.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Acked-by: William Irwin <wli@holomorphy.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--  arch/powerpc/kernel/vdso.c   3
-rw-r--r--  arch/sparc/mm/generic.c      2
-rw-r--r--  arch/sparc64/mm/generic.c    2
-rw-r--r--  include/linux/mm.h           5
-rw-r--r--  mm/fremap.c                  4
-rw-r--r--  mm/madvise.c                 2
-rw-r--r--  mm/memory.c                 30
-rw-r--r--  mm/mempolicy.c               2
-rw-r--r--  mm/msync.c                   4
9 files changed, 30 insertions, 24 deletions
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 0d4d8bec0df4..b44b36e0c293 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -285,8 +285,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
285 * It's fine to use that for setting breakpoints in the vDSO code 285 * It's fine to use that for setting breakpoints in the vDSO code
286 * pages though 286 * pages though
287 */ 287 */
288 vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | 288 vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
289 VM_MAYEXEC | VM_RESERVED;
290 vma->vm_flags |= mm->def_flags; 289 vma->vm_flags |= mm->def_flags;
291 vma->vm_page_prot = protection_map[vma->vm_flags & 0x7]; 290 vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
292 vma->vm_ops = &vdso_vmops; 291 vma->vm_ops = &vdso_vmops;
diff --git a/arch/sparc/mm/generic.c b/arch/sparc/mm/generic.c
index 9604893ffdbd..0410bae681f8 100644
--- a/arch/sparc/mm/generic.c
+++ b/arch/sparc/mm/generic.c
@@ -74,7 +74,7 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
74 unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT; 74 unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT;
75 75
76 /* See comment in mm/memory.c remap_pfn_range */ 76 /* See comment in mm/memory.c remap_pfn_range */
77 vma->vm_flags |= VM_IO | VM_RESERVED; 77 vma->vm_flags |= VM_IO | VM_RESERVED | VM_UNPAGED;
78 78
79 prot = __pgprot(pg_iobits); 79 prot = __pgprot(pg_iobits);
80 offset -= from; 80 offset -= from;
diff --git a/arch/sparc64/mm/generic.c b/arch/sparc64/mm/generic.c
index 112c316e7cd2..8fd4cb1f050a 100644
--- a/arch/sparc64/mm/generic.c
+++ b/arch/sparc64/mm/generic.c
@@ -128,7 +128,7 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
128 unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT; 128 unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT;
129 129
130 /* See comment in mm/memory.c remap_pfn_range */ 130 /* See comment in mm/memory.c remap_pfn_range */
131 vma->vm_flags |= VM_IO | VM_RESERVED; 131 vma->vm_flags |= VM_IO | VM_RESERVED | VM_UNPAGED;
132 132
133 prot = __pgprot(pg_iobits); 133 prot = __pgprot(pg_iobits);
134 offset -= from; 134 offset -= from;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9701210c6680..f0cdfd18db55 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -144,7 +144,8 @@ extern unsigned int kobjsize(const void *objp);
144 144
145#define VM_GROWSDOWN 0x00000100 /* general info on the segment */ 145#define VM_GROWSDOWN 0x00000100 /* general info on the segment */
146#define VM_GROWSUP 0x00000200 146#define VM_GROWSUP 0x00000200
147#define VM_SHM 0x00000400 /* shared memory area, don't swap out */ 147#define VM_SHM 0x00000000 /* Means nothing: delete it later */
148#define VM_UNPAGED 0x00000400 /* Pages managed without map count */
148#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */ 149#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */
149 150
150#define VM_EXECUTABLE 0x00001000 151#define VM_EXECUTABLE 0x00001000
@@ -157,7 +158,7 @@ extern unsigned int kobjsize(const void *objp);
157 158
158#define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */ 159#define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */
159#define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */ 160#define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */
160#define VM_RESERVED 0x00080000 /* Pages managed in a special way */ 161#define VM_RESERVED 0x00080000 /* Count as reserved_vm like IO */
161#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */ 162#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */
162#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ 163#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
163#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ 164#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */
diff --git a/mm/fremap.c b/mm/fremap.c
index d862be3bc3e3..94254c5d7a18 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -65,7 +65,7 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
65 pte_t pte_val; 65 pte_t pte_val;
66 spinlock_t *ptl; 66 spinlock_t *ptl;
67 67
68 BUG_ON(vma->vm_flags & VM_RESERVED); 68 BUG_ON(vma->vm_flags & VM_UNPAGED);
69 69
70 pgd = pgd_offset(mm, addr); 70 pgd = pgd_offset(mm, addr);
71 pud = pud_alloc(mm, pgd, addr); 71 pud = pud_alloc(mm, pgd, addr);
@@ -122,7 +122,7 @@ int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
122 pte_t pte_val; 122 pte_t pte_val;
123 spinlock_t *ptl; 123 spinlock_t *ptl;
124 124
125 BUG_ON(vma->vm_flags & VM_RESERVED); 125 BUG_ON(vma->vm_flags & VM_UNPAGED);
126 126
127 pgd = pgd_offset(mm, addr); 127 pgd = pgd_offset(mm, addr);
128 pud = pud_alloc(mm, pgd, addr); 128 pud = pud_alloc(mm, pgd, addr);
diff --git a/mm/madvise.c b/mm/madvise.c
index 17aaf3e16449..328a3bcce527 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -126,7 +126,7 @@ static long madvise_dontneed(struct vm_area_struct * vma,
126 unsigned long start, unsigned long end) 126 unsigned long start, unsigned long end)
127{ 127{
128 *prev = vma; 128 *prev = vma;
129 if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_RESERVED)) 129 if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_UNPAGED))
130 return -EINVAL; 130 return -EINVAL;
131 131
132 if (unlikely(vma->vm_flags & VM_NONLINEAR)) { 132 if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
diff --git a/mm/memory.c b/mm/memory.c
index cfce5f1f30f2..ece04963158e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -334,7 +334,7 @@ static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
334 334
335/* 335/*
336 * This function is called to print an error when a pte in a 336 * This function is called to print an error when a pte in a
337 * !VM_RESERVED region is found pointing to an invalid pfn (which 337 * !VM_UNPAGED region is found pointing to an invalid pfn (which
338 * is an error. 338 * is an error.
339 * 339 *
340 * The calling function must still handle the error. 340 * The calling function must still handle the error.
@@ -381,15 +381,15 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
381 goto out_set_pte; 381 goto out_set_pte;
382 } 382 }
383 383
384 /* If the region is VM_RESERVED, the mapping is not 384 /* If the region is VM_UNPAGED, the mapping is not
385 * mapped via rmap - duplicate the pte as is. 385 * mapped via rmap - duplicate the pte as is.
386 */ 386 */
387 if (vm_flags & VM_RESERVED) 387 if (vm_flags & VM_UNPAGED)
388 goto out_set_pte; 388 goto out_set_pte;
389 389
390 pfn = pte_pfn(pte); 390 pfn = pte_pfn(pte);
391 /* If the pte points outside of valid memory but 391 /* If the pte points outside of valid memory but
392 * the region is not VM_RESERVED, we have a problem. 392 * the region is not VM_UNPAGED, we have a problem.
393 */ 393 */
394 if (unlikely(!pfn_valid(pfn))) { 394 if (unlikely(!pfn_valid(pfn))) {
395 print_bad_pte(vma, pte, addr); 395 print_bad_pte(vma, pte, addr);
@@ -528,7 +528,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
528 * readonly mappings. The tradeoff is that copy_page_range is more 528 * readonly mappings. The tradeoff is that copy_page_range is more
529 * efficient than faulting. 529 * efficient than faulting.
530 */ 530 */
531 if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_RESERVED))) { 531 if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_UNPAGED))) {
532 if (!vma->anon_vma) 532 if (!vma->anon_vma)
533 return 0; 533 return 0;
534 } 534 }
@@ -572,7 +572,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
572 572
573 (*zap_work) -= PAGE_SIZE; 573 (*zap_work) -= PAGE_SIZE;
574 574
575 if (!(vma->vm_flags & VM_RESERVED)) { 575 if (!(vma->vm_flags & VM_UNPAGED)) {
576 unsigned long pfn = pte_pfn(ptent); 576 unsigned long pfn = pte_pfn(ptent);
577 if (unlikely(!pfn_valid(pfn))) 577 if (unlikely(!pfn_valid(pfn)))
578 print_bad_pte(vma, ptent, addr); 578 print_bad_pte(vma, ptent, addr);
@@ -1191,10 +1191,16 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
1191 * rest of the world about it: 1191 * rest of the world about it:
1192 * VM_IO tells people not to look at these pages 1192 * VM_IO tells people not to look at these pages
1193 * (accesses can have side effects). 1193 * (accesses can have side effects).
1194 * VM_RESERVED tells the core MM not to "manage" these pages 1194 * VM_RESERVED is specified all over the place, because
1195 * (e.g. refcount, mapcount, try to swap them out). 1195 * in 2.4 it kept swapout's vma scan off this vma; but
1196 * in 2.6 the LRU scan won't even find its pages, so this
1197 * flag means no more than count its pages in reserved_vm,
1198 * and omit it from core dump, even when VM_IO turned off.
1199 * VM_UNPAGED tells the core MM not to "manage" these pages
1200 * (e.g. refcount, mapcount, try to swap them out): in
1201 * particular, zap_pte_range does not try to free them.
1196 */ 1202 */
1197 vma->vm_flags |= VM_IO | VM_RESERVED; 1203 vma->vm_flags |= VM_IO | VM_RESERVED | VM_UNPAGED;
1198 1204
1199 BUG_ON(addr >= end); 1205 BUG_ON(addr >= end);
1200 pfn -= addr >> PAGE_SHIFT; 1206 pfn -= addr >> PAGE_SHIFT;
@@ -1276,7 +1282,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
1276 pte_t entry; 1282 pte_t entry;
1277 int ret = VM_FAULT_MINOR; 1283 int ret = VM_FAULT_MINOR;
1278 1284
1279 BUG_ON(vma->vm_flags & VM_RESERVED); 1285 BUG_ON(vma->vm_flags & VM_UNPAGED);
1280 1286
1281 if (unlikely(!pfn_valid(pfn))) { 1287 if (unlikely(!pfn_valid(pfn))) {
1282 /* 1288 /*
@@ -1924,7 +1930,7 @@ retry:
1924 inc_mm_counter(mm, anon_rss); 1930 inc_mm_counter(mm, anon_rss);
1925 lru_cache_add_active(new_page); 1931 lru_cache_add_active(new_page);
1926 page_add_anon_rmap(new_page, vma, address); 1932 page_add_anon_rmap(new_page, vma, address);
1927 } else if (!(vma->vm_flags & VM_RESERVED)) { 1933 } else if (!(vma->vm_flags & VM_UNPAGED)) {
1928 inc_mm_counter(mm, file_rss); 1934 inc_mm_counter(mm, file_rss);
1929 page_add_file_rmap(new_page); 1935 page_add_file_rmap(new_page);
1930 } 1936 }
@@ -2203,7 +2209,7 @@ static int __init gate_vma_init(void)
2203 gate_vma.vm_start = FIXADDR_USER_START; 2209 gate_vma.vm_start = FIXADDR_USER_START;
2204 gate_vma.vm_end = FIXADDR_USER_END; 2210 gate_vma.vm_end = FIXADDR_USER_END;
2205 gate_vma.vm_page_prot = PAGE_READONLY; 2211 gate_vma.vm_page_prot = PAGE_READONLY;
2206 gate_vma.vm_flags = VM_RESERVED; 2212 gate_vma.vm_flags = 0;
2207 return 0; 2213 return 0;
2208} 2214}
2209__initcall(gate_vma_init); 2215__initcall(gate_vma_init);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 5abc57c2b8bd..5609a31bdf22 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -269,7 +269,7 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
269 first = find_vma(mm, start); 269 first = find_vma(mm, start);
270 if (!first) 270 if (!first)
271 return ERR_PTR(-EFAULT); 271 return ERR_PTR(-EFAULT);
272 if (first->vm_flags & VM_RESERVED) 272 if (first->vm_flags & VM_UNPAGED)
273 return ERR_PTR(-EACCES); 273 return ERR_PTR(-EACCES);
274 prev = NULL; 274 prev = NULL;
275 for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) { 275 for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) {
diff --git a/mm/msync.c b/mm/msync.c
index 0e040e9c39d8..b3f4caf3010b 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -97,9 +97,9 @@ static void msync_page_range(struct vm_area_struct *vma,
97 /* For hugepages we can't go walking the page table normally, 97 /* For hugepages we can't go walking the page table normally,
98 * but that's ok, hugetlbfs is memory based, so we don't need 98 * but that's ok, hugetlbfs is memory based, so we don't need
99 * to do anything more on an msync(). 99 * to do anything more on an msync().
100 * Can't do anything with VM_RESERVED regions either. 100 * Can't do anything with VM_UNPAGED regions either.
101 */ 101 */
102 if (vma->vm_flags & (VM_HUGETLB|VM_RESERVED)) 102 if (vma->vm_flags & (VM_HUGETLB|VM_UNPAGED))
103 return; 103 return;
104 104
105 BUG_ON(addr >= end); 105 BUG_ON(addr >= end);