Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig          6
-rw-r--r--  mm/fremap.c        10
-rw-r--r--  mm/hugetlb.c        6
-rw-r--r--  mm/madvise.c        2
-rw-r--r--  mm/memory.c       128
-rw-r--r--  mm/mempolicy.c      2
-rw-r--r--  mm/mmap.c          11
-rw-r--r--  mm/mprotect.c       8
-rw-r--r--  mm/msync.c          4
-rw-r--r--  mm/page_alloc.c    51
-rw-r--r--  mm/rmap.c          22
-rw-r--r--  mm/swap.c           3
-rw-r--r--  mm/truncate.c       6
13 files changed, 151 insertions, 108 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index ae9ce6b73e8a..21eb51d4da8f 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -125,12 +125,10 @@ comment "Memory hotplug is currently incompatible with Software Suspend"
 # space can be handled with less contention: split it at this NR_CPUS.
 # Default to 4 for wider testing, though 8 might be more appropriate.
 # ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock.
-# PA-RISC's debug spinlock_t is too large for the 32-bit struct page.
-# ARM26 and SPARC32 and PPC64 may use one page for multiple page tables.
+# PA-RISC 7xxx's spinlock_t would enlarge struct page from 32 to 44 bytes.
 #
 config SPLIT_PTLOCK_CPUS
         int
         default "4096" if ARM && !CPU_CACHE_VIPT
-        default "4096" if PARISC && DEBUG_SPINLOCK && !64BIT
-        default "4096" if ARM26 || SPARC32 || PPC64
+        default "4096" if PARISC && !PA20
         default "4"
diff --git a/mm/fremap.c b/mm/fremap.c
index d862be3bc3e3..007cbad9331e 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -65,7 +65,7 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
         pte_t pte_val;
         spinlock_t *ptl;
 
-        BUG_ON(vma->vm_flags & VM_RESERVED);
+        BUG_ON(vma->vm_flags & VM_UNPAGED);
 
         pgd = pgd_offset(mm, addr);
         pud = pud_alloc(mm, pgd, addr);
@@ -122,7 +122,7 @@ int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
         pte_t pte_val;
         spinlock_t *ptl;
 
-        BUG_ON(vma->vm_flags & VM_RESERVED);
+        BUG_ON(vma->vm_flags & VM_UNPAGED);
 
         pgd = pgd_offset(mm, addr);
         pud = pud_alloc(mm, pgd, addr);
@@ -204,12 +204,10 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
          * Make sure the vma is shared, that it supports prefaulting,
          * and that the remapped range is valid and fully within
          * the single existing vma. vm_private_data is used as a
-         * swapout cursor in a VM_NONLINEAR vma (unless VM_RESERVED
-         * or VM_LOCKED, but VM_LOCKED could be revoked later on).
+         * swapout cursor in a VM_NONLINEAR vma.
          */
         if (vma && (vma->vm_flags & VM_SHARED) &&
-                (!vma->vm_private_data ||
-                        (vma->vm_flags & (VM_NONLINEAR|VM_RESERVED))) &&
+                (!vma->vm_private_data || (vma->vm_flags & VM_NONLINEAR)) &&
                 vma->vm_ops && vma->vm_ops->populate &&
                         end > start && start >= vma->vm_start &&
                         end <= vma->vm_end) {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 728e9bda12ea..3e52df7c471b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -22,6 +22,10 @@ unsigned long max_huge_pages;
 static struct list_head hugepage_freelists[MAX_NUMNODES];
 static unsigned int nr_huge_pages_node[MAX_NUMNODES];
 static unsigned int free_huge_pages_node[MAX_NUMNODES];
+
+/*
+ * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages
+ */
 static DEFINE_SPINLOCK(hugetlb_lock);
 
 static void enqueue_huge_page(struct page *page)
@@ -61,8 +65,10 @@ static struct page *alloc_fresh_huge_page(void)
                                 HUGETLB_PAGE_ORDER);
         nid = (nid + 1) % num_online_nodes();
         if (page) {
+                spin_lock(&hugetlb_lock);
                 nr_huge_pages++;
                 nr_huge_pages_node[page_to_nid(page)]++;
+                spin_unlock(&hugetlb_lock);
         }
         return page;
 }
diff --git a/mm/madvise.c b/mm/madvise.c
index 17aaf3e16449..328a3bcce527 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -126,7 +126,7 @@ static long madvise_dontneed(struct vm_area_struct * vma,
                              unsigned long start, unsigned long end)
 {
         *prev = vma;
-        if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_RESERVED))
+        if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_UNPAGED))
                 return -EINVAL;
 
         if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
diff --git a/mm/memory.c b/mm/memory.c
index 2998cfc12f5b..d1f46f4e4c8a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -334,7 +334,7 @@ static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
 
 /*
  * This function is called to print an error when a pte in a
- * !VM_RESERVED region is found pointing to an invalid pfn (which
+ * !VM_UNPAGED region is found pointing to an invalid pfn (which
  * is an error.
  *
  * The calling function must still handle the error.
@@ -350,6 +350,22 @@ void print_bad_pte(struct vm_area_struct *vma, pte_t pte, unsigned long vaddr)
 }
 
 /*
+ * page_is_anon applies strict checks for an anonymous page belonging to
+ * this vma at this address. It is used on VM_UNPAGED vmas, which are
+ * usually populated with shared originals (which must not be counted),
+ * but occasionally contain private COWed copies (when !VM_SHARED, or
+ * perhaps via ptrace when VM_SHARED). An mmap of /dev/mem might window
+ * free pages, pages from other processes, or from other parts of this:
+ * it's tricky, but try not to be deceived by foreign anonymous pages.
+ */
+static inline int page_is_anon(struct page *page,
+                        struct vm_area_struct *vma, unsigned long addr)
+{
+        return page && PageAnon(page) && page_mapped(page) &&
+                page_address_in_vma(page, vma) == addr;
+}
+
+/*
  * copy one vm_area from one task to the other. Assumes the page tables
  * already present in the new task to be cleared in the whole range
  * covered by this vma.
@@ -381,23 +397,22 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                 goto out_set_pte;
         }
 
-        /* If the region is VM_RESERVED, the mapping is not
-         * mapped via rmap - duplicate the pte as is.
-         */
-        if (vm_flags & VM_RESERVED)
-                goto out_set_pte;
-
         pfn = pte_pfn(pte);
-        /* If the pte points outside of valid memory but
-         * the region is not VM_RESERVED, we have a problem.
+        page = pfn_valid(pfn)? pfn_to_page(pfn): NULL;
+
+        if (unlikely(vm_flags & VM_UNPAGED))
+                if (!page_is_anon(page, vma, addr))
+                        goto out_set_pte;
+
+        /*
+         * If the pte points outside of valid memory but
+         * the region is not VM_UNPAGED, we have a problem.
          */
-        if (unlikely(!pfn_valid(pfn))) {
+        if (unlikely(!page)) {
                 print_bad_pte(vma, pte, addr);
                 goto out_set_pte; /* try to do something sane */
         }
 
-        page = pfn_to_page(pfn);
-
         /*
          * If it's a COW mapping, write protect it both
          * in the parent and the child
@@ -528,7 +543,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
          * readonly mappings. The tradeoff is that copy_page_range is more
          * efficient than faulting.
          */
-        if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_RESERVED))) {
+        if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_UNPAGED))) {
                 if (!vma->anon_vma)
                         return 0;
         }
@@ -568,17 +583,20 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
                         continue;
                 }
                 if (pte_present(ptent)) {
-                        struct page *page = NULL;
+                        struct page *page;
+                        unsigned long pfn;
 
                         (*zap_work) -= PAGE_SIZE;
 
-                        if (!(vma->vm_flags & VM_RESERVED)) {
-                                unsigned long pfn = pte_pfn(ptent);
-                                if (unlikely(!pfn_valid(pfn)))
-                                        print_bad_pte(vma, ptent, addr);
-                                else
-                                        page = pfn_to_page(pfn);
-                        }
+                        pfn = pte_pfn(ptent);
+                        page = pfn_valid(pfn)? pfn_to_page(pfn): NULL;
+
+                        if (unlikely(vma->vm_flags & VM_UNPAGED)) {
+                                if (!page_is_anon(page, vma, addr))
+                                        page = NULL;
+                        } else if (unlikely(!page))
+                                print_bad_pte(vma, ptent, addr);
+
                         if (unlikely(details) && page) {
                                 /*
                                  * unmap_shared_mapping_pages() wants to
@@ -968,7 +986,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                         continue;
                 }
 
-                if (!vma || (vma->vm_flags & (VM_IO | VM_RESERVED))
+                if (!vma || (vma->vm_flags & VM_IO)
                                 || !(vm_flags & vma->vm_flags))
                         return i ? : -EFAULT;
 
@@ -1191,10 +1209,16 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
          * rest of the world about it:
          *   VM_IO tells people not to look at these pages
          *        (accesses can have side effects).
-         *   VM_RESERVED tells the core MM not to "manage" these pages
-         *        (e.g. refcount, mapcount, try to swap them out).
+         *   VM_RESERVED is specified all over the place, because
+         *        in 2.4 it kept swapout's vma scan off this vma; but
+         *        in 2.6 the LRU scan won't even find its pages, so this
+         *        flag means no more than count its pages in reserved_vm,
+         *        and omit it from core dump, even when VM_IO turned off.
+         *   VM_UNPAGED tells the core MM not to "manage" these pages
+         *        (e.g. refcount, mapcount, try to swap them out): in
+         *        particular, zap_pte_range does not try to free them.
          */
-        vma->vm_flags |= VM_IO | VM_RESERVED;
+        vma->vm_flags |= VM_IO | VM_RESERVED | VM_UNPAGED;
 
         BUG_ON(addr >= end);
         pfn -= addr >> PAGE_SHIFT;
@@ -1271,22 +1295,29 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 unsigned long address, pte_t *page_table, pmd_t *pmd,
                 spinlock_t *ptl, pte_t orig_pte)
 {
-        struct page *old_page, *new_page;
+        struct page *old_page, *src_page, *new_page;
         unsigned long pfn = pte_pfn(orig_pte);
         pte_t entry;
         int ret = VM_FAULT_MINOR;
 
-        BUG_ON(vma->vm_flags & VM_RESERVED);
-
         if (unlikely(!pfn_valid(pfn))) {
                 /*
                  * Page table corrupted: show pte and kill process.
+                 * Or it's an attempt to COW an out-of-map VM_UNPAGED
+                 * entry, which copy_user_highpage does not support.
                  */
                 print_bad_pte(vma, orig_pte, address);
                 ret = VM_FAULT_OOM;
                 goto unlock;
         }
         old_page = pfn_to_page(pfn);
+        src_page = old_page;
+
+        if (unlikely(vma->vm_flags & VM_UNPAGED))
+                if (!page_is_anon(old_page, vma, address)) {
+                        old_page = NULL;
+                        goto gotten;
+                }
 
         if (PageAnon(old_page) && !TestSetPageLocked(old_page)) {
                 int reuse = can_share_swap_page(old_page);
@@ -1307,11 +1338,12 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
          * Ok, we need to copy. Oh, well..
          */
         page_cache_get(old_page);
+gotten:
         pte_unmap_unlock(page_table, ptl);
 
         if (unlikely(anon_vma_prepare(vma)))
                 goto oom;
-        if (old_page == ZERO_PAGE(address)) {
+        if (src_page == ZERO_PAGE(address)) {
                 new_page = alloc_zeroed_user_highpage(vma, address);
                 if (!new_page)
                         goto oom;
@@ -1319,7 +1351,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
                 if (!new_page)
                         goto oom;
-                copy_user_highpage(new_page, old_page, address);
+                copy_user_highpage(new_page, src_page, address);
         }
 
         /*
@@ -1327,11 +1359,14 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
          */
         page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
         if (likely(pte_same(*page_table, orig_pte))) {
-                page_remove_rmap(old_page);
-                if (!PageAnon(old_page)) {
+                if (old_page) {
+                        page_remove_rmap(old_page);
+                        if (!PageAnon(old_page)) {
+                                dec_mm_counter(mm, file_rss);
+                                inc_mm_counter(mm, anon_rss);
+                        }
+                } else
                         inc_mm_counter(mm, anon_rss);
-                        dec_mm_counter(mm, file_rss);
-                }
                 flush_cache_page(vma, address, pfn);
                 entry = mk_pte(new_page, vma->vm_page_prot);
                 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
@@ -1345,13 +1380,16 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 new_page = old_page;
                 ret |= VM_FAULT_WRITE;
         }
-        page_cache_release(new_page);
-        page_cache_release(old_page);
+        if (new_page)
+                page_cache_release(new_page);
+        if (old_page)
+                page_cache_release(old_page);
 unlock:
         pte_unmap_unlock(page_table, ptl);
         return ret;
 oom:
-        page_cache_release(old_page);
+        if (old_page)
+                page_cache_release(old_page);
         return VM_FAULT_OOM;
 }
 
@@ -1774,7 +1812,16 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
         spinlock_t *ptl;
         pte_t entry;
 
-        if (write_access) {
+        /*
+         * A VM_UNPAGED vma will normally be filled with present ptes
+         * by remap_pfn_range, and never arrive here; but it might have
+         * holes, or if !VM_DONTEXPAND, mremap might have expanded it.
+         * It's weird enough handling anon pages in unpaged vmas, we do
+         * not want to worry about ZERO_PAGEs too (it may or may not
+         * matter if their counts wrap): just give them anon pages.
+         */
+
+        if (write_access || (vma->vm_flags & VM_UNPAGED)) {
                 /* Allocate our own private page. */
                 pte_unmap(page_table);
 
@@ -1849,6 +1896,7 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
         int anon = 0;
 
         pte_unmap(page_table);
+        BUG_ON(vma->vm_flags & VM_UNPAGED);
 
         if (vma->vm_file) {
                 mapping = vma->vm_file->f_mapping;
@@ -1924,7 +1972,7 @@ retry:
                 inc_mm_counter(mm, anon_rss);
                 lru_cache_add_active(new_page);
                 page_add_anon_rmap(new_page, vma, address);
-        } else if (!(vma->vm_flags & VM_RESERVED)) {
+        } else {
                 inc_mm_counter(mm, file_rss);
                 page_add_file_rmap(new_page);
         }
@@ -2203,7 +2251,7 @@ static int __init gate_vma_init(void)
         gate_vma.vm_start = FIXADDR_USER_START;
         gate_vma.vm_end = FIXADDR_USER_END;
         gate_vma.vm_page_prot = PAGE_READONLY;
-        gate_vma.vm_flags = VM_RESERVED;
+        gate_vma.vm_flags = 0;
         return 0;
 }
 __initcall(gate_vma_init);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 5abc57c2b8bd..5609a31bdf22 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -269,7 +269,7 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
         first = find_vma(mm, start);
         if (!first)
                 return ERR_PTR(-EFAULT);
-        if (first->vm_flags & VM_RESERVED)
+        if (first->vm_flags & VM_UNPAGED)
                 return ERR_PTR(-EACCES);
         prev = NULL;
         for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) {
diff --git a/mm/mmap.c b/mm/mmap.c
index 4f8def03428c..11ca5927d5ff 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1076,17 +1076,6 @@ munmap_back:
                 error = file->f_op->mmap(file, vma);
                 if (error)
                         goto unmap_and_free_vma;
-                if ((vma->vm_flags & (VM_SHARED | VM_WRITE | VM_RESERVED))
-                                == (VM_WRITE | VM_RESERVED)) {
-                        printk(KERN_WARNING "program %s is using MAP_PRIVATE, "
-                                "PROT_WRITE mmap of VM_RESERVED memory, which "
-                                "is deprecated. Please report this to "
-                                "linux-kernel@vger.kernel.org\n",current->comm);
-                        if (vma->vm_ops && vma->vm_ops->close)
-                                vma->vm_ops->close(vma);
-                        error = -EACCES;
-                        goto unmap_and_free_vma;
-                }
         } else if (vm_flags & VM_SHARED) {
                 error = shmem_zero_setup(vma);
                 if (error)
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 17a2b52b753b..653b8571c1ed 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -124,14 +124,6 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
          * a MAP_NORESERVE private mapping to writable will now reserve.
          */
         if (newflags & VM_WRITE) {
-                if (oldflags & VM_RESERVED) {
-                        BUG_ON(oldflags & VM_WRITE);
-                        printk(KERN_WARNING "program %s is using MAP_PRIVATE, "
-                                "PROT_WRITE mprotect of VM_RESERVED memory, "
-                                "which is deprecated. Please report this to "
-                                "linux-kernel@vger.kernel.org\n",current->comm);
-                        return -EACCES;
-                }
                 if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED|VM_HUGETLB))) {
                         charged = nrpages;
                         if (security_vm_enough_memory(charged))
diff --git a/mm/msync.c b/mm/msync.c
index 0e040e9c39d8..b3f4caf3010b 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -97,9 +97,9 @@ static void msync_page_range(struct vm_area_struct *vma,
         /* For hugepages we can't go walking the page table normally,
          * but that's ok, hugetlbfs is memory based, so we don't need
          * to do anything more on an msync().
-         * Can't do anything with VM_RESERVED regions either.
+         * Can't do anything with VM_UNPAGED regions either.
          */
-        if (vma->vm_flags & (VM_HUGETLB|VM_RESERVED))
+        if (vma->vm_flags & (VM_HUGETLB|VM_UNPAGED))
                 return;
 
         BUG_ON(addr >= end);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bd4de592dc23..1731236dec35 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -140,18 +140,13 @@ static void bad_page(const char *function, struct page *page)
                         1 << PG_reclaim |
                         1 << PG_slab |
                         1 << PG_swapcache |
-                        1 << PG_writeback |
-                        1 << PG_reserved );
+                        1 << PG_writeback );
         set_page_count(page, 0);
         reset_page_mapcount(page);
         page->mapping = NULL;
         add_taint(TAINT_BAD_PAGE);
 }
 
-#ifndef CONFIG_HUGETLB_PAGE
-#define prep_compound_page(page, order) do { } while (0)
-#define destroy_compound_page(page, order) do { } while (0)
-#else
 /*
  * Higher-order pages are called "compound pages". They are structured thusly:
  *
@@ -205,7 +200,6 @@ static void destroy_compound_page(struct page *page, unsigned long order)
                 ClearPageCompound(p);
         }
 }
-#endif /* CONFIG_HUGETLB_PAGE */
 
 /*
  * function for dealing with page's order in buddy system.
@@ -340,7 +334,7 @@ static inline void __free_pages_bulk (struct page *page,
         zone->free_area[order].nr_free++;
 }
 
-static inline void free_pages_check(const char *function, struct page *page)
+static inline int free_pages_check(const char *function, struct page *page)
 {
         if ( page_mapcount(page) ||
                 page->mapping != NULL ||
@@ -358,6 +352,12 @@ static inline void free_pages_check(const char *function, struct page *page)
                 bad_page(function, page);
         if (PageDirty(page))
                 __ClearPageDirty(page);
+        /*
+         * For now, we report if PG_reserved was found set, but do not
+         * clear it, and do not free the page. But we shall soon need
+         * to do more, for when the ZERO_PAGE count wraps negative.
+         */
+        return PageReserved(page);
 }
 
 /*
@@ -397,11 +397,10 @@ void __free_pages_ok(struct page *page, unsigned int order)
 {
         LIST_HEAD(list);
         int i;
+        int reserved = 0;
 
         arch_free_page(page, order);
 
-        mod_page_state(pgfree, 1 << order);
-
 #ifndef CONFIG_MMU
         if (order > 0)
                 for (i = 1 ; i < (1 << order) ; ++i)
@@ -409,8 +408,12 @@ void __free_pages_ok(struct page *page, unsigned int order)
 #endif
 
         for (i = 0 ; i < (1 << order) ; ++i)
-                free_pages_check(__FUNCTION__, page + i);
+                reserved += free_pages_check(__FUNCTION__, page + i);
+        if (reserved)
+                return;
+
         list_add(&page->lru, &list);
+        mod_page_state(pgfree, 1 << order);
         kernel_map_pages(page, 1<<order, 0);
         free_pages_bulk(page_zone(page), 1, &list, order);
 }
@@ -468,7 +471,7 @@ void set_page_refs(struct page *page, int order)
 /*
  * This page is about to be returned from the page allocator
  */
-static void prep_new_page(struct page *page, int order)
+static int prep_new_page(struct page *page, int order)
 {
         if ( page_mapcount(page) ||
                 page->mapping != NULL ||
@@ -486,12 +489,20 @@ static void prep_new_page(struct page *page, int order)
                         1 << PG_reserved )))
                 bad_page(__FUNCTION__, page);
 
+        /*
+         * For now, we report if PG_reserved was found set, but do not
+         * clear it, and do not allocate the page: as a safety net.
+         */
+        if (PageReserved(page))
+                return 1;
+
         page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
                         1 << PG_referenced | 1 << PG_arch_1 |
                         1 << PG_checked | 1 << PG_mappedtodisk);
         set_page_private(page, 0);
         set_page_refs(page, order);
         kernel_map_pages(page, 1 << order, 1);
+        return 0;
 }
 
 /*
@@ -674,11 +685,14 @@ static void fastcall free_hot_cold_page(struct page *page, int cold)
 
         arch_free_page(page, 0);
 
-        kernel_map_pages(page, 1, 0);
-        inc_page_state(pgfree);
         if (PageAnon(page))
                 page->mapping = NULL;
-        free_pages_check(__FUNCTION__, page);
+        if (free_pages_check(__FUNCTION__, page))
+                return;
+
+        inc_page_state(pgfree);
+        kernel_map_pages(page, 1, 0);
+
         pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
         local_irq_save(flags);
         list_add(&page->lru, &pcp->list);
@@ -717,12 +731,14 @@ static struct page *
 buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
 {
         unsigned long flags;
-        struct page *page = NULL;
+        struct page *page;
         int cold = !!(gfp_flags & __GFP_COLD);
 
+again:
         if (order == 0) {
                 struct per_cpu_pages *pcp;
 
+                page = NULL;
                 pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
                 local_irq_save(flags);
                 if (pcp->count <= pcp->low)
@@ -744,7 +760,8 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
         if (page != NULL) {
                 BUG_ON(bad_range(zone, page));
                 mod_page_state_zone(zone, pgalloc, 1 << order);
-                prep_new_page(page, order);
+                if (prep_new_page(page, order))
+                        goto again;
 
                 if (gfp_flags & __GFP_ZERO)
                         prep_zero_page(page, order, gfp_flags);
diff --git a/mm/rmap.c b/mm/rmap.c
index 914d04b98bee..2e034a0b89ab 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -225,7 +225,9 @@ vma_address(struct page *page, struct vm_area_struct *vma)
 
 /*
  * At what user virtual address is page expected in vma? checking that the
- * page matches the vma: currently only used by unuse_process, on anon pages.
+ * page matches the vma: currently only used on anon pages, by unuse_vma;
+ * and by extraordinary checks on anon pages in VM_UNPAGED vmas, taking
+ * care that an mmap of /dev/mem might window free and foreign pages.
  */
 unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
 {
@@ -234,7 +236,8 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
                         (void *)page->mapping - PAGE_MAPPING_ANON)
                         return -EFAULT;
         } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
-                if (vma->vm_file->f_mapping != page->mapping)
+                if (!vma->vm_file ||
+                    vma->vm_file->f_mapping != page->mapping)
                         return -EFAULT;
         } else
                 return -EFAULT;
@@ -529,10 +532,8 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
          * If the page is mlock()d, we cannot swap it out.
          * If it's recently referenced (perhaps page_referenced
          * skipped over this mm) then we should reactivate it.
-         *
-         * Pages belonging to VM_RESERVED regions should not happen here.
          */
-        if ((vma->vm_flags & (VM_LOCKED|VM_RESERVED)) ||
+        if ((vma->vm_flags & VM_LOCKED) ||
                         ptep_clear_flush_young(vma, address, pte)) {
                 ret = SWAP_FAIL;
                 goto out_unmap;
@@ -727,7 +728,7 @@ static int try_to_unmap_file(struct page *page)
 
         list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
                                                 shared.vm_set.list) {
-                if (vma->vm_flags & (VM_LOCKED|VM_RESERVED))
+                if (vma->vm_flags & VM_LOCKED)
                         continue;
                 cursor = (unsigned long) vma->vm_private_data;
                 if (cursor > max_nl_cursor)
@@ -761,7 +762,7 @@ static int try_to_unmap_file(struct page *page)
         do {
                 list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
                                                 shared.vm_set.list) {
-                        if (vma->vm_flags & (VM_LOCKED|VM_RESERVED))
+                        if (vma->vm_flags & VM_LOCKED)
                                 continue;
                         cursor = (unsigned long) vma->vm_private_data;
                         while ( cursor < max_nl_cursor &&
@@ -783,11 +784,8 @@ static int try_to_unmap_file(struct page *page)
          * in locked vmas). Reset cursor on all unreserved nonlinear
          * vmas, now forgetting on which ones it had fallen behind.
          */
-        list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
-                                                shared.vm_set.list) {
-                if (!(vma->vm_flags & VM_RESERVED))
-                        vma->vm_private_data = NULL;
-        }
+        list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
+                vma->vm_private_data = NULL;
 out:
         spin_unlock(&mapping->i_mmap_lock);
         return ret;
diff --git a/mm/swap.c b/mm/swap.c
index d09cf7f03e76..73d351439ef6 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -34,8 +34,6 @@
 /* How many pages do we try to swap or page in/out together? */
 int page_cluster;
 
-#ifdef CONFIG_HUGETLB_PAGE
-
 void put_page(struct page *page)
 {
         if (unlikely(PageCompound(page))) {
@@ -52,7 +50,6 @@ void put_page(struct page *page)
                 __page_cache_release(page);
 }
 EXPORT_SYMBOL(put_page);
-#endif
 
 /*
  * Writeback is about to end against a page which has been marked for immediate
diff --git a/mm/truncate.c b/mm/truncate.c
index 29c18f68dc35..9173ab500604 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -282,8 +282,8 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
                          * Zap the rest of the file in one hit.
                          */
                         unmap_mapping_range(mapping,
-                                page_index << PAGE_CACHE_SHIFT,
-                                (end - page_index + 1)
+                                (loff_t)page_index<<PAGE_CACHE_SHIFT,
+                                (loff_t)(end - page_index + 1)
                                                 << PAGE_CACHE_SHIFT,
                                 0);
                         did_range_unmap = 1;
@@ -292,7 +292,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
                          * Just zap this page
                          */
                         unmap_mapping_range(mapping,
-                                page_index << PAGE_CACHE_SHIFT,
+                                (loff_t)page_index<<PAGE_CACHE_SHIFT,
                                 PAGE_CACHE_SIZE, 0);
                 }
         }