Diffstat (limited to 'mm/memory.c')
 mm/memory.c | 128
 1 file changed, 88 insertions(+), 40 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 2998cfc12f5b..d1f46f4e4c8a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -334,7 +334,7 @@ static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
 
 /*
  * This function is called to print an error when a pte in a
- * !VM_RESERVED region is found pointing to an invalid pfn (which
+ * !VM_UNPAGED region is found pointing to an invalid pfn (which
  * is an error.
  *
  * The calling function must still handle the error.
@@ -350,6 +350,22 @@ void print_bad_pte(struct vm_area_struct *vma, pte_t pte, unsigned long vaddr)
 }
 
 /*
+ * page_is_anon applies strict checks for an anonymous page belonging to
+ * this vma at this address. It is used on VM_UNPAGED vmas, which are
+ * usually populated with shared originals (which must not be counted),
+ * but occasionally contain private COWed copies (when !VM_SHARED, or
+ * perhaps via ptrace when VM_SHARED). An mmap of /dev/mem might window
+ * free pages, pages from other processes, or from other parts of this:
+ * it's tricky, but try not to be deceived by foreign anonymous pages.
+ */
+static inline int page_is_anon(struct page *page,
+			struct vm_area_struct *vma, unsigned long addr)
+{
+	return page && PageAnon(page) && page_mapped(page) &&
+		page_address_in_vma(page, vma) == addr;
+}
+
+/*
  * copy one vm_area from one task to the other. Assumes the page tables
  * already present in the new task to be cleared in the whole range
  * covered by this vma.
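
[Orientation note, not part of the patch: the per-pte decision that the hunks below apply in copy_one_pte and zap_pte_range can be condensed roughly as the sketch that follows; the helper name vm_unpaged_lookup_sketch is hypothetical and assumes the page_is_anon() helper added above.]

/*
 * Illustrative sketch only. In a VM_UNPAGED vma, only a private anon
 * copy that belongs to this vma at this address is treated as an
 * rmap-managed page; device memory and foreign pages yield NULL and
 * are left strictly alone.
 */
static struct page *vm_unpaged_lookup_sketch(struct vm_area_struct *vma,
					     pte_t pte, unsigned long addr)
{
	unsigned long pfn = pte_pfn(pte);
	struct page *page = pfn_valid(pfn) ? pfn_to_page(pfn) : NULL;

	if (vma->vm_flags & VM_UNPAGED)
		return page_is_anon(page, vma, addr) ? page : NULL;
	return page;		/* caller still reports a bad pte if NULL */
}
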
@@ -381,23 +397,22 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		goto out_set_pte;
 	}
 
-	/* If the region is VM_RESERVED, the mapping is not
-	 * mapped via rmap - duplicate the pte as is.
-	 */
-	if (vm_flags & VM_RESERVED)
-		goto out_set_pte;
-
 	pfn = pte_pfn(pte);
-	/* If the pte points outside of valid memory but
-	 * the region is not VM_RESERVED, we have a problem.
+	page = pfn_valid(pfn)? pfn_to_page(pfn): NULL;
+
+	if (unlikely(vm_flags & VM_UNPAGED))
+		if (!page_is_anon(page, vma, addr))
+			goto out_set_pte;
+
+	/*
+	 * If the pte points outside of valid memory but
+	 * the region is not VM_UNPAGED, we have a problem.
 	 */
-	if (unlikely(!pfn_valid(pfn))) {
+	if (unlikely(!page)) {
 		print_bad_pte(vma, pte, addr);
 		goto out_set_pte; /* try to do something sane */
 	}
 
-	page = pfn_to_page(pfn);
-
 	/*
 	 * If it's a COW mapping, write protect it both
 	 * in the parent and the child
@@ -528,7 +543,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	 * readonly mappings. The tradeoff is that copy_page_range is more
 	 * efficient than faulting.
 	 */
-	if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_RESERVED))) {
+	if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_UNPAGED))) {
 		if (!vma->anon_vma)
 			return 0;
 	}
@@ -568,17 +583,20 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 			continue;
 		}
 		if (pte_present(ptent)) {
-			struct page *page = NULL;
+			struct page *page;
+			unsigned long pfn;
 
 			(*zap_work) -= PAGE_SIZE;
 
-			if (!(vma->vm_flags & VM_RESERVED)) {
-				unsigned long pfn = pte_pfn(ptent);
-				if (unlikely(!pfn_valid(pfn)))
-					print_bad_pte(vma, ptent, addr);
-				else
-					page = pfn_to_page(pfn);
-			}
+			pfn = pte_pfn(ptent);
+			page = pfn_valid(pfn)? pfn_to_page(pfn): NULL;
+
+			if (unlikely(vma->vm_flags & VM_UNPAGED)) {
+				if (!page_is_anon(page, vma, addr))
+					page = NULL;
+			} else if (unlikely(!page))
+				print_bad_pte(vma, ptent, addr);
+
 			if (unlikely(details) && page) {
 				/*
 				 * unmap_shared_mapping_pages() wants to
@@ -968,7 +986,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			continue;
 		}
 
-		if (!vma || (vma->vm_flags & (VM_IO | VM_RESERVED))
+		if (!vma || (vma->vm_flags & VM_IO)
 				|| !(vm_flags & vma->vm_flags))
 			return i ? : -EFAULT;
 
@@ -1191,10 +1209,16 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	 * rest of the world about it:
 	 *   VM_IO tells people not to look at these pages
 	 *	(accesses can have side effects).
-	 *   VM_RESERVED tells the core MM not to "manage" these pages
-	 *	(e.g. refcount, mapcount, try to swap them out).
+	 *   VM_RESERVED is specified all over the place, because
+	 *	in 2.4 it kept swapout's vma scan off this vma; but
+	 *	in 2.6 the LRU scan won't even find its pages, so this
+	 *	flag means no more than count its pages in reserved_vm,
+	 *	and omit it from core dump, even when VM_IO turned off.
+	 *   VM_UNPAGED tells the core MM not to "manage" these pages
+	 *	(e.g. refcount, mapcount, try to swap them out): in
+	 *	particular, zap_pte_range does not try to free them.
 	 */
-	vma->vm_flags |= VM_IO | VM_RESERVED;
+	vma->vm_flags |= VM_IO | VM_RESERVED | VM_UNPAGED;
 
 	BUG_ON(addr >= end);
 	pfn -= addr >> PAGE_SHIFT;
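
[Context, not part of the patch: remap_pfn_range() is typically reached from a driver's ->mmap() method; a minimal sketch follows, in which mydrv_phys_base is a hypothetical physical base address established elsewhere.]

#include <linux/fs.h>
#include <linux/mm.h>

static unsigned long mydrv_phys_base;	/* hypothetical, set at probe time */

static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;

	/*
	 * remap_pfn_range() marks the vma VM_IO | VM_RESERVED | VM_UNPAGED
	 * (per the hunk above), so copy_one_pte() and zap_pte_range() will
	 * not refcount, rmap or try to free these ptes.
	 */
	return remap_pfn_range(vma, vma->vm_start,
			       mydrv_phys_base >> PAGE_SHIFT,
			       size, vma->vm_page_prot);
}
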
@@ -1271,22 +1295,29 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned long address, pte_t *page_table, pmd_t *pmd,
 		spinlock_t *ptl, pte_t orig_pte)
 {
-	struct page *old_page, *new_page;
+	struct page *old_page, *src_page, *new_page;
 	unsigned long pfn = pte_pfn(orig_pte);
 	pte_t entry;
 	int ret = VM_FAULT_MINOR;
 
-	BUG_ON(vma->vm_flags & VM_RESERVED);
-
 	if (unlikely(!pfn_valid(pfn))) {
 		/*
 		 * Page table corrupted: show pte and kill process.
+		 * Or it's an attempt to COW an out-of-map VM_UNPAGED
+		 * entry, which copy_user_highpage does not support.
 		 */
 		print_bad_pte(vma, orig_pte, address);
 		ret = VM_FAULT_OOM;
 		goto unlock;
 	}
 	old_page = pfn_to_page(pfn);
+	src_page = old_page;
+
+	if (unlikely(vma->vm_flags & VM_UNPAGED))
+		if (!page_is_anon(old_page, vma, address)) {
+			old_page = NULL;
+			goto gotten;
+		}
 
 	if (PageAnon(old_page) && !TestSetPageLocked(old_page)) {
 		int reuse = can_share_swap_page(old_page);
@@ -1307,11 +1338,12 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * Ok, we need to copy. Oh, well..
 	 */
 	page_cache_get(old_page);
+gotten:
 	pte_unmap_unlock(page_table, ptl);
 
 	if (unlikely(anon_vma_prepare(vma)))
 		goto oom;
-	if (old_page == ZERO_PAGE(address)) {
+	if (src_page == ZERO_PAGE(address)) {
 		new_page = alloc_zeroed_user_highpage(vma, address);
 		if (!new_page)
 			goto oom;
@@ -1319,7 +1351,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
 		if (!new_page)
 			goto oom;
-		copy_user_highpage(new_page, old_page, address);
+		copy_user_highpage(new_page, src_page, address);
 	}
 
 	/*
@@ -1327,11 +1359,14 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 */
 	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
 	if (likely(pte_same(*page_table, orig_pte))) {
-		page_remove_rmap(old_page);
-		if (!PageAnon(old_page)) {
+		if (old_page) {
+			page_remove_rmap(old_page);
+			if (!PageAnon(old_page)) {
+				dec_mm_counter(mm, file_rss);
+				inc_mm_counter(mm, anon_rss);
+			}
+		} else
 			inc_mm_counter(mm, anon_rss);
-			dec_mm_counter(mm, file_rss);
-		}
 		flush_cache_page(vma, address, pfn);
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
@@ -1345,13 +1380,16 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		new_page = old_page;
 		ret |= VM_FAULT_WRITE;
 	}
-	page_cache_release(new_page);
-	page_cache_release(old_page);
+	if (new_page)
+		page_cache_release(new_page);
+	if (old_page)
+		page_cache_release(old_page);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 	return ret;
 oom:
-	page_cache_release(old_page);
+	if (old_page)
+		page_cache_release(old_page);
 	return VM_FAULT_OOM;
 }
 
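[Userspace illustration, not part of the patch, and assuming root access to an unrestricted /dev/mem on the kernel series this patch targets: writing through a MAP_PRIVATE mapping of /dev/mem is one way to reach the do_wp_page() path above, where old_page is not this vma's anon page and the new gotten: path allocates a private anon copy. The 0xa0000 offset is only an example.]

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/mem", O_RDWR);
	if (fd < 0) { perror("open /dev/mem"); return 1; }

	/* MAP_PRIVATE of device memory: the vma is VM_UNPAGED. */
	unsigned char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
				MAP_PRIVATE, fd, 0xa0000);
	if (p == MAP_FAILED) { perror("mmap"); return 1; }

	p[0] ^= 0xff;	/* write fault: do_wp_page() COWs an anon page */
	printf("private copy modified at %p\n", (void *)p);

	munmap(p, 4096);
	close(fd);
	return 0;
}
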
@@ -1774,7 +1812,16 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	spinlock_t *ptl;
 	pte_t entry;
 
-	if (write_access) {
+	/*
+	 * A VM_UNPAGED vma will normally be filled with present ptes
+	 * by remap_pfn_range, and never arrive here; but it might have
+	 * holes, or if !VM_DONTEXPAND, mremap might have expanded it.
+	 * It's weird enough handling anon pages in unpaged vmas, we do
+	 * not want to worry about ZERO_PAGEs too (it may or may not
+	 * matter if their counts wrap): just give them anon pages.
+	 */
+
+	if (write_access || (vma->vm_flags & VM_UNPAGED)) {
 		/* Allocate our own private page. */
 		pte_unmap(page_table);
 
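[Another userspace sketch, not part of the patch and with the same /dev/mem caveats as above: if mremap() is permitted to grow such a mapping, a fault in the newly added tail has no pte and no nopage method, so on the kernel series this patch targets it falls through to do_anonymous_page() on a VM_UNPAGED vma, the case the comment above describes; the expansion may be refused, so the call is checked.]

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/mem", O_RDWR);
	if (fd < 0) { perror("open /dev/mem"); return 1; }

	unsigned char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
				MAP_PRIVATE, fd, 0xa0000);
	if (p == MAP_FAILED) { perror("mmap"); return 1; }

	/* Try to grow the VM_UNPAGED vma; may fail, e.g. if VM_DONTEXPAND. */
	unsigned char *q = mremap(p, 4096, 8192, MREMAP_MAYMOVE);
	if (q == MAP_FAILED) { perror("mremap"); return 1; }

	/* Read fault in the hole: with this patch it gets a private anon
	 * page rather than the ZERO_PAGE. */
	volatile unsigned char c = q[4096];
	printf("read 0x%02x from the expanded tail\n", c);

	munmap(q, 8192);
	close(fd);
	return 0;
}
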
@@ -1849,6 +1896,7 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	int anon = 0;
 
 	pte_unmap(page_table);
+	BUG_ON(vma->vm_flags & VM_UNPAGED);
 
 	if (vma->vm_file) {
 		mapping = vma->vm_file->f_mapping;
@@ -1924,7 +1972,7 @@ retry:
 		inc_mm_counter(mm, anon_rss);
 		lru_cache_add_active(new_page);
 		page_add_anon_rmap(new_page, vma, address);
-	} else if (!(vma->vm_flags & VM_RESERVED)) {
+	} else {
 		inc_mm_counter(mm, file_rss);
 		page_add_file_rmap(new_page);
 	}
@@ -2203,7 +2251,7 @@ static int __init gate_vma_init(void)
 	gate_vma.vm_start = FIXADDR_USER_START;
 	gate_vma.vm_end = FIXADDR_USER_END;
 	gate_vma.vm_page_prot = PAGE_READONLY;
-	gate_vma.vm_flags = VM_RESERVED;
+	gate_vma.vm_flags = 0;
 	return 0;
 }
 __initcall(gate_vma_init);