Diffstat (limited to 'mm/memory.c')
-rw-r--r--	mm/memory.c	128
1 files changed, 88 insertions, 40 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 2998cfc12f5b..d1f46f4e4c8a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -334,7 +334,7 @@ static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
 
 /*
  * This function is called to print an error when a pte in a
- * !VM_RESERVED region is found pointing to an invalid pfn (which
+ * !VM_UNPAGED region is found pointing to an invalid pfn (which
  * is an error.
  *
  * The calling function must still handle the error.
@@ -350,6 +350,22 @@ void print_bad_pte(struct vm_area_struct *vma, pte_t pte, unsigned long vaddr)
 }
 
 /*
+ * page_is_anon applies strict checks for an anonymous page belonging to
+ * this vma at this address. It is used on VM_UNPAGED vmas, which are
+ * usually populated with shared originals (which must not be counted),
+ * but occasionally contain private COWed copies (when !VM_SHARED, or
+ * perhaps via ptrace when VM_SHARED). An mmap of /dev/mem might window
+ * free pages, pages from other processes, or from other parts of this:
+ * it's tricky, but try not to be deceived by foreign anonymous pages.
+ */
+static inline int page_is_anon(struct page *page,
+			struct vm_area_struct *vma, unsigned long addr)
+{
+	return page && PageAnon(page) && page_mapped(page) &&
+		page_address_in_vma(page, vma) == addr;
+}
+
+/*
  * copy one vm_area from one task to the other. Assumes the page tables
  * already present in the new task to be cleared in the whole range
  * covered by this vma.
@@ -381,23 +397,22 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		goto out_set_pte;
 	}
 
-	/* If the region is VM_RESERVED, the mapping is not
-	 * mapped via rmap - duplicate the pte as is.
-	 */
-	if (vm_flags & VM_RESERVED)
-		goto out_set_pte;
-
 	pfn = pte_pfn(pte);
-	/* If the pte points outside of valid memory but
-	 * the region is not VM_RESERVED, we have a problem.
+	page = pfn_valid(pfn)? pfn_to_page(pfn): NULL;
+
+	if (unlikely(vm_flags & VM_UNPAGED))
+		if (!page_is_anon(page, vma, addr))
+			goto out_set_pte;
+
+	/*
+	 * If the pte points outside of valid memory but
+	 * the region is not VM_UNPAGED, we have a problem.
 	 */
-	if (unlikely(!pfn_valid(pfn))) {
+	if (unlikely(!page)) {
 		print_bad_pte(vma, pte, addr);
 		goto out_set_pte; /* try to do something sane */
 	}
 
-	page = pfn_to_page(pfn);
-
 	/*
 	 * If it's a COW mapping, write protect it both
 	 * in the parent and the child
@@ -528,7 +543,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	 * readonly mappings. The tradeoff is that copy_page_range is more
 	 * efficient than faulting.
 	 */
-	if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_RESERVED))) {
+	if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_UNPAGED))) {
 		if (!vma->anon_vma)
 			return 0;
 	}
@@ -568,17 +583,20 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 			continue;
 		}
 		if (pte_present(ptent)) {
-			struct page *page = NULL;
+			struct page *page;
+			unsigned long pfn;
 
 			(*zap_work) -= PAGE_SIZE;
 
-			if (!(vma->vm_flags & VM_RESERVED)) {
-				unsigned long pfn = pte_pfn(ptent);
-				if (unlikely(!pfn_valid(pfn)))
-					print_bad_pte(vma, ptent, addr);
-				else
-					page = pfn_to_page(pfn);
-			}
+			pfn = pte_pfn(ptent);
+			page = pfn_valid(pfn)? pfn_to_page(pfn): NULL;
+
+			if (unlikely(vma->vm_flags & VM_UNPAGED)) {
+				if (!page_is_anon(page, vma, addr))
+					page = NULL;
+			} else if (unlikely(!page))
+				print_bad_pte(vma, ptent, addr);
+
 			if (unlikely(details) && page) {
 				/*
 				 * unmap_shared_mapping_pages() wants to
@@ -968,7 +986,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			continue;
 		}
 
-		if (!vma || (vma->vm_flags & (VM_IO | VM_RESERVED))
+		if (!vma || (vma->vm_flags & VM_IO)
 				|| !(vm_flags & vma->vm_flags))
 			return i ? : -EFAULT;
 
@@ -1191,10 +1209,16 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	 * rest of the world about it:
 	 *   VM_IO tells people not to look at these pages
 	 *	(accesses can have side effects).
-	 *   VM_RESERVED tells the core MM not to "manage" these pages
-	 *	(e.g. refcount, mapcount, try to swap them out).
+	 *   VM_RESERVED is specified all over the place, because
+	 *	in 2.4 it kept swapout's vma scan off this vma; but
+	 *	in 2.6 the LRU scan won't even find its pages, so this
+	 *	flag means no more than count its pages in reserved_vm,
+	 *	and omit it from core dump, even when VM_IO turned off.
+	 *   VM_UNPAGED tells the core MM not to "manage" these pages
+	 *	(e.g. refcount, mapcount, try to swap them out): in
+	 *	particular, zap_pte_range does not try to free them.
 	 */
-	vma->vm_flags |= VM_IO | VM_RESERVED;
+	vma->vm_flags |= VM_IO | VM_RESERVED | VM_UNPAGED;
 
 	BUG_ON(addr >= end);
 	pfn -= addr >> PAGE_SHIFT;
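[Editorial note] The comment above describes flags a driver inherits simply by calling remap_pfn_range() from its mmap method. A minimal, hypothetical sketch follows (mydev_mmap and MYDEV_PHYS_BASE are invented for illustration and are not part of this patch); the single remap_pfn_range() call is what marks the vma VM_IO | VM_RESERVED | VM_UNPAGED:

/*
 * Hypothetical driver sketch, not from this patch: the device and its
 * physical aperture are made up.
 */
#include <linux/fs.h>
#include <linux/mm.h>

#define MYDEV_PHYS_BASE	0xfd000000UL	/* made-up device aperture */

static int mydev_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;
	unsigned long pfn = (MYDEV_PHYS_BASE >> PAGE_SHIFT) + vma->vm_pgoff;

	/* Fill the range with present ptes; this also sets
	 * VM_IO | VM_RESERVED | VM_UNPAGED on the vma. */
	if (remap_pfn_range(vma, vma->vm_start, pfn, size,
			    vma->vm_page_prot))
		return -EAGAIN;
	return 0;
}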
@@ -1271,22 +1295,29 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned long address, pte_t *page_table, pmd_t *pmd,
 		spinlock_t *ptl, pte_t orig_pte)
 {
-	struct page *old_page, *new_page;
+	struct page *old_page, *src_page, *new_page;
 	unsigned long pfn = pte_pfn(orig_pte);
 	pte_t entry;
 	int ret = VM_FAULT_MINOR;
 
-	BUG_ON(vma->vm_flags & VM_RESERVED);
-
 	if (unlikely(!pfn_valid(pfn))) {
 		/*
 		 * Page table corrupted: show pte and kill process.
+		 * Or it's an attempt to COW an out-of-map VM_UNPAGED
+		 * entry, which copy_user_highpage does not support.
 		 */
 		print_bad_pte(vma, orig_pte, address);
 		ret = VM_FAULT_OOM;
 		goto unlock;
 	}
 	old_page = pfn_to_page(pfn);
+	src_page = old_page;
+
+	if (unlikely(vma->vm_flags & VM_UNPAGED))
+		if (!page_is_anon(old_page, vma, address)) {
+			old_page = NULL;
+			goto gotten;
+		}
 
 	if (PageAnon(old_page) && !TestSetPageLocked(old_page)) {
 		int reuse = can_share_swap_page(old_page);
@@ -1307,11 +1338,12 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * Ok, we need to copy. Oh, well..
 	 */
 	page_cache_get(old_page);
+gotten:
 	pte_unmap_unlock(page_table, ptl);
 
 	if (unlikely(anon_vma_prepare(vma)))
 		goto oom;
-	if (old_page == ZERO_PAGE(address)) {
+	if (src_page == ZERO_PAGE(address)) {
 		new_page = alloc_zeroed_user_highpage(vma, address);
 		if (!new_page)
 			goto oom;
@@ -1319,7 +1351,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
 		if (!new_page)
 			goto oom;
-		copy_user_highpage(new_page, old_page, address);
+		copy_user_highpage(new_page, src_page, address);
 	}
 
 	/*
@@ -1327,11 +1359,14 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 */
 	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
 	if (likely(pte_same(*page_table, orig_pte))) {
-		page_remove_rmap(old_page);
-		if (!PageAnon(old_page)) {
+		if (old_page) {
+			page_remove_rmap(old_page);
+			if (!PageAnon(old_page)) {
+				dec_mm_counter(mm, file_rss);
+				inc_mm_counter(mm, anon_rss);
+			}
+		} else
 			inc_mm_counter(mm, anon_rss);
-			dec_mm_counter(mm, file_rss);
-		}
 		flush_cache_page(vma, address, pfn);
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
@@ -1345,13 +1380,16 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		new_page = old_page;
 		ret |= VM_FAULT_WRITE;
 	}
-	page_cache_release(new_page);
-	page_cache_release(old_page);
+	if (new_page)
+		page_cache_release(new_page);
+	if (old_page)
+		page_cache_release(old_page);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 	return ret;
 oom:
-	page_cache_release(old_page);
+	if (old_page)
+		page_cache_release(old_page);
 	return VM_FAULT_OOM;
 }
 
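[Editorial note] The case these do_wp_page() changes are aimed at is the one named in the page_is_anon comment: a private mapping of /dev/mem. The userspace sketch below is illustrative only (the offset and size are arbitrary examples, not taken from this patch): /dev/mem is populated by remap_pfn_range(), so the vma is VM_UNPAGED, and the first write to a MAP_PRIVATE mapping is expected to fault into do_wp_page(), take the new gotten: path with old_page == NULL, and install a private anonymous copy, which is exactly the page page_is_anon() must later recognise while ignoring the foreign pages the mapping windows.

/* Illustrative userspace sketch, not part of the patch. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/mem", O_RDWR);	/* usually needs root */
	unsigned char *p;

	if (fd < 0)
		return 1;
	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE, fd, 0xa0000);	/* example offset: legacy VGA hole */
	if (p == MAP_FAILED)
		return 1;
	p[0] = 0x55;	/* write fault -> do_wp_page() -> private anon copy */
	printf("%02x\n", p[0]);
	munmap(p, 4096);
	close(fd);
	return 0;
}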
@@ -1774,7 +1812,16 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	spinlock_t *ptl;
 	pte_t entry;
 
-	if (write_access) {
+	/*
+	 * A VM_UNPAGED vma will normally be filled with present ptes
+	 * by remap_pfn_range, and never arrive here; but it might have
+	 * holes, or if !VM_DONTEXPAND, mremap might have expanded it.
+	 * It's weird enough handling anon pages in unpaged vmas, we do
+	 * not want to worry about ZERO_PAGEs too (it may or may not
+	 * matter if their counts wrap): just give them anon pages.
+	 */
+
+	if (write_access || (vma->vm_flags & VM_UNPAGED)) {
 		/* Allocate our own private page. */
 		pte_unmap(page_table);
 
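[Editorial note] One way a VM_UNPAGED vma can reach do_anonymous_page() at all, per the comment above, is an mremap() expansion. A rough userspace sketch, under the assumptions that the (hypothetical) device /dev/mydev maps its range with remap_pfn_range() and does not set VM_DONTEXPAND: the grown tail has no ptes behind it, so the first touch faults into do_anonymous_page(), which after this patch hands out an anonymous page even for a read.

/* Illustrative userspace sketch; device path and sizes are made up. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/dev/mydev", O_RDWR);	/* hypothetical VM_UNPAGED mapping */
	unsigned char *p;

	if (fd < 0)
		return 1;
	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return 1;
	/* Grow the mapping: the new page has no pte behind it yet. */
	p = mremap(p, 4096, 8192, MREMAP_MAYMOVE);
	if (p == MAP_FAILED)
		return 1;
	return p[4096];	/* read fault in the hole -> do_anonymous_page() */
}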
@@ -1849,6 +1896,7 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	int anon = 0;
 
 	pte_unmap(page_table);
+	BUG_ON(vma->vm_flags & VM_UNPAGED);
 
 	if (vma->vm_file) {
 		mapping = vma->vm_file->f_mapping;
@@ -1924,7 +1972,7 @@ retry:
 		inc_mm_counter(mm, anon_rss);
 		lru_cache_add_active(new_page);
 		page_add_anon_rmap(new_page, vma, address);
-	} else if (!(vma->vm_flags & VM_RESERVED)) {
+	} else {
 		inc_mm_counter(mm, file_rss);
 		page_add_file_rmap(new_page);
 	}
@@ -2203,7 +2251,7 @@ static int __init gate_vma_init(void)
 	gate_vma.vm_start = FIXADDR_USER_START;
 	gate_vma.vm_end = FIXADDR_USER_END;
 	gate_vma.vm_page_prot = PAGE_READONLY;
-	gate_vma.vm_flags = VM_RESERVED;
+	gate_vma.vm_flags = 0;
 	return 0;
 }
 __initcall(gate_vma_init);