Diffstat (limited to 'mm/memory.c')
-rw-r--r--   mm/memory.c   131
1 file changed, 79 insertions(+), 52 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index da642b5528fa..e83f9440bb66 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -343,6 +343,23 @@ static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
 #define NO_RSS 2	/* Increment neither file_rss nor anon_rss */
 
 /*
+ * This function is called to print an error when a pte in a
+ * !VM_RESERVED region is found pointing to an invalid pfn (which
+ * is an error).
+ *
+ * The calling function must still handle the error.
+ */
+void print_bad_pte(struct vm_area_struct *vma, pte_t pte, unsigned long vaddr)
+{
+	printk(KERN_ERR "Bad pte = %08llx, process = %s, "
+			"vm_flags = %lx, vaddr = %lx\n",
+		(long long)pte_val(pte),
+		(vma->vm_mm == current->mm ? current->comm : "???"),
+		vma->vm_flags, vaddr);
+	dump_stack();
+}
+
+/*
  * copy one vm_area from one task to the other. Assumes the page tables
  * already present in the new task to be cleared in the whole range
  * covered by this vma.
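print_bad_pte() centralizes the reporting that the bare pte_ERROR() calls used to do, adding the owning process, vm_flags and the faulting address, and it deliberately leaves recovery to the caller. A sketch of the resulting calling convention, modelled on the do_wp_page() and do_file_page() hunks further down; this fragment is illustrative, not additional code from the patch:

	if (unlikely(!pfn_valid(pfn))) {
		print_bad_pte(vma, orig_pte, address);	/* diagnose the corrupt pte */
		return VM_FAULT_OOM;			/* caller chooses the failure mode */
	}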
@@ -353,9 +370,10 @@ static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
 
 static inline int
 copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-		pte_t *dst_pte, pte_t *src_pte, unsigned long vm_flags,
+		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
 		unsigned long addr)
 {
+	unsigned long vm_flags = vma->vm_flags;
 	pte_t pte = *src_pte;
 	struct page *page;
 	unsigned long pfn;
@@ -375,18 +393,22 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		goto out_set_pte;
 	}
 
+	/* If the region is VM_RESERVED, the mapping is not
+	 * mapped via rmap - duplicate the pte as is.
+	 */
+	if (vm_flags & VM_RESERVED)
+		goto out_set_pte;
+
 	pfn = pte_pfn(pte);
-	/* the pte points outside of valid memory, the
-	 * mapping is assumed to be good, meaningful
-	 * and not mapped via rmap - duplicate the
-	 * mapping as is.
+	/* If the pte points outside of valid memory but
+	 * the region is not VM_RESERVED, we have a problem.
 	 */
-	page = NULL;
-	if (pfn_valid(pfn))
-		page = pfn_to_page(pfn);
+	if (unlikely(!pfn_valid(pfn))) {
+		print_bad_pte(vma, pte, addr);
+		goto out_set_pte; /* try to do something sane */
+	}
 
-	if (!page || PageReserved(page))
-		goto out_set_pte;
+	page = pfn_to_page(pfn);
 
 	/*
 	 * If it's a COW mapping, write protect it both
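The hunk above sets up the three-way split that the rest of the patch repeats: VM_RESERVED vmas get a raw pte copy with no struct page accounting, a bad pfn in a normal vma is reported and then copied anyway as damage control, and only a validated pfn reaches the refcount/rmap/rss path. Condensed here for reference (a summary sketch of the code just shown, not an extra excerpt):

	if (vm_flags & VM_RESERVED)		/* driver mapping: never rmapped */
		goto out_set_pte;
	pfn = pte_pfn(pte);
	if (unlikely(!pfn_valid(pfn))) {	/* corruption: report, then cope */
		print_bad_pte(vma, pte, addr);
		goto out_set_pte;
	}
	page = pfn_to_page(pfn);		/* pfn is known valid from here on */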
@@ -418,7 +440,6 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		unsigned long addr, unsigned long end)
 {
 	pte_t *src_pte, *dst_pte;
-	unsigned long vm_flags = vma->vm_flags;
 	int progress = 0;
 	int rss[NO_RSS+1], anon;
 
@@ -446,8 +467,7 @@ again:
 			progress++;
 			continue;
 		}
-		anon = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
-							vm_flags, addr);
+		anon = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr);
 		rss[anon]++;
 		progress += 8;
 	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
@@ -541,10 +561,12 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	return 0;
 }
 
-static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
+static void zap_pte_range(struct mmu_gather *tlb,
+				struct vm_area_struct *vma, pmd_t *pmd,
 				unsigned long addr, unsigned long end,
 				struct zap_details *details)
 {
+	struct mm_struct *mm = tlb->mm;
 	pte_t *pte;
 	int file_rss = 0;
 	int anon_rss = 0;
@@ -556,11 +578,12 @@ static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
 			continue;
 		if (pte_present(ptent)) {
 			struct page *page = NULL;
-			unsigned long pfn = pte_pfn(ptent);
-			if (pfn_valid(pfn)) {
-				page = pfn_to_page(pfn);
-				if (PageReserved(page))
-					page = NULL;
+			if (!(vma->vm_flags & VM_RESERVED)) {
+				unsigned long pfn = pte_pfn(ptent);
+				if (unlikely(!pfn_valid(pfn)))
+					print_bad_pte(vma, ptent, addr);
+				else
+					page = pfn_to_page(pfn);
 			}
 			if (unlikely(details) && page) {
 				/*
@@ -580,7 +603,7 @@ static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
 					page->index > details->last_index))
 					continue;
 			}
-			ptent = ptep_get_and_clear_full(tlb->mm, addr, pte,
+			ptent = ptep_get_and_clear_full(mm, addr, pte,
 							tlb->fullmm);
 			tlb_remove_tlb_entry(tlb, pte, addr);
 			if (unlikely(!page))
@@ -588,7 +611,7 @@ static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
 			if (unlikely(details) && details->nonlinear_vma
 			    && linear_page_index(details->nonlinear_vma,
 						addr) != page->index)
-				set_pte_at(tlb->mm, addr, pte,
+				set_pte_at(mm, addr, pte,
 					   pgoff_to_pte(page->index));
 			if (PageAnon(page))
 				anon_rss++;
@@ -611,14 +634,15 @@ static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
 			continue;
 		if (!pte_file(ptent))
 			free_swap_and_cache(pte_to_swp_entry(ptent));
-		pte_clear_full(tlb->mm, addr, pte, tlb->fullmm);
+		pte_clear_full(mm, addr, pte, tlb->fullmm);
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 
-	add_mm_rss(tlb->mm, -file_rss, -anon_rss);
+	add_mm_rss(mm, -file_rss, -anon_rss);
 	pte_unmap(pte - 1);
 }
 
-static inline void zap_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+static inline void zap_pmd_range(struct mmu_gather *tlb,
+				struct vm_area_struct *vma, pud_t *pud,
 				unsigned long addr, unsigned long end,
 				struct zap_details *details)
 {
@@ -630,11 +654,12 @@ static inline void zap_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 		next = pmd_addr_end(addr, end);
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		zap_pte_range(tlb, pmd, addr, next, details);
+		zap_pte_range(tlb, vma, pmd, addr, next, details);
 	} while (pmd++, addr = next, addr != end);
 }
 
-static inline void zap_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+static inline void zap_pud_range(struct mmu_gather *tlb,
+				struct vm_area_struct *vma, pgd_t *pgd,
 				unsigned long addr, unsigned long end,
 				struct zap_details *details)
 {
@@ -646,7 +671,7 @@ static inline void zap_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 		next = pud_addr_end(addr, end);
 		if (pud_none_or_clear_bad(pud))
 			continue;
-		zap_pmd_range(tlb, pud, addr, next, details);
+		zap_pmd_range(tlb, vma, pud, addr, next, details);
 	} while (pud++, addr = next, addr != end);
 }
 
@@ -667,7 +692,7 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		zap_pud_range(tlb, pgd, addr, next, details);
+		zap_pud_range(tlb, vma, pgd, addr, next, details);
 	} while (pgd++, addr = next, addr != end);
 	tlb_end_vma(tlb, vma);
 }
@@ -967,7 +992,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			continue;
 		}
 
-		if (!vma || (vma->vm_flags & VM_IO)
+		if (!vma || (vma->vm_flags & (VM_IO | VM_RESERVED))
 				|| !(flags & vma->vm_flags))
 			return i ? : -EFAULT;
 
@@ -1027,8 +1052,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			if (pages) {
 				pages[i] = page;
 				flush_dcache_page(page);
-				if (!PageReserved(page))
-					page_cache_get(page);
+				page_cache_get(page);
 			}
 			if (vmas)
 				vmas[i] = vma;
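With the PageReserved() test gone, get_user_pages() takes a reference on every page it returns, the zero page included, so callers drop exactly one reference per returned page. A hedged caller-side sketch; uaddr, n, pages, npages and i are hypothetical locals, not part of this patch:

	down_read(&current->mm->mmap_sem);
	npages = get_user_pages(current, current->mm, uaddr, n,
				0 /* write */, 0 /* force */, pages, NULL);
	up_read(&current->mm->mmap_sem);

	for (i = 0; i < npages; i++)
		page_cache_release(pages[i]);	/* unconditional: no PageReserved() check */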
@@ -1051,7 +1075,11 @@ static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
 	if (!pte)
 		return -ENOMEM;
 	do {
-		pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(addr), prot));
+		struct page *page = ZERO_PAGE(addr);
+		pte_t zero_pte = pte_wrprotect(mk_pte(page, prot));
+		page_cache_get(page);
+		page_add_file_rmap(page);
+		inc_mm_counter(mm, file_rss);
 		BUG_ON(!pte_none(*pte));
 		set_pte_at(mm, addr, pte, zero_pte);
 	} while (pte++, addr += PAGE_SIZE, addr != end);
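From this hunk on, the zero page is accounted like any other file-rmapped page: each pte mapping it takes one page reference, one mapcount and one file_rss unit, and the zap path above drops the same three. A small userspace model of that invariant (all names hypothetical; it only demonstrates that the bookkeeping balances):

	#include <assert.h>

	struct mm_model   { long file_rss; };
	struct page_model { long refcount, mapcount; };

	static void map_zero_pte(struct mm_model *mm, struct page_model *zp)
	{
		zp->refcount++;		/* page_cache_get()             */
		zp->mapcount++;		/* page_add_file_rmap()         */
		mm->file_rss++;		/* inc_mm_counter(mm, file_rss) */
	}

	static void zap_zero_pte(struct mm_model *mm, struct page_model *zp)
	{
		zp->mapcount--;		/* page_remove_rmap()           */
		zp->refcount--;		/* page_cache_release()         */
		mm->file_rss--;		/* add_mm_rss(mm, -file_rss, 0) */
	}

	int main(void)
	{
		struct mm_model mm = { 0 };
		struct page_model zero_page = { 1, 0 };	/* one long-lived boot ref */

		map_zero_pte(&mm, &zero_page);
		zap_zero_pte(&mm, &zero_page);
		assert(mm.file_rss == 0 && zero_page.mapcount == 0);
		return 0;
	}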
@@ -1132,8 +1160,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
 		return -ENOMEM;
 	do {
 		BUG_ON(!pte_none(*pte));
-		if (!pfn_valid(pfn) || PageReserved(pfn_to_page(pfn)))
-			set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
+		set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
 		pfn++;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	pte_unmap(pte - 1);
@@ -1195,8 +1222,8 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	 * rest of the world about it:
 	 *   VM_IO tells people not to look at these pages
 	 *	(accesses can have side effects).
-	 *   VM_RESERVED tells swapout not to try to touch
-	 *	this region.
+	 *   VM_RESERVED tells the core MM not to "manage" these pages
+	 *	(e.g. refcount, mapcount, try to swap them out).
 	 */
 	vma->vm_flags |= VM_IO | VM_RESERVED;
 
@@ -1256,11 +1283,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	pte_t entry;
 	int ret = VM_FAULT_MINOR;
 
+	BUG_ON(vma->vm_flags & VM_RESERVED);
+
 	if (unlikely(!pfn_valid(pfn))) {
 		/*
 		 * Page table corrupted: show pte and kill process.
 		 */
-		pte_ERROR(orig_pte);
+		print_bad_pte(vma, orig_pte, address);
 		ret = VM_FAULT_OOM;
 		goto unlock;
 	}
@@ -1284,8 +1313,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	/*
 	 * Ok, we need to copy. Oh, well..
 	 */
-	if (!PageReserved(old_page))
-		page_cache_get(old_page);
+	page_cache_get(old_page);
 	pte_unmap(page_table);
 	spin_unlock(&mm->page_table_lock);
 
@@ -1308,14 +1336,10 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	spin_lock(&mm->page_table_lock);
 	page_table = pte_offset_map(pmd, address);
 	if (likely(pte_same(*page_table, orig_pte))) {
-		if (PageReserved(old_page))
+		page_remove_rmap(old_page);
+		if (!PageAnon(old_page)) {
 			inc_mm_counter(mm, anon_rss);
-		else {
-			page_remove_rmap(old_page);
-			if (!PageAnon(old_page)) {
-				inc_mm_counter(mm, anon_rss);
-				dec_mm_counter(mm, file_rss);
-			}
+			dec_mm_counter(mm, file_rss);
 		}
 		flush_cache_page(vma, address, pfn);
 		entry = mk_pte(new_page, vma->vm_page_prot);
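With the PageReserved special case removed, the COW bookkeeping becomes uniform: the old page always loses its rmap entry, and if it was file-backed one rss unit migrates from file_rss to anon_rss for the anonymous copy replacing it. A tiny userspace model of that counter move (hypothetical names, illustration only):

	#include <assert.h>

	struct mm_model { long file_rss, anon_rss; };

	/* Models the pte_same() branch above: COW installs an anonymous
	 * copy; counters move only if the old page was file-backed. */
	static void cow_account(struct mm_model *mm, int old_was_anon)
	{
		if (!old_was_anon) {
			mm->anon_rss++;		/* inc_mm_counter(mm, anon_rss) */
			mm->file_rss--;		/* dec_mm_counter(mm, file_rss) */
		}				/* anon to anon: totals unchanged */
	}

	int main(void)
	{
		struct mm_model mm = { .file_rss = 1, .anon_rss = 0 };

		cow_account(&mm, 0);	/* write fault on a private file page */
		assert(mm.file_rss == 0 && mm.anon_rss == 1);
		return 0;
	}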
@@ -1769,14 +1793,13 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned long address, pte_t *page_table, pmd_t *pmd,
 		int write_access)
 {
+	struct page *page = ZERO_PAGE(address);
 	pte_t entry;
 
 	/* Mapping of ZERO_PAGE - vm_page_prot is readonly */
-	entry = mk_pte(ZERO_PAGE(addr), vma->vm_page_prot);
+	entry = mk_pte(page, vma->vm_page_prot);
 
 	if (write_access) {
-		struct page *page;
-
 		/* Allocate our own private page. */
 		pte_unmap(page_table);
 		spin_unlock(&mm->page_table_lock);
@@ -1800,6 +1823,10 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		lru_cache_add_active(page);
 		SetPageReferenced(page);
 		page_add_anon_rmap(page, vma, address);
+	} else {
+		inc_mm_counter(mm, file_rss);
+		page_add_file_rmap(page);
+		page_cache_get(page);
 	}
 
 	set_pte_at(mm, address, page_table, entry);
@@ -1916,7 +1943,7 @@ retry:
 			inc_mm_counter(mm, anon_rss);
 			lru_cache_add_active(new_page);
 			page_add_anon_rmap(new_page, vma, address);
-		} else if (!PageReserved(new_page)) {
+		} else if (!(vma->vm_flags & VM_RESERVED)) {
 			inc_mm_counter(mm, file_rss);
 			page_add_file_rmap(new_page);
 		}
@@ -1957,7 +1984,7 @@ static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		/*
 		 * Page table corrupted: show pte and kill process.
 		 */
-		pte_ERROR(orig_pte);
+		print_bad_pte(vma, orig_pte, address);
 		return VM_FAULT_OOM;
 	}
 	/* We can then assume vm->vm_ops && vma->vm_ops->populate */
@@ -2232,7 +2259,7 @@ static int __init gate_vma_init(void)
 	gate_vma.vm_start = FIXADDR_USER_START;
 	gate_vma.vm_end = FIXADDR_USER_END;
 	gate_vma.vm_page_prot = PAGE_READONLY;
-	gate_vma.vm_flags = 0;
+	gate_vma.vm_flags = VM_RESERVED;
 	return 0;
 }
 __initcall(gate_vma_init);