Diffstat (limited to 'mm/memory.c')
-rw-r--r--	mm/memory.c	131
1 file changed, 79 insertions, 52 deletions
diff --git a/mm/memory.c b/mm/memory.c
index da642b5528fa..e83f9440bb66 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -343,6 +343,23 @@ static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
 #define NO_RSS 2	/* Increment neither file_rss nor anon_rss */
 
 /*
+ * This function is called to print an error when a pte in a
+ * !VM_RESERVED region is found pointing to an invalid pfn (which
+ * is an error.
+ *
+ * The calling function must still handle the error.
+ */
+void print_bad_pte(struct vm_area_struct *vma, pte_t pte, unsigned long vaddr)
+{
+	printk(KERN_ERR "Bad pte = %08llx, process = %s, "
+			"vm_flags = %lx, vaddr = %lx\n",
+		(long long)pte_val(pte),
+		(vma->vm_mm == current->mm ? current->comm : "???"),
+		vma->vm_flags, vaddr);
+	dump_stack();
+}
+
+/*
  * copy one vm_area from one task to the other. Assumes the page tables
  * already present in the new task to be cleared in the whole range
  * covered by this vma.
@@ -353,9 +370,10 @@ static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
 
 static inline int
 copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-		pte_t *dst_pte, pte_t *src_pte, unsigned long vm_flags,
+		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
 		unsigned long addr)
 {
+	unsigned long vm_flags = vma->vm_flags;
 	pte_t pte = *src_pte;
 	struct page *page;
 	unsigned long pfn;
@@ -375,18 +393,22 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		goto out_set_pte;
 	}
 
+	/* If the region is VM_RESERVED, the mapping is not
+	 * mapped via rmap - duplicate the pte as is.
+	 */
+	if (vm_flags & VM_RESERVED)
+		goto out_set_pte;
+
 	pfn = pte_pfn(pte);
-	/* the pte points outside of valid memory, the
-	 * mapping is assumed to be good, meaningful
-	 * and not mapped via rmap - duplicate the
-	 * mapping as is.
+	/* If the pte points outside of valid memory but
+	 * the region is not VM_RESERVED, we have a problem.
 	 */
-	page = NULL;
-	if (pfn_valid(pfn))
-		page = pfn_to_page(pfn);
+	if (unlikely(!pfn_valid(pfn))) {
+		print_bad_pte(vma, pte, addr);
+		goto out_set_pte; /* try to do something sane */
+	}
 
-	if (!page || PageReserved(page))
-		goto out_set_pte;
+	page = pfn_to_page(pfn);
 
 	/*
 	 * If it's a COW mapping, write protect it both
@@ -418,7 +440,6 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		unsigned long addr, unsigned long end)
 {
 	pte_t *src_pte, *dst_pte;
-	unsigned long vm_flags = vma->vm_flags;
 	int progress = 0;
 	int rss[NO_RSS+1], anon;
 
@@ -446,8 +467,7 @@ again:
 			progress++;
 			continue;
 		}
-		anon = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
-						vm_flags, addr);
+		anon = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma,addr);
 		rss[anon]++;
 		progress += 8;
 	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
@@ -541,10 +561,12 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	return 0;
 }
 
-static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
+static void zap_pte_range(struct mmu_gather *tlb,
+				struct vm_area_struct *vma, pmd_t *pmd,
 				unsigned long addr, unsigned long end,
 				struct zap_details *details)
 {
+	struct mm_struct *mm = tlb->mm;
 	pte_t *pte;
 	int file_rss = 0;
 	int anon_rss = 0;
@@ -556,11 +578,12 @@ static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
 			continue;
 		if (pte_present(ptent)) {
 			struct page *page = NULL;
-			unsigned long pfn = pte_pfn(ptent);
-			if (pfn_valid(pfn)) {
-				page = pfn_to_page(pfn);
-				if (PageReserved(page))
-					page = NULL;
+			if (!(vma->vm_flags & VM_RESERVED)) {
+				unsigned long pfn = pte_pfn(ptent);
+				if (unlikely(!pfn_valid(pfn)))
+					print_bad_pte(vma, ptent, addr);
+				else
+					page = pfn_to_page(pfn);
 			}
 			if (unlikely(details) && page) {
 				/*
@@ -580,7 +603,7 @@ static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
 				    page->index > details->last_index))
 					continue;
 			}
-			ptent = ptep_get_and_clear_full(tlb->mm, addr, pte,
+			ptent = ptep_get_and_clear_full(mm, addr, pte,
 							tlb->fullmm);
 			tlb_remove_tlb_entry(tlb, pte, addr);
 			if (unlikely(!page))
@@ -588,7 +611,7 @@ static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
 			if (unlikely(details) && details->nonlinear_vma
 			    && linear_page_index(details->nonlinear_vma,
 						addr) != page->index)
-				set_pte_at(tlb->mm, addr, pte,
+				set_pte_at(mm, addr, pte,
 					   pgoff_to_pte(page->index));
 			if (PageAnon(page))
 				anon_rss++;
@@ -611,14 +634,15 @@ static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
 			continue;
 		if (!pte_file(ptent))
 			free_swap_and_cache(pte_to_swp_entry(ptent));
-		pte_clear_full(tlb->mm, addr, pte, tlb->fullmm);
+		pte_clear_full(mm, addr, pte, tlb->fullmm);
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 
-	add_mm_rss(tlb->mm, -file_rss, -anon_rss);
+	add_mm_rss(mm, -file_rss, -anon_rss);
 	pte_unmap(pte - 1);
 }
 
-static inline void zap_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+static inline void zap_pmd_range(struct mmu_gather *tlb,
+				struct vm_area_struct *vma, pud_t *pud,
 				unsigned long addr, unsigned long end,
 				struct zap_details *details)
 {
@@ -630,11 +654,12 @@ static inline void zap_pmd_range(struct mmu_gather *tlb, pud_t *pud,
 		next = pmd_addr_end(addr, end);
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		zap_pte_range(tlb, pmd, addr, next, details);
+		zap_pte_range(tlb, vma, pmd, addr, next, details);
 	} while (pmd++, addr = next, addr != end);
 }
 
-static inline void zap_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+static inline void zap_pud_range(struct mmu_gather *tlb,
+				struct vm_area_struct *vma, pgd_t *pgd,
 				unsigned long addr, unsigned long end,
 				struct zap_details *details)
 {
@@ -646,7 +671,7 @@ static inline void zap_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 		next = pud_addr_end(addr, end);
 		if (pud_none_or_clear_bad(pud))
 			continue;
-		zap_pmd_range(tlb, pud, addr, next, details);
+		zap_pmd_range(tlb, vma, pud, addr, next, details);
 	} while (pud++, addr = next, addr != end);
 }
 
@@ -667,7 +692,7 @@ static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		zap_pud_range(tlb, pgd, addr, next, details);
+		zap_pud_range(tlb, vma, pgd, addr, next, details);
 	} while (pgd++, addr = next, addr != end);
 	tlb_end_vma(tlb, vma);
 }
@@ -967,7 +992,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			continue;
 		}
 
-		if (!vma || (vma->vm_flags & VM_IO)
+		if (!vma || (vma->vm_flags & (VM_IO | VM_RESERVED))
 				|| !(flags & vma->vm_flags))
 			return i ? : -EFAULT;
 
@@ -1027,8 +1052,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			if (pages) {
 				pages[i] = page;
 				flush_dcache_page(page);
-				if (!PageReserved(page))
-					page_cache_get(page);
+				page_cache_get(page);
 			}
 			if (vmas)
 				vmas[i] = vma;
@@ -1051,7 +1075,11 @@ static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
 	if (!pte)
 		return -ENOMEM;
 	do {
-		pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(addr), prot));
+		struct page *page = ZERO_PAGE(addr);
+		pte_t zero_pte = pte_wrprotect(mk_pte(page, prot));
+		page_cache_get(page);
+		page_add_file_rmap(page);
+		inc_mm_counter(mm, file_rss);
 		BUG_ON(!pte_none(*pte));
 		set_pte_at(mm, addr, pte, zero_pte);
 	} while (pte++, addr += PAGE_SIZE, addr != end);
@@ -1132,8 +1160,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
 		return -ENOMEM;
 	do {
 		BUG_ON(!pte_none(*pte));
-		if (!pfn_valid(pfn) || PageReserved(pfn_to_page(pfn)))
-			set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
+		set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
 		pfn++;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	pte_unmap(pte - 1);
@@ -1195,8 +1222,8 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	 * rest of the world about it:
 	 *   VM_IO tells people not to look at these pages
 	 *	(accesses can have side effects).
-	 *   VM_RESERVED tells swapout not to try to touch
-	 *	this region.
+	 *   VM_RESERVED tells the core MM not to "manage" these pages
+	 *	(e.g. refcount, mapcount, try to swap them out).
 	 */
 	vma->vm_flags |= VM_IO | VM_RESERVED;
 
@@ -1256,11 +1283,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	pte_t entry;
 	int ret = VM_FAULT_MINOR;
 
+	BUG_ON(vma->vm_flags & VM_RESERVED);
+
 	if (unlikely(!pfn_valid(pfn))) {
 		/*
 		 * Page table corrupted: show pte and kill process.
 		 */
-		pte_ERROR(orig_pte);
+		print_bad_pte(vma, orig_pte, address);
 		ret = VM_FAULT_OOM;
 		goto unlock;
 	}
@@ -1284,8 +1313,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	/*
 	 * Ok, we need to copy. Oh, well..
 	 */
-	if (!PageReserved(old_page))
-		page_cache_get(old_page);
+	page_cache_get(old_page);
 	pte_unmap(page_table);
 	spin_unlock(&mm->page_table_lock);
 
@@ -1308,14 +1336,10 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	spin_lock(&mm->page_table_lock);
 	page_table = pte_offset_map(pmd, address);
 	if (likely(pte_same(*page_table, orig_pte))) {
-		if (PageReserved(old_page))
+		page_remove_rmap(old_page);
+		if (!PageAnon(old_page)) {
 			inc_mm_counter(mm, anon_rss);
-		else {
-			page_remove_rmap(old_page);
-			if (!PageAnon(old_page)) {
-				inc_mm_counter(mm, anon_rss);
-				dec_mm_counter(mm, file_rss);
-			}
+			dec_mm_counter(mm, file_rss);
 		}
 		flush_cache_page(vma, address, pfn);
 		entry = mk_pte(new_page, vma->vm_page_prot);
@@ -1769,14 +1793,13 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned long address, pte_t *page_table, pmd_t *pmd,
 		int write_access)
 {
+	struct page *page = ZERO_PAGE(addr);
 	pte_t entry;
 
 	/* Mapping of ZERO_PAGE - vm_page_prot is readonly */
-	entry = mk_pte(ZERO_PAGE(addr), vma->vm_page_prot);
+	entry = mk_pte(page, vma->vm_page_prot);
 
 	if (write_access) {
-		struct page *page;
-
 		/* Allocate our own private page. */
 		pte_unmap(page_table);
 		spin_unlock(&mm->page_table_lock);
@@ -1800,6 +1823,10 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		lru_cache_add_active(page);
 		SetPageReferenced(page);
 		page_add_anon_rmap(page, vma, address);
+	} else {
+		inc_mm_counter(mm, file_rss);
+		page_add_file_rmap(page);
+		page_cache_get(page);
 	}
 
 	set_pte_at(mm, address, page_table, entry);
@@ -1916,7 +1943,7 @@ retry:
 		inc_mm_counter(mm, anon_rss);
 		lru_cache_add_active(new_page);
 		page_add_anon_rmap(new_page, vma, address);
-	} else if (!PageReserved(new_page)) {
+	} else if (!(vma->vm_flags & VM_RESERVED)) {
 		inc_mm_counter(mm, file_rss);
 		page_add_file_rmap(new_page);
 	}
@@ -1957,7 +1984,7 @@ static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		/*
 		 * Page table corrupted: show pte and kill process.
 		 */
-		pte_ERROR(orig_pte);
+		print_bad_pte(vma, orig_pte, address);
 		return VM_FAULT_OOM;
 	}
 	/* We can then assume vm->vm_ops && vma->vm_ops->populate */
@@ -2232,7 +2259,7 @@ static int __init gate_vma_init(void)
 	gate_vma.vm_start = FIXADDR_USER_START;
 	gate_vma.vm_end = FIXADDR_USER_END;
 	gate_vma.vm_page_prot = PAGE_READONLY;
-	gate_vma.vm_flags = 0;
+	gate_vma.vm_flags = VM_RESERVED;
 	return 0;
 }
 __initcall(gate_vma_init);