Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 133
1 file changed, 97 insertions, 36 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 0ec7bc644271..7e2a4b1580e3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -126,7 +126,7 @@ static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
         pmd_clear(pmd);
         pte_lock_deinit(page);
         pte_free_tlb(tlb, page);
-        dec_page_state(nr_page_table_pages);
+        dec_zone_page_state(page, NR_PAGETABLE);
         tlb->mm->nr_ptes--;
 }
 
@@ -311,7 +311,7 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
                 pte_free(new);
         } else {
                 mm->nr_ptes++;
-                inc_page_state(nr_page_table_pages);
+                inc_zone_page_state(new, NR_PAGETABLE);
                 pmd_populate(mm, pmd, new);
         }
         spin_unlock(&mm->page_table_lock);
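
Note on the two counter hunks above: dec_page_state()/inc_page_state() bumped a single global count of page-table pages, whereas dec_zone_page_state()/inc_zone_page_state() take the page itself so the count can be kept per memory zone. The stand-alone sketch below only illustrates that difference; all names and structures in it are invented for illustration and are not the kernel's implementation.

/* Illustration only: per-zone counters versus one global counter. */
#include <stdio.h>

enum zone_stat_item { NR_PT_PAGES, NR_STAT_ITEMS };

struct zone { long vm_stat[NR_STAT_ITEMS]; };
struct page { struct zone *zone; };        /* zone the page belongs to */

static void inc_zone_stat(struct page *page, enum zone_stat_item item)
{
        page->zone->vm_stat[item]++;       /* charged to the page's own zone */
}

static void dec_zone_stat(struct page *page, enum zone_stat_item item)
{
        page->zone->vm_stat[item]--;
}

int main(void)
{
        struct zone normal = { { 0 } }, highmem = { { 0 } };
        struct page pt_lo = { &normal }, pt_hi = { &highmem };

        inc_zone_stat(&pt_lo, NR_PT_PAGES);     /* __pte_alloc() side */
        inc_zone_stat(&pt_hi, NR_PT_PAGES);
        dec_zone_stat(&pt_hi, NR_PT_PAGES);     /* free_pte_range() side */

        printf("normal: %ld, highmem: %ld\n",
               normal.vm_stat[NR_PT_PAGES], highmem.vm_stat[NR_PT_PAGES]);
        return 0;
}
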
@@ -434,7 +434,9 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
         /* pte contains position in swap or file, so copy. */
         if (unlikely(!pte_present(pte))) {
                 if (!pte_file(pte)) {
-                        swap_duplicate(pte_to_swp_entry(pte));
+                        swp_entry_t entry = pte_to_swp_entry(pte);
+
+                        swap_duplicate(entry);
                         /* make sure dst_mm is on swapoff's mmlist. */
                         if (unlikely(list_empty(&dst_mm->mmlist))) {
                                 spin_lock(&mmlist_lock);
@@ -443,6 +445,16 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                                                  &src_mm->mmlist);
                                 spin_unlock(&mmlist_lock);
                         }
+                        if (is_write_migration_entry(entry) &&
+                                        is_cow_mapping(vm_flags)) {
+                                /*
+                                 * COW mappings require pages in both parent
+                                 * and child to be set to read.
+                                 */
+                                make_migration_entry_read(&entry);
+                                pte = swp_entry_to_pte(entry);
+                                set_pte_at(src_mm, addr, src_pte, pte);
+                        }
                 }
                 goto out_set_pte;
         }
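
Note on the copy_one_pte() hunks above: at fork() time a non-present PTE may now hold a migration entry instead of a real swap location. If it is a write migration entry and the mapping is copy-on-write, the entry is downgraded to read in both parent and child, so whichever task writes first still takes a COW fault once migration completes. The sketch below is a stand-alone analogy of that downgrade; the bit layout and helper names are invented and do not match the kernel's encoding.

/* Illustration only: clearing the "writable" bit of a migration entry. */
#include <stdbool.h>
#include <stdio.h>

struct swp_entry { unsigned long val; };

#define MIG_WRITE_BIT   0x1UL           /* invented: "restore as writable" */

static bool is_write_migration_entry(struct swp_entry e)
{
        return e.val & MIG_WRITE_BIT;
}

static void make_migration_entry_read(struct swp_entry *e)
{
        e->val &= ~MIG_WRITE_BIT;       /* parent and child both map read-only */
}

int main(void)
{
        /* invented encoding: page frame number in the high bits, flag in bit 0 */
        struct swp_entry e = { (42UL << 1) | MIG_WRITE_BIT };

        if (is_write_migration_entry(e))
                make_migration_entry_read(&e);

        printf("entry is now %s\n",
               is_write_migration_entry(e) ? "writable" : "read-only");
        return 0;
}
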
@@ -1445,25 +1457,60 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
         struct page *old_page, *new_page;
         pte_t entry;
-        int ret = VM_FAULT_MINOR;
+        int reuse, ret = VM_FAULT_MINOR;
 
         old_page = vm_normal_page(vma, address, orig_pte);
         if (!old_page)
                 goto gotten;
 
-        if (PageAnon(old_page) && !TestSetPageLocked(old_page)) {
-                int reuse = can_share_swap_page(old_page);
-                unlock_page(old_page);
-                if (reuse) {
-                        flush_cache_page(vma, address, pte_pfn(orig_pte));
-                        entry = pte_mkyoung(orig_pte);
-                        entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-                        ptep_set_access_flags(vma, address, page_table, entry, 1);
-                        update_mmu_cache(vma, address, entry);
-                        lazy_mmu_prot_update(entry);
-                        ret |= VM_FAULT_WRITE;
-                        goto unlock;
+        if (unlikely((vma->vm_flags & (VM_SHARED|VM_WRITE)) ==
+                                (VM_SHARED|VM_WRITE))) {
+                if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
+                        /*
+                         * Notify the address space that the page is about to
+                         * become writable so that it can prohibit this or wait
+                         * for the page to get into an appropriate state.
+                         *
+                         * We do this without the lock held, so that it can
+                         * sleep if it needs to.
+                         */
+                        page_cache_get(old_page);
+                        pte_unmap_unlock(page_table, ptl);
+
+                        if (vma->vm_ops->page_mkwrite(vma, old_page) < 0)
+                                goto unwritable_page;
+
+                        page_cache_release(old_page);
+
+                        /*
+                         * Since we dropped the lock we need to revalidate
+                         * the PTE as someone else may have changed it.  If
+                         * they did, we just return, as we can count on the
+                         * MMU to tell us if they didn't also make it writable.
+                         */
+                        page_table = pte_offset_map_lock(mm, pmd, address,
+                                                         &ptl);
+                        if (!pte_same(*page_table, orig_pte))
+                                goto unlock;
                 }
+
+                reuse = 1;
+        } else if (PageAnon(old_page) && !TestSetPageLocked(old_page)) {
+                reuse = can_share_swap_page(old_page);
+                unlock_page(old_page);
+        } else {
+                reuse = 0;
+        }
+
+        if (reuse) {
+                flush_cache_page(vma, address, pte_pfn(orig_pte));
+                entry = pte_mkyoung(orig_pte);
+                entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+                ptep_set_access_flags(vma, address, page_table, entry, 1);
+                update_mmu_cache(vma, address, entry);
+                lazy_mmu_prot_update(entry);
+                ret |= VM_FAULT_WRITE;
+                goto unlock;
         }
 
         /*
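
Note on the do_wp_page() hunk above: a write fault on a shared writable mapping now calls the address space's page_mkwrite() hook (when one is provided) before the PTE is made writable. The page-table lock is dropped first so the hook may sleep, the PTE is revalidated afterwards, and a negative return from the hook becomes VM_FAULT_SIGBUS through the new unwritable_page: label. The user-space sketch below only mimics that veto pattern; every type and function in it is a simplified stand-in rather than the kernel API.

/* Illustration only: a backing store vetoing a write fault. */
#include <stdio.h>

struct page { int blocks_reserved; };

struct vm_ops {
        int (*page_mkwrite)(struct page *page);
};

/* hypothetical filesystem: refuse when no blocks can be reserved for writeback */
static int myfs_page_mkwrite(struct page *page)
{
        if (!page->blocks_reserved)
                return -1;              /* becomes VM_FAULT_SIGBUS */
        return 0;
}

static int write_fault(const struct vm_ops *ops, struct page *page)
{
        if (ops->page_mkwrite && ops->page_mkwrite(page) < 0)
                return -1;              /* unwritable_page: error path */
        return 0;                       /* reuse the page read-write */
}

int main(void)
{
        struct vm_ops ops = { myfs_page_mkwrite };
        struct page ok = { 1 }, full = { 0 };

        printf("reserved page: %d, full filesystem: %d\n",
               write_fault(&ops, &ok), write_fault(&ops, &full));
        return 0;
}
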
@@ -1523,6 +1570,10 @@ oom:
         if (old_page)
                 page_cache_release(old_page);
         return VM_FAULT_OOM;
+
+unwritable_page:
+        page_cache_release(old_page);
+        return VM_FAULT_SIGBUS;
 }
 
 /*
@@ -1879,7 +1930,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 goto out;
 
         entry = pte_to_swp_entry(orig_pte);
-again:
+        if (is_migration_entry(entry)) {
+                migration_entry_wait(mm, pmd, address);
+                goto out;
+        }
         page = lookup_swap_cache(entry);
         if (!page) {
                 swapin_readahead(entry, address, vma);
@@ -1897,18 +1951,12 @@ again:
 
                 /* Had to read the page from swap area: Major fault */
                 ret = VM_FAULT_MAJOR;
-                inc_page_state(pgmajfault);
+                count_vm_event(PGMAJFAULT);
                 grab_swap_token();
         }
 
         mark_page_accessed(page);
         lock_page(page);
-        if (!PageSwapCache(page)) {
-                /* Page migration has occured */
-                unlock_page(page);
-                page_cache_release(page);
-                goto again;
-        }
 
         /*
          * Back out if somebody else already faulted in this pte.
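
Note on the two do_swap_page() hunks above: a PTE that encodes a migration entry now makes the fault wait in migration_entry_wait() before any swap-cache lookup, so the old again:/PageSwapCache retry loop, which guarded against a page being migrated out of the swap cache behind the faulting task's back, is no longer needed. The sketch below only shows that control-flow split, with invented types.

/* Illustration only: decide between swap-in and waiting for migration. */
#include <stdbool.h>
#include <stdio.h>

enum entry_type { SWAP_ENTRY, MIGRATION_ENTRY };

struct swp_entry { enum entry_type type; };

static bool is_migration_entry(struct swp_entry e)
{
        return e.type == MIGRATION_ENTRY;
}

static const char *handle_nonpresent_pte(struct swp_entry e)
{
        if (is_migration_entry(e))
                return "wait for migration to finish, then retry the fault";
        return "look up the swap cache and swap the page in";
}

int main(void)
{
        struct swp_entry swap = { SWAP_ENTRY }, mig = { MIGRATION_ENTRY };

        printf("swap entry:      %s\n", handle_nonpresent_pte(swap));
        printf("migration entry: %s\n", handle_nonpresent_pte(mig));
        return 0;
}
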
@@ -2074,18 +2122,31 @@ retry:
         /*
          * Should we do an early C-O-W break?
          */
-        if (write_access && !(vma->vm_flags & VM_SHARED)) {
-                struct page *page;
+        if (write_access) {
+                if (!(vma->vm_flags & VM_SHARED)) {
+                        struct page *page;
 
-                if (unlikely(anon_vma_prepare(vma)))
-                        goto oom;
-                page = alloc_page_vma(GFP_HIGHUSER, vma, address);
-                if (!page)
-                        goto oom;
-                copy_user_highpage(page, new_page, address);
-                page_cache_release(new_page);
-                new_page = page;
-                anon = 1;
+                        if (unlikely(anon_vma_prepare(vma)))
+                                goto oom;
+                        page = alloc_page_vma(GFP_HIGHUSER, vma, address);
+                        if (!page)
+                                goto oom;
+                        copy_user_highpage(page, new_page, address);
+                        page_cache_release(new_page);
+                        new_page = page;
+                        anon = 1;
+
+                } else {
+                        /* if the page will be shareable, see if the backing
+                         * address space wants to know that the page is about
+                         * to become writable */
+                        if (vma->vm_ops->page_mkwrite &&
+                            vma->vm_ops->page_mkwrite(vma, new_page) < 0
+                            ) {
+                                page_cache_release(new_page);
+                                return VM_FAULT_SIGBUS;
+                        }
+                }
         }
 
         page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
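
Note on the do_no_page() hunk above: the early copy-on-write break is now taken only for private mappings, while a write fault that is about to map a shared page first asks the backing address space through page_mkwrite() and fails with VM_FAULT_SIGBUS if the request is refused. The stand-alone sketch below models only that branch, with invented stand-ins for the kernel types.

/* Illustration only: the write-access branch of a no-page fault. */
#include <stdbool.h>
#include <stdio.h>

enum fault_result { MAP_PRIVATE_COPY, MAP_SHARED_PAGE, FAULT_SIGBUS };

/* stand-in for asking ->page_mkwrite() whether the write may proceed */
static bool backing_store_allows_write;

static enum fault_result no_page_fault(bool write_access, bool shared)
{
        if (write_access && !shared)
                return MAP_PRIVATE_COPY;        /* early C-O-W break */
        if (write_access && shared && !backing_store_allows_write)
                return FAULT_SIGBUS;            /* page_mkwrite() refused */
        return MAP_SHARED_PAGE;                 /* map the backing page as-is */
}

int main(void)
{
        backing_store_allows_write = false;

        printf("private write: %d, shared write: %d, read: %d\n",
               no_page_fault(true, false),
               no_page_fault(true, true),
               no_page_fault(false, true));
        return 0;
}
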
@@ -2263,7 +2324,7 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
         __set_current_state(TASK_RUNNING);
 
-        inc_page_state(pgfault);
+        count_vm_event(PGFAULT);
 
         if (unlikely(is_vm_hugetlb_page(vma)))
                 return hugetlb_fault(mm, vma, address, write_access);
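
Note on the event-counter conversions above: PGFAULT and PGMAJFAULT move from the old global page-state counters to count_vm_event(), which accumulates per-CPU event counts that are folded together only when the statistics are read. The sketch below is a user-space analogy with invented names, not the kernel's vmstat implementation.

/* Illustration only: per-CPU event counters summed at read time. */
#include <stdio.h>

enum vm_event_item { PGFAULT, PGMAJFAULT, NR_VM_EVENT_ITEMS };

#define NR_CPUS 4

static unsigned long vm_event_states[NR_CPUS][NR_VM_EVENT_ITEMS];

static void count_event(int cpu, enum vm_event_item item)
{
        vm_event_states[cpu][item]++;   /* no shared cache line, no lock */
}

static unsigned long sum_event(enum vm_event_item item)
{
        unsigned long sum = 0;

        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                sum += vm_event_states[cpu][item];
        return sum;
}

int main(void)
{
        count_event(0, PGFAULT);        /* a fault handled on CPU 0 */
        count_event(1, PGFAULT);        /* and one on CPU 1 */
        count_event(1, PGMAJFAULT);     /* a major fault from swap-in */

        printf("pgfault=%lu pgmajfault=%lu\n",
               sum_event(PGFAULT), sum_event(PGMAJFAULT));
        return 0;
}
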