 fs/buffer.c          |  2
 include/linux/mm.h   | 34
 include/linux/rmap.h | 14
 mm/memory.c          | 29
 mm/mmap.c            | 10
 mm/mprotect.c        | 21
 mm/page-writeback.c  | 17
 mm/rmap.c            | 65
 8 files changed, 162 insertions(+), 30 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c
index 71649ef9b658..3b6d701073e7 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2987,6 +2987,7 @@ int try_to_free_buffers(struct page *page)
 
 	spin_lock(&mapping->private_lock);
 	ret = drop_buffers(page, &buffers_to_free);
+	spin_unlock(&mapping->private_lock);
 	if (ret) {
 		/*
 		 * If the filesystem writes its buffers by hand (eg ext3)
@@ -2998,7 +2999,6 @@ int try_to_free_buffers(struct page *page)
 		 */
 		clear_page_dirty(page);
 	}
-	spin_unlock(&mapping->private_lock);
 out:
 	if (buffers_to_free) {
 		struct buffer_head *bh = buffers_to_free;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7d20b25c58fc..449841413cf1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -15,6 +15,7 @@
 #include <linux/fs.h>
 #include <linux/mutex.h>
 #include <linux/debug_locks.h>
+#include <linux/backing-dev.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -810,6 +811,39 @@ struct shrinker;
 extern struct shrinker *set_shrinker(int, shrinker_t);
 extern void remove_shrinker(struct shrinker *shrinker);
 
+/*
+ * Some shared mappings will want the pages marked read-only
+ * to track write events. If so, we'll downgrade vm_page_prot
+ * to the private version (using protection_map[] without the
+ * VM_SHARED bit).
+ */
+static inline int vma_wants_writenotify(struct vm_area_struct *vma)
+{
+	unsigned int vm_flags = vma->vm_flags;
+
+	/* If it was private or non-writable, the write bit is already clear */
+	if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
+		return 0;
+
+	/* The backer wishes to know when pages are first written to? */
+	if (vma->vm_ops && vma->vm_ops->page_mkwrite)
+		return 1;
+
+	/* The open routine did something to the protections already? */
+	if (pgprot_val(vma->vm_page_prot) !=
+	    pgprot_val(protection_map[vm_flags &
+		    (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]))
+		return 0;
+
+	/* Specialty mapping? */
+	if (vm_flags & (VM_PFNMAP|VM_INSERTPAGE))
+		return 0;
+
+	/* Can the mapping track the dirty pages? */
+	return vma->vm_file && vma->vm_file->f_mapping &&
+		mapping_cap_account_dirty(vma->vm_file->f_mapping);
+}
+
 extern pte_t *FASTCALL(get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl));
 
 int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bf97b0900014..db2c1df4fef9 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -103,6 +103,14 @@ pte_t *page_check_address(struct page *, struct mm_struct *,
  */
 unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
 
+/*
+ * Cleans the PTEs of shared mappings.
+ * (and since clean PTEs should also be readonly, write protects them too)
+ *
+ * returns the number of cleaned PTEs.
+ */
+int page_mkclean(struct page *);
+
 #else	/* !CONFIG_MMU */
 
 #define anon_vma_init() do {} while (0)
@@ -112,6 +120,12 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
 #define page_referenced(page,l) TestClearPageReferenced(page)
 #define try_to_unmap(page, refs) SWAP_FAIL
 
+static inline int page_mkclean(struct page *page)
+{
+	return 0;
+}
+
+
 #endif	/* CONFIG_MMU */
 
 /*
diff --git a/mm/memory.c b/mm/memory.c
index 109e9866237e..fa941b169071 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1458,14 +1458,19 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	struct page *old_page, *new_page;
 	pte_t entry;
-	int reuse, ret = VM_FAULT_MINOR;
+	int reuse = 0, ret = VM_FAULT_MINOR;
+	struct page *dirty_page = NULL;
 
 	old_page = vm_normal_page(vma, address, orig_pte);
 	if (!old_page)
 		goto gotten;
 
-	if (unlikely((vma->vm_flags & (VM_SHARED|VM_WRITE)) ==
-				(VM_SHARED|VM_WRITE))) {
+	/*
+	 * Only catch write-faults on shared writable pages, read-only
+	 * shared pages can get COWed by get_user_pages(.write=1, .force=1).
+	 */
+	if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
+				(VM_WRITE|VM_SHARED))) {
 		if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
 			/*
 			 * Notify the address space that the page is about to
@@ -1494,13 +1499,12 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			if (!pte_same(*page_table, orig_pte))
 				goto unlock;
 		}
-
+		dirty_page = old_page;
+		get_page(dirty_page);
 		reuse = 1;
 	} else if (PageAnon(old_page) && !TestSetPageLocked(old_page)) {
 		reuse = can_share_swap_page(old_page);
 		unlock_page(old_page);
-	} else {
-		reuse = 0;
 	}
 
 	if (reuse) {
@@ -1566,6 +1570,10 @@ gotten:
 		page_cache_release(old_page);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
+	if (dirty_page) {
+		set_page_dirty(dirty_page);
+		put_page(dirty_page);
+	}
 	return ret;
 oom:
 	if (old_page)
@@ -2098,6 +2106,7 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned int sequence = 0;
 	int ret = VM_FAULT_MINOR;
 	int anon = 0;
+	struct page *dirty_page = NULL;
 
 	pte_unmap(page_table);
 	BUG_ON(vma->vm_flags & VM_PFNMAP);
@@ -2192,6 +2201,10 @@ retry:
 		} else {
 			inc_mm_counter(mm, file_rss);
 			page_add_file_rmap(new_page);
+			if (write_access) {
+				dirty_page = new_page;
+				get_page(dirty_page);
+			}
 		}
 	} else {
 		/* One of our sibling threads was faster, back out. */
@@ -2204,6 +2217,10 @@ retry:
 	lazy_mmu_prot_update(entry);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
+	if (dirty_page) {
+		set_page_dirty(dirty_page);
+		put_page(dirty_page);
+	}
 	return ret;
 oom:
 	page_cache_release(new_page);
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1105,12 +1105,6 @@ munmap_back:
 			goto free_vma;
 	}
 
-	/* Don't make the VMA automatically writable if it's shared, but the
-	 * backer wishes to know when pages are first written to */
-	if (vma->vm_ops && vma->vm_ops->page_mkwrite)
-		vma->vm_page_prot =
-			protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC)];
-
 	/* We set VM_ACCOUNT in a shared mapping's vm_flags, to inform
 	 * shmem_zero_setup (perhaps called through /dev/zero's ->mmap)
 	 * that memory reservation must be checked; but that reservation
@@ -1128,6 +1122,10 @@ munmap_back:
 	pgoff = vma->vm_pgoff;
 	vm_flags = vma->vm_flags;
 
+	if (vma_wants_writenotify(vma))
+		vma->vm_page_prot =
+			protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC)];
+
 	if (!file || !vma_merge(mm, prev, addr, vma->vm_end,
 			vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) {
 		file = vma->vm_file;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 638edabaff71..367b7f6c0637 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -123,8 +123,6 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
 	unsigned long oldflags = vma->vm_flags;
 	long nrpages = (end - start) >> PAGE_SHIFT;
 	unsigned long charged = 0;
-	unsigned int mask;
-	pgprot_t newprot;
 	pgoff_t pgoff;
 	int error;
 
@@ -176,24 +174,21 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
 	}
 
 success:
-	/* Don't make the VMA automatically writable if it's shared, but the
-	 * backer wishes to know when pages are first written to */
-	mask = VM_READ|VM_WRITE|VM_EXEC|VM_SHARED;
-	if (vma->vm_ops && vma->vm_ops->page_mkwrite)
-		mask &= ~VM_SHARED;
-
-	newprot = protection_map[newflags & mask];
-
 	/*
 	 * vm_flags and vm_page_prot are protected by the mmap_sem
 	 * held in write mode.
 	 */
 	vma->vm_flags = newflags;
-	vma->vm_page_prot = newprot;
+	vma->vm_page_prot = protection_map[newflags &
+		(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
+	if (vma_wants_writenotify(vma))
+		vma->vm_page_prot = protection_map[newflags &
+			(VM_READ|VM_WRITE|VM_EXEC)];
+
 	if (is_vm_hugetlb_page(vma))
-		hugetlb_change_protection(vma, start, end, newprot);
+		hugetlb_change_protection(vma, start, end, vma->vm_page_prot);
 	else
-		change_protection(vma, start, end, newprot);
+		change_protection(vma, start, end, vma->vm_page_prot);
 	vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
 	vm_stat_account(mm, newflags, vma->vm_file, nrpages);
 	return 0;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 77a0bc4e261a..1c87430b7a25 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -23,6 +23,7 @@
 #include <linux/backing-dev.h>
 #include <linux/blkdev.h>
 #include <linux/mpage.h>
+#include <linux/rmap.h>
 #include <linux/percpu.h>
 #include <linux/notifier.h>
 #include <linux/smp.h>
@@ -550,7 +551,7 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
 		return 0;
 	wbc->for_writepages = 1;
 	if (mapping->a_ops->writepages)
-		ret =  mapping->a_ops->writepages(mapping, wbc);
+		ret = mapping->a_ops->writepages(mapping, wbc);
 	else
 		ret = generic_writepages(mapping, wbc);
 	wbc->for_writepages = 0;
@@ -712,9 +713,15 @@ int test_clear_page_dirty(struct page *page)
 			radix_tree_tag_clear(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_DIRTY);
-			if (mapping_cap_account_dirty(mapping))
-				__dec_zone_page_state(page, NR_FILE_DIRTY);
 			write_unlock_irqrestore(&mapping->tree_lock, flags);
+			/*
+			 * We can continue to use `mapping' here because the
+			 * page is locked, which pins the address_space
+			 */
+			if (mapping_cap_account_dirty(mapping)) {
+				page_mkclean(page);
+				dec_zone_page_state(page, NR_FILE_DIRTY);
+			}
 			return 1;
 		}
 		write_unlock_irqrestore(&mapping->tree_lock, flags);
@@ -744,8 +751,10 @@ int clear_page_dirty_for_io(struct page *page)
 
 	if (mapping) {
 		if (TestClearPageDirty(page)) {
-			if (mapping_cap_account_dirty(mapping))
+			if (mapping_cap_account_dirty(mapping)) {
+				page_mkclean(page);
 				dec_zone_page_state(page, NR_FILE_DIRTY);
+			}
 			return 1;
 		}
 		return 0;
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -434,6 +434,71 @@ int page_referenced(struct page *page, int is_locked)
 	return referenced;
 }
 
+static int page_mkclean_one(struct page *page, struct vm_area_struct *vma)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long address;
+	pte_t *pte, entry;
+	spinlock_t *ptl;
+	int ret = 0;
+
+	address = vma_address(page, vma);
+	if (address == -EFAULT)
+		goto out;
+
+	pte = page_check_address(page, mm, address, &ptl);
+	if (!pte)
+		goto out;
+
+	if (!pte_dirty(*pte) && !pte_write(*pte))
+		goto unlock;
+
+	entry = ptep_get_and_clear(mm, address, pte);
+	entry = pte_mkclean(entry);
+	entry = pte_wrprotect(entry);
+	ptep_establish(vma, address, pte, entry);
+	lazy_mmu_prot_update(entry);
+	ret = 1;
+
+unlock:
+	pte_unmap_unlock(pte, ptl);
+out:
+	return ret;
+}
+
+static int page_mkclean_file(struct address_space *mapping, struct page *page)
+{
+	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+	struct vm_area_struct *vma;
+	struct prio_tree_iter iter;
+	int ret = 0;
+
+	BUG_ON(PageAnon(page));
+
+	spin_lock(&mapping->i_mmap_lock);
+	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+		if (vma->vm_flags & VM_SHARED)
+			ret += page_mkclean_one(page, vma);
+	}
+	spin_unlock(&mapping->i_mmap_lock);
+	return ret;
+}
+
+int page_mkclean(struct page *page)
+{
+	int ret = 0;
+
+	BUG_ON(!PageLocked(page));
+
+	if (page_mapped(page)) {
+		struct address_space *mapping = page_mapping(page);
+		if (mapping)
+			ret = page_mkclean_file(mapping, page);
+	}
+
+	return ret;
+}
+
 /**
  * page_set_anon_rmap - setup new anonymous rmap
  * @page:	the page to add the mapping to
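
For context, here is a minimal sketch (not part of the commit above) of how a filesystem could opt in to the write-notification path this patch wires up: it supplies a ->page_mkwrite() handler in its vm_operations_struct, so vma_wants_writenotify() keeps shared writable mappings read-only and the first store into a page faults into do_wp_page(), which calls the handler before the PTE is made writable and set_page_dirty() runs. The names example_page_mkwrite, example_file_vm_ops and example_file_mmap are made up for illustration; the callback signature follows the 2.6.18-era definition in include/linux/mm.h.

#include <linux/fs.h>
#include <linux/mm.h>

/*
 * Illustrative only: called once per page when a shared, file-backed
 * page goes from read-only to writable.  A real filesystem would
 * typically reserve blocks or journal credits here so that the later
 * writeback triggered by set_page_dirty()/page_mkclean() cannot fail
 * for lack of space.  Returning 0 lets do_wp_page() proceed.
 */
static int example_page_mkwrite(struct vm_area_struct *vma, struct page *page)
{
	return 0;
}

static struct vm_operations_struct example_file_vm_ops = {
	.nopage		= filemap_nopage,	/* generic fault handler of this era */
	.populate	= filemap_populate,
	.page_mkwrite	= example_page_mkwrite,
};

/* Hooked up from the filesystem's ->mmap() file operation: */
static int example_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	file_accessed(file);
	vma->vm_ops = &example_file_vm_ops;
	return 0;
}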