author     Peter Zijlstra <a.p.zijlstra@chello.nl>    2006-09-26 02:30:57 -0400
committer  Linus Torvalds <torvalds@g5.osdl.org>      2006-09-26 11:48:44 -0400
commit     d08b3851da41d0ee60851f2c75b118e1f7a5fc89 (patch)
tree       a01f6930a1387e8f66607e2fe16c62bb7044353b /include
parent     725d704ecaca4a43f067092c140d4f3271cf2856 (diff)
[PATCH] mm: tracking shared dirty pages
Tracking of dirty pages in shared writeable mmap()s.
The idea is simple: write-protect clean shared writeable pages, catch the
write fault, make the page writeable again and set it dirty. On page
write-back, clean all the PTE dirty bits and write-protect them once again.
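In rough, illustrative kernel-style pseudocode (the real work happens in the
write-fault and write-back paths of mm/memory.c and mm/page-writeback.c,
which are outside this include-only diffstat; the snippet below only sketches
the cycle and is not the actual patch):

    /* mmap() time: an eligible shared mapping starts out write-protected */
    vma->vm_page_prot = protection_map[vm_flags & (VM_READ|VM_WRITE|VM_EXEC)];

    /* write fault on a clean page: make the PTE writable and mark it dirty */
    entry = pte_mkdirty(pte_mkwrite(orig_pte));
    set_pte_at(mm, address, page_table, entry);
    set_page_dirty(page);

    /* write-back: clean and write-protect the PTEs again via the new rmap call */
    page_mkclean(page);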
The implementation is a tad harder, mainly because the default
backing_dev_info capabilities were too loosely maintained. Hence it is not
enough to test the backing_dev_info for cap_account_dirty.
The current heuristic is as follows; a VMA is eligible when:
 - it is shared and writeable:
     (vm_flags & (VM_WRITE|VM_SHARED)) == (VM_WRITE|VM_SHARED)
 - it is not a 'special' mapping:
     (vm_flags & (VM_PFNMAP|VM_INSERTPAGE)) == 0
 - its backing_dev_info is cap_account_dirty:
     mapping_cap_account_dirty(vma->vm_file->f_mapping)
 - f_op->mmap() did not change the default page protection
These checks are implemented by the new vma_wants_writenotify() helper added
to include/linux/mm.h below.
Pages from remap_pfn_range() are explicitly excluded because their COW
semantics are already horrid enough (see vm_normal_page() in do_wp_page()) and
because they don't have a backing store anyway.
mprotect() is taught about the new behaviour as well; however, it overrides
the last condition.
Cleaning the pages on write-back is done with page_mkclean(), a new rmap call.
It can be called on any page, but is currently only implemented for mapped
pages; if the page is found to belong to a VMA that accounts dirty pages, it
will also write-protect the PTE.
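A hypothetical sketch of the write-back side (the actual call sites live in
mm/page-writeback.c and are not part of this include-only diffstat):

    /* Before starting I/O on a dirty page of an accounted mapping,
     * clean and write-protect its PTEs so the next write faults again. */
    if (page_mapped(page) && mapping_cap_account_dirty(mapping))
        page_mkclean(page);    /* returns the number of PTEs cleaned */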
Finally, in fs/buffer.c:try_to_free_buffers(), remove clear_page_dirty() from
under ->private_lock. This seems to be safe, since ->private_lock is used to
serialize access to the buffers, not the page itself. This is needed because
clear_page_dirty() will call into page_mkclean() and would thereby violate the
locking order.
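An illustrative before/after of that fs/buffer.c change (not the verbatim
diff; drop_buffers() stands in here for the buffer-freeing work done under the
lock):

    spin_lock(&mapping->private_lock);
    ret = drop_buffers(page, &buffers_to_free);
    spin_unlock(&mapping->private_lock);
    if (ret)
        clear_page_dirty(page);    /* now called outside ->private_lock,
                                      so it may safely enter page_mkclean() */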
[dhowells@redhat.com: Provide a page_mkclean() implementation for NOMMU]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'include')
 include/linux/mm.h   | 34 ++++++++++++++++++++++++++++++++++
 include/linux/rmap.h | 14 ++++++++++++++
 2 files changed, 48 insertions(+), 0 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7d20b25c58fc..449841413cf1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -15,6 +15,7 @@
 #include <linux/fs.h>
 #include <linux/mutex.h>
 #include <linux/debug_locks.h>
+#include <linux/backing-dev.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -810,6 +811,39 @@ struct shrinker;
 extern struct shrinker *set_shrinker(int, shrinker_t);
 extern void remove_shrinker(struct shrinker *shrinker);
 
+/*
+ * Some shared mappings will want the pages marked read-only
+ * to track write events. If so, we'll downgrade vm_page_prot
+ * to the private version (using protection_map[] without the
+ * VM_SHARED bit).
+ */
+static inline int vma_wants_writenotify(struct vm_area_struct *vma)
+{
+	unsigned int vm_flags = vma->vm_flags;
+
+	/* If it was private or non-writable, the write bit is already clear */
+	if ((vm_flags & (VM_WRITE|VM_SHARED)) != (VM_WRITE|VM_SHARED))
+		return 0;
+
+	/* The backer wishes to know when pages are first written to? */
+	if (vma->vm_ops && vma->vm_ops->page_mkwrite)
+		return 1;
+
+	/* The open routine did something to the protections already? */
+	if (pgprot_val(vma->vm_page_prot) !=
+	    pgprot_val(protection_map[vm_flags &
+		    (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]))
+		return 0;
+
+	/* Specialty mapping? */
+	if (vm_flags & (VM_PFNMAP|VM_INSERTPAGE))
+		return 0;
+
+	/* Can the mapping track the dirty pages? */
+	return vma->vm_file && vma->vm_file->f_mapping &&
+		mapping_cap_account_dirty(vma->vm_file->f_mapping);
+}
+
 extern pte_t *FASTCALL(get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl));
 
 int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
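The helper above is consumed outside this include-only diffstat; the following
is only a sketch of how a caller such as mmap_region() or mprotect_fixup()
might apply it (assumed, since mm/mmap.c and mm/mprotect.c are not shown
here):

    /* Compute the default protections, then drop the shared-write bit so
     * the first write to every page traps and can be tracked. */
    vma->vm_page_prot = protection_map[vm_flags &
                (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)];
    if (vma_wants_writenotify(vma))
        vma->vm_page_prot = protection_map[vm_flags &
                    (VM_READ|VM_WRITE|VM_EXEC)];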
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bf97b0900014..db2c1df4fef9 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -103,6 +103,14 @@ pte_t *page_check_address(struct page *, struct mm_struct *,
  */
 unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
 
+/*
+ * Cleans the PTEs of shared mappings.
+ * (and since clean PTEs should also be readonly, write protects them too)
+ *
+ * returns the number of cleaned PTEs.
+ */
+int page_mkclean(struct page *);
+
 #else /* !CONFIG_MMU */
 
 #define anon_vma_init() do {} while (0)
@@ -112,6 +120,12 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
 #define page_referenced(page,l) TestClearPageReferenced(page)
 #define try_to_unmap(page, refs) SWAP_FAIL
 
+static inline int page_mkclean(struct page *page)
+{
+	return 0;
+}
+
+
 #endif /* CONFIG_MMU */
 
 /*