author    Peter Zijlstra <a.p.zijlstra@chello.nl>    2006-09-26 02:30:57 -0400
committer Linus Torvalds <torvalds@g5.osdl.org>     2006-09-26 11:48:44 -0400
commit    d08b3851da41d0ee60851f2c75b118e1f7a5fc89 (patch)
tree      a01f6930a1387e8f66607e2fe16c62bb7044353b /mm/memory.c
parent    725d704ecaca4a43f067092c140d4f3271cf2856 (diff)
[PATCH] mm: tracking shared dirty pages
Tracking of dirty pages in shared writeable mmap()s.

The idea is simple: write-protect clean shared writeable pages, catch the
write fault, make the page writeable and set it dirty.  On page writeback,
clean all the PTE dirty bits and write-protect them once again.

The implementation is a tad harder, mainly because the default
backing_dev_info capabilities were too loosely maintained.  Hence it is not
enough to test the backing_dev_info for cap_account_dirty.

The current heuristic is as follows; a VMA is eligible when:
 - it is shared writeable,
    (vm_flags & (VM_WRITE|VM_SHARED)) == (VM_WRITE|VM_SHARED)
 - it is not a 'special' mapping,
    (vm_flags & (VM_PFNMAP|VM_INSERTPAGE)) == 0
 - the backing_dev_info is cap_account_dirty,
    mapping_cap_account_dirty(vma->vm_file->f_mapping)
 - f_op->mmap() didn't change the default page protection

Pages from remap_pfn_range() are explicitly excluded because their COW
semantics are already horrid enough (see vm_normal_page() in do_wp_page())
and because they don't have a backing store anyway.

mprotect() is taught about the new behaviour as well; however, it overrides
the last condition.

Cleaning the pages on writeback is done with page_mkclean(), a new rmap call.
It can be called on any page, but is currently only implemented for mapped
pages; if the page is found to belong to a VMA that accounts dirty pages, it
will also write-protect the PTE.

Finally, in fs/buffer.c:try_to_free_buffers(), remove clear_page_dirty() from
under ->private_lock.  This appears to be safe, since ->private_lock is used
to serialize access to the buffers, not the page itself.  This is needed
because clear_page_dirty() will call into page_mkclean() and would thereby
violate the locking order.

[dhowells@redhat.com: Provide a page_mkclean() implementation for NOMMU]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
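For illustration, the eligibility heuristic above can be read as a single
predicate over a VMA.  The following is a minimal sketch only, not code from
this patch (the kernel consolidated a helper of this kind only in later
work); the function name is hypothetical, and the protection_map comparison
is one plausible way to express "f_op->mmap() didn't change the default page
protection" against the 2.6.18-era definitions in <linux/mm.h>.

/*
 * Illustrative sketch, not part of this patch: returns non-zero when a VMA
 * qualifies for shared-dirty tracking per the heuristic in the changelog.
 * Helper name is made up; assumes 2.6.18-era struct vm_area_struct.
 */
static int vma_wants_writenotify_sketch(struct vm_area_struct *vma)
{
	/* 1. shared and writable */
	if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) != (VM_WRITE|VM_SHARED))
		return 0;

	/* 2. not a 'special' mapping (remap_pfn_range(), vm_insert_page()) */
	if (vma->vm_flags & (VM_PFNMAP|VM_INSERTPAGE))
		return 0;

	/* 3. the backing_dev_info accounts dirty pages */
	if (!vma->vm_file || !vma->vm_file->f_mapping ||
	    !mapping_cap_account_dirty(vma->vm_file->f_mapping))
		return 0;

	/* 4. f_op->mmap() did not change the default page protection */
	if (pgprot_val(vma->vm_page_prot) !=
	    pgprot_val(protection_map[vma->vm_flags &
			(VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]))
		return 0;

	return 1;
}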
Diffstat (limited to 'mm/memory.c')
-rw-r--r--  mm/memory.c | 29
1 file changed, 23 insertions(+), 6 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 109e9866237e..fa941b169071 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1458,14 +1458,19 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	struct page *old_page, *new_page;
 	pte_t entry;
-	int reuse, ret = VM_FAULT_MINOR;
+	int reuse = 0, ret = VM_FAULT_MINOR;
+	struct page *dirty_page = NULL;
 
 	old_page = vm_normal_page(vma, address, orig_pte);
 	if (!old_page)
 		goto gotten;
 
-	if (unlikely((vma->vm_flags & (VM_SHARED|VM_WRITE)) ==
-				(VM_SHARED|VM_WRITE))) {
+	/*
+	 * Only catch write-faults on shared writable pages, read-only
+	 * shared pages can get COWed by get_user_pages(.write=1, .force=1).
+	 */
+	if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
+				(VM_WRITE|VM_SHARED))) {
 		if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
 			/*
 			 * Notify the address space that the page is about to
@@ -1494,13 +1499,12 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			if (!pte_same(*page_table, orig_pte))
 				goto unlock;
 		}
-
+		dirty_page = old_page;
+		get_page(dirty_page);
 		reuse = 1;
 	} else if (PageAnon(old_page) && !TestSetPageLocked(old_page)) {
 		reuse = can_share_swap_page(old_page);
 		unlock_page(old_page);
-	} else {
-		reuse = 0;
 	}
 
 	if (reuse) {
@@ -1566,6 +1570,10 @@ gotten:
 		page_cache_release(old_page);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
+	if (dirty_page) {
+		set_page_dirty(dirty_page);
+		put_page(dirty_page);
+	}
 	return ret;
 oom:
 	if (old_page)
@@ -2098,6 +2106,7 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned int sequence = 0;
 	int ret = VM_FAULT_MINOR;
 	int anon = 0;
+	struct page *dirty_page = NULL;
 
 	pte_unmap(page_table);
 	BUG_ON(vma->vm_flags & VM_PFNMAP);
@@ -2192,6 +2201,10 @@ retry:
 		} else {
 			inc_mm_counter(mm, file_rss);
 			page_add_file_rmap(new_page);
+			if (write_access) {
+				dirty_page = new_page;
+				get_page(dirty_page);
+			}
 		}
 	} else {
 		/* One of our sibling threads was faster, back out. */
@@ -2204,6 +2217,10 @@ retry:
 	lazy_mmu_prot_update(entry);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
+	if (dirty_page) {
+		set_page_dirty(dirty_page);
+		put_page(dirty_page);
+	}
 	return ret;
 oom:
 	page_cache_release(new_page);