author     Peter Zijlstra <a.p.zijlstra@chello.nl>   2006-09-26 02:30:57 -0400
committer  Linus Torvalds <torvalds@g5.osdl.org>     2006-09-26 11:48:44 -0400
commit     d08b3851da41d0ee60851f2c75b118e1f7a5fc89
tree       a01f6930a1387e8f66607e2fe16c62bb7044353b /mm/memory.c
parent     725d704ecaca4a43f067092c140d4f3271cf2856
[PATCH] mm: tracking shared dirty pages
Tracking of dirty pages in shared writeable mmap()s.
The idea is simple: write-protect clean shared writable pages, catch the
write fault, make the page writable and set it dirty. On page writeback,
clean all the PTE dirty bits and write-protect them once again.
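To make the fault-side transition concrete, here is a minimal kernel-style
sketch (illustrative only, not code from this patch; the helper name is
invented, and the generic pte accessors of that era are assumed):

#include <linux/mm.h>
#include <asm/cacheflush.h>

/*
 * Sketch: on a write fault against a clean, write-protected shared page,
 * mark the PTE writable and dirty so the write can proceed.  Locking, the
 * ->page_mkwrite() callout and error handling are omitted.
 */
static void sketch_make_writable_and_dirty(struct vm_area_struct *vma,
					   unsigned long address, pte_t *ptep)
{
	pte_t entry = *ptep;

	flush_cache_page(vma, address, pte_pfn(entry));
	entry = pte_mkyoung(entry);
	entry = pte_mkdirty(pte_mkwrite(entry));	/* writable + dirty */
	ptep_set_access_flags(vma, address, ptep, entry, 1);
	update_mmu_cache(vma, address, entry);
}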
The implementation is a tad harder, mainly because the default
backing_dev_info capabilities were too loosely maintained. Hence it is not
enough to test the backing_dev_info for cap_account_dirty.
The current heuristic is as follows; a VMA is eligible when (a sketch of the
combined test follows the list):
 - it is shared and writable
    (vm_flags & (VM_WRITE|VM_SHARED)) == (VM_WRITE|VM_SHARED)
 - it is not a 'special' mapping
    (vm_flags & (VM_PFNMAP|VM_INSERTPAGE)) == 0
 - the backing_dev_info is cap_account_dirty
    mapping_cap_account_dirty(vma->vm_file->f_mapping)
 - f_op->mmap() didn't change the default page protection
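As a rough illustration, the combined test might look like the predicate
below (the helper name is made up for this sketch; in the patch the check is
spread across the mmap/mprotect paths rather than being one function):

#include <linux/mm.h>
#include <linux/backing-dev.h>

/* Sketch of the four eligibility conditions above as a single predicate. */
static inline int sketch_vma_accounts_dirty(struct vm_area_struct *vma)
{
	unsigned long vm_flags = vma->vm_flags;

	/* shared and writable */
	if ((vm_flags & (VM_WRITE|VM_SHARED)) != (VM_WRITE|VM_SHARED))
		return 0;

	/* not a 'special' mapping */
	if (vm_flags & (VM_PFNMAP|VM_INSERTPAGE))
		return 0;

	/* the backing_dev_info accounts dirty pages */
	if (!vma->vm_file || !vma->vm_file->f_mapping ||
	    !mapping_cap_account_dirty(vma->vm_file->f_mapping))
		return 0;

	/* f_op->mmap() left the default page protection alone */
	return pgprot_val(vma->vm_page_prot) ==
	       pgprot_val(protection_map[vm_flags &
			  (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]);
}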
Pages from remap_pfn_range() are explicitly excluded because their COW
semantics are already horrid enough (see vm_normal_page() in do_wp_page()) and
because they don't have a backing store anyway.
mprotect() is taught about the new behaviour as well. However, it overrides
the last condition.
Cleaning the pages on writeback is done with page_mkclean(), a new rmap call.
It can be called on any page, but is currently only implemented for mapped
pages; if the page is found to be in a VMA that accounts dirty pages, it will
also wrprotect the PTE.
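Per mapping of the page, the PTE-level work can be sketched like this
(simplified: the rmap walk, PTE lookup and locking are left out, and the
function name is invented for the sketch):

#include <linux/mm.h>
#include <asm/cacheflush.h>

/*
 * Sketch: clean one PTE at writeback time.  Clear and flush the old entry,
 * then reinstall it write-protected and clean, so the next store faults
 * again and re-dirties the page through the write-fault path above.
 */
static void sketch_mkclean_one_pte(struct vm_area_struct *vma,
				   unsigned long address, pte_t *ptep)
{
	pte_t entry;

	flush_cache_page(vma, address, pte_pfn(*ptep));
	entry = ptep_clear_flush(vma, address, ptep);	/* also flushes the TLB */
	entry = pte_wrprotect(entry);
	entry = pte_mkclean(entry);
	set_pte_at(vma->vm_mm, address, ptep, entry);
}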
Finally, in fs/buffer.c:try_to_free_buffers(), remove clear_page_dirty() from
under ->private_lock. This seems to be safe, since ->private_lock is used to
serialize access to the buffers, not the page itself. This is needed because
clear_page_dirty() will call into page_mkclean() and would thereby violate
the locking order.
[dhowells@redhat.com: Provide a page_mkclean() implementation for NOMMU]
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm/memory.c')
-rw-r--r--   mm/memory.c   29
1 files changed, 23 insertions, 6 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 109e9866237e..fa941b169071 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1458,14 +1458,19 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	struct page *old_page, *new_page;
 	pte_t entry;
-	int reuse, ret = VM_FAULT_MINOR;
+	int reuse = 0, ret = VM_FAULT_MINOR;
+	struct page *dirty_page = NULL;
 
 	old_page = vm_normal_page(vma, address, orig_pte);
 	if (!old_page)
 		goto gotten;
 
-	if (unlikely((vma->vm_flags & (VM_SHARED|VM_WRITE)) ==
-				(VM_SHARED|VM_WRITE))) {
+	/*
+	 * Only catch write-faults on shared writable pages, read-only
+	 * shared pages can get COWed by get_user_pages(.write=1, .force=1).
+	 */
+	if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
+					(VM_WRITE|VM_SHARED))) {
 		if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
 			/*
 			 * Notify the address space that the page is about to
@@ -1494,13 +1499,12 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			if (!pte_same(*page_table, orig_pte))
 				goto unlock;
 		}
-
+		dirty_page = old_page;
+		get_page(dirty_page);
 		reuse = 1;
 	} else if (PageAnon(old_page) && !TestSetPageLocked(old_page)) {
 		reuse = can_share_swap_page(old_page);
 		unlock_page(old_page);
-	} else {
-		reuse = 0;
 	}
 
 	if (reuse) {
@@ -1566,6 +1570,10 @@ gotten:
 		page_cache_release(old_page);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
+	if (dirty_page) {
+		set_page_dirty(dirty_page);
+		put_page(dirty_page);
+	}
 	return ret;
 oom:
 	if (old_page)
@@ -2098,6 +2106,7 @@ static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned int sequence = 0;
 	int ret = VM_FAULT_MINOR;
 	int anon = 0;
+	struct page *dirty_page = NULL;
 
 	pte_unmap(page_table);
 	BUG_ON(vma->vm_flags & VM_PFNMAP);
@@ -2192,6 +2201,10 @@ retry:
 		} else {
 			inc_mm_counter(mm, file_rss);
 			page_add_file_rmap(new_page);
+			if (write_access) {
+				dirty_page = new_page;
+				get_page(dirty_page);
+			}
 		}
 	} else {
 		/* One of our sibling threads was faster, back out. */
@@ -2204,6 +2217,10 @@ retry:
 	lazy_mmu_prot_update(entry);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
+	if (dirty_page) {
+		set_page_dirty(dirty_page);
+		put_page(dirty_page);
+	}
 	return ret;
 oom:
 	page_cache_release(new_page);