diff options
author | Hugh Dickins <hughd@google.com> | 2012-10-08 19:33:19 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-09 03:22:56 -0400 |
commit | e6c509f85455041d3d7c4b863bf80bc294288cc1 (patch) | |
tree | 50ccf8e339b219851ca7ad000379b1559415e354 | |
parent | 39b5f29ac1f988c1615fbc9c69f6651ab0d0c3c7 (diff) |
mm: use clear_page_mlock() in page_remove_rmap()
We had thought that pages could no longer get freed while still marked as
mlocked; but Johannes Weiner posted this program to demonstrate that
truncating an mlocked private file mapping containing COWed pages is still
mishandled:
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
int main(void)
{
char *map;
int fd;
system("grep mlockfreed /proc/vmstat");
fd = open("chigurh", O_CREAT|O_EXCL|O_RDWR);
unlink("chigurh");
ftruncate(fd, 4096);
map = mmap(NULL, 4096, PROT_WRITE, MAP_PRIVATE, fd, 0);
map[0] = 11;
mlock(map, sizeof(fd));
ftruncate(fd, 0);
close(fd);
munlock(map, sizeof(fd));
munmap(map, 4096);
system("grep mlockfreed /proc/vmstat");
return 0;
}
The anon COWed pages are not caught by truncation's clear_page_mlock() of
the pagecache pages; but unmap_mapping_range() unmaps them, so we ought to
look out for them there in page_remove_rmap(). Indeed, why should
truncation or invalidation be doing the clear_page_mlock() when removing
from pagecache? mlock is a property of mapping in userspace, not a
property of pagecache: an mlocked unmapped page is nonsensical.
Reported-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Rik van Riel <riel@redhat.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Ying Han <yinghan@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | mm/internal.h | 7 | ||||
-rw-r--r-- | mm/memory.c | 10 | ||||
-rw-r--r-- | mm/mlock.c | 16 | ||||
-rw-r--r-- | mm/rmap.c | 4 | ||||
-rw-r--r-- | mm/truncate.c | 4 |
5 files changed, 13 insertions, 28 deletions
diff --git a/mm/internal.h b/mm/internal.h index 78f25d6cc6a7..4dc93e2fe69e 100644 --- a/mm/internal.h +++ b/mm/internal.h | |||
@@ -201,12 +201,7 @@ extern void munlock_vma_page(struct page *page); | |||
201 | * If called for a page that is still mapped by mlocked vmas, all we do | 201 | * If called for a page that is still mapped by mlocked vmas, all we do |
202 | * is revert to lazy LRU behaviour -- semantics are not broken. | 202 | * is revert to lazy LRU behaviour -- semantics are not broken. |
203 | */ | 203 | */ |
204 | extern void __clear_page_mlock(struct page *page); | 204 | extern void clear_page_mlock(struct page *page); |
205 | static inline void clear_page_mlock(struct page *page) | ||
206 | { | ||
207 | if (unlikely(TestClearPageMlocked(page))) | ||
208 | __clear_page_mlock(page); | ||
209 | } | ||
210 | 205 | ||
211 | /* | 206 | /* |
212 | * mlock_migrate_page - called only from migrate_page_copy() to | 207 | * mlock_migrate_page - called only from migrate_page_copy() to |
diff --git a/mm/memory.c b/mm/memory.c index d205e4381a34..5f5d1f039bf4 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -1577,12 +1577,12 @@ split_fallthrough: | |||
1577 | if (page->mapping && trylock_page(page)) { | 1577 | if (page->mapping && trylock_page(page)) { |
1578 | lru_add_drain(); /* push cached pages to LRU */ | 1578 | lru_add_drain(); /* push cached pages to LRU */ |
1579 | /* | 1579 | /* |
1580 | * Because we lock page here and migration is | 1580 | * Because we lock page here, and migration is |
1581 | * blocked by the pte's page reference, we need | 1581 | * blocked by the pte's page reference, and we |
1582 | * only check for file-cache page truncation. | 1582 | * know the page is still mapped, we don't even |
1583 | * need to check for file-cache page truncation. | ||
1583 | */ | 1584 | */ |
1584 | if (page->mapping) | 1585 | mlock_vma_page(page); |
1585 | mlock_vma_page(page); | ||
1586 | unlock_page(page); | 1586 | unlock_page(page); |
1587 | } | 1587 | } |
1588 | } | 1588 | } |
diff --git a/mm/mlock.c b/mm/mlock.c index a948be4b7ba7..de7321592897 100644 --- a/mm/mlock.c +++ b/mm/mlock.c | |||
@@ -51,13 +51,10 @@ EXPORT_SYMBOL(can_do_mlock); | |||
51 | /* | 51 | /* |
52 | * LRU accounting for clear_page_mlock() | 52 | * LRU accounting for clear_page_mlock() |
53 | */ | 53 | */ |
54 | void __clear_page_mlock(struct page *page) | 54 | void clear_page_mlock(struct page *page) |
55 | { | 55 | { |
56 | VM_BUG_ON(!PageLocked(page)); | 56 | if (!TestClearPageMlocked(page)) |
57 | |||
58 | if (!page->mapping) { /* truncated ? */ | ||
59 | return; | 57 | return; |
60 | } | ||
61 | 58 | ||
62 | dec_zone_page_state(page, NR_MLOCK); | 59 | dec_zone_page_state(page, NR_MLOCK); |
63 | count_vm_event(UNEVICTABLE_PGCLEARED); | 60 | count_vm_event(UNEVICTABLE_PGCLEARED); |
@@ -290,14 +287,7 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, | |||
290 | page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP); | 287 | page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP); |
291 | if (page && !IS_ERR(page)) { | 288 | if (page && !IS_ERR(page)) { |
292 | lock_page(page); | 289 | lock_page(page); |
293 | /* | 290 | munlock_vma_page(page); |
294 | * Like in __mlock_vma_pages_range(), | ||
295 | * because we lock page here and migration is | ||
296 | * blocked by the elevated reference, we need | ||
297 | * only check for file-cache page truncation. | ||
298 | */ | ||
299 | if (page->mapping) | ||
300 | munlock_vma_page(page); | ||
301 | unlock_page(page); | 291 | unlock_page(page); |
302 | put_page(page); | 292 | put_page(page); |
303 | } | 293 | } |
@@ -1155,7 +1155,10 @@ void page_remove_rmap(struct page *page) | |||
1155 | } else { | 1155 | } else { |
1156 | __dec_zone_page_state(page, NR_FILE_MAPPED); | 1156 | __dec_zone_page_state(page, NR_FILE_MAPPED); |
1157 | mem_cgroup_dec_page_stat(page, MEMCG_NR_FILE_MAPPED); | 1157 | mem_cgroup_dec_page_stat(page, MEMCG_NR_FILE_MAPPED); |
1158 | mem_cgroup_end_update_page_stat(page, &locked, &flags); | ||
1158 | } | 1159 | } |
1160 | if (unlikely(PageMlocked(page))) | ||
1161 | clear_page_mlock(page); | ||
1159 | /* | 1162 | /* |
1160 | * It would be tidy to reset the PageAnon mapping here, | 1163 | * It would be tidy to reset the PageAnon mapping here, |
1161 | * but that might overwrite a racing page_add_anon_rmap | 1164 | * but that might overwrite a racing page_add_anon_rmap |
@@ -1165,6 +1168,7 @@ void page_remove_rmap(struct page *page) | |||
1165 | * Leaving it set also helps swapoff to reinstate ptes | 1168 | * Leaving it set also helps swapoff to reinstate ptes |
1166 | * faster for those pages still in swapcache. | 1169 | * faster for those pages still in swapcache. |
1167 | */ | 1170 | */ |
1171 | return; | ||
1168 | out: | 1172 | out: |
1169 | if (!anon) | 1173 | if (!anon) |
1170 | mem_cgroup_end_update_page_stat(page, &locked, &flags); | 1174 | mem_cgroup_end_update_page_stat(page, &locked, &flags); |
diff --git a/mm/truncate.c b/mm/truncate.c index f38055cb8af6..d51ce92d6e83 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
@@ -107,7 +107,6 @@ truncate_complete_page(struct address_space *mapping, struct page *page) | |||
107 | 107 | ||
108 | cancel_dirty_page(page, PAGE_CACHE_SIZE); | 108 | cancel_dirty_page(page, PAGE_CACHE_SIZE); |
109 | 109 | ||
110 | clear_page_mlock(page); | ||
111 | ClearPageMappedToDisk(page); | 110 | ClearPageMappedToDisk(page); |
112 | delete_from_page_cache(page); | 111 | delete_from_page_cache(page); |
113 | return 0; | 112 | return 0; |
@@ -132,7 +131,6 @@ invalidate_complete_page(struct address_space *mapping, struct page *page) | |||
132 | if (page_has_private(page) && !try_to_release_page(page, 0)) | 131 | if (page_has_private(page) && !try_to_release_page(page, 0)) |
133 | return 0; | 132 | return 0; |
134 | 133 | ||
135 | clear_page_mlock(page); | ||
136 | ret = remove_mapping(mapping, page); | 134 | ret = remove_mapping(mapping, page); |
137 | 135 | ||
138 | return ret; | 136 | return ret; |
@@ -394,8 +392,6 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) | |||
394 | if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) | 392 | if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) |
395 | return 0; | 393 | return 0; |
396 | 394 | ||
397 | clear_page_mlock(page); | ||
398 | |||
399 | spin_lock_irq(&mapping->tree_lock); | 395 | spin_lock_irq(&mapping->tree_lock); |
400 | if (PageDirty(page)) | 396 | if (PageDirty(page)) |
401 | goto failed; | 397 | goto failed; |