aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHugh Dickins <hughd@google.com>2012-10-08 19:33:19 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2012-10-09 03:22:56 -0400
commite6c509f85455041d3d7c4b863bf80bc294288cc1 (patch)
tree50ccf8e339b219851ca7ad000379b1559415e354
parent39b5f29ac1f988c1615fbc9c69f6651ab0d0c3c7 (diff)
mm: use clear_page_mlock() in page_remove_rmap()
We had thought that pages could no longer get freed while still marked as mlocked; but Johannes Weiner posted this program to demonstrate that truncating an mlocked private file mapping containing COWed pages is still mishandled: #include <sys/types.h> #include <sys/mman.h> #include <sys/stat.h> #include <stdlib.h> #include <unistd.h> #include <fcntl.h> #include <stdio.h> int main(void) { char *map; int fd; system("grep mlockfreed /proc/vmstat"); fd = open("chigurh", O_CREAT|O_EXCL|O_RDWR); unlink("chigurh"); ftruncate(fd, 4096); map = mmap(NULL, 4096, PROT_WRITE, MAP_PRIVATE, fd, 0); map[0] = 11; mlock(map, sizeof(fd)); ftruncate(fd, 0); close(fd); munlock(map, sizeof(fd)); munmap(map, 4096); system("grep mlockfreed /proc/vmstat"); return 0; } The anon COWed pages are not caught by truncation's clear_page_mlock() of the pagecache pages; but unmap_mapping_range() unmaps them, so we ought to look out for them there in page_remove_rmap(). Indeed, why should truncation or invalidation be doing the clear_page_mlock() when removing from pagecache? mlock is a property of mapping in userspace, not a property of pagecache: an mlocked unmapped page is nonsensical. Reported-by: Johannes Weiner <hannes@cmpxchg.org> Signed-off-by: Hugh Dickins <hughd@google.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Rik van Riel <riel@redhat.com> Cc: Michel Lespinasse <walken@google.com> Cc: Ying Han <yinghan@google.com> Acked-by: Johannes Weiner <hannes@cmpxchg.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--mm/internal.h7
-rw-r--r--mm/memory.c10
-rw-r--r--mm/mlock.c16
-rw-r--r--mm/rmap.c4
-rw-r--r--mm/truncate.c4
5 files changed, 13 insertions, 28 deletions
diff --git a/mm/internal.h b/mm/internal.h
index 78f25d6cc6a7..4dc93e2fe69e 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -201,12 +201,7 @@ extern void munlock_vma_page(struct page *page);
201 * If called for a page that is still mapped by mlocked vmas, all we do 201 * If called for a page that is still mapped by mlocked vmas, all we do
202 * is revert to lazy LRU behaviour -- semantics are not broken. 202 * is revert to lazy LRU behaviour -- semantics are not broken.
203 */ 203 */
204extern void __clear_page_mlock(struct page *page); 204extern void clear_page_mlock(struct page *page);
205static inline void clear_page_mlock(struct page *page)
206{
207 if (unlikely(TestClearPageMlocked(page)))
208 __clear_page_mlock(page);
209}
210 205
211/* 206/*
212 * mlock_migrate_page - called only from migrate_page_copy() to 207 * mlock_migrate_page - called only from migrate_page_copy() to
diff --git a/mm/memory.c b/mm/memory.c
index d205e4381a34..5f5d1f039bf4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1577,12 +1577,12 @@ split_fallthrough:
1577 if (page->mapping && trylock_page(page)) { 1577 if (page->mapping && trylock_page(page)) {
1578 lru_add_drain(); /* push cached pages to LRU */ 1578 lru_add_drain(); /* push cached pages to LRU */
1579 /* 1579 /*
1580 * Because we lock page here and migration is 1580 * Because we lock page here, and migration is
1581 * blocked by the pte's page reference, we need 1581 * blocked by the pte's page reference, and we
1582 * only check for file-cache page truncation. 1582 * know the page is still mapped, we don't even
1583 * need to check for file-cache page truncation.
1583 */ 1584 */
1584 if (page->mapping) 1585 mlock_vma_page(page);
1585 mlock_vma_page(page);
1586 unlock_page(page); 1586 unlock_page(page);
1587 } 1587 }
1588 } 1588 }
diff --git a/mm/mlock.c b/mm/mlock.c
index a948be4b7ba7..de7321592897 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -51,13 +51,10 @@ EXPORT_SYMBOL(can_do_mlock);
51/* 51/*
52 * LRU accounting for clear_page_mlock() 52 * LRU accounting for clear_page_mlock()
53 */ 53 */
54void __clear_page_mlock(struct page *page) 54void clear_page_mlock(struct page *page)
55{ 55{
56 VM_BUG_ON(!PageLocked(page)); 56 if (!TestClearPageMlocked(page))
57
58 if (!page->mapping) { /* truncated ? */
59 return; 57 return;
60 }
61 58
62 dec_zone_page_state(page, NR_MLOCK); 59 dec_zone_page_state(page, NR_MLOCK);
63 count_vm_event(UNEVICTABLE_PGCLEARED); 60 count_vm_event(UNEVICTABLE_PGCLEARED);
@@ -290,14 +287,7 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
290 page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP); 287 page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP);
291 if (page && !IS_ERR(page)) { 288 if (page && !IS_ERR(page)) {
292 lock_page(page); 289 lock_page(page);
293 /* 290 munlock_vma_page(page);
294 * Like in __mlock_vma_pages_range(),
295 * because we lock page here and migration is
296 * blocked by the elevated reference, we need
297 * only check for file-cache page truncation.
298 */
299 if (page->mapping)
300 munlock_vma_page(page);
301 unlock_page(page); 291 unlock_page(page);
302 put_page(page); 292 put_page(page);
303 } 293 }
diff --git a/mm/rmap.c b/mm/rmap.c
index 0d86433e42d7..bf03149f495c 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1155,7 +1155,10 @@ void page_remove_rmap(struct page *page)
1155 } else { 1155 } else {
1156 __dec_zone_page_state(page, NR_FILE_MAPPED); 1156 __dec_zone_page_state(page, NR_FILE_MAPPED);
1157 mem_cgroup_dec_page_stat(page, MEMCG_NR_FILE_MAPPED); 1157 mem_cgroup_dec_page_stat(page, MEMCG_NR_FILE_MAPPED);
1158 mem_cgroup_end_update_page_stat(page, &locked, &flags);
1158 } 1159 }
1160 if (unlikely(PageMlocked(page)))
1161 clear_page_mlock(page);
1159 /* 1162 /*
1160 * It would be tidy to reset the PageAnon mapping here, 1163 * It would be tidy to reset the PageAnon mapping here,
1161 * but that might overwrite a racing page_add_anon_rmap 1164 * but that might overwrite a racing page_add_anon_rmap
@@ -1165,6 +1168,7 @@ void page_remove_rmap(struct page *page)
1165 * Leaving it set also helps swapoff to reinstate ptes 1168 * Leaving it set also helps swapoff to reinstate ptes
1166 * faster for those pages still in swapcache. 1169 * faster for those pages still in swapcache.
1167 */ 1170 */
1171 return;
1168out: 1172out:
1169 if (!anon) 1173 if (!anon)
1170 mem_cgroup_end_update_page_stat(page, &locked, &flags); 1174 mem_cgroup_end_update_page_stat(page, &locked, &flags);
diff --git a/mm/truncate.c b/mm/truncate.c
index f38055cb8af6..d51ce92d6e83 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -107,7 +107,6 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
107 107
108 cancel_dirty_page(page, PAGE_CACHE_SIZE); 108 cancel_dirty_page(page, PAGE_CACHE_SIZE);
109 109
110 clear_page_mlock(page);
111 ClearPageMappedToDisk(page); 110 ClearPageMappedToDisk(page);
112 delete_from_page_cache(page); 111 delete_from_page_cache(page);
113 return 0; 112 return 0;
@@ -132,7 +131,6 @@ invalidate_complete_page(struct address_space *mapping, struct page *page)
132 if (page_has_private(page) && !try_to_release_page(page, 0)) 131 if (page_has_private(page) && !try_to_release_page(page, 0))
133 return 0; 132 return 0;
134 133
135 clear_page_mlock(page);
136 ret = remove_mapping(mapping, page); 134 ret = remove_mapping(mapping, page);
137 135
138 return ret; 136 return ret;
@@ -394,8 +392,6 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
394 if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL)) 392 if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL))
395 return 0; 393 return 0;
396 394
397 clear_page_mlock(page);
398
399 spin_lock_irq(&mapping->tree_lock); 395 spin_lock_irq(&mapping->tree_lock);
400 if (PageDirty(page)) 396 if (PageDirty(page))
401 goto failed; 397 goto failed;