author		Hugh Dickins <hughd@google.com>			2015-11-05 21:50:05 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-11-05 22:34:48 -0500
commit		42cb14b110a5698ccf26ce59c4441722605a3743 (patch)
tree		74ac752d834ead12cb7b969fb0e9a3ee53cc61a1 /mm/migrate.c
parent		cf4b769abb8aef01f887543cb8308c0d8671367c (diff)

mm: migrate dirty page without clear_page_dirty_for_io etc

clear_page_dirty_for_io() has accumulated writeback and memcg subtleties
since v2.6.16 first introduced page migration; and the set_page_dirty()
which completed its migration of PageDirty later had to be moderated to
__set_page_dirty_nobuffers(); then PageSwapBacked pages had to skip even
that.

No actual problems have been seen with this procedure recently, but if you
look at what the clear_page_dirty_for_io(page) + set_page_dirty(newpage)
sequence actually achieves, it turns out to be nothing more than moving the
PageDirty flag, and its NR_FILE_DIRTY stat, from one zone to another.
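
In other words, the net effect of that whole dance is roughly the following
(a conceptual sketch only, with the writeback and memcg bookkeeping stripped
away; move_dirty_conceptually() is a hypothetical name, not a real kernel
helper):

static void move_dirty_conceptually(struct page *page, struct page *newpage)
{
        /* Transfer the PageDirty flag from the old page to the new one */
        if (TestClearPageDirty(page)) {
                /* stat move only matters where dirty accounting is enabled */
                dec_zone_page_state(page, NR_FILE_DIRTY);       /* old zone */
                SetPageDirty(newpage);
                inc_zone_page_state(newpage, NR_FILE_DIRTY);    /* new zone */
        }
}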

It would be good to avoid the pile of irrelevant decrements and increments,
the improper event counting, and the unnecessary descent of the radix_tree
under tree_lock (to set the PAGECACHE_TAG_DIRTY which
radix_tree_replace_slot() left in place anyway).
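
For reference, the tag-setting that __set_page_dirty_nobuffers() ends up
doing on this path looks roughly like this (a heavily simplified sketch; the
real function also handles dirty accounting, memcg and inode dirtying):

        /*
         * Re-take tree_lock and walk the radix tree again, only to set a
         * tag that radix_tree_replace_slot() never cleared in the first
         * place.
         */
        spin_lock_irqsave(&mapping->tree_lock, flags);
        radix_tree_tag_set(&mapping->page_tree, page_index(newpage),
                           PAGECACHE_TAG_DIRTY);
        spin_unlock_irqrestore(&mapping->tree_lock, flags);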

Do the NR_FILE_DIRTY movement, like the other stats movements, while
interrupts are still disabled in migrate_page_move_mapping(); and don't even
bother if the zone is the same. Do the PageDirty movement there under
tree_lock too, where the old page's refcount is frozen and newpage is not
yet visible: bearing in mind that as soon as newpage becomes visible in the
radix_tree, an un-page-locked set_page_dirty() might interfere (or perhaps
that's just not possible: anything doing so should already hold an
additional reference to the old page, preventing its migration; but play
safe).
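
Condensed, the ordering that migrate_page_move_mapping() ends up with
(paraphrasing the hunks below, with error paths and refcount checks elided)
is:

        oldzone = page_zone(page);
        newzone = page_zone(newpage);

        spin_lock_irq(&mapping->tree_lock);
        /* ... look up slot, freeze page refs at expected_count ... */

        /* Move dirty while page refs frozen and newpage not yet exposed */
        dirty = PageDirty(page);
        if (dirty) {
                ClearPageDirty(page);
                SetPageDirty(newpage);
        }

        radix_tree_replace_slot(pslot, newpage);
        page_unfreeze_refs(page, expected_count - 1);

        spin_unlock(&mapping->tree_lock);
        /* Leave irq disabled to prevent preemption while updating stats */
        if (newzone != oldzone) {
                /* move NR_FILE_PAGES, NR_SHMEM and, if accounted, NR_FILE_DIRTY */
        }
        local_irq_enable();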

But we do still need to transfer PageDirty in migrate_page_copy(), for those
paths which don't go the mapping route through migrate_page_move_mapping().

Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/migrate.c')
-rw-r--r--	mm/migrate.c	51
1 file changed, 31 insertions, 20 deletions
diff --git a/mm/migrate.c b/mm/migrate.c
index 3067e40e7be9..2834faba719a 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -30,6 +30,7 @@
 #include <linux/mempolicy.h>
 #include <linux/vmalloc.h>
 #include <linux/security.h>
+#include <linux/backing-dev.h>
 #include <linux/syscalls.h>
 #include <linux/hugetlb.h>
 #include <linux/hugetlb_cgroup.h>
@@ -313,6 +314,8 @@ int migrate_page_move_mapping(struct address_space *mapping,
                 struct buffer_head *head, enum migrate_mode mode,
                 int extra_count)
 {
+        struct zone *oldzone, *newzone;
+        int dirty;
         int expected_count = 1 + extra_count;
         void **pslot;
 
@@ -331,6 +334,9 @@ int migrate_page_move_mapping(struct address_space *mapping,
                 return MIGRATEPAGE_SUCCESS;
         }
 
+        oldzone = page_zone(page);
+        newzone = page_zone(newpage);
+
         spin_lock_irq(&mapping->tree_lock);
 
         pslot = radix_tree_lookup_slot(&mapping->page_tree,
@@ -378,6 +384,13 @@ int migrate_page_move_mapping(struct address_space *mapping,
                 set_page_private(newpage, page_private(page));
         }
 
+        /* Move dirty while page refs frozen and newpage not yet exposed */
+        dirty = PageDirty(page);
+        if (dirty) {
+                ClearPageDirty(page);
+                SetPageDirty(newpage);
+        }
+
         radix_tree_replace_slot(pslot, newpage);
 
         /*
@@ -387,6 +400,9 @@ int migrate_page_move_mapping(struct address_space *mapping,
          */
         page_unfreeze_refs(page, expected_count - 1);
 
+        spin_unlock(&mapping->tree_lock);
+        /* Leave irq disabled to prevent preemption while updating stats */
+
         /*
          * If moved to a different zone then also account
          * the page for that zone. Other VM counters will be
@@ -397,13 +413,19 @@ int migrate_page_move_mapping(struct address_space *mapping,
          * via NR_FILE_PAGES and NR_ANON_PAGES if they
          * are mapped to swap space.
          */
-        __dec_zone_page_state(page, NR_FILE_PAGES);
-        __inc_zone_page_state(newpage, NR_FILE_PAGES);
-        if (!PageSwapCache(page) && PageSwapBacked(page)) {
-                __dec_zone_page_state(page, NR_SHMEM);
-                __inc_zone_page_state(newpage, NR_SHMEM);
+        if (newzone != oldzone) {
+                __dec_zone_state(oldzone, NR_FILE_PAGES);
+                __inc_zone_state(newzone, NR_FILE_PAGES);
+                if (PageSwapBacked(page) && !PageSwapCache(page)) {
+                        __dec_zone_state(oldzone, NR_SHMEM);
+                        __inc_zone_state(newzone, NR_SHMEM);
+                }
+                if (dirty && mapping_cap_account_dirty(mapping)) {
+                        __dec_zone_state(oldzone, NR_FILE_DIRTY);
+                        __inc_zone_state(newzone, NR_FILE_DIRTY);
+                }
         }
-        spin_unlock_irq(&mapping->tree_lock);
+        local_irq_enable();
 
         return MIGRATEPAGE_SUCCESS;
 }
@@ -524,20 +546,9 @@ void migrate_page_copy(struct page *newpage, struct page *page)
         if (PageMappedToDisk(page))
                 SetPageMappedToDisk(newpage);
 
-        if (PageDirty(page)) {
-                clear_page_dirty_for_io(page);
-                /*
-                 * Want to mark the page and the radix tree as dirty, and
-                 * redo the accounting that clear_page_dirty_for_io undid,
-                 * but we can't use set_page_dirty because that function
-                 * is actually a signal that all of the page has become dirty.
-                 * Whereas only part of our page may be dirty.
-                 */
-                if (PageSwapBacked(page))
-                        SetPageDirty(newpage);
-                else
-                        __set_page_dirty_nobuffers(newpage);
-        }
+        /* Move dirty on pages not done by migrate_page_move_mapping() */
+        if (PageDirty(page))
+                SetPageDirty(newpage);
 
         if (page_is_young(page))
                 set_page_young(newpage);