author		Hugh Dickins <hughd@google.com>			2015-11-05 21:50:05 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-11-05 22:34:48 -0500
commit		42cb14b110a5698ccf26ce59c4441722605a3743 (patch)
tree		74ac752d834ead12cb7b969fb0e9a3ee53cc61a1 /mm/migrate.c
parent		cf4b769abb8aef01f887543cb8308c0d8671367c (diff)
mm: migrate dirty page without clear_page_dirty_for_io etc
clear_page_dirty_for_io() has accumulated writeback and memcg subtleties since v2.6.16 first introduced page migration; and the set_page_dirty() which completed its migration of PageDirty later had to be moderated to __set_page_dirty_nobuffers(); then PageSwapBacked pages had to skip that too.

No actual problems have been seen with this procedure recently, but if you look into what the clear_page_dirty_for_io(page) + set_page_dirty(newpage) sequence is actually achieving, it turns out to be nothing more than moving the PageDirty flag, and its NR_FILE_DIRTY stat, from one zone to another.

It would be good to avoid a pile of irrelevant decrements and increments, improper event counting, and an unnecessary descent of the radix_tree under tree_lock (to set the PAGECACHE_TAG_DIRTY which radix_tree_replace_slot() left in place anyway).

Do the NR_FILE_DIRTY movement, like the other stats movements, while interrupts are still disabled in migrate_page_move_mapping(); and don't even bother if the zone is the same. Do the PageDirty movement there under tree_lock too, where the old page is frozen and the newpage not yet visible: bearing in mind that as soon as newpage becomes visible in the radix_tree, an un-page-locked set_page_dirty() might interfere (or perhaps that's just not possible: anything doing so should already hold an additional reference to the old page, preventing its migration; but play safe).

But we do still need to transfer PageDirty in migrate_page_copy(), for those who don't go the mapping route through migrate_page_move_mapping().

Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
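In effect, the old round trip in migrate_page_copy()

	clear_page_dirty_for_io(page);		/* clears PageDirty and undoes its dirty accounting */
	...
	__set_page_dirty_nobuffers(newpage);	/* redoes it, descending the radix_tree under tree_lock */

becomes a plain flag-and-stat move while the old page's refcount is frozen. A condensed sketch of the logic added by the patch below (simplified for illustration, not the exact kernel code):

	dirty = PageDirty(page);
	if (dirty) {
		ClearPageDirty(page);		/* old page loses the flag... */
		SetPageDirty(newpage);		/* ...newpage gains it before becoming visible */
	}
	...
	if (newzone != oldzone && dirty && mapping_cap_account_dirty(mapping)) {
		__dec_zone_state(oldzone, NR_FILE_DIRTY);	/* dirty stat follows the page */
		__inc_zone_state(newzone, NR_FILE_DIRTY);
	}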
Diffstat (limited to 'mm/migrate.c')
-rw-r--r--	mm/migrate.c	51
1 file changed, 31 insertions(+), 20 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 3067e40e7be9..2834faba719a 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -30,6 +30,7 @@
 #include <linux/mempolicy.h>
 #include <linux/vmalloc.h>
 #include <linux/security.h>
+#include <linux/backing-dev.h>
 #include <linux/syscalls.h>
 #include <linux/hugetlb.h>
 #include <linux/hugetlb_cgroup.h>
@@ -313,6 +314,8 @@ int migrate_page_move_mapping(struct address_space *mapping,
 		struct buffer_head *head, enum migrate_mode mode,
 		int extra_count)
 {
+	struct zone *oldzone, *newzone;
+	int dirty;
 	int expected_count = 1 + extra_count;
 	void **pslot;
 
@@ -331,6 +334,9 @@ int migrate_page_move_mapping(struct address_space *mapping,
 		return MIGRATEPAGE_SUCCESS;
 	}
 
+	oldzone = page_zone(page);
+	newzone = page_zone(newpage);
+
 	spin_lock_irq(&mapping->tree_lock);
 
 	pslot = radix_tree_lookup_slot(&mapping->page_tree,
@@ -378,6 +384,13 @@ int migrate_page_move_mapping(struct address_space *mapping,
 		set_page_private(newpage, page_private(page));
 	}
 
+	/* Move dirty while page refs frozen and newpage not yet exposed */
+	dirty = PageDirty(page);
+	if (dirty) {
+		ClearPageDirty(page);
+		SetPageDirty(newpage);
+	}
+
 	radix_tree_replace_slot(pslot, newpage);
 
 	/*
@@ -387,6 +400,9 @@ int migrate_page_move_mapping(struct address_space *mapping,
 	 */
 	page_unfreeze_refs(page, expected_count - 1);
 
+	spin_unlock(&mapping->tree_lock);
+	/* Leave irq disabled to prevent preemption while updating stats */
+
 	/*
 	 * If moved to a different zone then also account
 	 * the page for that zone. Other VM counters will be
@@ -397,13 +413,19 @@ int migrate_page_move_mapping(struct address_space *mapping,
 	 * via NR_FILE_PAGES and NR_ANON_PAGES if they
 	 * are mapped to swap space.
 	 */
-	__dec_zone_page_state(page, NR_FILE_PAGES);
-	__inc_zone_page_state(newpage, NR_FILE_PAGES);
-	if (!PageSwapCache(page) && PageSwapBacked(page)) {
-		__dec_zone_page_state(page, NR_SHMEM);
-		__inc_zone_page_state(newpage, NR_SHMEM);
+	if (newzone != oldzone) {
+		__dec_zone_state(oldzone, NR_FILE_PAGES);
+		__inc_zone_state(newzone, NR_FILE_PAGES);
+		if (PageSwapBacked(page) && !PageSwapCache(page)) {
+			__dec_zone_state(oldzone, NR_SHMEM);
+			__inc_zone_state(newzone, NR_SHMEM);
+		}
+		if (dirty && mapping_cap_account_dirty(mapping)) {
+			__dec_zone_state(oldzone, NR_FILE_DIRTY);
+			__inc_zone_state(newzone, NR_FILE_DIRTY);
+		}
 	}
-	spin_unlock_irq(&mapping->tree_lock);
+	local_irq_enable();
 
 	return MIGRATEPAGE_SUCCESS;
 }
@@ -524,20 +546,9 @@ void migrate_page_copy(struct page *newpage, struct page *page)
 	if (PageMappedToDisk(page))
 		SetPageMappedToDisk(newpage);
 
-	if (PageDirty(page)) {
-		clear_page_dirty_for_io(page);
-		/*
-		 * Want to mark the page and the radix tree as dirty, and
-		 * redo the accounting that clear_page_dirty_for_io undid,
-		 * but we can't use set_page_dirty because that function
-		 * is actually a signal that all of the page has become dirty.
-		 * Whereas only part of our page may be dirty.
-		 */
-		if (PageSwapBacked(page))
-			SetPageDirty(newpage);
-		else
-			__set_page_dirty_nobuffers(newpage);
-	}
+	/* Move dirty on pages not done by migrate_page_move_mapping() */
+	if (PageDirty(page))
+		SetPageDirty(newpage);
 
 	if (page_is_young(page))
 		set_page_young(newpage);