aboutsummaryrefslogtreecommitdiffstats
path: root/mm/shmem.c
diff options
context:
space:
mode:
author Johannes Weiner <hannes@cmpxchg.org> 2014-08-08 17:19:22 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-08-08 18:57:17 -0400
commit 0a31bc97c80c3fa87b32c091d9a930ac19cd0c40 (patch)
tree 06dafd237309f9b8ded980eb420a5377989e2c0b /mm/shmem.c
parent 00501b531c4723972aa11d6d4ebcf8d6552007c8 (diff)
mm: memcontrol: rewrite uncharge API
The memcg uncharging code that is involved towards the end of a page's lifetime - truncation, reclaim, swapout, migration - is impressively complicated and fragile.

Because anonymous and file pages were always charged before they had their page->mapping established, uncharges had to happen when the page type could still be known from the context; as in unmap for anonymous, page cache removal for file and shmem pages, and swap cache truncation for swap pages. However, these operations happen well before the page is actually freed, and so a lot of synchronization is necessary:

- Charging, uncharging, page migration, and charge migration all need to take a per-page bit spinlock as they could race with uncharging.

- Swap cache truncation happens during both swap-in and swap-out, and possibly repeatedly before the page is actually freed. This means that the memcg swapout code is called from many contexts that make no sense and it has to figure out the direction from page state to make sure memory and memory+swap are always correctly charged.

- On page migration, the old page might be unmapped but then reused, so memcg code has to prevent untimely uncharging in that case. Because this code - which should be a simple charge transfer - is so special-cased, it is not reusable for replace_page_cache().

But now that charged pages always have a page->mapping, introduce mem_cgroup_uncharge(), which is called after the final put_page(), when we know for sure that nobody is looking at the page anymore.

For page migration, introduce mem_cgroup_migrate(), which is called after the migration is successful and the new page is fully rmapped. Because the old page is no longer uncharged after migration, prevent double charges by decoupling the page's memcg association (PCG_USED and pc->mem_cgroup) from the page holding an actual charge. The new bits PCG_MEM and PCG_MEMSW represent the respective charges and are transferred to the new page during migration.
mem_cgroup_migrate() is suitable for replace_page_cache() as well, which gets rid of mem_cgroup_replace_page_cache(). However, care needs to be taken because both the source and the target page can already be charged and on the LRU when fuse is splicing: grab the page lock on the charge moving side to prevent changing pc->mem_cgroup of a page under migration. Also, the lruvecs of both pages change as we uncharge the old and charge the new during migration, and putback may race with us, so grab the lru lock and isolate the pages iff on LRU to prevent races and ensure the pages are on the right lruvec afterward.

Swap accounting is massively simplified: because the page is no longer uncharged as early as swap cache deletion, a new mem_cgroup_swapout() can transfer the page's memory+swap charge (PCG_MEMSW) to the swap entry before the final put_page() in page reclaim.

Finally, page_cgroup changes are now protected by whatever protection the page itself offers: anonymous pages are charged under the page table lock, whereas page cache insertions, swapin, and migration hold the page lock. Uncharging happens under full exclusion with no outstanding references. Charging and uncharging also ensure that the page is off-LRU, which serializes against charge migration. Remove the very costly page_cgroup lock and set pc->flags non-atomically.

[mhocko@suse.cz: mem_cgroup_charge_statistics needs preempt_disable]
[vdavydov@parallels.com: fix flags definition]
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Tested-by: Jet Chen <jet.chen@intel.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Tested-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/shmem.c')
-rw-r--r-- mm/shmem.c 8
1 files changed, 2 insertions, 6 deletions
diff --git a/mm/shmem.c b/mm/shmem.c
index 1f1a8085538b..6dc80d298f9d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -419,7 +419,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
419 pvec.pages, indices); 419 pvec.pages, indices);
420 if (!pvec.nr) 420 if (!pvec.nr)
421 break; 421 break;
422 mem_cgroup_uncharge_start();
423 for (i = 0; i < pagevec_count(&pvec); i++) { 422 for (i = 0; i < pagevec_count(&pvec); i++) {
424 struct page *page = pvec.pages[i]; 423 struct page *page = pvec.pages[i];
425 424
@@ -447,7 +446,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
447 } 446 }
448 pagevec_remove_exceptionals(&pvec); 447 pagevec_remove_exceptionals(&pvec);
449 pagevec_release(&pvec); 448 pagevec_release(&pvec);
450 mem_cgroup_uncharge_end();
451 cond_resched(); 449 cond_resched();
452 index++; 450 index++;
453 } 451 }
@@ -495,7 +493,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
495 index = start; 493 index = start;
496 continue; 494 continue;
497 } 495 }
498 mem_cgroup_uncharge_start();
499 for (i = 0; i < pagevec_count(&pvec); i++) { 496 for (i = 0; i < pagevec_count(&pvec); i++) {
500 struct page *page = pvec.pages[i]; 497 struct page *page = pvec.pages[i];
501 498
@@ -531,7 +528,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
531 } 528 }
532 pagevec_remove_exceptionals(&pvec); 529 pagevec_remove_exceptionals(&pvec);
533 pagevec_release(&pvec); 530 pagevec_release(&pvec);
534 mem_cgroup_uncharge_end();
535 index++; 531 index++;
536 } 532 }
537 533
@@ -835,7 +831,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
835 } 831 }
836 832
837 mutex_unlock(&shmem_swaplist_mutex); 833 mutex_unlock(&shmem_swaplist_mutex);
838 swapcache_free(swap, NULL); 834 swapcache_free(swap);
839redirty: 835redirty:
840 set_page_dirty(page); 836 set_page_dirty(page);
841 if (wbc->for_reclaim) 837 if (wbc->for_reclaim)
@@ -1008,7 +1004,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
1008 */ 1004 */
1009 oldpage = newpage; 1005 oldpage = newpage;
1010 } else { 1006 } else {
1011 mem_cgroup_replace_page_cache(oldpage, newpage); 1007 mem_cgroup_migrate(oldpage, newpage, false);
1012 lru_cache_add_anon(newpage); 1008 lru_cache_add_anon(newpage);
1013 *pagep = newpage; 1009 *pagep = newpage;
1014 } 1010 }