path: root/mm/migrate.c
author		Johannes Weiner <hannes@cmpxchg.org>	2014-08-08 17:19:22 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-08-08 18:57:17 -0400
commit		0a31bc97c80c3fa87b32c091d9a930ac19cd0c40 (patch)
tree		06dafd237309f9b8ded980eb420a5377989e2c0b /mm/migrate.c
parent		00501b531c4723972aa11d6d4ebcf8d6552007c8 (diff)
mm: memcontrol: rewrite uncharge API
The memcg uncharging code that is involved towards the end of a page's lifetime - truncation, reclaim, swapout, migration - is impressively complicated and fragile.

Because anonymous and file pages were always charged before they had their page->mapping established, uncharges had to happen when the page type could still be known from the context; as in unmap for anonymous, page cache removal for file and shmem pages, and swap cache truncation for swap pages. However, these operations happen well before the page is actually freed, and so a lot of synchronization is necessary:

- Charging, uncharging, page migration, and charge migration all need to take a per-page bit spinlock as they could race with uncharging.

- Swap cache truncation happens during both swap-in and swap-out, and possibly repeatedly before the page is actually freed. This means that the memcg swapout code is called from many contexts that make no sense and it has to figure out the direction from page state to make sure memory and memory+swap are always correctly charged.

- On page migration, the old page might be unmapped but then reused, so memcg code has to prevent untimely uncharging in that case. Because this code - which should be a simple charge transfer - is so special-cased, it is not reusable for replace_page_cache().

But now that charged pages always have a page->mapping, introduce mem_cgroup_uncharge(), which is called after the final put_page(), when we know for sure that nobody is looking at the page anymore.

For page migration, introduce mem_cgroup_migrate(), which is called after the migration is successful and the new page is fully rmapped. Because the old page is no longer uncharged after migration, prevent double charges by decoupling the page's memcg association (PCG_USED and pc->mem_cgroup) from the page holding an actual charge. The new bits PCG_MEM and PCG_MEMSW represent the respective charges and are transferred to the new page during migration.

mem_cgroup_migrate() is suitable for replace_page_cache() as well, which gets rid of mem_cgroup_replace_page_cache(). However, care needs to be taken because both the source and the target page can already be charged and on the LRU when fuse is splicing: grab the page lock on the charge moving side to prevent changing pc->mem_cgroup of a page under migration. Also, the lruvecs of both pages change as we uncharge the old and charge the new during migration, and putback may race with us, so grab the lru lock and isolate the pages iff on LRU to prevent races and ensure the pages are on the right lruvec afterward.

Swap accounting is massively simplified: because the page is no longer uncharged as early as swap cache deletion, a new mem_cgroup_swapout() can transfer the page's memory+swap charge (PCG_MEMSW) to the swap entry before the final put_page() in page reclaim.

Finally, page_cgroup changes are now protected by whatever protection the page itself offers: anonymous pages are charged under the page table lock, whereas page cache insertions, swapin, and migration hold the page lock. Uncharging happens under full exclusion with no outstanding references. Charging and uncharging also ensure that the page is off-LRU, which serializes against charge migration. Remove the very costly page_cgroup lock and set pc->flags non-atomically.
[mhocko@suse.cz: mem_cgroup_charge_statistics needs preempt_disable]
[vdavydov@parallels.com: fix flags definition]
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Tested-by: Jet Chen <jet.chen@intel.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Tested-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
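A minimal sketch of the call ordering described above may help when reading the hunks below. It is illustrative only: the lrucare argument of mem_cgroup_migrate() is taken from this diff, while the exact signatures of mem_cgroup_swapout() and mem_cgroup_uncharge() are assumed from the commit message rather than copied from the patch.

#include <linux/memcontrol.h>
#include <linux/mm.h>
#include <linux/swap.h>

/*
 * Illustrative sketch of the new uncharge ordering described above.
 * mem_cgroup_migrate(old, new, lrucare) matches the hunks below; the
 * other signatures are assumptions based on the commit message.
 */

/*
 * Migration: the charge moves only once the copy has succeeded and the
 * new page is fully rmapped, so failure paths need no memcg rollback.
 */
static void sketch_migrate(struct page *page, struct page *newpage)
{
	/* ... contents copied, migration PTEs removed ... */
	mem_cgroup_migrate(page, newpage, false);
}

/*
 * Reclaim/swapout: hand the memory+swap charge to the swap entry before
 * the final reference is dropped.
 */
static void sketch_swapout(struct page *page, swp_entry_t entry)
{
	mem_cgroup_swapout(page, entry);	/* assumed signature */
	put_page(page);
	/*
	 * mem_cgroup_uncharge() runs from the page-freeing path after the
	 * final put_page(), once nobody can look at the page anymore; it
	 * is not called directly here.
	 */
}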
Diffstat (limited to 'mm/migrate.c')
-rw-r--r--	mm/migrate.c	38
1 file changed, 9 insertions, 29 deletions
diff --git a/mm/migrate.c b/mm/migrate.c
index be6dbf995c0c..f78ec9bd454d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -780,6 +780,7 @@ static int move_to_new_page(struct page *newpage, struct page *page,
 	if (rc != MIGRATEPAGE_SUCCESS) {
 		newpage->mapping = NULL;
 	} else {
+		mem_cgroup_migrate(page, newpage, false);
 		if (remap_swapcache)
 			remove_migration_ptes(page, newpage);
 		page->mapping = NULL;
@@ -795,7 +796,6 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 {
 	int rc = -EAGAIN;
 	int remap_swapcache = 1;
-	struct mem_cgroup *mem;
 	struct anon_vma *anon_vma = NULL;
 
 	if (!trylock_page(page)) {
@@ -821,9 +821,6 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 		lock_page(page);
 	}
 
-	/* charge against new page */
-	mem_cgroup_prepare_migration(page, newpage, &mem);
-
 	if (PageWriteback(page)) {
 		/*
 		 * Only in the case of a full synchronous migration is it
@@ -833,10 +830,10 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 		 */
 		if (mode != MIGRATE_SYNC) {
 			rc = -EBUSY;
-			goto uncharge;
+			goto out_unlock;
 		}
 		if (!force)
-			goto uncharge;
+			goto out_unlock;
 		wait_on_page_writeback(page);
 	}
 	/*
@@ -872,7 +869,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 			 */
 			remap_swapcache = 0;
 		} else {
-			goto uncharge;
+			goto out_unlock;
 		}
 	}
 
@@ -885,7 +882,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 		 * the page migration right away (proteced by page lock).
 		 */
 		rc = balloon_page_migrate(newpage, page, mode);
-		goto uncharge;
+		goto out_unlock;
 	}
 
 	/*
@@ -904,7 +901,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 		VM_BUG_ON_PAGE(PageAnon(page), page);
 		if (page_has_private(page)) {
 			try_to_free_buffers(page);
-			goto uncharge;
+			goto out_unlock;
 		}
 		goto skip_unmap;
 	}
@@ -923,10 +920,7 @@ skip_unmap:
 	if (anon_vma)
 		put_anon_vma(anon_vma);
 
-uncharge:
-	mem_cgroup_end_migration(mem, page, newpage,
-				 (rc == MIGRATEPAGE_SUCCESS ||
-				  rc == MIGRATEPAGE_BALLOON_SUCCESS));
+out_unlock:
 	unlock_page(page);
 out:
 	return rc;
@@ -1786,7 +1780,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	pg_data_t *pgdat = NODE_DATA(node);
 	int isolated = 0;
 	struct page *new_page = NULL;
-	struct mem_cgroup *memcg = NULL;
 	int page_lru = page_is_file_cache(page);
 	unsigned long mmun_start = address & HPAGE_PMD_MASK;
 	unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
@@ -1852,15 +1845,6 @@ fail_putback:
 		goto out_unlock;
 	}
 
-	/*
-	 * Traditional migration needs to prepare the memcg charge
-	 * transaction early to prevent the old page from being
-	 * uncharged when installing migration entries. Here we can
-	 * save the potential rollback and start the charge transfer
-	 * only when migration is already known to end successfully.
-	 */
-	mem_cgroup_prepare_migration(page, new_page, &memcg);
-
 	orig_entry = *pmd;
 	entry = mk_pmd(new_page, vma->vm_page_prot);
 	entry = pmd_mkhuge(entry);
@@ -1888,14 +1872,10 @@ fail_putback:
 		goto fail_putback;
 	}
 
+	mem_cgroup_migrate(page, new_page, false);
+
 	page_remove_rmap(page);
 
-	/*
-	 * Finish the charge transaction under the page table lock to
-	 * prevent split_huge_page() from dividing up the charge
-	 * before it's fully transferred to the new page.
-	 */
-	mem_cgroup_end_migration(memcg, page, new_page, true);
 	spin_unlock(ptl);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 