author	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>	2012-01-12 20:18:57 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-01-12 23:13:07 -0500
commit	36b62ad539498d00c2d280a151abad5f7630fa73
tree	553409901df6210e4a698c1991f9d455199e098b
parent	dc67d50465f249bb357bf85b3ed1f642eb00130a
memcg: simplify corner case handling of LRU.
This patch simplifies LRU handling of the racy case (memcg + SwapCache). At
charging, SwapCache pages tend to be on the LRU already. So, before
overwriting pc->mem_cgroup, the page must be removed from the LRU and added
back later.

This patch does:

	spin_lock(zone->lru_lock);
	if (PageLRU(page))
		remove from LRU
	overwrite pc->mem_cgroup
	if (PageLRU(page))
		add to new LRU
	spin_unlock(zone->lru_lock);

and guarantees that no page is on the LRU while pc->mem_cgroup is being
modified. This patch also unifies the LRU handling of replace_page_cache()
and swapin.

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Miklos Szeredi <mszeredi@suse.cz>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Ying Han <yinghan@google.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
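The critical section described above is easiest to see as ordinary code. The
following is a minimal, hedged user-space model of that pattern; the names
(lru_lock, on_lru, owner, commit_charge_lrucare) are stand-ins invented for
this sketch, not kernel identifiers, and a pthread mutex stands in for the
zone->lru_lock spinlock:

	/* build: cc -pthread sketch.c */
	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	struct page {
		bool on_lru;	/* models PageLRU(page)  */
		int  owner;	/* models pc->mem_cgroup */
	};

	/* A plain mutex stands in for the zone->lru_lock spinlock. */
	static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;

	/* Re-home @page to @new_owner while it is guaranteed off the LRU. */
	static void commit_charge_lrucare(struct page *page, int new_owner)
	{
		bool removed = false;

		pthread_mutex_lock(&lru_lock);
		if (page->on_lru) {		/* if (PageLRU): remove from LRU */
			page->on_lru = false;
			removed = true;
		}
		page->owner = new_owner;	/* overwrite pc->mem_cgroup */
		if (removed)			/* add back to the new owner's LRU */
			page->on_lru = true;
		pthread_mutex_unlock(&lru_lock);
	}

	int main(void)
	{
		struct page p = { .on_lru = true, .owner = 0 };

		commit_charge_lrucare(&p, 1);
		printf("owner=%d, on_lru=%d\n", p.owner, p.on_lru);
		return 0;
	}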
-rw-r--r--	mm/memcontrol.c	109
1 file changed, 16 insertions(+), 93 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 71bac4d720d7..d58bb5fa4403 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1137,86 +1137,6 @@ struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone,
 }
 
 /*
- * At handling SwapCache and other FUSE stuff, pc->mem_cgroup may be changed
- * while it's linked to lru because the page may be reused after it's fully
- * uncharged. To handle that, unlink page_cgroup from LRU when charge it again.
- * It's done under lock_page and expected that zone->lru_lock is never held.
- */
-static void mem_cgroup_lru_del_before_commit(struct page *page)
-{
-	enum lru_list lru;
-	unsigned long flags;
-	struct zone *zone = page_zone(page);
-	struct page_cgroup *pc = lookup_page_cgroup(page);
-
-	/*
-	 * Doing this check without taking ->lru_lock seems wrong but this
-	 * is safe. Because if page_cgroup's USED bit is unset, the page
-	 * will not be added to any memcg's LRU. If page_cgroup's USED bit is
-	 * set, the commit after this will fail, anyway.
-	 * This all charge/uncharge is done under some mutual exclusion.
-	 * So, we don't need to take care of changes in the USED bit.
-	 */
-	if (likely(!PageLRU(page)))
-		return;
-
-	spin_lock_irqsave(&zone->lru_lock, flags);
-	lru = page_lru(page);
-	/*
-	 * The uncharged page could still be registered to the LRU of
-	 * the stale pc->mem_cgroup.
-	 *
-	 * As pc->mem_cgroup is about to get overwritten, the old LRU
-	 * accounting needs to be taken care of. Let root_mem_cgroup
-	 * babysit the page until the new memcg is responsible for it.
-	 *
-	 * The PCG_USED bit is guarded by lock_page() as the page is
-	 * swapcache/pagecache.
-	 */
-	if (PageLRU(page) && PageCgroupAcctLRU(pc) && !PageCgroupUsed(pc)) {
-		del_page_from_lru_list(zone, page, lru);
-		add_page_to_lru_list(zone, page, lru);
-	}
-	spin_unlock_irqrestore(&zone->lru_lock, flags);
-}
-
-static void mem_cgroup_lru_add_after_commit(struct page *page)
-{
-	enum lru_list lru;
-	unsigned long flags;
-	struct zone *zone = page_zone(page);
-	struct page_cgroup *pc = lookup_page_cgroup(page);
-	/*
-	 * putback:				charge:
-	 * SetPageLRU				SetPageCgroupUsed
-	 * smp_mb				smp_mb
-	 * PageCgroupUsed && add to memcg LRU	PageLRU && add to memcg LRU
-	 *
-	 * Ensure that one of the two sides adds the page to the memcg
-	 * LRU during a race.
-	 */
-	smp_mb();
-	/* take care that the page is added to the LRU while we commit it */
-	if (likely(!PageLRU(page)))
-		return;
-	spin_lock_irqsave(&zone->lru_lock, flags);
-	lru = page_lru(page);
-	/*
-	 * If the page is not on the LRU, someone will soon put it
-	 * there. If it is, and also already accounted for on the
-	 * memcg-side, it must be on the right lruvec as setting
-	 * pc->mem_cgroup and PageCgroupUsed is properly ordered.
-	 * Otherwise, root_mem_cgroup has been babysitting the page
-	 * during the charge. Move it to the new memcg now.
-	 */
-	if (PageLRU(page) && !PageCgroupAcctLRU(pc)) {
-		del_page_from_lru_list(zone, page, lru);
-		add_page_to_lru_list(zone, page, lru);
-	}
-	spin_unlock_irqrestore(&zone->lru_lock, flags);
-}
-
-/*
  * Checks whether given mem is same or in the root_mem_cgroup's
  * hierarchy subtree
  */
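The ASCII race diagram in the second helper removed above pairs a store on
each side with a full barrier before loading the other side's flag, so at
least one of the two racing paths is guaranteed to see both flags set and
perform the memcg-LRU link. Below is a hedged C11 model of just that barrier
pairing; the flag and function names are invented for illustration, and
seq_cst fences stand in for smp_mb():

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	static atomic_bool page_lru;		/* models PageLRU        */
	static atomic_bool cgroup_used;		/* models PageCgroupUsed */

	static bool putback_side(void)	/* SetPageLRU; smp_mb; test Used */
	{
		atomic_store(&page_lru, true);
		atomic_thread_fence(memory_order_seq_cst);  /* models smp_mb() */
		return atomic_load(&cgroup_used);  /* true => do the memcg-LRU link */
	}

	static bool charge_side(void)	/* SetPageCgroupUsed; smp_mb; test LRU */
	{
		atomic_store(&cgroup_used, true);
		atomic_thread_fence(memory_order_seq_cst);  /* models smp_mb() */
		return atomic_load(&page_lru);     /* true => do the memcg-LRU link */
	}

	int main(void)
	{
		/* Sequential demo; run the two sides in threads and at least
		 * one must return true -- both returning false is forbidden. */
		printf("putback saw used=%d, charge saw lru=%d\n",
		       putback_side(), charge_side());
		return 0;
	}

The patch drops this scheme entirely in favor of doing everything under
zone->lru_lock, as the next hunk shows.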
@@ -2775,14 +2695,27 @@ __mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *memcg,
 					enum charge_type ctype)
 {
 	struct page_cgroup *pc = lookup_page_cgroup(page);
+	struct zone *zone = page_zone(page);
+	unsigned long flags;
+	bool removed = false;
+
 	/*
 	 * In some cases (SwapCache, FUSE(splice_buf->radixtree)), the page
 	 * is already on the LRU. It means the page may be on some other
 	 * page_cgroup's LRU. Take care of it.
 	 */
-	mem_cgroup_lru_del_before_commit(page);
+	spin_lock_irqsave(&zone->lru_lock, flags);
+	if (PageLRU(page)) {
+		del_page_from_lru_list(zone, page, page_lru(page));
+		ClearPageLRU(page);
+		removed = true;
+	}
 	__mem_cgroup_commit_charge(memcg, page, 1, pc, ctype);
-	mem_cgroup_lru_add_after_commit(page);
+	if (removed) {
+		add_page_to_lru_list(zone, page, page_lru(page));
+		SetPageLRU(page);
+	}
+	spin_unlock_irqrestore(&zone->lru_lock, flags);
 	return;
 }
 
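Note the design choice visible in the hunk above: instead of the barrier
dance, __mem_cgroup_commit_charge_lrucare() now holds zone->lru_lock across
the whole commit and fully isolates the page first (del_page_from_lru_list()
plus ClearPageLRU()). Concurrent LRU users, which only touch pages with
PageLRU set, can therefore never see a page whose pc->mem_cgroup is
mid-update; the removed flag ensures that only pages actually taken off a
list are put back.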
@@ -3383,9 +3316,7 @@ void mem_cgroup_replace_page_cache(struct page *oldpage,
 {
 	struct mem_cgroup *memcg;
 	struct page_cgroup *pc;
-	struct zone *zone;
 	enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
-	unsigned long flags;
 
 	if (mem_cgroup_disabled())
 		return;
@@ -3401,20 +3332,12 @@ void mem_cgroup_replace_page_cache(struct page *oldpage,
 	if (PageSwapBacked(oldpage))
 		type = MEM_CGROUP_CHARGE_TYPE_SHMEM;
 
-	zone = page_zone(newpage);
-	pc = lookup_page_cgroup(newpage);
 	/*
 	 * Even if newpage->mapping was NULL before starting replacement,
 	 * the newpage may be on the LRU (or a pagevec for the LRU) already.
 	 * We lock the LRU while we overwrite pc->mem_cgroup.
 	 */
-	spin_lock_irqsave(&zone->lru_lock, flags);
-	if (PageLRU(newpage))
-		del_page_from_lru_list(zone, newpage, page_lru(newpage));
-	__mem_cgroup_commit_charge(memcg, newpage, 1, pc, type);
-	if (PageLRU(newpage))
-		add_page_to_lru_list(zone, newpage, page_lru(newpage));
-	spin_unlock_irqrestore(&zone->lru_lock, flags);
+	__mem_cgroup_commit_charge_lrucare(newpage, memcg, type);
 }
 
 #ifdef CONFIG_DEBUG_VM