diff options
author | KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> | 2012-01-12 20:18:57 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-12 23:13:07 -0500 |
commit | 36b62ad539498d00c2d280a151abad5f7630fa73 (patch) | |
tree | 553409901df6210e4a698c1991f9d455199e098b /mm | |
parent | dc67d50465f249bb357bf85b3ed1f642eb00130a (diff) |
memcg: simplify corner case handling of LRU.
This patch simplifies LRU handling of the racy case (memcg+SwapCache). At
charge time, SwapCache pages tend to be on the LRU already. So, before
overwriting pc->mem_cgroup, the page must be removed from the LRU and added
back to the LRU later.
This patch does:
	spin_lock(zone->lru_lock);
	if (PageLRU(page))
		remove from LRU
	overwrite pc->mem_cgroup
	if (PageLRU(page))
		add to new LRU.
	spin_unlock(zone->lru_lock);
This guarantees that no page is on the LRU while pc->mem_cgroup is being modified.
This patch also unifies the LRU handling of replace_page_cache() and
swapin.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Miklos Szeredi <mszeredi@suse.cz>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Ying Han <yinghan@google.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memcontrol.c | 109 |
1 files changed, 16 insertions, 93 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 71bac4d720d7..d58bb5fa4403 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -1137,86 +1137,6 @@ struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone, | |||
1137 | } | 1137 | } |
1138 | 1138 | ||
1139 | /* | 1139 | /* |
1140 | * At handling SwapCache and other FUSE stuff, pc->mem_cgroup may be changed | ||
1141 | * while it's linked to lru because the page may be reused after it's fully | ||
1142 | * uncharged. To handle that, unlink page_cgroup from LRU when charge it again. | ||
1143 | * It's done under lock_page and expected that zone->lru_lock isnever held. | ||
1144 | */ | ||
1145 | static void mem_cgroup_lru_del_before_commit(struct page *page) | ||
1146 | { | ||
1147 | enum lru_list lru; | ||
1148 | unsigned long flags; | ||
1149 | struct zone *zone = page_zone(page); | ||
1150 | struct page_cgroup *pc = lookup_page_cgroup(page); | ||
1151 | |||
1152 | /* | ||
1153 | * Doing this check without taking ->lru_lock seems wrong but this | ||
1154 | * is safe. Because if page_cgroup's USED bit is unset, the page | ||
1155 | * will not be added to any memcg's LRU. If page_cgroup's USED bit is | ||
1156 | * set, the commit after this will fail, anyway. | ||
1157 | * This all charge/uncharge is done under some mutual execustion. | ||
1158 | * So, we don't need to taking care of changes in USED bit. | ||
1159 | */ | ||
1160 | if (likely(!PageLRU(page))) | ||
1161 | return; | ||
1162 | |||
1163 | spin_lock_irqsave(&zone->lru_lock, flags); | ||
1164 | lru = page_lru(page); | ||
1165 | /* | ||
1166 | * The uncharged page could still be registered to the LRU of | ||
1167 | * the stale pc->mem_cgroup. | ||
1168 | * | ||
1169 | * As pc->mem_cgroup is about to get overwritten, the old LRU | ||
1170 | * accounting needs to be taken care of. Let root_mem_cgroup | ||
1171 | * babysit the page until the new memcg is responsible for it. | ||
1172 | * | ||
1173 | * The PCG_USED bit is guarded by lock_page() as the page is | ||
1174 | * swapcache/pagecache. | ||
1175 | */ | ||
1176 | if (PageLRU(page) && PageCgroupAcctLRU(pc) && !PageCgroupUsed(pc)) { | ||
1177 | del_page_from_lru_list(zone, page, lru); | ||
1178 | add_page_to_lru_list(zone, page, lru); | ||
1179 | } | ||
1180 | spin_unlock_irqrestore(&zone->lru_lock, flags); | ||
1181 | } | ||
1182 | |||
1183 | static void mem_cgroup_lru_add_after_commit(struct page *page) | ||
1184 | { | ||
1185 | enum lru_list lru; | ||
1186 | unsigned long flags; | ||
1187 | struct zone *zone = page_zone(page); | ||
1188 | struct page_cgroup *pc = lookup_page_cgroup(page); | ||
1189 | /* | ||
1190 | * putback: charge: | ||
1191 | * SetPageLRU SetPageCgroupUsed | ||
1192 | * smp_mb smp_mb | ||
1193 | * PageCgroupUsed && add to memcg LRU PageLRU && add to memcg LRU | ||
1194 | * | ||
1195 | * Ensure that one of the two sides adds the page to the memcg | ||
1196 | * LRU during a race. | ||
1197 | */ | ||
1198 | smp_mb(); | ||
1199 | /* taking care of that the page is added to LRU while we commit it */ | ||
1200 | if (likely(!PageLRU(page))) | ||
1201 | return; | ||
1202 | spin_lock_irqsave(&zone->lru_lock, flags); | ||
1203 | lru = page_lru(page); | ||
1204 | /* | ||
1205 | * If the page is not on the LRU, someone will soon put it | ||
1206 | * there. If it is, and also already accounted for on the | ||
1207 | * memcg-side, it must be on the right lruvec as setting | ||
1208 | * pc->mem_cgroup and PageCgroupUsed is properly ordered. | ||
1209 | * Otherwise, root_mem_cgroup has been babysitting the page | ||
1210 | * during the charge. Move it to the new memcg now. | ||
1211 | */ | ||
1212 | if (PageLRU(page) && !PageCgroupAcctLRU(pc)) { | ||
1213 | del_page_from_lru_list(zone, page, lru); | ||
1214 | add_page_to_lru_list(zone, page, lru); | ||
1215 | } | ||
1216 | spin_unlock_irqrestore(&zone->lru_lock, flags); | ||
1217 | } | ||
1218 | |||
1219 | /* | ||
1220 | * Checks whether given mem is same or in the root_mem_cgroup's | 1140 | * Checks whether given mem is same or in the root_mem_cgroup's |
1221 | * hierarchy subtree | 1141 | * hierarchy subtree |
1222 | */ | 1142 | */ |
@@ -2775,14 +2695,27 @@ __mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *memcg, | |||
2775 | enum charge_type ctype) | 2695 | enum charge_type ctype) |
2776 | { | 2696 | { |
2777 | struct page_cgroup *pc = lookup_page_cgroup(page); | 2697 | struct page_cgroup *pc = lookup_page_cgroup(page); |
2698 | struct zone *zone = page_zone(page); | ||
2699 | unsigned long flags; | ||
2700 | bool removed = false; | ||
2701 | |||
2778 | /* | 2702 | /* |
2779 | * In some case, SwapCache, FUSE(splice_buf->radixtree), the page | 2703 | * In some case, SwapCache, FUSE(splice_buf->radixtree), the page |
2780 | * is already on LRU. It means the page may on some other page_cgroup's | 2704 | * is already on LRU. It means the page may on some other page_cgroup's |
2781 | * LRU. Take care of it. | 2705 | * LRU. Take care of it. |
2782 | */ | 2706 | */ |
2783 | mem_cgroup_lru_del_before_commit(page); | 2707 | spin_lock_irqsave(&zone->lru_lock, flags); |
2708 | if (PageLRU(page)) { | ||
2709 | del_page_from_lru_list(zone, page, page_lru(page)); | ||
2710 | ClearPageLRU(page); | ||
2711 | removed = true; | ||
2712 | } | ||
2784 | __mem_cgroup_commit_charge(memcg, page, 1, pc, ctype); | 2713 | __mem_cgroup_commit_charge(memcg, page, 1, pc, ctype); |
2785 | mem_cgroup_lru_add_after_commit(page); | 2714 | if (removed) { |
2715 | add_page_to_lru_list(zone, page, page_lru(page)); | ||
2716 | SetPageLRU(page); | ||
2717 | } | ||
2718 | spin_unlock_irqrestore(&zone->lru_lock, flags); | ||
2786 | return; | 2719 | return; |
2787 | } | 2720 | } |
2788 | 2721 | ||
@@ -3383,9 +3316,7 @@ void mem_cgroup_replace_page_cache(struct page *oldpage, | |||
3383 | { | 3316 | { |
3384 | struct mem_cgroup *memcg; | 3317 | struct mem_cgroup *memcg; |
3385 | struct page_cgroup *pc; | 3318 | struct page_cgroup *pc; |
3386 | struct zone *zone; | ||
3387 | enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE; | 3319 | enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE; |
3388 | unsigned long flags; | ||
3389 | 3320 | ||
3390 | if (mem_cgroup_disabled()) | 3321 | if (mem_cgroup_disabled()) |
3391 | return; | 3322 | return; |
@@ -3401,20 +3332,12 @@ void mem_cgroup_replace_page_cache(struct page *oldpage, | |||
3401 | if (PageSwapBacked(oldpage)) | 3332 | if (PageSwapBacked(oldpage)) |
3402 | type = MEM_CGROUP_CHARGE_TYPE_SHMEM; | 3333 | type = MEM_CGROUP_CHARGE_TYPE_SHMEM; |
3403 | 3334 | ||
3404 | zone = page_zone(newpage); | ||
3405 | pc = lookup_page_cgroup(newpage); | ||
3406 | /* | 3335 | /* |
3407 | * Even if newpage->mapping was NULL before starting replacement, | 3336 | * Even if newpage->mapping was NULL before starting replacement, |
3408 | * the newpage may be on LRU(or pagevec for LRU) already. We lock | 3337 | * the newpage may be on LRU(or pagevec for LRU) already. We lock |
3409 | * LRU while we overwrite pc->mem_cgroup. | 3338 | * LRU while we overwrite pc->mem_cgroup. |
3410 | */ | 3339 | */ |
3411 | spin_lock_irqsave(&zone->lru_lock, flags); | 3340 | __mem_cgroup_commit_charge_lrucare(newpage, memcg, type); |
3412 | if (PageLRU(newpage)) | ||
3413 | del_page_from_lru_list(zone, newpage, page_lru(newpage)); | ||
3414 | __mem_cgroup_commit_charge(memcg, newpage, 1, pc, type); | ||
3415 | if (PageLRU(newpage)) | ||
3416 | add_page_to_lru_list(zone, newpage, page_lru(newpage)); | ||
3417 | spin_unlock_irqrestore(&zone->lru_lock, flags); | ||
3418 | } | 3341 | } |
3419 | 3342 | ||
3420 | #ifdef CONFIG_DEBUG_VM | 3343 | #ifdef CONFIG_DEBUG_VM |