diff options
| author | KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> | 2012-01-12 20:18:57 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-12 23:13:07 -0500 |
| commit | 36b62ad539498d00c2d280a151abad5f7630fa73 (patch) | |
| tree | 553409901df6210e4a698c1991f9d455199e098b | |
| parent | dc67d50465f249bb357bf85b3ed1f642eb00130a (diff) | |
memcg: simplify corner case handling of LRU.
This patch simplifies the LRU handling of a racy case (memcg+SwapCache). At
charge time, SwapCache pages tend to be on the LRU already. So, before
overwriting pc->mem_cgroup, the page must be removed from the LRU and added
back to the LRU later.
This patch does
spin_lock(zone->lru_lock);
if (PageLRU(page))
remove from LRU
overwrite pc->mem_cgroup
if (PageLRU(page))
add to new LRU.
spin_unlock(zone->lru_lock);
This guarantees that no page is on the LRU while pc->mem_cgroup is being modified.
This patch also unifies the LRU handling of replace_page_cache() and
swapin.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Miklos Szeredi <mszeredi@suse.cz>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Ying Han <yinghan@google.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
| -rw-r--r-- | mm/memcontrol.c | 109 |
1 files changed, 16 insertions, 93 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 71bac4d720d7..d58bb5fa4403 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
| @@ -1137,86 +1137,6 @@ struct lruvec *mem_cgroup_lru_move_lists(struct zone *zone, | |||
| 1137 | } | 1137 | } |
| 1138 | 1138 | ||
| 1139 | /* | 1139 | /* |
| 1140 | * At handling SwapCache and other FUSE stuff, pc->mem_cgroup may be changed | ||
| 1141 | * while it's linked to lru because the page may be reused after it's fully | ||
| 1142 | * uncharged. To handle that, unlink page_cgroup from LRU when charge it again. | ||
| 1143 | * It's done under lock_page and expected that zone->lru_lock isnever held. | ||
| 1144 | */ | ||
| 1145 | static void mem_cgroup_lru_del_before_commit(struct page *page) | ||
| 1146 | { | ||
| 1147 | enum lru_list lru; | ||
| 1148 | unsigned long flags; | ||
| 1149 | struct zone *zone = page_zone(page); | ||
| 1150 | struct page_cgroup *pc = lookup_page_cgroup(page); | ||
| 1151 | |||
| 1152 | /* | ||
| 1153 | * Doing this check without taking ->lru_lock seems wrong but this | ||
| 1154 | * is safe. Because if page_cgroup's USED bit is unset, the page | ||
| 1155 | * will not be added to any memcg's LRU. If page_cgroup's USED bit is | ||
| 1156 | * set, the commit after this will fail, anyway. | ||
| 1157 | * This all charge/uncharge is done under some mutual execustion. | ||
| 1158 | * So, we don't need to taking care of changes in USED bit. | ||
| 1159 | */ | ||
| 1160 | if (likely(!PageLRU(page))) | ||
| 1161 | return; | ||
| 1162 | |||
| 1163 | spin_lock_irqsave(&zone->lru_lock, flags); | ||
| 1164 | lru = page_lru(page); | ||
| 1165 | /* | ||
| 1166 | * The uncharged page could still be registered to the LRU of | ||
| 1167 | * the stale pc->mem_cgroup. | ||
| 1168 | * | ||
| 1169 | * As pc->mem_cgroup is about to get overwritten, the old LRU | ||
| 1170 | * accounting needs to be taken care of. Let root_mem_cgroup | ||
| 1171 | * babysit the page until the new memcg is responsible for it. | ||
| 1172 | * | ||
| 1173 | * The PCG_USED bit is guarded by lock_page() as the page is | ||
| 1174 | * swapcache/pagecache. | ||
| 1175 | */ | ||
| 1176 | if (PageLRU(page) && PageCgroupAcctLRU(pc) && !PageCgroupUsed(pc)) { | ||
| 1177 | del_page_from_lru_list(zone, page, lru); | ||
| 1178 | add_page_to_lru_list(zone, page, lru); | ||
| 1179 | } | ||
| 1180 | spin_unlock_irqrestore(&zone->lru_lock, flags); | ||
| 1181 | } | ||
| 1182 | |||
| 1183 | static void mem_cgroup_lru_add_after_commit(struct page *page) | ||
| 1184 | { | ||
| 1185 | enum lru_list lru; | ||
| 1186 | unsigned long flags; | ||
| 1187 | struct zone *zone = page_zone(page); | ||
| 1188 | struct page_cgroup *pc = lookup_page_cgroup(page); | ||
| 1189 | /* | ||
| 1190 | * putback: charge: | ||
| 1191 | * SetPageLRU SetPageCgroupUsed | ||
| 1192 | * smp_mb smp_mb | ||
| 1193 | * PageCgroupUsed && add to memcg LRU PageLRU && add to memcg LRU | ||
| 1194 | * | ||
| 1195 | * Ensure that one of the two sides adds the page to the memcg | ||
| 1196 | * LRU during a race. | ||
| 1197 | */ | ||
| 1198 | smp_mb(); | ||
| 1199 | /* taking care of that the page is added to LRU while we commit it */ | ||
| 1200 | if (likely(!PageLRU(page))) | ||
| 1201 | return; | ||
| 1202 | spin_lock_irqsave(&zone->lru_lock, flags); | ||
| 1203 | lru = page_lru(page); | ||
| 1204 | /* | ||
| 1205 | * If the page is not on the LRU, someone will soon put it | ||
| 1206 | * there. If it is, and also already accounted for on the | ||
| 1207 | * memcg-side, it must be on the right lruvec as setting | ||
| 1208 | * pc->mem_cgroup and PageCgroupUsed is properly ordered. | ||
| 1209 | * Otherwise, root_mem_cgroup has been babysitting the page | ||
| 1210 | * during the charge. Move it to the new memcg now. | ||
| 1211 | */ | ||
| 1212 | if (PageLRU(page) && !PageCgroupAcctLRU(pc)) { | ||
| 1213 | del_page_from_lru_list(zone, page, lru); | ||
| 1214 | add_page_to_lru_list(zone, page, lru); | ||
| 1215 | } | ||
| 1216 | spin_unlock_irqrestore(&zone->lru_lock, flags); | ||
| 1217 | } | ||
| 1218 | |||
| 1219 | /* | ||
| 1220 | * Checks whether given mem is same or in the root_mem_cgroup's | 1140 | * Checks whether given mem is same or in the root_mem_cgroup's |
| 1221 | * hierarchy subtree | 1141 | * hierarchy subtree |
| 1222 | */ | 1142 | */ |
| @@ -2775,14 +2695,27 @@ __mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *memcg, | |||
| 2775 | enum charge_type ctype) | 2695 | enum charge_type ctype) |
| 2776 | { | 2696 | { |
| 2777 | struct page_cgroup *pc = lookup_page_cgroup(page); | 2697 | struct page_cgroup *pc = lookup_page_cgroup(page); |
| 2698 | struct zone *zone = page_zone(page); | ||
| 2699 | unsigned long flags; | ||
| 2700 | bool removed = false; | ||
| 2701 | |||
| 2778 | /* | 2702 | /* |
| 2779 | * In some case, SwapCache, FUSE(splice_buf->radixtree), the page | 2703 | * In some case, SwapCache, FUSE(splice_buf->radixtree), the page |
| 2780 | * is already on LRU. It means the page may on some other page_cgroup's | 2704 | * is already on LRU. It means the page may on some other page_cgroup's |
| 2781 | * LRU. Take care of it. | 2705 | * LRU. Take care of it. |
| 2782 | */ | 2706 | */ |
| 2783 | mem_cgroup_lru_del_before_commit(page); | 2707 | spin_lock_irqsave(&zone->lru_lock, flags); |
| 2708 | if (PageLRU(page)) { | ||
| 2709 | del_page_from_lru_list(zone, page, page_lru(page)); | ||
| 2710 | ClearPageLRU(page); | ||
| 2711 | removed = true; | ||
| 2712 | } | ||
| 2784 | __mem_cgroup_commit_charge(memcg, page, 1, pc, ctype); | 2713 | __mem_cgroup_commit_charge(memcg, page, 1, pc, ctype); |
| 2785 | mem_cgroup_lru_add_after_commit(page); | 2714 | if (removed) { |
| 2715 | add_page_to_lru_list(zone, page, page_lru(page)); | ||
| 2716 | SetPageLRU(page); | ||
| 2717 | } | ||
| 2718 | spin_unlock_irqrestore(&zone->lru_lock, flags); | ||
| 2786 | return; | 2719 | return; |
| 2787 | } | 2720 | } |
| 2788 | 2721 | ||
| @@ -3383,9 +3316,7 @@ void mem_cgroup_replace_page_cache(struct page *oldpage, | |||
| 3383 | { | 3316 | { |
| 3384 | struct mem_cgroup *memcg; | 3317 | struct mem_cgroup *memcg; |
| 3385 | struct page_cgroup *pc; | 3318 | struct page_cgroup *pc; |
| 3386 | struct zone *zone; | ||
| 3387 | enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE; | 3319 | enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE; |
| 3388 | unsigned long flags; | ||
| 3389 | 3320 | ||
| 3390 | if (mem_cgroup_disabled()) | 3321 | if (mem_cgroup_disabled()) |
| 3391 | return; | 3322 | return; |
| @@ -3401,20 +3332,12 @@ void mem_cgroup_replace_page_cache(struct page *oldpage, | |||
| 3401 | if (PageSwapBacked(oldpage)) | 3332 | if (PageSwapBacked(oldpage)) |
| 3402 | type = MEM_CGROUP_CHARGE_TYPE_SHMEM; | 3333 | type = MEM_CGROUP_CHARGE_TYPE_SHMEM; |
| 3403 | 3334 | ||
| 3404 | zone = page_zone(newpage); | ||
| 3405 | pc = lookup_page_cgroup(newpage); | ||
| 3406 | /* | 3335 | /* |
| 3407 | * Even if newpage->mapping was NULL before starting replacement, | 3336 | * Even if newpage->mapping was NULL before starting replacement, |
| 3408 | * the newpage may be on LRU(or pagevec for LRU) already. We lock | 3337 | * the newpage may be on LRU(or pagevec for LRU) already. We lock |
| 3409 | * LRU while we overwrite pc->mem_cgroup. | 3338 | * LRU while we overwrite pc->mem_cgroup. |
| 3410 | */ | 3339 | */ |
| 3411 | spin_lock_irqsave(&zone->lru_lock, flags); | 3340 | __mem_cgroup_commit_charge_lrucare(newpage, memcg, type); |
| 3412 | if (PageLRU(newpage)) | ||
| 3413 | del_page_from_lru_list(zone, newpage, page_lru(newpage)); | ||
| 3414 | __mem_cgroup_commit_charge(memcg, newpage, 1, pc, type); | ||
| 3415 | if (PageLRU(newpage)) | ||
| 3416 | add_page_to_lru_list(zone, newpage, page_lru(newpage)); | ||
| 3417 | spin_unlock_irqrestore(&zone->lru_lock, flags); | ||
| 3418 | } | 3341 | } |
| 3419 | 3342 | ||
| 3420 | #ifdef CONFIG_DEBUG_VM | 3343 | #ifdef CONFIG_DEBUG_VM |
