diff options
author | KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> | 2009-01-07 21:08:35 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-01-08 11:31:10 -0500 |
commit | b5a84319a4343a0db753436fd8147e61eaafa7ea (patch) | |
tree | 5faae671b431b50a32a2d8c7a57cc9361d8f336d | |
parent | 544122e5e0ee27d5aac4a441f7746712afbf248c (diff) |
memcg: fix shmem's swap accounting
Currently, you can see the following behavior even when swap accounting is enabled:
1. Create groups 01 and 02.
2. Allocate a "file" on tmpfs from a task under group 01.
3. Swap out the "file" (by memory pressure).
4. Read the "file" from a task in group 02.
5. The charge of the "file" is moved to group 02.
This is not ideal behavior. It happens because SwapCache that was loaded
by read-ahead is not taken into account.
This is a patch to fix shmem's swapcache behavior.
- Remove mem_cgroup_cache_charge_swapin().
- Add a SwapCache handler routine to mem_cgroup_cache_charge().
With this, shmem's file cache is charged at add_to_page_cache()
with GFP_NOWAIT.
- Pass the swapcache page to mem_cgroup_shrink_usage().
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/memcontrol.h | 6 | ||||
-rw-r--r-- | include/linux/swap.h | 8 | ||||
-rw-r--r-- | mm/memcontrol.c | 134 | ||||
-rw-r--r-- | mm/shmem.c | 30 |
4 files changed, 76 insertions, 102 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 8ae6ece8c962..326f45c86530 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h | |||
@@ -56,7 +56,8 @@ extern void mem_cgroup_move_lists(struct page *page, | |||
56 | enum lru_list from, enum lru_list to); | 56 | enum lru_list from, enum lru_list to); |
57 | extern void mem_cgroup_uncharge_page(struct page *page); | 57 | extern void mem_cgroup_uncharge_page(struct page *page); |
58 | extern void mem_cgroup_uncharge_cache_page(struct page *page); | 58 | extern void mem_cgroup_uncharge_cache_page(struct page *page); |
59 | extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask); | 59 | extern int mem_cgroup_shrink_usage(struct page *page, |
60 | struct mm_struct *mm, gfp_t gfp_mask); | ||
60 | 61 | ||
61 | extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, | 62 | extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, |
62 | struct list_head *dst, | 63 | struct list_head *dst, |
@@ -155,7 +156,8 @@ static inline void mem_cgroup_uncharge_cache_page(struct page *page) | |||
155 | { | 156 | { |
156 | } | 157 | } |
157 | 158 | ||
158 | static inline int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) | 159 | static inline int mem_cgroup_shrink_usage(struct page *page, |
160 | struct mm_struct *mm, gfp_t gfp_mask) | ||
159 | { | 161 | { |
160 | return 0; | 162 | return 0; |
161 | } | 163 | } |
diff --git a/include/linux/swap.h b/include/linux/swap.h index 4ccca25d0f05..d30215578877 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h | |||
@@ -335,16 +335,8 @@ static inline void disable_swap_token(void) | |||
335 | } | 335 | } |
336 | 336 | ||
337 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR | 337 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR |
338 | extern int mem_cgroup_cache_charge_swapin(struct page *page, | ||
339 | struct mm_struct *mm, gfp_t mask, bool locked); | ||
340 | extern void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent); | 338 | extern void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent); |
341 | #else | 339 | #else |
342 | static inline | ||
343 | int mem_cgroup_cache_charge_swapin(struct page *page, | ||
344 | struct mm_struct *mm, gfp_t mask, bool locked) | ||
345 | { | ||
346 | return 0; | ||
347 | } | ||
348 | static inline void | 340 | static inline void |
349 | mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) | 341 | mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) |
350 | { | 342 | { |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f50cb7b1efdb..93a792871804 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -893,6 +893,23 @@ nomem: | |||
893 | return -ENOMEM; | 893 | return -ENOMEM; |
894 | } | 894 | } |
895 | 895 | ||
896 | static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page) | ||
897 | { | ||
898 | struct mem_cgroup *mem; | ||
899 | swp_entry_t ent; | ||
900 | |||
901 | if (!PageSwapCache(page)) | ||
902 | return NULL; | ||
903 | |||
904 | ent.val = page_private(page); | ||
905 | mem = lookup_swap_cgroup(ent); | ||
906 | if (!mem) | ||
907 | return NULL; | ||
908 | if (!css_tryget(&mem->css)) | ||
909 | return NULL; | ||
910 | return mem; | ||
911 | } | ||
912 | |||
896 | /* | 913 | /* |
897 | * commit a charge got by __mem_cgroup_try_charge() and makes page_cgroup to be | 914 | * commit a charge got by __mem_cgroup_try_charge() and makes page_cgroup to be |
898 | * USED state. If already USED, uncharge and return. | 915 | * USED state. If already USED, uncharge and return. |
@@ -1084,6 +1101,9 @@ int mem_cgroup_newpage_charge(struct page *page, | |||
1084 | int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, | 1101 | int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, |
1085 | gfp_t gfp_mask) | 1102 | gfp_t gfp_mask) |
1086 | { | 1103 | { |
1104 | struct mem_cgroup *mem = NULL; | ||
1105 | int ret; | ||
1106 | |||
1087 | if (mem_cgroup_disabled()) | 1107 | if (mem_cgroup_disabled()) |
1088 | return 0; | 1108 | return 0; |
1089 | if (PageCompound(page)) | 1109 | if (PageCompound(page)) |
@@ -1096,6 +1116,8 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, | |||
1096 | * For GFP_NOWAIT case, the page may be pre-charged before calling | 1116 | * For GFP_NOWAIT case, the page may be pre-charged before calling |
1097 | * add_to_page_cache(). (See shmem.c) check it here and avoid to call | 1117 | * add_to_page_cache(). (See shmem.c) check it here and avoid to call |
1098 | * charge twice. (It works but has to pay a bit larger cost.) | 1118 | * charge twice. (It works but has to pay a bit larger cost.) |
1119 | * And when the page is SwapCache, it should take swap information | ||
1120 | * into account. This is under lock_page() now. | ||
1099 | */ | 1121 | */ |
1100 | if (!(gfp_mask & __GFP_WAIT)) { | 1122 | if (!(gfp_mask & __GFP_WAIT)) { |
1101 | struct page_cgroup *pc; | 1123 | struct page_cgroup *pc; |
@@ -1112,15 +1134,40 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, | |||
1112 | unlock_page_cgroup(pc); | 1134 | unlock_page_cgroup(pc); |
1113 | } | 1135 | } |
1114 | 1136 | ||
1115 | if (unlikely(!mm)) | 1137 | if (do_swap_account && PageSwapCache(page)) { |
1138 | mem = try_get_mem_cgroup_from_swapcache(page); | ||
1139 | if (mem) | ||
1140 | mm = NULL; | ||
1141 | else | ||
1142 | mem = NULL; | ||
1143 | /* SwapCache may be still linked to LRU now. */ | ||
1144 | mem_cgroup_lru_del_before_commit_swapcache(page); | ||
1145 | } | ||
1146 | |||
1147 | if (unlikely(!mm && !mem)) | ||
1116 | mm = &init_mm; | 1148 | mm = &init_mm; |
1117 | 1149 | ||
1118 | if (page_is_file_cache(page)) | 1150 | if (page_is_file_cache(page)) |
1119 | return mem_cgroup_charge_common(page, mm, gfp_mask, | 1151 | return mem_cgroup_charge_common(page, mm, gfp_mask, |
1120 | MEM_CGROUP_CHARGE_TYPE_CACHE, NULL); | 1152 | MEM_CGROUP_CHARGE_TYPE_CACHE, NULL); |
1121 | else | 1153 | |
1122 | return mem_cgroup_charge_common(page, mm, gfp_mask, | 1154 | ret = mem_cgroup_charge_common(page, mm, gfp_mask, |
1123 | MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL); | 1155 | MEM_CGROUP_CHARGE_TYPE_SHMEM, mem); |
1156 | if (mem) | ||
1157 | css_put(&mem->css); | ||
1158 | if (PageSwapCache(page)) | ||
1159 | mem_cgroup_lru_add_after_commit_swapcache(page); | ||
1160 | |||
1161 | if (do_swap_account && !ret && PageSwapCache(page)) { | ||
1162 | swp_entry_t ent = {.val = page_private(page)}; | ||
1163 | /* avoid double counting */ | ||
1164 | mem = swap_cgroup_record(ent, NULL); | ||
1165 | if (mem) { | ||
1166 | res_counter_uncharge(&mem->memsw, PAGE_SIZE); | ||
1167 | mem_cgroup_put(mem); | ||
1168 | } | ||
1169 | } | ||
1170 | return ret; | ||
1124 | } | 1171 | } |
1125 | 1172 | ||
1126 | /* | 1173 | /* |
@@ -1134,7 +1181,6 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, | |||
1134 | gfp_t mask, struct mem_cgroup **ptr) | 1181 | gfp_t mask, struct mem_cgroup **ptr) |
1135 | { | 1182 | { |
1136 | struct mem_cgroup *mem; | 1183 | struct mem_cgroup *mem; |
1137 | swp_entry_t ent; | ||
1138 | int ret; | 1184 | int ret; |
1139 | 1185 | ||
1140 | if (mem_cgroup_disabled()) | 1186 | if (mem_cgroup_disabled()) |
@@ -1142,7 +1188,6 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, | |||
1142 | 1188 | ||
1143 | if (!do_swap_account) | 1189 | if (!do_swap_account) |
1144 | goto charge_cur_mm; | 1190 | goto charge_cur_mm; |
1145 | |||
1146 | /* | 1191 | /* |
1147 | * A racing thread's fault, or swapoff, may have already updated | 1192 | * A racing thread's fault, or swapoff, may have already updated |
1148 | * the pte, and even removed page from swap cache: return success | 1193 | * the pte, and even removed page from swap cache: return success |
@@ -1150,14 +1195,9 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, | |||
1150 | */ | 1195 | */ |
1151 | if (!PageSwapCache(page)) | 1196 | if (!PageSwapCache(page)) |
1152 | return 0; | 1197 | return 0; |
1153 | 1198 | mem = try_get_mem_cgroup_from_swapcache(page); | |
1154 | ent.val = page_private(page); | ||
1155 | |||
1156 | mem = lookup_swap_cgroup(ent); | ||
1157 | if (!mem) | 1199 | if (!mem) |
1158 | goto charge_cur_mm; | 1200 | goto charge_cur_mm; |
1159 | if (!css_tryget(&mem->css)) | ||
1160 | goto charge_cur_mm; | ||
1161 | *ptr = mem; | 1201 | *ptr = mem; |
1162 | ret = __mem_cgroup_try_charge(NULL, mask, ptr, true); | 1202 | ret = __mem_cgroup_try_charge(NULL, mask, ptr, true); |
1163 | /* drop extra refcnt from tryget */ | 1203 | /* drop extra refcnt from tryget */ |
@@ -1169,62 +1209,6 @@ charge_cur_mm: | |||
1169 | return __mem_cgroup_try_charge(mm, mask, ptr, true); | 1209 | return __mem_cgroup_try_charge(mm, mask, ptr, true); |
1170 | } | 1210 | } |
1171 | 1211 | ||
1172 | #ifdef CONFIG_SWAP | ||
1173 | |||
1174 | int mem_cgroup_cache_charge_swapin(struct page *page, | ||
1175 | struct mm_struct *mm, gfp_t mask, bool locked) | ||
1176 | { | ||
1177 | int ret = 0; | ||
1178 | |||
1179 | if (mem_cgroup_disabled()) | ||
1180 | return 0; | ||
1181 | if (unlikely(!mm)) | ||
1182 | mm = &init_mm; | ||
1183 | if (!locked) | ||
1184 | lock_page(page); | ||
1185 | /* | ||
1186 | * If not locked, the page can be dropped from SwapCache until | ||
1187 | * we reach here. | ||
1188 | */ | ||
1189 | if (PageSwapCache(page)) { | ||
1190 | struct mem_cgroup *mem = NULL; | ||
1191 | swp_entry_t ent; | ||
1192 | |||
1193 | ent.val = page_private(page); | ||
1194 | if (do_swap_account) { | ||
1195 | mem = lookup_swap_cgroup(ent); | ||
1196 | if (mem) { | ||
1197 | if (css_tryget(&mem->css)) | ||
1198 | mm = NULL; /* charge to recorded */ | ||
1199 | else | ||
1200 | mem = NULL; /* charge to current */ | ||
1201 | } | ||
1202 | } | ||
1203 | /* SwapCache may be still linked to LRU now. */ | ||
1204 | mem_cgroup_lru_del_before_commit_swapcache(page); | ||
1205 | ret = mem_cgroup_charge_common(page, mm, mask, | ||
1206 | MEM_CGROUP_CHARGE_TYPE_SHMEM, mem); | ||
1207 | mem_cgroup_lru_add_after_commit_swapcache(page); | ||
1208 | /* drop extra refcnt from tryget */ | ||
1209 | if (mem) | ||
1210 | css_put(&mem->css); | ||
1211 | |||
1212 | if (!ret && do_swap_account) { | ||
1213 | /* avoid double counting */ | ||
1214 | mem = swap_cgroup_record(ent, NULL); | ||
1215 | if (mem) { | ||
1216 | res_counter_uncharge(&mem->memsw, PAGE_SIZE); | ||
1217 | mem_cgroup_put(mem); | ||
1218 | } | ||
1219 | } | ||
1220 | } | ||
1221 | if (!locked) | ||
1222 | unlock_page(page); | ||
1223 | |||
1224 | return ret; | ||
1225 | } | ||
1226 | #endif | ||
1227 | |||
1228 | void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr) | 1212 | void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr) |
1229 | { | 1213 | { |
1230 | struct page_cgroup *pc; | 1214 | struct page_cgroup *pc; |
@@ -1486,18 +1470,20 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem, | |||
1486 | * This is typically used for page reclaiming for shmem for reducing side | 1470 | * This is typically used for page reclaiming for shmem for reducing side |
1487 | * effect of page allocation from shmem, which is used by some mem_cgroup. | 1471 | * effect of page allocation from shmem, which is used by some mem_cgroup. |
1488 | */ | 1472 | */ |
1489 | int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) | 1473 | int mem_cgroup_shrink_usage(struct page *page, |
1474 | struct mm_struct *mm, | ||
1475 | gfp_t gfp_mask) | ||
1490 | { | 1476 | { |
1491 | struct mem_cgroup *mem; | 1477 | struct mem_cgroup *mem = NULL; |
1492 | int progress = 0; | 1478 | int progress = 0; |
1493 | int retry = MEM_CGROUP_RECLAIM_RETRIES; | 1479 | int retry = MEM_CGROUP_RECLAIM_RETRIES; |
1494 | 1480 | ||
1495 | if (mem_cgroup_disabled()) | 1481 | if (mem_cgroup_disabled()) |
1496 | return 0; | 1482 | return 0; |
1497 | if (!mm) | 1483 | if (page) |
1498 | return 0; | 1484 | mem = try_get_mem_cgroup_from_swapcache(page); |
1499 | 1485 | if (!mem && mm) | |
1500 | mem = try_get_mem_cgroup_from_mm(mm); | 1486 | mem = try_get_mem_cgroup_from_mm(mm); |
1501 | if (unlikely(!mem)) | 1487 | if (unlikely(!mem)) |
1502 | return 0; | 1488 | return 0; |
1503 | 1489 | ||
diff --git a/mm/shmem.c b/mm/shmem.c index bbb7b043c986..5d0de96c9789 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -929,11 +929,11 @@ found: | |||
929 | if (!inode) | 929 | if (!inode) |
930 | goto out; | 930 | goto out; |
931 | /* | 931 | /* |
932 | * Charge page using GFP_HIGHUSER_MOVABLE while we can wait. | 932 | * Charge page using GFP_KERNEL while we can wait. |
933 | * charged back to the user(not to caller) when swap account is used. | 933 | * Charged back to the user(not to caller) when swap account is used. |
934 | * add_to_page_cache() will be called with GFP_NOWAIT. | ||
934 | */ | 935 | */ |
935 | error = mem_cgroup_cache_charge_swapin(page, current->mm, GFP_KERNEL, | 936 | error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); |
936 | true); | ||
937 | if (error) | 937 | if (error) |
938 | goto out; | 938 | goto out; |
939 | error = radix_tree_preload(GFP_KERNEL); | 939 | error = radix_tree_preload(GFP_KERNEL); |
@@ -1270,16 +1270,6 @@ repeat: | |||
1270 | goto repeat; | 1270 | goto repeat; |
1271 | } | 1271 | } |
1272 | wait_on_page_locked(swappage); | 1272 | wait_on_page_locked(swappage); |
1273 | /* | ||
1274 | * We want to avoid charge at add_to_page_cache(). | ||
1275 | * charge against this swap cache here. | ||
1276 | */ | ||
1277 | if (mem_cgroup_cache_charge_swapin(swappage, | ||
1278 | current->mm, gfp & GFP_RECLAIM_MASK, false)) { | ||
1279 | page_cache_release(swappage); | ||
1280 | error = -ENOMEM; | ||
1281 | goto failed; | ||
1282 | } | ||
1283 | page_cache_release(swappage); | 1273 | page_cache_release(swappage); |
1284 | goto repeat; | 1274 | goto repeat; |
1285 | } | 1275 | } |
@@ -1334,15 +1324,19 @@ repeat: | |||
1334 | } else { | 1324 | } else { |
1335 | shmem_swp_unmap(entry); | 1325 | shmem_swp_unmap(entry); |
1336 | spin_unlock(&info->lock); | 1326 | spin_unlock(&info->lock); |
1337 | unlock_page(swappage); | ||
1338 | page_cache_release(swappage); | ||
1339 | if (error == -ENOMEM) { | 1327 | if (error == -ENOMEM) { |
1340 | /* allow reclaim from this memory cgroup */ | 1328 | /* allow reclaim from this memory cgroup */ |
1341 | error = mem_cgroup_shrink_usage(current->mm, | 1329 | error = mem_cgroup_shrink_usage(swappage, |
1330 | current->mm, | ||
1342 | gfp); | 1331 | gfp); |
1343 | if (error) | 1332 | if (error) { |
1333 | unlock_page(swappage); | ||
1334 | page_cache_release(swappage); | ||
1344 | goto failed; | 1335 | goto failed; |
1336 | } | ||
1345 | } | 1337 | } |
1338 | unlock_page(swappage); | ||
1339 | page_cache_release(swappage); | ||
1346 | goto repeat; | 1340 | goto repeat; |
1347 | } | 1341 | } |
1348 | } else if (sgp == SGP_READ && !filepage) { | 1342 | } else if (sgp == SGP_READ && !filepage) { |