aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> 2009-01-07 21:08:35 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2009-01-08 11:31:10 -0500
commit b5a84319a4343a0db753436fd8147e61eaafa7ea (patch)
tree 5faae671b431b50a32a2d8c7a57cc9361d8f336d
parent 544122e5e0ee27d5aac4a441f7746712afbf248c (diff)
memcg: fix shmem's swap accounting
Currently, the following can be observed even when swap accounting is enabled:

1. Create groups 01 and 02.
2. Allocate a "file" on tmpfs from a task in group 01.
3. Swap out the "file" (by memory pressure).
4. Read the "file" from a task in group 02.
5. The charge for the "file" is moved to group 02.

This is not ideal behavior. It happens because SwapCache that was loaded by read-ahead is not taken into account.

This patch fixes shmem's swapcache behavior:

- Remove mem_cgroup_cache_charge_swapin().
- Add a SwapCache handler routine to mem_cgroup_cache_charge(). With this, shmem's file cache is charged at add_to_page_cache() with GFP_NOWAIT.
- Pass the swapcache page to mem_cgroup_shrink_usage().

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- include/linux/memcontrol.h 6
-rw-r--r-- include/linux/swap.h 8
-rw-r--r-- mm/memcontrol.c 134
-rw-r--r-- mm/shmem.c 30
4 files changed, 76 insertions, 102 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 8ae6ece8c962..326f45c86530 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -56,7 +56,8 @@ extern void mem_cgroup_move_lists(struct page *page,
56 enum lru_list from, enum lru_list to); 56 enum lru_list from, enum lru_list to);
57extern void mem_cgroup_uncharge_page(struct page *page); 57extern void mem_cgroup_uncharge_page(struct page *page);
58extern void mem_cgroup_uncharge_cache_page(struct page *page); 58extern void mem_cgroup_uncharge_cache_page(struct page *page);
59extern int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask); 59extern int mem_cgroup_shrink_usage(struct page *page,
60 struct mm_struct *mm, gfp_t gfp_mask);
60 61
61extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, 62extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
62 struct list_head *dst, 63 struct list_head *dst,
@@ -155,7 +156,8 @@ static inline void mem_cgroup_uncharge_cache_page(struct page *page)
155{ 156{
156} 157}
157 158
158static inline int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) 159static inline int mem_cgroup_shrink_usage(struct page *page,
160 struct mm_struct *mm, gfp_t gfp_mask)
159{ 161{
160 return 0; 162 return 0;
161} 163}
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4ccca25d0f05..d30215578877 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -335,16 +335,8 @@ static inline void disable_swap_token(void)
335} 335}
336 336
337#ifdef CONFIG_CGROUP_MEM_RES_CTLR 337#ifdef CONFIG_CGROUP_MEM_RES_CTLR
338extern int mem_cgroup_cache_charge_swapin(struct page *page,
339 struct mm_struct *mm, gfp_t mask, bool locked);
340extern void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent); 338extern void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent);
341#else 339#else
342static inline
343int mem_cgroup_cache_charge_swapin(struct page *page,
344 struct mm_struct *mm, gfp_t mask, bool locked)
345{
346 return 0;
347}
348static inline void 340static inline void
349mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) 341mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
350{ 342{
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f50cb7b1efdb..93a792871804 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -893,6 +893,23 @@ nomem:
893 return -ENOMEM; 893 return -ENOMEM;
894} 894}
895 895
896static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
897{
898 struct mem_cgroup *mem;
899 swp_entry_t ent;
900
901 if (!PageSwapCache(page))
902 return NULL;
903
904 ent.val = page_private(page);
905 mem = lookup_swap_cgroup(ent);
906 if (!mem)
907 return NULL;
908 if (!css_tryget(&mem->css))
909 return NULL;
910 return mem;
911}
912
896/* 913/*
897 * commit a charge got by __mem_cgroup_try_charge() and makes page_cgroup to be 914 * commit a charge got by __mem_cgroup_try_charge() and makes page_cgroup to be
898 * USED state. If already USED, uncharge and return. 915 * USED state. If already USED, uncharge and return.
@@ -1084,6 +1101,9 @@ int mem_cgroup_newpage_charge(struct page *page,
1084int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, 1101int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
1085 gfp_t gfp_mask) 1102 gfp_t gfp_mask)
1086{ 1103{
1104 struct mem_cgroup *mem = NULL;
1105 int ret;
1106
1087 if (mem_cgroup_disabled()) 1107 if (mem_cgroup_disabled())
1088 return 0; 1108 return 0;
1089 if (PageCompound(page)) 1109 if (PageCompound(page))
@@ -1096,6 +1116,8 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
1096 * For GFP_NOWAIT case, the page may be pre-charged before calling 1116 * For GFP_NOWAIT case, the page may be pre-charged before calling
1097 * add_to_page_cache(). (See shmem.c) check it here and avoid to call 1117 * add_to_page_cache(). (See shmem.c) check it here and avoid to call
1098 * charge twice. (It works but has to pay a bit larger cost.) 1118 * charge twice. (It works but has to pay a bit larger cost.)
1119 * And when the page is SwapCache, it should take swap information
1120 * into account. This is under lock_page() now.
1099 */ 1121 */
1100 if (!(gfp_mask & __GFP_WAIT)) { 1122 if (!(gfp_mask & __GFP_WAIT)) {
1101 struct page_cgroup *pc; 1123 struct page_cgroup *pc;
@@ -1112,15 +1134,40 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
1112 unlock_page_cgroup(pc); 1134 unlock_page_cgroup(pc);
1113 } 1135 }
1114 1136
1115 if (unlikely(!mm)) 1137 if (do_swap_account && PageSwapCache(page)) {
1138 mem = try_get_mem_cgroup_from_swapcache(page);
1139 if (mem)
1140 mm = NULL;
1141 else
1142 mem = NULL;
1143 /* SwapCache may be still linked to LRU now. */
1144 mem_cgroup_lru_del_before_commit_swapcache(page);
1145 }
1146
1147 if (unlikely(!mm && !mem))
1116 mm = &init_mm; 1148 mm = &init_mm;
1117 1149
1118 if (page_is_file_cache(page)) 1150 if (page_is_file_cache(page))
1119 return mem_cgroup_charge_common(page, mm, gfp_mask, 1151 return mem_cgroup_charge_common(page, mm, gfp_mask,
1120 MEM_CGROUP_CHARGE_TYPE_CACHE, NULL); 1152 MEM_CGROUP_CHARGE_TYPE_CACHE, NULL);
1121 else 1153
1122 return mem_cgroup_charge_common(page, mm, gfp_mask, 1154 ret = mem_cgroup_charge_common(page, mm, gfp_mask,
1123 MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL); 1155 MEM_CGROUP_CHARGE_TYPE_SHMEM, mem);
1156 if (mem)
1157 css_put(&mem->css);
1158 if (PageSwapCache(page))
1159 mem_cgroup_lru_add_after_commit_swapcache(page);
1160
1161 if (do_swap_account && !ret && PageSwapCache(page)) {
1162 swp_entry_t ent = {.val = page_private(page)};
1163 /* avoid double counting */
1164 mem = swap_cgroup_record(ent, NULL);
1165 if (mem) {
1166 res_counter_uncharge(&mem->memsw, PAGE_SIZE);
1167 mem_cgroup_put(mem);
1168 }
1169 }
1170 return ret;
1124} 1171}
1125 1172
1126/* 1173/*
@@ -1134,7 +1181,6 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
1134 gfp_t mask, struct mem_cgroup **ptr) 1181 gfp_t mask, struct mem_cgroup **ptr)
1135{ 1182{
1136 struct mem_cgroup *mem; 1183 struct mem_cgroup *mem;
1137 swp_entry_t ent;
1138 int ret; 1184 int ret;
1139 1185
1140 if (mem_cgroup_disabled()) 1186 if (mem_cgroup_disabled())
@@ -1142,7 +1188,6 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
1142 1188
1143 if (!do_swap_account) 1189 if (!do_swap_account)
1144 goto charge_cur_mm; 1190 goto charge_cur_mm;
1145
1146 /* 1191 /*
1147 * A racing thread's fault, or swapoff, may have already updated 1192 * A racing thread's fault, or swapoff, may have already updated
1148 * the pte, and even removed page from swap cache: return success 1193 * the pte, and even removed page from swap cache: return success
@@ -1150,14 +1195,9 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
1150 */ 1195 */
1151 if (!PageSwapCache(page)) 1196 if (!PageSwapCache(page))
1152 return 0; 1197 return 0;
1153 1198 mem = try_get_mem_cgroup_from_swapcache(page);
1154 ent.val = page_private(page);
1155
1156 mem = lookup_swap_cgroup(ent);
1157 if (!mem) 1199 if (!mem)
1158 goto charge_cur_mm; 1200 goto charge_cur_mm;
1159 if (!css_tryget(&mem->css))
1160 goto charge_cur_mm;
1161 *ptr = mem; 1201 *ptr = mem;
1162 ret = __mem_cgroup_try_charge(NULL, mask, ptr, true); 1202 ret = __mem_cgroup_try_charge(NULL, mask, ptr, true);
1163 /* drop extra refcnt from tryget */ 1203 /* drop extra refcnt from tryget */
@@ -1169,62 +1209,6 @@ charge_cur_mm:
1169 return __mem_cgroup_try_charge(mm, mask, ptr, true); 1209 return __mem_cgroup_try_charge(mm, mask, ptr, true);
1170} 1210}
1171 1211
1172#ifdef CONFIG_SWAP
1173
1174int mem_cgroup_cache_charge_swapin(struct page *page,
1175 struct mm_struct *mm, gfp_t mask, bool locked)
1176{
1177 int ret = 0;
1178
1179 if (mem_cgroup_disabled())
1180 return 0;
1181 if (unlikely(!mm))
1182 mm = &init_mm;
1183 if (!locked)
1184 lock_page(page);
1185 /*
1186 * If not locked, the page can be dropped from SwapCache until
1187 * we reach here.
1188 */
1189 if (PageSwapCache(page)) {
1190 struct mem_cgroup *mem = NULL;
1191 swp_entry_t ent;
1192
1193 ent.val = page_private(page);
1194 if (do_swap_account) {
1195 mem = lookup_swap_cgroup(ent);
1196 if (mem) {
1197 if (css_tryget(&mem->css))
1198 mm = NULL; /* charge to recorded */
1199 else
1200 mem = NULL; /* charge to current */
1201 }
1202 }
1203 /* SwapCache may be still linked to LRU now. */
1204 mem_cgroup_lru_del_before_commit_swapcache(page);
1205 ret = mem_cgroup_charge_common(page, mm, mask,
1206 MEM_CGROUP_CHARGE_TYPE_SHMEM, mem);
1207 mem_cgroup_lru_add_after_commit_swapcache(page);
1208 /* drop extra refcnt from tryget */
1209 if (mem)
1210 css_put(&mem->css);
1211
1212 if (!ret && do_swap_account) {
1213 /* avoid double counting */
1214 mem = swap_cgroup_record(ent, NULL);
1215 if (mem) {
1216 res_counter_uncharge(&mem->memsw, PAGE_SIZE);
1217 mem_cgroup_put(mem);
1218 }
1219 }
1220 }
1221 if (!locked)
1222 unlock_page(page);
1223
1224 return ret;
1225}
1226#endif
1227
1228void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr) 1212void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
1229{ 1213{
1230 struct page_cgroup *pc; 1214 struct page_cgroup *pc;
@@ -1486,18 +1470,20 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem,
1486 * This is typically used for page reclaiming for shmem for reducing side 1470 * This is typically used for page reclaiming for shmem for reducing side
1487 * effect of page allocation from shmem, which is used by some mem_cgroup. 1471 * effect of page allocation from shmem, which is used by some mem_cgroup.
1488 */ 1472 */
1489int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) 1473int mem_cgroup_shrink_usage(struct page *page,
1474 struct mm_struct *mm,
1475 gfp_t gfp_mask)
1490{ 1476{
1491 struct mem_cgroup *mem; 1477 struct mem_cgroup *mem = NULL;
1492 int progress = 0; 1478 int progress = 0;
1493 int retry = MEM_CGROUP_RECLAIM_RETRIES; 1479 int retry = MEM_CGROUP_RECLAIM_RETRIES;
1494 1480
1495 if (mem_cgroup_disabled()) 1481 if (mem_cgroup_disabled())
1496 return 0; 1482 return 0;
1497 if (!mm) 1483 if (page)
1498 return 0; 1484 mem = try_get_mem_cgroup_from_swapcache(page);
1499 1485 if (!mem && mm)
1500 mem = try_get_mem_cgroup_from_mm(mm); 1486 mem = try_get_mem_cgroup_from_mm(mm);
1501 if (unlikely(!mem)) 1487 if (unlikely(!mem))
1502 return 0; 1488 return 0;
1503 1489
diff --git a/mm/shmem.c b/mm/shmem.c
index bbb7b043c986..5d0de96c9789 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -929,11 +929,11 @@ found:
929 if (!inode) 929 if (!inode)
930 goto out; 930 goto out;
931 /* 931 /*
932 * Charge page using GFP_HIGHUSER_MOVABLE while we can wait. 932 * Charge page using GFP_KERNEL while we can wait.
933 * charged back to the user(not to caller) when swap account is used. 933 * Charged back to the user(not to caller) when swap account is used.
934 * add_to_page_cache() will be called with GFP_NOWAIT.
934 */ 935 */
935 error = mem_cgroup_cache_charge_swapin(page, current->mm, GFP_KERNEL, 936 error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
936 true);
937 if (error) 937 if (error)
938 goto out; 938 goto out;
939 error = radix_tree_preload(GFP_KERNEL); 939 error = radix_tree_preload(GFP_KERNEL);
@@ -1270,16 +1270,6 @@ repeat:
1270 goto repeat; 1270 goto repeat;
1271 } 1271 }
1272 wait_on_page_locked(swappage); 1272 wait_on_page_locked(swappage);
1273 /*
1274 * We want to avoid charge at add_to_page_cache().
1275 * charge against this swap cache here.
1276 */
1277 if (mem_cgroup_cache_charge_swapin(swappage,
1278 current->mm, gfp & GFP_RECLAIM_MASK, false)) {
1279 page_cache_release(swappage);
1280 error = -ENOMEM;
1281 goto failed;
1282 }
1283 page_cache_release(swappage); 1273 page_cache_release(swappage);
1284 goto repeat; 1274 goto repeat;
1285 } 1275 }
@@ -1334,15 +1324,19 @@ repeat:
1334 } else { 1324 } else {
1335 shmem_swp_unmap(entry); 1325 shmem_swp_unmap(entry);
1336 spin_unlock(&info->lock); 1326 spin_unlock(&info->lock);
1337 unlock_page(swappage);
1338 page_cache_release(swappage);
1339 if (error == -ENOMEM) { 1327 if (error == -ENOMEM) {
1340 /* allow reclaim from this memory cgroup */ 1328 /* allow reclaim from this memory cgroup */
1341 error = mem_cgroup_shrink_usage(current->mm, 1329 error = mem_cgroup_shrink_usage(swappage,
1330 current->mm,
1342 gfp); 1331 gfp);
1343 if (error) 1332 if (error) {
1333 unlock_page(swappage);
1334 page_cache_release(swappage);
1344 goto failed; 1335 goto failed;
1336 }
1345 } 1337 }
1338 unlock_page(swappage);
1339 page_cache_release(swappage);
1346 goto repeat; 1340 goto repeat;
1347 } 1341 }
1348 } else if (sgp == SGP_READ && !filepage) { 1342 } else if (sgp == SGP_READ && !filepage) {