Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- mm/memcontrol.c | 453
 1 file changed, 334 insertions(+), 119 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c31a310aa146..954032b80bed 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -38,6 +38,7 @@ | |||
38 | #include <linux/vmalloc.h> | 38 | #include <linux/vmalloc.h> |
39 | #include <linux/mm_inline.h> | 39 | #include <linux/mm_inline.h> |
40 | #include <linux/page_cgroup.h> | 40 | #include <linux/page_cgroup.h> |
41 | #include <linux/cpu.h> | ||
41 | #include "internal.h" | 42 | #include "internal.h" |
42 | 43 | ||
43 | #include <asm/uaccess.h> | 44 | #include <asm/uaccess.h> |
@@ -54,7 +55,6 @@ static int really_do_swap_account __initdata = 1; /* for remember boot option*/ | |||
54 | #define do_swap_account (0) | 55 | #define do_swap_account (0) |
55 | #endif | 56 | #endif |
56 | 57 | ||
57 | static DEFINE_MUTEX(memcg_tasklist); /* can be hold under cgroup_mutex */ | ||
58 | #define SOFTLIMIT_EVENTS_THRESH (1000) | 58 | #define SOFTLIMIT_EVENTS_THRESH (1000) |
59 | 59 | ||
60 | /* | 60 | /* |
@@ -66,7 +66,7 @@ enum mem_cgroup_stat_index { | |||
66 | */ | 66 | */ |
67 | MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */ | 67 | MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */ |
68 | MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */ | 68 | MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */ |
69 | MEM_CGROUP_STAT_MAPPED_FILE, /* # of pages charged as file rss */ | 69 | MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */ |
70 | MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */ | 70 | MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */ |
71 | MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */ | 71 | MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */ |
72 | MEM_CGROUP_STAT_EVENTS, /* sum of pagein + pageout for internal use */ | 72 | MEM_CGROUP_STAT_EVENTS, /* sum of pagein + pageout for internal use */ |
@@ -275,6 +275,7 @@ enum charge_type { | |||
275 | static void mem_cgroup_get(struct mem_cgroup *mem); | 275 | static void mem_cgroup_get(struct mem_cgroup *mem); |
276 | static void mem_cgroup_put(struct mem_cgroup *mem); | 276 | static void mem_cgroup_put(struct mem_cgroup *mem); |
277 | static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem); | 277 | static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem); |
278 | static void drain_all_stock_async(void); | ||
278 | 279 | ||
279 | static struct mem_cgroup_per_zone * | 280 | static struct mem_cgroup_per_zone * |
280 | mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid) | 281 | mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid) |
@@ -282,6 +283,11 @@ mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid) | |||
282 | return &mem->info.nodeinfo[nid]->zoneinfo[zid]; | 283 | return &mem->info.nodeinfo[nid]->zoneinfo[zid]; |
283 | } | 284 | } |
284 | 285 | ||
286 | struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem) | ||
287 | { | ||
288 | return &mem->css; | ||
289 | } | ||
290 | |||
285 | static struct mem_cgroup_per_zone * | 291 | static struct mem_cgroup_per_zone * |
286 | page_cgroup_zoneinfo(struct page_cgroup *pc) | 292 | page_cgroup_zoneinfo(struct page_cgroup *pc) |
287 | { | 293 | { |
@@ -758,7 +764,13 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) | |||
758 | task_unlock(task); | 764 | task_unlock(task); |
759 | if (!curr) | 765 | if (!curr) |
760 | return 0; | 766 | return 0; |
761 | if (curr->use_hierarchy) | 767 | /* |
768 | * We should check use_hierarchy of "mem", not "curr". Checking | ||
769 | * use_hierarchy of "curr" here would make this function return true if | ||
770 | * hierarchy is enabled in "curr" and "curr" is a child of "mem" in the | ||
771 | * *cgroup* hierarchy (even if use_hierarchy is disabled in "mem"). | ||
772 | */ | ||
773 | if (mem->use_hierarchy) | ||
762 | ret = css_is_ancestor(&curr->css, &mem->css); | 774 | ret = css_is_ancestor(&curr->css, &mem->css); |
763 | else | 775 | else |
764 | ret = (curr == mem); | 776 | ret = (curr == mem); |
@@ -1007,7 +1019,7 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) | |||
1007 | static char memcg_name[PATH_MAX]; | 1019 | static char memcg_name[PATH_MAX]; |
1008 | int ret; | 1020 | int ret; |
1009 | 1021 | ||
1010 | if (!memcg) | 1022 | if (!memcg || !p) |
1011 | return; | 1023 | return; |
1012 | 1024 | ||
1013 | 1025 | ||
@@ -1137,6 +1149,8 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
1137 | victim = mem_cgroup_select_victim(root_mem); | 1149 | victim = mem_cgroup_select_victim(root_mem); |
1138 | if (victim == root_mem) { | 1150 | if (victim == root_mem) { |
1139 | loop++; | 1151 | loop++; |
1152 | if (loop >= 1) | ||
1153 | drain_all_stock_async(); | ||
1140 | if (loop >= 2) { | 1154 | if (loop >= 2) { |
1141 | /* | 1155 | /* |
1142 | * If we have not been able to reclaim | 1156 | * If we have not been able to reclaim |
@@ -1223,7 +1237,7 @@ static void record_last_oom(struct mem_cgroup *mem) | |||
1223 | * Currently used to update mapped file statistics, but the routine can be | 1237 | * Currently used to update mapped file statistics, but the routine can be |
1224 | * generalized to update other statistics as well. | 1238 | * generalized to update other statistics as well. |
1225 | */ | 1239 | */ |
1226 | void mem_cgroup_update_mapped_file_stat(struct page *page, int val) | 1240 | void mem_cgroup_update_file_mapped(struct page *page, int val) |
1227 | { | 1241 | { |
1228 | struct mem_cgroup *mem; | 1242 | struct mem_cgroup *mem; |
1229 | struct mem_cgroup_stat *stat; | 1243 | struct mem_cgroup_stat *stat; |
@@ -1231,9 +1245,6 @@ void mem_cgroup_update_mapped_file_stat(struct page *page, int val) | |||
1231 | int cpu; | 1245 | int cpu; |
1232 | struct page_cgroup *pc; | 1246 | struct page_cgroup *pc; |
1233 | 1247 | ||
1234 | if (!page_is_file_cache(page)) | ||
1235 | return; | ||
1236 | |||
1237 | pc = lookup_page_cgroup(page); | 1248 | pc = lookup_page_cgroup(page); |
1238 | if (unlikely(!pc)) | 1249 | if (unlikely(!pc)) |
1239 | return; | 1250 | return; |
@@ -1253,12 +1264,139 @@ void mem_cgroup_update_mapped_file_stat(struct page *page, int val) | |||
1253 | stat = &mem->stat; | 1264 | stat = &mem->stat; |
1254 | cpustat = &stat->cpustat[cpu]; | 1265 | cpustat = &stat->cpustat[cpu]; |
1255 | 1266 | ||
1256 | __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE, val); | 1267 | __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_FILE_MAPPED, val); |
1257 | done: | 1268 | done: |
1258 | unlock_page_cgroup(pc); | 1269 | unlock_page_cgroup(pc); |
1259 | } | 1270 | } |
1260 | 1271 | ||
1261 | /* | 1272 | /* |
1273 | * Size of the first charge trial. "32" comes from vmscan.c's magic value. | ||
1274 | * TODO: it may be necessary to use bigger numbers on big iron. | ||
1275 | */ | ||
1276 | #define CHARGE_SIZE (32 * PAGE_SIZE) | ||
1277 | struct memcg_stock_pcp { | ||
1278 | struct mem_cgroup *cached; /* never points to the root cgroup */ | ||
1279 | int charge; | ||
1280 | struct work_struct work; | ||
1281 | }; | ||
1282 | static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock); | ||
1283 | static atomic_t memcg_drain_count; | ||
1284 | |||
1285 | /* | ||
1286 | * Try to consume stocked charge on this cpu. On success, PAGE_SIZE is consumed | ||
1287 | * from the local stock and true is returned. If the stock is empty or holds | ||
1288 | * charges from a cgroup other than the current target, false is returned and | ||
1289 | * the stock will be refilled by the caller. | ||
1290 | */ | ||
1291 | static bool consume_stock(struct mem_cgroup *mem) | ||
1292 | { | ||
1293 | struct memcg_stock_pcp *stock; | ||
1294 | bool ret = true; | ||
1295 | |||
1296 | stock = &get_cpu_var(memcg_stock); | ||
1297 | if (mem == stock->cached && stock->charge) | ||
1298 | stock->charge -= PAGE_SIZE; | ||
1299 | else /* need to call res_counter_charge */ | ||
1300 | ret = false; | ||
1301 | put_cpu_var(memcg_stock); | ||
1302 | return ret; | ||
1303 | } | ||
1304 | |||
1305 | /* | ||
1306 | * Return charges cached in the percpu stock to the res_counter and reset the cache. | ||
1307 | */ | ||
1308 | static void drain_stock(struct memcg_stock_pcp *stock) | ||
1309 | { | ||
1310 | struct mem_cgroup *old = stock->cached; | ||
1311 | |||
1312 | if (stock->charge) { | ||
1313 | res_counter_uncharge(&old->res, stock->charge); | ||
1314 | if (do_swap_account) | ||
1315 | res_counter_uncharge(&old->memsw, stock->charge); | ||
1316 | } | ||
1317 | stock->cached = NULL; | ||
1318 | stock->charge = 0; | ||
1319 | } | ||
1320 | |||
1321 | /* | ||
1322 | * This must be called with preemption disabled, or by a thread which is | ||
1323 | * pinned to the local cpu. | ||
1324 | */ | ||
1325 | static void drain_local_stock(struct work_struct *dummy) | ||
1326 | { | ||
1327 | struct memcg_stock_pcp *stock = &__get_cpu_var(memcg_stock); | ||
1328 | drain_stock(stock); | ||
1329 | } | ||
1330 | |||
1331 | /* | ||
1332 | * Cache charges (val), taken from the res_counter, in the local per-cpu area. | ||
1333 | * They will be consumed by consume_stock() later. | ||
1334 | */ | ||
1335 | static void refill_stock(struct mem_cgroup *mem, int val) | ||
1336 | { | ||
1337 | struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock); | ||
1338 | |||
1339 | if (stock->cached != mem) { /* reset if necessary */ | ||
1340 | drain_stock(stock); | ||
1341 | stock->cached = mem; | ||
1342 | } | ||
1343 | stock->charge += val; | ||
1344 | put_cpu_var(memcg_stock); | ||
1345 | } | ||
1346 | |||
1347 | /* | ||
1348 | * Try to drain stocked charges on other cpus. This function is asynchronous | ||
1349 | * and just schedules a work item per cpu to drain locally on each cpu. The | ||
1350 | * caller can expect some charges to come back to the res_counter later, but | ||
1351 | * cannot wait for that to happen. | ||
1352 | */ | ||
1353 | static void drain_all_stock_async(void) | ||
1354 | { | ||
1355 | int cpu; | ||
1356 | /* This function schedules "drain" asynchronously. The result of the | ||
1357 | * drain is not directly handled by callers, so if someone is already | ||
1358 | * draining we don't have to schedule it again. The WORK_STRUCT_PENDING | ||
1359 | * check in queue_work_on() will catch any race anyway; the check here | ||
1360 | * is intentionally loose. | ||
1361 | */ | ||
1362 | if (atomic_read(&memcg_drain_count)) | ||
1363 | return; | ||
1364 | /* Notify other cpus that system-wide "drain" is running */ | ||
1365 | atomic_inc(&memcg_drain_count); | ||
1366 | get_online_cpus(); | ||
1367 | for_each_online_cpu(cpu) { | ||
1368 | struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); | ||
1369 | schedule_work_on(cpu, &stock->work); | ||
1370 | } | ||
1371 | put_online_cpus(); | ||
1372 | atomic_dec(&memcg_drain_count); | ||
1373 | /* We don't wait for flush_work */ | ||
1374 | } | ||
1375 | |||
1376 | /* This is a synchronous drain interface. */ | ||
1377 | static void drain_all_stock_sync(void) | ||
1378 | { | ||
1379 | /* called when force_empty is called */ | ||
1380 | atomic_inc(&memcg_drain_count); | ||
1381 | schedule_on_each_cpu(drain_local_stock); | ||
1382 | atomic_dec(&memcg_drain_count); | ||
1383 | } | ||
1384 | |||
1385 | static int __cpuinit memcg_stock_cpu_callback(struct notifier_block *nb, | ||
1386 | unsigned long action, | ||
1387 | void *hcpu) | ||
1388 | { | ||
1389 | int cpu = (unsigned long)hcpu; | ||
1390 | struct memcg_stock_pcp *stock; | ||
1391 | |||
1392 | if (action != CPU_DEAD) | ||
1393 | return NOTIFY_OK; | ||
1394 | stock = &per_cpu(memcg_stock, cpu); | ||
1395 | drain_stock(stock); | ||
1396 | return NOTIFY_OK; | ||
1397 | } | ||
1398 | |||
1399 | /* | ||
1262 | * Unlike exported interface, "oom" parameter is added. if oom==true, | 1400 | * Unlike exported interface, "oom" parameter is added. if oom==true, |
1263 | * oom-killer can be invoked. | 1401 | * oom-killer can be invoked. |
1264 | */ | 1402 | */ |
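
The hunks above introduce the per-cpu charge stock: try_charge() takes CHARGE_SIZE from the res_counter at once and parks the surplus per cpu, so most single-page charges never touch the shared counter. Below is an editor's sketch of that idea, not part of the patch: a single-threaded model in which "struct group" and its plain usage field stand in for the real res_counter and "struct stock" plays the role of memcg_stock_pcp.

/*
 * Minimal model of the per-cpu stock: bulk-charge once, then hand out
 * single pages from the local cache until it runs dry.
 */
#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE   4096L
#define CHARGE_SIZE (32 * PAGE_SIZE)    /* bulk charge size used by the patch */

struct group { long usage; };                        /* res_counter stand-in */
struct stock { struct group *cached; long charge; }; /* one instance per cpu */

/* Fast path: take one page from the local stock if it was charged to "g". */
static bool consume_stock(struct stock *s, struct group *g)
{
        if (s->cached == g && s->charge >= PAGE_SIZE) {
                s->charge -= PAGE_SIZE;
                return true;
        }
        return false;
}

/* Give any cached charge back to its owner (drain_stock() in the patch). */
static void drain_stock(struct stock *s)
{
        if (s->cached && s->charge)
                s->cached->usage -= s->charge;
        s->cached = NULL;
        s->charge = 0;
}

/* Slow path: charge CHARGE_SIZE at once and park the surplus in the stock. */
static void charge_page(struct stock *s, struct group *g)
{
        if (consume_stock(s, g))
                return;
        g->usage += CHARGE_SIZE;        /* one counter update per 32 pages */
        drain_stock(s);                 /* refill_stock(): reset the owner  */
        s->cached = g;
        s->charge = CHARGE_SIZE - PAGE_SIZE;
}

int main(void)
{
        struct group g = { 0 };
        struct stock cpu0 = { NULL, 0 };

        for (int i = 0; i < 40; i++)
                charge_page(&cpu0, &g);
        /* 40 pages charged, but the shared counter was only touched twice. */
        printf("usage=%ld bytes, stocked=%ld bytes\n", g.usage, cpu0.charge);
        return 0;
}
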
@@ -1269,6 +1407,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
1269 | struct mem_cgroup *mem, *mem_over_limit; | 1407 | struct mem_cgroup *mem, *mem_over_limit; |
1270 | int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; | 1408 | int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; |
1271 | struct res_counter *fail_res; | 1409 | struct res_counter *fail_res; |
1410 | int csize = CHARGE_SIZE; | ||
1272 | 1411 | ||
1273 | if (unlikely(test_thread_flag(TIF_MEMDIE))) { | 1412 | if (unlikely(test_thread_flag(TIF_MEMDIE))) { |
1274 | /* Don't account this! */ | 1413 | /* Don't account this! */ |
@@ -1293,23 +1432,25 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
1293 | return 0; | 1432 | return 0; |
1294 | 1433 | ||
1295 | VM_BUG_ON(css_is_removed(&mem->css)); | 1434 | VM_BUG_ON(css_is_removed(&mem->css)); |
1435 | if (mem_cgroup_is_root(mem)) | ||
1436 | goto done; | ||
1296 | 1437 | ||
1297 | while (1) { | 1438 | while (1) { |
1298 | int ret = 0; | 1439 | int ret = 0; |
1299 | unsigned long flags = 0; | 1440 | unsigned long flags = 0; |
1300 | 1441 | ||
1301 | if (mem_cgroup_is_root(mem)) | 1442 | if (consume_stock(mem)) |
1302 | goto done; | 1443 | goto charged; |
1303 | ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res); | 1444 | |
1445 | ret = res_counter_charge(&mem->res, csize, &fail_res); | ||
1304 | if (likely(!ret)) { | 1446 | if (likely(!ret)) { |
1305 | if (!do_swap_account) | 1447 | if (!do_swap_account) |
1306 | break; | 1448 | break; |
1307 | ret = res_counter_charge(&mem->memsw, PAGE_SIZE, | 1449 | ret = res_counter_charge(&mem->memsw, csize, &fail_res); |
1308 | &fail_res); | ||
1309 | if (likely(!ret)) | 1450 | if (likely(!ret)) |
1310 | break; | 1451 | break; |
1311 | /* mem+swap counter fails */ | 1452 | /* mem+swap counter fails */ |
1312 | res_counter_uncharge(&mem->res, PAGE_SIZE); | 1453 | res_counter_uncharge(&mem->res, csize); |
1313 | flags |= MEM_CGROUP_RECLAIM_NOSWAP; | 1454 | flags |= MEM_CGROUP_RECLAIM_NOSWAP; |
1314 | mem_over_limit = mem_cgroup_from_res_counter(fail_res, | 1455 | mem_over_limit = mem_cgroup_from_res_counter(fail_res, |
1315 | memsw); | 1456 | memsw); |
@@ -1318,6 +1459,11 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
1318 | mem_over_limit = mem_cgroup_from_res_counter(fail_res, | 1459 | mem_over_limit = mem_cgroup_from_res_counter(fail_res, |
1319 | res); | 1460 | res); |
1320 | 1461 | ||
1462 | /* reduce request size and retry */ | ||
1463 | if (csize > PAGE_SIZE) { | ||
1464 | csize = PAGE_SIZE; | ||
1465 | continue; | ||
1466 | } | ||
1321 | if (!(gfp_mask & __GFP_WAIT)) | 1467 | if (!(gfp_mask & __GFP_WAIT)) |
1322 | goto nomem; | 1468 | goto nomem; |
1323 | 1469 | ||
@@ -1339,14 +1485,15 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
1339 | 1485 | ||
1340 | if (!nr_retries--) { | 1486 | if (!nr_retries--) { |
1341 | if (oom) { | 1487 | if (oom) { |
1342 | mutex_lock(&memcg_tasklist); | ||
1343 | mem_cgroup_out_of_memory(mem_over_limit, gfp_mask); | 1488 | mem_cgroup_out_of_memory(mem_over_limit, gfp_mask); |
1344 | mutex_unlock(&memcg_tasklist); | ||
1345 | record_last_oom(mem_over_limit); | 1489 | record_last_oom(mem_over_limit); |
1346 | } | 1490 | } |
1347 | goto nomem; | 1491 | goto nomem; |
1348 | } | 1492 | } |
1349 | } | 1493 | } |
1494 | if (csize > PAGE_SIZE) | ||
1495 | refill_stock(mem, csize - PAGE_SIZE); | ||
1496 | charged: | ||
1350 | /* | 1497 | /* |
1351 | * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree. | 1498 | * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree. |
1352 | * if they exceeds softlimit. | 1499 | * if they exceeds softlimit. |
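
These try_charge() hunks also change the charging loop to request CHARGE_SIZE first and fall back to a single page when the bulk request hits the limit, only then going into reclaim. A compact editor's sketch of that retry shape, not part of the patch: the limit check stands in for res_counter_charge(), and the reclaim/OOM path the real code takes after the single-page failure is omitted.

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE   4096L
#define CHARGE_SIZE (32 * PAGE_SIZE)

struct group { long usage, limit; };

static bool counter_charge(struct group *g, long bytes)
{
        if (g->usage + bytes > g->limit)
                return false;           /* would exceed the limit */
        g->usage += bytes;
        return true;
}

/* Returns the number of bytes actually charged: CHARGE_SIZE, PAGE_SIZE or 0. */
static long try_charge(struct group *g)
{
        long csize = CHARGE_SIZE;

        for (;;) {
                if (counter_charge(g, csize))
                        return csize;
                if (csize > PAGE_SIZE) {        /* reduce request and retry */
                        csize = PAGE_SIZE;
                        continue;
                }
                return 0;               /* real code would reclaim or OOM here */
        }
}

int main(void)
{
        struct group g = { .usage = 0, .limit = 10 * PAGE_SIZE };

        /* The bulk request does not fit, so only a single page is charged. */
        printf("charged %ld page(s)\n", try_charge(&g) / PAGE_SIZE);
        return 0;
}
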
@@ -1361,6 +1508,21 @@ nomem: | |||
1361 | } | 1508 | } |
1362 | 1509 | ||
1363 | /* | 1510 | /* |
1511 | * Sometimes we have to undo a charge we got by try_charge(). | ||
1512 | * This function does the uncharge and puts the css refcnt | ||
1513 | * taken by try_charge(). | ||
1514 | */ | ||
1515 | static void mem_cgroup_cancel_charge(struct mem_cgroup *mem) | ||
1516 | { | ||
1517 | if (!mem_cgroup_is_root(mem)) { | ||
1518 | res_counter_uncharge(&mem->res, PAGE_SIZE); | ||
1519 | if (do_swap_account) | ||
1520 | res_counter_uncharge(&mem->memsw, PAGE_SIZE); | ||
1521 | } | ||
1522 | css_put(&mem->css); | ||
1523 | } | ||
1524 | |||
1525 | /* | ||
1364 | * A helper function to get mem_cgroup from ID. must be called under | 1526 | * A helper function to get mem_cgroup from ID. must be called under |
1365 | * rcu_read_lock(). The caller must check css_is_removed() or some if | 1527 | * rcu_read_lock(). The caller must check css_is_removed() or some if |
1366 | * it's concern. (dropping refcnt from swap can be called against removed | 1528 | * it's concern. (dropping refcnt from swap can be called against removed |
@@ -1379,25 +1541,22 @@ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id) | |||
1379 | return container_of(css, struct mem_cgroup, css); | 1541 | return container_of(css, struct mem_cgroup, css); |
1380 | } | 1542 | } |
1381 | 1543 | ||
1382 | static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page) | 1544 | struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) |
1383 | { | 1545 | { |
1384 | struct mem_cgroup *mem; | 1546 | struct mem_cgroup *mem = NULL; |
1385 | struct page_cgroup *pc; | 1547 | struct page_cgroup *pc; |
1386 | unsigned short id; | 1548 | unsigned short id; |
1387 | swp_entry_t ent; | 1549 | swp_entry_t ent; |
1388 | 1550 | ||
1389 | VM_BUG_ON(!PageLocked(page)); | 1551 | VM_BUG_ON(!PageLocked(page)); |
1390 | 1552 | ||
1391 | if (!PageSwapCache(page)) | ||
1392 | return NULL; | ||
1393 | |||
1394 | pc = lookup_page_cgroup(page); | 1553 | pc = lookup_page_cgroup(page); |
1395 | lock_page_cgroup(pc); | 1554 | lock_page_cgroup(pc); |
1396 | if (PageCgroupUsed(pc)) { | 1555 | if (PageCgroupUsed(pc)) { |
1397 | mem = pc->mem_cgroup; | 1556 | mem = pc->mem_cgroup; |
1398 | if (mem && !css_tryget(&mem->css)) | 1557 | if (mem && !css_tryget(&mem->css)) |
1399 | mem = NULL; | 1558 | mem = NULL; |
1400 | } else { | 1559 | } else if (PageSwapCache(page)) { |
1401 | ent.val = page_private(page); | 1560 | ent.val = page_private(page); |
1402 | id = lookup_swap_cgroup(ent); | 1561 | id = lookup_swap_cgroup(ent); |
1403 | rcu_read_lock(); | 1562 | rcu_read_lock(); |
@@ -1426,12 +1585,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, | |||
1426 | lock_page_cgroup(pc); | 1585 | lock_page_cgroup(pc); |
1427 | if (unlikely(PageCgroupUsed(pc))) { | 1586 | if (unlikely(PageCgroupUsed(pc))) { |
1428 | unlock_page_cgroup(pc); | 1587 | unlock_page_cgroup(pc); |
1429 | if (!mem_cgroup_is_root(mem)) { | 1588 | mem_cgroup_cancel_charge(mem); |
1430 | res_counter_uncharge(&mem->res, PAGE_SIZE); | ||
1431 | if (do_swap_account) | ||
1432 | res_counter_uncharge(&mem->memsw, PAGE_SIZE); | ||
1433 | } | ||
1434 | css_put(&mem->css); | ||
1435 | return; | 1589 | return; |
1436 | } | 1590 | } |
1437 | 1591 | ||
@@ -1464,27 +1618,22 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, | |||
1464 | } | 1618 | } |
1465 | 1619 | ||
1466 | /** | 1620 | /** |
1467 | * mem_cgroup_move_account - move account of the page | 1621 | * __mem_cgroup_move_account - move account of the page |
1468 | * @pc: page_cgroup of the page. | 1622 | * @pc: page_cgroup of the page. |
1469 | * @from: mem_cgroup which the page is moved from. | 1623 | * @from: mem_cgroup which the page is moved from. |
1470 | * @to: mem_cgroup which the page is moved to. @from != @to. | 1624 | * @to: mem_cgroup which the page is moved to. @from != @to. |
1471 | * | 1625 | * |
1472 | * The caller must confirm following. | 1626 | * The caller must confirm following. |
1473 | * - page is not on LRU (isolate_page() is useful.) | 1627 | * - page is not on LRU (isolate_page() is useful.) |
1474 | * | 1628 | * - the pc is locked, used, and ->mem_cgroup points to @from. |
1475 | * returns 0 at success, | ||
1476 | * returns -EBUSY when lock is busy or "pc" is unstable. | ||
1477 | * | 1629 | * |
1478 | * This function does "uncharge" from old cgroup but doesn't do "charge" to | 1630 | * This function does "uncharge" from old cgroup but doesn't do "charge" to |
1479 | * new cgroup. It should be done by a caller. | 1631 | * new cgroup. It should be done by a caller. |
1480 | */ | 1632 | */ |
1481 | 1633 | ||
1482 | static int mem_cgroup_move_account(struct page_cgroup *pc, | 1634 | static void __mem_cgroup_move_account(struct page_cgroup *pc, |
1483 | struct mem_cgroup *from, struct mem_cgroup *to) | 1635 | struct mem_cgroup *from, struct mem_cgroup *to) |
1484 | { | 1636 | { |
1485 | struct mem_cgroup_per_zone *from_mz, *to_mz; | ||
1486 | int nid, zid; | ||
1487 | int ret = -EBUSY; | ||
1488 | struct page *page; | 1637 | struct page *page; |
1489 | int cpu; | 1638 | int cpu; |
1490 | struct mem_cgroup_stat *stat; | 1639 | struct mem_cgroup_stat *stat; |
@@ -1492,38 +1641,27 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, | |||
1492 | 1641 | ||
1493 | VM_BUG_ON(from == to); | 1642 | VM_BUG_ON(from == to); |
1494 | VM_BUG_ON(PageLRU(pc->page)); | 1643 | VM_BUG_ON(PageLRU(pc->page)); |
1495 | 1644 | VM_BUG_ON(!PageCgroupLocked(pc)); | |
1496 | nid = page_cgroup_nid(pc); | 1645 | VM_BUG_ON(!PageCgroupUsed(pc)); |
1497 | zid = page_cgroup_zid(pc); | 1646 | VM_BUG_ON(pc->mem_cgroup != from); |
1498 | from_mz = mem_cgroup_zoneinfo(from, nid, zid); | ||
1499 | to_mz = mem_cgroup_zoneinfo(to, nid, zid); | ||
1500 | |||
1501 | if (!trylock_page_cgroup(pc)) | ||
1502 | return ret; | ||
1503 | |||
1504 | if (!PageCgroupUsed(pc)) | ||
1505 | goto out; | ||
1506 | |||
1507 | if (pc->mem_cgroup != from) | ||
1508 | goto out; | ||
1509 | 1647 | ||
1510 | if (!mem_cgroup_is_root(from)) | 1648 | if (!mem_cgroup_is_root(from)) |
1511 | res_counter_uncharge(&from->res, PAGE_SIZE); | 1649 | res_counter_uncharge(&from->res, PAGE_SIZE); |
1512 | mem_cgroup_charge_statistics(from, pc, false); | 1650 | mem_cgroup_charge_statistics(from, pc, false); |
1513 | 1651 | ||
1514 | page = pc->page; | 1652 | page = pc->page; |
1515 | if (page_is_file_cache(page) && page_mapped(page)) { | 1653 | if (page_mapped(page) && !PageAnon(page)) { |
1516 | cpu = smp_processor_id(); | 1654 | cpu = smp_processor_id(); |
1517 | /* Update mapped_file data for mem_cgroup "from" */ | 1655 | /* Update mapped_file data for mem_cgroup "from" */ |
1518 | stat = &from->stat; | 1656 | stat = &from->stat; |
1519 | cpustat = &stat->cpustat[cpu]; | 1657 | cpustat = &stat->cpustat[cpu]; |
1520 | __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE, | 1658 | __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_FILE_MAPPED, |
1521 | -1); | 1659 | -1); |
1522 | 1660 | ||
1523 | /* Update mapped_file data for mem_cgroup "to" */ | 1661 | /* Update mapped_file data for mem_cgroup "to" */ |
1524 | stat = &to->stat; | 1662 | stat = &to->stat; |
1525 | cpustat = &stat->cpustat[cpu]; | 1663 | cpustat = &stat->cpustat[cpu]; |
1526 | __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_MAPPED_FILE, | 1664 | __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_FILE_MAPPED, |
1527 | 1); | 1665 | 1); |
1528 | } | 1666 | } |
1529 | 1667 | ||
@@ -1534,15 +1672,28 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, | |||
1534 | css_get(&to->css); | 1672 | css_get(&to->css); |
1535 | pc->mem_cgroup = to; | 1673 | pc->mem_cgroup = to; |
1536 | mem_cgroup_charge_statistics(to, pc, true); | 1674 | mem_cgroup_charge_statistics(to, pc, true); |
1537 | ret = 0; | ||
1538 | out: | ||
1539 | unlock_page_cgroup(pc); | ||
1540 | /* | 1675 | /* |
1541 | * We charges against "to" which may not have any tasks. Then, "to" | 1676 | * We charges against "to" which may not have any tasks. Then, "to" |
1542 | * can be under rmdir(). But in current implementation, caller of | 1677 | * can be under rmdir(). But in current implementation, caller of |
1543 | * this function is just force_empty() and it's garanteed that | 1678 | * this function is just force_empty() and it's garanteed that |
1544 | * "to" is never removed. So, we don't check rmdir status here. | 1679 | * "to" is never removed. So, we don't check rmdir status here. |
1545 | */ | 1680 | */ |
1681 | } | ||
1682 | |||
1683 | /* | ||
1684 | * Check whether the @pc is valid for moving the account and call | ||
1685 | * __mem_cgroup_move_account(). | ||
1686 | */ | ||
1687 | static int mem_cgroup_move_account(struct page_cgroup *pc, | ||
1688 | struct mem_cgroup *from, struct mem_cgroup *to) | ||
1689 | { | ||
1690 | int ret = -EINVAL; | ||
1691 | lock_page_cgroup(pc); | ||
1692 | if (PageCgroupUsed(pc) && pc->mem_cgroup == from) { | ||
1693 | __mem_cgroup_move_account(pc, from, to); | ||
1694 | ret = 0; | ||
1695 | } | ||
1696 | unlock_page_cgroup(pc); | ||
1546 | return ret; | 1697 | return ret; |
1547 | } | 1698 | } |
1548 | 1699 | ||
@@ -1564,45 +1715,27 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc, | |||
1564 | if (!pcg) | 1715 | if (!pcg) |
1565 | return -EINVAL; | 1716 | return -EINVAL; |
1566 | 1717 | ||
1718 | ret = -EBUSY; | ||
1719 | if (!get_page_unless_zero(page)) | ||
1720 | goto out; | ||
1721 | if (isolate_lru_page(page)) | ||
1722 | goto put; | ||
1567 | 1723 | ||
1568 | parent = mem_cgroup_from_cont(pcg); | 1724 | parent = mem_cgroup_from_cont(pcg); |
1569 | |||
1570 | |||
1571 | ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, page); | 1725 | ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, page); |
1572 | if (ret || !parent) | 1726 | if (ret || !parent) |
1573 | return ret; | 1727 | goto put_back; |
1574 | |||
1575 | if (!get_page_unless_zero(page)) { | ||
1576 | ret = -EBUSY; | ||
1577 | goto uncharge; | ||
1578 | } | ||
1579 | |||
1580 | ret = isolate_lru_page(page); | ||
1581 | |||
1582 | if (ret) | ||
1583 | goto cancel; | ||
1584 | 1728 | ||
1585 | ret = mem_cgroup_move_account(pc, child, parent); | 1729 | ret = mem_cgroup_move_account(pc, child, parent); |
1586 | 1730 | if (!ret) | |
1731 | css_put(&parent->css); /* drop extra refcnt by try_charge() */ | ||
1732 | else | ||
1733 | mem_cgroup_cancel_charge(parent); /* does css_put */ | ||
1734 | put_back: | ||
1587 | putback_lru_page(page); | 1735 | putback_lru_page(page); |
1588 | if (!ret) { | 1736 | put: |
1589 | put_page(page); | ||
1590 | /* drop extra refcnt by try_charge() */ | ||
1591 | css_put(&parent->css); | ||
1592 | return 0; | ||
1593 | } | ||
1594 | |||
1595 | cancel: | ||
1596 | put_page(page); | 1737 | put_page(page); |
1597 | uncharge: | 1738 | out: |
1598 | /* drop extra refcnt by try_charge() */ | ||
1599 | css_put(&parent->css); | ||
1600 | /* uncharge if move fails */ | ||
1601 | if (!mem_cgroup_is_root(parent)) { | ||
1602 | res_counter_uncharge(&parent->res, PAGE_SIZE); | ||
1603 | if (do_swap_account) | ||
1604 | res_counter_uncharge(&parent->memsw, PAGE_SIZE); | ||
1605 | } | ||
1606 | return ret; | 1739 | return ret; |
1607 | } | 1740 | } |
1608 | 1741 | ||
@@ -1737,12 +1870,13 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, | |||
1737 | goto charge_cur_mm; | 1870 | goto charge_cur_mm; |
1738 | /* | 1871 | /* |
1739 | * A racing thread's fault, or swapoff, may have already updated | 1872 | * A racing thread's fault, or swapoff, may have already updated |
1740 | * the pte, and even removed page from swap cache: return success | 1873 | * the pte, and even removed page from swap cache: in those cases |
1741 | * to go on to do_swap_page()'s pte_same() test, which should fail. | 1874 | * do_swap_page()'s pte_same() test will fail; but there's also a |
1875 | * KSM case which does need to charge the page. | ||
1742 | */ | 1876 | */ |
1743 | if (!PageSwapCache(page)) | 1877 | if (!PageSwapCache(page)) |
1744 | return 0; | 1878 | goto charge_cur_mm; |
1745 | mem = try_get_mem_cgroup_from_swapcache(page); | 1879 | mem = try_get_mem_cgroup_from_page(page); |
1746 | if (!mem) | 1880 | if (!mem) |
1747 | goto charge_cur_mm; | 1881 | goto charge_cur_mm; |
1748 | *ptr = mem; | 1882 | *ptr = mem; |
@@ -1818,14 +1952,53 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem) | |||
1818 | return; | 1952 | return; |
1819 | if (!mem) | 1953 | if (!mem) |
1820 | return; | 1954 | return; |
1821 | if (!mem_cgroup_is_root(mem)) { | 1955 | mem_cgroup_cancel_charge(mem); |
1822 | res_counter_uncharge(&mem->res, PAGE_SIZE); | ||
1823 | if (do_swap_account) | ||
1824 | res_counter_uncharge(&mem->memsw, PAGE_SIZE); | ||
1825 | } | ||
1826 | css_put(&mem->css); | ||
1827 | } | 1956 | } |
1828 | 1957 | ||
1958 | static void | ||
1959 | __do_uncharge(struct mem_cgroup *mem, const enum charge_type ctype) | ||
1960 | { | ||
1961 | struct memcg_batch_info *batch = NULL; | ||
1962 | bool uncharge_memsw = true; | ||
1963 | /* If swapout, usage of swap doesn't decrease */ | ||
1964 | if (!do_swap_account || ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) | ||
1965 | uncharge_memsw = false; | ||
1966 | /* | ||
1967 | * do_batch > 0 when unmapping pages or truncating/invalidating an inode. | ||
1968 | * In those cases, all pages freed in a row can be expected to belong to | ||
1969 | * the same cgroup, so we have a chance to coalesce uncharges. | ||
1970 | * But we uncharge one by one if the task was killed by OOM (TIF_MEMDIE), | ||
1971 | * because we want to do the uncharge as soon as possible. | ||
1972 | */ | ||
1973 | if (!current->memcg_batch.do_batch || test_thread_flag(TIF_MEMDIE)) | ||
1974 | goto direct_uncharge; | ||
1975 | |||
1976 | batch = ¤t->memcg_batch; | ||
1977 | /* | ||
1978 | * Usually we do css_get() when we remember a memcg pointer. | ||
1979 | * But in this case, we keep res->usage until the end of a series of | ||
1980 | * uncharges, so it's ok to ignore the memcg's refcnt. | ||
1981 | */ | ||
1982 | if (!batch->memcg) | ||
1983 | batch->memcg = mem; | ||
1984 | /* | ||
1985 | * In the typical case, batch->memcg == mem. This means we can | ||
1986 | * merge a series of uncharges into one uncharge of the res_counter. | ||
1987 | * If not, we uncharge the res_counter one by one. | ||
1988 | */ | ||
1989 | if (batch->memcg != mem) | ||
1990 | goto direct_uncharge; | ||
1991 | /* remember freed charge and uncharge it later */ | ||
1992 | batch->bytes += PAGE_SIZE; | ||
1993 | if (uncharge_memsw) | ||
1994 | batch->memsw_bytes += PAGE_SIZE; | ||
1995 | return; | ||
1996 | direct_uncharge: | ||
1997 | res_counter_uncharge(&mem->res, PAGE_SIZE); | ||
1998 | if (uncharge_memsw) | ||
1999 | res_counter_uncharge(&mem->memsw, PAGE_SIZE); | ||
2000 | return; | ||
2001 | } | ||
1829 | 2002 | ||
1830 | /* | 2003 | /* |
1831 | * uncharge if !page_mapped(page) | 2004 | * uncharge if !page_mapped(page) |
@@ -1874,12 +2047,8 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | |||
1874 | break; | 2047 | break; |
1875 | } | 2048 | } |
1876 | 2049 | ||
1877 | if (!mem_cgroup_is_root(mem)) { | 2050 | if (!mem_cgroup_is_root(mem)) |
1878 | res_counter_uncharge(&mem->res, PAGE_SIZE); | 2051 | __do_uncharge(mem, ctype); |
1879 | if (do_swap_account && | ||
1880 | (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT)) | ||
1881 | res_counter_uncharge(&mem->memsw, PAGE_SIZE); | ||
1882 | } | ||
1883 | if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) | 2052 | if (ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) |
1884 | mem_cgroup_swap_statistics(mem, true); | 2053 | mem_cgroup_swap_statistics(mem, true); |
1885 | mem_cgroup_charge_statistics(mem, pc, false); | 2054 | mem_cgroup_charge_statistics(mem, pc, false); |
@@ -1925,6 +2094,50 @@ void mem_cgroup_uncharge_cache_page(struct page *page) | |||
1925 | __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE); | 2094 | __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE); |
1926 | } | 2095 | } |
1927 | 2096 | ||
2097 | /* | ||
2098 | * Batch_start/batch_end are called in unmap_page_range/invalidate/truncate. | ||
2099 | * In those cases, pages are freed continuously and we can expect them to | ||
2100 | * be in the same memcg. Each of those callers already limits the number of | ||
2101 | * pages freed at once, so uncharge_start/end() is called properly. | ||
2102 | * These may be called more than once (nested) in a single context. | ||
2103 | */ | ||
2104 | |||
2105 | void mem_cgroup_uncharge_start(void) | ||
2106 | { | ||
2107 | current->memcg_batch.do_batch++; | ||
2108 | /* We can do nest. */ | ||
2109 | if (current->memcg_batch.do_batch == 1) { | ||
2110 | current->memcg_batch.memcg = NULL; | ||
2111 | current->memcg_batch.bytes = 0; | ||
2112 | current->memcg_batch.memsw_bytes = 0; | ||
2113 | } | ||
2114 | } | ||
2115 | |||
2116 | void mem_cgroup_uncharge_end(void) | ||
2117 | { | ||
2118 | struct memcg_batch_info *batch = ¤t->memcg_batch; | ||
2119 | |||
2120 | if (!batch->do_batch) | ||
2121 | return; | ||
2122 | |||
2123 | batch->do_batch--; | ||
2124 | if (batch->do_batch) /* If stacked, do nothing. */ | ||
2125 | return; | ||
2126 | |||
2127 | if (!batch->memcg) | ||
2128 | return; | ||
2129 | /* | ||
2130 | * This "batch->memcg" is valid without any css_get/put etc., | ||
2131 | * because we hide the charges behind us. | ||
2132 | */ | ||
2133 | if (batch->bytes) | ||
2134 | res_counter_uncharge(&batch->memcg->res, batch->bytes); | ||
2135 | if (batch->memsw_bytes) | ||
2136 | res_counter_uncharge(&batch->memcg->memsw, batch->memsw_bytes); | ||
2137 | /* forget this pointer (for sanity check) */ | ||
2138 | batch->memcg = NULL; | ||
2139 | } | ||
2140 | |||
1928 | #ifdef CONFIG_SWAP | 2141 | #ifdef CONFIG_SWAP |
1929 | /* | 2142 | /* |
1930 | * called after __delete_from_swap_cache() and drop "page" account. | 2143 | * called after __delete_from_swap_cache() and drop "page" account. |
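
The hunk above adds mem_cgroup_uncharge_start()/end(), which let a task accumulate a run of page uncharges in current->memcg_batch and flush them with a single res_counter update, with nesting allowed. The following is an editor's sketch of the accounting effect, not part of the patch: "batch" plays the role of current->memcg_batch and "struct group" stands in for the memcg and its res_counter.

#include <stdio.h>

#define PAGE_SIZE 4096L

struct group { long usage; };
struct batch { int do_batch; struct group *memcg; long bytes; };

static struct batch batch;              /* per-task in the real code */

static void uncharge_start(void)
{
        if (++batch.do_batch == 1) {    /* nesting is allowed */
                batch.memcg = NULL;
                batch.bytes = 0;
        }
}

/* Uncharge one page: coalesce while batching and the group matches. */
static void uncharge_page(struct group *g)
{
        if (!batch.do_batch || (batch.memcg && batch.memcg != g)) {
                g->usage -= PAGE_SIZE;  /* direct uncharge */
                return;
        }
        batch.memcg = g;
        batch.bytes += PAGE_SIZE;       /* remember now, flush in _end() */
}

static void uncharge_end(void)
{
        if (--batch.do_batch)
                return;                 /* still nested, keep accumulating */
        if (batch.memcg && batch.bytes)
                batch.memcg->usage -= batch.bytes;      /* one counter update */
        batch.memcg = NULL;
}

int main(void)
{
        struct group g = { 100 * PAGE_SIZE };

        uncharge_start();
        for (int i = 0; i < 64; i++)
                uncharge_page(&g);      /* no counter traffic inside the loop */
        uncharge_end();                 /* single update of 64 pages at once  */
        printf("usage=%ld pages\n", g.usage / PAGE_SIZE);       /* prints 36 */
        return 0;
}
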
@@ -2100,7 +2313,6 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, | |||
2100 | unsigned long long val) | 2313 | unsigned long long val) |
2101 | { | 2314 | { |
2102 | int retry_count; | 2315 | int retry_count; |
2103 | int progress; | ||
2104 | u64 memswlimit; | 2316 | u64 memswlimit; |
2105 | int ret = 0; | 2317 | int ret = 0; |
2106 | int children = mem_cgroup_count_children(memcg); | 2318 | int children = mem_cgroup_count_children(memcg); |
@@ -2144,8 +2356,7 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, | |||
2144 | if (!ret) | 2356 | if (!ret) |
2145 | break; | 2357 | break; |
2146 | 2358 | ||
2147 | progress = mem_cgroup_hierarchical_reclaim(memcg, NULL, | 2359 | mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL, |
2148 | GFP_KERNEL, | ||
2149 | MEM_CGROUP_RECLAIM_SHRINK); | 2360 | MEM_CGROUP_RECLAIM_SHRINK); |
2150 | curusage = res_counter_read_u64(&memcg->res, RES_USAGE); | 2361 | curusage = res_counter_read_u64(&memcg->res, RES_USAGE); |
2151 | /* Usage is reduced ? */ | 2362 | /* Usage is reduced ? */ |
@@ -2375,7 +2586,7 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem, bool free_all) | |||
2375 | if (free_all) | 2586 | if (free_all) |
2376 | goto try_to_free; | 2587 | goto try_to_free; |
2377 | move_account: | 2588 | move_account: |
2378 | while (mem->res.usage > 0) { | 2589 | do { |
2379 | ret = -EBUSY; | 2590 | ret = -EBUSY; |
2380 | if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children)) | 2591 | if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children)) |
2381 | goto out; | 2592 | goto out; |
@@ -2384,6 +2595,7 @@ move_account: | |||
2384 | goto out; | 2595 | goto out; |
2385 | /* This is for making all *used* pages to be on LRU. */ | 2596 | /* This is for making all *used* pages to be on LRU. */ |
2386 | lru_add_drain_all(); | 2597 | lru_add_drain_all(); |
2598 | drain_all_stock_sync(); | ||
2387 | ret = 0; | 2599 | ret = 0; |
2388 | for_each_node_state(node, N_HIGH_MEMORY) { | 2600 | for_each_node_state(node, N_HIGH_MEMORY) { |
2389 | for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) { | 2601 | for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) { |
@@ -2402,8 +2614,8 @@ move_account: | |||
2402 | if (ret == -ENOMEM) | 2614 | if (ret == -ENOMEM) |
2403 | goto try_to_free; | 2615 | goto try_to_free; |
2404 | cond_resched(); | 2616 | cond_resched(); |
2405 | } | 2617 | /* "ret" should also be checked to ensure all lists are empty. */ |
2406 | ret = 0; | 2618 | } while (mem->res.usage > 0 || ret); |
2407 | out: | 2619 | out: |
2408 | css_put(&mem->css); | 2620 | css_put(&mem->css); |
2409 | return ret; | 2621 | return ret; |
@@ -2436,10 +2648,7 @@ try_to_free: | |||
2436 | } | 2648 | } |
2437 | lru_add_drain(); | 2649 | lru_add_drain(); |
2438 | /* try move_account...there may be some *locked* pages. */ | 2650 | /* try move_account...there may be some *locked* pages. */ |
2439 | if (mem->res.usage) | 2651 | goto move_account; |
2440 | goto move_account; | ||
2441 | ret = 0; | ||
2442 | goto out; | ||
2443 | } | 2652 | } |
2444 | 2653 | ||
2445 | int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event) | 2654 | int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event) |
@@ -2541,6 +2750,7 @@ static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) | |||
2541 | val += idx_val; | 2750 | val += idx_val; |
2542 | mem_cgroup_get_recursive_idx_stat(mem, | 2751 | mem_cgroup_get_recursive_idx_stat(mem, |
2543 | MEM_CGROUP_STAT_SWAPOUT, &idx_val); | 2752 | MEM_CGROUP_STAT_SWAPOUT, &idx_val); |
2753 | val += idx_val; | ||
2544 | val <<= PAGE_SHIFT; | 2754 | val <<= PAGE_SHIFT; |
2545 | } else | 2755 | } else |
2546 | val = res_counter_read_u64(&mem->memsw, name); | 2756 | val = res_counter_read_u64(&mem->memsw, name); |
@@ -2660,7 +2870,7 @@ static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) | |||
2660 | enum { | 2870 | enum { |
2661 | MCS_CACHE, | 2871 | MCS_CACHE, |
2662 | MCS_RSS, | 2872 | MCS_RSS, |
2663 | MCS_MAPPED_FILE, | 2873 | MCS_FILE_MAPPED, |
2664 | MCS_PGPGIN, | 2874 | MCS_PGPGIN, |
2665 | MCS_PGPGOUT, | 2875 | MCS_PGPGOUT, |
2666 | MCS_SWAP, | 2876 | MCS_SWAP, |
@@ -2704,8 +2914,8 @@ static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data) | |||
2704 | s->stat[MCS_CACHE] += val * PAGE_SIZE; | 2914 | s->stat[MCS_CACHE] += val * PAGE_SIZE; |
2705 | val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS); | 2915 | val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS); |
2706 | s->stat[MCS_RSS] += val * PAGE_SIZE; | 2916 | s->stat[MCS_RSS] += val * PAGE_SIZE; |
2707 | val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_MAPPED_FILE); | 2917 | val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_FILE_MAPPED); |
2708 | s->stat[MCS_MAPPED_FILE] += val * PAGE_SIZE; | 2918 | s->stat[MCS_FILE_MAPPED] += val * PAGE_SIZE; |
2709 | val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGIN_COUNT); | 2919 | val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGIN_COUNT); |
2710 | s->stat[MCS_PGPGIN] += val; | 2920 | s->stat[MCS_PGPGIN] += val; |
2711 | val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGOUT_COUNT); | 2921 | val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGOUT_COUNT); |
@@ -3097,11 +3307,18 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) | |||
3097 | 3307 | ||
3098 | /* root ? */ | 3308 | /* root ? */ |
3099 | if (cont->parent == NULL) { | 3309 | if (cont->parent == NULL) { |
3310 | int cpu; | ||
3100 | enable_swap_cgroup(); | 3311 | enable_swap_cgroup(); |
3101 | parent = NULL; | 3312 | parent = NULL; |
3102 | root_mem_cgroup = mem; | 3313 | root_mem_cgroup = mem; |
3103 | if (mem_cgroup_soft_limit_tree_init()) | 3314 | if (mem_cgroup_soft_limit_tree_init()) |
3104 | goto free_out; | 3315 | goto free_out; |
3316 | for_each_possible_cpu(cpu) { | ||
3317 | struct memcg_stock_pcp *stock = | ||
3318 | &per_cpu(memcg_stock, cpu); | ||
3319 | INIT_WORK(&stock->work, drain_local_stock); | ||
3320 | } | ||
3321 | hotcpu_notifier(memcg_stock_cpu_callback, 0); | ||
3105 | 3322 | ||
3106 | } else { | 3323 | } else { |
3107 | parent = mem_cgroup_from_cont(cont->parent); | 3324 | parent = mem_cgroup_from_cont(cont->parent); |
@@ -3170,12 +3387,10 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss, | |||
3170 | struct task_struct *p, | 3387 | struct task_struct *p, |
3171 | bool threadgroup) | 3388 | bool threadgroup) |
3172 | { | 3389 | { |
3173 | mutex_lock(&memcg_tasklist); | ||
3174 | /* | 3390 | /* |
3175 | * FIXME: It's better to move charges of this process from old | 3391 | * FIXME: It's better to move charges of this process from old |
3176 | * memcg to new memcg. But it's just on TODO-List now. | 3392 | * memcg to new memcg. But it's just on TODO-List now. |
3177 | */ | 3393 | */ |
3178 | mutex_unlock(&memcg_tasklist); | ||
3179 | } | 3394 | } |
3180 | 3395 | ||
3181 | struct cgroup_subsys mem_cgroup_subsys = { | 3396 | struct cgroup_subsys mem_cgroup_subsys = { |