Diffstat (limited to 'mm')
 -rw-r--r--  mm/huge_memory.c |   37
 -rw-r--r--  mm/internal.h    |   46
 -rw-r--r--  mm/memcontrol.c  | 1006
 -rw-r--r--  mm/memory.c      |    2
 -rw-r--r--  mm/page_cgroup.c |    9
 -rw-r--r--  mm/swap.c        |   83
 -rw-r--r--  mm/vmscan.c      |    4
 7 files changed, 641 insertions, 546 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 860ec211ddd6..4298abaae153 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -990,7 +990,7 @@ struct page *follow_trans_huge_pmd(struct mm_struct *mm,
 	page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
 	VM_BUG_ON(!PageCompound(page));
 	if (flags & FOLL_GET)
-		get_page(page);
+		get_page_foll(page);
 
 out:
 	return page;
@@ -1202,6 +1202,7 @@ static void __split_huge_page_refcount(struct page *page)
 	unsigned long head_index = page->index;
 	struct zone *zone = page_zone(page);
 	int zonestat;
+	int tail_count = 0;
 
 	/* prevent PageLRU to go away from under us, and freeze lru stats */
 	spin_lock_irq(&zone->lru_lock);
@@ -1210,11 +1211,27 @@ static void __split_huge_page_refcount(struct page *page)
 	for (i = 1; i < HPAGE_PMD_NR; i++) {
 		struct page *page_tail = page + i;
 
-		/* tail_page->_count cannot change */
-		atomic_sub(atomic_read(&page_tail->_count), &page->_count);
-		BUG_ON(page_count(page) <= 0);
-		atomic_add(page_mapcount(page) + 1, &page_tail->_count);
-		BUG_ON(atomic_read(&page_tail->_count) <= 0);
+		/* tail_page->_mapcount cannot change */
+		BUG_ON(page_mapcount(page_tail) < 0);
+		tail_count += page_mapcount(page_tail);
+		/* check for overflow */
+		BUG_ON(tail_count < 0);
+		BUG_ON(atomic_read(&page_tail->_count) != 0);
+		/*
+		 * tail_page->_count is zero and not changing from
+		 * under us. But get_page_unless_zero() may be running
+		 * from under us on the tail_page. If we used
+		 * atomic_set() below instead of atomic_add(), we
+		 * would then run atomic_set() concurrently with
+		 * get_page_unless_zero(), and atomic_set() is
+		 * implemented in C not using locked ops. spin_unlock
+		 * on x86 sometime uses locked ops because of PPro
+		 * errata 66, 92, so unless somebody can guarantee
+		 * atomic_set() here would be safe on all archs (and
+		 * not only on x86), it's safer to use atomic_add().
+		 */
+		atomic_add(page_mapcount(page) + page_mapcount(page_tail) + 1,
+			   &page_tail->_count);
 
 		/* after clearing PageTail the gup refcount can be released */
 		smp_mb();
@@ -1232,10 +1249,7 @@ static void __split_huge_page_refcount(struct page *page)
 					      (1L << PG_uptodate)));
 		page_tail->flags |= (1L << PG_dirty);
 
-		/*
-		 * 1) clear PageTail before overwriting first_page
-		 * 2) clear PageTail before clearing PageHead for VM_BUG_ON
-		 */
+		/* clear PageTail before overwriting first_page */
 		smp_wmb();
 
 		/*
@@ -1252,7 +1266,6 @@ static void __split_huge_page_refcount(struct page *page)
 		 * status is achieved setting a reserved bit in the
 		 * pmd, not by clearing the present bit.
 		 */
-		BUG_ON(page_mapcount(page_tail));
 		page_tail->_mapcount = page->_mapcount;
 
 		BUG_ON(page_tail->mapping);
@@ -1269,6 +1282,8 @@ static void __split_huge_page_refcount(struct page *page)
 
 		lru_add_page_tail(zone, page, page_tail);
 	}
+	atomic_sub(tail_count, &page->_count);
+	BUG_ON(atomic_read(&page->_count) <= 0);
 
 	__dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
 	__mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR);
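[Annotation, not part of the patch] The hunks above stop transferring gup pins into each tail page's _count; instead a tail's _count stays zero, pins are recorded in the tail's _mapcount, and the accounts are settled once per split with a single atomic_sub() on the head. A minimal userspace sketch of that arithmetic, with hypothetical pin counts and plain ints standing in for the atomic counters:

```c
#include <assert.h>
#include <stdio.h>

#define NR_TAILS 3

int main(void)
{
	/* hypothetical numbers: 1 base reference + 5 gup pins taken on tails */
	int head_count = 1 + 5;
	int head_mapcount = 2;			/* page_mapcount(head), made up */
	int tail_pins[NR_TAILS] = { 2, 0, 3 };	/* per-tail pins, i.e. page_mapcount(tail) */
	int tail_count[NR_TAILS] = { 0, 0, 0 };	/* tails keep _count == 0 before the split */
	int transferred = 0;

	for (int i = 0; i < NR_TAILS; i++) {
		/* mirrors: atomic_add(page_mapcount(page) + page_mapcount(page_tail) + 1, ...) */
		tail_count[i] += head_mapcount + tail_pins[i] + 1;
		/* mirrors: tail_count += page_mapcount(page_tail) */
		transferred += tail_pins[i];
	}
	head_count -= transferred;	/* mirrors: atomic_sub(tail_count, &page->_count) */

	assert(head_count > 0);		/* mirrors: BUG_ON(atomic_read(&page->_count) <= 0) */
	for (int i = 0; i < NR_TAILS; i++)
		printf("tail %d: _count=%d\n", i, tail_count[i]);
	printf("head: _count=%d\n", head_count);
	return 0;
}
```

Compared with the removed code, the head's reference count is now adjusted once per split rather than once per tail.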
diff --git a/mm/internal.h b/mm/internal.h
index d071d380fb49..2189af491783 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -37,6 +37,52 @@ static inline void __put_page(struct page *page)
 	atomic_dec(&page->_count);
 }
 
+static inline void __get_page_tail_foll(struct page *page,
+					bool get_page_head)
+{
+	/*
+	 * If we're getting a tail page, the elevated page->_count is
+	 * required only in the head page and we will elevate the head
+	 * page->_count and tail page->_mapcount.
+	 *
+	 * We elevate page_tail->_mapcount for tail pages to force
+	 * page_tail->_count to be zero at all times to avoid getting
+	 * false positives from get_page_unless_zero() with
+	 * speculative page access (like in
+	 * page_cache_get_speculative()) on tail pages.
+	 */
+	VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0);
+	VM_BUG_ON(atomic_read(&page->_count) != 0);
+	VM_BUG_ON(page_mapcount(page) < 0);
+	if (get_page_head)
+		atomic_inc(&page->first_page->_count);
+	atomic_inc(&page->_mapcount);
+}
+
+/*
+ * This is meant to be called as the FOLL_GET operation of
+ * follow_page() and it must be called while holding the proper PT
+ * lock while the pte (or pmd_trans_huge) is still mapping the page.
+ */
+static inline void get_page_foll(struct page *page)
+{
+	if (unlikely(PageTail(page)))
+		/*
+		 * This is safe only because
+		 * __split_huge_page_refcount() can't run under
+		 * get_page_foll() because we hold the proper PT lock.
+		 */
+		__get_page_tail_foll(page, true);
+	else {
+		/*
+		 * Getting a normal page or the head of a compound page
+		 * requires to already have an elevated page->_count.
+		 */
+		VM_BUG_ON(atomic_read(&page->_count) <= 0);
+		atomic_inc(&page->_count);
+	}
+}
+
 extern unsigned long highest_memmap_pfn;
 
 /*
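[Annotation, not part of the patch] A tiny userspace model of the invariants the new helpers enforce: pinning a tail page through get_page_foll() takes the real reference on the head and only bumps the tail's _mapcount, so the tail's _count stays zero. The struct and numbers below are made up; the asserts mirror the VM_BUG_ON checks added above.

```c
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

struct model_page {
	int _count;
	int _mapcount;			/* the kernel stores mapcount - 1; modeled directly here */
	bool tail;
	struct model_page *first_page;	/* head page, for tails */
};

static void model_get_page_foll(struct model_page *page)
{
	if (page->tail) {
		/* mirrors __get_page_tail_foll(page, true) */
		assert(page->first_page->_count > 0);
		assert(page->_count == 0);
		assert(page->_mapcount >= 0);
		page->first_page->_count++;
		page->_mapcount++;
	} else {
		/* normal page or compound head: needs an already elevated _count */
		assert(page->_count > 0);
		page->_count++;
	}
}

int main(void)
{
	struct model_page head = { ._count = 1 };
	struct model_page tail = { .tail = true, .first_page = &head };

	model_get_page_foll(&tail);	/* gup pin on the tail */
	model_get_page_foll(&head);	/* ordinary pin on the head */

	printf("head _count=%d, tail _count=%d, tail _mapcount=%d\n",
	       head._count, tail._count, tail._mapcount);
	/* prints: head _count=3, tail _count=0, tail _mapcount=1 */
	return 0;
}
```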
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2d5755544afe..7af1d5ee1598 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -201,8 +201,8 @@ struct mem_cgroup_eventfd_list {
 	struct eventfd_ctx *eventfd;
 };
 
-static void mem_cgroup_threshold(struct mem_cgroup *mem);
-static void mem_cgroup_oom_notify(struct mem_cgroup *mem);
+static void mem_cgroup_threshold(struct mem_cgroup *memcg);
+static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
 
 /*
  * The memory controller data structure. The memory controller controls both
@@ -362,29 +362,29 @@ enum charge_type {
 #define MEM_CGROUP_RECLAIM_SOFT_BIT	0x2
 #define MEM_CGROUP_RECLAIM_SOFT	(1 << MEM_CGROUP_RECLAIM_SOFT_BIT)
 
-static void mem_cgroup_get(struct mem_cgroup *mem);
-static void mem_cgroup_put(struct mem_cgroup *mem);
-static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem);
-static void drain_all_stock_async(struct mem_cgroup *mem);
+static void mem_cgroup_get(struct mem_cgroup *memcg);
+static void mem_cgroup_put(struct mem_cgroup *memcg);
+static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
+static void drain_all_stock_async(struct mem_cgroup *memcg);
 
 static struct mem_cgroup_per_zone *
-mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid)
+mem_cgroup_zoneinfo(struct mem_cgroup *memcg, int nid, int zid)
 {
-	return &mem->info.nodeinfo[nid]->zoneinfo[zid];
+	return &memcg->info.nodeinfo[nid]->zoneinfo[zid];
 }
 
-struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem)
+struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg)
 {
-	return &mem->css;
+	return &memcg->css;
 }
 
 static struct mem_cgroup_per_zone *
-page_cgroup_zoneinfo(struct mem_cgroup *mem, struct page *page)
+page_cgroup_zoneinfo(struct mem_cgroup *memcg, struct page *page)
 {
 	int nid = page_to_nid(page);
 	int zid = page_zonenum(page);
 
-	return mem_cgroup_zoneinfo(mem, nid, zid);
+	return mem_cgroup_zoneinfo(memcg, nid, zid);
 }
 
 static struct mem_cgroup_tree_per_zone *
@@ -403,7 +403,7 @@ soft_limit_tree_from_page(struct page *page)
 }
 
 static void
-__mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
+__mem_cgroup_insert_exceeded(struct mem_cgroup *memcg,
 				struct mem_cgroup_per_zone *mz,
 				struct mem_cgroup_tree_per_zone *mctz,
 				unsigned long long new_usage_in_excess)
@@ -437,7 +437,7 @@ __mem_cgroup_insert_exceeded(struct mem_cgroup *mem,
 }
 
 static void
-__mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
+__mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
 				struct mem_cgroup_per_zone *mz,
 				struct mem_cgroup_tree_per_zone *mctz)
 {
@@ -448,17 +448,17 @@ __mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
 }
 
 static void
-mem_cgroup_remove_exceeded(struct mem_cgroup *mem,
+mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
 				struct mem_cgroup_per_zone *mz,
 				struct mem_cgroup_tree_per_zone *mctz)
 {
 	spin_lock(&mctz->lock);
-	__mem_cgroup_remove_exceeded(mem, mz, mctz);
+	__mem_cgroup_remove_exceeded(memcg, mz, mctz);
 	spin_unlock(&mctz->lock);
 }
 
 
-static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page)
+static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
 {
 	unsigned long long excess;
 	struct mem_cgroup_per_zone *mz;
@@ -471,9 +471,9 @@ static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page)
 	 * Necessary to update all ancestors when hierarchy is used.
 	 * because their event counter is not touched.
 	 */
-	for (; mem; mem = parent_mem_cgroup(mem)) {
-		mz = mem_cgroup_zoneinfo(mem, nid, zid);
-		excess = res_counter_soft_limit_excess(&mem->res);
+	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
+		mz = mem_cgroup_zoneinfo(memcg, nid, zid);
+		excess = res_counter_soft_limit_excess(&memcg->res);
 		/*
 		 * We have to update the tree if mz is on RB-tree or
 		 * mem is over its softlimit.
@@ -482,18 +482,18 @@ static void mem_cgroup_update_tree(struct mem_cgroup *mem, struct page *page)
 			spin_lock(&mctz->lock);
 			/* if on-tree, remove it */
 			if (mz->on_tree)
-				__mem_cgroup_remove_exceeded(mem, mz, mctz);
+				__mem_cgroup_remove_exceeded(memcg, mz, mctz);
 			/*
 			 * Insert again. mz->usage_in_excess will be updated.
 			 * If excess is 0, no tree ops.
 			 */
-			__mem_cgroup_insert_exceeded(mem, mz, mctz, excess);
+			__mem_cgroup_insert_exceeded(memcg, mz, mctz, excess);
 			spin_unlock(&mctz->lock);
 		}
 	}
 }
 
-static void mem_cgroup_remove_from_trees(struct mem_cgroup *mem)
+static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
 {
 	int node, zone;
 	struct mem_cgroup_per_zone *mz;
@@ -501,9 +501,9 @@ static void mem_cgroup_remove_from_trees(struct mem_cgroup *mem)
 
 	for_each_node_state(node, N_POSSIBLE) {
 		for (zone = 0; zone < MAX_NR_ZONES; zone++) {
-			mz = mem_cgroup_zoneinfo(mem, node, zone);
+			mz = mem_cgroup_zoneinfo(memcg, node, zone);
 			mctz = soft_limit_tree_node_zone(node, zone);
-			mem_cgroup_remove_exceeded(mem, mz, mctz);
+			mem_cgroup_remove_exceeded(memcg, mz, mctz);
 		}
 	}
 }
@@ -564,7 +564,7 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
  * common workload, threashold and synchonization as vmstat[] should be
  * implemented.
  */
-static long mem_cgroup_read_stat(struct mem_cgroup *mem,
+static long mem_cgroup_read_stat(struct mem_cgroup *memcg,
 				 enum mem_cgroup_stat_index idx)
 {
 	long val = 0;
@@ -572,81 +572,83 @@ static long mem_cgroup_read_stat(struct mem_cgroup *mem,
 
 	get_online_cpus();
 	for_each_online_cpu(cpu)
-		val += per_cpu(mem->stat->count[idx], cpu);
+		val += per_cpu(memcg->stat->count[idx], cpu);
 #ifdef CONFIG_HOTPLUG_CPU
-	spin_lock(&mem->pcp_counter_lock);
-	val += mem->nocpu_base.count[idx];
-	spin_unlock(&mem->pcp_counter_lock);
+	spin_lock(&memcg->pcp_counter_lock);
+	val += memcg->nocpu_base.count[idx];
+	spin_unlock(&memcg->pcp_counter_lock);
 #endif
 	put_online_cpus();
 	return val;
 }
 
-static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
+static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
 					 bool charge)
 {
 	int val = (charge) ? 1 : -1;
-	this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_SWAPOUT], val);
+	this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAPOUT], val);
 }
 
-void mem_cgroup_pgfault(struct mem_cgroup *mem, int val)
+void mem_cgroup_pgfault(struct mem_cgroup *memcg, int val)
 {
-	this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_PGFAULT], val);
+	this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGFAULT], val);
 }
 
-void mem_cgroup_pgmajfault(struct mem_cgroup *mem, int val)
+void mem_cgroup_pgmajfault(struct mem_cgroup *memcg, int val)
 {
-	this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT], val);
+	this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT], val);
 }
 
-static unsigned long mem_cgroup_read_events(struct mem_cgroup *mem,
+static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
 					    enum mem_cgroup_events_index idx)
 {
 	unsigned long val = 0;
 	int cpu;
 
 	for_each_online_cpu(cpu)
-		val += per_cpu(mem->stat->events[idx], cpu);
+		val += per_cpu(memcg->stat->events[idx], cpu);
 #ifdef CONFIG_HOTPLUG_CPU
-	spin_lock(&mem->pcp_counter_lock);
-	val += mem->nocpu_base.events[idx];
-	spin_unlock(&mem->pcp_counter_lock);
+	spin_lock(&memcg->pcp_counter_lock);
+	val += memcg->nocpu_base.events[idx];
+	spin_unlock(&memcg->pcp_counter_lock);
 #endif
 	return val;
 }
 
-static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
+static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
 					 bool file, int nr_pages)
 {
 	preempt_disable();
 
 	if (file)
-		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], nr_pages);
+		__this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_CACHE],
+				nr_pages);
 	else
-		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], nr_pages);
+		__this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS],
				nr_pages);
 
 	/* pagein of a big page is an event. So, ignore page size */
 	if (nr_pages > 0)
-		__this_cpu_inc(mem->stat->events[MEM_CGROUP_EVENTS_PGPGIN]);
+		__this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGIN]);
 	else {
-		__this_cpu_inc(mem->stat->events[MEM_CGROUP_EVENTS_PGPGOUT]);
+		__this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT]);
 		nr_pages = -nr_pages; /* for event */
 	}
 
-	__this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_COUNT], nr_pages);
+	__this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT], nr_pages);
 
 	preempt_enable();
 }
 
 unsigned long
-mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *mem, int nid, int zid,
+mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid,
 			     unsigned int lru_mask)
 {
 	struct mem_cgroup_per_zone *mz;
 	enum lru_list l;
 	unsigned long ret = 0;
 
-	mz = mem_cgroup_zoneinfo(mem, nid, zid);
+	mz = mem_cgroup_zoneinfo(memcg, nid, zid);
 
 	for_each_lru(l) {
 		if (BIT(l) & lru_mask)
@@ -656,44 +658,45 @@ mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *mem, int nid, int zid,
 }
 
 static unsigned long
-mem_cgroup_node_nr_lru_pages(struct mem_cgroup *mem,
+mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
 			int nid, unsigned int lru_mask)
 {
 	u64 total = 0;
 	int zid;
 
 	for (zid = 0; zid < MAX_NR_ZONES; zid++)
-		total += mem_cgroup_zone_nr_lru_pages(mem, nid, zid, lru_mask);
+		total += mem_cgroup_zone_nr_lru_pages(memcg,
+						nid, zid, lru_mask);
 
 	return total;
 }
 
-static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *mem,
+static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg,
 			unsigned int lru_mask)
 {
 	int nid;
 	u64 total = 0;
 
 	for_each_node_state(nid, N_HIGH_MEMORY)
-		total += mem_cgroup_node_nr_lru_pages(mem, nid, lru_mask);
+		total += mem_cgroup_node_nr_lru_pages(memcg, nid, lru_mask);
 	return total;
 }
 
-static bool __memcg_event_check(struct mem_cgroup *mem, int target)
+static bool __memcg_event_check(struct mem_cgroup *memcg, int target)
 {
 	unsigned long val, next;
 
-	val = this_cpu_read(mem->stat->events[MEM_CGROUP_EVENTS_COUNT]);
-	next = this_cpu_read(mem->stat->targets[target]);
+	val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]);
+	next = __this_cpu_read(memcg->stat->targets[target]);
 	/* from time_after() in jiffies.h */
 	return ((long)next - (long)val < 0);
 }
 
-static void __mem_cgroup_target_update(struct mem_cgroup *mem, int target)
+static void __mem_cgroup_target_update(struct mem_cgroup *memcg, int target)
 {
 	unsigned long val, next;
 
-	val = this_cpu_read(mem->stat->events[MEM_CGROUP_EVENTS_COUNT]);
+	val = __this_cpu_read(memcg->stat->events[MEM_CGROUP_EVENTS_COUNT]);
 
 	switch (target) {
 	case MEM_CGROUP_TARGET_THRESH:
@@ -709,34 +712,36 @@ static void __mem_cgroup_target_update(struct mem_cgroup *mem, int target)
 		return;
 	}
 
-	this_cpu_write(mem->stat->targets[target], next);
+	__this_cpu_write(memcg->stat->targets[target], next);
 }
 
 /*
  * Check events in order.
  *
  */
-static void memcg_check_events(struct mem_cgroup *mem, struct page *page)
+static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
 {
+	preempt_disable();
 	/* threshold event is triggered in finer grain than soft limit */
-	if (unlikely(__memcg_event_check(mem, MEM_CGROUP_TARGET_THRESH))) {
-		mem_cgroup_threshold(mem);
-		__mem_cgroup_target_update(mem, MEM_CGROUP_TARGET_THRESH);
-		if (unlikely(__memcg_event_check(mem,
+	if (unlikely(__memcg_event_check(memcg, MEM_CGROUP_TARGET_THRESH))) {
+		mem_cgroup_threshold(memcg);
+		__mem_cgroup_target_update(memcg, MEM_CGROUP_TARGET_THRESH);
+		if (unlikely(__memcg_event_check(memcg,
 			MEM_CGROUP_TARGET_SOFTLIMIT))) {
-			mem_cgroup_update_tree(mem, page);
-			__mem_cgroup_target_update(mem,
+			mem_cgroup_update_tree(memcg, page);
+			__mem_cgroup_target_update(memcg,
 				MEM_CGROUP_TARGET_SOFTLIMIT);
 		}
 #if MAX_NUMNODES > 1
-		if (unlikely(__memcg_event_check(mem,
+		if (unlikely(__memcg_event_check(memcg,
 			MEM_CGROUP_TARGET_NUMAINFO))) {
-			atomic_inc(&mem->numainfo_events);
-			__mem_cgroup_target_update(mem,
+			atomic_inc(&memcg->numainfo_events);
+			__mem_cgroup_target_update(memcg,
 				MEM_CGROUP_TARGET_NUMAINFO);
 		}
 #endif
 	}
+	preempt_enable();
 }
 
 static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
@@ -762,7 +767,7 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
 
 struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
 {
-	struct mem_cgroup *mem = NULL;
+	struct mem_cgroup *memcg = NULL;
 
 	if (!mm)
 		return NULL;
@@ -773,25 +778,25 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
 	 */
 	rcu_read_lock();
 	do {
-		mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
-		if (unlikely(!mem))
+		memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
+		if (unlikely(!memcg))
 			break;
-	} while (!css_tryget(&mem->css));
+	} while (!css_tryget(&memcg->css));
 	rcu_read_unlock();
-	return mem;
+	return memcg;
 }
 
 /* The caller has to guarantee "mem" exists before calling this */
-static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *mem)
+static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *memcg)
 {
 	struct cgroup_subsys_state *css;
 	int found;
 
-	if (!mem) /* ROOT cgroup has the smallest ID */
+	if (!memcg) /* ROOT cgroup has the smallest ID */
 		return root_mem_cgroup; /*css_put/get against root is ignored*/
-	if (!mem->use_hierarchy) {
-		if (css_tryget(&mem->css))
-			return mem;
+	if (!memcg->use_hierarchy) {
+		if (css_tryget(&memcg->css))
+			return memcg;
 		return NULL;
 	}
 	rcu_read_lock();
@@ -799,13 +804,13 @@ static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *mem)
 	 * searching a memory cgroup which has the smallest ID under given
 	 * ROOT cgroup. (ID >= 1)
 	 */
-	css = css_get_next(&mem_cgroup_subsys, 1, &mem->css, &found);
+	css = css_get_next(&mem_cgroup_subsys, 1, &memcg->css, &found);
 	if (css && css_tryget(css))
-		mem = container_of(css, struct mem_cgroup, css);
+		memcg = container_of(css, struct mem_cgroup, css);
 	else
-		mem = NULL;
+		memcg = NULL;
 	rcu_read_unlock();
-	return mem;
+	return memcg;
 }
 
 static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter,
@@ -859,29 +864,29 @@ static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter,
 	for_each_mem_cgroup_tree_cond(iter, NULL, true)
 
 
-static inline bool mem_cgroup_is_root(struct mem_cgroup *mem)
+static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
 {
-	return (mem == root_mem_cgroup);
+	return (memcg == root_mem_cgroup);
 }
 
 void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
 {
-	struct mem_cgroup *mem;
+	struct mem_cgroup *memcg;
 
 	if (!mm)
 		return;
 
 	rcu_read_lock();
-	mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
-	if (unlikely(!mem))
+	memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
+	if (unlikely(!memcg))
 		goto out;
 
 	switch (idx) {
 	case PGMAJFAULT:
-		mem_cgroup_pgmajfault(mem, 1);
+		mem_cgroup_pgmajfault(memcg, 1);
 		break;
 	case PGFAULT:
-		mem_cgroup_pgfault(mem, 1);
+		mem_cgroup_pgfault(memcg, 1);
 		break;
 	default:
 		BUG();
@@ -990,6 +995,16 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
 		return;
 	pc = lookup_page_cgroup(page);
 	VM_BUG_ON(PageCgroupAcctLRU(pc));
+	/*
+	 * putback:				charge:
+	 * SetPageLRU				SetPageCgroupUsed
+	 * smp_mb				smp_mb
+	 * PageCgroupUsed && add to memcg LRU	PageLRU && add to memcg LRU
+	 *
+	 * Ensure that one of the two sides adds the page to the memcg
+	 * LRU during a race.
+	 */
+	smp_mb();
 	if (!PageCgroupUsed(pc))
 		return;
 	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
@@ -1041,7 +1056,16 @@ static void mem_cgroup_lru_add_after_commit(struct page *page)
 	unsigned long flags;
 	struct zone *zone = page_zone(page);
 	struct page_cgroup *pc = lookup_page_cgroup(page);
-
+	/*
+	 * putback:				charge:
+	 * SetPageLRU				SetPageCgroupUsed
+	 * smp_mb				smp_mb
+	 * PageCgroupUsed && add to memcg LRU	PageLRU && add to memcg LRU
+	 *
+	 * Ensure that one of the two sides adds the page to the memcg
+	 * LRU during a race.
+	 */
+	smp_mb();
 	/* taking care of that the page is added to LRU while we commit it */
 	if (likely(!PageLRU(page)))
 		return;
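[Annotation, not part of the patch] The comment added in the two hunks above describes a store/full-barrier/load pairing between the putback path and the charge path. A standalone sketch of that pattern with C11 atomics and pthreads (all names invented, build with -pthread): the two seq_cst fences play the role of the smp_mb() calls and guarantee that at least one side observes the other's flag, so a racing page cannot be missed by both sides.

```c
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int page_lru;	/* stands in for PageLRU */
static atomic_int cgroup_used;	/* stands in for PageCgroupUsed */
static atomic_int linked;	/* how many sides linked the page to the memcg LRU */

static void *putback_side(void *arg)
{
	(void)arg;
	atomic_store_explicit(&page_lru, 1, memory_order_relaxed);	/* SetPageLRU */
	atomic_thread_fence(memory_order_seq_cst);			/* smp_mb() */
	if (atomic_load_explicit(&cgroup_used, memory_order_relaxed))	/* PageCgroupUsed? */
		atomic_fetch_add(&linked, 1);				/* add to memcg LRU */
	return NULL;
}

static void *charge_side(void *arg)
{
	(void)arg;
	atomic_store_explicit(&cgroup_used, 1, memory_order_relaxed);	/* SetPageCgroupUsed */
	atomic_thread_fence(memory_order_seq_cst);			/* smp_mb() */
	if (atomic_load_explicit(&page_lru, memory_order_relaxed))	/* PageLRU? */
		atomic_fetch_add(&linked, 1);				/* add to memcg LRU */
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, putback_side, NULL);
	pthread_create(&b, NULL, charge_side, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	/* with both fences in place, "linked" can be 1 or 2 but never 0 */
	printf("linked by %d side(s)\n", atomic_load(&linked));
	return 0;
}
```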
@@ -1063,21 +1087,21 @@ void mem_cgroup_move_lists(struct page *page, | |||
1063 | } | 1087 | } |
1064 | 1088 | ||
1065 | /* | 1089 | /* |
1066 | * Checks whether given mem is same or in the root_mem's | 1090 | * Checks whether given mem is same or in the root_mem_cgroup's |
1067 | * hierarchy subtree | 1091 | * hierarchy subtree |
1068 | */ | 1092 | */ |
1069 | static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_mem, | 1093 | static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg, |
1070 | struct mem_cgroup *mem) | 1094 | struct mem_cgroup *memcg) |
1071 | { | 1095 | { |
1072 | if (root_mem != mem) { | 1096 | if (root_memcg != memcg) { |
1073 | return (root_mem->use_hierarchy && | 1097 | return (root_memcg->use_hierarchy && |
1074 | css_is_ancestor(&mem->css, &root_mem->css)); | 1098 | css_is_ancestor(&memcg->css, &root_memcg->css)); |
1075 | } | 1099 | } |
1076 | 1100 | ||
1077 | return true; | 1101 | return true; |
1078 | } | 1102 | } |
1079 | 1103 | ||
1080 | int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) | 1104 | int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg) |
1081 | { | 1105 | { |
1082 | int ret; | 1106 | int ret; |
1083 | struct mem_cgroup *curr = NULL; | 1107 | struct mem_cgroup *curr = NULL; |
@@ -1091,25 +1115,29 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) | |||
1091 | if (!curr) | 1115 | if (!curr) |
1092 | return 0; | 1116 | return 0; |
1093 | /* | 1117 | /* |
1094 | * We should check use_hierarchy of "mem" not "curr". Because checking | 1118 | * We should check use_hierarchy of "memcg" not "curr". Because checking |
1095 | * use_hierarchy of "curr" here make this function true if hierarchy is | 1119 | * use_hierarchy of "curr" here make this function true if hierarchy is |
1096 | * enabled in "curr" and "curr" is a child of "mem" in *cgroup* | 1120 | * enabled in "curr" and "curr" is a child of "memcg" in *cgroup* |
1097 | * hierarchy(even if use_hierarchy is disabled in "mem"). | 1121 | * hierarchy(even if use_hierarchy is disabled in "memcg"). |
1098 | */ | 1122 | */ |
1099 | ret = mem_cgroup_same_or_subtree(mem, curr); | 1123 | ret = mem_cgroup_same_or_subtree(memcg, curr); |
1100 | css_put(&curr->css); | 1124 | css_put(&curr->css); |
1101 | return ret; | 1125 | return ret; |
1102 | } | 1126 | } |
1103 | 1127 | ||
1104 | static int calc_inactive_ratio(struct mem_cgroup *memcg, unsigned long *present_pages) | 1128 | int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) |
1105 | { | 1129 | { |
1106 | unsigned long active; | 1130 | unsigned long inactive_ratio; |
1131 | int nid = zone_to_nid(zone); | ||
1132 | int zid = zone_idx(zone); | ||
1107 | unsigned long inactive; | 1133 | unsigned long inactive; |
1134 | unsigned long active; | ||
1108 | unsigned long gb; | 1135 | unsigned long gb; |
1109 | unsigned long inactive_ratio; | ||
1110 | 1136 | ||
1111 | inactive = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_ANON)); | 1137 | inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid, |
1112 | active = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_ANON)); | 1138 | BIT(LRU_INACTIVE_ANON)); |
1139 | active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid, | ||
1140 | BIT(LRU_ACTIVE_ANON)); | ||
1113 | 1141 | ||
1114 | gb = (inactive + active) >> (30 - PAGE_SHIFT); | 1142 | gb = (inactive + active) >> (30 - PAGE_SHIFT); |
1115 | if (gb) | 1143 | if (gb) |
@@ -1117,39 +1145,20 @@ static int calc_inactive_ratio(struct mem_cgroup *memcg, unsigned long *present_ | |||
1117 | else | 1145 | else |
1118 | inactive_ratio = 1; | 1146 | inactive_ratio = 1; |
1119 | 1147 | ||
1120 | if (present_pages) { | 1148 | return inactive * inactive_ratio < active; |
1121 | present_pages[0] = inactive; | ||
1122 | present_pages[1] = active; | ||
1123 | } | ||
1124 | |||
1125 | return inactive_ratio; | ||
1126 | } | 1149 | } |
1127 | 1150 | ||
1128 | int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg) | 1151 | int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg, struct zone *zone) |
1129 | { | ||
1130 | unsigned long active; | ||
1131 | unsigned long inactive; | ||
1132 | unsigned long present_pages[2]; | ||
1133 | unsigned long inactive_ratio; | ||
1134 | |||
1135 | inactive_ratio = calc_inactive_ratio(memcg, present_pages); | ||
1136 | |||
1137 | inactive = present_pages[0]; | ||
1138 | active = present_pages[1]; | ||
1139 | |||
1140 | if (inactive * inactive_ratio < active) | ||
1141 | return 1; | ||
1142 | |||
1143 | return 0; | ||
1144 | } | ||
1145 | |||
1146 | int mem_cgroup_inactive_file_is_low(struct mem_cgroup *memcg) | ||
1147 | { | 1152 | { |
1148 | unsigned long active; | 1153 | unsigned long active; |
1149 | unsigned long inactive; | 1154 | unsigned long inactive; |
1155 | int zid = zone_idx(zone); | ||
1156 | int nid = zone_to_nid(zone); | ||
1150 | 1157 | ||
1151 | inactive = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_FILE)); | 1158 | inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid, |
1152 | active = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_FILE)); | 1159 | BIT(LRU_INACTIVE_FILE)); |
1160 | active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid, | ||
1161 | BIT(LRU_ACTIVE_FILE)); | ||
1153 | 1162 | ||
1154 | return (active > inactive); | 1163 | return (active > inactive); |
1155 | } | 1164 | } |
@@ -1254,13 +1263,13 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, | |||
1254 | * Returns the maximum amount of memory @mem can be charged with, in | 1263 | * Returns the maximum amount of memory @mem can be charged with, in |
1255 | * pages. | 1264 | * pages. |
1256 | */ | 1265 | */ |
1257 | static unsigned long mem_cgroup_margin(struct mem_cgroup *mem) | 1266 | static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg) |
1258 | { | 1267 | { |
1259 | unsigned long long margin; | 1268 | unsigned long long margin; |
1260 | 1269 | ||
1261 | margin = res_counter_margin(&mem->res); | 1270 | margin = res_counter_margin(&memcg->res); |
1262 | if (do_swap_account) | 1271 | if (do_swap_account) |
1263 | margin = min(margin, res_counter_margin(&mem->memsw)); | 1272 | margin = min(margin, res_counter_margin(&memcg->memsw)); |
1264 | return margin >> PAGE_SHIFT; | 1273 | return margin >> PAGE_SHIFT; |
1265 | } | 1274 | } |
1266 | 1275 | ||
@@ -1275,33 +1284,33 @@ int mem_cgroup_swappiness(struct mem_cgroup *memcg) | |||
1275 | return memcg->swappiness; | 1284 | return memcg->swappiness; |
1276 | } | 1285 | } |
1277 | 1286 | ||
1278 | static void mem_cgroup_start_move(struct mem_cgroup *mem) | 1287 | static void mem_cgroup_start_move(struct mem_cgroup *memcg) |
1279 | { | 1288 | { |
1280 | int cpu; | 1289 | int cpu; |
1281 | 1290 | ||
1282 | get_online_cpus(); | 1291 | get_online_cpus(); |
1283 | spin_lock(&mem->pcp_counter_lock); | 1292 | spin_lock(&memcg->pcp_counter_lock); |
1284 | for_each_online_cpu(cpu) | 1293 | for_each_online_cpu(cpu) |
1285 | per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) += 1; | 1294 | per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) += 1; |
1286 | mem->nocpu_base.count[MEM_CGROUP_ON_MOVE] += 1; | 1295 | memcg->nocpu_base.count[MEM_CGROUP_ON_MOVE] += 1; |
1287 | spin_unlock(&mem->pcp_counter_lock); | 1296 | spin_unlock(&memcg->pcp_counter_lock); |
1288 | put_online_cpus(); | 1297 | put_online_cpus(); |
1289 | 1298 | ||
1290 | synchronize_rcu(); | 1299 | synchronize_rcu(); |
1291 | } | 1300 | } |
1292 | 1301 | ||
1293 | static void mem_cgroup_end_move(struct mem_cgroup *mem) | 1302 | static void mem_cgroup_end_move(struct mem_cgroup *memcg) |
1294 | { | 1303 | { |
1295 | int cpu; | 1304 | int cpu; |
1296 | 1305 | ||
1297 | if (!mem) | 1306 | if (!memcg) |
1298 | return; | 1307 | return; |
1299 | get_online_cpus(); | 1308 | get_online_cpus(); |
1300 | spin_lock(&mem->pcp_counter_lock); | 1309 | spin_lock(&memcg->pcp_counter_lock); |
1301 | for_each_online_cpu(cpu) | 1310 | for_each_online_cpu(cpu) |
1302 | per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) -= 1; | 1311 | per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) -= 1; |
1303 | mem->nocpu_base.count[MEM_CGROUP_ON_MOVE] -= 1; | 1312 | memcg->nocpu_base.count[MEM_CGROUP_ON_MOVE] -= 1; |
1304 | spin_unlock(&mem->pcp_counter_lock); | 1313 | spin_unlock(&memcg->pcp_counter_lock); |
1305 | put_online_cpus(); | 1314 | put_online_cpus(); |
1306 | } | 1315 | } |
1307 | /* | 1316 | /* |
@@ -1316,13 +1325,13 @@ static void mem_cgroup_end_move(struct mem_cgroup *mem) | |||
1316 | * waiting at hith-memory prressure caused by "move". | 1325 | * waiting at hith-memory prressure caused by "move". |
1317 | */ | 1326 | */ |
1318 | 1327 | ||
1319 | static bool mem_cgroup_stealed(struct mem_cgroup *mem) | 1328 | static bool mem_cgroup_stealed(struct mem_cgroup *memcg) |
1320 | { | 1329 | { |
1321 | VM_BUG_ON(!rcu_read_lock_held()); | 1330 | VM_BUG_ON(!rcu_read_lock_held()); |
1322 | return this_cpu_read(mem->stat->count[MEM_CGROUP_ON_MOVE]) > 0; | 1331 | return this_cpu_read(memcg->stat->count[MEM_CGROUP_ON_MOVE]) > 0; |
1323 | } | 1332 | } |
1324 | 1333 | ||
1325 | static bool mem_cgroup_under_move(struct mem_cgroup *mem) | 1334 | static bool mem_cgroup_under_move(struct mem_cgroup *memcg) |
1326 | { | 1335 | { |
1327 | struct mem_cgroup *from; | 1336 | struct mem_cgroup *from; |
1328 | struct mem_cgroup *to; | 1337 | struct mem_cgroup *to; |
@@ -1337,17 +1346,17 @@ static bool mem_cgroup_under_move(struct mem_cgroup *mem) | |||
1337 | if (!from) | 1346 | if (!from) |
1338 | goto unlock; | 1347 | goto unlock; |
1339 | 1348 | ||
1340 | ret = mem_cgroup_same_or_subtree(mem, from) | 1349 | ret = mem_cgroup_same_or_subtree(memcg, from) |
1341 | || mem_cgroup_same_or_subtree(mem, to); | 1350 | || mem_cgroup_same_or_subtree(memcg, to); |
1342 | unlock: | 1351 | unlock: |
1343 | spin_unlock(&mc.lock); | 1352 | spin_unlock(&mc.lock); |
1344 | return ret; | 1353 | return ret; |
1345 | } | 1354 | } |
1346 | 1355 | ||
1347 | static bool mem_cgroup_wait_acct_move(struct mem_cgroup *mem) | 1356 | static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg) |
1348 | { | 1357 | { |
1349 | if (mc.moving_task && current != mc.moving_task) { | 1358 | if (mc.moving_task && current != mc.moving_task) { |
1350 | if (mem_cgroup_under_move(mem)) { | 1359 | if (mem_cgroup_under_move(memcg)) { |
1351 | DEFINE_WAIT(wait); | 1360 | DEFINE_WAIT(wait); |
1352 | prepare_to_wait(&mc.waitq, &wait, TASK_INTERRUPTIBLE); | 1361 | prepare_to_wait(&mc.waitq, &wait, TASK_INTERRUPTIBLE); |
1353 | /* moving charge context might have finished. */ | 1362 | /* moving charge context might have finished. */ |
@@ -1431,12 +1440,12 @@ done: | |||
1431 | * This function returns the number of memcg under hierarchy tree. Returns | 1440 | * This function returns the number of memcg under hierarchy tree. Returns |
1432 | * 1(self count) if no children. | 1441 | * 1(self count) if no children. |
1433 | */ | 1442 | */ |
1434 | static int mem_cgroup_count_children(struct mem_cgroup *mem) | 1443 | static int mem_cgroup_count_children(struct mem_cgroup *memcg) |
1435 | { | 1444 | { |
1436 | int num = 0; | 1445 | int num = 0; |
1437 | struct mem_cgroup *iter; | 1446 | struct mem_cgroup *iter; |
1438 | 1447 | ||
1439 | for_each_mem_cgroup_tree(iter, mem) | 1448 | for_each_mem_cgroup_tree(iter, memcg) |
1440 | num++; | 1449 | num++; |
1441 | return num; | 1450 | return num; |
1442 | } | 1451 | } |
@@ -1466,21 +1475,21 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg) | |||
1466 | * that to reclaim free pages from. | 1475 | * that to reclaim free pages from. |
1467 | */ | 1476 | */ |
1468 | static struct mem_cgroup * | 1477 | static struct mem_cgroup * |
1469 | mem_cgroup_select_victim(struct mem_cgroup *root_mem) | 1478 | mem_cgroup_select_victim(struct mem_cgroup *root_memcg) |
1470 | { | 1479 | { |
1471 | struct mem_cgroup *ret = NULL; | 1480 | struct mem_cgroup *ret = NULL; |
1472 | struct cgroup_subsys_state *css; | 1481 | struct cgroup_subsys_state *css; |
1473 | int nextid, found; | 1482 | int nextid, found; |
1474 | 1483 | ||
1475 | if (!root_mem->use_hierarchy) { | 1484 | if (!root_memcg->use_hierarchy) { |
1476 | css_get(&root_mem->css); | 1485 | css_get(&root_memcg->css); |
1477 | ret = root_mem; | 1486 | ret = root_memcg; |
1478 | } | 1487 | } |
1479 | 1488 | ||
1480 | while (!ret) { | 1489 | while (!ret) { |
1481 | rcu_read_lock(); | 1490 | rcu_read_lock(); |
1482 | nextid = root_mem->last_scanned_child + 1; | 1491 | nextid = root_memcg->last_scanned_child + 1; |
1483 | css = css_get_next(&mem_cgroup_subsys, nextid, &root_mem->css, | 1492 | css = css_get_next(&mem_cgroup_subsys, nextid, &root_memcg->css, |
1484 | &found); | 1493 | &found); |
1485 | if (css && css_tryget(css)) | 1494 | if (css && css_tryget(css)) |
1486 | ret = container_of(css, struct mem_cgroup, css); | 1495 | ret = container_of(css, struct mem_cgroup, css); |
@@ -1489,9 +1498,9 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem) | |||
1489 | /* Updates scanning parameter */ | 1498 | /* Updates scanning parameter */ |
1490 | if (!css) { | 1499 | if (!css) { |
1491 | /* this means start scan from ID:1 */ | 1500 | /* this means start scan from ID:1 */ |
1492 | root_mem->last_scanned_child = 0; | 1501 | root_memcg->last_scanned_child = 0; |
1493 | } else | 1502 | } else |
1494 | root_mem->last_scanned_child = found; | 1503 | root_memcg->last_scanned_child = found; |
1495 | } | 1504 | } |
1496 | 1505 | ||
1497 | return ret; | 1506 | return ret; |
@@ -1507,14 +1516,14 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem) | |||
1507 | * reclaimable pages on a node. Returns true if there are any reclaimable | 1516 | * reclaimable pages on a node. Returns true if there are any reclaimable |
1508 | * pages in the node. | 1517 | * pages in the node. |
1509 | */ | 1518 | */ |
1510 | static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *mem, | 1519 | static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *memcg, |
1511 | int nid, bool noswap) | 1520 | int nid, bool noswap) |
1512 | { | 1521 | { |
1513 | if (mem_cgroup_node_nr_lru_pages(mem, nid, LRU_ALL_FILE)) | 1522 | if (mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL_FILE)) |
1514 | return true; | 1523 | return true; |
1515 | if (noswap || !total_swap_pages) | 1524 | if (noswap || !total_swap_pages) |
1516 | return false; | 1525 | return false; |
1517 | if (mem_cgroup_node_nr_lru_pages(mem, nid, LRU_ALL_ANON)) | 1526 | if (mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL_ANON)) |
1518 | return true; | 1527 | return true; |
1519 | return false; | 1528 | return false; |
1520 | 1529 | ||
@@ -1527,29 +1536,29 @@ static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *mem, | |||
1527 | * nodes based on the zonelist. So update the list loosely once per 10 secs. | 1536 | * nodes based on the zonelist. So update the list loosely once per 10 secs. |
1528 | * | 1537 | * |
1529 | */ | 1538 | */ |
1530 | static void mem_cgroup_may_update_nodemask(struct mem_cgroup *mem) | 1539 | static void mem_cgroup_may_update_nodemask(struct mem_cgroup *memcg) |
1531 | { | 1540 | { |
1532 | int nid; | 1541 | int nid; |
1533 | /* | 1542 | /* |
1534 | * numainfo_events > 0 means there was at least NUMAINFO_EVENTS_TARGET | 1543 | * numainfo_events > 0 means there was at least NUMAINFO_EVENTS_TARGET |
1535 | * pagein/pageout changes since the last update. | 1544 | * pagein/pageout changes since the last update. |
1536 | */ | 1545 | */ |
1537 | if (!atomic_read(&mem->numainfo_events)) | 1546 | if (!atomic_read(&memcg->numainfo_events)) |
1538 | return; | 1547 | return; |
1539 | if (atomic_inc_return(&mem->numainfo_updating) > 1) | 1548 | if (atomic_inc_return(&memcg->numainfo_updating) > 1) |
1540 | return; | 1549 | return; |
1541 | 1550 | ||
1542 | /* make a nodemask where this memcg uses memory from */ | 1551 | /* make a nodemask where this memcg uses memory from */ |
1543 | mem->scan_nodes = node_states[N_HIGH_MEMORY]; | 1552 | memcg->scan_nodes = node_states[N_HIGH_MEMORY]; |
1544 | 1553 | ||
1545 | for_each_node_mask(nid, node_states[N_HIGH_MEMORY]) { | 1554 | for_each_node_mask(nid, node_states[N_HIGH_MEMORY]) { |
1546 | 1555 | ||
1547 | if (!test_mem_cgroup_node_reclaimable(mem, nid, false)) | 1556 | if (!test_mem_cgroup_node_reclaimable(memcg, nid, false)) |
1548 | node_clear(nid, mem->scan_nodes); | 1557 | node_clear(nid, memcg->scan_nodes); |
1549 | } | 1558 | } |
1550 | 1559 | ||
1551 | atomic_set(&mem->numainfo_events, 0); | 1560 | atomic_set(&memcg->numainfo_events, 0); |
1552 | atomic_set(&mem->numainfo_updating, 0); | 1561 | atomic_set(&memcg->numainfo_updating, 0); |
1553 | } | 1562 | } |
1554 | 1563 | ||
1555 | /* | 1564 | /* |
@@ -1564,16 +1573,16 @@ static void mem_cgroup_may_update_nodemask(struct mem_cgroup *mem) | |||
1564 | * | 1573 | * |
1565 | * Now, we use round-robin. Better algorithm is welcomed. | 1574 | * Now, we use round-robin. Better algorithm is welcomed. |
1566 | */ | 1575 | */ |
1567 | int mem_cgroup_select_victim_node(struct mem_cgroup *mem) | 1576 | int mem_cgroup_select_victim_node(struct mem_cgroup *memcg) |
1568 | { | 1577 | { |
1569 | int node; | 1578 | int node; |
1570 | 1579 | ||
1571 | mem_cgroup_may_update_nodemask(mem); | 1580 | mem_cgroup_may_update_nodemask(memcg); |
1572 | node = mem->last_scanned_node; | 1581 | node = memcg->last_scanned_node; |
1573 | 1582 | ||
1574 | node = next_node(node, mem->scan_nodes); | 1583 | node = next_node(node, memcg->scan_nodes); |
1575 | if (node == MAX_NUMNODES) | 1584 | if (node == MAX_NUMNODES) |
1576 | node = first_node(mem->scan_nodes); | 1585 | node = first_node(memcg->scan_nodes); |
1577 | /* | 1586 | /* |
1578 | * We call this when we hit limit, not when pages are added to LRU. | 1587 | * We call this when we hit limit, not when pages are added to LRU. |
1579 | * No LRU may hold pages because all pages are UNEVICTABLE or | 1588 | * No LRU may hold pages because all pages are UNEVICTABLE or |
@@ -1583,7 +1592,7 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *mem) | |||
1583 | if (unlikely(node == MAX_NUMNODES)) | 1592 | if (unlikely(node == MAX_NUMNODES)) |
1584 | node = numa_node_id(); | 1593 | node = numa_node_id(); |
1585 | 1594 | ||
1586 | mem->last_scanned_node = node; | 1595 | memcg->last_scanned_node = node; |
1587 | return node; | 1596 | return node; |
1588 | } | 1597 | } |
1589 | 1598 | ||
@@ -1593,7 +1602,7 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *mem) | |||
1593 | * unused nodes. But scan_nodes is lazily updated and may not cotain | 1602 | * unused nodes. But scan_nodes is lazily updated and may not cotain |
1594 | * enough new information. We need to do double check. | 1603 | * enough new information. We need to do double check. |
1595 | */ | 1604 | */ |
1596 | bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap) | 1605 | bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap) |
1597 | { | 1606 | { |
1598 | int nid; | 1607 | int nid; |
1599 | 1608 | ||
@@ -1601,12 +1610,12 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap) | |||
1601 | * quick check...making use of scan_node. | 1610 | * quick check...making use of scan_node. |
1602 | * We can skip unused nodes. | 1611 | * We can skip unused nodes. |
1603 | */ | 1612 | */ |
1604 | if (!nodes_empty(mem->scan_nodes)) { | 1613 | if (!nodes_empty(memcg->scan_nodes)) { |
1605 | for (nid = first_node(mem->scan_nodes); | 1614 | for (nid = first_node(memcg->scan_nodes); |
1606 | nid < MAX_NUMNODES; | 1615 | nid < MAX_NUMNODES; |
1607 | nid = next_node(nid, mem->scan_nodes)) { | 1616 | nid = next_node(nid, memcg->scan_nodes)) { |
1608 | 1617 | ||
1609 | if (test_mem_cgroup_node_reclaimable(mem, nid, noswap)) | 1618 | if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap)) |
1610 | return true; | 1619 | return true; |
1611 | } | 1620 | } |
1612 | } | 1621 | } |
@@ -1614,23 +1623,23 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap) | |||
1614 | * Check rest of nodes. | 1623 | * Check rest of nodes. |
1615 | */ | 1624 | */ |
1616 | for_each_node_state(nid, N_HIGH_MEMORY) { | 1625 | for_each_node_state(nid, N_HIGH_MEMORY) { |
1617 | if (node_isset(nid, mem->scan_nodes)) | 1626 | if (node_isset(nid, memcg->scan_nodes)) |
1618 | continue; | 1627 | continue; |
1619 | if (test_mem_cgroup_node_reclaimable(mem, nid, noswap)) | 1628 | if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap)) |
1620 | return true; | 1629 | return true; |
1621 | } | 1630 | } |
1622 | return false; | 1631 | return false; |
1623 | } | 1632 | } |
1624 | 1633 | ||
1625 | #else | 1634 | #else |
1626 | int mem_cgroup_select_victim_node(struct mem_cgroup *mem) | 1635 | int mem_cgroup_select_victim_node(struct mem_cgroup *memcg) |
1627 | { | 1636 | { |
1628 | return 0; | 1637 | return 0; |
1629 | } | 1638 | } |
1630 | 1639 | ||
1631 | bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap) | 1640 | bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap) |
1632 | { | 1641 | { |
1633 | return test_mem_cgroup_node_reclaimable(mem, 0, noswap); | 1642 | return test_mem_cgroup_node_reclaimable(memcg, 0, noswap); |
1634 | } | 1643 | } |
1635 | #endif | 1644 | #endif |
1636 | 1645 | ||
@@ -1639,14 +1648,14 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap) | |||
1639 | * we reclaimed from, so that we don't end up penalizing one child extensively | 1648 | * we reclaimed from, so that we don't end up penalizing one child extensively |
1640 | * based on its position in the children list. | 1649 | * based on its position in the children list. |
1641 | * | 1650 | * |
1642 | * root_mem is the original ancestor that we've been reclaim from. | 1651 | * root_memcg is the original ancestor that we've been reclaim from. |
1643 | * | 1652 | * |
1644 | * We give up and return to the caller when we visit root_mem twice. | 1653 | * We give up and return to the caller when we visit root_memcg twice. |
1645 | * (other groups can be removed while we're walking....) | 1654 | * (other groups can be removed while we're walking....) |
1646 | * | 1655 | * |
1647 | * If shrink==true, for avoiding to free too much, this returns immedieately. | 1656 | * If shrink==true, for avoiding to free too much, this returns immedieately. |
1648 | */ | 1657 | */ |
1649 | static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | 1658 | static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg, |
1650 | struct zone *zone, | 1659 | struct zone *zone, |
1651 | gfp_t gfp_mask, | 1660 | gfp_t gfp_mask, |
1652 | unsigned long reclaim_options, | 1661 | unsigned long reclaim_options, |
@@ -1661,15 +1670,15 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
1661 | unsigned long excess; | 1670 | unsigned long excess; |
1662 | unsigned long nr_scanned; | 1671 | unsigned long nr_scanned; |
1663 | 1672 | ||
1664 | excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT; | 1673 | excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT; |
1665 | 1674 | ||
1666 | /* If memsw_is_minimum==1, swap-out is of-no-use. */ | 1675 | /* If memsw_is_minimum==1, swap-out is of-no-use. */ |
1667 | if (!check_soft && !shrink && root_mem->memsw_is_minimum) | 1676 | if (!check_soft && !shrink && root_memcg->memsw_is_minimum) |
1668 | noswap = true; | 1677 | noswap = true; |
1669 | 1678 | ||
1670 | while (1) { | 1679 | while (1) { |
1671 | victim = mem_cgroup_select_victim(root_mem); | 1680 | victim = mem_cgroup_select_victim(root_memcg); |
1672 | if (victim == root_mem) { | 1681 | if (victim == root_memcg) { |
1673 | loop++; | 1682 | loop++; |
1674 | /* | 1683 | /* |
1675 | * We are not draining per cpu cached charges during | 1684 | * We are not draining per cpu cached charges during |
@@ -1678,7 +1687,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
1678 | * charges will not give any. | 1687 | * charges will not give any. |
1679 | */ | 1688 | */ |
1680 | if (!check_soft && loop >= 1) | 1689 | if (!check_soft && loop >= 1) |
1681 | drain_all_stock_async(root_mem); | 1690 | drain_all_stock_async(root_memcg); |
1682 | if (loop >= 2) { | 1691 | if (loop >= 2) { |
1683 | /* | 1692 | /* |
1684 | * If we have not been able to reclaim | 1693 | * If we have not been able to reclaim |
@@ -1725,9 +1734,9 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
1725 | return ret; | 1734 | return ret; |
1726 | total += ret; | 1735 | total += ret; |
1727 | if (check_soft) { | 1736 | if (check_soft) { |
1728 | if (!res_counter_soft_limit_excess(&root_mem->res)) | 1737 | if (!res_counter_soft_limit_excess(&root_memcg->res)) |
1729 | return total; | 1738 | return total; |
1730 | } else if (mem_cgroup_margin(root_mem)) | 1739 | } else if (mem_cgroup_margin(root_memcg)) |
1731 | return total; | 1740 | return total; |
1732 | } | 1741 | } |
1733 | return total; | 1742 | return total; |
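The loop above walks the memcg hierarchy round-robin, reclaiming from one victim per pass and stopping once the soft-limit excess (or the margin) is recovered, or after visiting the root twice without progress. Below is a minimal userspace sketch of that control flow; the names (select_victim, reclaim_from, excess) are hypothetical stand-ins for the kernel helpers, not the kernel implementation.

#include <stdio.h>

/* hypothetical stand-ins for a memcg subtree: root plus three children */
struct cg { const char *name; long pages; };

static struct cg root = { "root", 1 };
static struct cg kids[] = { { "a", 2 }, { "b", 0 }, { "c", 1 } };
static int next_child;

/* pages we still want to get back under the (hypothetical) limit */
static long excess = 3;

/* round-robin victim selection: children first, then the root itself */
static struct cg *select_victim(void)
{
	if (next_child == 3) {
		next_child = 0;
		return &root;
	}
	return &kids[next_child++];
}

/* "reclaim" one page from the victim if it has any */
static long reclaim_from(struct cg *v)
{
	long freed = v->pages > 0 ? 1 : 0;

	v->pages -= freed;
	excess -= freed;
	return freed;
}

int main(void)
{
	long total = 0;
	int visited_root = 0;

	while (excess > 0) {		/* margin not yet restored */
		struct cg *victim = select_victim();

		if (victim == &root && ++visited_root >= 2)
			break;		/* give up after reaching the root twice */
		total += reclaim_from(victim);
		printf("victim %s, freed so far %ld, excess %ld\n",
		       victim->name, total, excess);
	}
	return 0;
}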
@@ -1738,12 +1747,12 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
1738 | * If someone is running, return false. | 1747 | * If someone is running, return false. |
1739 | * Has to be called with memcg_oom_lock | 1748 | * Has to be called with memcg_oom_lock |
1740 | */ | 1749 | */ |
1741 | static bool mem_cgroup_oom_lock(struct mem_cgroup *mem) | 1750 | static bool mem_cgroup_oom_lock(struct mem_cgroup *memcg) |
1742 | { | 1751 | { |
1743 | struct mem_cgroup *iter, *failed = NULL; | 1752 | struct mem_cgroup *iter, *failed = NULL; |
1744 | bool cond = true; | 1753 | bool cond = true; |
1745 | 1754 | ||
1746 | for_each_mem_cgroup_tree_cond(iter, mem, cond) { | 1755 | for_each_mem_cgroup_tree_cond(iter, memcg, cond) { |
1747 | if (iter->oom_lock) { | 1756 | if (iter->oom_lock) { |
1748 | /* | 1757 | /* |
1749 | * this subtree of our hierarchy is already locked | 1758 | * this subtree of our hierarchy is already locked |
@@ -1763,7 +1772,7 @@ static bool mem_cgroup_oom_lock(struct mem_cgroup *mem) | |||
1763 | * what we set up to the failing subtree | 1772 | * what we set up to the failing subtree |
1764 | */ | 1773 | */ |
1765 | cond = true; | 1774 | cond = true; |
1766 | for_each_mem_cgroup_tree_cond(iter, mem, cond) { | 1775 | for_each_mem_cgroup_tree_cond(iter, memcg, cond) { |
1767 | if (iter == failed) { | 1776 | if (iter == failed) { |
1768 | cond = false; | 1777 | cond = false; |
1769 | continue; | 1778 | continue; |
@@ -1776,24 +1785,24 @@ static bool mem_cgroup_oom_lock(struct mem_cgroup *mem) | |||
1776 | /* | 1785 | /* |
1777 | * Has to be called with memcg_oom_lock | 1786 | * Has to be called with memcg_oom_lock |
1778 | */ | 1787 | */ |
1779 | static int mem_cgroup_oom_unlock(struct mem_cgroup *mem) | 1788 | static int mem_cgroup_oom_unlock(struct mem_cgroup *memcg) |
1780 | { | 1789 | { |
1781 | struct mem_cgroup *iter; | 1790 | struct mem_cgroup *iter; |
1782 | 1791 | ||
1783 | for_each_mem_cgroup_tree(iter, mem) | 1792 | for_each_mem_cgroup_tree(iter, memcg) |
1784 | iter->oom_lock = false; | 1793 | iter->oom_lock = false; |
1785 | return 0; | 1794 | return 0; |
1786 | } | 1795 | } |
1787 | 1796 | ||
1788 | static void mem_cgroup_mark_under_oom(struct mem_cgroup *mem) | 1797 | static void mem_cgroup_mark_under_oom(struct mem_cgroup *memcg) |
1789 | { | 1798 | { |
1790 | struct mem_cgroup *iter; | 1799 | struct mem_cgroup *iter; |
1791 | 1800 | ||
1792 | for_each_mem_cgroup_tree(iter, mem) | 1801 | for_each_mem_cgroup_tree(iter, memcg) |
1793 | atomic_inc(&iter->under_oom); | 1802 | atomic_inc(&iter->under_oom); |
1794 | } | 1803 | } |
1795 | 1804 | ||
1796 | static void mem_cgroup_unmark_under_oom(struct mem_cgroup *mem) | 1805 | static void mem_cgroup_unmark_under_oom(struct mem_cgroup *memcg) |
1797 | { | 1806 | { |
1798 | struct mem_cgroup *iter; | 1807 | struct mem_cgroup *iter; |
1799 | 1808 | ||
@@ -1802,7 +1811,7 @@ static void mem_cgroup_unmark_under_oom(struct mem_cgroup *mem) | |||
1802 | * mem_cgroup_oom_lock() may not be called. We have to use | 1811 | * mem_cgroup_oom_lock() may not be called. We have to use |
1803 | * atomic_add_unless() here. | 1812 | * atomic_add_unless() here. |
1804 | */ | 1813 | */ |
1805 | for_each_mem_cgroup_tree(iter, mem) | 1814 | for_each_mem_cgroup_tree(iter, memcg) |
1806 | atomic_add_unless(&iter->under_oom, -1, 0); | 1815 | atomic_add_unless(&iter->under_oom, -1, 0); |
1807 | } | 1816 | } |
1808 | 1817 | ||
@@ -1817,85 +1826,85 @@ struct oom_wait_info { | |||
1817 | static int memcg_oom_wake_function(wait_queue_t *wait, | 1826 | static int memcg_oom_wake_function(wait_queue_t *wait, |
1818 | unsigned mode, int sync, void *arg) | 1827 | unsigned mode, int sync, void *arg) |
1819 | { | 1828 | { |
1820 | struct mem_cgroup *wake_mem = (struct mem_cgroup *)arg, | 1829 | struct mem_cgroup *wake_memcg = (struct mem_cgroup *)arg, |
1821 | *oom_wait_mem; | 1830 | *oom_wait_memcg; |
1822 | struct oom_wait_info *oom_wait_info; | 1831 | struct oom_wait_info *oom_wait_info; |
1823 | 1832 | ||
1824 | oom_wait_info = container_of(wait, struct oom_wait_info, wait); | 1833 | oom_wait_info = container_of(wait, struct oom_wait_info, wait); |
1825 | oom_wait_mem = oom_wait_info->mem; | 1834 | oom_wait_memcg = oom_wait_info->mem; |
1826 | 1835 | ||
1827 | /* | 1836 | /* |
1828 | * Both of oom_wait_info->mem and wake_mem are stable under us. | 1837 | * Both of oom_wait_info->mem and wake_memcg are stable under us. |
1829 | * Then we can use css_is_ancestor without worrying about RCU. | 1838 | * Then we can use css_is_ancestor without worrying about RCU. |
1830 | */ | 1839 | */ |
1831 | if (!mem_cgroup_same_or_subtree(oom_wait_mem, wake_mem) | 1840 | if (!mem_cgroup_same_or_subtree(oom_wait_memcg, wake_memcg) |
1832 | && !mem_cgroup_same_or_subtree(wake_mem, oom_wait_mem)) | 1841 | && !mem_cgroup_same_or_subtree(wake_memcg, oom_wait_memcg)) |
1833 | return 0; | 1842 | return 0; |
1834 | return autoremove_wake_function(wait, mode, sync, arg); | 1843 | return autoremove_wake_function(wait, mode, sync, arg); |
1835 | } | 1844 | } |
1836 | 1845 | ||
1837 | static void memcg_wakeup_oom(struct mem_cgroup *mem) | 1846 | static void memcg_wakeup_oom(struct mem_cgroup *memcg) |
1838 | { | 1847 | { |
1839 | /* for filtering, pass "mem" as argument. */ | 1848 | /* for filtering, pass "memcg" as argument. */ |
1840 | __wake_up(&memcg_oom_waitq, TASK_NORMAL, 0, mem); | 1849 | __wake_up(&memcg_oom_waitq, TASK_NORMAL, 0, memcg); |
1841 | } | 1850 | } |
1842 | 1851 | ||
1843 | static void memcg_oom_recover(struct mem_cgroup *mem) | 1852 | static void memcg_oom_recover(struct mem_cgroup *memcg) |
1844 | { | 1853 | { |
1845 | if (mem && atomic_read(&mem->under_oom)) | 1854 | if (memcg && atomic_read(&memcg->under_oom)) |
1846 | memcg_wakeup_oom(mem); | 1855 | memcg_wakeup_oom(memcg); |
1847 | } | 1856 | } |
1848 | 1857 | ||
1849 | /* | 1858 | /* |
1850 | * Try to call the OOM killer. Returns false if we should exit the memory-reclaim loop. | 1859 | * Try to call the OOM killer. Returns false if we should exit the memory-reclaim loop. |
1851 | */ | 1860 | */ |
1852 | bool mem_cgroup_handle_oom(struct mem_cgroup *mem, gfp_t mask) | 1861 | bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask) |
1853 | { | 1862 | { |
1854 | struct oom_wait_info owait; | 1863 | struct oom_wait_info owait; |
1855 | bool locked, need_to_kill; | 1864 | bool locked, need_to_kill; |
1856 | 1865 | ||
1857 | owait.mem = mem; | 1866 | owait.mem = memcg; |
1858 | owait.wait.flags = 0; | 1867 | owait.wait.flags = 0; |
1859 | owait.wait.func = memcg_oom_wake_function; | 1868 | owait.wait.func = memcg_oom_wake_function; |
1860 | owait.wait.private = current; | 1869 | owait.wait.private = current; |
1861 | INIT_LIST_HEAD(&owait.wait.task_list); | 1870 | INIT_LIST_HEAD(&owait.wait.task_list); |
1862 | need_to_kill = true; | 1871 | need_to_kill = true; |
1863 | mem_cgroup_mark_under_oom(mem); | 1872 | mem_cgroup_mark_under_oom(memcg); |
1864 | 1873 | ||
1865 | /* First, try to take the OOM lock for the hierarchy under mem. */ | 1874 | /* First, try to take the OOM lock for the hierarchy under memcg. */ |
1866 | spin_lock(&memcg_oom_lock); | 1875 | spin_lock(&memcg_oom_lock); |
1867 | locked = mem_cgroup_oom_lock(mem); | 1876 | locked = mem_cgroup_oom_lock(memcg); |
1868 | /* | 1877 | /* |
1869 | * Even if signal_pending(), we can't quit charge() loop without | 1878 | * Even if signal_pending(), we can't quit charge() loop without |
1870 | * accounting, so UNINTERRUPTIBLE is appropriate. But SIGKILL | 1879 | * accounting, so UNINTERRUPTIBLE is appropriate. But SIGKILL |
1871 | * under OOM should always be honored, so use TASK_KILLABLE here. | 1880 | * under OOM should always be honored, so use TASK_KILLABLE here. |
1872 | */ | 1881 | */ |
1873 | prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE); | 1882 | prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE); |
1874 | if (!locked || mem->oom_kill_disable) | 1883 | if (!locked || memcg->oom_kill_disable) |
1875 | need_to_kill = false; | 1884 | need_to_kill = false; |
1876 | if (locked) | 1885 | if (locked) |
1877 | mem_cgroup_oom_notify(mem); | 1886 | mem_cgroup_oom_notify(memcg); |
1878 | spin_unlock(&memcg_oom_lock); | 1887 | spin_unlock(&memcg_oom_lock); |
1879 | 1888 | ||
1880 | if (need_to_kill) { | 1889 | if (need_to_kill) { |
1881 | finish_wait(&memcg_oom_waitq, &owait.wait); | 1890 | finish_wait(&memcg_oom_waitq, &owait.wait); |
1882 | mem_cgroup_out_of_memory(mem, mask); | 1891 | mem_cgroup_out_of_memory(memcg, mask); |
1883 | } else { | 1892 | } else { |
1884 | schedule(); | 1893 | schedule(); |
1885 | finish_wait(&memcg_oom_waitq, &owait.wait); | 1894 | finish_wait(&memcg_oom_waitq, &owait.wait); |
1886 | } | 1895 | } |
1887 | spin_lock(&memcg_oom_lock); | 1896 | spin_lock(&memcg_oom_lock); |
1888 | if (locked) | 1897 | if (locked) |
1889 | mem_cgroup_oom_unlock(mem); | 1898 | mem_cgroup_oom_unlock(memcg); |
1890 | memcg_wakeup_oom(mem); | 1899 | memcg_wakeup_oom(memcg); |
1891 | spin_unlock(&memcg_oom_lock); | 1900 | spin_unlock(&memcg_oom_lock); |
1892 | 1901 | ||
1893 | mem_cgroup_unmark_under_oom(mem); | 1902 | mem_cgroup_unmark_under_oom(memcg); |
1894 | 1903 | ||
1895 | if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current)) | 1904 | if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current)) |
1896 | return false; | 1905 | return false; |
1897 | /* Give a chance to the dying process */ | 1906 | /* Give a chance to the dying process */ |
1898 | schedule_timeout(1); | 1907 | schedule_timeout_uninterruptible(1); |
1899 | return true; | 1908 | return true; |
1900 | } | 1909 | } |
1901 | 1910 | ||
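mem_cgroup_handle_oom() above lets exactly one task per hierarchy drive the OOM kill while everyone else sleeps on memcg_oom_waitq. The following is a simplified, single-threaded sketch of just that decision; the cg_state struct is hypothetical and there is no waitqueue, so treat it as an illustration of the idea rather than the kernel code.

#include <stdbool.h>
#include <stdio.h>

/* hypothetical per-hierarchy state */
struct cg_state {
	bool oom_lock_taken;	/* someone already handling OOM here? */
	bool oom_kill_disable;	/* userspace asked to handle OOM itself */
};

/* returns true if the caller should invoke the OOM killer,
 * false if it should go to sleep and wait to be woken up */
static bool decide_need_to_kill(struct cg_state *s)
{
	bool locked = !s->oom_lock_taken;

	if (locked)
		s->oom_lock_taken = true;
	/* only the lock owner kills, and only if kills are allowed */
	return locked && !s->oom_kill_disable;
}

int main(void)
{
	struct cg_state s = { false, false };

	printf("first caller kills: %d\n", decide_need_to_kill(&s));
	printf("second caller waits: %d\n", decide_need_to_kill(&s));
	s.oom_lock_taken = false;
	s.oom_kill_disable = true;
	printf("kill disabled, caller waits: %d\n", decide_need_to_kill(&s));
	return 0;
}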
@@ -1926,7 +1935,7 @@ bool mem_cgroup_handle_oom(struct mem_cgroup *mem, gfp_t mask) | |||
1926 | void mem_cgroup_update_page_stat(struct page *page, | 1935 | void mem_cgroup_update_page_stat(struct page *page, |
1927 | enum mem_cgroup_page_stat_item idx, int val) | 1936 | enum mem_cgroup_page_stat_item idx, int val) |
1928 | { | 1937 | { |
1929 | struct mem_cgroup *mem; | 1938 | struct mem_cgroup *memcg; |
1930 | struct page_cgroup *pc = lookup_page_cgroup(page); | 1939 | struct page_cgroup *pc = lookup_page_cgroup(page); |
1931 | bool need_unlock = false; | 1940 | bool need_unlock = false; |
1932 | unsigned long uninitialized_var(flags); | 1941 | unsigned long uninitialized_var(flags); |
@@ -1935,16 +1944,16 @@ void mem_cgroup_update_page_stat(struct page *page, | |||
1935 | return; | 1944 | return; |
1936 | 1945 | ||
1937 | rcu_read_lock(); | 1946 | rcu_read_lock(); |
1938 | mem = pc->mem_cgroup; | 1947 | memcg = pc->mem_cgroup; |
1939 | if (unlikely(!mem || !PageCgroupUsed(pc))) | 1948 | if (unlikely(!memcg || !PageCgroupUsed(pc))) |
1940 | goto out; | 1949 | goto out; |
1941 | /* is pc->mem_cgroup unstable? */ | 1950 | /* is pc->mem_cgroup unstable? */ |
1942 | if (unlikely(mem_cgroup_stealed(mem)) || PageTransHuge(page)) { | 1951 | if (unlikely(mem_cgroup_stealed(memcg)) || PageTransHuge(page)) { |
1943 | /* take a lock to protect access to pc->mem_cgroup */ | 1952 | /* take a lock to protect access to pc->mem_cgroup */ |
1944 | move_lock_page_cgroup(pc, &flags); | 1953 | move_lock_page_cgroup(pc, &flags); |
1945 | need_unlock = true; | 1954 | need_unlock = true; |
1946 | mem = pc->mem_cgroup; | 1955 | memcg = pc->mem_cgroup; |
1947 | if (!mem || !PageCgroupUsed(pc)) | 1956 | if (!memcg || !PageCgroupUsed(pc)) |
1948 | goto out; | 1957 | goto out; |
1949 | } | 1958 | } |
1950 | 1959 | ||
@@ -1960,7 +1969,7 @@ void mem_cgroup_update_page_stat(struct page *page, | |||
1960 | BUG(); | 1969 | BUG(); |
1961 | } | 1970 | } |
1962 | 1971 | ||
1963 | this_cpu_add(mem->stat->count[idx], val); | 1972 | this_cpu_add(memcg->stat->count[idx], val); |
1964 | 1973 | ||
1965 | out: | 1974 | out: |
1966 | if (unlikely(need_unlock)) | 1975 | if (unlikely(need_unlock)) |
@@ -1991,13 +2000,13 @@ static DEFINE_MUTEX(percpu_charge_mutex); | |||
1991 | * cgroup which is not current target, returns false. This stock will be | 2000 | * cgroup which is not current target, returns false. This stock will be |
1992 | * refilled. | 2001 | * refilled. |
1993 | */ | 2002 | */ |
1994 | static bool consume_stock(struct mem_cgroup *mem) | 2003 | static bool consume_stock(struct mem_cgroup *memcg) |
1995 | { | 2004 | { |
1996 | struct memcg_stock_pcp *stock; | 2005 | struct memcg_stock_pcp *stock; |
1997 | bool ret = true; | 2006 | bool ret = true; |
1998 | 2007 | ||
1999 | stock = &get_cpu_var(memcg_stock); | 2008 | stock = &get_cpu_var(memcg_stock); |
2000 | if (mem == stock->cached && stock->nr_pages) | 2009 | if (memcg == stock->cached && stock->nr_pages) |
2001 | stock->nr_pages--; | 2010 | stock->nr_pages--; |
2002 | else /* need to call res_counter_charge */ | 2011 | else /* need to call res_counter_charge */ |
2003 | ret = false; | 2012 | ret = false; |
@@ -2038,24 +2047,24 @@ static void drain_local_stock(struct work_struct *dummy) | |||
2038 | * Cache charges (val), taken from the res_counter, in the local per-cpu area. | 2047 | * Cache charges (val), taken from the res_counter, in the local per-cpu area. |
2039 | * They will be consumed later by the consume_stock() function. | 2048 | * They will be consumed later by the consume_stock() function. |
2040 | */ | 2049 | */ |
2041 | static void refill_stock(struct mem_cgroup *mem, unsigned int nr_pages) | 2050 | static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) |
2042 | { | 2051 | { |
2043 | struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock); | 2052 | struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock); |
2044 | 2053 | ||
2045 | if (stock->cached != mem) { /* reset if necessary */ | 2054 | if (stock->cached != memcg) { /* reset if necessary */ |
2046 | drain_stock(stock); | 2055 | drain_stock(stock); |
2047 | stock->cached = mem; | 2056 | stock->cached = memcg; |
2048 | } | 2057 | } |
2049 | stock->nr_pages += nr_pages; | 2058 | stock->nr_pages += nr_pages; |
2050 | put_cpu_var(memcg_stock); | 2059 | put_cpu_var(memcg_stock); |
2051 | } | 2060 | } |
2052 | 2061 | ||
2053 | /* | 2062 | /* |
2054 | * Drains all per-CPU charge caches for the given root_mem, i.e. the subtree | 2063 | * Drains all per-CPU charge caches for the given root_memcg, i.e. the subtree |
2055 | * of the hierarchy under it. The sync flag says whether we should block | 2064 | * of the hierarchy under it. The sync flag says whether we should block |
2056 | * until the work is done. | 2065 | * until the work is done. |
2057 | */ | 2066 | */ |
2058 | static void drain_all_stock(struct mem_cgroup *root_mem, bool sync) | 2067 | static void drain_all_stock(struct mem_cgroup *root_memcg, bool sync) |
2059 | { | 2068 | { |
2060 | int cpu, curcpu; | 2069 | int cpu, curcpu; |
2061 | 2070 | ||
@@ -2064,12 +2073,12 @@ static void drain_all_stock(struct mem_cgroup *root_mem, bool sync) | |||
2064 | curcpu = get_cpu(); | 2073 | curcpu = get_cpu(); |
2065 | for_each_online_cpu(cpu) { | 2074 | for_each_online_cpu(cpu) { |
2066 | struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); | 2075 | struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu); |
2067 | struct mem_cgroup *mem; | 2076 | struct mem_cgroup *memcg; |
2068 | 2077 | ||
2069 | mem = stock->cached; | 2078 | memcg = stock->cached; |
2070 | if (!mem || !stock->nr_pages) | 2079 | if (!memcg || !stock->nr_pages) |
2071 | continue; | 2080 | continue; |
2072 | if (!mem_cgroup_same_or_subtree(root_mem, mem)) | 2081 | if (!mem_cgroup_same_or_subtree(root_memcg, memcg)) |
2073 | continue; | 2082 | continue; |
2074 | if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) { | 2083 | if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) { |
2075 | if (cpu == curcpu) | 2084 | if (cpu == curcpu) |
@@ -2098,23 +2107,23 @@ out: | |||
2098 | * expects some charges will be returned to the res_counter later but cannot | 2107 | * expects some charges will be returned to the res_counter later but cannot |
2099 | * wait for it. | 2108 | * wait for it. |
2100 | */ | 2109 | */ |
2101 | static void drain_all_stock_async(struct mem_cgroup *root_mem) | 2110 | static void drain_all_stock_async(struct mem_cgroup *root_memcg) |
2102 | { | 2111 | { |
2103 | /* | 2112 | /* |
2104 | * If someone calls draining, avoid adding more kworker runs. | 2113 | * If someone calls draining, avoid adding more kworker runs. |
2105 | */ | 2114 | */ |
2106 | if (!mutex_trylock(&percpu_charge_mutex)) | 2115 | if (!mutex_trylock(&percpu_charge_mutex)) |
2107 | return; | 2116 | return; |
2108 | drain_all_stock(root_mem, false); | 2117 | drain_all_stock(root_memcg, false); |
2109 | mutex_unlock(&percpu_charge_mutex); | 2118 | mutex_unlock(&percpu_charge_mutex); |
2110 | } | 2119 | } |
2111 | 2120 | ||
2112 | /* This is a synchronous drain interface. */ | 2121 | /* This is a synchronous drain interface. */ |
2113 | static void drain_all_stock_sync(struct mem_cgroup *root_mem) | 2122 | static void drain_all_stock_sync(struct mem_cgroup *root_memcg) |
2114 | { | 2123 | { |
2115 | /* called when force_empty is called */ | 2124 | /* called when force_empty is called */ |
2116 | mutex_lock(&percpu_charge_mutex); | 2125 | mutex_lock(&percpu_charge_mutex); |
2117 | drain_all_stock(root_mem, true); | 2126 | drain_all_stock(root_memcg, true); |
2118 | mutex_unlock(&percpu_charge_mutex); | 2127 | mutex_unlock(&percpu_charge_mutex); |
2119 | } | 2128 | } |
2120 | 2129 | ||
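consume_stock(), refill_stock() and drain_all_stock() above keep a per-CPU cache of pre-charged pages so that most single-page charges avoid the shared res_counter. Here is a rough single-CPU userspace sketch of that consume/refill pattern; the cgroup and stock types and the charge_counter() stand-in for res_counter_charge() are assumptions for illustration only.

#include <stdbool.h>
#include <stdio.h>

/* hypothetical single-CPU model of the memcg charge stock */
struct cgroup { const char *name; long charged; };

struct stock {
	struct cgroup *cached;	/* which cgroup the precharge belongs to */
	unsigned int nr_pages;	/* pages already charged but not yet used */
};

static struct stock pcp_stock;

/* expensive path: charge the shared counter (res_counter in the kernel) */
static void charge_counter(struct cgroup *cg, unsigned int pages)
{
	cg->charged += pages;
	printf("slow path: charged %u page(s) to %s\n", pages, cg->name);
}

/* fast path: take one page from the local stock if it matches */
static bool consume_stock(struct cgroup *cg)
{
	if (pcp_stock.cached == cg && pcp_stock.nr_pages) {
		pcp_stock.nr_pages--;
		return true;
	}
	return false;
}

/* cache leftover precharge for later fast-path hits */
static void refill_stock(struct cgroup *cg, unsigned int pages)
{
	if (pcp_stock.cached != cg) {
		/* return the old cgroup's stock before switching owners */
		if (pcp_stock.cached)
			pcp_stock.cached->charged -= pcp_stock.nr_pages;
		pcp_stock.nr_pages = 0;
		pcp_stock.cached = cg;
	}
	pcp_stock.nr_pages += pages;
}

int main(void)
{
	struct cgroup a = { "A", 0 };
	unsigned int batch = 4, i;

	charge_counter(&a, batch);	/* charge a batch up front */
	refill_stock(&a, batch - 1);	/* keep the surplus locally */
	for (i = 0; i < 3; i++)
		printf("fast path hit: %d\n", consume_stock(&a));
	printf("stock empty, next charge is slow: %d\n", consume_stock(&a));
	return 0;
}

The design point is that the common path touches only CPU-local state; the counter is hit once per batch, and the drain paths exist to hand cached pages back when a cgroup must be emptied or shrunk.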
@@ -2122,35 +2131,35 @@ static void drain_all_stock_sync(struct mem_cgroup *root_mem) | |||
2122 | * This function drains the percpu counter values from a DEAD cpu and | 2131 | * This function drains the percpu counter values from a DEAD cpu and |
2123 | * moves them to the local cpu. Note that this function can be preempted. | 2132 | * moves them to the local cpu. Note that this function can be preempted. |
2124 | */ | 2133 | */ |
2125 | static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *mem, int cpu) | 2134 | static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *memcg, int cpu) |
2126 | { | 2135 | { |
2127 | int i; | 2136 | int i; |
2128 | 2137 | ||
2129 | spin_lock(&mem->pcp_counter_lock); | 2138 | spin_lock(&memcg->pcp_counter_lock); |
2130 | for (i = 0; i < MEM_CGROUP_STAT_DATA; i++) { | 2139 | for (i = 0; i < MEM_CGROUP_STAT_DATA; i++) { |
2131 | long x = per_cpu(mem->stat->count[i], cpu); | 2140 | long x = per_cpu(memcg->stat->count[i], cpu); |
2132 | 2141 | ||
2133 | per_cpu(mem->stat->count[i], cpu) = 0; | 2142 | per_cpu(memcg->stat->count[i], cpu) = 0; |
2134 | mem->nocpu_base.count[i] += x; | 2143 | memcg->nocpu_base.count[i] += x; |
2135 | } | 2144 | } |
2136 | for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) { | 2145 | for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) { |
2137 | unsigned long x = per_cpu(mem->stat->events[i], cpu); | 2146 | unsigned long x = per_cpu(memcg->stat->events[i], cpu); |
2138 | 2147 | ||
2139 | per_cpu(mem->stat->events[i], cpu) = 0; | 2148 | per_cpu(memcg->stat->events[i], cpu) = 0; |
2140 | mem->nocpu_base.events[i] += x; | 2149 | memcg->nocpu_base.events[i] += x; |
2141 | } | 2150 | } |
2142 | /* need to clear ON_MOVE value, works as a kind of lock. */ | 2151 | /* need to clear ON_MOVE value, works as a kind of lock. */ |
2143 | per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) = 0; | 2152 | per_cpu(memcg->stat->count[MEM_CGROUP_ON_MOVE], cpu) = 0; |
2144 | spin_unlock(&mem->pcp_counter_lock); | 2153 | spin_unlock(&memcg->pcp_counter_lock); |
2145 | } | 2154 | } |
2146 | 2155 | ||
2147 | static void synchronize_mem_cgroup_on_move(struct mem_cgroup *mem, int cpu) | 2156 | static void synchronize_mem_cgroup_on_move(struct mem_cgroup *memcg, int cpu) |
2148 | { | 2157 | { |
2149 | int idx = MEM_CGROUP_ON_MOVE; | 2158 | int idx = MEM_CGROUP_ON_MOVE; |
2150 | 2159 | ||
2151 | spin_lock(&mem->pcp_counter_lock); | 2160 | spin_lock(&memcg->pcp_counter_lock); |
2152 | per_cpu(mem->stat->count[idx], cpu) = mem->nocpu_base.count[idx]; | 2161 | per_cpu(memcg->stat->count[idx], cpu) = memcg->nocpu_base.count[idx]; |
2153 | spin_unlock(&mem->pcp_counter_lock); | 2162 | spin_unlock(&memcg->pcp_counter_lock); |
2154 | } | 2163 | } |
2155 | 2164 | ||
2156 | static int __cpuinit memcg_cpu_hotplug_callback(struct notifier_block *nb, | 2165 | static int __cpuinit memcg_cpu_hotplug_callback(struct notifier_block *nb, |
@@ -2188,7 +2197,7 @@ enum { | |||
2188 | CHARGE_OOM_DIE, /* the current is killed because of OOM */ | 2197 | CHARGE_OOM_DIE, /* the current is killed because of OOM */ |
2189 | }; | 2198 | }; |
2190 | 2199 | ||
2191 | static int mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask, | 2200 | static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, |
2192 | unsigned int nr_pages, bool oom_check) | 2201 | unsigned int nr_pages, bool oom_check) |
2193 | { | 2202 | { |
2194 | unsigned long csize = nr_pages * PAGE_SIZE; | 2203 | unsigned long csize = nr_pages * PAGE_SIZE; |
@@ -2197,16 +2206,16 @@ static int mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask, | |||
2197 | unsigned long flags = 0; | 2206 | unsigned long flags = 0; |
2198 | int ret; | 2207 | int ret; |
2199 | 2208 | ||
2200 | ret = res_counter_charge(&mem->res, csize, &fail_res); | 2209 | ret = res_counter_charge(&memcg->res, csize, &fail_res); |
2201 | 2210 | ||
2202 | if (likely(!ret)) { | 2211 | if (likely(!ret)) { |
2203 | if (!do_swap_account) | 2212 | if (!do_swap_account) |
2204 | return CHARGE_OK; | 2213 | return CHARGE_OK; |
2205 | ret = res_counter_charge(&mem->memsw, csize, &fail_res); | 2214 | ret = res_counter_charge(&memcg->memsw, csize, &fail_res); |
2206 | if (likely(!ret)) | 2215 | if (likely(!ret)) |
2207 | return CHARGE_OK; | 2216 | return CHARGE_OK; |
2208 | 2217 | ||
2209 | res_counter_uncharge(&mem->res, csize); | 2218 | res_counter_uncharge(&memcg->res, csize); |
2210 | mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw); | 2219 | mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw); |
2211 | flags |= MEM_CGROUP_RECLAIM_NOSWAP; | 2220 | flags |= MEM_CGROUP_RECLAIM_NOSWAP; |
2212 | } else | 2221 | } else |
@@ -2264,12 +2273,12 @@ static int mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask, | |||
2264 | static int __mem_cgroup_try_charge(struct mm_struct *mm, | 2273 | static int __mem_cgroup_try_charge(struct mm_struct *mm, |
2265 | gfp_t gfp_mask, | 2274 | gfp_t gfp_mask, |
2266 | unsigned int nr_pages, | 2275 | unsigned int nr_pages, |
2267 | struct mem_cgroup **memcg, | 2276 | struct mem_cgroup **ptr, |
2268 | bool oom) | 2277 | bool oom) |
2269 | { | 2278 | { |
2270 | unsigned int batch = max(CHARGE_BATCH, nr_pages); | 2279 | unsigned int batch = max(CHARGE_BATCH, nr_pages); |
2271 | int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES; | 2280 | int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES; |
2272 | struct mem_cgroup *mem = NULL; | 2281 | struct mem_cgroup *memcg = NULL; |
2273 | int ret; | 2282 | int ret; |
2274 | 2283 | ||
2275 | /* | 2284 | /* |
@@ -2287,17 +2296,17 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
2287 | * thread group leader migrates. It's possible that mm is not | 2296 | * thread group leader migrates. It's possible that mm is not |
2288 | * set, if so charge the init_mm (happens for pagecache usage). | 2297 | * set, if so charge the init_mm (happens for pagecache usage). |
2289 | */ | 2298 | */ |
2290 | if (!*memcg && !mm) | 2299 | if (!*ptr && !mm) |
2291 | goto bypass; | 2300 | goto bypass; |
2292 | again: | 2301 | again: |
2293 | if (*memcg) { /* css should be a valid one */ | 2302 | if (*ptr) { /* css should be a valid one */ |
2294 | mem = *memcg; | 2303 | memcg = *ptr; |
2295 | VM_BUG_ON(css_is_removed(&mem->css)); | 2304 | VM_BUG_ON(css_is_removed(&memcg->css)); |
2296 | if (mem_cgroup_is_root(mem)) | 2305 | if (mem_cgroup_is_root(memcg)) |
2297 | goto done; | 2306 | goto done; |
2298 | if (nr_pages == 1 && consume_stock(mem)) | 2307 | if (nr_pages == 1 && consume_stock(memcg)) |
2299 | goto done; | 2308 | goto done; |
2300 | css_get(&mem->css); | 2309 | css_get(&memcg->css); |
2301 | } else { | 2310 | } else { |
2302 | struct task_struct *p; | 2311 | struct task_struct *p; |
2303 | 2312 | ||
@@ -2305,7 +2314,7 @@ again: | |||
2305 | p = rcu_dereference(mm->owner); | 2314 | p = rcu_dereference(mm->owner); |
2306 | /* | 2315 | /* |
2307 | * Because we don't have task_lock(), "p" can exit. | 2316 | * Because we don't have task_lock(), "p" can exit. |
2308 | * In that case, "mem" can point to root or p can be NULL with | 2317 | * In that case, "memcg" can point to root or p can be NULL with |
2309 | * race with swapoff. Then, we have a small risk of mis-accounting. | 2318 | * race with swapoff. Then, we have a small risk of mis-accounting. |
2310 | * But such racy mis-accounting always happens because | 2319 | * But such racy mis-accounting always happens because |
2311 | * we don't have cgroup_mutex(). It's overkill and we allow that | 2320 | * we don't have cgroup_mutex(). It's overkill and we allow that |
@@ -2313,12 +2322,12 @@ again: | |||
2313 | * (*) swapoff et al. will charge against mm-struct not against | 2322 | * (*) swapoff et al. will charge against mm-struct not against |
2314 | * task-struct. So, mm->owner can be NULL. | 2323 | * task-struct. So, mm->owner can be NULL. |
2315 | */ | 2324 | */ |
2316 | mem = mem_cgroup_from_task(p); | 2325 | memcg = mem_cgroup_from_task(p); |
2317 | if (!mem || mem_cgroup_is_root(mem)) { | 2326 | if (!memcg || mem_cgroup_is_root(memcg)) { |
2318 | rcu_read_unlock(); | 2327 | rcu_read_unlock(); |
2319 | goto done; | 2328 | goto done; |
2320 | } | 2329 | } |
2321 | if (nr_pages == 1 && consume_stock(mem)) { | 2330 | if (nr_pages == 1 && consume_stock(memcg)) { |
2322 | /* | 2331 | /* |
2323 | * It seems dangerous to access memcg without css_get(). | 2332 | * It seems dangerous to access memcg without css_get(). |
2324 | * But considering how consume_stock works, it's not | 2333 | * But considering how consume_stock works, it's not |
@@ -2331,7 +2340,7 @@ again: | |||
2331 | goto done; | 2340 | goto done; |
2332 | } | 2341 | } |
2333 | /* after here, we may block; we need to get a refcnt */ | 2342 | /* after here, we may block; we need to get a refcnt */ |
2334 | if (!css_tryget(&mem->css)) { | 2343 | if (!css_tryget(&memcg->css)) { |
2335 | rcu_read_unlock(); | 2344 | rcu_read_unlock(); |
2336 | goto again; | 2345 | goto again; |
2337 | } | 2346 | } |
@@ -2343,7 +2352,7 @@ again: | |||
2343 | 2352 | ||
2344 | /* If killed, bypass charge */ | 2353 | /* If killed, bypass charge */ |
2345 | if (fatal_signal_pending(current)) { | 2354 | if (fatal_signal_pending(current)) { |
2346 | css_put(&mem->css); | 2355 | css_put(&memcg->css); |
2347 | goto bypass; | 2356 | goto bypass; |
2348 | } | 2357 | } |
2349 | 2358 | ||
@@ -2353,43 +2362,43 @@ again: | |||
2353 | nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES; | 2362 | nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES; |
2354 | } | 2363 | } |
2355 | 2364 | ||
2356 | ret = mem_cgroup_do_charge(mem, gfp_mask, batch, oom_check); | 2365 | ret = mem_cgroup_do_charge(memcg, gfp_mask, batch, oom_check); |
2357 | switch (ret) { | 2366 | switch (ret) { |
2358 | case CHARGE_OK: | 2367 | case CHARGE_OK: |
2359 | break; | 2368 | break; |
2360 | case CHARGE_RETRY: /* not in OOM situation but retry */ | 2369 | case CHARGE_RETRY: /* not in OOM situation but retry */ |
2361 | batch = nr_pages; | 2370 | batch = nr_pages; |
2362 | css_put(&mem->css); | 2371 | css_put(&memcg->css); |
2363 | mem = NULL; | 2372 | memcg = NULL; |
2364 | goto again; | 2373 | goto again; |
2365 | case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */ | 2374 | case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */ |
2366 | css_put(&mem->css); | 2375 | css_put(&memcg->css); |
2367 | goto nomem; | 2376 | goto nomem; |
2368 | case CHARGE_NOMEM: /* OOM routine works */ | 2377 | case CHARGE_NOMEM: /* OOM routine works */ |
2369 | if (!oom) { | 2378 | if (!oom) { |
2370 | css_put(&mem->css); | 2379 | css_put(&memcg->css); |
2371 | goto nomem; | 2380 | goto nomem; |
2372 | } | 2381 | } |
2373 | /* If oom, we never return -ENOMEM */ | 2382 | /* If oom, we never return -ENOMEM */ |
2374 | nr_oom_retries--; | 2383 | nr_oom_retries--; |
2375 | break; | 2384 | break; |
2376 | case CHARGE_OOM_DIE: /* Killed by OOM Killer */ | 2385 | case CHARGE_OOM_DIE: /* Killed by OOM Killer */ |
2377 | css_put(&mem->css); | 2386 | css_put(&memcg->css); |
2378 | goto bypass; | 2387 | goto bypass; |
2379 | } | 2388 | } |
2380 | } while (ret != CHARGE_OK); | 2389 | } while (ret != CHARGE_OK); |
2381 | 2390 | ||
2382 | if (batch > nr_pages) | 2391 | if (batch > nr_pages) |
2383 | refill_stock(mem, batch - nr_pages); | 2392 | refill_stock(memcg, batch - nr_pages); |
2384 | css_put(&mem->css); | 2393 | css_put(&memcg->css); |
2385 | done: | 2394 | done: |
2386 | *memcg = mem; | 2395 | *ptr = memcg; |
2387 | return 0; | 2396 | return 0; |
2388 | nomem: | 2397 | nomem: |
2389 | *memcg = NULL; | 2398 | *ptr = NULL; |
2390 | return -ENOMEM; | 2399 | return -ENOMEM; |
2391 | bypass: | 2400 | bypass: |
2392 | *memcg = NULL; | 2401 | *ptr = NULL; |
2393 | return 0; | 2402 | return 0; |
2394 | } | 2403 | } |
2395 | 2404 | ||
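__mem_cgroup_try_charge() above loops on mem_cgroup_do_charge() and reacts to the CHARGE_* results: succeed, retry, bail out, or allow an OOM kill once the plain retries are exhausted. Below is a loose, self-contained sketch of that state machine with a fake do_charge() that succeeds after a few rounds of pretend reclaim; the retry accounting is deliberately simplified and the helper names are stand-ins, not the kernel logic.

#include <stdio.h>

/* hypothetical outcomes, mirroring the CHARGE_* values in the hunks above */
enum charge_result { CHARGE_OK, CHARGE_RETRY, CHARGE_WOULDBLOCK,
		     CHARGE_NOMEM, CHARGE_OOM_DIE };

#define RECLAIM_RETRIES 5

/* fake do_charge(): ignores oom_check and succeeds once the pretend
 * reclaim below has made enough progress */
static int reclaim_progress;
static enum charge_result do_charge(int oom_check)
{
	(void)oom_check;
	if (reclaim_progress >= 3)
		return CHARGE_OK;
	reclaim_progress++;
	return CHARGE_NOMEM;
}

/* 0 = charged, -1 = could not charge, 1 = bypassed (caller was killed) */
static int try_charge(int oom_allowed)
{
	int nr_oom_retries = RECLAIM_RETRIES;
	enum charge_result ret;

	do {
		int oom_check = 0;

		/* out of plain retries: let the next attempt consider OOM */
		if (oom_allowed && nr_oom_retries == 0) {
			oom_check = 1;
			nr_oom_retries = RECLAIM_RETRIES;
		}

		ret = do_charge(oom_check);
		switch (ret) {
		case CHARGE_OK:
			break;
		case CHARGE_RETRY:		/* reclaim made progress */
			break;
		case CHARGE_WOULDBLOCK:		/* caller cannot sleep */
			return -1;
		case CHARGE_NOMEM:
			if (!oom_allowed)
				return -1;
			nr_oom_retries--;	/* keep trying before OOM */
			break;
		case CHARGE_OOM_DIE:		/* we were the OOM victim */
			return 1;
		}
	} while (ret != CHARGE_OK);
	return 0;
}

int main(void)
{
	printf("charge result: %d\n", try_charge(1));
	return 0;
}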
@@ -2398,15 +2407,15 @@ bypass: | |||
2398 | * This function is for that: it does the uncharge and puts the css refcnt | 2407 | * This function is for that: it does the uncharge and puts the css refcnt |
2399 | * gotten by try_charge(). | 2408 | * gotten by try_charge(). |
2400 | */ | 2409 | */ |
2401 | static void __mem_cgroup_cancel_charge(struct mem_cgroup *mem, | 2410 | static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg, |
2402 | unsigned int nr_pages) | 2411 | unsigned int nr_pages) |
2403 | { | 2412 | { |
2404 | if (!mem_cgroup_is_root(mem)) { | 2413 | if (!mem_cgroup_is_root(memcg)) { |
2405 | unsigned long bytes = nr_pages * PAGE_SIZE; | 2414 | unsigned long bytes = nr_pages * PAGE_SIZE; |
2406 | 2415 | ||
2407 | res_counter_uncharge(&mem->res, bytes); | 2416 | res_counter_uncharge(&memcg->res, bytes); |
2408 | if (do_swap_account) | 2417 | if (do_swap_account) |
2409 | res_counter_uncharge(&mem->memsw, bytes); | 2418 | res_counter_uncharge(&memcg->memsw, bytes); |
2410 | } | 2419 | } |
2411 | } | 2420 | } |
2412 | 2421 | ||
@@ -2431,7 +2440,7 @@ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id) | |||
2431 | 2440 | ||
2432 | struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) | 2441 | struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) |
2433 | { | 2442 | { |
2434 | struct mem_cgroup *mem = NULL; | 2443 | struct mem_cgroup *memcg = NULL; |
2435 | struct page_cgroup *pc; | 2444 | struct page_cgroup *pc; |
2436 | unsigned short id; | 2445 | unsigned short id; |
2437 | swp_entry_t ent; | 2446 | swp_entry_t ent; |
@@ -2441,23 +2450,23 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) | |||
2441 | pc = lookup_page_cgroup(page); | 2450 | pc = lookup_page_cgroup(page); |
2442 | lock_page_cgroup(pc); | 2451 | lock_page_cgroup(pc); |
2443 | if (PageCgroupUsed(pc)) { | 2452 | if (PageCgroupUsed(pc)) { |
2444 | mem = pc->mem_cgroup; | 2453 | memcg = pc->mem_cgroup; |
2445 | if (mem && !css_tryget(&mem->css)) | 2454 | if (memcg && !css_tryget(&memcg->css)) |
2446 | mem = NULL; | 2455 | memcg = NULL; |
2447 | } else if (PageSwapCache(page)) { | 2456 | } else if (PageSwapCache(page)) { |
2448 | ent.val = page_private(page); | 2457 | ent.val = page_private(page); |
2449 | id = lookup_swap_cgroup(ent); | 2458 | id = lookup_swap_cgroup(ent); |
2450 | rcu_read_lock(); | 2459 | rcu_read_lock(); |
2451 | mem = mem_cgroup_lookup(id); | 2460 | memcg = mem_cgroup_lookup(id); |
2452 | if (mem && !css_tryget(&mem->css)) | 2461 | if (memcg && !css_tryget(&memcg->css)) |
2453 | mem = NULL; | 2462 | memcg = NULL; |
2454 | rcu_read_unlock(); | 2463 | rcu_read_unlock(); |
2455 | } | 2464 | } |
2456 | unlock_page_cgroup(pc); | 2465 | unlock_page_cgroup(pc); |
2457 | return mem; | 2466 | return memcg; |
2458 | } | 2467 | } |
2459 | 2468 | ||
2460 | static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, | 2469 | static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, |
2461 | struct page *page, | 2470 | struct page *page, |
2462 | unsigned int nr_pages, | 2471 | unsigned int nr_pages, |
2463 | struct page_cgroup *pc, | 2472 | struct page_cgroup *pc, |
@@ -2466,14 +2475,14 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, | |||
2466 | lock_page_cgroup(pc); | 2475 | lock_page_cgroup(pc); |
2467 | if (unlikely(PageCgroupUsed(pc))) { | 2476 | if (unlikely(PageCgroupUsed(pc))) { |
2468 | unlock_page_cgroup(pc); | 2477 | unlock_page_cgroup(pc); |
2469 | __mem_cgroup_cancel_charge(mem, nr_pages); | 2478 | __mem_cgroup_cancel_charge(memcg, nr_pages); |
2470 | return; | 2479 | return; |
2471 | } | 2480 | } |
2472 | /* | 2481 | /* |
2473 | * we don't need page_cgroup_lock for tail pages, because they are not | 2482 | * we don't need page_cgroup_lock for tail pages, because they are not |
2474 | * accessed by any other context at this point. | 2483 | * accessed by any other context at this point. |
2475 | */ | 2484 | */ |
2476 | pc->mem_cgroup = mem; | 2485 | pc->mem_cgroup = memcg; |
2477 | /* | 2486 | /* |
2478 | * We access a page_cgroup asynchronously without lock_page_cgroup(). | 2487 | * We access a page_cgroup asynchronously without lock_page_cgroup(). |
2479 | * Especially when a page_cgroup is taken from a page, pc->mem_cgroup | 2488 | * Especially when a page_cgroup is taken from a page, pc->mem_cgroup |
@@ -2496,14 +2505,14 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, | |||
2496 | break; | 2505 | break; |
2497 | } | 2506 | } |
2498 | 2507 | ||
2499 | mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), nr_pages); | 2508 | mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), nr_pages); |
2500 | unlock_page_cgroup(pc); | 2509 | unlock_page_cgroup(pc); |
2501 | /* | 2510 | /* |
2502 | * "charge_statistics" updated event counter. Then, check it. | 2511 | * "charge_statistics" updated event counter. Then, check it. |
2503 | * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree. | 2512 | * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree. |
2504 | * if they exceeds softlimit. | 2513 | * if they exceeds softlimit. |
2505 | */ | 2514 | */ |
2506 | memcg_check_events(mem, page); | 2515 | memcg_check_events(memcg, page); |
2507 | } | 2516 | } |
2508 | 2517 | ||
2509 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 2518 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
@@ -2690,7 +2699,7 @@ out: | |||
2690 | static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, | 2699 | static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, |
2691 | gfp_t gfp_mask, enum charge_type ctype) | 2700 | gfp_t gfp_mask, enum charge_type ctype) |
2692 | { | 2701 | { |
2693 | struct mem_cgroup *mem = NULL; | 2702 | struct mem_cgroup *memcg = NULL; |
2694 | unsigned int nr_pages = 1; | 2703 | unsigned int nr_pages = 1; |
2695 | struct page_cgroup *pc; | 2704 | struct page_cgroup *pc; |
2696 | bool oom = true; | 2705 | bool oom = true; |
@@ -2709,11 +2718,11 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, | |||
2709 | pc = lookup_page_cgroup(page); | 2718 | pc = lookup_page_cgroup(page); |
2710 | BUG_ON(!pc); /* XXX: remove this and move pc lookup into commit */ | 2719 | BUG_ON(!pc); /* XXX: remove this and move pc lookup into commit */ |
2711 | 2720 | ||
2712 | ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &mem, oom); | 2721 | ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom); |
2713 | if (ret || !mem) | 2722 | if (ret || !memcg) |
2714 | return ret; | 2723 | return ret; |
2715 | 2724 | ||
2716 | __mem_cgroup_commit_charge(mem, page, nr_pages, pc, ctype); | 2725 | __mem_cgroup_commit_charge(memcg, page, nr_pages, pc, ctype); |
2717 | return 0; | 2726 | return 0; |
2718 | } | 2727 | } |
2719 | 2728 | ||
@@ -2742,7 +2751,7 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr, | |||
2742 | enum charge_type ctype); | 2751 | enum charge_type ctype); |
2743 | 2752 | ||
2744 | static void | 2753 | static void |
2745 | __mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *mem, | 2754 | __mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *memcg, |
2746 | enum charge_type ctype) | 2755 | enum charge_type ctype) |
2747 | { | 2756 | { |
2748 | struct page_cgroup *pc = lookup_page_cgroup(page); | 2757 | struct page_cgroup *pc = lookup_page_cgroup(page); |
@@ -2752,7 +2761,7 @@ __mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *mem, | |||
2752 | * LRU. Take care of it. | 2761 | * LRU. Take care of it. |
2753 | */ | 2762 | */ |
2754 | mem_cgroup_lru_del_before_commit(page); | 2763 | mem_cgroup_lru_del_before_commit(page); |
2755 | __mem_cgroup_commit_charge(mem, page, 1, pc, ctype); | 2764 | __mem_cgroup_commit_charge(memcg, page, 1, pc, ctype); |
2756 | mem_cgroup_lru_add_after_commit(page); | 2765 | mem_cgroup_lru_add_after_commit(page); |
2757 | return; | 2766 | return; |
2758 | } | 2767 | } |
@@ -2760,7 +2769,7 @@ __mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *mem, | |||
2760 | int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, | 2769 | int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, |
2761 | gfp_t gfp_mask) | 2770 | gfp_t gfp_mask) |
2762 | { | 2771 | { |
2763 | struct mem_cgroup *mem = NULL; | 2772 | struct mem_cgroup *memcg = NULL; |
2764 | int ret; | 2773 | int ret; |
2765 | 2774 | ||
2766 | if (mem_cgroup_disabled()) | 2775 | if (mem_cgroup_disabled()) |
@@ -2772,8 +2781,8 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, | |||
2772 | mm = &init_mm; | 2781 | mm = &init_mm; |
2773 | 2782 | ||
2774 | if (page_is_file_cache(page)) { | 2783 | if (page_is_file_cache(page)) { |
2775 | ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, &mem, true); | 2784 | ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, &memcg, true); |
2776 | if (ret || !mem) | 2785 | if (ret || !memcg) |
2777 | return ret; | 2786 | return ret; |
2778 | 2787 | ||
2779 | /* | 2788 | /* |
@@ -2781,15 +2790,15 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, | |||
2781 | * put that would remove them from the LRU list, make | 2790 | * put that would remove them from the LRU list, make |
2782 | * sure that they get relinked properly. | 2791 | * sure that they get relinked properly. |
2783 | */ | 2792 | */ |
2784 | __mem_cgroup_commit_charge_lrucare(page, mem, | 2793 | __mem_cgroup_commit_charge_lrucare(page, memcg, |
2785 | MEM_CGROUP_CHARGE_TYPE_CACHE); | 2794 | MEM_CGROUP_CHARGE_TYPE_CACHE); |
2786 | return ret; | 2795 | return ret; |
2787 | } | 2796 | } |
2788 | /* shmem */ | 2797 | /* shmem */ |
2789 | if (PageSwapCache(page)) { | 2798 | if (PageSwapCache(page)) { |
2790 | ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem); | 2799 | ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &memcg); |
2791 | if (!ret) | 2800 | if (!ret) |
2792 | __mem_cgroup_commit_charge_swapin(page, mem, | 2801 | __mem_cgroup_commit_charge_swapin(page, memcg, |
2793 | MEM_CGROUP_CHARGE_TYPE_SHMEM); | 2802 | MEM_CGROUP_CHARGE_TYPE_SHMEM); |
2794 | } else | 2803 | } else |
2795 | ret = mem_cgroup_charge_common(page, mm, gfp_mask, | 2804 | ret = mem_cgroup_charge_common(page, mm, gfp_mask, |
@@ -2808,7 +2817,7 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, | |||
2808 | struct page *page, | 2817 | struct page *page, |
2809 | gfp_t mask, struct mem_cgroup **ptr) | 2818 | gfp_t mask, struct mem_cgroup **ptr) |
2810 | { | 2819 | { |
2811 | struct mem_cgroup *mem; | 2820 | struct mem_cgroup *memcg; |
2812 | int ret; | 2821 | int ret; |
2813 | 2822 | ||
2814 | *ptr = NULL; | 2823 | *ptr = NULL; |
@@ -2826,12 +2835,12 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, | |||
2826 | */ | 2835 | */ |
2827 | if (!PageSwapCache(page)) | 2836 | if (!PageSwapCache(page)) |
2828 | goto charge_cur_mm; | 2837 | goto charge_cur_mm; |
2829 | mem = try_get_mem_cgroup_from_page(page); | 2838 | memcg = try_get_mem_cgroup_from_page(page); |
2830 | if (!mem) | 2839 | if (!memcg) |
2831 | goto charge_cur_mm; | 2840 | goto charge_cur_mm; |
2832 | *ptr = mem; | 2841 | *ptr = memcg; |
2833 | ret = __mem_cgroup_try_charge(NULL, mask, 1, ptr, true); | 2842 | ret = __mem_cgroup_try_charge(NULL, mask, 1, ptr, true); |
2834 | css_put(&mem->css); | 2843 | css_put(&memcg->css); |
2835 | return ret; | 2844 | return ret; |
2836 | charge_cur_mm: | 2845 | charge_cur_mm: |
2837 | if (unlikely(!mm)) | 2846 | if (unlikely(!mm)) |
@@ -2891,16 +2900,16 @@ void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr) | |||
2891 | MEM_CGROUP_CHARGE_TYPE_MAPPED); | 2900 | MEM_CGROUP_CHARGE_TYPE_MAPPED); |
2892 | } | 2901 | } |
2893 | 2902 | ||
2894 | void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem) | 2903 | void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg) |
2895 | { | 2904 | { |
2896 | if (mem_cgroup_disabled()) | 2905 | if (mem_cgroup_disabled()) |
2897 | return; | 2906 | return; |
2898 | if (!mem) | 2907 | if (!memcg) |
2899 | return; | 2908 | return; |
2900 | __mem_cgroup_cancel_charge(mem, 1); | 2909 | __mem_cgroup_cancel_charge(memcg, 1); |
2901 | } | 2910 | } |
2902 | 2911 | ||
2903 | static void mem_cgroup_do_uncharge(struct mem_cgroup *mem, | 2912 | static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg, |
2904 | unsigned int nr_pages, | 2913 | unsigned int nr_pages, |
2905 | const enum charge_type ctype) | 2914 | const enum charge_type ctype) |
2906 | { | 2915 | { |
@@ -2918,7 +2927,7 @@ static void mem_cgroup_do_uncharge(struct mem_cgroup *mem, | |||
2918 | * uncharges. Then, it's ok to ignore memcg's refcnt. | 2927 | * uncharges. Then, it's ok to ignore memcg's refcnt. |
2919 | */ | 2928 | */ |
2920 | if (!batch->memcg) | 2929 | if (!batch->memcg) |
2921 | batch->memcg = mem; | 2930 | batch->memcg = memcg; |
2922 | /* | 2931 | /* |
2923 | * do_batch > 0 when unmapping pages or inode invalidate/truncate. | 2932 | * do_batch > 0 when unmapping pages or inode invalidate/truncate. |
2924 | * In those cases, all pages freed continuously can be expected to be in | 2933 | * In those cases, all pages freed continuously can be expected to be in |
@@ -2938,7 +2947,7 @@ static void mem_cgroup_do_uncharge(struct mem_cgroup *mem, | |||
2938 | * merge a series of uncharges into a single res_counter uncharge. | 2947 | * merge a series of uncharges into a single res_counter uncharge. |
2939 | * If not, we uncharge the res_counter one by one. | 2948 | * If not, we uncharge the res_counter one by one. |
2940 | */ | 2949 | */ |
2941 | if (batch->memcg != mem) | 2950 | if (batch->memcg != memcg) |
2942 | goto direct_uncharge; | 2951 | goto direct_uncharge; |
2943 | /* remember freed charge and uncharge it later */ | 2952 | /* remember freed charge and uncharge it later */ |
2944 | batch->nr_pages++; | 2953 | batch->nr_pages++; |
@@ -2946,11 +2955,11 @@ static void mem_cgroup_do_uncharge(struct mem_cgroup *mem, | |||
2946 | batch->memsw_nr_pages++; | 2955 | batch->memsw_nr_pages++; |
2947 | return; | 2956 | return; |
2948 | direct_uncharge: | 2957 | direct_uncharge: |
2949 | res_counter_uncharge(&mem->res, nr_pages * PAGE_SIZE); | 2958 | res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE); |
2950 | if (uncharge_memsw) | 2959 | if (uncharge_memsw) |
2951 | res_counter_uncharge(&mem->memsw, nr_pages * PAGE_SIZE); | 2960 | res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE); |
2952 | if (unlikely(batch->memcg != mem)) | 2961 | if (unlikely(batch->memcg != memcg)) |
2953 | memcg_oom_recover(mem); | 2962 | memcg_oom_recover(memcg); |
2954 | return; | 2963 | return; |
2955 | } | 2964 | } |
2956 | 2965 | ||
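mem_cgroup_do_uncharge() above coalesces a run of uncharges against the same memcg (batch->memcg, batch->nr_pages) into a single res_counter update and falls back to a direct uncharge when the cgroup differs. A small userspace sketch of that batching idea follows, with hypothetical uncharge_start()/uncharge_end() helpers; it is an illustration under assumptions, not the kernel implementation.

#include <stdio.h>

/* hypothetical model of batched uncharge: coalesce per-page uncharges
 * into a single counter update when they all target the same cgroup */
struct cgroup { const char *name; long usage; };

struct uncharge_batch {
	int do_batch;		/* inside an unmap/truncate sequence? */
	struct cgroup *memcg;	/* cgroup the batch belongs to */
	unsigned long nr_pages;	/* pages collected so far */
};

static struct uncharge_batch batch;

static void counter_uncharge(struct cgroup *cg, unsigned long pages)
{
	cg->usage -= pages;
	printf("counter update: -%lu from %s (usage %ld)\n",
	       pages, cg->name, cg->usage);
}

static void do_uncharge(struct cgroup *cg)
{
	if (batch.do_batch) {
		if (!batch.memcg)
			batch.memcg = cg;	/* first page owns the batch */
		if (batch.memcg == cg) {
			batch.nr_pages++;	/* defer the counter update */
			return;
		}
	}
	counter_uncharge(cg, 1);		/* different cgroup: go direct */
}

static void uncharge_start(void) { batch.do_batch = 1; }

static void uncharge_end(void)
{
	batch.do_batch = 0;
	if (batch.memcg && batch.nr_pages)
		counter_uncharge(batch.memcg, batch.nr_pages);
	batch.memcg = NULL;
	batch.nr_pages = 0;
}

int main(void)
{
	struct cgroup a = { "A", 8 }, b = { "B", 2 };
	int i;

	uncharge_start();
	for (i = 0; i < 5; i++)
		do_uncharge(&a);	/* coalesced into one update */
	do_uncharge(&b);		/* not the batch owner: direct */
	uncharge_end();
	return 0;
}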
@@ -2960,7 +2969,7 @@ direct_uncharge: | |||
2960 | static struct mem_cgroup * | 2969 | static struct mem_cgroup * |
2961 | __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | 2970 | __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) |
2962 | { | 2971 | { |
2963 | struct mem_cgroup *mem = NULL; | 2972 | struct mem_cgroup *memcg = NULL; |
2964 | unsigned int nr_pages = 1; | 2973 | unsigned int nr_pages = 1; |
2965 | struct page_cgroup *pc; | 2974 | struct page_cgroup *pc; |
2966 | 2975 | ||
@@ -2983,7 +2992,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | |||
2983 | 2992 | ||
2984 | lock_page_cgroup(pc); | 2993 | lock_page_cgroup(pc); |
2985 | 2994 | ||
2986 | mem = pc->mem_cgroup; | 2995 | memcg = pc->mem_cgroup; |
2987 | 2996 | ||
2988 | if (!PageCgroupUsed(pc)) | 2997 | if (!PageCgroupUsed(pc)) |
2989 | goto unlock_out; | 2998 | goto unlock_out; |
@@ -3006,7 +3015,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | |||
3006 | break; | 3015 | break; |
3007 | } | 3016 | } |
3008 | 3017 | ||
3009 | mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), -nr_pages); | 3018 | mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), -nr_pages); |
3010 | 3019 | ||
3011 | ClearPageCgroupUsed(pc); | 3020 | ClearPageCgroupUsed(pc); |
3012 | /* | 3021 | /* |
@@ -3018,18 +3027,18 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | |||
3018 | 3027 | ||
3019 | unlock_page_cgroup(pc); | 3028 | unlock_page_cgroup(pc); |
3020 | /* | 3029 | /* |
3021 | * even after unlock, we have mem->res.usage here and this memcg | 3030 | * even after unlock, we have memcg->res.usage here and this memcg |
3022 | * will never be freed. | 3031 | * will never be freed. |
3023 | */ | 3032 | */ |
3024 | memcg_check_events(mem, page); | 3033 | memcg_check_events(memcg, page); |
3025 | if (do_swap_account && ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) { | 3034 | if (do_swap_account && ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) { |
3026 | mem_cgroup_swap_statistics(mem, true); | 3035 | mem_cgroup_swap_statistics(memcg, true); |
3027 | mem_cgroup_get(mem); | 3036 | mem_cgroup_get(memcg); |
3028 | } | 3037 | } |
3029 | if (!mem_cgroup_is_root(mem)) | 3038 | if (!mem_cgroup_is_root(memcg)) |
3030 | mem_cgroup_do_uncharge(mem, nr_pages, ctype); | 3039 | mem_cgroup_do_uncharge(memcg, nr_pages, ctype); |
3031 | 3040 | ||
3032 | return mem; | 3041 | return memcg; |
3033 | 3042 | ||
3034 | unlock_out: | 3043 | unlock_out: |
3035 | unlock_page_cgroup(pc); | 3044 | unlock_page_cgroup(pc); |
@@ -3219,7 +3228,7 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry, | |||
3219 | int mem_cgroup_prepare_migration(struct page *page, | 3228 | int mem_cgroup_prepare_migration(struct page *page, |
3220 | struct page *newpage, struct mem_cgroup **ptr, gfp_t gfp_mask) | 3229 | struct page *newpage, struct mem_cgroup **ptr, gfp_t gfp_mask) |
3221 | { | 3230 | { |
3222 | struct mem_cgroup *mem = NULL; | 3231 | struct mem_cgroup *memcg = NULL; |
3223 | struct page_cgroup *pc; | 3232 | struct page_cgroup *pc; |
3224 | enum charge_type ctype; | 3233 | enum charge_type ctype; |
3225 | int ret = 0; | 3234 | int ret = 0; |
@@ -3233,8 +3242,8 @@ int mem_cgroup_prepare_migration(struct page *page, | |||
3233 | pc = lookup_page_cgroup(page); | 3242 | pc = lookup_page_cgroup(page); |
3234 | lock_page_cgroup(pc); | 3243 | lock_page_cgroup(pc); |
3235 | if (PageCgroupUsed(pc)) { | 3244 | if (PageCgroupUsed(pc)) { |
3236 | mem = pc->mem_cgroup; | 3245 | memcg = pc->mem_cgroup; |
3237 | css_get(&mem->css); | 3246 | css_get(&memcg->css); |
3238 | /* | 3247 | /* |
3239 | * When migrating an anonymous page, its mapcount goes down | 3248 | * When migrating an anonymous page, its mapcount goes down |
3240 | * to 0 and uncharge() will be called. But even if it's fully | 3249 | * to 0 and uncharge() will be called. But even if it's fully |
@@ -3272,12 +3281,12 @@ int mem_cgroup_prepare_migration(struct page *page, | |||
3272 | * If the page is not charged at this point, | 3281 | * If the page is not charged at this point, |
3273 | * we return here. | 3282 | * we return here. |
3274 | */ | 3283 | */ |
3275 | if (!mem) | 3284 | if (!memcg) |
3276 | return 0; | 3285 | return 0; |
3277 | 3286 | ||
3278 | *ptr = mem; | 3287 | *ptr = memcg; |
3279 | ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, ptr, false); | 3288 | ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, ptr, false); |
3280 | css_put(&mem->css);/* drop extra refcnt */ | 3289 | css_put(&memcg->css);/* drop extra refcnt */ |
3281 | if (ret || *ptr == NULL) { | 3290 | if (ret || *ptr == NULL) { |
3282 | if (PageAnon(page)) { | 3291 | if (PageAnon(page)) { |
3283 | lock_page_cgroup(pc); | 3292 | lock_page_cgroup(pc); |
@@ -3303,21 +3312,21 @@ int mem_cgroup_prepare_migration(struct page *page, | |||
3303 | ctype = MEM_CGROUP_CHARGE_TYPE_CACHE; | 3312 | ctype = MEM_CGROUP_CHARGE_TYPE_CACHE; |
3304 | else | 3313 | else |
3305 | ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM; | 3314 | ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM; |
3306 | __mem_cgroup_commit_charge(mem, page, 1, pc, ctype); | 3315 | __mem_cgroup_commit_charge(memcg, page, 1, pc, ctype); |
3307 | return ret; | 3316 | return ret; |
3308 | } | 3317 | } |
3309 | 3318 | ||
3310 | /* remove redundant charge if migration failed */ | 3319 | /* remove redundant charge if migration failed */ |
3311 | void mem_cgroup_end_migration(struct mem_cgroup *mem, | 3320 | void mem_cgroup_end_migration(struct mem_cgroup *memcg, |
3312 | struct page *oldpage, struct page *newpage, bool migration_ok) | 3321 | struct page *oldpage, struct page *newpage, bool migration_ok) |
3313 | { | 3322 | { |
3314 | struct page *used, *unused; | 3323 | struct page *used, *unused; |
3315 | struct page_cgroup *pc; | 3324 | struct page_cgroup *pc; |
3316 | 3325 | ||
3317 | if (!mem) | 3326 | if (!memcg) |
3318 | return; | 3327 | return; |
3319 | /* blocks rmdir() */ | 3328 | /* blocks rmdir() */ |
3320 | cgroup_exclude_rmdir(&mem->css); | 3329 | cgroup_exclude_rmdir(&memcg->css); |
3321 | if (!migration_ok) { | 3330 | if (!migration_ok) { |
3322 | used = oldpage; | 3331 | used = oldpage; |
3323 | unused = newpage; | 3332 | unused = newpage; |
@@ -3353,7 +3362,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem, | |||
3353 | * So, rmdir()->pre_destroy() can be called while we do this charge. | 3362 | * So, rmdir()->pre_destroy() can be called while we do this charge. |
3355 | * In that case, we need to call pre_destroy() again. Check it here. | 3364 | * In that case, we need to call pre_destroy() again. Check it here. |
3355 | */ | 3364 | */ |
3356 | cgroup_release_and_wakeup_rmdir(&mem->css); | 3365 | cgroup_release_and_wakeup_rmdir(&memcg->css); |
3357 | } | 3366 | } |
3358 | 3367 | ||
3359 | #ifdef CONFIG_DEBUG_VM | 3368 | #ifdef CONFIG_DEBUG_VM |
@@ -3432,7 +3441,7 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, | |||
3432 | /* | 3441 | /* |
3433 | * Rather than hiding all of this in some function, I do it in an | 3442 | * Rather than hiding all of this in some function, I do it in an |
3434 | * open-coded manner so you can see what it really does. | 3443 | * open-coded manner so you can see what it really does. |
3435 | * We have to guarantee mem->res.limit < mem->memsw.limit. | 3444 | * We have to guarantee memcg->res.limit < memcg->memsw.limit. |
3436 | */ | 3445 | */ |
3437 | mutex_lock(&set_limit_mutex); | 3446 | mutex_lock(&set_limit_mutex); |
3438 | memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT); | 3447 | memswlimit = res_counter_read_u64(&memcg->memsw, RES_LIMIT); |
@@ -3494,7 +3503,7 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg, | |||
3494 | /* | 3503 | /* |
3495 | * Rather than hiding all of this in some function, I do it in an | 3504 | * Rather than hiding all of this in some function, I do it in an |
3496 | * open-coded manner so you can see what it really does. | 3505 | * open-coded manner so you can see what it really does. |
3497 | * We have to guarantee mem->res.limit < mem->memsw.limit. | 3506 | * We have to guarantee memcg->res.limit < memcg->memsw.limit. |
3498 | */ | 3507 | */ |
3499 | mutex_lock(&set_limit_mutex); | 3508 | mutex_lock(&set_limit_mutex); |
3500 | memlimit = res_counter_read_u64(&memcg->res, RES_LIMIT); | 3509 | memlimit = res_counter_read_u64(&memcg->res, RES_LIMIT); |
@@ -3632,7 +3641,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | |||
3632 | * This routine traverses the page_cgroups in the given list and drops them all. | 3641 | * This routine traverses the page_cgroups in the given list and drops them all. |
3633 | * *And* this routine doesn't reclaim the pages themselves, it just removes their page_cgroups. | 3642 | * *And* this routine doesn't reclaim the pages themselves, it just removes their page_cgroups. |
3634 | */ | 3643 | */ |
3635 | static int mem_cgroup_force_empty_list(struct mem_cgroup *mem, | 3644 | static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg, |
3636 | int node, int zid, enum lru_list lru) | 3645 | int node, int zid, enum lru_list lru) |
3637 | { | 3646 | { |
3638 | struct zone *zone; | 3647 | struct zone *zone; |
@@ -3643,7 +3652,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem, | |||
3643 | int ret = 0; | 3652 | int ret = 0; |
3644 | 3653 | ||
3645 | zone = &NODE_DATA(node)->node_zones[zid]; | 3654 | zone = &NODE_DATA(node)->node_zones[zid]; |
3646 | mz = mem_cgroup_zoneinfo(mem, node, zid); | 3655 | mz = mem_cgroup_zoneinfo(memcg, node, zid); |
3647 | list = &mz->lists[lru]; | 3656 | list = &mz->lists[lru]; |
3648 | 3657 | ||
3649 | loop = MEM_CGROUP_ZSTAT(mz, lru); | 3658 | loop = MEM_CGROUP_ZSTAT(mz, lru); |
@@ -3670,7 +3679,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem, | |||
3670 | 3679 | ||
3671 | page = lookup_cgroup_page(pc); | 3680 | page = lookup_cgroup_page(pc); |
3672 | 3681 | ||
3673 | ret = mem_cgroup_move_parent(page, pc, mem, GFP_KERNEL); | 3682 | ret = mem_cgroup_move_parent(page, pc, memcg, GFP_KERNEL); |
3674 | if (ret == -ENOMEM) | 3683 | if (ret == -ENOMEM) |
3675 | break; | 3684 | break; |
3676 | 3685 | ||
@@ -3691,14 +3700,14 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem, | |||
3691 | * make the mem_cgroup's charge 0 if there is no task. | 3700 | * make the mem_cgroup's charge 0 if there is no task. |
3692 | * This enables deleting this mem_cgroup. | 3701 | * This enables deleting this mem_cgroup. |
3693 | */ | 3702 | */ |
3694 | static int mem_cgroup_force_empty(struct mem_cgroup *mem, bool free_all) | 3703 | static int mem_cgroup_force_empty(struct mem_cgroup *memcg, bool free_all) |
3695 | { | 3704 | { |
3696 | int ret; | 3705 | int ret; |
3697 | int node, zid, shrink; | 3706 | int node, zid, shrink; |
3698 | int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; | 3707 | int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; |
3699 | struct cgroup *cgrp = mem->css.cgroup; | 3708 | struct cgroup *cgrp = memcg->css.cgroup; |
3700 | 3709 | ||
3701 | css_get(&mem->css); | 3710 | css_get(&memcg->css); |
3702 | 3711 | ||
3703 | shrink = 0; | 3712 | shrink = 0; |
3704 | /* should free all ? */ | 3713 | /* should free all ? */ |
@@ -3714,14 +3723,14 @@ move_account: | |||
3714 | goto out; | 3723 | goto out; |
3715 | /* This is for making all *used* pages to be on LRU. */ | 3724 | /* This is for making all *used* pages to be on LRU. */ |
3716 | lru_add_drain_all(); | 3725 | lru_add_drain_all(); |
3717 | drain_all_stock_sync(mem); | 3726 | drain_all_stock_sync(memcg); |
3718 | ret = 0; | 3727 | ret = 0; |
3719 | mem_cgroup_start_move(mem); | 3728 | mem_cgroup_start_move(memcg); |
3720 | for_each_node_state(node, N_HIGH_MEMORY) { | 3729 | for_each_node_state(node, N_HIGH_MEMORY) { |
3721 | for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) { | 3730 | for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) { |
3722 | enum lru_list l; | 3731 | enum lru_list l; |
3723 | for_each_lru(l) { | 3732 | for_each_lru(l) { |
3724 | ret = mem_cgroup_force_empty_list(mem, | 3733 | ret = mem_cgroup_force_empty_list(memcg, |
3725 | node, zid, l); | 3734 | node, zid, l); |
3726 | if (ret) | 3735 | if (ret) |
3727 | break; | 3736 | break; |
@@ -3730,16 +3739,16 @@ move_account: | |||
3730 | if (ret) | 3739 | if (ret) |
3731 | break; | 3740 | break; |
3732 | } | 3741 | } |
3733 | mem_cgroup_end_move(mem); | 3742 | mem_cgroup_end_move(memcg); |
3734 | memcg_oom_recover(mem); | 3743 | memcg_oom_recover(memcg); |
3735 | /* it seems parent cgroup doesn't have enough mem */ | 3744 | /* it seems parent cgroup doesn't have enough mem */ |
3736 | if (ret == -ENOMEM) | 3745 | if (ret == -ENOMEM) |
3737 | goto try_to_free; | 3746 | goto try_to_free; |
3738 | cond_resched(); | 3747 | cond_resched(); |
3739 | /* "ret" should also be checked to ensure all lists are empty. */ | 3748 | /* "ret" should also be checked to ensure all lists are empty. */ |
3740 | } while (mem->res.usage > 0 || ret); | 3749 | } while (memcg->res.usage > 0 || ret); |
3741 | out: | 3750 | out: |
3742 | css_put(&mem->css); | 3751 | css_put(&memcg->css); |
3743 | return ret; | 3752 | return ret; |
3744 | 3753 | ||
3745 | try_to_free: | 3754 | try_to_free: |
@@ -3752,14 +3761,14 @@ try_to_free: | |||
3752 | lru_add_drain_all(); | 3761 | lru_add_drain_all(); |
3753 | /* try to free all pages in this cgroup */ | 3762 | /* try to free all pages in this cgroup */ |
3754 | shrink = 1; | 3763 | shrink = 1; |
3755 | while (nr_retries && mem->res.usage > 0) { | 3764 | while (nr_retries && memcg->res.usage > 0) { |
3756 | int progress; | 3765 | int progress; |
3757 | 3766 | ||
3758 | if (signal_pending(current)) { | 3767 | if (signal_pending(current)) { |
3759 | ret = -EINTR; | 3768 | ret = -EINTR; |
3760 | goto out; | 3769 | goto out; |
3761 | } | 3770 | } |
3762 | progress = try_to_free_mem_cgroup_pages(mem, GFP_KERNEL, | 3771 | progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL, |
3763 | false); | 3772 | false); |
3764 | if (!progress) { | 3773 | if (!progress) { |
3765 | nr_retries--; | 3774 | nr_retries--; |
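For orientation: mem_cgroup_force_empty() ends up with two callers. Removal of the cgroup reaches it with free_all == false through the ->pre_destroy() hook further down in this diff, while a write to the memory.force_empty control file requests the aggressive variant with free_all == true. The write handler itself is outside these hunks; a minimal sketch, with the handler name assumed, would be:

    /* memory.force_empty: drop every charge, falling back to reclaim */
    static int mem_cgroup_force_empty_write(struct cgroup *cont,
                                            unsigned int event)
    {
            return mem_cgroup_force_empty(mem_cgroup_from_cont(cont), true);
    }
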
@@ -3788,12 +3797,12 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft, | |||
3788 | u64 val) | 3797 | u64 val) |
3789 | { | 3798 | { |
3790 | int retval = 0; | 3799 | int retval = 0; |
3791 | struct mem_cgroup *mem = mem_cgroup_from_cont(cont); | 3800 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); |
3792 | struct cgroup *parent = cont->parent; | 3801 | struct cgroup *parent = cont->parent; |
3793 | struct mem_cgroup *parent_mem = NULL; | 3802 | struct mem_cgroup *parent_memcg = NULL; |
3794 | 3803 | ||
3795 | if (parent) | 3804 | if (parent) |
3796 | parent_mem = mem_cgroup_from_cont(parent); | 3805 | parent_memcg = mem_cgroup_from_cont(parent); |
3797 | 3806 | ||
3798 | cgroup_lock(); | 3807 | cgroup_lock(); |
3799 | /* | 3808 | /* |
@@ -3804,10 +3813,10 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft, | |||
3804 | * For the root cgroup, parent_mem is NULL, we allow value to be | 3813 | * For the root cgroup, parent_mem is NULL, we allow value to be |
3805 | * set if there are no children. | 3814 | * set if there are no children. |
3806 | */ | 3815 | */ |
3807 | if ((!parent_mem || !parent_mem->use_hierarchy) && | 3816 | if ((!parent_memcg || !parent_memcg->use_hierarchy) && |
3808 | (val == 1 || val == 0)) { | 3817 | (val == 1 || val == 0)) { |
3809 | if (list_empty(&cont->children)) | 3818 | if (list_empty(&cont->children)) |
3810 | mem->use_hierarchy = val; | 3819 | memcg->use_hierarchy = val; |
3811 | else | 3820 | else |
3812 | retval = -EBUSY; | 3821 | retval = -EBUSY; |
3813 | } else | 3822 | } else |
@@ -3818,14 +3827,14 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft, | |||
3818 | } | 3827 | } |
3819 | 3828 | ||
3820 | 3829 | ||
3821 | static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *mem, | 3830 | static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg, |
3822 | enum mem_cgroup_stat_index idx) | 3831 | enum mem_cgroup_stat_index idx) |
3823 | { | 3832 | { |
3824 | struct mem_cgroup *iter; | 3833 | struct mem_cgroup *iter; |
3825 | long val = 0; | 3834 | long val = 0; |
3826 | 3835 | ||
3827 | /* Per-cpu values can be negative, use a signed accumulator */ | 3836 | /* Per-cpu values can be negative, use a signed accumulator */ |
3828 | for_each_mem_cgroup_tree(iter, mem) | 3837 | for_each_mem_cgroup_tree(iter, memcg) |
3829 | val += mem_cgroup_read_stat(iter, idx); | 3838 | val += mem_cgroup_read_stat(iter, idx); |
3830 | 3839 | ||
3831 | if (val < 0) /* race ? */ | 3840 | if (val < 0) /* race ? */ |
@@ -3833,29 +3842,29 @@ static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *mem, | |||
3833 | return val; | 3842 | return val; |
3834 | } | 3843 | } |
3835 | 3844 | ||
3836 | static inline u64 mem_cgroup_usage(struct mem_cgroup *mem, bool swap) | 3845 | static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) |
3837 | { | 3846 | { |
3838 | u64 val; | 3847 | u64 val; |
3839 | 3848 | ||
3840 | if (!mem_cgroup_is_root(mem)) { | 3849 | if (!mem_cgroup_is_root(memcg)) { |
3841 | if (!swap) | 3850 | if (!swap) |
3842 | return res_counter_read_u64(&mem->res, RES_USAGE); | 3851 | return res_counter_read_u64(&memcg->res, RES_USAGE); |
3843 | else | 3852 | else |
3844 | return res_counter_read_u64(&mem->memsw, RES_USAGE); | 3853 | return res_counter_read_u64(&memcg->memsw, RES_USAGE); |
3845 | } | 3854 | } |
3846 | 3855 | ||
3847 | val = mem_cgroup_recursive_stat(mem, MEM_CGROUP_STAT_CACHE); | 3856 | val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE); |
3848 | val += mem_cgroup_recursive_stat(mem, MEM_CGROUP_STAT_RSS); | 3857 | val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS); |
3849 | 3858 | ||
3850 | if (swap) | 3859 | if (swap) |
3851 | val += mem_cgroup_recursive_stat(mem, MEM_CGROUP_STAT_SWAPOUT); | 3860 | val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAPOUT); |
3852 | 3861 | ||
3853 | return val << PAGE_SHIFT; | 3862 | return val << PAGE_SHIFT; |
3854 | } | 3863 | } |
3855 | 3864 | ||
3856 | static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) | 3865 | static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) |
3857 | { | 3866 | { |
3858 | struct mem_cgroup *mem = mem_cgroup_from_cont(cont); | 3867 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); |
3859 | u64 val; | 3868 | u64 val; |
3860 | int type, name; | 3869 | int type, name; |
3861 | 3870 | ||
@@ -3864,15 +3873,15 @@ static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) | |||
3864 | switch (type) { | 3873 | switch (type) { |
3865 | case _MEM: | 3874 | case _MEM: |
3866 | if (name == RES_USAGE) | 3875 | if (name == RES_USAGE) |
3867 | val = mem_cgroup_usage(mem, false); | 3876 | val = mem_cgroup_usage(memcg, false); |
3868 | else | 3877 | else |
3869 | val = res_counter_read_u64(&mem->res, name); | 3878 | val = res_counter_read_u64(&memcg->res, name); |
3870 | break; | 3879 | break; |
3871 | case _MEMSWAP: | 3880 | case _MEMSWAP: |
3872 | if (name == RES_USAGE) | 3881 | if (name == RES_USAGE) |
3873 | val = mem_cgroup_usage(mem, true); | 3882 | val = mem_cgroup_usage(memcg, true); |
3874 | else | 3883 | else |
3875 | val = res_counter_read_u64(&mem->memsw, name); | 3884 | val = res_counter_read_u64(&memcg->memsw, name); |
3876 | break; | 3885 | break; |
3877 | default: | 3886 | default: |
3878 | BUG(); | 3887 | BUG(); |
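The type/name pair decoded above, and again in mem_cgroup_reset() below, is unpacked from cft->private, which is set when the memory controller's files are declared. The packing helpers are defined earlier in memcontrol.c and are not part of this diff; they presumably amount to:

    /* assumed layout: counter type in the high 16 bits, attribute in the low 16 */
    #define MEMFILE_PRIVATE(x, val)        (((x) << 16) | (val))
    #define MEMFILE_TYPE(val)              (((val) >> 16) & 0xffff)
    #define MEMFILE_ATTR(val)              ((val) & 0xffff)

With that encoding a single read or reset handler can serve both the _MEM and _MEMSWAP res_counters from one switch statement, as the hunk above does.
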
@@ -3960,24 +3969,24 @@ out: | |||
3960 | 3969 | ||
3961 | static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) | 3970 | static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) |
3962 | { | 3971 | { |
3963 | struct mem_cgroup *mem; | 3972 | struct mem_cgroup *memcg; |
3964 | int type, name; | 3973 | int type, name; |
3965 | 3974 | ||
3966 | mem = mem_cgroup_from_cont(cont); | 3975 | memcg = mem_cgroup_from_cont(cont); |
3967 | type = MEMFILE_TYPE(event); | 3976 | type = MEMFILE_TYPE(event); |
3968 | name = MEMFILE_ATTR(event); | 3977 | name = MEMFILE_ATTR(event); |
3969 | switch (name) { | 3978 | switch (name) { |
3970 | case RES_MAX_USAGE: | 3979 | case RES_MAX_USAGE: |
3971 | if (type == _MEM) | 3980 | if (type == _MEM) |
3972 | res_counter_reset_max(&mem->res); | 3981 | res_counter_reset_max(&memcg->res); |
3973 | else | 3982 | else |
3974 | res_counter_reset_max(&mem->memsw); | 3983 | res_counter_reset_max(&memcg->memsw); |
3975 | break; | 3984 | break; |
3976 | case RES_FAILCNT: | 3985 | case RES_FAILCNT: |
3977 | if (type == _MEM) | 3986 | if (type == _MEM) |
3978 | res_counter_reset_failcnt(&mem->res); | 3987 | res_counter_reset_failcnt(&memcg->res); |
3979 | else | 3988 | else |
3980 | res_counter_reset_failcnt(&mem->memsw); | 3989 | res_counter_reset_failcnt(&memcg->memsw); |
3981 | break; | 3990 | break; |
3982 | } | 3991 | } |
3983 | 3992 | ||
@@ -3994,7 +4003,7 @@ static u64 mem_cgroup_move_charge_read(struct cgroup *cgrp, | |||
3994 | static int mem_cgroup_move_charge_write(struct cgroup *cgrp, | 4003 | static int mem_cgroup_move_charge_write(struct cgroup *cgrp, |
3995 | struct cftype *cft, u64 val) | 4004 | struct cftype *cft, u64 val) |
3996 | { | 4005 | { |
3997 | struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp); | 4006 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); |
3998 | 4007 | ||
3999 | if (val >= (1 << NR_MOVE_TYPE)) | 4008 | if (val >= (1 << NR_MOVE_TYPE)) |
4000 | return -EINVAL; | 4009 | return -EINVAL; |
@@ -4004,7 +4013,7 @@ static int mem_cgroup_move_charge_write(struct cgroup *cgrp, | |||
4004 | * inconsistent. | 4013 | * inconsistent. |
4005 | */ | 4014 | */ |
4006 | cgroup_lock(); | 4015 | cgroup_lock(); |
4007 | mem->move_charge_at_immigrate = val; | 4016 | memcg->move_charge_at_immigrate = val; |
4008 | cgroup_unlock(); | 4017 | cgroup_unlock(); |
4009 | 4018 | ||
4010 | return 0; | 4019 | return 0; |
@@ -4061,49 +4070,49 @@ struct { | |||
4061 | 4070 | ||
4062 | 4071 | ||
4063 | static void | 4072 | static void |
4064 | mem_cgroup_get_local_stat(struct mem_cgroup *mem, struct mcs_total_stat *s) | 4073 | mem_cgroup_get_local_stat(struct mem_cgroup *memcg, struct mcs_total_stat *s) |
4065 | { | 4074 | { |
4066 | s64 val; | 4075 | s64 val; |
4067 | 4076 | ||
4068 | /* per cpu stat */ | 4077 | /* per cpu stat */ |
4069 | val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_CACHE); | 4078 | val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_CACHE); |
4070 | s->stat[MCS_CACHE] += val * PAGE_SIZE; | 4079 | s->stat[MCS_CACHE] += val * PAGE_SIZE; |
4071 | val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_RSS); | 4080 | val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_RSS); |
4072 | s->stat[MCS_RSS] += val * PAGE_SIZE; | 4081 | s->stat[MCS_RSS] += val * PAGE_SIZE; |
4073 | val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_FILE_MAPPED); | 4082 | val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED); |
4074 | s->stat[MCS_FILE_MAPPED] += val * PAGE_SIZE; | 4083 | s->stat[MCS_FILE_MAPPED] += val * PAGE_SIZE; |
4075 | val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGPGIN); | 4084 | val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGPGIN); |
4076 | s->stat[MCS_PGPGIN] += val; | 4085 | s->stat[MCS_PGPGIN] += val; |
4077 | val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGPGOUT); | 4086 | val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGPGOUT); |
4078 | s->stat[MCS_PGPGOUT] += val; | 4087 | s->stat[MCS_PGPGOUT] += val; |
4079 | if (do_swap_account) { | 4088 | if (do_swap_account) { |
4080 | val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_SWAPOUT); | 4089 | val = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_SWAPOUT); |
4081 | s->stat[MCS_SWAP] += val * PAGE_SIZE; | 4090 | s->stat[MCS_SWAP] += val * PAGE_SIZE; |
4082 | } | 4091 | } |
4083 | val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGFAULT); | 4092 | val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGFAULT); |
4084 | s->stat[MCS_PGFAULT] += val; | 4093 | s->stat[MCS_PGFAULT] += val; |
4085 | val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGMAJFAULT); | 4094 | val = mem_cgroup_read_events(memcg, MEM_CGROUP_EVENTS_PGMAJFAULT); |
4086 | s->stat[MCS_PGMAJFAULT] += val; | 4095 | s->stat[MCS_PGMAJFAULT] += val; |
4087 | 4096 | ||
4088 | /* per zone stat */ | 4097 | /* per zone stat */ |
4089 | val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_INACTIVE_ANON)); | 4098 | val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_ANON)); |
4090 | s->stat[MCS_INACTIVE_ANON] += val * PAGE_SIZE; | 4099 | s->stat[MCS_INACTIVE_ANON] += val * PAGE_SIZE; |
4091 | val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_ACTIVE_ANON)); | 4100 | val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_ANON)); |
4092 | s->stat[MCS_ACTIVE_ANON] += val * PAGE_SIZE; | 4101 | s->stat[MCS_ACTIVE_ANON] += val * PAGE_SIZE; |
4093 | val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_INACTIVE_FILE)); | 4102 | val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_INACTIVE_FILE)); |
4094 | s->stat[MCS_INACTIVE_FILE] += val * PAGE_SIZE; | 4103 | s->stat[MCS_INACTIVE_FILE] += val * PAGE_SIZE; |
4095 | val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_ACTIVE_FILE)); | 4104 | val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_ACTIVE_FILE)); |
4096 | s->stat[MCS_ACTIVE_FILE] += val * PAGE_SIZE; | 4105 | s->stat[MCS_ACTIVE_FILE] += val * PAGE_SIZE; |
4097 | val = mem_cgroup_nr_lru_pages(mem, BIT(LRU_UNEVICTABLE)); | 4106 | val = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_UNEVICTABLE)); |
4098 | s->stat[MCS_UNEVICTABLE] += val * PAGE_SIZE; | 4107 | s->stat[MCS_UNEVICTABLE] += val * PAGE_SIZE; |
4099 | } | 4108 | } |
4100 | 4109 | ||
4101 | static void | 4110 | static void |
4102 | mem_cgroup_get_total_stat(struct mem_cgroup *mem, struct mcs_total_stat *s) | 4111 | mem_cgroup_get_total_stat(struct mem_cgroup *memcg, struct mcs_total_stat *s) |
4103 | { | 4112 | { |
4104 | struct mem_cgroup *iter; | 4113 | struct mem_cgroup *iter; |
4105 | 4114 | ||
4106 | for_each_mem_cgroup_tree(iter, mem) | 4115 | for_each_mem_cgroup_tree(iter, memcg) |
4107 | mem_cgroup_get_local_stat(iter, s); | 4116 | mem_cgroup_get_local_stat(iter, s); |
4108 | } | 4117 | } |
4109 | 4118 | ||
@@ -4189,8 +4198,6 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, | |||
4189 | } | 4198 | } |
4190 | 4199 | ||
4191 | #ifdef CONFIG_DEBUG_VM | 4200 | #ifdef CONFIG_DEBUG_VM |
4192 | cb->fill(cb, "inactive_ratio", calc_inactive_ratio(mem_cont, NULL)); | ||
4193 | |||
4194 | { | 4201 | { |
4195 | int nid, zid; | 4202 | int nid, zid; |
4196 | struct mem_cgroup_per_zone *mz; | 4203 | struct mem_cgroup_per_zone *mz; |
@@ -4327,20 +4334,20 @@ static int compare_thresholds(const void *a, const void *b) | |||
4327 | return _a->threshold - _b->threshold; | 4334 | return _a->threshold - _b->threshold; |
4328 | } | 4335 | } |
4329 | 4336 | ||
4330 | static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem) | 4337 | static int mem_cgroup_oom_notify_cb(struct mem_cgroup *memcg) |
4331 | { | 4338 | { |
4332 | struct mem_cgroup_eventfd_list *ev; | 4339 | struct mem_cgroup_eventfd_list *ev; |
4333 | 4340 | ||
4334 | list_for_each_entry(ev, &mem->oom_notify, list) | 4341 | list_for_each_entry(ev, &memcg->oom_notify, list) |
4335 | eventfd_signal(ev->eventfd, 1); | 4342 | eventfd_signal(ev->eventfd, 1); |
4336 | return 0; | 4343 | return 0; |
4337 | } | 4344 | } |
4338 | 4345 | ||
4339 | static void mem_cgroup_oom_notify(struct mem_cgroup *mem) | 4346 | static void mem_cgroup_oom_notify(struct mem_cgroup *memcg) |
4340 | { | 4347 | { |
4341 | struct mem_cgroup *iter; | 4348 | struct mem_cgroup *iter; |
4342 | 4349 | ||
4343 | for_each_mem_cgroup_tree(iter, mem) | 4350 | for_each_mem_cgroup_tree(iter, memcg) |
4344 | mem_cgroup_oom_notify_cb(iter); | 4351 | mem_cgroup_oom_notify_cb(iter); |
4345 | } | 4352 | } |
4346 | 4353 | ||
@@ -4530,7 +4537,7 @@ static int mem_cgroup_oom_register_event(struct cgroup *cgrp, | |||
4530 | static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp, | 4537 | static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp, |
4531 | struct cftype *cft, struct eventfd_ctx *eventfd) | 4538 | struct cftype *cft, struct eventfd_ctx *eventfd) |
4532 | { | 4539 | { |
4533 | struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp); | 4540 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); |
4534 | struct mem_cgroup_eventfd_list *ev, *tmp; | 4541 | struct mem_cgroup_eventfd_list *ev, *tmp; |
4535 | int type = MEMFILE_TYPE(cft->private); | 4542 | int type = MEMFILE_TYPE(cft->private); |
4536 | 4543 | ||
@@ -4538,7 +4545,7 @@ static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp, | |||
4538 | 4545 | ||
4539 | spin_lock(&memcg_oom_lock); | 4546 | spin_lock(&memcg_oom_lock); |
4540 | 4547 | ||
4541 | list_for_each_entry_safe(ev, tmp, &mem->oom_notify, list) { | 4548 | list_for_each_entry_safe(ev, tmp, &memcg->oom_notify, list) { |
4542 | if (ev->eventfd == eventfd) { | 4549 | if (ev->eventfd == eventfd) { |
4543 | list_del(&ev->list); | 4550 | list_del(&ev->list); |
4544 | kfree(ev); | 4551 | kfree(ev); |
@@ -4551,11 +4558,11 @@ static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp, | |||
4551 | static int mem_cgroup_oom_control_read(struct cgroup *cgrp, | 4558 | static int mem_cgroup_oom_control_read(struct cgroup *cgrp, |
4552 | struct cftype *cft, struct cgroup_map_cb *cb) | 4559 | struct cftype *cft, struct cgroup_map_cb *cb) |
4553 | { | 4560 | { |
4554 | struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp); | 4561 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); |
4555 | 4562 | ||
4556 | cb->fill(cb, "oom_kill_disable", mem->oom_kill_disable); | 4563 | cb->fill(cb, "oom_kill_disable", memcg->oom_kill_disable); |
4557 | 4564 | ||
4558 | if (atomic_read(&mem->under_oom)) | 4565 | if (atomic_read(&memcg->under_oom)) |
4559 | cb->fill(cb, "under_oom", 1); | 4566 | cb->fill(cb, "under_oom", 1); |
4560 | else | 4567 | else |
4561 | cb->fill(cb, "under_oom", 0); | 4568 | cb->fill(cb, "under_oom", 0); |
@@ -4565,7 +4572,7 @@ static int mem_cgroup_oom_control_read(struct cgroup *cgrp, | |||
4565 | static int mem_cgroup_oom_control_write(struct cgroup *cgrp, | 4572 | static int mem_cgroup_oom_control_write(struct cgroup *cgrp, |
4566 | struct cftype *cft, u64 val) | 4573 | struct cftype *cft, u64 val) |
4567 | { | 4574 | { |
4568 | struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp); | 4575 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); |
4569 | struct mem_cgroup *parent; | 4576 | struct mem_cgroup *parent; |
4570 | 4577 | ||
4571 | /* cannot set to root cgroup and only 0 and 1 are allowed */ | 4578 | /* cannot set to root cgroup and only 0 and 1 are allowed */ |
@@ -4577,13 +4584,13 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp, | |||
4577 | cgroup_lock(); | 4584 | cgroup_lock(); |
4578 | /* oom-kill-disable is a flag for subhierarchy. */ | 4585 | /* oom-kill-disable is a flag for subhierarchy. */ |
4579 | if ((parent->use_hierarchy) || | 4586 | if ((parent->use_hierarchy) || |
4580 | (mem->use_hierarchy && !list_empty(&cgrp->children))) { | 4587 | (memcg->use_hierarchy && !list_empty(&cgrp->children))) { |
4581 | cgroup_unlock(); | 4588 | cgroup_unlock(); |
4582 | return -EINVAL; | 4589 | return -EINVAL; |
4583 | } | 4590 | } |
4584 | mem->oom_kill_disable = val; | 4591 | memcg->oom_kill_disable = val; |
4585 | if (!val) | 4592 | if (!val) |
4586 | memcg_oom_recover(mem); | 4593 | memcg_oom_recover(memcg); |
4587 | cgroup_unlock(); | 4594 | cgroup_unlock(); |
4588 | return 0; | 4595 | return 0; |
4589 | } | 4596 | } |
@@ -4719,7 +4726,7 @@ static int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss) | |||
4719 | } | 4726 | } |
4720 | #endif | 4727 | #endif |
4721 | 4728 | ||
4722 | static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) | 4729 | static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) |
4723 | { | 4730 | { |
4724 | struct mem_cgroup_per_node *pn; | 4731 | struct mem_cgroup_per_node *pn; |
4725 | struct mem_cgroup_per_zone *mz; | 4732 | struct mem_cgroup_per_zone *mz; |
@@ -4739,21 +4746,21 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) | |||
4739 | if (!pn) | 4746 | if (!pn) |
4740 | return 1; | 4747 | return 1; |
4741 | 4748 | ||
4742 | mem->info.nodeinfo[node] = pn; | ||
4743 | for (zone = 0; zone < MAX_NR_ZONES; zone++) { | 4749 | for (zone = 0; zone < MAX_NR_ZONES; zone++) { |
4744 | mz = &pn->zoneinfo[zone]; | 4750 | mz = &pn->zoneinfo[zone]; |
4745 | for_each_lru(l) | 4751 | for_each_lru(l) |
4746 | INIT_LIST_HEAD(&mz->lists[l]); | 4752 | INIT_LIST_HEAD(&mz->lists[l]); |
4747 | mz->usage_in_excess = 0; | 4753 | mz->usage_in_excess = 0; |
4748 | mz->on_tree = false; | 4754 | mz->on_tree = false; |
4749 | mz->mem = mem; | 4755 | mz->mem = memcg; |
4750 | } | 4756 | } |
4757 | memcg->info.nodeinfo[node] = pn; | ||
4751 | return 0; | 4758 | return 0; |
4752 | } | 4759 | } |
4753 | 4760 | ||
4754 | static void free_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) | 4761 | static void free_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) |
4755 | { | 4762 | { |
4756 | kfree(mem->info.nodeinfo[node]); | 4763 | kfree(memcg->info.nodeinfo[node]); |
4757 | } | 4764 | } |
4758 | 4765 | ||
4759 | static struct mem_cgroup *mem_cgroup_alloc(void) | 4766 | static struct mem_cgroup *mem_cgroup_alloc(void) |
@@ -4795,51 +4802,51 @@ out_free: | |||
4795 | * Removal of cgroup itself succeeds regardless of refs from swap. | 4802 | * Removal of cgroup itself succeeds regardless of refs from swap. |
4796 | */ | 4803 | */ |
4797 | 4804 | ||
4798 | static void __mem_cgroup_free(struct mem_cgroup *mem) | 4805 | static void __mem_cgroup_free(struct mem_cgroup *memcg) |
4799 | { | 4806 | { |
4800 | int node; | 4807 | int node; |
4801 | 4808 | ||
4802 | mem_cgroup_remove_from_trees(mem); | 4809 | mem_cgroup_remove_from_trees(memcg); |
4803 | free_css_id(&mem_cgroup_subsys, &mem->css); | 4810 | free_css_id(&mem_cgroup_subsys, &memcg->css); |
4804 | 4811 | ||
4805 | for_each_node_state(node, N_POSSIBLE) | 4812 | for_each_node_state(node, N_POSSIBLE) |
4806 | free_mem_cgroup_per_zone_info(mem, node); | 4813 | free_mem_cgroup_per_zone_info(memcg, node); |
4807 | 4814 | ||
4808 | free_percpu(mem->stat); | 4815 | free_percpu(memcg->stat); |
4809 | if (sizeof(struct mem_cgroup) < PAGE_SIZE) | 4816 | if (sizeof(struct mem_cgroup) < PAGE_SIZE) |
4810 | kfree(mem); | 4817 | kfree(memcg); |
4811 | else | 4818 | else |
4812 | vfree(mem); | 4819 | vfree(memcg); |
4813 | } | 4820 | } |
4814 | 4821 | ||
4815 | static void mem_cgroup_get(struct mem_cgroup *mem) | 4822 | static void mem_cgroup_get(struct mem_cgroup *memcg) |
4816 | { | 4823 | { |
4817 | atomic_inc(&mem->refcnt); | 4824 | atomic_inc(&memcg->refcnt); |
4818 | } | 4825 | } |
4819 | 4826 | ||
4820 | static void __mem_cgroup_put(struct mem_cgroup *mem, int count) | 4827 | static void __mem_cgroup_put(struct mem_cgroup *memcg, int count) |
4821 | { | 4828 | { |
4822 | if (atomic_sub_and_test(count, &mem->refcnt)) { | 4829 | if (atomic_sub_and_test(count, &memcg->refcnt)) { |
4823 | struct mem_cgroup *parent = parent_mem_cgroup(mem); | 4830 | struct mem_cgroup *parent = parent_mem_cgroup(memcg); |
4824 | __mem_cgroup_free(mem); | 4831 | __mem_cgroup_free(memcg); |
4825 | if (parent) | 4832 | if (parent) |
4826 | mem_cgroup_put(parent); | 4833 | mem_cgroup_put(parent); |
4827 | } | 4834 | } |
4828 | } | 4835 | } |
4829 | 4836 | ||
4830 | static void mem_cgroup_put(struct mem_cgroup *mem) | 4837 | static void mem_cgroup_put(struct mem_cgroup *memcg) |
4831 | { | 4838 | { |
4832 | __mem_cgroup_put(mem, 1); | 4839 | __mem_cgroup_put(memcg, 1); |
4833 | } | 4840 | } |
4834 | 4841 | ||
4835 | /* | 4842 | /* |
4836 | * Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled. | 4843 | * Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled. |
4837 | */ | 4844 | */ |
4838 | static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem) | 4845 | static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) |
4839 | { | 4846 | { |
4840 | if (!mem->res.parent) | 4847 | if (!memcg->res.parent) |
4841 | return NULL; | 4848 | return NULL; |
4842 | return mem_cgroup_from_res_counter(mem->res.parent, res); | 4849 | return mem_cgroup_from_res_counter(memcg->res.parent, res); |
4843 | } | 4850 | } |
4844 | 4851 | ||
4845 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP | 4852 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP |
@@ -4882,16 +4889,16 @@ static int mem_cgroup_soft_limit_tree_init(void) | |||
4882 | static struct cgroup_subsys_state * __ref | 4889 | static struct cgroup_subsys_state * __ref |
4883 | mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) | 4890 | mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) |
4884 | { | 4891 | { |
4885 | struct mem_cgroup *mem, *parent; | 4892 | struct mem_cgroup *memcg, *parent; |
4886 | long error = -ENOMEM; | 4893 | long error = -ENOMEM; |
4887 | int node; | 4894 | int node; |
4888 | 4895 | ||
4889 | mem = mem_cgroup_alloc(); | 4896 | memcg = mem_cgroup_alloc(); |
4890 | if (!mem) | 4897 | if (!memcg) |
4891 | return ERR_PTR(error); | 4898 | return ERR_PTR(error); |
4892 | 4899 | ||
4893 | for_each_node_state(node, N_POSSIBLE) | 4900 | for_each_node_state(node, N_POSSIBLE) |
4894 | if (alloc_mem_cgroup_per_zone_info(mem, node)) | 4901 | if (alloc_mem_cgroup_per_zone_info(memcg, node)) |
4895 | goto free_out; | 4902 | goto free_out; |
4896 | 4903 | ||
4897 | /* root ? */ | 4904 | /* root ? */ |
@@ -4899,7 +4906,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) | |||
4899 | int cpu; | 4906 | int cpu; |
4900 | enable_swap_cgroup(); | 4907 | enable_swap_cgroup(); |
4901 | parent = NULL; | 4908 | parent = NULL; |
4902 | root_mem_cgroup = mem; | 4909 | root_mem_cgroup = memcg; |
4903 | if (mem_cgroup_soft_limit_tree_init()) | 4910 | if (mem_cgroup_soft_limit_tree_init()) |
4904 | goto free_out; | 4911 | goto free_out; |
4905 | for_each_possible_cpu(cpu) { | 4912 | for_each_possible_cpu(cpu) { |
@@ -4910,13 +4917,13 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) | |||
4910 | hotcpu_notifier(memcg_cpu_hotplug_callback, 0); | 4917 | hotcpu_notifier(memcg_cpu_hotplug_callback, 0); |
4911 | } else { | 4918 | } else { |
4912 | parent = mem_cgroup_from_cont(cont->parent); | 4919 | parent = mem_cgroup_from_cont(cont->parent); |
4913 | mem->use_hierarchy = parent->use_hierarchy; | 4920 | memcg->use_hierarchy = parent->use_hierarchy; |
4914 | mem->oom_kill_disable = parent->oom_kill_disable; | 4921 | memcg->oom_kill_disable = parent->oom_kill_disable; |
4915 | } | 4922 | } |
4916 | 4923 | ||
4917 | if (parent && parent->use_hierarchy) { | 4924 | if (parent && parent->use_hierarchy) { |
4918 | res_counter_init(&mem->res, &parent->res); | 4925 | res_counter_init(&memcg->res, &parent->res); |
4919 | res_counter_init(&mem->memsw, &parent->memsw); | 4926 | res_counter_init(&memcg->memsw, &parent->memsw); |
4920 | /* | 4927 | /* |
4921 | * We increment refcnt of the parent to ensure that we can | 4928 | * We increment refcnt of the parent to ensure that we can |
4922 | * safely access it on res_counter_charge/uncharge. | 4929 | * safely access it on res_counter_charge/uncharge. |
@@ -4925,21 +4932,21 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) | |||
4925 | */ | 4932 | */ |
4926 | mem_cgroup_get(parent); | 4933 | mem_cgroup_get(parent); |
4927 | } else { | 4934 | } else { |
4928 | res_counter_init(&mem->res, NULL); | 4935 | res_counter_init(&memcg->res, NULL); |
4929 | res_counter_init(&mem->memsw, NULL); | 4936 | res_counter_init(&memcg->memsw, NULL); |
4930 | } | 4937 | } |
4931 | mem->last_scanned_child = 0; | 4938 | memcg->last_scanned_child = 0; |
4932 | mem->last_scanned_node = MAX_NUMNODES; | 4939 | memcg->last_scanned_node = MAX_NUMNODES; |
4933 | INIT_LIST_HEAD(&mem->oom_notify); | 4940 | INIT_LIST_HEAD(&memcg->oom_notify); |
4934 | 4941 | ||
4935 | if (parent) | 4942 | if (parent) |
4936 | mem->swappiness = mem_cgroup_swappiness(parent); | 4943 | memcg->swappiness = mem_cgroup_swappiness(parent); |
4937 | atomic_set(&mem->refcnt, 1); | 4944 | atomic_set(&memcg->refcnt, 1); |
4938 | mem->move_charge_at_immigrate = 0; | 4945 | memcg->move_charge_at_immigrate = 0; |
4939 | mutex_init(&mem->thresholds_lock); | 4946 | mutex_init(&memcg->thresholds_lock); |
4940 | return &mem->css; | 4947 | return &memcg->css; |
4941 | free_out: | 4948 | free_out: |
4942 | __mem_cgroup_free(mem); | 4949 | __mem_cgroup_free(memcg); |
4943 | root_mem_cgroup = NULL; | 4950 | root_mem_cgroup = NULL; |
4944 | return ERR_PTR(error); | 4951 | return ERR_PTR(error); |
4945 | } | 4952 | } |
@@ -4947,17 +4954,17 @@ free_out: | |||
4947 | static int mem_cgroup_pre_destroy(struct cgroup_subsys *ss, | 4954 | static int mem_cgroup_pre_destroy(struct cgroup_subsys *ss, |
4948 | struct cgroup *cont) | 4955 | struct cgroup *cont) |
4949 | { | 4956 | { |
4950 | struct mem_cgroup *mem = mem_cgroup_from_cont(cont); | 4957 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); |
4951 | 4958 | ||
4952 | return mem_cgroup_force_empty(mem, false); | 4959 | return mem_cgroup_force_empty(memcg, false); |
4953 | } | 4960 | } |
4954 | 4961 | ||
4955 | static void mem_cgroup_destroy(struct cgroup_subsys *ss, | 4962 | static void mem_cgroup_destroy(struct cgroup_subsys *ss, |
4956 | struct cgroup *cont) | 4963 | struct cgroup *cont) |
4957 | { | 4964 | { |
4958 | struct mem_cgroup *mem = mem_cgroup_from_cont(cont); | 4965 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); |
4959 | 4966 | ||
4960 | mem_cgroup_put(mem); | 4967 | mem_cgroup_put(memcg); |
4961 | } | 4968 | } |
4962 | 4969 | ||
4963 | static int mem_cgroup_populate(struct cgroup_subsys *ss, | 4970 | static int mem_cgroup_populate(struct cgroup_subsys *ss, |
@@ -4980,9 +4987,9 @@ static int mem_cgroup_do_precharge(unsigned long count) | |||
4980 | { | 4987 | { |
4981 | int ret = 0; | 4988 | int ret = 0; |
4982 | int batch_count = PRECHARGE_COUNT_AT_ONCE; | 4989 | int batch_count = PRECHARGE_COUNT_AT_ONCE; |
4983 | struct mem_cgroup *mem = mc.to; | 4990 | struct mem_cgroup *memcg = mc.to; |
4984 | 4991 | ||
4985 | if (mem_cgroup_is_root(mem)) { | 4992 | if (mem_cgroup_is_root(memcg)) { |
4986 | mc.precharge += count; | 4993 | mc.precharge += count; |
4987 | /* we don't need css_get for root */ | 4994 | /* we don't need css_get for root */ |
4988 | return ret; | 4995 | return ret; |
@@ -4991,16 +4998,16 @@ static int mem_cgroup_do_precharge(unsigned long count) | |||
4991 | if (count > 1) { | 4998 | if (count > 1) { |
4992 | struct res_counter *dummy; | 4999 | struct res_counter *dummy; |
4993 | /* | 5000 | /* |
4994 | * "mem" cannot be under rmdir() because we've already checked | 5001 | * "memcg" cannot be under rmdir() because we've already checked |
4995 | * by cgroup_lock_live_cgroup() that it is not removed and we | 5002 | * by cgroup_lock_live_cgroup() that it is not removed and we |
4996 | * are still under the same cgroup_mutex. So we can postpone | 5003 | * are still under the same cgroup_mutex. So we can postpone |
4997 | * css_get(). | 5004 | * css_get(). |
4998 | */ | 5005 | */ |
4999 | if (res_counter_charge(&mem->res, PAGE_SIZE * count, &dummy)) | 5006 | if (res_counter_charge(&memcg->res, PAGE_SIZE * count, &dummy)) |
5000 | goto one_by_one; | 5007 | goto one_by_one; |
5001 | if (do_swap_account && res_counter_charge(&mem->memsw, | 5008 | if (do_swap_account && res_counter_charge(&memcg->memsw, |
5002 | PAGE_SIZE * count, &dummy)) { | 5009 | PAGE_SIZE * count, &dummy)) { |
5003 | res_counter_uncharge(&mem->res, PAGE_SIZE * count); | 5010 | res_counter_uncharge(&memcg->res, PAGE_SIZE * count); |
5004 | goto one_by_one; | 5011 | goto one_by_one; |
5005 | } | 5012 | } |
5006 | mc.precharge += count; | 5013 | mc.precharge += count; |
@@ -5017,8 +5024,9 @@ one_by_one: | |||
5017 | batch_count = PRECHARGE_COUNT_AT_ONCE; | 5024 | batch_count = PRECHARGE_COUNT_AT_ONCE; |
5018 | cond_resched(); | 5025 | cond_resched(); |
5019 | } | 5026 | } |
5020 | ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, 1, &mem, false); | 5027 | ret = __mem_cgroup_try_charge(NULL, |
5021 | if (ret || !mem) | 5028 | GFP_KERNEL, 1, &memcg, false); |
5029 | if (ret || !memcg) | ||
5022 | /* mem_cgroup_clear_mc() will do uncharge later */ | 5030 | /* mem_cgroup_clear_mc() will do uncharge later */ |
5023 | return -ENOMEM; | 5031 | return -ENOMEM; |
5024 | mc.precharge++; | 5032 | mc.precharge++; |
@@ -5292,13 +5300,13 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss, | |||
5292 | struct task_struct *p) | 5300 | struct task_struct *p) |
5293 | { | 5301 | { |
5294 | int ret = 0; | 5302 | int ret = 0; |
5295 | struct mem_cgroup *mem = mem_cgroup_from_cont(cgroup); | 5303 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgroup); |
5296 | 5304 | ||
5297 | if (mem->move_charge_at_immigrate) { | 5305 | if (memcg->move_charge_at_immigrate) { |
5298 | struct mm_struct *mm; | 5306 | struct mm_struct *mm; |
5299 | struct mem_cgroup *from = mem_cgroup_from_task(p); | 5307 | struct mem_cgroup *from = mem_cgroup_from_task(p); |
5300 | 5308 | ||
5301 | VM_BUG_ON(from == mem); | 5309 | VM_BUG_ON(from == memcg); |
5302 | 5310 | ||
5303 | mm = get_task_mm(p); | 5311 | mm = get_task_mm(p); |
5304 | if (!mm) | 5312 | if (!mm) |
@@ -5313,7 +5321,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss, | |||
5313 | mem_cgroup_start_move(from); | 5321 | mem_cgroup_start_move(from); |
5314 | spin_lock(&mc.lock); | 5322 | spin_lock(&mc.lock); |
5315 | mc.from = from; | 5323 | mc.from = from; |
5316 | mc.to = mem; | 5324 | mc.to = memcg; |
5317 | spin_unlock(&mc.lock); | 5325 | spin_unlock(&mc.lock); |
5318 | /* We set mc.moving_task later */ | 5326 | /* We set mc.moving_task later */ |
5319 | 5327 | ||
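The mc.precharge, mc.from and mc.to fields touched in the precharge and can_attach hunks above belong to a single global move-charge state declared earlier in memcontrol.c. Its declaration is not visible in this diff; inferred from the usage here, it looks roughly like the following sketch (field list and initializers are assumptions):

    static struct move_charge_struct {
            spinlock_t lock;                 /* protects from and to */
            struct mem_cgroup *from;
            struct mem_cgroup *to;
            unsigned long precharge;         /* charges taken in advance */
            unsigned long moved_charge;
            unsigned long moved_swap;
            struct task_struct *moving_task; /* the task moving charges */
            wait_queue_head_t waitq;         /* waiters for the move to finish */
    } mc = {
            .lock = __SPIN_LOCK_UNLOCKED(mc.lock),
            .waitq = __WAIT_QUEUE_HEAD_INITIALIZER(mc.waitq),
    };

Only one charge-moving operation can be in flight at a time, which is why plain globals protected by mc.lock are sufficient.
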
diff --git a/mm/memory.c b/mm/memory.c index a56e3ba816b2..b2b87315cdc6 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -1503,7 +1503,7 @@ split_fallthrough: | |||
1503 | } | 1503 | } |
1504 | 1504 | ||
1505 | if (flags & FOLL_GET) | 1505 | if (flags & FOLL_GET) |
1506 | get_page(page); | 1506 | get_page_foll(page); |
1507 | if (flags & FOLL_TOUCH) { | 1507 | if (flags & FOLL_TOUCH) { |
1508 | if ((flags & FOLL_WRITE) && | 1508 | if ((flags & FOLL_WRITE) && |
1509 | !pte_dirty(pte) && !PageDirty(page)) | 1509 | !pte_dirty(pte) && !PageDirty(page)) |
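The get_page_foll() call introduced above is not defined in the hunks shown here; it presumably lives in mm/internal.h as part of the same tail-page refcounting rework. The point of the _foll variant is that follow_page() holds the PT lock while the pte (or huge pmd) still maps the page, so __split_huge_page_refcount() cannot run concurrently and no compound_lock is needed; a tail page is pinned by raising the head page's _count plus the tail page's _mapcount, leaving the tail's _count at zero for get_page_unless_zero(). A minimal sketch under those assumptions:

    static inline void __get_page_tail_foll(struct page *page,
                                            bool get_page_head)
    {
            /* caller holds the PT lock, so the compound page cannot split */
            VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0);
            VM_BUG_ON(atomic_read(&page->_count) != 0);
            VM_BUG_ON(page_mapcount(page) < 0);
            if (get_page_head)
                    atomic_inc(&page->first_page->_count);
            atomic_inc(&page->_mapcount);       /* tail pin lives in _mapcount */
    }

    static inline void get_page_foll(struct page *page)
    {
            if (unlikely(PageTail(page)))
                    __get_page_tail_foll(page, true);
            else {
                    /* normal or head page: needs an already elevated _count */
                    VM_BUG_ON(atomic_read(&page->_count) <= 0);
                    atomic_inc(&page->_count);
            }
    }
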
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index 6bdc67dbbc28..2d123f94a8df 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c | |||
@@ -133,10 +133,13 @@ struct page *lookup_cgroup_page(struct page_cgroup *pc) | |||
133 | static void *__meminit alloc_page_cgroup(size_t size, int nid) | 133 | static void *__meminit alloc_page_cgroup(size_t size, int nid) |
134 | { | 134 | { |
135 | void *addr = NULL; | 135 | void *addr = NULL; |
136 | gfp_t flags = GFP_KERNEL | __GFP_NOWARN; | ||
136 | 137 | ||
137 | addr = alloc_pages_exact_nid(nid, size, GFP_KERNEL | __GFP_NOWARN); | 138 | addr = alloc_pages_exact_nid(nid, size, flags); |
138 | if (addr) | 139 | if (addr) { |
140 | kmemleak_alloc(addr, size, 1, flags); | ||
139 | return addr; | 141 | return addr; |
142 | } | ||
140 | 143 | ||
141 | if (node_state(nid, N_HIGH_MEMORY)) | 144 | if (node_state(nid, N_HIGH_MEMORY)) |
142 | addr = vmalloc_node(size, nid); | 145 | addr = vmalloc_node(size, nid); |
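Memory obtained from alloc_pages_exact_nid() is not tracked by kmemleak on its own, so the table is registered explicitly: a min_count of 1 means an otherwise unreferenced table will be reported, and passing the same gfp flags keeps kmemleak's metadata allocation under the same constraints. The matching unregistration on the free side is not visible in this hunk; a hedged sketch of how it would pair up (table size assumed to mirror the allocation):

    static void free_page_cgroup(void *addr)
    {
            if (is_vmalloc_addr(addr)) {
                    vfree(addr);
            } else {
                    struct page *page = virt_to_page(addr);
                    size_t table_size =
                            sizeof(struct page_cgroup) * PAGES_PER_SECTION;

                    BUG_ON(PageReserved(page));
                    kmemleak_free(addr);            /* drop the tracking entry */
                    free_pages_exact(addr, table_size);
            }
    }
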
@@ -357,7 +360,7 @@ struct swap_cgroup_ctrl { | |||
357 | spinlock_t lock; | 360 | spinlock_t lock; |
358 | }; | 361 | }; |
359 | 362 | ||
360 | struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES]; | 363 | static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES]; |
361 | 364 | ||
362 | struct swap_cgroup { | 365 | struct swap_cgroup { |
363 | unsigned short id; | 366 | unsigned short id; |
diff --git a/mm/swap.c b/mm/swap.c --- a/mm/swap.c +++ b/mm/swap.c | |||
@@ -78,39 +78,22 @@ static void put_compound_page(struct page *page) | |||
78 | { | 78 | { |
79 | if (unlikely(PageTail(page))) { | 79 | if (unlikely(PageTail(page))) { |
80 | /* __split_huge_page_refcount can run under us */ | 80 | /* __split_huge_page_refcount can run under us */ |
81 | struct page *page_head = page->first_page; | 81 | struct page *page_head = compound_trans_head(page); |
82 | smp_rmb(); | 82 | |
83 | /* | 83 | if (likely(page != page_head && |
84 | * If PageTail is still set after smp_rmb() we can be sure | 84 | get_page_unless_zero(page_head))) { |
85 | * that the page->first_page we read wasn't a dangling pointer. | ||
86 | * See __split_huge_page_refcount() smp_wmb(). | ||
87 | */ | ||
88 | if (likely(PageTail(page) && get_page_unless_zero(page_head))) { | ||
89 | unsigned long flags; | 85 | unsigned long flags; |
90 | /* | 86 | /* |
91 | * Verify that our page_head wasn't converted | 87 | * page_head wasn't a dangling pointer but it |
92 | * to a a regular page before we got a | 88 | * may not be a head page anymore by the time |
93 | * reference on it. | 89 | * we obtain the lock. That is ok as long as it |
90 | * can't be freed from under us. | ||
94 | */ | 91 | */ |
95 | if (unlikely(!PageHead(page_head))) { | ||
96 | /* PageHead is cleared after PageTail */ | ||
97 | smp_rmb(); | ||
98 | VM_BUG_ON(PageTail(page)); | ||
99 | goto out_put_head; | ||
100 | } | ||
101 | /* | ||
102 | * Only run compound_lock on a valid PageHead, | ||
103 | * after having it pinned with | ||
104 | * get_page_unless_zero() above. | ||
105 | */ | ||
106 | smp_mb(); | ||
107 | /* page_head wasn't a dangling pointer */ | ||
108 | flags = compound_lock_irqsave(page_head); | 92 | flags = compound_lock_irqsave(page_head); |
109 | if (unlikely(!PageTail(page))) { | 93 | if (unlikely(!PageTail(page))) { |
110 | /* __split_huge_page_refcount run before us */ | 94 | /* __split_huge_page_refcount run before us */ |
111 | compound_unlock_irqrestore(page_head, flags); | 95 | compound_unlock_irqrestore(page_head, flags); |
112 | VM_BUG_ON(PageHead(page_head)); | 96 | VM_BUG_ON(PageHead(page_head)); |
113 | out_put_head: | ||
114 | if (put_page_testzero(page_head)) | 97 | if (put_page_testzero(page_head)) |
115 | __put_single_page(page_head); | 98 | __put_single_page(page_head); |
116 | out_put_single: | 99 | out_put_single: |
@@ -121,16 +104,17 @@ static void put_compound_page(struct page *page) | |||
121 | VM_BUG_ON(page_head != page->first_page); | 104 | VM_BUG_ON(page_head != page->first_page); |
122 | /* | 105 | /* |
123 | * We can release the refcount taken by | 106 | * We can release the refcount taken by |
124 | * get_page_unless_zero now that | 107 | * get_page_unless_zero() now that |
125 | * split_huge_page_refcount is blocked on the | 108 | * __split_huge_page_refcount() is blocked on |
126 | * compound_lock. | 109 | * the compound_lock. |
127 | */ | 110 | */ |
128 | if (put_page_testzero(page_head)) | 111 | if (put_page_testzero(page_head)) |
129 | VM_BUG_ON(1); | 112 | VM_BUG_ON(1); |
130 | /* __split_huge_page_refcount will wait now */ | 113 | /* __split_huge_page_refcount will wait now */ |
131 | VM_BUG_ON(atomic_read(&page->_count) <= 0); | 114 | VM_BUG_ON(page_mapcount(page) <= 0); |
132 | atomic_dec(&page->_count); | 115 | atomic_dec(&page->_mapcount); |
133 | VM_BUG_ON(atomic_read(&page_head->_count) <= 0); | 116 | VM_BUG_ON(atomic_read(&page_head->_count) <= 0); |
117 | VM_BUG_ON(atomic_read(&page->_count) != 0); | ||
134 | compound_unlock_irqrestore(page_head, flags); | 118 | compound_unlock_irqrestore(page_head, flags); |
135 | if (put_page_testzero(page_head)) { | 119 | if (put_page_testzero(page_head)) { |
136 | if (PageHead(page_head)) | 120 | if (PageHead(page_head)) |
@@ -160,6 +144,45 @@ void put_page(struct page *page) | |||
160 | } | 144 | } |
161 | EXPORT_SYMBOL(put_page); | 145 | EXPORT_SYMBOL(put_page); |
162 | 146 | ||
147 | /* | ||
148 | * This function is exported but must not be called by anything other | ||
149 | * than get_page(). It implements the slow path of get_page(). | ||
150 | */ | ||
151 | bool __get_page_tail(struct page *page) | ||
152 | { | ||
153 | /* | ||
154 | * This takes care of get_page() if run on a tail page | ||
155 | * returned by one of the get_user_pages/follow_page variants. | ||
156 | * get_user_pages/follow_page itself doesn't need the compound | ||
157 | * lock because it runs __get_page_tail_foll() under the | ||
158 | * proper PT lock that already serializes against | ||
159 | * split_huge_page(). | ||
160 | */ | ||
161 | unsigned long flags; | ||
162 | bool got = false; | ||
163 | struct page *page_head = compound_trans_head(page); | ||
164 | |||
165 | if (likely(page != page_head && get_page_unless_zero(page_head))) { | ||
166 | /* | ||
167 | * page_head wasn't a dangling pointer but it | ||
168 | * may not be a head page anymore by the time | ||
169 | * we obtain the lock. That is ok as long as it | ||
170 | * can't be freed from under us. | ||
171 | */ | ||
172 | flags = compound_lock_irqsave(page_head); | ||
173 | /* here __split_huge_page_refcount won't run anymore */ | ||
174 | if (likely(PageTail(page))) { | ||
175 | __get_page_tail_foll(page, false); | ||
176 | got = true; | ||
177 | } | ||
178 | compound_unlock_irqrestore(page_head, flags); | ||
179 | if (unlikely(!got)) | ||
180 | put_page(page_head); | ||
181 | } | ||
182 | return got; | ||
183 | } | ||
184 | EXPORT_SYMBOL(__get_page_tail); | ||
185 | |||
163 | /** | 186 | /** |
164 | * put_pages_list() - release a list of pages | 187 | * put_pages_list() - release a list of pages |
165 | * @pages: list of pages threaded on page->lru | 188 | * @pages: list of pages threaded on page->lru |
diff --git a/mm/vmscan.c b/mm/vmscan.c index a90c603a8d02..132d1ddb2238 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -1767,7 +1767,7 @@ static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc) | |||
1767 | if (scanning_global_lru(sc)) | 1767 | if (scanning_global_lru(sc)) |
1768 | low = inactive_anon_is_low_global(zone); | 1768 | low = inactive_anon_is_low_global(zone); |
1769 | else | 1769 | else |
1770 | low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup); | 1770 | low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup, zone); |
1771 | return low; | 1771 | return low; |
1772 | } | 1772 | } |
1773 | #else | 1773 | #else |
@@ -1810,7 +1810,7 @@ static int inactive_file_is_low(struct zone *zone, struct scan_control *sc) | |||
1810 | if (scanning_global_lru(sc)) | 1810 | if (scanning_global_lru(sc)) |
1811 | low = inactive_file_is_low_global(zone); | 1811 | low = inactive_file_is_low_global(zone); |
1812 | else | 1812 | else |
1813 | low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup); | 1813 | low = mem_cgroup_inactive_file_is_low(sc->mem_cgroup, zone); |
1814 | return low; | 1814 | return low; |
1815 | } | 1815 | } |
1816 | 1816 | ||
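Both hunks now hand the zone down so the memcg checks can compare active against inactive pages within that zone, mirroring the *_is_low_global() variants, instead of looking at the whole cgroup. The memcontrol.c side is not included in this section; a hedged sketch of what the zone-aware anon check presumably computes (helper names are assumptions):

    int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg,
                                        struct zone *zone)
    {
            int nid = zone_to_nid(zone);
            int zid = zone_idx(zone);
            unsigned long inactive, active, gb, inactive_ratio;

            inactive = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
                                                    BIT(LRU_INACTIVE_ANON));
            active = mem_cgroup_zone_nr_lru_pages(memcg, nid, zid,
                                                  BIT(LRU_ACTIVE_ANON));

            /* same heuristic as the global check: ratio grows with sqrt(size) */
            gb = (inactive + active) >> (30 - PAGE_SHIFT);
            if (gb)
                    inactive_ratio = int_sqrt(10 * gb * 1024);
            else
                    inactive_ratio = 1;

            return inactive * inactive_ratio < active;
    }
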