author     Ingo Molnar <mingo@kernel.org>  2015-03-04 00:35:43 -0500
committer  Ingo Molnar <mingo@kernel.org>  2015-03-04 00:35:43 -0500
commit     d2c032e3dc58137a7261a7824d3acce435db1d66
tree       7eea1c7c6103eefe879f07472eec99b3c41eb792  /mm/memcontrol.c
parent     7e8e385aaf6ed5b64b5d9108081cfcdcdd021b78
parent     13a7a6ac0a11197edcd0f756a035f472b42cdf8b
Merge tag 'v4.0-rc2' into x86/asm, to refresh the tree
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'mm/memcontrol.c'):
 -rw-r--r--  mm/memcontrol.c | 1073
 1 file changed, 580 insertions(+), 493 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2f6893c2f01b..9fe07692eaad 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -72,22 +72,13 @@ EXPORT_SYMBOL(memory_cgrp_subsys); | |||
72 | #define MEM_CGROUP_RECLAIM_RETRIES 5 | 72 | #define MEM_CGROUP_RECLAIM_RETRIES 5 |
73 | static struct mem_cgroup *root_mem_cgroup __read_mostly; | 73 | static struct mem_cgroup *root_mem_cgroup __read_mostly; |
74 | 74 | ||
75 | /* Whether the swap controller is active */ | ||
75 | #ifdef CONFIG_MEMCG_SWAP | 76 | #ifdef CONFIG_MEMCG_SWAP |
76 | /* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */ | ||
77 | int do_swap_account __read_mostly; | 77 | int do_swap_account __read_mostly; |
78 | |||
79 | /* for remember boot option*/ | ||
80 | #ifdef CONFIG_MEMCG_SWAP_ENABLED | ||
81 | static int really_do_swap_account __initdata = 1; | ||
82 | #else | ||
83 | static int really_do_swap_account __initdata; | ||
84 | #endif | ||
85 | |||
86 | #else | 78 | #else |
87 | #define do_swap_account 0 | 79 | #define do_swap_account 0 |
88 | #endif | 80 | #endif |
89 | 81 | ||
90 | |||
91 | static const char * const mem_cgroup_stat_names[] = { | 82 | static const char * const mem_cgroup_stat_names[] = { |
92 | "cache", | 83 | "cache", |
93 | "rss", | 84 | "rss", |
@@ -97,14 +88,6 @@ static const char * const mem_cgroup_stat_names[] = { | |||
97 | "swap", | 88 | "swap", |
98 | }; | 89 | }; |
99 | 90 | ||
100 | enum mem_cgroup_events_index { | ||
101 | MEM_CGROUP_EVENTS_PGPGIN, /* # of pages paged in */ | ||
102 | MEM_CGROUP_EVENTS_PGPGOUT, /* # of pages paged out */ | ||
103 | MEM_CGROUP_EVENTS_PGFAULT, /* # of page-faults */ | ||
104 | MEM_CGROUP_EVENTS_PGMAJFAULT, /* # of major page-faults */ | ||
105 | MEM_CGROUP_EVENTS_NSTATS, | ||
106 | }; | ||
107 | |||
108 | static const char * const mem_cgroup_events_names[] = { | 91 | static const char * const mem_cgroup_events_names[] = { |
109 | "pgpgin", | 92 | "pgpgin", |
110 | "pgpgout", | 93 | "pgpgout", |
@@ -138,7 +121,7 @@ enum mem_cgroup_events_target { | |||
138 | 121 | ||
139 | struct mem_cgroup_stat_cpu { | 122 | struct mem_cgroup_stat_cpu { |
140 | long count[MEM_CGROUP_STAT_NSTATS]; | 123 | long count[MEM_CGROUP_STAT_NSTATS]; |
141 | unsigned long events[MEM_CGROUP_EVENTS_NSTATS]; | 124 | unsigned long events[MEMCG_NR_EVENTS]; |
142 | unsigned long nr_page_events; | 125 | unsigned long nr_page_events; |
143 | unsigned long targets[MEM_CGROUP_NTARGETS]; | 126 | unsigned long targets[MEM_CGROUP_NTARGETS]; |
144 | }; | 127 | }; |
@@ -284,6 +267,10 @@ struct mem_cgroup { | |||
284 | struct page_counter memsw; | 267 | struct page_counter memsw; |
285 | struct page_counter kmem; | 268 | struct page_counter kmem; |
286 | 269 | ||
270 | /* Normal memory consumption range */ | ||
271 | unsigned long low; | ||
272 | unsigned long high; | ||
273 | |||
287 | unsigned long soft_limit; | 274 | unsigned long soft_limit; |
288 | 275 | ||
289 | /* vmpressure notifications */ | 276 | /* vmpressure notifications */ |
@@ -325,9 +312,11 @@ struct mem_cgroup { | |||
325 | /* | 312 | /* |
326 | * set > 0 if pages under this cgroup are moving to other cgroup. | 313 | * set > 0 if pages under this cgroup are moving to other cgroup. |
327 | */ | 314 | */ |
328 | atomic_t moving_account; | 315 | atomic_t moving_account; |
329 | /* taken only while moving_account > 0 */ | 316 | /* taken only while moving_account > 0 */ |
330 | spinlock_t move_lock; | 317 | spinlock_t move_lock; |
318 | struct task_struct *move_lock_task; | ||
319 | unsigned long move_lock_flags; | ||
331 | /* | 320 | /* |
332 | * percpu counter. | 321 | * percpu counter. |
333 | */ | 322 | */ |
@@ -343,11 +332,10 @@ struct mem_cgroup { | |||
343 | struct cg_proto tcp_mem; | 332 | struct cg_proto tcp_mem; |
344 | #endif | 333 | #endif |
345 | #if defined(CONFIG_MEMCG_KMEM) | 334 | #if defined(CONFIG_MEMCG_KMEM) |
346 | /* analogous to slab_common's slab_caches list, but per-memcg; | 335 | /* Index in the kmem_cache->memcg_params.memcg_caches array */ |
347 | * protected by memcg_slab_mutex */ | ||
348 | struct list_head memcg_slab_caches; | ||
349 | /* Index in the kmem_cache->memcg_params->memcg_caches array */ | ||
350 | int kmemcg_id; | 336 | int kmemcg_id; |
337 | bool kmem_acct_activated; | ||
338 | bool kmem_acct_active; | ||
351 | #endif | 339 | #endif |
352 | 340 | ||
353 | int last_scanned_node; | 341 | int last_scanned_node; |
@@ -366,29 +354,26 @@ struct mem_cgroup { | |||
366 | }; | 354 | }; |
367 | 355 | ||
368 | #ifdef CONFIG_MEMCG_KMEM | 356 | #ifdef CONFIG_MEMCG_KMEM |
369 | static bool memcg_kmem_is_active(struct mem_cgroup *memcg) | 357 | bool memcg_kmem_is_active(struct mem_cgroup *memcg) |
370 | { | 358 | { |
371 | return memcg->kmemcg_id >= 0; | 359 | return memcg->kmem_acct_active; |
372 | } | 360 | } |
373 | #endif | 361 | #endif |
374 | 362 | ||
375 | /* Stuffs for move charges at task migration. */ | 363 | /* Stuffs for move charges at task migration. */ |
376 | /* | 364 | /* |
377 | * Types of charges to be moved. "move_charge_at_immitgrate" and | 365 | * Types of charges to be moved. |
378 | * "immigrate_flags" are treated as a left-shifted bitmap of these types. | ||
379 | */ | 366 | */ |
380 | enum move_type { | 367 | #define MOVE_ANON 0x1U |
381 | MOVE_CHARGE_TYPE_ANON, /* private anonymous page and swap of it */ | 368 | #define MOVE_FILE 0x2U |
382 | MOVE_CHARGE_TYPE_FILE, /* file page(including tmpfs) and swap of it */ | 369 | #define MOVE_MASK (MOVE_ANON | MOVE_FILE) |
383 | NR_MOVE_TYPE, | ||
384 | }; | ||
385 | 370 | ||
386 | /* "mc" and its members are protected by cgroup_mutex */ | 371 | /* "mc" and its members are protected by cgroup_mutex */ |
387 | static struct move_charge_struct { | 372 | static struct move_charge_struct { |
388 | spinlock_t lock; /* for from, to */ | 373 | spinlock_t lock; /* for from, to */ |
389 | struct mem_cgroup *from; | 374 | struct mem_cgroup *from; |
390 | struct mem_cgroup *to; | 375 | struct mem_cgroup *to; |
391 | unsigned long immigrate_flags; | 376 | unsigned long flags; |
392 | unsigned long precharge; | 377 | unsigned long precharge; |
393 | unsigned long moved_charge; | 378 | unsigned long moved_charge; |
394 | unsigned long moved_swap; | 379 | unsigned long moved_swap; |
@@ -399,16 +384,6 @@ static struct move_charge_struct { | |||
399 | .waitq = __WAIT_QUEUE_HEAD_INITIALIZER(mc.waitq), | 384 | .waitq = __WAIT_QUEUE_HEAD_INITIALIZER(mc.waitq), |
400 | }; | 385 | }; |
401 | 386 | ||
402 | static bool move_anon(void) | ||
403 | { | ||
404 | return test_bit(MOVE_CHARGE_TYPE_ANON, &mc.immigrate_flags); | ||
405 | } | ||
406 | |||
407 | static bool move_file(void) | ||
408 | { | ||
409 | return test_bit(MOVE_CHARGE_TYPE_FILE, &mc.immigrate_flags); | ||
410 | } | ||
411 | |||
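The move_anon()/move_file() helpers removed here are folded into direct tests of the new MOVE_ANON/MOVE_FILE bits in mc.flags (see the later hunks in mc_handle_present_pte() and friends). A minimal sketch of the new caller pattern, assuming the mc, MOVE_ANON and MOVE_FILE definitions added above; the wrapper function itself is illustrative, not part of the patch:

    /* Sketch only: decide whether a page is eligible for charge moving
     * under the new bitmask scheme. */
    static bool sketch_should_move(struct page *page)
    {
            if (PageAnon(page))
                    return mc.flags & MOVE_ANON;    /* was move_anon() */
            return mc.flags & MOVE_FILE;            /* was move_file() */
    }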
412 | /* | 387 | /* |
413 | * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft | 388 | * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft |
414 | * limit reclaim to prevent infinite loops, if they ever occur. | 389 | * limit reclaim to prevent infinite loops, if they ever occur. |
@@ -544,33 +519,35 @@ struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg) | |||
544 | } | 519 | } |
545 | EXPORT_SYMBOL(tcp_proto_cgroup); | 520 | EXPORT_SYMBOL(tcp_proto_cgroup); |
546 | 521 | ||
547 | static void disarm_sock_keys(struct mem_cgroup *memcg) | ||
548 | { | ||
549 | if (!memcg_proto_activated(&memcg->tcp_mem)) | ||
550 | return; | ||
551 | static_key_slow_dec(&memcg_socket_limit_enabled); | ||
552 | } | ||
553 | #else | ||
554 | static void disarm_sock_keys(struct mem_cgroup *memcg) | ||
555 | { | ||
556 | } | ||
557 | #endif | 522 | #endif |
558 | 523 | ||
559 | #ifdef CONFIG_MEMCG_KMEM | 524 | #ifdef CONFIG_MEMCG_KMEM |
560 | /* | 525 | /* |
561 | * This will be the memcg's index in each cache's ->memcg_params->memcg_caches. | 526 | * This will be the memcg's index in each cache's ->memcg_params.memcg_caches. |
562 | * The main reason for not using cgroup id for this: | 527 | * The main reason for not using cgroup id for this: |
563 | * this works better in sparse environments, where we have a lot of memcgs, | 528 | * this works better in sparse environments, where we have a lot of memcgs, |
564 | * but only a few kmem-limited. Or also, if we have, for instance, 200 | 529 | * but only a few kmem-limited. Or also, if we have, for instance, 200 |
565 | * memcgs, and none but the 200th is kmem-limited, we'd have to have a | 530 | * memcgs, and none but the 200th is kmem-limited, we'd have to have a |
566 | * 200 entry array for that. | 531 | * 200 entry array for that. |
567 | * | 532 | * |
568 | * The current size of the caches array is stored in | 533 | * The current size of the caches array is stored in memcg_nr_cache_ids. It |
569 | * memcg_limited_groups_array_size. It will double each time we have to | 534 | * will double each time we have to increase it. |
570 | * increase it. | ||
571 | */ | 535 | */ |
572 | static DEFINE_IDA(kmem_limited_groups); | 536 | static DEFINE_IDA(memcg_cache_ida); |
573 | int memcg_limited_groups_array_size; | 537 | int memcg_nr_cache_ids; |
538 | |||
539 | /* Protects memcg_nr_cache_ids */ | ||
540 | static DECLARE_RWSEM(memcg_cache_ids_sem); | ||
541 | |||
542 | void memcg_get_cache_ids(void) | ||
543 | { | ||
544 | down_read(&memcg_cache_ids_sem); | ||
545 | } | ||
546 | |||
547 | void memcg_put_cache_ids(void) | ||
548 | { | ||
549 | up_read(&memcg_cache_ids_sem); | ||
550 | } | ||
574 | 551 | ||
575 | /* | 552 | /* |
576 | * MIN_SIZE is different than 1, because we would like to avoid going through | 553 | * MIN_SIZE is different than 1, because we would like to avoid going through |
@@ -596,32 +573,8 @@ int memcg_limited_groups_array_size; | |||
596 | struct static_key memcg_kmem_enabled_key; | 573 | struct static_key memcg_kmem_enabled_key; |
597 | EXPORT_SYMBOL(memcg_kmem_enabled_key); | 574 | EXPORT_SYMBOL(memcg_kmem_enabled_key); |
598 | 575 | ||
599 | static void memcg_free_cache_id(int id); | ||
600 | |||
601 | static void disarm_kmem_keys(struct mem_cgroup *memcg) | ||
602 | { | ||
603 | if (memcg_kmem_is_active(memcg)) { | ||
604 | static_key_slow_dec(&memcg_kmem_enabled_key); | ||
605 | memcg_free_cache_id(memcg->kmemcg_id); | ||
606 | } | ||
607 | /* | ||
608 | * This check can't live in kmem destruction function, | ||
609 | * since the charges will outlive the cgroup | ||
610 | */ | ||
611 | WARN_ON(page_counter_read(&memcg->kmem)); | ||
612 | } | ||
613 | #else | ||
614 | static void disarm_kmem_keys(struct mem_cgroup *memcg) | ||
615 | { | ||
616 | } | ||
617 | #endif /* CONFIG_MEMCG_KMEM */ | 576 | #endif /* CONFIG_MEMCG_KMEM */ |
618 | 577 | ||
619 | static void disarm_static_keys(struct mem_cgroup *memcg) | ||
620 | { | ||
621 | disarm_sock_keys(memcg); | ||
622 | disarm_kmem_keys(memcg); | ||
623 | } | ||
624 | |||
625 | static struct mem_cgroup_per_zone * | 578 | static struct mem_cgroup_per_zone * |
626 | mem_cgroup_zone_zoneinfo(struct mem_cgroup *memcg, struct zone *zone) | 579 | mem_cgroup_zone_zoneinfo(struct mem_cgroup *memcg, struct zone *zone) |
627 | { | 580 | { |
@@ -1368,6 +1321,20 @@ int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec) | |||
1368 | return inactive * inactive_ratio < active; | 1321 | return inactive * inactive_ratio < active; |
1369 | } | 1322 | } |
1370 | 1323 | ||
1324 | bool mem_cgroup_lruvec_online(struct lruvec *lruvec) | ||
1325 | { | ||
1326 | struct mem_cgroup_per_zone *mz; | ||
1327 | struct mem_cgroup *memcg; | ||
1328 | |||
1329 | if (mem_cgroup_disabled()) | ||
1330 | return true; | ||
1331 | |||
1332 | mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec); | ||
1333 | memcg = mz->memcg; | ||
1334 | |||
1335 | return !!(memcg->css.flags & CSS_ONLINE); | ||
1336 | } | ||
1337 | |||
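mem_cgroup_lruvec_online() is new in this merge; it reports whether the memcg owning an lruvec is still online, so callers outside this file can skip work for lruvecs whose cgroup has already been taken down. A hedged sketch of such a caller; the surrounding reclaim function is a placeholder, not from this patch:

    static void sketch_shrink_lruvec(struct lruvec *lruvec)
    {
            if (!mem_cgroup_lruvec_online(lruvec))
                    return;         /* owning cgroup is offline, skip it */
            /* ... per-lruvec reclaim would go here ... */
    }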
1371 | #define mem_cgroup_from_counter(counter, member) \ | 1338 | #define mem_cgroup_from_counter(counter, member) \ |
1372 | container_of(counter, struct mem_cgroup, member) | 1339 | container_of(counter, struct mem_cgroup, member) |
1373 | 1340 | ||
@@ -1560,7 +1527,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, | |||
1560 | * quickly exit and free its memory. | 1527 | * quickly exit and free its memory. |
1561 | */ | 1528 | */ |
1562 | if (fatal_signal_pending(current) || task_will_free_mem(current)) { | 1529 | if (fatal_signal_pending(current) || task_will_free_mem(current)) { |
1563 | set_thread_flag(TIF_MEMDIE); | 1530 | mark_tsk_oom_victim(current); |
1564 | return; | 1531 | return; |
1565 | } | 1532 | } |
1566 | 1533 | ||
@@ -1934,7 +1901,7 @@ bool mem_cgroup_oom_synchronize(bool handle) | |||
1934 | if (!memcg) | 1901 | if (!memcg) |
1935 | return false; | 1902 | return false; |
1936 | 1903 | ||
1937 | if (!handle) | 1904 | if (!handle || oom_killer_disabled) |
1938 | goto cleanup; | 1905 | goto cleanup; |
1939 | 1906 | ||
1940 | owait.memcg = memcg; | 1907 | owait.memcg = memcg; |
@@ -1980,34 +1947,33 @@ cleanup: | |||
1980 | /** | 1947 | /** |
1981 | * mem_cgroup_begin_page_stat - begin a page state statistics transaction | 1948 | * mem_cgroup_begin_page_stat - begin a page state statistics transaction |
1982 | * @page: page that is going to change accounted state | 1949 | * @page: page that is going to change accounted state |
1983 | * @locked: &memcg->move_lock slowpath was taken | ||
1984 | * @flags: IRQ-state flags for &memcg->move_lock | ||
1985 | * | 1950 | * |
1986 | * This function must mark the beginning of an accounted page state | 1951 | * This function must mark the beginning of an accounted page state |
1987 | * change to prevent double accounting when the page is concurrently | 1952 | * change to prevent double accounting when the page is concurrently |
1988 | * being moved to another memcg: | 1953 | * being moved to another memcg: |
1989 | * | 1954 | * |
1990 | * memcg = mem_cgroup_begin_page_stat(page, &locked, &flags); | 1955 | * memcg = mem_cgroup_begin_page_stat(page); |
1991 | * if (TestClearPageState(page)) | 1956 | * if (TestClearPageState(page)) |
1992 | * mem_cgroup_update_page_stat(memcg, state, -1); | 1957 | * mem_cgroup_update_page_stat(memcg, state, -1); |
1993 | * mem_cgroup_end_page_stat(memcg, locked, flags); | 1958 | * mem_cgroup_end_page_stat(memcg); |
1994 | * | ||
1995 | * The RCU lock is held throughout the transaction. The fast path can | ||
1996 | * get away without acquiring the memcg->move_lock (@locked is false) | ||
1997 | * because page moving starts with an RCU grace period. | ||
1998 | * | ||
1999 | * The RCU lock also protects the memcg from being freed when the page | ||
2000 | * state that is going to change is the only thing preventing the page | ||
2001 | * from being uncharged. E.g. end-writeback clearing PageWriteback(), | ||
2002 | * which allows migration to go ahead and uncharge the page before the | ||
2003 | * account transaction might be complete. | ||
2004 | */ | 1959 | */ |
2005 | struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page, | 1960 | struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page) |
2006 | bool *locked, | ||
2007 | unsigned long *flags) | ||
2008 | { | 1961 | { |
2009 | struct mem_cgroup *memcg; | 1962 | struct mem_cgroup *memcg; |
1963 | unsigned long flags; | ||
2010 | 1964 | ||
1965 | /* | ||
1966 | * The RCU lock is held throughout the transaction. The fast | ||
1967 | * path can get away without acquiring the memcg->move_lock | ||
1968 | * because page moving starts with an RCU grace period. | ||
1969 | * | ||
1970 | * The RCU lock also protects the memcg from being freed when | ||
1971 | * the page state that is going to change is the only thing | ||
1972 | * preventing the page from being uncharged. | ||
1973 | * E.g. end-writeback clearing PageWriteback(), which allows | ||
1974 | * migration to go ahead and uncharge the page before the | ||
1975 | * account transaction might be complete. | ||
1976 | */ | ||
2011 | rcu_read_lock(); | 1977 | rcu_read_lock(); |
2012 | 1978 | ||
2013 | if (mem_cgroup_disabled()) | 1979 | if (mem_cgroup_disabled()) |
@@ -2017,16 +1983,22 @@ again: | |||
2017 | if (unlikely(!memcg)) | 1983 | if (unlikely(!memcg)) |
2018 | return NULL; | 1984 | return NULL; |
2019 | 1985 | ||
2020 | *locked = false; | ||
2021 | if (atomic_read(&memcg->moving_account) <= 0) | 1986 | if (atomic_read(&memcg->moving_account) <= 0) |
2022 | return memcg; | 1987 | return memcg; |
2023 | 1988 | ||
2024 | spin_lock_irqsave(&memcg->move_lock, *flags); | 1989 | spin_lock_irqsave(&memcg->move_lock, flags); |
2025 | if (memcg != page->mem_cgroup) { | 1990 | if (memcg != page->mem_cgroup) { |
2026 | spin_unlock_irqrestore(&memcg->move_lock, *flags); | 1991 | spin_unlock_irqrestore(&memcg->move_lock, flags); |
2027 | goto again; | 1992 | goto again; |
2028 | } | 1993 | } |
2029 | *locked = true; | 1994 | |
1995 | /* | ||
1996 | * When charge migration first begins, we can have locked and | ||
1997 | * unlocked page stat updates happening concurrently. Track | ||
1998 | * the task who has the lock for mem_cgroup_end_page_stat(). | ||
1999 | */ | ||
2000 | memcg->move_lock_task = current; | ||
2001 | memcg->move_lock_flags = flags; | ||
2030 | 2002 | ||
2031 | return memcg; | 2003 | return memcg; |
2032 | } | 2004 | } |
@@ -2034,14 +2006,17 @@ again: | |||
2034 | /** | 2006 | /** |
2035 | * mem_cgroup_end_page_stat - finish a page state statistics transaction | 2007 | * mem_cgroup_end_page_stat - finish a page state statistics transaction |
2036 | * @memcg: the memcg that was accounted against | 2008 | * @memcg: the memcg that was accounted against |
2037 | * @locked: value received from mem_cgroup_begin_page_stat() | ||
2038 | * @flags: value received from mem_cgroup_begin_page_stat() | ||
2039 | */ | 2009 | */ |
2040 | void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool *locked, | 2010 | void mem_cgroup_end_page_stat(struct mem_cgroup *memcg) |
2041 | unsigned long *flags) | ||
2042 | { | 2011 | { |
2043 | if (memcg && *locked) | 2012 | if (memcg && memcg->move_lock_task == current) { |
2044 | spin_unlock_irqrestore(&memcg->move_lock, *flags); | 2013 | unsigned long flags = memcg->move_lock_flags; |
2014 | |||
2015 | memcg->move_lock_task = NULL; | ||
2016 | memcg->move_lock_flags = 0; | ||
2017 | |||
2018 | spin_unlock_irqrestore(&memcg->move_lock, flags); | ||
2019 | } | ||
2045 | 2020 | ||
2046 | rcu_read_unlock(); | 2021 | rcu_read_unlock(); |
2047 | } | 2022 | } |
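mem_cgroup_begin_page_stat()/mem_cgroup_end_page_stat() lose their locked/flags out-parameters; the move_lock owner and IRQ flags are now stashed in memcg->move_lock_task and memcg->move_lock_flags, so the unlock side can decide by itself whether the current task holds the lock. The caller pattern after this change, sketched from the updated kernel-doc above (TestClearPageState and the stat index are the placeholders used in that comment):

    struct mem_cgroup *memcg;

    memcg = mem_cgroup_begin_page_stat(page);
    if (TestClearPageState(page))
            mem_cgroup_update_page_stat(memcg, state, -1);
    mem_cgroup_end_page_stat(memcg);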
@@ -2134,17 +2109,6 @@ static void drain_local_stock(struct work_struct *dummy) | |||
2134 | clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags); | 2109 | clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags); |
2135 | } | 2110 | } |
2136 | 2111 | ||
2137 | static void __init memcg_stock_init(void) | ||
2138 | { | ||
2139 | int cpu; | ||
2140 | |||
2141 | for_each_possible_cpu(cpu) { | ||
2142 | struct memcg_stock_pcp *stock = | ||
2143 | &per_cpu(memcg_stock, cpu); | ||
2144 | INIT_WORK(&stock->work, drain_local_stock); | ||
2145 | } | ||
2146 | } | ||
2147 | |||
2148 | /* | 2112 | /* |
2149 | * Cache charges(val) to local per_cpu area. | 2113 | * Cache charges(val) to local per_cpu area. |
2150 | * This will be consumed by consume_stock() function, later. | 2114 | * This will be consumed by consume_stock() function, later. |
@@ -2294,6 +2258,8 @@ retry: | |||
2294 | if (!(gfp_mask & __GFP_WAIT)) | 2258 | if (!(gfp_mask & __GFP_WAIT)) |
2295 | goto nomem; | 2259 | goto nomem; |
2296 | 2260 | ||
2261 | mem_cgroup_events(mem_over_limit, MEMCG_MAX, 1); | ||
2262 | |||
2297 | nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages, | 2263 | nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages, |
2298 | gfp_mask, may_swap); | 2264 | gfp_mask, may_swap); |
2299 | 2265 | ||
@@ -2335,6 +2301,8 @@ retry: | |||
2335 | if (fatal_signal_pending(current)) | 2301 | if (fatal_signal_pending(current)) |
2336 | goto bypass; | 2302 | goto bypass; |
2337 | 2303 | ||
2304 | mem_cgroup_events(mem_over_limit, MEMCG_OOM, 1); | ||
2305 | |||
2338 | mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(nr_pages)); | 2306 | mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(nr_pages)); |
2339 | nomem: | 2307 | nomem: |
2340 | if (!(gfp_mask & __GFP_NOFAIL)) | 2308 | if (!(gfp_mask & __GFP_NOFAIL)) |
@@ -2346,6 +2314,16 @@ done_restock: | |||
2346 | css_get_many(&memcg->css, batch); | 2314 | css_get_many(&memcg->css, batch); |
2347 | if (batch > nr_pages) | 2315 | if (batch > nr_pages) |
2348 | refill_stock(memcg, batch - nr_pages); | 2316 | refill_stock(memcg, batch - nr_pages); |
2317 | /* | ||
2318 | * If the hierarchy is above the normal consumption range, | ||
2319 | * make the charging task trim their excess contribution. | ||
2320 | */ | ||
2321 | do { | ||
2322 | if (page_counter_read(&memcg->memory) <= memcg->high) | ||
2323 | continue; | ||
2324 | mem_cgroup_events(memcg, MEMCG_HIGH, 1); | ||
2325 | try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true); | ||
2326 | } while ((memcg = parent_mem_cgroup(memcg))); | ||
2349 | done: | 2327 | done: |
2350 | return ret; | 2328 | return ret; |
2351 | } | 2329 | } |
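The new block above implements the "high" boundary: after a successful charge, every ancestor that sits above memcg->high counts a MEMCG_HIGH event and the charging task reclaims nr_pages on its behalf before returning. One subtlety worth spelling out: since this is a do/while loop, continue still re-evaluates the loop condition, so ancestors below their high mark are merely skipped and the walk keeps climbing via parent_mem_cgroup(). A hedged restatement as a standalone helper (illustrative name, not in the patch):

    static void sketch_reclaim_over_high(struct mem_cgroup *memcg,
                                         gfp_t gfp_mask, unsigned int nr_pages)
    {
            do {
                    if (page_counter_read(&memcg->memory) <= memcg->high)
                            continue;       /* this level is fine, go up */
                    mem_cgroup_events(memcg, MEMCG_HIGH, 1);
                    try_to_free_mem_cgroup_pages(memcg, nr_pages,
                                                 gfp_mask, true);
            } while ((memcg = parent_mem_cgroup(memcg)));
    }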
@@ -2476,27 +2454,8 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg, | |||
2476 | } | 2454 | } |
2477 | 2455 | ||
2478 | #ifdef CONFIG_MEMCG_KMEM | 2456 | #ifdef CONFIG_MEMCG_KMEM |
2479 | /* | 2457 | int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, |
2480 | * The memcg_slab_mutex is held whenever a per memcg kmem cache is created or | 2458 | unsigned long nr_pages) |
2481 | * destroyed. It protects memcg_caches arrays and memcg_slab_caches lists. | ||
2482 | */ | ||
2483 | static DEFINE_MUTEX(memcg_slab_mutex); | ||
2484 | |||
2485 | /* | ||
2486 | * This is a bit cumbersome, but it is rarely used and avoids a backpointer | ||
2487 | * in the memcg_cache_params struct. | ||
2488 | */ | ||
2489 | static struct kmem_cache *memcg_params_to_cache(struct memcg_cache_params *p) | ||
2490 | { | ||
2491 | struct kmem_cache *cachep; | ||
2492 | |||
2493 | VM_BUG_ON(p->is_root_cache); | ||
2494 | cachep = p->root_cache; | ||
2495 | return cache_from_memcg_idx(cachep, memcg_cache_id(p->memcg)); | ||
2496 | } | ||
2497 | |||
2498 | static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, | ||
2499 | unsigned long nr_pages) | ||
2500 | { | 2459 | { |
2501 | struct page_counter *counter; | 2460 | struct page_counter *counter; |
2502 | int ret = 0; | 2461 | int ret = 0; |
@@ -2533,8 +2492,7 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, | |||
2533 | return ret; | 2492 | return ret; |
2534 | } | 2493 | } |
2535 | 2494 | ||
2536 | static void memcg_uncharge_kmem(struct mem_cgroup *memcg, | 2495 | void memcg_uncharge_kmem(struct mem_cgroup *memcg, unsigned long nr_pages) |
2537 | unsigned long nr_pages) | ||
2538 | { | 2496 | { |
2539 | page_counter_uncharge(&memcg->memory, nr_pages); | 2497 | page_counter_uncharge(&memcg->memory, nr_pages); |
2540 | if (do_swap_account) | 2498 | if (do_swap_account) |
@@ -2560,18 +2518,19 @@ static int memcg_alloc_cache_id(void) | |||
2560 | int id, size; | 2518 | int id, size; |
2561 | int err; | 2519 | int err; |
2562 | 2520 | ||
2563 | id = ida_simple_get(&kmem_limited_groups, | 2521 | id = ida_simple_get(&memcg_cache_ida, |
2564 | 0, MEMCG_CACHES_MAX_SIZE, GFP_KERNEL); | 2522 | 0, MEMCG_CACHES_MAX_SIZE, GFP_KERNEL); |
2565 | if (id < 0) | 2523 | if (id < 0) |
2566 | return id; | 2524 | return id; |
2567 | 2525 | ||
2568 | if (id < memcg_limited_groups_array_size) | 2526 | if (id < memcg_nr_cache_ids) |
2569 | return id; | 2527 | return id; |
2570 | 2528 | ||
2571 | /* | 2529 | /* |
2572 | * There's no space for the new id in memcg_caches arrays, | 2530 | * There's no space for the new id in memcg_caches arrays, |
2573 | * so we have to grow them. | 2531 | * so we have to grow them. |
2574 | */ | 2532 | */ |
2533 | down_write(&memcg_cache_ids_sem); | ||
2575 | 2534 | ||
2576 | size = 2 * (id + 1); | 2535 | size = 2 * (id + 1); |
2577 | if (size < MEMCG_CACHES_MIN_SIZE) | 2536 | if (size < MEMCG_CACHES_MIN_SIZE) |
@@ -2579,12 +2538,16 @@ static int memcg_alloc_cache_id(void) | |||
2579 | else if (size > MEMCG_CACHES_MAX_SIZE) | 2538 | else if (size > MEMCG_CACHES_MAX_SIZE) |
2580 | size = MEMCG_CACHES_MAX_SIZE; | 2539 | size = MEMCG_CACHES_MAX_SIZE; |
2581 | 2540 | ||
2582 | mutex_lock(&memcg_slab_mutex); | ||
2583 | err = memcg_update_all_caches(size); | 2541 | err = memcg_update_all_caches(size); |
2584 | mutex_unlock(&memcg_slab_mutex); | 2542 | if (!err) |
2543 | err = memcg_update_all_list_lrus(size); | ||
2544 | if (!err) | ||
2545 | memcg_nr_cache_ids = size; | ||
2546 | |||
2547 | up_write(&memcg_cache_ids_sem); | ||
2585 | 2548 | ||
2586 | if (err) { | 2549 | if (err) { |
2587 | ida_simple_remove(&kmem_limited_groups, id); | 2550 | ida_simple_remove(&memcg_cache_ida, id); |
2588 | return err; | 2551 | return err; |
2589 | } | 2552 | } |
2590 | return id; | 2553 | return id; |
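memcg_nr_cache_ids (formerly memcg_limited_groups_array_size) is now guarded by memcg_cache_ids_sem: this function takes it for writing while the per-memcg cache arrays and list_lrus are resized, and readers are expected to bracket any walk over the id space with the new accessors. A hedged sketch of the reader side; the callback and loop body are placeholders:

    static void sketch_for_each_cache_id(void (*fn)(int id))
    {
            int i;

            memcg_get_cache_ids();          /* down_read(&memcg_cache_ids_sem) */
            for (i = 0; i < memcg_nr_cache_ids; i++)
                    fn(i);
            memcg_put_cache_ids();          /* up_read(&memcg_cache_ids_sem) */
    }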
@@ -2592,136 +2555,23 @@ static int memcg_alloc_cache_id(void) | |||
2592 | 2555 | ||
2593 | static void memcg_free_cache_id(int id) | 2556 | static void memcg_free_cache_id(int id) |
2594 | { | 2557 | { |
2595 | ida_simple_remove(&kmem_limited_groups, id); | 2558 | ida_simple_remove(&memcg_cache_ida, id); |
2596 | } | 2559 | } |
2597 | 2560 | ||
2598 | /* | 2561 | struct memcg_kmem_cache_create_work { |
2599 | * We should update the current array size iff all caches updates succeed. This | ||
2600 | * can only be done from the slab side. The slab mutex needs to be held when | ||
2601 | * calling this. | ||
2602 | */ | ||
2603 | void memcg_update_array_size(int num) | ||
2604 | { | ||
2605 | memcg_limited_groups_array_size = num; | ||
2606 | } | ||
2607 | |||
2608 | static void memcg_register_cache(struct mem_cgroup *memcg, | ||
2609 | struct kmem_cache *root_cache) | ||
2610 | { | ||
2611 | static char memcg_name_buf[NAME_MAX + 1]; /* protected by | ||
2612 | memcg_slab_mutex */ | ||
2613 | struct kmem_cache *cachep; | ||
2614 | int id; | ||
2615 | |||
2616 | lockdep_assert_held(&memcg_slab_mutex); | ||
2617 | |||
2618 | id = memcg_cache_id(memcg); | ||
2619 | |||
2620 | /* | ||
2621 | * Since per-memcg caches are created asynchronously on first | ||
2622 | * allocation (see memcg_kmem_get_cache()), several threads can try to | ||
2623 | * create the same cache, but only one of them may succeed. | ||
2624 | */ | ||
2625 | if (cache_from_memcg_idx(root_cache, id)) | ||
2626 | return; | ||
2627 | |||
2628 | cgroup_name(memcg->css.cgroup, memcg_name_buf, NAME_MAX + 1); | ||
2629 | cachep = memcg_create_kmem_cache(memcg, root_cache, memcg_name_buf); | ||
2630 | /* | ||
2631 | * If we could not create a memcg cache, do not complain, because | ||
2632 | * that's not critical at all as we can always proceed with the root | ||
2633 | * cache. | ||
2634 | */ | ||
2635 | if (!cachep) | ||
2636 | return; | ||
2637 | |||
2638 | list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches); | ||
2639 | |||
2640 | /* | ||
2641 | * Since readers won't lock (see cache_from_memcg_idx()), we need a | ||
2642 | * barrier here to ensure nobody will see the kmem_cache partially | ||
2643 | * initialized. | ||
2644 | */ | ||
2645 | smp_wmb(); | ||
2646 | |||
2647 | BUG_ON(root_cache->memcg_params->memcg_caches[id]); | ||
2648 | root_cache->memcg_params->memcg_caches[id] = cachep; | ||
2649 | } | ||
2650 | |||
2651 | static void memcg_unregister_cache(struct kmem_cache *cachep) | ||
2652 | { | ||
2653 | struct kmem_cache *root_cache; | ||
2654 | struct mem_cgroup *memcg; | ||
2655 | int id; | ||
2656 | |||
2657 | lockdep_assert_held(&memcg_slab_mutex); | ||
2658 | |||
2659 | BUG_ON(is_root_cache(cachep)); | ||
2660 | |||
2661 | root_cache = cachep->memcg_params->root_cache; | ||
2662 | memcg = cachep->memcg_params->memcg; | ||
2663 | id = memcg_cache_id(memcg); | ||
2664 | |||
2665 | BUG_ON(root_cache->memcg_params->memcg_caches[id] != cachep); | ||
2666 | root_cache->memcg_params->memcg_caches[id] = NULL; | ||
2667 | |||
2668 | list_del(&cachep->memcg_params->list); | ||
2669 | |||
2670 | kmem_cache_destroy(cachep); | ||
2671 | } | ||
2672 | |||
2673 | int __memcg_cleanup_cache_params(struct kmem_cache *s) | ||
2674 | { | ||
2675 | struct kmem_cache *c; | ||
2676 | int i, failed = 0; | ||
2677 | |||
2678 | mutex_lock(&memcg_slab_mutex); | ||
2679 | for_each_memcg_cache_index(i) { | ||
2680 | c = cache_from_memcg_idx(s, i); | ||
2681 | if (!c) | ||
2682 | continue; | ||
2683 | |||
2684 | memcg_unregister_cache(c); | ||
2685 | |||
2686 | if (cache_from_memcg_idx(s, i)) | ||
2687 | failed++; | ||
2688 | } | ||
2689 | mutex_unlock(&memcg_slab_mutex); | ||
2690 | return failed; | ||
2691 | } | ||
2692 | |||
2693 | static void memcg_unregister_all_caches(struct mem_cgroup *memcg) | ||
2694 | { | ||
2695 | struct kmem_cache *cachep; | ||
2696 | struct memcg_cache_params *params, *tmp; | ||
2697 | |||
2698 | if (!memcg_kmem_is_active(memcg)) | ||
2699 | return; | ||
2700 | |||
2701 | mutex_lock(&memcg_slab_mutex); | ||
2702 | list_for_each_entry_safe(params, tmp, &memcg->memcg_slab_caches, list) { | ||
2703 | cachep = memcg_params_to_cache(params); | ||
2704 | memcg_unregister_cache(cachep); | ||
2705 | } | ||
2706 | mutex_unlock(&memcg_slab_mutex); | ||
2707 | } | ||
2708 | |||
2709 | struct memcg_register_cache_work { | ||
2710 | struct mem_cgroup *memcg; | 2562 | struct mem_cgroup *memcg; |
2711 | struct kmem_cache *cachep; | 2563 | struct kmem_cache *cachep; |
2712 | struct work_struct work; | 2564 | struct work_struct work; |
2713 | }; | 2565 | }; |
2714 | 2566 | ||
2715 | static void memcg_register_cache_func(struct work_struct *w) | 2567 | static void memcg_kmem_cache_create_func(struct work_struct *w) |
2716 | { | 2568 | { |
2717 | struct memcg_register_cache_work *cw = | 2569 | struct memcg_kmem_cache_create_work *cw = |
2718 | container_of(w, struct memcg_register_cache_work, work); | 2570 | container_of(w, struct memcg_kmem_cache_create_work, work); |
2719 | struct mem_cgroup *memcg = cw->memcg; | 2571 | struct mem_cgroup *memcg = cw->memcg; |
2720 | struct kmem_cache *cachep = cw->cachep; | 2572 | struct kmem_cache *cachep = cw->cachep; |
2721 | 2573 | ||
2722 | mutex_lock(&memcg_slab_mutex); | 2574 | memcg_create_kmem_cache(memcg, cachep); |
2723 | memcg_register_cache(memcg, cachep); | ||
2724 | mutex_unlock(&memcg_slab_mutex); | ||
2725 | 2575 | ||
2726 | css_put(&memcg->css); | 2576 | css_put(&memcg->css); |
2727 | kfree(cw); | 2577 | kfree(cw); |
@@ -2730,10 +2580,10 @@ static void memcg_register_cache_func(struct work_struct *w) | |||
2730 | /* | 2580 | /* |
2731 | * Enqueue the creation of a per-memcg kmem_cache. | 2581 | * Enqueue the creation of a per-memcg kmem_cache. |
2732 | */ | 2582 | */ |
2733 | static void __memcg_schedule_register_cache(struct mem_cgroup *memcg, | 2583 | static void __memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg, |
2734 | struct kmem_cache *cachep) | 2584 | struct kmem_cache *cachep) |
2735 | { | 2585 | { |
2736 | struct memcg_register_cache_work *cw; | 2586 | struct memcg_kmem_cache_create_work *cw; |
2737 | 2587 | ||
2738 | cw = kmalloc(sizeof(*cw), GFP_NOWAIT); | 2588 | cw = kmalloc(sizeof(*cw), GFP_NOWAIT); |
2739 | if (!cw) | 2589 | if (!cw) |
@@ -2743,18 +2593,18 @@ static void __memcg_schedule_register_cache(struct mem_cgroup *memcg, | |||
2743 | 2593 | ||
2744 | cw->memcg = memcg; | 2594 | cw->memcg = memcg; |
2745 | cw->cachep = cachep; | 2595 | cw->cachep = cachep; |
2596 | INIT_WORK(&cw->work, memcg_kmem_cache_create_func); | ||
2746 | 2597 | ||
2747 | INIT_WORK(&cw->work, memcg_register_cache_func); | ||
2748 | schedule_work(&cw->work); | 2598 | schedule_work(&cw->work); |
2749 | } | 2599 | } |
2750 | 2600 | ||
2751 | static void memcg_schedule_register_cache(struct mem_cgroup *memcg, | 2601 | static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg, |
2752 | struct kmem_cache *cachep) | 2602 | struct kmem_cache *cachep) |
2753 | { | 2603 | { |
2754 | /* | 2604 | /* |
2755 | * We need to stop accounting when we kmalloc, because if the | 2605 | * We need to stop accounting when we kmalloc, because if the |
2756 | * corresponding kmalloc cache is not yet created, the first allocation | 2606 | * corresponding kmalloc cache is not yet created, the first allocation |
2757 | * in __memcg_schedule_register_cache will recurse. | 2607 | * in __memcg_schedule_kmem_cache_create will recurse. |
2758 | * | 2608 | * |
2759 | * However, it is better to enclose the whole function. Depending on | 2609 | * However, it is better to enclose the whole function. Depending on |
2760 | * the debugging options enabled, INIT_WORK(), for instance, can | 2610 | * the debugging options enabled, INIT_WORK(), for instance, can |
@@ -2763,24 +2613,10 @@ static void memcg_schedule_register_cache(struct mem_cgroup *memcg, | |||
2763 | * the safest choice is to do it like this, wrapping the whole function. | 2613 | * the safest choice is to do it like this, wrapping the whole function. |
2764 | */ | 2614 | */ |
2765 | current->memcg_kmem_skip_account = 1; | 2615 | current->memcg_kmem_skip_account = 1; |
2766 | __memcg_schedule_register_cache(memcg, cachep); | 2616 | __memcg_schedule_kmem_cache_create(memcg, cachep); |
2767 | current->memcg_kmem_skip_account = 0; | 2617 | current->memcg_kmem_skip_account = 0; |
2768 | } | 2618 | } |
2769 | 2619 | ||
2770 | int __memcg_charge_slab(struct kmem_cache *cachep, gfp_t gfp, int order) | ||
2771 | { | ||
2772 | unsigned int nr_pages = 1 << order; | ||
2773 | |||
2774 | return memcg_charge_kmem(cachep->memcg_params->memcg, gfp, nr_pages); | ||
2775 | } | ||
2776 | |||
2777 | void __memcg_uncharge_slab(struct kmem_cache *cachep, int order) | ||
2778 | { | ||
2779 | unsigned int nr_pages = 1 << order; | ||
2780 | |||
2781 | memcg_uncharge_kmem(cachep->memcg_params->memcg, nr_pages); | ||
2782 | } | ||
2783 | |||
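__memcg_charge_slab()/__memcg_uncharge_slab() disappear because memcg_charge_kmem()/memcg_uncharge_kmem() are no longer static (see the signature changes earlier in this diff), so slab code can call them directly. A hedged sketch of what such a caller looks like under the new embedded memcg_params layout; the wrapper name is illustrative:

    static int sketch_charge_slab_page(struct kmem_cache *s, gfp_t gfp, int order)
    {
            if (is_root_cache(s))
                    return 0;       /* root caches are not accounted */
            return memcg_charge_kmem(s->memcg_params.memcg, gfp, 1 << order);
    }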
2784 | /* | 2620 | /* |
2785 | * Return the kmem_cache we're supposed to use for a slab allocation. | 2621 | * Return the kmem_cache we're supposed to use for a slab allocation. |
2786 | * We try to use the current memcg's version of the cache. | 2622 | * We try to use the current memcg's version of the cache. |
@@ -2798,18 +2634,19 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep) | |||
2798 | { | 2634 | { |
2799 | struct mem_cgroup *memcg; | 2635 | struct mem_cgroup *memcg; |
2800 | struct kmem_cache *memcg_cachep; | 2636 | struct kmem_cache *memcg_cachep; |
2637 | int kmemcg_id; | ||
2801 | 2638 | ||
2802 | VM_BUG_ON(!cachep->memcg_params); | 2639 | VM_BUG_ON(!is_root_cache(cachep)); |
2803 | VM_BUG_ON(!cachep->memcg_params->is_root_cache); | ||
2804 | 2640 | ||
2805 | if (current->memcg_kmem_skip_account) | 2641 | if (current->memcg_kmem_skip_account) |
2806 | return cachep; | 2642 | return cachep; |
2807 | 2643 | ||
2808 | memcg = get_mem_cgroup_from_mm(current->mm); | 2644 | memcg = get_mem_cgroup_from_mm(current->mm); |
2809 | if (!memcg_kmem_is_active(memcg)) | 2645 | kmemcg_id = ACCESS_ONCE(memcg->kmemcg_id); |
2646 | if (kmemcg_id < 0) | ||
2810 | goto out; | 2647 | goto out; |
2811 | 2648 | ||
2812 | memcg_cachep = cache_from_memcg_idx(cachep, memcg_cache_id(memcg)); | 2649 | memcg_cachep = cache_from_memcg_idx(cachep, kmemcg_id); |
2813 | if (likely(memcg_cachep)) | 2650 | if (likely(memcg_cachep)) |
2814 | return memcg_cachep; | 2651 | return memcg_cachep; |
2815 | 2652 | ||
@@ -2825,7 +2662,7 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep) | |||
2825 | * could happen with the slab_mutex held. So it's better to | 2662 | * could happen with the slab_mutex held. So it's better to |
2826 | * defer everything. | 2663 | * defer everything. |
2827 | */ | 2664 | */ |
2828 | memcg_schedule_register_cache(memcg, cachep); | 2665 | memcg_schedule_kmem_cache_create(memcg, cachep); |
2829 | out: | 2666 | out: |
2830 | css_put(&memcg->css); | 2667 | css_put(&memcg->css); |
2831 | return cachep; | 2668 | return cachep; |
@@ -2834,7 +2671,7 @@ out: | |||
2834 | void __memcg_kmem_put_cache(struct kmem_cache *cachep) | 2671 | void __memcg_kmem_put_cache(struct kmem_cache *cachep) |
2835 | { | 2672 | { |
2836 | if (!is_root_cache(cachep)) | 2673 | if (!is_root_cache(cachep)) |
2837 | css_put(&cachep->memcg_params->memcg->css); | 2674 | css_put(&cachep->memcg_params.memcg->css); |
2838 | } | 2675 | } |
2839 | 2676 | ||
2840 | /* | 2677 | /* |
@@ -2899,6 +2736,24 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order) | |||
2899 | memcg_uncharge_kmem(memcg, 1 << order); | 2736 | memcg_uncharge_kmem(memcg, 1 << order); |
2900 | page->mem_cgroup = NULL; | 2737 | page->mem_cgroup = NULL; |
2901 | } | 2738 | } |
2739 | |||
2740 | struct mem_cgroup *__mem_cgroup_from_kmem(void *ptr) | ||
2741 | { | ||
2742 | struct mem_cgroup *memcg = NULL; | ||
2743 | struct kmem_cache *cachep; | ||
2744 | struct page *page; | ||
2745 | |||
2746 | page = virt_to_head_page(ptr); | ||
2747 | if (PageSlab(page)) { | ||
2748 | cachep = page->slab_cache; | ||
2749 | if (!is_root_cache(cachep)) | ||
2750 | memcg = cachep->memcg_params.memcg; | ||
2751 | } else | ||
2752 | /* page allocated by alloc_kmem_pages */ | ||
2753 | memcg = page->mem_cgroup; | ||
2754 | |||
2755 | return memcg; | ||
2756 | } | ||
2902 | #endif /* CONFIG_MEMCG_KMEM */ | 2757 | #endif /* CONFIG_MEMCG_KMEM */ |
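__mem_cgroup_from_kmem() is a new lookup that maps a kmem-accounted object back to its memcg, either through the owning per-memcg kmem_cache (slab objects) or through page->mem_cgroup (pages from alloc_kmem_pages). A hedged sketch of a caller; the wrapper and the assumption that the object cannot be freed concurrently are mine, not the patch's:

    static bool sketch_object_is_charged_to(void *ptr, struct mem_cgroup *memcg)
    {
            /* assumption: caller keeps ptr alive across the lookup;
             * the helper returns NULL for root / unaccounted allocations */
            return __mem_cgroup_from_kmem(ptr) == memcg;
    }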
2903 | 2758 | ||
2904 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 2759 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
@@ -3433,8 +3288,9 @@ static int memcg_activate_kmem(struct mem_cgroup *memcg, | |||
3433 | int err = 0; | 3288 | int err = 0; |
3434 | int memcg_id; | 3289 | int memcg_id; |
3435 | 3290 | ||
3436 | if (memcg_kmem_is_active(memcg)) | 3291 | BUG_ON(memcg->kmemcg_id >= 0); |
3437 | return 0; | 3292 | BUG_ON(memcg->kmem_acct_activated); |
3293 | BUG_ON(memcg->kmem_acct_active); | ||
3438 | 3294 | ||
3439 | /* | 3295 | /* |
3440 | * For simplicity, we won't allow this to be disabled. It also can't | 3296 | * For simplicity, we won't allow this to be disabled. It also can't |
@@ -3477,6 +3333,8 @@ static int memcg_activate_kmem(struct mem_cgroup *memcg, | |||
3477 | * patched. | 3333 | * patched. |
3478 | */ | 3334 | */ |
3479 | memcg->kmemcg_id = memcg_id; | 3335 | memcg->kmemcg_id = memcg_id; |
3336 | memcg->kmem_acct_activated = true; | ||
3337 | memcg->kmem_acct_active = true; | ||
3480 | out: | 3338 | out: |
3481 | return err; | 3339 | return err; |
3482 | } | 3340 | } |
@@ -3533,7 +3391,7 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of, | |||
3533 | int ret; | 3391 | int ret; |
3534 | 3392 | ||
3535 | buf = strstrip(buf); | 3393 | buf = strstrip(buf); |
3536 | ret = page_counter_memparse(buf, &nr_pages); | 3394 | ret = page_counter_memparse(buf, "-1", &nr_pages); |
3537 | if (ret) | 3395 | if (ret) |
3538 | return ret; | 3396 | return ret; |
3539 | 3397 | ||
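page_counter_memparse() grew a second parameter naming the token that means "no limit"; the legacy files here keep "-1", presumably so other interfaces can use a different token such as "max". A hedged sketch of the resulting contract as these callers use it; the wrapper is illustrative:

    static int sketch_parse_legacy_limit(const char *buf, unsigned long *nr_pages)
    {
            /* "-1" parses to PAGE_COUNTER_MAX, anything else goes through
             * memparse() (assumption based on how this patch uses it) */
            return page_counter_memparse(buf, "-1", nr_pages);
    }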
@@ -3609,7 +3467,7 @@ static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css, | |||
3609 | { | 3467 | { |
3610 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 3468 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
3611 | 3469 | ||
3612 | if (val >= (1 << NR_MOVE_TYPE)) | 3470 | if (val & ~MOVE_MASK) |
3613 | return -EINVAL; | 3471 | return -EINVAL; |
3614 | 3472 | ||
3615 | /* | 3473 | /* |
@@ -3687,6 +3545,10 @@ static int memcg_stat_show(struct seq_file *m, void *v) | |||
3687 | struct mem_cgroup *mi; | 3545 | struct mem_cgroup *mi; |
3688 | unsigned int i; | 3546 | unsigned int i; |
3689 | 3547 | ||
3548 | BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_stat_names) != | ||
3549 | MEM_CGROUP_STAT_NSTATS); | ||
3550 | BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_events_names) != | ||
3551 | MEM_CGROUP_EVENTS_NSTATS); | ||
3690 | BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS); | 3552 | BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS); |
3691 | 3553 | ||
3692 | for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { | 3554 | for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { |
@@ -3901,7 +3763,7 @@ static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg, | |||
3901 | unsigned long usage; | 3763 | unsigned long usage; |
3902 | int i, size, ret; | 3764 | int i, size, ret; |
3903 | 3765 | ||
3904 | ret = page_counter_memparse(args, &threshold); | 3766 | ret = page_counter_memparse(args, "-1", &threshold); |
3905 | if (ret) | 3767 | if (ret) |
3906 | return ret; | 3768 | return ret; |
3907 | 3769 | ||
@@ -4152,9 +4014,59 @@ static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) | |||
4152 | return mem_cgroup_sockets_init(memcg, ss); | 4014 | return mem_cgroup_sockets_init(memcg, ss); |
4153 | } | 4015 | } |
4154 | 4016 | ||
4017 | static void memcg_deactivate_kmem(struct mem_cgroup *memcg) | ||
4018 | { | ||
4019 | struct cgroup_subsys_state *css; | ||
4020 | struct mem_cgroup *parent, *child; | ||
4021 | int kmemcg_id; | ||
4022 | |||
4023 | if (!memcg->kmem_acct_active) | ||
4024 | return; | ||
4025 | |||
4026 | /* | ||
4027 | * Clear the 'active' flag before clearing memcg_caches arrays entries. | ||
4028 | * Since we take the slab_mutex in memcg_deactivate_kmem_caches(), it | ||
4029 | * guarantees no cache will be created for this cgroup after we are | ||
4030 | * done (see memcg_create_kmem_cache()). | ||
4031 | */ | ||
4032 | memcg->kmem_acct_active = false; | ||
4033 | |||
4034 | memcg_deactivate_kmem_caches(memcg); | ||
4035 | |||
4036 | kmemcg_id = memcg->kmemcg_id; | ||
4037 | BUG_ON(kmemcg_id < 0); | ||
4038 | |||
4039 | parent = parent_mem_cgroup(memcg); | ||
4040 | if (!parent) | ||
4041 | parent = root_mem_cgroup; | ||
4042 | |||
4043 | /* | ||
4044 | * Change kmemcg_id of this cgroup and all its descendants to the | ||
4045 | * parent's id, and then move all entries from this cgroup's list_lrus | ||
4046 | * to ones of the parent. After we have finished, all list_lrus | ||
4047 | * corresponding to this cgroup are guaranteed to remain empty. The | ||
4048 | * ordering is imposed by list_lru_node->lock taken by | ||
4049 | * memcg_drain_all_list_lrus(). | ||
4050 | */ | ||
4051 | css_for_each_descendant_pre(css, &memcg->css) { | ||
4052 | child = mem_cgroup_from_css(css); | ||
4053 | BUG_ON(child->kmemcg_id != kmemcg_id); | ||
4054 | child->kmemcg_id = parent->kmemcg_id; | ||
4055 | if (!memcg->use_hierarchy) | ||
4056 | break; | ||
4057 | } | ||
4058 | memcg_drain_all_list_lrus(kmemcg_id, parent->kmemcg_id); | ||
4059 | |||
4060 | memcg_free_cache_id(kmemcg_id); | ||
4061 | } | ||
4062 | |||
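memcg_deactivate_kmem() is the offline-time counterpart of kmem activation: it clears kmem_acct_active (so no new per-memcg caches appear), reparents kmemcg_id for the whole subtree, drains the list_lrus, and only then frees the old id. A hedged restatement of that ordering; the helper below is illustrative, not from the patch:

    static void sketch_reparent_kmem_id(struct mem_cgroup *memcg,
                                        struct mem_cgroup *parent)
    {
            int old_id = memcg->kmemcg_id;

            memcg->kmemcg_id = parent->kmemcg_id;                 /* 1: redirect */
            memcg_drain_all_list_lrus(old_id, parent->kmemcg_id); /* 2: drain */
            memcg_free_cache_id(old_id);                          /* 3: reuse is safe */
    }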
4155 | static void memcg_destroy_kmem(struct mem_cgroup *memcg) | 4063 | static void memcg_destroy_kmem(struct mem_cgroup *memcg) |
4156 | { | 4064 | { |
4157 | memcg_unregister_all_caches(memcg); | 4065 | if (memcg->kmem_acct_activated) { |
4066 | memcg_destroy_kmem_caches(memcg); | ||
4067 | static_key_slow_dec(&memcg_kmem_enabled_key); | ||
4068 | WARN_ON(page_counter_read(&memcg->kmem)); | ||
4069 | } | ||
4158 | mem_cgroup_sockets_destroy(memcg); | 4070 | mem_cgroup_sockets_destroy(memcg); |
4159 | } | 4071 | } |
4160 | #else | 4072 | #else |
@@ -4163,6 +4075,10 @@ static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) | |||
4163 | return 0; | 4075 | return 0; |
4164 | } | 4076 | } |
4165 | 4077 | ||
4078 | static void memcg_deactivate_kmem(struct mem_cgroup *memcg) | ||
4079 | { | ||
4080 | } | ||
4081 | |||
4166 | static void memcg_destroy_kmem(struct mem_cgroup *memcg) | 4082 | static void memcg_destroy_kmem(struct mem_cgroup *memcg) |
4167 | { | 4083 | { |
4168 | } | 4084 | } |
@@ -4391,7 +4307,7 @@ out_kfree: | |||
4391 | return ret; | 4307 | return ret; |
4392 | } | 4308 | } |
4393 | 4309 | ||
4394 | static struct cftype mem_cgroup_files[] = { | 4310 | static struct cftype mem_cgroup_legacy_files[] = { |
4395 | { | 4311 | { |
4396 | .name = "usage_in_bytes", | 4312 | .name = "usage_in_bytes", |
4397 | .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), | 4313 | .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), |
@@ -4502,34 +4418,6 @@ static struct cftype mem_cgroup_files[] = { | |||
4502 | { }, /* terminate */ | 4418 | { }, /* terminate */ |
4503 | }; | 4419 | }; |
4504 | 4420 | ||
4505 | #ifdef CONFIG_MEMCG_SWAP | ||
4506 | static struct cftype memsw_cgroup_files[] = { | ||
4507 | { | ||
4508 | .name = "memsw.usage_in_bytes", | ||
4509 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), | ||
4510 | .read_u64 = mem_cgroup_read_u64, | ||
4511 | }, | ||
4512 | { | ||
4513 | .name = "memsw.max_usage_in_bytes", | ||
4514 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE), | ||
4515 | .write = mem_cgroup_reset, | ||
4516 | .read_u64 = mem_cgroup_read_u64, | ||
4517 | }, | ||
4518 | { | ||
4519 | .name = "memsw.limit_in_bytes", | ||
4520 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT), | ||
4521 | .write = mem_cgroup_write, | ||
4522 | .read_u64 = mem_cgroup_read_u64, | ||
4523 | }, | ||
4524 | { | ||
4525 | .name = "memsw.failcnt", | ||
4526 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT), | ||
4527 | .write = mem_cgroup_reset, | ||
4528 | .read_u64 = mem_cgroup_read_u64, | ||
4529 | }, | ||
4530 | { }, /* terminate */ | ||
4531 | }; | ||
4532 | #endif | ||
4533 | static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) | 4421 | static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) |
4534 | { | 4422 | { |
4535 | struct mem_cgroup_per_node *pn; | 4423 | struct mem_cgroup_per_node *pn; |
@@ -4609,8 +4497,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) | |||
4609 | free_mem_cgroup_per_zone_info(memcg, node); | 4497 | free_mem_cgroup_per_zone_info(memcg, node); |
4610 | 4498 | ||
4611 | free_percpu(memcg->stat); | 4499 | free_percpu(memcg->stat); |
4612 | |||
4613 | disarm_static_keys(memcg); | ||
4614 | kfree(memcg); | 4500 | kfree(memcg); |
4615 | } | 4501 | } |
4616 | 4502 | ||
@@ -4625,29 +4511,6 @@ struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) | |||
4625 | } | 4511 | } |
4626 | EXPORT_SYMBOL(parent_mem_cgroup); | 4512 | EXPORT_SYMBOL(parent_mem_cgroup); |
4627 | 4513 | ||
4628 | static void __init mem_cgroup_soft_limit_tree_init(void) | ||
4629 | { | ||
4630 | struct mem_cgroup_tree_per_node *rtpn; | ||
4631 | struct mem_cgroup_tree_per_zone *rtpz; | ||
4632 | int tmp, node, zone; | ||
4633 | |||
4634 | for_each_node(node) { | ||
4635 | tmp = node; | ||
4636 | if (!node_state(node, N_NORMAL_MEMORY)) | ||
4637 | tmp = -1; | ||
4638 | rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, tmp); | ||
4639 | BUG_ON(!rtpn); | ||
4640 | |||
4641 | soft_limit_tree.rb_tree_per_node[node] = rtpn; | ||
4642 | |||
4643 | for (zone = 0; zone < MAX_NR_ZONES; zone++) { | ||
4644 | rtpz = &rtpn->rb_tree_per_zone[zone]; | ||
4645 | rtpz->rb_root = RB_ROOT; | ||
4646 | spin_lock_init(&rtpz->lock); | ||
4647 | } | ||
4648 | } | ||
4649 | } | ||
4650 | |||
4651 | static struct cgroup_subsys_state * __ref | 4514 | static struct cgroup_subsys_state * __ref |
4652 | mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) | 4515 | mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) |
4653 | { | 4516 | { |
@@ -4667,6 +4530,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) | |||
4667 | if (parent_css == NULL) { | 4530 | if (parent_css == NULL) { |
4668 | root_mem_cgroup = memcg; | 4531 | root_mem_cgroup = memcg; |
4669 | page_counter_init(&memcg->memory, NULL); | 4532 | page_counter_init(&memcg->memory, NULL); |
4533 | memcg->high = PAGE_COUNTER_MAX; | ||
4670 | memcg->soft_limit = PAGE_COUNTER_MAX; | 4534 | memcg->soft_limit = PAGE_COUNTER_MAX; |
4671 | page_counter_init(&memcg->memsw, NULL); | 4535 | page_counter_init(&memcg->memsw, NULL); |
4672 | page_counter_init(&memcg->kmem, NULL); | 4536 | page_counter_init(&memcg->kmem, NULL); |
@@ -4682,7 +4546,6 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) | |||
4682 | spin_lock_init(&memcg->event_list_lock); | 4546 | spin_lock_init(&memcg->event_list_lock); |
4683 | #ifdef CONFIG_MEMCG_KMEM | 4547 | #ifdef CONFIG_MEMCG_KMEM |
4684 | memcg->kmemcg_id = -1; | 4548 | memcg->kmemcg_id = -1; |
4685 | INIT_LIST_HEAD(&memcg->memcg_slab_caches); | ||
4686 | #endif | 4549 | #endif |
4687 | 4550 | ||
4688 | return &memcg->css; | 4551 | return &memcg->css; |
@@ -4713,6 +4576,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css) | |||
4713 | 4576 | ||
4714 | if (parent->use_hierarchy) { | 4577 | if (parent->use_hierarchy) { |
4715 | page_counter_init(&memcg->memory, &parent->memory); | 4578 | page_counter_init(&memcg->memory, &parent->memory); |
4579 | memcg->high = PAGE_COUNTER_MAX; | ||
4716 | memcg->soft_limit = PAGE_COUNTER_MAX; | 4580 | memcg->soft_limit = PAGE_COUNTER_MAX; |
4717 | page_counter_init(&memcg->memsw, &parent->memsw); | 4581 | page_counter_init(&memcg->memsw, &parent->memsw); |
4718 | page_counter_init(&memcg->kmem, &parent->kmem); | 4582 | page_counter_init(&memcg->kmem, &parent->kmem); |
@@ -4723,6 +4587,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css) | |||
4723 | */ | 4587 | */ |
4724 | } else { | 4588 | } else { |
4725 | page_counter_init(&memcg->memory, NULL); | 4589 | page_counter_init(&memcg->memory, NULL); |
4590 | memcg->high = PAGE_COUNTER_MAX; | ||
4726 | memcg->soft_limit = PAGE_COUNTER_MAX; | 4591 | memcg->soft_limit = PAGE_COUNTER_MAX; |
4727 | page_counter_init(&memcg->memsw, NULL); | 4592 | page_counter_init(&memcg->memsw, NULL); |
4728 | page_counter_init(&memcg->kmem, NULL); | 4593 | page_counter_init(&memcg->kmem, NULL); |
@@ -4768,6 +4633,8 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) | |||
4768 | spin_unlock(&memcg->event_list_lock); | 4633 | spin_unlock(&memcg->event_list_lock); |
4769 | 4634 | ||
4770 | vmpressure_cleanup(&memcg->vmpressure); | 4635 | vmpressure_cleanup(&memcg->vmpressure); |
4636 | |||
4637 | memcg_deactivate_kmem(memcg); | ||
4771 | } | 4638 | } |
4772 | 4639 | ||
4773 | static void mem_cgroup_css_free(struct cgroup_subsys_state *css) | 4640 | static void mem_cgroup_css_free(struct cgroup_subsys_state *css) |
@@ -4798,6 +4665,8 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css) | |||
4798 | mem_cgroup_resize_limit(memcg, PAGE_COUNTER_MAX); | 4665 | mem_cgroup_resize_limit(memcg, PAGE_COUNTER_MAX); |
4799 | mem_cgroup_resize_memsw_limit(memcg, PAGE_COUNTER_MAX); | 4666 | mem_cgroup_resize_memsw_limit(memcg, PAGE_COUNTER_MAX); |
4800 | memcg_update_kmem_limit(memcg, PAGE_COUNTER_MAX); | 4667 | memcg_update_kmem_limit(memcg, PAGE_COUNTER_MAX); |
4668 | memcg->low = 0; | ||
4669 | memcg->high = PAGE_COUNTER_MAX; | ||
4801 | memcg->soft_limit = PAGE_COUNTER_MAX; | 4670 | memcg->soft_limit = PAGE_COUNTER_MAX; |
4802 | } | 4671 | } |
4803 | 4672 | ||
@@ -4874,12 +4743,12 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma, | |||
4874 | if (!page || !page_mapped(page)) | 4743 | if (!page || !page_mapped(page)) |
4875 | return NULL; | 4744 | return NULL; |
4876 | if (PageAnon(page)) { | 4745 | if (PageAnon(page)) { |
4877 | /* we don't move shared anon */ | 4746 | if (!(mc.flags & MOVE_ANON)) |
4878 | if (!move_anon()) | ||
4879 | return NULL; | 4747 | return NULL; |
4880 | } else if (!move_file()) | 4748 | } else { |
4881 | /* we ignore mapcount for file pages */ | 4749 | if (!(mc.flags & MOVE_FILE)) |
4882 | return NULL; | 4750 | return NULL; |
4751 | } | ||
4883 | if (!get_page_unless_zero(page)) | 4752 | if (!get_page_unless_zero(page)) |
4884 | return NULL; | 4753 | return NULL; |
4885 | 4754 | ||
@@ -4893,7 +4762,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma, | |||
4893 | struct page *page = NULL; | 4762 | struct page *page = NULL; |
4894 | swp_entry_t ent = pte_to_swp_entry(ptent); | 4763 | swp_entry_t ent = pte_to_swp_entry(ptent); |
4895 | 4764 | ||
4896 | if (!move_anon() || non_swap_entry(ent)) | 4765 | if (!(mc.flags & MOVE_ANON) || non_swap_entry(ent)) |
4897 | return NULL; | 4766 | return NULL; |
4898 | /* | 4767 | /* |
4899 | * Because lookup_swap_cache() updates some statistics counter, | 4768 | * Because lookup_swap_cache() updates some statistics counter, |
@@ -4922,14 +4791,11 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma, | |||
4922 | 4791 | ||
4923 | if (!vma->vm_file) /* anonymous vma */ | 4792 | if (!vma->vm_file) /* anonymous vma */ |
4924 | return NULL; | 4793 | return NULL; |
4925 | if (!move_file()) | 4794 | if (!(mc.flags & MOVE_FILE)) |
4926 | return NULL; | 4795 | return NULL; |
4927 | 4796 | ||
4928 | mapping = vma->vm_file->f_mapping; | 4797 | mapping = vma->vm_file->f_mapping; |
4929 | if (pte_none(ptent)) | 4798 | pgoff = linear_page_index(vma, addr); |
4930 | pgoff = linear_page_index(vma, addr); | ||
4931 | else /* pte_file(ptent) is true */ | ||
4932 | pgoff = pte_to_pgoff(ptent); | ||
4933 | 4799 | ||
4934 | /* page is moved even if it's not RSS of this task(page-faulted). */ | 4800 | /* page is moved even if it's not RSS of this task(page-faulted). */ |
4935 | #ifdef CONFIG_SWAP | 4801 | #ifdef CONFIG_SWAP |
@@ -4961,7 +4827,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma, | |||
4961 | page = mc_handle_present_pte(vma, addr, ptent); | 4827 | page = mc_handle_present_pte(vma, addr, ptent); |
4962 | else if (is_swap_pte(ptent)) | 4828 | else if (is_swap_pte(ptent)) |
4963 | page = mc_handle_swap_pte(vma, addr, ptent, &ent); | 4829 | page = mc_handle_swap_pte(vma, addr, ptent, &ent); |
4964 | else if (pte_none(ptent) || pte_file(ptent)) | 4830 | else if (pte_none(ptent)) |
4965 | page = mc_handle_file_pte(vma, addr, ptent, &ent); | 4831 | page = mc_handle_file_pte(vma, addr, ptent, &ent); |
4966 | 4832 | ||
4967 | if (!page && !ent.val) | 4833 | if (!page && !ent.val) |
@@ -5004,7 +4870,7 @@ static enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma, | |||
5004 | 4870 | ||
5005 | page = pmd_page(pmd); | 4871 | page = pmd_page(pmd); |
5006 | VM_BUG_ON_PAGE(!page || !PageHead(page), page); | 4872 | VM_BUG_ON_PAGE(!page || !PageHead(page), page); |
5007 | if (!move_anon()) | 4873 | if (!(mc.flags & MOVE_ANON)) |
5008 | return ret; | 4874 | return ret; |
5009 | if (page->mem_cgroup == mc.from) { | 4875 | if (page->mem_cgroup == mc.from) { |
5010 | ret = MC_TARGET_PAGE; | 4876 | ret = MC_TARGET_PAGE; |
@@ -5027,7 +4893,7 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd, | |||
5027 | unsigned long addr, unsigned long end, | 4893 | unsigned long addr, unsigned long end, |
5028 | struct mm_walk *walk) | 4894 | struct mm_walk *walk) |
5029 | { | 4895 | { |
5030 | struct vm_area_struct *vma = walk->private; | 4896 | struct vm_area_struct *vma = walk->vma; |
5031 | pte_t *pte; | 4897 | pte_t *pte; |
5032 | spinlock_t *ptl; | 4898 | spinlock_t *ptl; |
5033 | 4899 | ||
@@ -5053,20 +4919,13 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd, | |||
5053 | static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm) | 4919 | static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm) |
5054 | { | 4920 | { |
5055 | unsigned long precharge; | 4921 | unsigned long precharge; |
5056 | struct vm_area_struct *vma; | ||
5057 | 4922 | ||
4923 | struct mm_walk mem_cgroup_count_precharge_walk = { | ||
4924 | .pmd_entry = mem_cgroup_count_precharge_pte_range, | ||
4925 | .mm = mm, | ||
4926 | }; | ||
5058 | down_read(&mm->mmap_sem); | 4927 | down_read(&mm->mmap_sem); |
5059 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | 4928 | walk_page_range(0, ~0UL, &mem_cgroup_count_precharge_walk); |
5060 | struct mm_walk mem_cgroup_count_precharge_walk = { | ||
5061 | .pmd_entry = mem_cgroup_count_precharge_pte_range, | ||
5062 | .mm = mm, | ||
5063 | .private = vma, | ||
5064 | }; | ||
5065 | if (is_vm_hugetlb_page(vma)) | ||
5066 | continue; | ||
5067 | walk_page_range(vma->vm_start, vma->vm_end, | ||
5068 | &mem_cgroup_count_precharge_walk); | ||
5069 | } | ||
5070 | up_read(&mm->mmap_sem); | 4929 | up_read(&mm->mmap_sem); |
5071 | 4930 | ||
5072 | precharge = mc.precharge; | 4931 | precharge = mc.precharge; |
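Both page walkers in this file are converted from a per-VMA loop with walk->private to the reworked walk_page_range() interface: the callback reads the VMA from walk->vma and a single call covers the whole address space (the explicit is_vm_hugetlb_page() skip is dropped; the core walker is presumably responsible for that now). A minimal sketch of the new pattern with a placeholder callback:

    static int sketch_pmd_entry(pmd_t *pmd, unsigned long addr,
                                unsigned long end, struct mm_walk *walk)
    {
            struct vm_area_struct *vma = walk->vma;   /* was walk->private */

            /* ... examine the ptes mapped under vma ... */
            return 0;
    }

    static void sketch_walk_whole_mm(struct mm_struct *mm)
    {
            struct mm_walk walk = {
                    .pmd_entry = sketch_pmd_entry,
                    .mm        = mm,
            };

            down_read(&mm->mmap_sem);
            walk_page_range(0, ~0UL, &walk);
            up_read(&mm->mmap_sem);
    }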
@@ -5146,15 +5005,15 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css, | |||
5146 | struct task_struct *p = cgroup_taskset_first(tset); | 5005 | struct task_struct *p = cgroup_taskset_first(tset); |
5147 | int ret = 0; | 5006 | int ret = 0; |
5148 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 5007 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
5149 | unsigned long move_charge_at_immigrate; | 5008 | unsigned long move_flags; |
5150 | 5009 | ||
5151 | /* | 5010 | /* |
5152 | * We are now committed to this value whatever it is. Changes in this | 5011 | * We are now committed to this value whatever it is. Changes in this |
5153 | * tunable will only affect upcoming migrations, not the current one. | 5012 | * tunable will only affect upcoming migrations, not the current one. |
5154 | * So we need to save it, and keep it going. | 5013 | * So we need to save it, and keep it going. |
5155 | */ | 5014 | */ |
5156 | move_charge_at_immigrate = memcg->move_charge_at_immigrate; | 5015 | move_flags = ACCESS_ONCE(memcg->move_charge_at_immigrate); |
5157 | if (move_charge_at_immigrate) { | 5016 | if (move_flags) { |
5158 | struct mm_struct *mm; | 5017 | struct mm_struct *mm; |
5159 | struct mem_cgroup *from = mem_cgroup_from_task(p); | 5018 | struct mem_cgroup *from = mem_cgroup_from_task(p); |
5160 | 5019 | ||
@@ -5174,7 +5033,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css, | |||
5174 | spin_lock(&mc.lock); | 5033 | spin_lock(&mc.lock); |
5175 | mc.from = from; | 5034 | mc.from = from; |
5176 | mc.to = memcg; | 5035 | mc.to = memcg; |
5177 | mc.immigrate_flags = move_charge_at_immigrate; | 5036 | mc.flags = move_flags; |
5178 | spin_unlock(&mc.lock); | 5037 | spin_unlock(&mc.lock); |
5179 | /* We set mc.moving_task later */ | 5038 | /* We set mc.moving_task later */ |
5180 | 5039 | ||
@@ -5199,7 +5058,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd, | |||
5199 | struct mm_walk *walk) | 5058 | struct mm_walk *walk) |
5200 | { | 5059 | { |
5201 | int ret = 0; | 5060 | int ret = 0; |
5202 | struct vm_area_struct *vma = walk->private; | 5061 | struct vm_area_struct *vma = walk->vma; |
5203 | pte_t *pte; | 5062 | pte_t *pte; |
5204 | spinlock_t *ptl; | 5063 | spinlock_t *ptl; |
5205 | enum mc_target_type target_type; | 5064 | enum mc_target_type target_type; |
@@ -5295,7 +5154,10 @@ put: /* get_mctgt_type() gets the page */ | |||
5295 | 5154 | ||
5296 | static void mem_cgroup_move_charge(struct mm_struct *mm) | 5155 | static void mem_cgroup_move_charge(struct mm_struct *mm) |
5297 | { | 5156 | { |
5298 | struct vm_area_struct *vma; | 5157 | struct mm_walk mem_cgroup_move_charge_walk = { |
5158 | .pmd_entry = mem_cgroup_move_charge_pte_range, | ||
5159 | .mm = mm, | ||
5160 | }; | ||
5299 | 5161 | ||
5300 | lru_add_drain_all(); | 5162 | lru_add_drain_all(); |
5301 | /* | 5163 | /* |
@@ -5318,24 +5180,11 @@ retry: | |||
5318 | cond_resched(); | 5180 | cond_resched(); |
5319 | goto retry; | 5181 | goto retry; |
5320 | } | 5182 | } |
5321 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | 5183 | /* |
5322 | int ret; | 5184 | * When we have consumed all precharges and failed in doing |
5323 | struct mm_walk mem_cgroup_move_charge_walk = { | 5185 | * additional charge, the page walk just aborts. |
5324 | .pmd_entry = mem_cgroup_move_charge_pte_range, | 5186 | */ |
5325 | .mm = mm, | 5187 | walk_page_range(0, ~0UL, &mem_cgroup_move_charge_walk); |
5326 | .private = vma, | ||
5327 | }; | ||
5328 | if (is_vm_hugetlb_page(vma)) | ||
5329 | continue; | ||
5330 | ret = walk_page_range(vma->vm_start, vma->vm_end, | ||
5331 | &mem_cgroup_move_charge_walk); | ||
5332 | if (ret) | ||
5333 | /* | ||
5334 | * means we have consumed all precharges and failed in | ||
5335 | * doing additional charge. Just abandon here. | ||
5336 | */ | ||
5337 | break; | ||
5338 | } | ||
5339 | up_read(&mm->mmap_sem); | 5188 | up_read(&mm->mmap_sem); |
5340 | atomic_dec(&mc.from->moving_account); | 5189 | atomic_dec(&mc.from->moving_account); |
5341 | } | 5190 | } |
@@ -5386,118 +5235,211 @@ static void mem_cgroup_bind(struct cgroup_subsys_state *root_css) | |||
5386 | mem_cgroup_from_css(root_css)->use_hierarchy = true; | 5235 | mem_cgroup_from_css(root_css)->use_hierarchy = true; |
5387 | } | 5236 | } |
5388 | 5237 | ||
5389 | struct cgroup_subsys memory_cgrp_subsys = { | 5238 | static u64 memory_current_read(struct cgroup_subsys_state *css, |
5390 | .css_alloc = mem_cgroup_css_alloc, | 5239 | struct cftype *cft) |
5391 | .css_online = mem_cgroup_css_online, | 5240 | { |
5392 | .css_offline = mem_cgroup_css_offline, | 5241 | return mem_cgroup_usage(mem_cgroup_from_css(css), false); |
5393 | .css_free = mem_cgroup_css_free, | 5242 | } |
5394 | .css_reset = mem_cgroup_css_reset, | ||
5395 | .can_attach = mem_cgroup_can_attach, | ||
5396 | .cancel_attach = mem_cgroup_cancel_attach, | ||
5397 | .attach = mem_cgroup_move_task, | ||
5398 | .bind = mem_cgroup_bind, | ||
5399 | .legacy_cftypes = mem_cgroup_files, | ||
5400 | .early_init = 0, | ||
5401 | }; | ||
5402 | 5243 | ||
5403 | #ifdef CONFIG_MEMCG_SWAP | 5244 | static int memory_low_show(struct seq_file *m, void *v) |
5404 | static int __init enable_swap_account(char *s) | ||
5405 | { | 5245 | { |
5406 | if (!strcmp(s, "1")) | 5246 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); |
5407 | really_do_swap_account = 1; | 5247 | unsigned long low = ACCESS_ONCE(memcg->low); |
5408 | else if (!strcmp(s, "0")) | 5248 | |
5409 | really_do_swap_account = 0; | 5249 | if (low == PAGE_COUNTER_MAX) |
5410 | return 1; | 5250 | seq_puts(m, "max\n"); |
5251 | else | ||
5252 | seq_printf(m, "%llu\n", (u64)low * PAGE_SIZE); | ||
5253 | |||
5254 | return 0; | ||
5411 | } | 5255 | } |
5412 | __setup("swapaccount=", enable_swap_account); | ||
5413 | 5256 | ||
5414 | static void __init memsw_file_init(void) | 5257 | static ssize_t memory_low_write(struct kernfs_open_file *of, |
5258 | char *buf, size_t nbytes, loff_t off) | ||
5415 | { | 5259 | { |
5416 | WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, | 5260 | struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); |
5417 | memsw_cgroup_files)); | 5261 | unsigned long low; |
5262 | int err; | ||
5263 | |||
5264 | buf = strstrip(buf); | ||
5265 | err = page_counter_memparse(buf, "max", &low); | ||
5266 | if (err) | ||
5267 | return err; | ||
5268 | |||
5269 | memcg->low = low; | ||
5270 | |||
5271 | return nbytes; | ||
5418 | } | 5272 | } |
5419 | 5273 | ||
5420 | static void __init enable_swap_cgroup(void) | 5274 | static int memory_high_show(struct seq_file *m, void *v) |
5421 | { | 5275 | { |
5422 | if (!mem_cgroup_disabled() && really_do_swap_account) { | 5276 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); |
5423 | do_swap_account = 1; | 5277 | unsigned long high = ACCESS_ONCE(memcg->high); |
5424 | memsw_file_init(); | 5278 | |
5425 | } | 5279 | if (high == PAGE_COUNTER_MAX) |
5280 | seq_puts(m, "max\n"); | ||
5281 | else | ||
5282 | seq_printf(m, "%llu\n", (u64)high * PAGE_SIZE); | ||
5283 | |||
5284 | return 0; | ||
5426 | } | 5285 | } |
5427 | 5286 | ||
5428 | #else | 5287 | static ssize_t memory_high_write(struct kernfs_open_file *of, |
5429 | static void __init enable_swap_cgroup(void) | 5288 | char *buf, size_t nbytes, loff_t off) |
5430 | { | 5289 | { |
5290 | struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); | ||
5291 | unsigned long high; | ||
5292 | int err; | ||
5293 | |||
5294 | buf = strstrip(buf); | ||
5295 | err = page_counter_memparse(buf, "max", &high); | ||
5296 | if (err) | ||
5297 | return err; | ||
5298 | |||
5299 | memcg->high = high; | ||
5300 | |||
5301 | return nbytes; | ||
5431 | } | 5302 | } |
5432 | #endif | ||
5433 | 5303 | ||
5434 | #ifdef CONFIG_MEMCG_SWAP | 5304 | static int memory_max_show(struct seq_file *m, void *v) |
5435 | /** | ||
5436 | * mem_cgroup_swapout - transfer a memsw charge to swap | ||
5437 | * @page: page whose memsw charge to transfer | ||
5438 | * @entry: swap entry to move the charge to | ||
5439 | * | ||
5440 | * Transfer the memsw charge of @page to @entry. | ||
5441 | */ | ||
5442 | void mem_cgroup_swapout(struct page *page, swp_entry_t entry) | ||
5443 | { | 5305 | { |
5444 | struct mem_cgroup *memcg; | 5306 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); |
5445 | unsigned short oldid; | 5307 | unsigned long max = ACCESS_ONCE(memcg->memory.limit); |
5446 | 5308 | ||
5447 | VM_BUG_ON_PAGE(PageLRU(page), page); | 5309 | if (max == PAGE_COUNTER_MAX) |
5448 | VM_BUG_ON_PAGE(page_count(page), page); | 5310 | seq_puts(m, "max\n"); |
5311 | else | ||
5312 | seq_printf(m, "%llu\n", (u64)max * PAGE_SIZE); | ||
5449 | 5313 | ||
5450 | if (!do_swap_account) | 5314 | return 0; |
5451 | return; | 5315 | } |
5452 | 5316 | ||
5453 | memcg = page->mem_cgroup; | 5317 | static ssize_t memory_max_write(struct kernfs_open_file *of, |
5318 | char *buf, size_t nbytes, loff_t off) | ||
5319 | { | ||
5320 | struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); | ||
5321 | unsigned long max; | ||
5322 | int err; | ||
5454 | 5323 | ||
5455 | /* Readahead page, never charged */ | 5324 | buf = strstrip(buf); |
5456 | if (!memcg) | 5325 | err = page_counter_memparse(buf, "max", &max); |
5457 | return; | 5326 | if (err) |
5327 | return err; | ||
5458 | 5328 | ||
5459 | oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg)); | 5329 | err = mem_cgroup_resize_limit(memcg, max); |
5460 | VM_BUG_ON_PAGE(oldid, page); | 5330 | if (err) |
5461 | mem_cgroup_swap_statistics(memcg, true); | 5331 | return err; |
5462 | 5332 | ||
5463 | page->mem_cgroup = NULL; | 5333 | return nbytes; |
5334 | } | ||
5464 | 5335 | ||
5465 | if (!mem_cgroup_is_root(memcg)) | 5336 | static int memory_events_show(struct seq_file *m, void *v) |
5466 | page_counter_uncharge(&memcg->memory, 1); | 5337 | { |
5338 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); | ||
5467 | 5339 | ||
5468 | /* XXX: caller holds IRQ-safe mapping->tree_lock */ | 5340 | seq_printf(m, "low %lu\n", mem_cgroup_read_events(memcg, MEMCG_LOW)); |
5469 | VM_BUG_ON(!irqs_disabled()); | 5341 | seq_printf(m, "high %lu\n", mem_cgroup_read_events(memcg, MEMCG_HIGH)); |
5342 | seq_printf(m, "max %lu\n", mem_cgroup_read_events(memcg, MEMCG_MAX)); | ||
5343 | seq_printf(m, "oom %lu\n", mem_cgroup_read_events(memcg, MEMCG_OOM)); | ||
5470 | 5344 | ||
5471 | mem_cgroup_charge_statistics(memcg, page, -1); | 5345 | return 0; |
5472 | memcg_check_events(memcg, page); | ||
5473 | } | 5346 | } |
5474 | 5347 | ||
5348 | static struct cftype memory_files[] = { | ||
5349 | { | ||
5350 | .name = "current", | ||
5351 | .read_u64 = memory_current_read, | ||
5352 | }, | ||
5353 | { | ||
5354 | .name = "low", | ||
5355 | .flags = CFTYPE_NOT_ON_ROOT, | ||
5356 | .seq_show = memory_low_show, | ||
5357 | .write = memory_low_write, | ||
5358 | }, | ||
5359 | { | ||
5360 | .name = "high", | ||
5361 | .flags = CFTYPE_NOT_ON_ROOT, | ||
5362 | .seq_show = memory_high_show, | ||
5363 | .write = memory_high_write, | ||
5364 | }, | ||
5365 | { | ||
5366 | .name = "max", | ||
5367 | .flags = CFTYPE_NOT_ON_ROOT, | ||
5368 | .seq_show = memory_max_show, | ||
5369 | .write = memory_max_write, | ||
5370 | }, | ||
5371 | { | ||
5372 | .name = "events", | ||
5373 | .flags = CFTYPE_NOT_ON_ROOT, | ||
5374 | .seq_show = memory_events_show, | ||
5375 | }, | ||
5376 | { } /* terminate */ | ||
5377 | }; | ||
5378 | |||
5379 | struct cgroup_subsys memory_cgrp_subsys = { | ||
5380 | .css_alloc = mem_cgroup_css_alloc, | ||
5381 | .css_online = mem_cgroup_css_online, | ||
5382 | .css_offline = mem_cgroup_css_offline, | ||
5383 | .css_free = mem_cgroup_css_free, | ||
5384 | .css_reset = mem_cgroup_css_reset, | ||
5385 | .can_attach = mem_cgroup_can_attach, | ||
5386 | .cancel_attach = mem_cgroup_cancel_attach, | ||
5387 | .attach = mem_cgroup_move_task, | ||
5388 | .bind = mem_cgroup_bind, | ||
5389 | .dfl_cftypes = memory_files, | ||
5390 | .legacy_cftypes = mem_cgroup_legacy_files, | ||
5391 | .early_init = 0, | ||
5392 | }; | ||
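The memory_files above are published on the unified (default) hierarchy through .dfl_cftypes, while .legacy_cftypes keeps the existing control files available on the old hierarchy. A hypothetical userspace sketch of the new files follows; the mount point /sys/fs/cgroup and the group name "mygroup" are assumptions for illustration only. Limits are written in bytes, and the literal string "max" clears a boundary, matching the page_counter_memparse() calls in the write handlers above:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *high = "/sys/fs/cgroup/mygroup/memory.high";	/* assumed path */
	const char *cur  = "/sys/fs/cgroup/mygroup/memory.current";	/* assumed path */
	char buf[64];
	ssize_t n;
	int fd;

	/* Set a 256M "high" boundary (writing "max" would clear it). */
	fd = open(high, O_WRONLY);
	if (fd < 0)
		return 1;
	if (write(fd, "268435456", 9) < 0)
		perror("write memory.high");
	close(fd);

	/* Read back current consumption in bytes. */
	fd = open(cur, O_RDONLY);
	if (fd < 0)
		return 1;
	n = read(fd, buf, sizeof(buf) - 1);
	if (n > 0) {
		buf[n] = '\0';
		printf("memory.current: %s", buf);
	}
	close(fd);
	return 0;
}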
5393 | |||
5475 | /** | 5394 | /** |
5476 | * mem_cgroup_uncharge_swap - uncharge a swap entry | 5395 | * mem_cgroup_events - count memory events against a cgroup |
5477 | * @entry: swap entry to uncharge | 5396 | * @memcg: the memory cgroup |
5397 | * @idx: the event index | ||
5398 | * @nr: the number of events to account for | ||
5399 | */ | ||
5400 | void mem_cgroup_events(struct mem_cgroup *memcg, | ||
5401 | enum mem_cgroup_events_index idx, | ||
5402 | unsigned int nr) | ||
5403 | { | ||
5404 | this_cpu_add(memcg->stat->events[idx], nr); | ||
5405 | } | ||
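mem_cgroup_events() is the accounting half of the memory.events file added above. A hypothetical helper (maybe_note_high is an illustrative name, not from this patch) showing how a charge path could record a "high" breach so it becomes visible to userspace:

static void maybe_note_high(struct mem_cgroup *memcg)
{
	/* Counted per occurrence; reported as "high" in memory.events. */
	if (page_counter_read(&memcg->memory) > memcg->high)
		mem_cgroup_events(memcg, MEMCG_HIGH, 1);
}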
5406 | |||
5407 | /** | ||
5408 | * mem_cgroup_low - check if memory consumption is below the normal range | ||
5409 | * @root: the highest ancestor to consider | ||
5410 | * @memcg: the memory cgroup to check | ||
5478 | * | 5411 | * |
5479 | * Drop the memsw charge associated with @entry. | 5412 | * Returns %true if memory consumption of @memcg, and that of all |
5413 | * configurable ancestors up to @root, is below the normal range. | ||
5480 | */ | 5414 | */ |
5481 | void mem_cgroup_uncharge_swap(swp_entry_t entry) | 5415 | bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg) |
5482 | { | 5416 | { |
5483 | struct mem_cgroup *memcg; | 5417 | if (mem_cgroup_disabled()) |
5484 | unsigned short id; | 5418 | return false; |
5485 | 5419 | ||
5486 | if (!do_swap_account) | 5420 | /* |
5487 | return; | 5421 | * The toplevel group doesn't have a configurable range, so |
5422 | * it's never low when looked at directly, and it is not | ||
5423 | * considered an ancestor when assessing the hierarchy. | ||
5424 | */ | ||
5488 | 5425 | ||
5489 | id = swap_cgroup_record(entry, 0); | 5426 | if (memcg == root_mem_cgroup) |
5490 | rcu_read_lock(); | 5427 | return false; |
5491 | memcg = mem_cgroup_lookup(id); | 5428 | |
5492 | if (memcg) { | 5429 | if (page_counter_read(&memcg->memory) >= memcg->low) |
5493 | if (!mem_cgroup_is_root(memcg)) | 5430 | return false; |
5494 | page_counter_uncharge(&memcg->memsw, 1); | 5431 | |
5495 | mem_cgroup_swap_statistics(memcg, false); | 5432 | while (memcg != root) { |
5496 | css_put(&memcg->css); | 5433 | memcg = parent_mem_cgroup(memcg); |
5434 | |||
5435 | if (memcg == root_mem_cgroup) | ||
5436 | break; | ||
5437 | |||
5438 | if (page_counter_read(&memcg->memory) >= memcg->low) | ||
5439 | return false; | ||
5497 | } | 5440 | } |
5498 | rcu_read_unlock(); | 5441 | return true; |
5499 | } | 5442 | } |
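mem_cgroup_low() answers for the whole ancestry between @memcg and the reclaim @root, so a reclaim loop needs only one call per group. A hypothetical reclaim-side sketch (skip_protected and the reclaim_anyway flag are illustrative, not taken from this patch): protected groups are skipped, and a "low" event is recorded whenever their protection has to be overridden:

static bool skip_protected(struct mem_cgroup *root, struct mem_cgroup *memcg,
			   bool reclaim_anyway)
{
	if (!mem_cgroup_low(root, memcg))
		return false;			/* not in the protected range */
	if (!reclaim_anyway)
		return true;			/* honour memory.low, skip it */
	/* Protection is being ignored; surface that in memory.events. */
	mem_cgroup_events(memcg, MEMCG_LOW, 1);
	return false;
}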
5500 | #endif | ||
5501 | 5443 | ||
5502 | /** | 5444 | /** |
5503 | * mem_cgroup_try_charge - try charging a page | 5445 | * mem_cgroup_try_charge - try charging a page |
@@ -5831,10 +5773,155 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage, | |||
5831 | */ | 5773 | */ |
5832 | static int __init mem_cgroup_init(void) | 5774 | static int __init mem_cgroup_init(void) |
5833 | { | 5775 | { |
5776 | int cpu, node; | ||
5777 | |||
5834 | hotcpu_notifier(memcg_cpu_hotplug_callback, 0); | 5778 | hotcpu_notifier(memcg_cpu_hotplug_callback, 0); |
5835 | enable_swap_cgroup(); | 5779 | |
5836 | mem_cgroup_soft_limit_tree_init(); | 5780 | for_each_possible_cpu(cpu) |
5837 | memcg_stock_init(); | 5781 | INIT_WORK(&per_cpu_ptr(&memcg_stock, cpu)->work, |
5782 | drain_local_stock); | ||
5783 | |||
5784 | for_each_node(node) { | ||
5785 | struct mem_cgroup_tree_per_node *rtpn; | ||
5786 | int zone; | ||
5787 | |||
5788 | rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, | ||
5789 | node_online(node) ? node : NUMA_NO_NODE); | ||
5790 | |||
5791 | for (zone = 0; zone < MAX_NR_ZONES; zone++) { | ||
5792 | struct mem_cgroup_tree_per_zone *rtpz; | ||
5793 | |||
5794 | rtpz = &rtpn->rb_tree_per_zone[zone]; | ||
5795 | rtpz->rb_root = RB_ROOT; | ||
5796 | spin_lock_init(&rtpz->lock); | ||
5797 | } | ||
5798 | soft_limit_tree.rb_tree_per_node[node] = rtpn; | ||
5799 | } | ||
5800 | |||
5838 | return 0; | 5801 | return 0; |
5839 | } | 5802 | } |
5840 | subsys_initcall(mem_cgroup_init); | 5803 | subsys_initcall(mem_cgroup_init); |
5804 | |||
5805 | #ifdef CONFIG_MEMCG_SWAP | ||
5806 | /** | ||
5807 | * mem_cgroup_swapout - transfer a memsw charge to swap | ||
5808 | * @page: page whose memsw charge to transfer | ||
5809 | * @entry: swap entry to move the charge to | ||
5810 | * | ||
5811 | * Transfer the memsw charge of @page to @entry. | ||
5812 | */ | ||
5813 | void mem_cgroup_swapout(struct page *page, swp_entry_t entry) | ||
5814 | { | ||
5815 | struct mem_cgroup *memcg; | ||
5816 | unsigned short oldid; | ||
5817 | |||
5818 | VM_BUG_ON_PAGE(PageLRU(page), page); | ||
5819 | VM_BUG_ON_PAGE(page_count(page), page); | ||
5820 | |||
5821 | if (!do_swap_account) | ||
5822 | return; | ||
5823 | |||
5824 | memcg = page->mem_cgroup; | ||
5825 | |||
5826 | /* Readahead page, never charged */ | ||
5827 | if (!memcg) | ||
5828 | return; | ||
5829 | |||
5830 | oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg)); | ||
5831 | VM_BUG_ON_PAGE(oldid, page); | ||
5832 | mem_cgroup_swap_statistics(memcg, true); | ||
5833 | |||
5834 | page->mem_cgroup = NULL; | ||
5835 | |||
5836 | if (!mem_cgroup_is_root(memcg)) | ||
5837 | page_counter_uncharge(&memcg->memory, 1); | ||
5838 | |||
5839 | /* XXX: caller holds IRQ-safe mapping->tree_lock */ | ||
5840 | VM_BUG_ON(!irqs_disabled()); | ||
5841 | |||
5842 | mem_cgroup_charge_statistics(memcg, page, -1); | ||
5843 | memcg_check_events(memcg, page); | ||
5844 | } | ||
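mem_cgroup_swapout() expects an unmapped page that is off the LRU, has had its reference count frozen to zero, and is still protected by the IRQ-safe mapping->tree_lock, as the VM_BUG_ONs and the XXX note above spell out. A hedged sketch of the kind of caller this is written for, loosely modeled on reclaim's swap-cache teardown (drop_swapcache_page is an illustrative name; error handling and the refcount freeze are omitted):

static void drop_swapcache_page(struct address_space *mapping,
				struct page *page)
{
	swp_entry_t swap = { .val = page_private(page) };

	/* Caller must already have frozen the page's refcount to zero. */
	spin_lock_irq(&mapping->tree_lock);
	mem_cgroup_swapout(page, swap);		/* memsw charge follows the swap entry */
	__delete_from_swap_cache(page);
	spin_unlock_irq(&mapping->tree_lock);
	swapcache_free(swap);			/* drop the swap-cache reference on the slot */
}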
5845 | |||
5846 | /** | ||
5847 | * mem_cgroup_uncharge_swap - uncharge a swap entry | ||
5848 | * @entry: swap entry to uncharge | ||
5849 | * | ||
5850 | * Drop the memsw charge associated with @entry. | ||
5851 | */ | ||
5852 | void mem_cgroup_uncharge_swap(swp_entry_t entry) | ||
5853 | { | ||
5854 | struct mem_cgroup *memcg; | ||
5855 | unsigned short id; | ||
5856 | |||
5857 | if (!do_swap_account) | ||
5858 | return; | ||
5859 | |||
5860 | id = swap_cgroup_record(entry, 0); | ||
5861 | rcu_read_lock(); | ||
5862 | memcg = mem_cgroup_lookup(id); | ||
5863 | if (memcg) { | ||
5864 | if (!mem_cgroup_is_root(memcg)) | ||
5865 | page_counter_uncharge(&memcg->memsw, 1); | ||
5866 | mem_cgroup_swap_statistics(memcg, false); | ||
5867 | css_put(&memcg->css); | ||
5868 | } | ||
5869 | rcu_read_unlock(); | ||
5870 | } | ||
5871 | |||
5872 | /* for remembering the boot option */ | ||
5873 | #ifdef CONFIG_MEMCG_SWAP_ENABLED | ||
5874 | static int really_do_swap_account __initdata = 1; | ||
5875 | #else | ||
5876 | static int really_do_swap_account __initdata; | ||
5877 | #endif | ||
5878 | |||
5879 | static int __init enable_swap_account(char *s) | ||
5880 | { | ||
5881 | if (!strcmp(s, "1")) | ||
5882 | really_do_swap_account = 1; | ||
5883 | else if (!strcmp(s, "0")) | ||
5884 | really_do_swap_account = 0; | ||
5885 | return 1; | ||
5886 | } | ||
5887 | __setup("swapaccount=", enable_swap_account); | ||
5888 | |||
5889 | static struct cftype memsw_cgroup_files[] = { | ||
5890 | { | ||
5891 | .name = "memsw.usage_in_bytes", | ||
5892 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), | ||
5893 | .read_u64 = mem_cgroup_read_u64, | ||
5894 | }, | ||
5895 | { | ||
5896 | .name = "memsw.max_usage_in_bytes", | ||
5897 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE), | ||
5898 | .write = mem_cgroup_reset, | ||
5899 | .read_u64 = mem_cgroup_read_u64, | ||
5900 | }, | ||
5901 | { | ||
5902 | .name = "memsw.limit_in_bytes", | ||
5903 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT), | ||
5904 | .write = mem_cgroup_write, | ||
5905 | .read_u64 = mem_cgroup_read_u64, | ||
5906 | }, | ||
5907 | { | ||
5908 | .name = "memsw.failcnt", | ||
5909 | .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT), | ||
5910 | .write = mem_cgroup_reset, | ||
5911 | .read_u64 = mem_cgroup_read_u64, | ||
5912 | }, | ||
5913 | { }, /* terminate */ | ||
5914 | }; | ||
5915 | |||
5916 | static int __init mem_cgroup_swap_init(void) | ||
5917 | { | ||
5918 | if (!mem_cgroup_disabled() && really_do_swap_account) { | ||
5919 | do_swap_account = 1; | ||
5920 | WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, | ||
5921 | memsw_cgroup_files)); | ||
5922 | } | ||
5923 | return 0; | ||
5924 | } | ||
5925 | subsys_initcall(mem_cgroup_swap_init); | ||
5926 | |||
5927 | #endif /* CONFIG_MEMCG_SWAP */ | ||