diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memcontrol.c | 229 | ||||
-rw-r--r-- | mm/vmscan.c | 22 |
2 files changed, 237 insertions, 14 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6453ea5a27aa..ee97c9ac62c0 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -97,14 +97,6 @@ static const char * const mem_cgroup_stat_names[] = { | |||
97 | "swap", | 97 | "swap", |
98 | }; | 98 | }; |
99 | 99 | ||
100 | enum mem_cgroup_events_index { | ||
101 | MEM_CGROUP_EVENTS_PGPGIN, /* # of pages paged in */ | ||
102 | MEM_CGROUP_EVENTS_PGPGOUT, /* # of pages paged out */ | ||
103 | MEM_CGROUP_EVENTS_PGFAULT, /* # of page-faults */ | ||
104 | MEM_CGROUP_EVENTS_PGMAJFAULT, /* # of major page-faults */ | ||
105 | MEM_CGROUP_EVENTS_NSTATS, | ||
106 | }; | ||
107 | |||
108 | static const char * const mem_cgroup_events_names[] = { | 100 | static const char * const mem_cgroup_events_names[] = { |
109 | "pgpgin", | 101 | "pgpgin", |
110 | "pgpgout", | 102 | "pgpgout", |
@@ -138,7 +130,7 @@ enum mem_cgroup_events_target { | |||
138 | 130 | ||
139 | struct mem_cgroup_stat_cpu { | 131 | struct mem_cgroup_stat_cpu { |
140 | long count[MEM_CGROUP_STAT_NSTATS]; | 132 | long count[MEM_CGROUP_STAT_NSTATS]; |
141 | unsigned long events[MEM_CGROUP_EVENTS_NSTATS]; | 133 | unsigned long events[MEMCG_NR_EVENTS]; |
142 | unsigned long nr_page_events; | 134 | unsigned long nr_page_events; |
143 | unsigned long targets[MEM_CGROUP_NTARGETS]; | 135 | unsigned long targets[MEM_CGROUP_NTARGETS]; |
144 | }; | 136 | }; |
@@ -284,6 +276,10 @@ struct mem_cgroup { | |||
284 | struct page_counter memsw; | 276 | struct page_counter memsw; |
285 | struct page_counter kmem; | 277 | struct page_counter kmem; |
286 | 278 | ||
279 | /* Normal memory consumption range */ | ||
280 | unsigned long low; | ||
281 | unsigned long high; | ||
282 | |||
287 | unsigned long soft_limit; | 283 | unsigned long soft_limit; |
288 | 284 | ||
289 | /* vmpressure notifications */ | 285 | /* vmpressure notifications */ |
@@ -2315,6 +2311,8 @@ retry: | |||
2315 | if (!(gfp_mask & __GFP_WAIT)) | 2311 | if (!(gfp_mask & __GFP_WAIT)) |
2316 | goto nomem; | 2312 | goto nomem; |
2317 | 2313 | ||
2314 | mem_cgroup_events(mem_over_limit, MEMCG_MAX, 1); | ||
2315 | |||
2318 | nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages, | 2316 | nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages, |
2319 | gfp_mask, may_swap); | 2317 | gfp_mask, may_swap); |
2320 | 2318 | ||
@@ -2356,6 +2354,8 @@ retry: | |||
2356 | if (fatal_signal_pending(current)) | 2354 | if (fatal_signal_pending(current)) |
2357 | goto bypass; | 2355 | goto bypass; |
2358 | 2356 | ||
2357 | mem_cgroup_events(mem_over_limit, MEMCG_OOM, 1); | ||
2358 | |||
2359 | mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(nr_pages)); | 2359 | mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(nr_pages)); |
2360 | nomem: | 2360 | nomem: |
2361 | if (!(gfp_mask & __GFP_NOFAIL)) | 2361 | if (!(gfp_mask & __GFP_NOFAIL)) |
@@ -2367,6 +2367,16 @@ done_restock: | |||
2367 | css_get_many(&memcg->css, batch); | 2367 | css_get_many(&memcg->css, batch); |
2368 | if (batch > nr_pages) | 2368 | if (batch > nr_pages) |
2369 | refill_stock(memcg, batch - nr_pages); | 2369 | refill_stock(memcg, batch - nr_pages); |
2370 | /* | ||
2371 | * If the hierarchy is above the normal consumption range, | ||
2372 | * make the charging task trim their excess contribution. | ||
2373 | */ | ||
2374 | do { | ||
2375 | if (page_counter_read(&memcg->memory) <= memcg->high) | ||
2376 | continue; | ||
2377 | mem_cgroup_events(memcg, MEMCG_HIGH, 1); | ||
2378 | try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true); | ||
2379 | } while ((memcg = parent_mem_cgroup(memcg))); | ||
2370 | done: | 2380 | done: |
2371 | return ret; | 2381 | return ret; |
2372 | } | 2382 | } |
@@ -4276,7 +4286,7 @@ out_kfree: | |||
4276 | return ret; | 4286 | return ret; |
4277 | } | 4287 | } |
4278 | 4288 | ||
4279 | static struct cftype mem_cgroup_files[] = { | 4289 | static struct cftype mem_cgroup_legacy_files[] = { |
4280 | { | 4290 | { |
4281 | .name = "usage_in_bytes", | 4291 | .name = "usage_in_bytes", |
4282 | .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), | 4292 | .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), |
@@ -4552,6 +4562,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) | |||
4552 | if (parent_css == NULL) { | 4562 | if (parent_css == NULL) { |
4553 | root_mem_cgroup = memcg; | 4563 | root_mem_cgroup = memcg; |
4554 | page_counter_init(&memcg->memory, NULL); | 4564 | page_counter_init(&memcg->memory, NULL); |
4565 | memcg->high = PAGE_COUNTER_MAX; | ||
4555 | memcg->soft_limit = PAGE_COUNTER_MAX; | 4566 | memcg->soft_limit = PAGE_COUNTER_MAX; |
4556 | page_counter_init(&memcg->memsw, NULL); | 4567 | page_counter_init(&memcg->memsw, NULL); |
4557 | page_counter_init(&memcg->kmem, NULL); | 4568 | page_counter_init(&memcg->kmem, NULL); |
@@ -4597,6 +4608,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css) | |||
4597 | 4608 | ||
4598 | if (parent->use_hierarchy) { | 4609 | if (parent->use_hierarchy) { |
4599 | page_counter_init(&memcg->memory, &parent->memory); | 4610 | page_counter_init(&memcg->memory, &parent->memory); |
4611 | memcg->high = PAGE_COUNTER_MAX; | ||
4600 | memcg->soft_limit = PAGE_COUNTER_MAX; | 4612 | memcg->soft_limit = PAGE_COUNTER_MAX; |
4601 | page_counter_init(&memcg->memsw, &parent->memsw); | 4613 | page_counter_init(&memcg->memsw, &parent->memsw); |
4602 | page_counter_init(&memcg->kmem, &parent->kmem); | 4614 | page_counter_init(&memcg->kmem, &parent->kmem); |
@@ -4607,6 +4619,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css) | |||
4607 | */ | 4619 | */ |
4608 | } else { | 4620 | } else { |
4609 | page_counter_init(&memcg->memory, NULL); | 4621 | page_counter_init(&memcg->memory, NULL); |
4622 | memcg->high = PAGE_COUNTER_MAX; | ||
4610 | memcg->soft_limit = PAGE_COUNTER_MAX; | 4623 | memcg->soft_limit = PAGE_COUNTER_MAX; |
4611 | page_counter_init(&memcg->memsw, NULL); | 4624 | page_counter_init(&memcg->memsw, NULL); |
4612 | page_counter_init(&memcg->kmem, NULL); | 4625 | page_counter_init(&memcg->kmem, NULL); |
@@ -4682,6 +4695,8 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css) | |||
4682 | mem_cgroup_resize_limit(memcg, PAGE_COUNTER_MAX); | 4695 | mem_cgroup_resize_limit(memcg, PAGE_COUNTER_MAX); |
4683 | mem_cgroup_resize_memsw_limit(memcg, PAGE_COUNTER_MAX); | 4696 | mem_cgroup_resize_memsw_limit(memcg, PAGE_COUNTER_MAX); |
4684 | memcg_update_kmem_limit(memcg, PAGE_COUNTER_MAX); | 4697 | memcg_update_kmem_limit(memcg, PAGE_COUNTER_MAX); |
4698 | memcg->low = 0; | ||
4699 | memcg->high = PAGE_COUNTER_MAX; | ||
4685 | memcg->soft_limit = PAGE_COUNTER_MAX; | 4700 | memcg->soft_limit = PAGE_COUNTER_MAX; |
4686 | } | 4701 | } |
4687 | 4702 | ||
@@ -5267,6 +5282,147 @@ static void mem_cgroup_bind(struct cgroup_subsys_state *root_css) | |||
5267 | mem_cgroup_from_css(root_css)->use_hierarchy = true; | 5282 | mem_cgroup_from_css(root_css)->use_hierarchy = true; |
5268 | } | 5283 | } |
5269 | 5284 | ||
/*
 * memory_current_read - read_u64 handler behind the "current" control file
 * (see the "current" entry in memory_files[]).
 *
 * Resolves @css to its mem_cgroup and returns whatever mem_cgroup_usage()
 * reports for it. @cft is not used.
 *
 * NOTE(review): the second argument to mem_cgroup_usage() is false;
 * presumably that selects memory-only (no swap) usage, but the helper is
 * not visible in this diff -- confirm.
 */
5285 | static u64 memory_current_read(struct cgroup_subsys_state *css, | |
5286 | struct cftype *cft) | |
5287 | { | |
5288 | return mem_cgroup_usage(mem_cgroup_from_css(css), false); | |
5289 | } | |
5290 | |||
/*
 * memory_low_show - seq_show handler for the "low" control file.
 *
 * Takes a single snapshot of memcg->low through ACCESS_ONCE() and prints
 * it followed by a newline: the literal "infinity" when the snapshot
 * equals PAGE_COUNTER_MAX, otherwise the snapshot multiplied by PAGE_SIZE
 * (i.e. the stored value is a page count and the output is in bytes).
 *
 * @v is unused. Always returns 0.
 */
5291 | static int memory_low_show(struct seq_file *m, void *v) | |
5292 | { | |
5293 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); | |
5294 | unsigned long low = ACCESS_ONCE(memcg->low); | |
5295 | |
5296 | if (low == PAGE_COUNTER_MAX) | |
5297 | seq_puts(m, "infinity\n"); | |
5298 | else | |
5299 | seq_printf(m, "%llu\n", (u64)low * PAGE_SIZE); | |
5300 | |
5301 | return 0; | |
5302 | } | |
5303 | |||
/*
 * memory_low_write - write handler for the "low" control file.
 *
 * Strips leading/trailing whitespace from @buf, parses the remainder with
 * page_counter_memparse() and stores the parsed value in memcg->low with
 * a plain assignment. Nothing else is done here when the value changes.
 *
 * NOTE(review): "infinity" is handed to page_counter_memparse() as the
 * keyword argument; presumably it is parsed to PAGE_COUNTER_MAX (which is
 * what memory_low_show() prints as "infinity") -- confirm in the parser.
 *
 * Returns @nbytes on success, or the parser's non-zero error code.
 * @off is unused.
 */
5304 | static ssize_t memory_low_write(struct kernfs_open_file *of, | |
5305 | char *buf, size_t nbytes, loff_t off) | |
5306 | { | |
5307 | struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); | |
5308 | unsigned long low; | |
5309 | int err; | |
5310 | |
5311 | buf = strstrip(buf); | |
5312 | err = page_counter_memparse(buf, "infinity", &low); | |
5313 | if (err) | |
5314 | return err; | |
5315 | |
5316 | memcg->low = low; | |
5317 | |
5318 | return nbytes; | |
5319 | } | |
5320 | |||
/*
 * memory_high_show - seq_show handler for the "high" control file.
 *
 * Takes a single snapshot of memcg->high through ACCESS_ONCE() and prints
 * it followed by a newline: the literal "infinity" when the snapshot
 * equals PAGE_COUNTER_MAX, otherwise the snapshot multiplied by PAGE_SIZE
 * (i.e. the stored value is a page count and the output is in bytes).
 *
 * @v is unused. Always returns 0.
 */
5321 | static int memory_high_show(struct seq_file *m, void *v) | |
5322 | { | |
5323 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); | |
5324 | unsigned long high = ACCESS_ONCE(memcg->high); | |
5325 | |
5326 | if (high == PAGE_COUNTER_MAX) | |
5327 | seq_puts(m, "infinity\n"); | |
5328 | else | |
5329 | seq_printf(m, "%llu\n", (u64)high * PAGE_SIZE); | |
5330 | |
5331 | return 0; | |
5332 | } | |
5333 | |||
/*
 * memory_high_write - write handler for the "high" control file.
 *
 * Strips leading/trailing whitespace from @buf, parses the remainder with
 * page_counter_memparse() and stores the parsed value in memcg->high with
 * a plain assignment. No reclaim is started from here; the new value is
 * only compared against usage elsewhere.
 *
 * NOTE(review): "infinity" is handed to page_counter_memparse() as the
 * keyword argument; presumably it is parsed to PAGE_COUNTER_MAX (which is
 * what memory_high_show() prints as "infinity") -- confirm in the parser.
 *
 * Returns @nbytes on success, or the parser's non-zero error code.
 * @off is unused.
 */
5334 | static ssize_t memory_high_write(struct kernfs_open_file *of, | |
5335 | char *buf, size_t nbytes, loff_t off) | |
5336 | { | |
5337 | struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); | |
5338 | unsigned long high; | |
5339 | int err; | |
5340 | |
5341 | buf = strstrip(buf); | |
5342 | err = page_counter_memparse(buf, "infinity", &high); | |
5343 | if (err) | |
5344 | return err; | |
5345 | |
5346 | memcg->high = high; | |
5347 | |
5348 | return nbytes; | |
5349 | } | |
5350 | |||
/*
 * memory_max_show - seq_show handler for the "max" control file.
 *
 * Unlike "low" and "high", the value comes from the page counter itself:
 * a single ACCESS_ONCE() snapshot of memcg->memory.limit. It is printed
 * followed by a newline, as the literal "infinity" when it equals
 * PAGE_COUNTER_MAX and otherwise multiplied by PAGE_SIZE (a page count
 * reported in bytes).
 *
 * @v is unused. Always returns 0.
 */
5351 | static int memory_max_show(struct seq_file *m, void *v) | |
5352 | { | |
5353 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); | |
5354 | unsigned long max = ACCESS_ONCE(memcg->memory.limit); | |
5355 | |
5356 | if (max == PAGE_COUNTER_MAX) | |
5357 | seq_puts(m, "infinity\n"); | |
5358 | else | |
5359 | seq_printf(m, "%llu\n", (u64)max * PAGE_SIZE); | |
5360 | |
5361 | return 0; | |
5362 | } | |
5363 | |||
/*
 * memory_max_write - write handler for the "max" control file.
 *
 * Strips leading/trailing whitespace from @buf, parses the remainder with
 * page_counter_memparse() and hands the parsed value to
 * mem_cgroup_resize_limit() instead of storing it directly.
 *
 * NOTE(review): "infinity" is handed to page_counter_memparse() as the
 * keyword argument; presumably it is parsed to PAGE_COUNTER_MAX (which is
 * what memory_max_show() prints as "infinity") -- confirm in the parser.
 *
 * Returns @nbytes on success, or the non-zero error code from either the
 * parser or mem_cgroup_resize_limit(). @off is unused.
 */
5364 | static ssize_t memory_max_write(struct kernfs_open_file *of, | |
5365 | char *buf, size_t nbytes, loff_t off) | |
5366 | { | |
5367 | struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); | |
5368 | unsigned long max; | |
5369 | int err; | |
5370 | |
5371 | buf = strstrip(buf); | |
5372 | err = page_counter_memparse(buf, "infinity", &max); | |
5373 | if (err) | |
5374 | return err; | |
5375 | |
5376 | err = mem_cgroup_resize_limit(memcg, max); | |
5377 | if (err) | |
5378 | return err; | |
5379 | |
5380 | return nbytes; | |
5381 | } | |
5382 | |||
/*
 * memory_events_show - seq_show handler for the "events" control file.
 *
 * Emits four "<name> <count>" lines, in the fixed order low, high, max,
 * oom. Each count is read with mem_cgroup_read_events() using the
 * MEMCG_LOW, MEMCG_HIGH, MEMCG_MAX and MEMCG_OOM indices respectively.
 *
 * @v is unused. Always returns 0.
 */
5383 | static int memory_events_show(struct seq_file *m, void *v) | |
5384 | { | |
5385 | struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); | |
5386 | |
5387 | seq_printf(m, "low %lu\n", mem_cgroup_read_events(memcg, MEMCG_LOW)); | |
5388 | seq_printf(m, "high %lu\n", mem_cgroup_read_events(memcg, MEMCG_HIGH)); | |
5389 | seq_printf(m, "max %lu\n", mem_cgroup_read_events(memcg, MEMCG_MAX)); | |
5390 | seq_printf(m, "oom %lu\n", mem_cgroup_read_events(memcg, MEMCG_OOM)); | |
5391 | |
5392 | return 0; | |
5393 | } | |
5394 | |||
/*
 * memory_files - control file table that memory_cgrp_subsys installs via
 * .dfl_cftypes (the older table stays on .legacy_cftypes).
 *
 * Entries: "current" (read_u64 only), "low", "high" and "max" (seq_show
 * plus write), and "events" (seq_show only). Every entry except "current"
 * is flagged CFTYPE_NOT_ON_ROOT. The trailing empty initializer terminates
 * the array.
 */
5395 | static struct cftype memory_files[] = { | |
5396 | { | |
5397 | .name = "current", | |
5398 | .read_u64 = memory_current_read, | |
5399 | }, | |
5400 | { | |
5401 | .name = "low", | |
5402 | .flags = CFTYPE_NOT_ON_ROOT, | |
5403 | .seq_show = memory_low_show, | |
5404 | .write = memory_low_write, | |
5405 | }, | |
5406 | { | |
5407 | .name = "high", | |
5408 | .flags = CFTYPE_NOT_ON_ROOT, | |
5409 | .seq_show = memory_high_show, | |
5410 | .write = memory_high_write, | |
5411 | }, | |
5412 | { | |
5413 | .name = "max", | |
5414 | .flags = CFTYPE_NOT_ON_ROOT, | |
5415 | .seq_show = memory_max_show, | |
5416 | .write = memory_max_write, | |
5417 | }, | |
5418 | { | |
5419 | .name = "events", | |
5420 | .flags = CFTYPE_NOT_ON_ROOT, | |
5421 | .seq_show = memory_events_show, | |
5422 | }, | |
5423 | { } /* terminate */ | |
5424 | }; | |
5425 | |||
5270 | struct cgroup_subsys memory_cgrp_subsys = { | 5426 | struct cgroup_subsys memory_cgrp_subsys = { |
5271 | .css_alloc = mem_cgroup_css_alloc, | 5427 | .css_alloc = mem_cgroup_css_alloc, |
5272 | .css_online = mem_cgroup_css_online, | 5428 | .css_online = mem_cgroup_css_online, |
@@ -5277,7 +5433,8 @@ struct cgroup_subsys memory_cgrp_subsys = { | |||
5277 | .cancel_attach = mem_cgroup_cancel_attach, | 5433 | .cancel_attach = mem_cgroup_cancel_attach, |
5278 | .attach = mem_cgroup_move_task, | 5434 | .attach = mem_cgroup_move_task, |
5279 | .bind = mem_cgroup_bind, | 5435 | .bind = mem_cgroup_bind, |
5280 | .legacy_cftypes = mem_cgroup_files, | 5436 | .dfl_cftypes = memory_files, |
5437 | .legacy_cftypes = mem_cgroup_legacy_files, | ||
5281 | .early_init = 0, | 5438 | .early_init = 0, |
5282 | }; | 5439 | }; |
5283 | 5440 | ||
@@ -5312,6 +5469,56 @@ static void __init enable_swap_cgroup(void) | |||
5312 | } | 5469 | } |
5313 | #endif | 5470 | #endif |
5314 | 5471 | ||
5472 | /** | |
5473 | * mem_cgroup_events - count memory events against a cgroup | |
5474 | * @memcg: the memory cgroup whose counters are updated | |
5475 | * @idx: the event index, used as the slot in memcg->stat->events[] | |
5476 | * @nr: the number of events to account for; added with this_cpu_add() | |
5477 | */ | |
5478 | void mem_cgroup_events(struct mem_cgroup *memcg, | |
5479 | enum mem_cgroup_events_index idx, | |
5480 | unsigned int nr) | |
5481 | { | |
5482 | this_cpu_add(memcg->stat->events[idx], nr); | |
5483 | } | |
5484 | |||
5485 | /** | |
5486 | * mem_cgroup_low - check if memory consumption is below the normal range | |
5487 | * @root: the highest ancestor to consider | |
5488 | * @memcg: the memory cgroup to check | |
5489 | * | |
5490 | * Returns %true if the usage of @memcg, and of every configurable | |
5491 | * ancestor up to @root, does not exceed that group's ->low; %false otherwise, or if memcg is disabled. | |
5492 | */ | |
5493 | bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg) | |
5494 | { | |
5495 | if (mem_cgroup_disabled()) | |
5496 | return false; | |
5497 | |
5498 | /* | |
5499 | * The toplevel group doesn't have a configurable range, so | |
5500 | * it's never low when looked at directly, and it is not | |
5501 | * considered an ancestor when assessing the hierarchy: the | |
5502 | * walk below stops as soon as it reaches root_mem_cgroup. | |
5503 | |
5504 | if (memcg == root_mem_cgroup) | |
5505 | return false; | |
5506 | |
5507 | if (page_counter_read(&memcg->memory) > memcg->low) | |
5508 | return false; | |
5509 | |
5510 | while (memcg != root) { | |
5511 | memcg = parent_mem_cgroup(memcg); | |
5512 | |
5513 | if (memcg == root_mem_cgroup) | |
5514 | break; | |
5515 | |
5516 | if (page_counter_read(&memcg->memory) > memcg->low) | |
5517 | return false; | |
5518 | } | |
5519 | return true; | |
5520 | } | |
5521 | |||
5315 | #ifdef CONFIG_MEMCG_SWAP | 5522 | #ifdef CONFIG_MEMCG_SWAP |
5316 | /** | 5523 | /** |
5317 | * mem_cgroup_swapout - transfer a memsw charge to swap | 5524 | * mem_cgroup_swapout - transfer a memsw charge to swap |
diff --git a/mm/vmscan.c b/mm/vmscan.c index b6dfa0081a8e..8e645ee52045 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -91,6 +91,9 @@ struct scan_control { | |||
91 | /* Can pages be swapped as part of reclaim? */ | 91 | /* Can pages be swapped as part of reclaim? */ |
92 | unsigned int may_swap:1; | 92 | unsigned int may_swap:1; |
93 | 93 | ||
94 | /* Can cgroups be reclaimed below their normal consumption range? */ | ||
95 | unsigned int may_thrash:1; | ||
96 | |||
94 | unsigned int hibernation_mode:1; | 97 | unsigned int hibernation_mode:1; |
95 | 98 | ||
96 | /* One of the zones is ready for compaction */ | 99 | /* One of the zones is ready for compaction */ |
@@ -2294,6 +2297,12 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc, | |||
2294 | struct lruvec *lruvec; | 2297 | struct lruvec *lruvec; |
2295 | int swappiness; | 2298 | int swappiness; |
2296 | 2299 | ||
2300 | if (mem_cgroup_low(root, memcg)) { | ||
2301 | if (!sc->may_thrash) | ||
2302 | continue; | ||
2303 | mem_cgroup_events(memcg, MEMCG_LOW, 1); | ||
2304 | } | ||
2305 | |||
2297 | lruvec = mem_cgroup_zone_lruvec(zone, memcg); | 2306 | lruvec = mem_cgroup_zone_lruvec(zone, memcg); |
2298 | swappiness = mem_cgroup_swappiness(memcg); | 2307 | swappiness = mem_cgroup_swappiness(memcg); |
2299 | 2308 | ||
@@ -2315,8 +2324,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc, | |||
2315 | mem_cgroup_iter_break(root, memcg); | 2324 | mem_cgroup_iter_break(root, memcg); |
2316 | break; | 2325 | break; |
2317 | } | 2326 | } |
2318 | memcg = mem_cgroup_iter(root, memcg, &reclaim); | 2327 | } while ((memcg = mem_cgroup_iter(root, memcg, &reclaim))); |
2319 | } while (memcg); | ||
2320 | 2328 | ||
2321 | /* | 2329 | /* |
2322 | * Shrink the slab caches in the same proportion that | 2330 | * Shrink the slab caches in the same proportion that |
@@ -2519,10 +2527,11 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) | |||
2519 | static unsigned long do_try_to_free_pages(struct zonelist *zonelist, | 2527 | static unsigned long do_try_to_free_pages(struct zonelist *zonelist, |
2520 | struct scan_control *sc) | 2528 | struct scan_control *sc) |
2521 | { | 2529 | { |
2530 | int initial_priority = sc->priority; | ||
2522 | unsigned long total_scanned = 0; | 2531 | unsigned long total_scanned = 0; |
2523 | unsigned long writeback_threshold; | 2532 | unsigned long writeback_threshold; |
2524 | bool zones_reclaimable; | 2533 | bool zones_reclaimable; |
2525 | 2534 | retry: | |
2526 | delayacct_freepages_start(); | 2535 | delayacct_freepages_start(); |
2527 | 2536 | ||
2528 | if (global_reclaim(sc)) | 2537 | if (global_reclaim(sc)) |
@@ -2572,6 +2581,13 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, | |||
2572 | if (sc->compaction_ready) | 2581 | if (sc->compaction_ready) |
2573 | return 1; | 2582 | return 1; |
2574 | 2583 | ||
2584 | /* Untapped cgroup reserves? Don't OOM, retry. */ | ||
2585 | if (!sc->may_thrash) { | ||
2586 | sc->priority = initial_priority; | ||
2587 | sc->may_thrash = 1; | ||
2588 | goto retry; | ||
2589 | } | ||
2590 | |||
2575 | /* Any of the zones still reclaimable? Don't OOM. */ | 2591 | /* Any of the zones still reclaimable? Don't OOM. */ |
2576 | if (zones_reclaimable) | 2592 | if (zones_reclaimable) |
2577 | return 1; | 2593 | return 1; |