Diffstat (limited to 'mm')
-rw-r--r--  mm/memcontrol.c  229
-rw-r--r--  mm/vmscan.c       22
2 files changed, 237 insertions, 14 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6453ea5a27aa..ee97c9ac62c0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -97,14 +97,6 @@ static const char * const mem_cgroup_stat_names[] = {
97 "swap", 97 "swap",
98}; 98};
99 99
100enum mem_cgroup_events_index {
101 MEM_CGROUP_EVENTS_PGPGIN, /* # of pages paged in */
102 MEM_CGROUP_EVENTS_PGPGOUT, /* # of pages paged out */
103 MEM_CGROUP_EVENTS_PGFAULT, /* # of page-faults */
104 MEM_CGROUP_EVENTS_PGMAJFAULT, /* # of major page-faults */
105 MEM_CGROUP_EVENTS_NSTATS,
106};
107
108static const char * const mem_cgroup_events_names[] = { 100static const char * const mem_cgroup_events_names[] = {
109 "pgpgin", 101 "pgpgin",
110 "pgpgout", 102 "pgpgout",
@@ -138,7 +130,7 @@ enum mem_cgroup_events_target {
 
 struct mem_cgroup_stat_cpu {
 	long count[MEM_CGROUP_STAT_NSTATS];
-	unsigned long events[MEM_CGROUP_EVENTS_NSTATS];
+	unsigned long events[MEMCG_NR_EVENTS];
 	unsigned long nr_page_events;
 	unsigned long targets[MEM_CGROUP_NTARGETS];
 };
@@ -284,6 +276,10 @@ struct mem_cgroup {
 	struct page_counter memsw;
 	struct page_counter kmem;
 
+	/* Normal memory consumption range */
+	unsigned long low;
+	unsigned long high;
+
 	unsigned long soft_limit;
 
 	/* vmpressure notifications */
@@ -2315,6 +2311,8 @@ retry:
 	if (!(gfp_mask & __GFP_WAIT))
 		goto nomem;
 
+	mem_cgroup_events(mem_over_limit, MEMCG_MAX, 1);
+
 	nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
 						    gfp_mask, may_swap);
 
@@ -2356,6 +2354,8 @@ retry:
 	if (fatal_signal_pending(current))
 		goto bypass;
 
+	mem_cgroup_events(mem_over_limit, MEMCG_OOM, 1);
+
 	mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(nr_pages));
 nomem:
 	if (!(gfp_mask & __GFP_NOFAIL))
@@ -2367,6 +2367,16 @@ done_restock:
 	css_get_many(&memcg->css, batch);
 	if (batch > nr_pages)
 		refill_stock(memcg, batch - nr_pages);
+	/*
+	 * If the hierarchy is above the normal consumption range,
+	 * make the charging task trim their excess contribution.
+	 */
+	do {
+		if (page_counter_read(&memcg->memory) <= memcg->high)
+			continue;
+		mem_cgroup_events(memcg, MEMCG_HIGH, 1);
+		try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
+	} while ((memcg = parent_mem_cgroup(memcg)));
 done:
 	return ret;
 }
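
The done_restock loop added above is the enforcement half of the new high boundary: after a charge succeeds, the charging task walks from its own group to the root and does direct reclaim in every ancestor that is over its high setting. A minimal standalone sketch of the same walk, with hypothetical stand-in types (struct mcg, reclaim_from()) in place of the kernel's page counters:

struct mcg {
	struct mcg *parent;
	unsigned long usage;	/* stand-in for page_counter_read() */
	unsigned long high;	/* stand-in for memcg->high */
};

/* Hypothetical stand-in for try_to_free_mem_cgroup_pages(). */
static void reclaim_from(struct mcg *g, unsigned long nr_pages)
{
	(void)g; (void)nr_pages;	/* sketch: reclaim elided */
}

/*
 * Walk from the charging group to the root; any level that is
 * above its high boundary makes the charger do direct reclaim
 * for the pages it just added. continue re-evaluates the loop
 * condition, so the walk advances either way.
 */
static void enforce_high(struct mcg *g, unsigned long nr_pages)
{
	do {
		if (g->usage <= g->high)
			continue;
		reclaim_from(g, nr_pages);
	} while ((g = g->parent));
}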
@@ -4276,7 +4286,7 @@ out_kfree:
 	return ret;
 }
 
-static struct cftype mem_cgroup_files[] = {
+static struct cftype mem_cgroup_legacy_files[] = {
 	{
 		.name = "usage_in_bytes",
 		.private = MEMFILE_PRIVATE(_MEM, RES_USAGE),
@@ -4552,6 +4562,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 	if (parent_css == NULL) {
 		root_mem_cgroup = memcg;
 		page_counter_init(&memcg->memory, NULL);
+		memcg->high = PAGE_COUNTER_MAX;
 		memcg->soft_limit = PAGE_COUNTER_MAX;
 		page_counter_init(&memcg->memsw, NULL);
 		page_counter_init(&memcg->kmem, NULL);
@@ -4597,6 +4608,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 
 	if (parent->use_hierarchy) {
 		page_counter_init(&memcg->memory, &parent->memory);
+		memcg->high = PAGE_COUNTER_MAX;
 		memcg->soft_limit = PAGE_COUNTER_MAX;
 		page_counter_init(&memcg->memsw, &parent->memsw);
 		page_counter_init(&memcg->kmem, &parent->kmem);
@@ -4607,6 +4619,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 		 */
 	} else {
 		page_counter_init(&memcg->memory, NULL);
+		memcg->high = PAGE_COUNTER_MAX;
 		memcg->soft_limit = PAGE_COUNTER_MAX;
 		page_counter_init(&memcg->memsw, NULL);
 		page_counter_init(&memcg->kmem, NULL);
@@ -4682,6 +4695,8 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
 	mem_cgroup_resize_limit(memcg, PAGE_COUNTER_MAX);
 	mem_cgroup_resize_memsw_limit(memcg, PAGE_COUNTER_MAX);
 	memcg_update_kmem_limit(memcg, PAGE_COUNTER_MAX);
+	memcg->low = 0;
+	memcg->high = PAGE_COUNTER_MAX;
 	memcg->soft_limit = PAGE_COUNTER_MAX;
 }
 
@@ -5267,6 +5282,147 @@ static void mem_cgroup_bind(struct cgroup_subsys_state *root_css)
 	mem_cgroup_from_css(root_css)->use_hierarchy = true;
 }
 
+static u64 memory_current_read(struct cgroup_subsys_state *css,
+			       struct cftype *cft)
+{
+	return mem_cgroup_usage(mem_cgroup_from_css(css), false);
+}
+
+static int memory_low_show(struct seq_file *m, void *v)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+	unsigned long low = ACCESS_ONCE(memcg->low);
+
+	if (low == PAGE_COUNTER_MAX)
+		seq_puts(m, "infinity\n");
+	else
+		seq_printf(m, "%llu\n", (u64)low * PAGE_SIZE);
+
+	return 0;
+}
+
+static ssize_t memory_low_write(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+	unsigned long low;
+	int err;
+
+	buf = strstrip(buf);
+	err = page_counter_memparse(buf, "infinity", &low);
+	if (err)
+		return err;
+
+	memcg->low = low;
+
+	return nbytes;
+}
+
+static int memory_high_show(struct seq_file *m, void *v)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+	unsigned long high = ACCESS_ONCE(memcg->high);
+
+	if (high == PAGE_COUNTER_MAX)
+		seq_puts(m, "infinity\n");
+	else
+		seq_printf(m, "%llu\n", (u64)high * PAGE_SIZE);
+
+	return 0;
+}
+
+static ssize_t memory_high_write(struct kernfs_open_file *of,
+				 char *buf, size_t nbytes, loff_t off)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+	unsigned long high;
+	int err;
+
+	buf = strstrip(buf);
+	err = page_counter_memparse(buf, "infinity", &high);
+	if (err)
+		return err;
+
+	memcg->high = high;
+
+	return nbytes;
+}
+
+static int memory_max_show(struct seq_file *m, void *v)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+	unsigned long max = ACCESS_ONCE(memcg->memory.limit);
+
+	if (max == PAGE_COUNTER_MAX)
+		seq_puts(m, "infinity\n");
+	else
+		seq_printf(m, "%llu\n", (u64)max * PAGE_SIZE);
+
+	return 0;
+}
+
+static ssize_t memory_max_write(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+	unsigned long max;
+	int err;
+
+	buf = strstrip(buf);
+	err = page_counter_memparse(buf, "infinity", &max);
+	if (err)
+		return err;
+
+	err = mem_cgroup_resize_limit(memcg, max);
+	if (err)
+		return err;
+
+	return nbytes;
+}
+
+static int memory_events_show(struct seq_file *m, void *v)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+
+	seq_printf(m, "low %lu\n", mem_cgroup_read_events(memcg, MEMCG_LOW));
+	seq_printf(m, "high %lu\n", mem_cgroup_read_events(memcg, MEMCG_HIGH));
+	seq_printf(m, "max %lu\n", mem_cgroup_read_events(memcg, MEMCG_MAX));
+	seq_printf(m, "oom %lu\n", mem_cgroup_read_events(memcg, MEMCG_OOM));
+
+	return 0;
+}
+
+static struct cftype memory_files[] = {
+	{
+		.name = "current",
+		.read_u64 = memory_current_read,
+	},
+	{
+		.name = "low",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = memory_low_show,
+		.write = memory_low_write,
+	},
+	{
+		.name = "high",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = memory_high_show,
+		.write = memory_high_write,
+	},
+	{
+		.name = "max",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = memory_max_show,
+		.write = memory_max_write,
+	},
+	{
+		.name = "events",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = memory_events_show,
+	},
+	{ }	/* terminate */
+};
+
 struct cgroup_subsys memory_cgrp_subsys = {
 	.css_alloc = mem_cgroup_css_alloc,
 	.css_online = mem_cgroup_css_online,
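
The memory_files table above exposes the new interface on the unified hierarchy as memory.current, memory.low, memory.high, memory.max, and memory.events, with the boundary files parsed by page_counter_memparse() so the string "infinity" maps to PAGE_COUNTER_MAX. A hedged userspace sketch of configuring and inspecting such a group; the mount point and group name are assumptions for illustration only:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Assumed unified-hierarchy mount point and group, illustration only. */
#define GRP "/sys/fs/cgroup/unified/workload"

static void put(const char *file, const char *val)
{
	char path[256];
	int fd;

	snprintf(path, sizeof(path), "%s/%s", GRP, file);
	fd = open(path, O_WRONLY);
	if (fd < 0)
		return;
	write(fd, val, strlen(val));
	close(fd);
}

int main(void)
{
	char buf[256];
	int fd, n;

	put("memory.low", "536870912");		/* protect ~512M as the range floor */
	put("memory.high", "1073741824");	/* reclaim pressure starts above ~1G */
	put("memory.max", "infinity");		/* no hard limit; "infinity" accepted */

	/* memory.events reports the low/high/max/oom event counts. */
	snprintf(buf, sizeof(buf), "%s/memory.events", GRP);
	fd = open(buf, O_RDONLY);
	if (fd >= 0 && (n = read(fd, buf, sizeof(buf) - 1)) > 0) {
		buf[n] = '\0';
		fputs(buf, stdout);	/* e.g. "low 0\nhigh 3\nmax 0\noom 0\n" */
	}
	return 0;
}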
@@ -5277,7 +5433,8 @@ struct cgroup_subsys memory_cgrp_subsys = {
 	.cancel_attach = mem_cgroup_cancel_attach,
 	.attach = mem_cgroup_move_task,
 	.bind = mem_cgroup_bind,
-	.legacy_cftypes = mem_cgroup_files,
+	.dfl_cftypes = memory_files,
+	.legacy_cftypes = mem_cgroup_legacy_files,
 	.early_init = 0,
 };
 
@@ -5312,6 +5469,56 @@ static void __init enable_swap_cgroup(void)
 }
 #endif
 
+/**
+ * mem_cgroup_events - count memory events against a cgroup
+ * @memcg: the memory cgroup
+ * @idx: the event index
+ * @nr: the number of events to account for
+ */
+void mem_cgroup_events(struct mem_cgroup *memcg,
+		       enum mem_cgroup_events_index idx,
+		       unsigned int nr)
+{
+	this_cpu_add(memcg->stat->events[idx], nr);
+}
+
+/**
+ * mem_cgroup_low - check if memory consumption is below the normal range
+ * @root: the highest ancestor to consider
+ * @memcg: the memory cgroup to check
+ *
+ * Returns %true if memory consumption of @memcg, and that of all
+ * configurable ancestors up to @root, is below the normal range.
+ */
+bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg)
+{
+	if (mem_cgroup_disabled())
+		return false;
+
+	/*
+	 * The toplevel group doesn't have a configurable range, so
+	 * it's never low when looked at directly, and it is not
+	 * considered an ancestor when assessing the hierarchy.
+	 */
+
+	if (memcg == root_mem_cgroup)
+		return false;
+
+	if (page_counter_read(&memcg->memory) > memcg->low)
+		return false;
+
+	while (memcg != root) {
+		memcg = parent_mem_cgroup(memcg);
+
+		if (memcg == root_mem_cgroup)
+			break;
+
+		if (page_counter_read(&memcg->memory) > memcg->low)
+			return false;
+	}
+	return true;
+}
+
 #ifdef CONFIG_MEMCG_SWAP
 /**
  * mem_cgroup_swapout - transfer a memsw charge to swap
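
Note the asymmetry mem_cgroup_low() encodes: a group only counts as low if it and every configurable ancestor up to the reclaim root sit at or below their low boundaries, and the root cgroup itself is never low. A worked example under assumed numbers, traced through the function above:

/*
 * Hypothetical hierarchy (values in pages):
 *
 *   root_mem_cgroup               (no configurable range, never low)
 *   └── A: low=1024, usage=900
 *       └── B: low=512
 *
 * mem_cgroup_low(A, B) with B at usage=600:
 *   B: 600 > 512 -> false immediately; B gets no protection even
 *      though its parent A is below its own boundary.
 *
 * mem_cgroup_low(A, B) with B at usage=400:
 *   B: 400 <= 512, walk up to A;
 *   A: 900 <= 1024, and A == root ends the walk -> true,
 *   so reclaim will skip B unless it is allowed to thrash.
 */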
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b6dfa0081a8e..8e645ee52045 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -91,6 +91,9 @@ struct scan_control {
 	/* Can pages be swapped as part of reclaim? */
 	unsigned int may_swap:1;
 
+	/* Can cgroups be reclaimed below their normal consumption range? */
+	unsigned int may_thrash:1;
+
 	unsigned int hibernation_mode:1;
 
 	/* One of the zones is ready for compaction */
@@ -2294,6 +2297,12 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 			struct lruvec *lruvec;
 			int swappiness;
 
+			if (mem_cgroup_low(root, memcg)) {
+				if (!sc->may_thrash)
+					continue;
+				mem_cgroup_events(memcg, MEMCG_LOW, 1);
+			}
+
 			lruvec = mem_cgroup_zone_lruvec(zone, memcg);
 			swappiness = mem_cgroup_swappiness(memcg);
 
@@ -2315,8 +2324,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 				mem_cgroup_iter_break(root, memcg);
 				break;
 			}
-			memcg = mem_cgroup_iter(root, memcg, &reclaim);
-		} while (memcg);
+		} while ((memcg = mem_cgroup_iter(root, memcg, &reclaim)));
 
 		/*
 		 * Shrink the slab caches in the same proportion that
@@ -2519,10 +2527,11 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 					  struct scan_control *sc)
 {
+	int initial_priority = sc->priority;
 	unsigned long total_scanned = 0;
 	unsigned long writeback_threshold;
 	bool zones_reclaimable;
-
+retry:
 	delayacct_freepages_start();
 
 	if (global_reclaim(sc))
@@ -2572,6 +2581,13 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 	if (sc->compaction_ready)
 		return 1;
 
+	/* Untapped cgroup reserves?  Don't OOM, retry. */
+	if (!sc->may_thrash) {
+		sc->priority = initial_priority;
+		sc->may_thrash = 1;
+		goto retry;
+	}
+
 	/* Any of the zones still reclaimable? Don't OOM. */
 	if (zones_reclaimable)
 		return 1;
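
The vmscan changes work as a pair: shrink_zone() skips groups that mem_cgroup_low() reports as below their normal range, and do_try_to_free_pages() only repeats the whole scan with may_thrash set, at the initial priority, when the boundary-respecting pass reclaimed nothing, counting a MEMCG_LOW event for each protected group it then invades. A compact sketch of that two-pass control flow, with hypothetical stand-ins for the scan machinery:

struct scan_ctl {
	int priority;
	unsigned int may_thrash:1;
};

/* Stand-in for a full shrink cycle: pretend nothing is reclaimable
 * while low boundaries are honored, something once they may be broken. */
static unsigned long shrink_all(struct scan_ctl *sc)
{
	return sc->may_thrash ? 1 : 0;
}

static unsigned long try_free_pages(struct scan_ctl *sc)
{
	int initial_priority = sc->priority;
	unsigned long reclaimed;
retry:
	reclaimed = shrink_all(sc);	/* pass 1 skips groups below low */
	if (reclaimed)
		return reclaimed;
	if (!sc->may_thrash) {
		/* Untapped cgroup reserves? Don't OOM, retry. */
		sc->priority = initial_priority;
		sc->may_thrash = 1;
		goto retry;		/* pass 2 may thrash protected groups */
	}
	return 0;
}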