 mm/memcontrol.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 162 insertions(+), 4 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e72fb2b4a7d8..20e1d90b3363 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -143,6 +143,13 @@ struct mem_cgroup {
         struct mem_cgroup_lru_info info;

         int prev_priority;      /* for recording reclaim priority */
+
+        /*
+         * While reclaiming in a hierarchy, we cache the last child we
+         * reclaimed from. Protected by cgroup_lock()
+         */
+        struct mem_cgroup *last_scanned_child;
+
         int obsolete;
         atomic_t refcnt;
         /*
@@ -461,6 +468,149 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
         return nr_taken;
 }

+#define mem_cgroup_from_res_counter(counter, member)    \
+        container_of(counter, struct mem_cgroup, member)
+
+/*
+ * This routine finds the DFS walk successor. It should be called with
+ * cgroup_mutex held.
+ */
+static struct mem_cgroup *
+mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem)
+{
+        struct cgroup *cgroup, *curr_cgroup, *root_cgroup;
+
+        curr_cgroup = curr->css.cgroup;
+        root_cgroup = root_mem->css.cgroup;
+
+        if (!list_empty(&curr_cgroup->children)) {
+                /*
+                 * Walk down to children
+                 */
+                mem_cgroup_put(curr);
+                cgroup = list_entry(curr_cgroup->children.next,
+                                struct cgroup, sibling);
+                curr = mem_cgroup_from_cont(cgroup);
+                mem_cgroup_get(curr);
+                goto done;
+        }
+
+visit_parent:
+        if (curr_cgroup == root_cgroup) {
+                mem_cgroup_put(curr);
+                curr = root_mem;
+                mem_cgroup_get(curr);
+                goto done;
+        }
+
+        /*
+         * Go to the next sibling
+         */
+        if (curr_cgroup->sibling.next != &curr_cgroup->parent->children) {
+                mem_cgroup_put(curr);
+                cgroup = list_entry(curr_cgroup->sibling.next, struct cgroup,
+                                sibling);
+                curr = mem_cgroup_from_cont(cgroup);
+                mem_cgroup_get(curr);
+                goto done;
+        }
+
+        /*
+         * Go up to the next parent and the next parent's sibling if need be
+         */
+        curr_cgroup = curr_cgroup->parent;
+        goto visit_parent;
+
+done:
+        root_mem->last_scanned_child = curr;
+        return curr;
+}
+
+/*
+ * Visit the first child (need not be the first child as per the ordering
+ * of the cgroup list, since we track last_scanned_child) of @root_mem and
+ * use that to reclaim free pages from.
+ */
+static struct mem_cgroup *
+mem_cgroup_get_first_node(struct mem_cgroup *root_mem)
+{
+        struct cgroup *cgroup;
+        struct mem_cgroup *ret;
+        bool obsolete = (root_mem->last_scanned_child &&
+                                root_mem->last_scanned_child->obsolete);
+
+        /*
+         * Scan all children under the mem_cgroup root_mem
+         */
+        cgroup_lock();
+        if (list_empty(&root_mem->css.cgroup->children)) {
+                ret = root_mem;
+                goto done;
+        }
+
+        if (!root_mem->last_scanned_child || obsolete) {
+
+                if (obsolete)
+                        mem_cgroup_put(root_mem->last_scanned_child);
+
+                cgroup = list_first_entry(&root_mem->css.cgroup->children,
+                                struct cgroup, sibling);
+                ret = mem_cgroup_from_cont(cgroup);
+                mem_cgroup_get(ret);
+        } else
+                ret = mem_cgroup_get_next_node(root_mem->last_scanned_child,
+                                root_mem);
+
+done:
+        root_mem->last_scanned_child = ret;
+        cgroup_unlock();
+        return ret;
+}
+
+/*
+ * Dance down the hierarchy if needed to reclaim memory. We remember the
+ * last child we reclaimed from, so that we don't end up penalizing
+ * one child extensively based on its position in the children list.
+ *
+ * root_mem is the original ancestor that we've been reclaiming from.
+ */
+static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
+                                        gfp_t gfp_mask, bool noswap)
+{
+        struct mem_cgroup *next_mem;
+        int ret = 0;
+
+        /*
+         * Reclaim unconditionally and don't check the return value.
+         * We need to reclaim in the current group and down the tree.
+         * One might think about checking for children before reclaiming,
+         * but there might be left-over accounting, even after children
+         * have left.
+         */
+        ret = try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap);
+        if (res_counter_check_under_limit(&root_mem->res))
+                return 0;
+
+        next_mem = mem_cgroup_get_first_node(root_mem);
+
+        while (next_mem != root_mem) {
+                if (next_mem->obsolete) {
+                        mem_cgroup_put(next_mem);
+                        cgroup_lock();
+                        next_mem = mem_cgroup_get_first_node(root_mem);
+                        cgroup_unlock();
+                        continue;
+                }
+                ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap);
+                if (res_counter_check_under_limit(&root_mem->res))
+                        return 0;
+                cgroup_lock();
+                next_mem = mem_cgroup_get_next_node(next_mem, root_mem);
+                cgroup_unlock();
+        }
+        return ret;
+}
+
 /*
  * Unlike exported interface, "oom" parameter is added. if oom==true,
  * oom-killer can be invoked.
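
[Editorial note: the hunk above is easier to follow outside the kernel. Below is a minimal
stand-alone user-space sketch, not part of the patch, of its two ideas: the DFS-successor
step of mem_cgroup_get_next_node() and the round-robin reclaim pass that remembers the
last child scanned. The node type, next_node(), and the try_reclaim() stub are illustrative
stand-ins for struct cgroup's children/sibling lists and try_to_free_mem_cgroup_pages().]

#include <stdio.h>
#include <stdbool.h>

struct node {
        const char *name;
        struct node *parent, *first_child, *next_sibling;
};

/* Successor: first child, else next sibling, else climb; wrap to root. */
static struct node *next_node(struct node *curr, struct node *root)
{
        if (curr->first_child)
                return curr->first_child;       /* walk down to children */
        for (;;) {
                if (curr == root)
                        return root;            /* wrapped around to root */
                if (curr->next_sibling)
                        return curr->next_sibling; /* go to next sibling */
                curr = curr->parent;            /* go up and retry */
        }
}

/* Stub standing in for try_to_free_mem_cgroup_pages(): always "fails". */
static bool try_reclaim(struct node *n)
{
        printf("reclaiming from %s\n", n->name);
        return false;   /* pretend we never get back under the limit */
}

int main(void)
{
        /* root -> {a -> {a1}, b} */
        struct node root = { "root", NULL, NULL, NULL };
        struct node a   = { "a",  &root, NULL, NULL };
        struct node a1  = { "a1", &a,    NULL, NULL };
        struct node b   = { "b",  &root, NULL, NULL };
        struct node *last_scanned = NULL;       /* like last_scanned_child */
        struct node *n;

        root.first_child = &a;
        a.first_child = &a1;
        a.next_sibling = &b;

        /* One "hierarchical reclaim" pass: the root first, then the walk. */
        if (try_reclaim(&root))
                return 0;
        n = last_scanned ? next_node(last_scanned, &root) : root.first_child;
        while (n != &root) {
                if (try_reclaim(n))
                        break;
                last_scanned = n;
                n = next_node(n, &root);
        }
        /* prints: reclaiming from root, a, a1, b */
        return 0;
}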
@@ -469,7 +619,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
                         gfp_t gfp_mask, struct mem_cgroup **memcg,
                         bool oom)
 {
-        struct mem_cgroup *mem;
+        struct mem_cgroup *mem, *mem_over_limit;
         int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
         struct res_counter *fail_res;
         /*
@@ -511,12 +661,18 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
                         /* mem+swap counter fails */
                         res_counter_uncharge(&mem->res, PAGE_SIZE);
                         noswap = true;
-                }
+                        mem_over_limit = mem_cgroup_from_res_counter(fail_res,
+                                        memsw);
+                } else
+                        /* mem counter fails */
+                        mem_over_limit = mem_cgroup_from_res_counter(fail_res,
+                                        res);
+
                 if (!(gfp_mask & __GFP_WAIT))
                         goto nomem;

-                if (try_to_free_mem_cgroup_pages(mem, gfp_mask, noswap))
-                        continue;
+                ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask,
+                                noswap);

                 /*
                  * try_to_free_mem_cgroup_pages() might not give us a full
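
[Editorial note: mem_cgroup_from_res_counter(), used above to map the failing res_counter
back to the mem_cgroup that owns it, is the standard container_of pattern. A small
user-space sketch, with a hypothetical mem_cgroup_demo type standing in for
struct mem_cgroup:]

#include <stdio.h>
#include <stddef.h>

/* Recover the enclosing struct from a pointer to one of its members. */
#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct res_counter { long usage, limit; };

struct mem_cgroup_demo {
        int id;
        struct res_counter res;         /* memory counter */
        struct res_counter memsw;       /* memory+swap counter */
};

#define demo_from_res_counter(counter, member) \
        container_of(counter, struct mem_cgroup_demo, member)

int main(void)
{
        struct mem_cgroup_demo cg = { 42, { 0, 100 }, { 0, 200 } };
        struct res_counter *fail_res = &cg.memsw; /* say memsw hit its limit */

        /* Recover the cgroup that owns the failing counter. */
        struct mem_cgroup_demo *over = demo_from_res_counter(fail_res, memsw);
        printf("over-limit cgroup id = %d\n", over->id);  /* prints 42 */
        return 0;
}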
@@ -1732,6 +1888,8 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
         res_counter_init(&mem->memsw, parent ? &parent->memsw : NULL);


+        mem->last_scanned_child = NULL;
+
         return &mem->css;
 free_out:
         for_each_node_state(node, N_POSSIBLE)