Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--	mm/memcontrol.c	248
1 file changed, 148 insertions, 100 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0576e9e64586..991860e6e0a7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1047,6 +1047,49 @@ static unsigned int get_swappiness(struct mem_cgroup *memcg)
 	return swappiness;
 }
 
+/* A routine for testing mem is not under move_account */
+
+static bool mem_cgroup_under_move(struct mem_cgroup *mem)
+{
+	struct mem_cgroup *from = mc.from;
+	struct mem_cgroup *to = mc.to;
+	bool ret = false;
+
+	if (from == mem || to == mem)
+		return true;
+
+	if (!from || !to || !mem->use_hierarchy)
+		return false;
+
+	rcu_read_lock();
+	if (css_tryget(&from->css)) {
+		ret = css_is_ancestor(&from->css, &mem->css);
+		css_put(&from->css);
+	}
+	if (!ret && css_tryget(&to->css)) {
+		ret = css_is_ancestor(&to->css, &mem->css);
+		css_put(&to->css);
+	}
+	rcu_read_unlock();
+	return ret;
+}
+
+static bool mem_cgroup_wait_acct_move(struct mem_cgroup *mem)
+{
+	if (mc.moving_task && current != mc.moving_task) {
+		if (mem_cgroup_under_move(mem)) {
+			DEFINE_WAIT(wait);
+			prepare_to_wait(&mc.waitq, &wait, TASK_INTERRUPTIBLE);
+			/* moving charge context might have finished. */
+			if (mc.moving_task)
+				schedule();
+			finish_wait(&mc.waitq, &wait);
+			return true;
+		}
+	}
+	return false;
+}
+
 static int mem_cgroup_count_children_cb(struct mem_cgroup *mem, void *data)
 {
 	int *val = data;
@@ -1575,16 +1618,83 @@ static int __cpuinit memcg_stock_cpu_callback(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
+
+/* See __mem_cgroup_try_charge() for details */
+enum {
+	CHARGE_OK,		/* success */
+	CHARGE_RETRY,		/* need to retry but retry is not bad */
+	CHARGE_NOMEM,		/* we can't do more. return -ENOMEM */
+	CHARGE_WOULDBLOCK,	/* GFP_WAIT wasn't set and not enough res. */
+	CHARGE_OOM_DIE,		/* the current is killed because of OOM */
+};
+
+static int __mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask,
+				int csize, bool oom_check)
+{
+	struct mem_cgroup *mem_over_limit;
+	struct res_counter *fail_res;
+	unsigned long flags = 0;
+	int ret;
+
+	ret = res_counter_charge(&mem->res, csize, &fail_res);
+
+	if (likely(!ret)) {
+		if (!do_swap_account)
+			return CHARGE_OK;
+		ret = res_counter_charge(&mem->memsw, csize, &fail_res);
+		if (likely(!ret))
+			return CHARGE_OK;
+
+		mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw);
+		flags |= MEM_CGROUP_RECLAIM_NOSWAP;
+	} else
+		mem_over_limit = mem_cgroup_from_res_counter(fail_res, res);
+
+	if (csize > PAGE_SIZE) /* change csize and retry */
+		return CHARGE_RETRY;
+
+	if (!(gfp_mask & __GFP_WAIT))
+		return CHARGE_WOULDBLOCK;
+
+	ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
+					gfp_mask, flags);
+	/*
+	 * try_to_free_mem_cgroup_pages() might not give us a full
+	 * picture of reclaim. Some pages are reclaimed and might be
+	 * moved to swap cache or just unmapped from the cgroup.
+	 * Check the limit again to see if the reclaim reduced the
+	 * current usage of the cgroup before giving up
+	 */
+	if (ret || mem_cgroup_check_under_limit(mem_over_limit))
+		return CHARGE_RETRY;
+
+	/*
+	 * At task move, charge accounts can be doubly counted. So, it's
+	 * better to wait until the end of task_move if something is going on.
+	 */
+	if (mem_cgroup_wait_acct_move(mem_over_limit))
+		return CHARGE_RETRY;
+
+	/* If we don't need to call oom-killer at all, return immediately */
+	if (!oom_check)
+		return CHARGE_NOMEM;
+	/* check OOM */
+	if (!mem_cgroup_handle_oom(mem_over_limit, gfp_mask))
+		return CHARGE_OOM_DIE;
+
+	return CHARGE_RETRY;
+}
+
 /*
  * Unlike exported interface, "oom" parameter is added. if oom==true,
  * oom-killer can be invoked.
  */
 static int __mem_cgroup_try_charge(struct mm_struct *mm,
 			gfp_t gfp_mask, struct mem_cgroup **memcg, bool oom)
 {
-	struct mem_cgroup *mem, *mem_over_limit;
-	int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
-	struct res_counter *fail_res;
+	int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
+	struct mem_cgroup *mem = NULL;
+	int ret;
 	int csize = CHARGE_SIZE;
 
 	/*
@@ -1602,120 +1712,56 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
 	 * thread group leader migrates. It's possible that mm is not
 	 * set, if so charge the init_mm (happens for pagecache usage).
 	 */
-	mem = *memcg;
-	if (likely(!mem)) {
+	if (*memcg) {
+		mem = *memcg;
+		css_get(&mem->css);
+	} else {
 		mem = try_get_mem_cgroup_from_mm(mm);
+		if (unlikely(!mem))
+			return 0;
 		*memcg = mem;
-	} else {
-		css_get(&mem->css);
 	}
-	if (unlikely(!mem))
-		return 0;
 
 	VM_BUG_ON(css_is_removed(&mem->css));
 	if (mem_cgroup_is_root(mem))
 		goto done;
 
-	while (1) {
-		int ret = 0;
-		unsigned long flags = 0;
+	do {
+		bool oom_check;
 
 		if (consume_stock(mem))
-			goto done;
-
-		ret = res_counter_charge(&mem->res, csize, &fail_res);
-		if (likely(!ret)) {
-			if (!do_swap_account)
-				break;
-			ret = res_counter_charge(&mem->memsw, csize, &fail_res);
-			if (likely(!ret))
-				break;
-			/* mem+swap counter fails */
-			res_counter_uncharge(&mem->res, csize);
-			flags |= MEM_CGROUP_RECLAIM_NOSWAP;
-			mem_over_limit = mem_cgroup_from_res_counter(fail_res,
-									memsw);
-		} else
-			/* mem counter fails */
-			mem_over_limit = mem_cgroup_from_res_counter(fail_res,
-									res);
+			goto done; /* don't need to fill stock */
+		/* If killed, bypass charge */
+		if (fatal_signal_pending(current))
+			goto bypass;
 
-		/* reduce request size and retry */
-		if (csize > PAGE_SIZE) {
-			csize = PAGE_SIZE;
-			continue;
+		oom_check = false;
+		if (oom && !nr_oom_retries) {
+			oom_check = true;
+			nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
 		}
-		if (!(gfp_mask & __GFP_WAIT))
-			goto nomem;
 
-		ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
-						gfp_mask, flags);
-		if (ret)
-			continue;
+		ret = __mem_cgroup_do_charge(mem, gfp_mask, csize, oom_check);
 
-		/*
-		 * try_to_free_mem_cgroup_pages() might not give us a full
-		 * picture of reclaim. Some pages are reclaimed and might be
-		 * moved to swap cache or just unmapped from the cgroup.
-		 * Check the limit again to see if the reclaim reduced the
-		 * current usage of the cgroup before giving up
-		 *
-		 */
-		if (mem_cgroup_check_under_limit(mem_over_limit))
-			continue;
-
-		/* try to avoid oom while someone is moving charge */
-		if (mc.moving_task && current != mc.moving_task) {
-			struct mem_cgroup *from, *to;
-			bool do_continue = false;
-			/*
-			 * There is a small race that "from" or "to" can be
-			 * freed by rmdir, so we use css_tryget().
-			 */
-			from = mc.from;
-			to = mc.to;
-			if (from && css_tryget(&from->css)) {
-				if (mem_over_limit->use_hierarchy)
-					do_continue = css_is_ancestor(
-							&from->css,
-							&mem_over_limit->css);
-				else
-					do_continue = (from == mem_over_limit);
-				css_put(&from->css);
-			}
-			if (!do_continue && to && css_tryget(&to->css)) {
-				if (mem_over_limit->use_hierarchy)
-					do_continue = css_is_ancestor(
-							&to->css,
-							&mem_over_limit->css);
-				else
-					do_continue = (to == mem_over_limit);
-				css_put(&to->css);
-			}
-			if (do_continue) {
-				DEFINE_WAIT(wait);
-				prepare_to_wait(&mc.waitq, &wait,
-							TASK_INTERRUPTIBLE);
-				/* moving charge context might have finished. */
-				if (mc.moving_task)
-					schedule();
-				finish_wait(&mc.waitq, &wait);
-				continue;
-			}
-		}
-
-		if (!nr_retries--) {
+		switch (ret) {
+		case CHARGE_OK:
+			break;
+		case CHARGE_RETRY: /* not in OOM situation but retry */
+			csize = PAGE_SIZE;
+			break;
+		case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */
+			goto nomem;
+		case CHARGE_NOMEM: /* OOM routine works */
 			if (!oom)
 				goto nomem;
-			if (mem_cgroup_handle_oom(mem_over_limit, gfp_mask)) {
-				nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
-				continue;
-			}
-			/* When we reach here, current task is dying .*/
-			css_put(&mem->css);
+			/* If oom, we never return -ENOMEM */
+			nr_oom_retries--;
+			break;
+		case CHARGE_OOM_DIE: /* Killed by OOM Killer */
 			goto bypass;
 		}
-	}
+	} while (ret != CHARGE_OK);
+
 	if (csize > PAGE_SIZE)
 		refill_stock(mem, csize - PAGE_SIZE);
 done:
@@ -1724,6 +1770,8 @@ nomem:
 	css_put(&mem->css);
 	return -ENOMEM;
 bypass:
+	if (mem)
+		css_put(&mem->css);
 	*memcg = NULL;
 	return 0;
 }