Diffstat (limited to 'mm/memcontrol.c')
 -rw-r--r--  mm/memcontrol.c | 140
 1 file changed, 103 insertions(+), 37 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ddffc74cdebe..e013b8e57d25 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -108,10 +108,12 @@ enum mem_cgroup_events_index {
 enum mem_cgroup_events_target {
 	MEM_CGROUP_TARGET_THRESH,
 	MEM_CGROUP_TARGET_SOFTLIMIT,
+	MEM_CGROUP_TARGET_NUMAINFO,
 	MEM_CGROUP_NTARGETS,
 };
 #define THRESHOLDS_EVENTS_TARGET (128)
 #define SOFTLIMIT_EVENTS_TARGET (1024)
+#define NUMAINFO_EVENTS_TARGET (1024)
 
 struct mem_cgroup_stat_cpu {
 	long count[MEM_CGROUP_STAT_NSTATS];
@@ -237,7 +239,8 @@ struct mem_cgroup {
 	int last_scanned_node;
 #if MAX_NUMNODES > 1
 	nodemask_t	scan_nodes;
-	unsigned long	next_scan_node_update;
+	atomic_t	numainfo_events;
+	atomic_t	numainfo_updating;
 #endif
 	/*
 	 * Should the accounting and control be hierarchical, per subtree?
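
The two atomic_t fields replace the jiffies-based next_scan_node_update timestamp: numainfo_events records that enough page events have occurred to make scan_nodes stale, and numainfo_updating admits exactly one updater at a time. A minimal sketch of the protocol, using only the fields added above (the real producer and consumer are in memcg_check_events() and mem_cgroup_may_update_nodemask() below):

	/* producer (page-event path): mark the cached nodemask stale */
	atomic_inc(&mem->numainfo_events);

	/* consumer: rebuild scan_nodes only when marked stale, one CPU at a time */
	if (!atomic_read(&mem->numainfo_events))
		return;					/* nothing changed */
	if (atomic_inc_return(&mem->numainfo_updating) > 1)
		return;					/* rebuild already running */
	/* ... recompute mem->scan_nodes ... */
	atomic_set(&mem->numainfo_events, 0);
	atomic_set(&mem->numainfo_updating, 0);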
@@ -577,15 +580,6 @@ static long mem_cgroup_read_stat(struct mem_cgroup *mem,
 	return val;
 }
 
-static long mem_cgroup_local_usage(struct mem_cgroup *mem)
-{
-	long ret;
-
-	ret = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_RSS);
-	ret += mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_CACHE);
-	return ret;
-}
-
 static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
 					 bool charge)
 {
@@ -689,6 +683,9 @@ static void __mem_cgroup_target_update(struct mem_cgroup *mem, int target)
 	case MEM_CGROUP_TARGET_SOFTLIMIT:
 		next = val + SOFTLIMIT_EVENTS_TARGET;
 		break;
+	case MEM_CGROUP_TARGET_NUMAINFO:
+		next = val + NUMAINFO_EVENTS_TARGET;
+		break;
 	default:
 		return;
 	}
@@ -707,11 +704,19 @@ static void memcg_check_events(struct mem_cgroup *mem, struct page *page)
 		mem_cgroup_threshold(mem);
 		__mem_cgroup_target_update(mem, MEM_CGROUP_TARGET_THRESH);
 		if (unlikely(__memcg_event_check(mem,
-			MEM_CGROUP_TARGET_SOFTLIMIT))){
+			MEM_CGROUP_TARGET_SOFTLIMIT))) {
 			mem_cgroup_update_tree(mem, page);
 			__mem_cgroup_target_update(mem,
 				MEM_CGROUP_TARGET_SOFTLIMIT);
+		}
+#if MAX_NUMNODES > 1
+		if (unlikely(__memcg_event_check(mem,
+			MEM_CGROUP_TARGET_NUMAINFO))) {
+			atomic_inc(&mem->numainfo_events);
+			__mem_cgroup_target_update(mem,
+				MEM_CGROUP_TARGET_NUMAINFO);
 		}
+#endif
 	}
 }
 
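
The NUMAINFO branch reuses the event-target machinery that already drives thresholds and the soft limit: work is triggered roughly once per NUMAINFO_EVENTS_TARGET (1024) page-in/page-out events rather than on every charge. A simplified model of the check-and-rearm pattern (assumption: one plain counter stands in for the real per-cpu counters behind __memcg_event_check()/__mem_cgroup_target_update()):

	/* returns true about once every 'target' events */
	static bool event_check_and_rearm(unsigned long events,
				unsigned long *next, unsigned long target)
	{
		if (events < *next)
			return false;		/* window not yet full */
		*next = events + target;	/* re-arm for the next window */
		return true;
	}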
@@ -1129,7 +1134,6 @@ unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg,
 	return MEM_CGROUP_ZSTAT(mz, lru);
 }
 
-#ifdef CONFIG_NUMA
 static unsigned long mem_cgroup_node_nr_file_lru_pages(struct mem_cgroup *memcg,
 							int nid)
 {
@@ -1141,6 +1145,17 @@ static unsigned long mem_cgroup_node_nr_file_lru_pages(struct mem_cgroup *memcg,
 	return ret;
 }
 
+static unsigned long mem_cgroup_node_nr_anon_lru_pages(struct mem_cgroup *memcg,
+							int nid)
+{
+	unsigned long ret;
+
+	ret = mem_cgroup_get_zonestat_node(memcg, nid, LRU_INACTIVE_ANON) +
+		mem_cgroup_get_zonestat_node(memcg, nid, LRU_ACTIVE_ANON);
+	return ret;
+}
+
+#if MAX_NUMNODES > 1
 static unsigned long mem_cgroup_nr_file_lru_pages(struct mem_cgroup *memcg)
 {
 	u64 total = 0;
@@ -1152,17 +1167,6 @@ static unsigned long mem_cgroup_nr_file_lru_pages(struct mem_cgroup *memcg)
 	return total;
 }
 
-static unsigned long mem_cgroup_node_nr_anon_lru_pages(struct mem_cgroup *memcg,
-							int nid)
-{
-	unsigned long ret;
-
-	ret = mem_cgroup_get_zonestat_node(memcg, nid, LRU_INACTIVE_ANON) +
-		mem_cgroup_get_zonestat_node(memcg, nid, LRU_ACTIVE_ANON);
-
-	return ret;
-}
-
 static unsigned long mem_cgroup_nr_anon_lru_pages(struct mem_cgroup *memcg)
 {
 	u64 total = 0;
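
The code motion above is preparation, not behavior change: the per-node helpers move out of the NUMA-only region so that the reclaimability test added below can use them even when MAX_NUMNODES == 1 (the old #ifdef CONFIG_NUMA guard is dropped for the same reason). The resulting layout, sketched as declarations:

	/* built unconditionally */
	static unsigned long mem_cgroup_node_nr_file_lru_pages(struct mem_cgroup *memcg,
								int nid);
	static unsigned long mem_cgroup_node_nr_anon_lru_pages(struct mem_cgroup *memcg,
								int nid);
	#if MAX_NUMNODES > 1
	/* whole-memcg summations stay NUMA-only */
	static unsigned long mem_cgroup_nr_file_lru_pages(struct mem_cgroup *memcg);
	static unsigned long mem_cgroup_nr_anon_lru_pages(struct mem_cgroup *memcg);
	#endif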
@@ -1559,6 +1563,28 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem)
 	return ret;
 }
 
+/**
+ * test_mem_cgroup_node_reclaimable
+ * @mem: the target memcg
+ * @nid: the node ID to be checked.
+ * @noswap: specify true here if the caller wants file-only information.
+ *
+ * This function returns whether the specified memcg contains any
+ * reclaimable pages on a node. Returns true if there are any reclaimable
+ * pages in the node.
+ */
+static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *mem,
+		int nid, bool noswap)
+{
+	if (mem_cgroup_node_nr_file_lru_pages(mem, nid))
+		return true;
+	if (noswap || !total_swap_pages)
+		return false;
+	if (mem_cgroup_node_nr_anon_lru_pages(mem, nid))
+		return true;
+	return false;
+
+}
 #if MAX_NUMNODES > 1
 
 /*
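
The noswap flag lets one helper serve both reclaim modes: when it is true, or no swap is configured, only file LRU pages count as reclaimable. A hedged usage sketch (shrink_node_memcg_noswap() is a hypothetical caller, not part of this patch):

	/* only bother with this node if file pages can still be reclaimed */
	if (test_mem_cgroup_node_reclaimable(mem, nid, true))
		shrink_node_memcg_noswap(mem, nid);	/* hypothetical helper */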
@@ -1570,26 +1596,26 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem)
 static void mem_cgroup_may_update_nodemask(struct mem_cgroup *mem)
 {
 	int nid;
-
-	if (time_after(mem->next_scan_node_update, jiffies))
+	/*
+	 * numainfo_events > 0 means there was at least NUMAINFO_EVENTS_TARGET
+	 * pagein/pageout changes since the last update.
+	 */
+	if (!atomic_read(&mem->numainfo_events))
+		return;
+	if (atomic_inc_return(&mem->numainfo_updating) > 1)
 		return;
 
-	mem->next_scan_node_update = jiffies + 10*HZ;
 	/* make a nodemask where this memcg uses memory from */
 	mem->scan_nodes = node_states[N_HIGH_MEMORY];
 
 	for_each_node_mask(nid, node_states[N_HIGH_MEMORY]) {
 
-		if (mem_cgroup_get_zonestat_node(mem, nid, LRU_INACTIVE_FILE) ||
-		    mem_cgroup_get_zonestat_node(mem, nid, LRU_ACTIVE_FILE))
-			continue;
-
-		if (total_swap_pages &&
-		    (mem_cgroup_get_zonestat_node(mem, nid, LRU_INACTIVE_ANON) ||
-		     mem_cgroup_get_zonestat_node(mem, nid, LRU_ACTIVE_ANON)))
-			continue;
-		node_clear(nid, mem->scan_nodes);
+		if (!test_mem_cgroup_node_reclaimable(mem, nid, false))
+			node_clear(nid, mem->scan_nodes);
 	}
+
+	atomic_set(&mem->numainfo_events, 0);
+	atomic_set(&mem->numainfo_updating, 0);
 }
 
 /*
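
Note the release order at the end of the rebuild: numainfo_events is cleared before numainfo_updating, so events arriving mid-rebuild can be absorbed without triggering a second pass. That is acceptable because scan_nodes is only a hint; mem_cgroup_reclaimable() below double-checks nodes outside the mask. The single-updater gate could equally be written as a cmpxchg try-lock, a sketch of an alternative to the atomic_inc_return() test above:

	/* 0 -> 1 transition wins; anyone else backs off */
	if (atomic_cmpxchg(&mem->numainfo_updating, 0, 1) != 0)
		return;		/* another CPU is already rebuilding */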
@@ -1627,11 +1653,51 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *mem)
 	return node;
 }
 
+/*
+ * Check all nodes whether they contain reclaimable pages or not.
+ * For a quick scan, we make use of scan_nodes. This allows us to skip
+ * unused nodes. But scan_nodes is lazily updated and may not contain
+ * enough new information. We need to double check.
+ */
+bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
+{
+	int nid;
+
+	/*
+	 * quick check...making use of scan_nodes.
+	 * We can skip unused nodes.
+	 */
+	if (!nodes_empty(mem->scan_nodes)) {
+		for (nid = first_node(mem->scan_nodes);
+		     nid < MAX_NUMNODES;
+		     nid = next_node(nid, mem->scan_nodes)) {
+
+			if (test_mem_cgroup_node_reclaimable(mem, nid, noswap))
+				return true;
+		}
+	}
+	/*
+	 * Check the rest of the nodes.
+	 */
+	for_each_node_state(nid, N_HIGH_MEMORY) {
+		if (node_isset(nid, mem->scan_nodes))
+			continue;
+		if (test_mem_cgroup_node_reclaimable(mem, nid, noswap))
+			return true;
+	}
+	return false;
+}
+
 #else
 int mem_cgroup_select_victim_node(struct mem_cgroup *mem)
 {
 	return 0;
 }
+
+bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
+{
+	return test_mem_cgroup_node_reclaimable(mem, 0, noswap);
+}
 #endif
 
 /*
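
The quick-check loop open-codes the nodemask walk; it is equivalent to the for_each_node_mask() iterator from <linux/nodemask.h>, which expands to the same first_node()/next_node() pattern. A sketch of the same fast path using the helper:

	int nid;

	for_each_node_mask(nid, mem->scan_nodes)
		if (test_mem_cgroup_node_reclaimable(mem, nid, noswap))
			return true;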
@@ -1702,7 +1768,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
 				}
 			}
 		}
-		if (!mem_cgroup_local_usage(victim)) {
+		if (!mem_cgroup_reclaimable(victim, noswap)) {
 			/* this cgroup's local usage == 0 */
 			css_put(&victim->css);
 			continue;
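
The old test only asked whether the victim had any local usage (RSS + cache). Under noswap reclaim, an anon-only cgroup has nonzero usage but nothing reclaimable, so hierarchical reclaim could keep selecting it without making progress. The new predicate asks the right question; condensed (assumption: a simplified stand-in that ignores the per-node scan_nodes fast path):

	/* true iff reclaim could make progress in this memcg */
	static bool victim_is_reclaimable(unsigned long file_lru,
				unsigned long anon_lru, bool noswap)
	{
		if (file_lru)				/* file pages: always reclaimable */
			return true;
		if (noswap || !total_swap_pages)	/* anon pages need swap */
			return false;
		return anon_lru != 0;
	}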