Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--  mm/memcontrol.c  102
1 file changed, 96 insertions(+), 6 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index fc62c714f3b6..1520efd1c7c4 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -231,6 +231,11 @@ struct mem_cgroup {
 	 * reclaimed from.
 	 */
 	int last_scanned_child;
+	int last_scanned_node;
+#if MAX_NUMNODES > 1
+	nodemask_t scan_nodes;
+	unsigned long next_scan_node_update;
+#endif
 	/*
 	 * Should the accounting and control be hierarchical, per subtree?
 	 */
@@ -624,18 +629,27 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
 	preempt_enable();
 }
 
+static unsigned long
+mem_cgroup_get_zonestat_node(struct mem_cgroup *mem, int nid, enum lru_list idx)
+{
+	struct mem_cgroup_per_zone *mz;
+	u64 total = 0;
+	int zid;
+
+	for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+		mz = mem_cgroup_zoneinfo(mem, nid, zid);
+		total += MEM_CGROUP_ZSTAT(mz, idx);
+	}
+	return total;
+}
 static unsigned long mem_cgroup_get_local_zonestat(struct mem_cgroup *mem,
 					enum lru_list idx)
 {
-	int nid, zid;
-	struct mem_cgroup_per_zone *mz;
+	int nid;
 	u64 total = 0;
 
 	for_each_online_node(nid)
-		for (zid = 0; zid < MAX_NR_ZONES; zid++) {
-			mz = mem_cgroup_zoneinfo(mem, nid, zid);
-			total += MEM_CGROUP_ZSTAT(mz, idx);
-		}
+		total += mem_cgroup_get_zonestat_node(mem, nid, idx);
 	return total;
 }
 
@@ -1418,6 +1432,81 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem)
 	return ret;
 }
 
+#if MAX_NUMNODES > 1
+
+/*
+ * Always updating the nodemask is not very good - even if we have an empty
+ * list or the wrong list here, we can start from some node and traverse all
+ * nodes based on the zonelist. So, update the list loosely once per 10 secs.
+ *
+ */
+static void mem_cgroup_may_update_nodemask(struct mem_cgroup *mem)
+{
+	int nid;
+
+	if (time_after(mem->next_scan_node_update, jiffies))
+		return;
+
+	mem->next_scan_node_update = jiffies + 10*HZ;
+	/* make a nodemask where this memcg uses memory from */
+	mem->scan_nodes = node_states[N_HIGH_MEMORY];
+
+	for_each_node_mask(nid, node_states[N_HIGH_MEMORY]) {
+
+		if (mem_cgroup_get_zonestat_node(mem, nid, LRU_INACTIVE_FILE) ||
+		    mem_cgroup_get_zonestat_node(mem, nid, LRU_ACTIVE_FILE))
+			continue;
+
+		if (total_swap_pages &&
+		    (mem_cgroup_get_zonestat_node(mem, nid, LRU_INACTIVE_ANON) ||
+		     mem_cgroup_get_zonestat_node(mem, nid, LRU_ACTIVE_ANON)))
+			continue;
+		node_clear(nid, mem->scan_nodes);
+	}
+}
+
+/*
+ * Selecting a node to start reclaim from. Because all we need is to
+ * reduce the usage counter, starting from anywhere is OK. Reclaiming
+ * from the current node has both pros and cons.
+ *
+ * Freeing memory from the current node means freeing memory from a node
+ * which we'll use or have used. So, it may make the LRU bad. And if
+ * several threads hit limits, they will see contention on a node. But
+ * freeing from a remote node costs more because of memory latency.
+ *
+ * For now, we use round-robin. A better algorithm is welcome.
+ */
+int mem_cgroup_select_victim_node(struct mem_cgroup *mem)
+{
+	int node;
+
+	mem_cgroup_may_update_nodemask(mem);
+	node = mem->last_scanned_node;
+
+	node = next_node(node, mem->scan_nodes);
+	if (node == MAX_NUMNODES)
+		node = first_node(mem->scan_nodes);
+	/*
+	 * We call this when we hit the limit, not when pages are added to the LRU.
+	 * No LRU may hold pages because all pages are UNEVICTABLE or the memcg
+	 * is too small and none of its pages are on the LRU. In that case, we
+	 * use the current node.
+	 */
+	if (unlikely(node == MAX_NUMNODES))
+		node = numa_node_id();
+
+	mem->last_scanned_node = node;
+	return node;
+}
+
+#else
+int mem_cgroup_select_victim_node(struct mem_cgroup *mem)
+{
+	return 0;
+}
+#endif
+
 /*
  * Scan the hierarchy if needed to reclaim memory. We remember the last child
  * we reclaimed from, so that we don't end up penalizing one child extensively
@@ -4606,6 +4695,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 		res_counter_init(&mem->memsw, NULL);
 	}
 	mem->last_scanned_child = 0;
+	mem->last_scanned_node = MAX_NUMNODES;
 	INIT_LIST_HEAD(&mem->oom_notify);
 
 	if (parent)
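
The new #if MAX_NUMNODES > 1 block above does two things: mem_cgroup_may_update_nodemask() rebuilds scan_nodes at most once per 10 seconds, keeping only nodes this memcg can actually reclaim from, and mem_cgroup_select_victim_node() walks that mask round-robin, wrapping with first_node() and falling back to the local node when the mask is empty. The user-space sketch below models only that control flow with a plain bitmask; memcg_model, node_has_reclaimable() and the helper functions are hypothetical stand-ins, not the kernel's nodemask or LRU-statistics API.

/*
 * User-space model of the round-robin victim-node selection added by
 * this patch. Build with: cc -o victim victim.c
 */
#include <stdio.h>
#include <time.h>

#define MAX_NODES	4
#define LOCAL_NODE	0		/* stands in for numa_node_id() */

struct memcg_model {
	unsigned long scan_nodes;	/* bit i set => node i is worth scanning */
	time_t next_scan_update;	/* stands in for the jiffies-based deadline */
	int last_scanned_node;
};

/* Stand-in for the per-node LRU counters read via mem_cgroup_get_zonestat_node(). */
static int node_has_reclaimable(int nid)
{
	return nid != 2;		/* pretend node 2 holds nothing for this memcg */
}

/* Mirrors mem_cgroup_may_update_nodemask(): refresh at most once per 10 seconds. */
static void may_update_nodemask(struct memcg_model *m)
{
	time_t now = time(NULL);
	int nid;

	if (m->next_scan_update > now)
		return;
	m->next_scan_update = now + 10;

	m->scan_nodes = 0;
	for (nid = 0; nid < MAX_NODES; nid++)
		if (node_has_reclaimable(nid))
			m->scan_nodes |= 1UL << nid;
}

/* Next set bit strictly after 'prev', or MAX_NODES if there is none. */
static int next_node_after(int prev, unsigned long mask)
{
	int nid;

	for (nid = prev + 1; nid < MAX_NODES; nid++)
		if (mask & (1UL << nid))
			return nid;
	return MAX_NODES;
}

/* Mirrors mem_cgroup_select_victim_node(): advance, wrap, then fall back. */
static int select_victim_node(struct memcg_model *m)
{
	int node;

	may_update_nodemask(m);
	node = next_node_after(m->last_scanned_node, m->scan_nodes);
	if (node == MAX_NODES)			/* past the last set bit: wrap around */
		node = next_node_after(-1, m->scan_nodes);
	if (node == MAX_NODES)			/* empty mask: use the local node */
		node = LOCAL_NODE;

	m->last_scanned_node = node;
	return node;
}

int main(void)
{
	/* last_scanned_node starts at the MAX_NODES sentinel, as in mem_cgroup_create(). */
	struct memcg_model m = { 0, 0, MAX_NODES };
	int i;

	for (i = 0; i < 6; i++)
		printf("victim node: %d\n", select_victim_node(&m));
	/* prints 0 1 3 0 1 3 - round-robin over the nodes with reclaimable pages */
	return 0;
}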