Diffstat (limited to 'mm')
-rw-r--r--  mm/memcontrol.c  | 140
-rw-r--r--  mm/memory.c      |   1
-rw-r--r--  mm/nommu.c       |   9
-rw-r--r--  mm/vmscan.c      |  62
4 files changed, 147 insertions, 65 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ddffc74cdebe..e013b8e57d25 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -108,10 +108,12 @@ enum mem_cgroup_events_index {
 enum mem_cgroup_events_target {
 	MEM_CGROUP_TARGET_THRESH,
 	MEM_CGROUP_TARGET_SOFTLIMIT,
+	MEM_CGROUP_TARGET_NUMAINFO,
 	MEM_CGROUP_NTARGETS,
 };
 #define THRESHOLDS_EVENTS_TARGET (128)
 #define SOFTLIMIT_EVENTS_TARGET (1024)
+#define NUMAINFO_EVENTS_TARGET (1024)
 
 struct mem_cgroup_stat_cpu {
 	long count[MEM_CGROUP_STAT_NSTATS];
@@ -237,7 +239,8 @@ struct mem_cgroup {
 	int last_scanned_node;
 #if MAX_NUMNODES > 1
 	nodemask_t	scan_nodes;
-	unsigned long	next_scan_node_update;
+	atomic_t	numainfo_events;
+	atomic_t	numainfo_updating;
 #endif
 	/*
 	 * Should the accounting and control be hierarchical, per subtree?
@@ -577,15 +580,6 @@ static long mem_cgroup_read_stat(struct mem_cgroup *mem,
 	return val;
 }
 
-static long mem_cgroup_local_usage(struct mem_cgroup *mem)
-{
-	long ret;
-
-	ret = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_RSS);
-	ret += mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_CACHE);
-	return ret;
-}
-
 static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
 					 bool charge)
 {
@@ -689,6 +683,9 @@ static void __mem_cgroup_target_update(struct mem_cgroup *mem, int target)
 	case MEM_CGROUP_TARGET_SOFTLIMIT:
 		next = val + SOFTLIMIT_EVENTS_TARGET;
 		break;
+	case MEM_CGROUP_TARGET_NUMAINFO:
+		next = val + NUMAINFO_EVENTS_TARGET;
+		break;
 	default:
 		return;
 	}
@@ -707,11 +704,19 @@ static void memcg_check_events(struct mem_cgroup *mem, struct page *page)
 		mem_cgroup_threshold(mem);
 		__mem_cgroup_target_update(mem, MEM_CGROUP_TARGET_THRESH);
 		if (unlikely(__memcg_event_check(mem,
-			MEM_CGROUP_TARGET_SOFTLIMIT))){
+			MEM_CGROUP_TARGET_SOFTLIMIT))) {
 			mem_cgroup_update_tree(mem, page);
 			__mem_cgroup_target_update(mem,
 				MEM_CGROUP_TARGET_SOFTLIMIT);
+		}
+#if MAX_NUMNODES > 1
+		if (unlikely(__memcg_event_check(mem,
+			MEM_CGROUP_TARGET_NUMAINFO))) {
+			atomic_inc(&mem->numainfo_events);
+			__mem_cgroup_target_update(mem,
+				MEM_CGROUP_TARGET_NUMAINFO);
 		}
+#endif
 	}
 }
 
@@ -1129,7 +1134,6 @@ unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg,
 	return MEM_CGROUP_ZSTAT(mz, lru);
 }
 
-#ifdef CONFIG_NUMA
 static unsigned long mem_cgroup_node_nr_file_lru_pages(struct mem_cgroup *memcg,
 							int nid)
 {
@@ -1141,6 +1145,17 @@ static unsigned long mem_cgroup_node_nr_file_lru_pages(struct mem_cgroup *memcg,
 	return ret;
 }
 
+static unsigned long mem_cgroup_node_nr_anon_lru_pages(struct mem_cgroup *memcg,
+							int nid)
+{
+	unsigned long ret;
+
+	ret = mem_cgroup_get_zonestat_node(memcg, nid, LRU_INACTIVE_ANON) +
+		mem_cgroup_get_zonestat_node(memcg, nid, LRU_ACTIVE_ANON);
+	return ret;
+}
+
+#if MAX_NUMNODES > 1
 static unsigned long mem_cgroup_nr_file_lru_pages(struct mem_cgroup *memcg)
 {
 	u64 total = 0;
@@ -1152,17 +1167,6 @@ static unsigned long mem_cgroup_nr_file_lru_pages(struct mem_cgroup *memcg)
 	return total;
 }
 
-static unsigned long mem_cgroup_node_nr_anon_lru_pages(struct mem_cgroup *memcg,
-							int nid)
-{
-	unsigned long ret;
-
-	ret = mem_cgroup_get_zonestat_node(memcg, nid, LRU_INACTIVE_ANON) +
-		mem_cgroup_get_zonestat_node(memcg, nid, LRU_ACTIVE_ANON);
-
-	return ret;
-}
-
 static unsigned long mem_cgroup_nr_anon_lru_pages(struct mem_cgroup *memcg)
 {
 	u64 total = 0;
@@ -1559,6 +1563,28 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem)
 	return ret;
 }
 
+/**
+ * test_mem_cgroup_node_reclaimable
+ * @mem: the target memcg
+ * @nid: the node ID to be checked.
+ * @noswap : specify true here if the user wants file only information.
+ *
+ * This function returns whether the specified memcg contains any
+ * reclaimable pages on a node. Returns true if there are any reclaimable
+ * pages in the node.
+ */
+static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *mem,
+		int nid, bool noswap)
+{
+	if (mem_cgroup_node_nr_file_lru_pages(mem, nid))
+		return true;
+	if (noswap || !total_swap_pages)
+		return false;
+	if (mem_cgroup_node_nr_anon_lru_pages(mem, nid))
+		return true;
+	return false;
+
+}
 #if MAX_NUMNODES > 1
 
 /*
@@ -1570,26 +1596,26 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem)
 static void mem_cgroup_may_update_nodemask(struct mem_cgroup *mem)
 {
 	int nid;
-
-	if (time_after(mem->next_scan_node_update, jiffies))
+	/*
+	 * numainfo_events > 0 means there was at least NUMAINFO_EVENTS_TARGET
+	 * pagein/pageout changes since the last update.
+	 */
+	if (!atomic_read(&mem->numainfo_events))
+		return;
+	if (atomic_inc_return(&mem->numainfo_updating) > 1)
 		return;
 
-	mem->next_scan_node_update = jiffies + 10*HZ;
 	/* make a nodemask where this memcg uses memory from */
 	mem->scan_nodes = node_states[N_HIGH_MEMORY];
 
 	for_each_node_mask(nid, node_states[N_HIGH_MEMORY]) {
 
-		if (mem_cgroup_get_zonestat_node(mem, nid, LRU_INACTIVE_FILE) ||
-		    mem_cgroup_get_zonestat_node(mem, nid, LRU_ACTIVE_FILE))
-			continue;
-
-		if (total_swap_pages &&
-		    (mem_cgroup_get_zonestat_node(mem, nid, LRU_INACTIVE_ANON) ||
-		    mem_cgroup_get_zonestat_node(mem, nid, LRU_ACTIVE_ANON)))
-			continue;
-		node_clear(nid, mem->scan_nodes);
+		if (!test_mem_cgroup_node_reclaimable(mem, nid, false))
+			node_clear(nid, mem->scan_nodes);
 	}
+
+	atomic_set(&mem->numainfo_events, 0);
+	atomic_set(&mem->numainfo_updating, 0);
 }
 
 /*
@@ -1627,11 +1653,51 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *mem)
 	return node;
 }
 
+/*
+ * Check all nodes whether it contains reclaimable pages or not.
+ * For quick scan, we make use of scan_nodes. This will allow us to skip
+ * unused nodes. But scan_nodes is lazily updated and may not contain
+ * enough new information. We need to do double check.
+ */
+bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
+{
+	int nid;
+
+	/*
+	 * quick check...making use of scan_node.
+	 * We can skip unused nodes.
+	 */
+	if (!nodes_empty(mem->scan_nodes)) {
+		for (nid = first_node(mem->scan_nodes);
+		     nid < MAX_NUMNODES;
+		     nid = next_node(nid, mem->scan_nodes)) {
+
+			if (test_mem_cgroup_node_reclaimable(mem, nid, noswap))
+				return true;
+		}
+	}
+	/*
+	 * Check rest of nodes.
+	 */
+	for_each_node_state(nid, N_HIGH_MEMORY) {
+		if (node_isset(nid, mem->scan_nodes))
+			continue;
+		if (test_mem_cgroup_node_reclaimable(mem, nid, noswap))
+			return true;
+	}
+	return false;
+}
+
 #else
 int mem_cgroup_select_victim_node(struct mem_cgroup *mem)
 {
 	return 0;
 }
+
+bool mem_cgroup_reclaimable(struct mem_cgroup *mem, bool noswap)
+{
+	return test_mem_cgroup_node_reclaimable(mem, 0, noswap);
+}
 #endif
 
 /*
@@ -1702,7 +1768,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
 				}
 			}
 		}
-		if (!mem_cgroup_local_usage(victim)) {
+		if (!mem_cgroup_reclaimable(victim, noswap)) {
 			/* this cgroup's local usage == 0 */
 			css_put(&victim->css);
 			continue;
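
(Aside, not part of the patch.) The memcontrol.c hunks above replace the jiffies-based throttle with an event counter plus a single-updater flag: numainfo_events says a nodemask rebuild is worth doing, and whichever caller bumps numainfo_updating first does the work while concurrent callers back off. Below is a minimal standalone C11 sketch of that pattern; fake_memcg, rebuild_scan_nodes and the userspace atomics are hypothetical stand-ins for the kernel types, not the kernel code itself.

/* Illustrative only: "counter + single updater" arbitration, userspace C11. */
#include <stdatomic.h>

struct fake_memcg {
	atomic_int numainfo_events;	/* bumped once enough page events passed */
	atomic_int numainfo_updating;	/* >0 while someone rebuilds scan_nodes */
	unsigned long scan_nodes;	/* stand-in for the cached nodemask */
};

static void rebuild_scan_nodes(struct fake_memcg *mem)
{
	/* the expensive per-node walk would go here */
	mem->scan_nodes = ~0UL;
}

static void may_update_nodemask(struct fake_memcg *mem)
{
	/* nothing happened since the last rebuild: cheap early exit */
	if (!atomic_load(&mem->numainfo_events))
		return;
	/* first incrementer wins; concurrent callers see >1 and back off */
	if (atomic_fetch_add(&mem->numainfo_updating, 1) + 1 > 1)
		return;

	rebuild_scan_nodes(mem);

	/* reopen the door for the next round of updates */
	atomic_store(&mem->numainfo_events, 0);
	atomic_store(&mem->numainfo_updating, 0);
}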
diff --git a/mm/memory.c b/mm/memory.c
index 40b7531ee8ba..9b8a01d941cb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -305,6 +305,7 @@ int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 	if (batch->nr == batch->max) {
 		if (!tlb_next_batch(tlb))
 			return 0;
+		batch = tlb->active;
 	}
 	VM_BUG_ON(batch->nr > batch->max);
 
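
(Aside, not part of the patch.) The one-line memory.c fix is a classic stale-pointer repair: tlb_next_batch() switches tlb->active to a fresh batch, so the caller must re-read its cached batch pointer before using it. A userspace sketch of the same bug shape follows; struct gather, grow_active() and the fixed-size array are hypothetical stand-ins (the real mmu_gather chains batches rather than dropping the old one).

/* Illustrative only: refresh a cached pointer after a helper replaces it. */
#include <stdlib.h>

struct fake_batch { int nr, max; void *pages[8]; };
struct gather { struct fake_batch *active; };

static int grow_active(struct gather *g)
{
	struct fake_batch *b = calloc(1, sizeof(*b));

	if (!b)
		return 0;
	b->max = 8;
	g->active = b;		/* caller's old 'batch' pointer is now stale */
	return 1;
}

static int remove_page(struct gather *g, void *page)
{
	struct fake_batch *batch = g->active;

	if (batch->nr == batch->max) {
		if (!grow_active(g))
			return 0;
		batch = g->active;	/* the one-line fix: re-read the pointer */
	}
	batch->pages[batch->nr++] = page;
	return 1;
}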
diff --git a/mm/nommu.c b/mm/nommu.c
index 1fd0c51b10a6..9edc897a3970 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1813,10 +1813,13 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 	return NULL;
 }
 
-int remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
-		unsigned long to, unsigned long size, pgprot_t prot)
+int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
+		unsigned long pfn, unsigned long size, pgprot_t prot)
 {
-	vma->vm_start = vma->vm_pgoff << PAGE_SHIFT;
+	if (addr != (pfn << PAGE_SHIFT))
+		return -EINVAL;
+
+	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
 	return 0;
 }
 EXPORT_SYMBOL(remap_pfn_range);
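
(Aside, not part of the patch.) On !MMU kernels remap_pfn_range() now takes the usual (addr, pfn) pair, succeeds only for the identity case addr == pfn << PAGE_SHIFT, and marks the VMA VM_IO | VM_RESERVED | VM_PFNMAP instead of rewriting vm_start. A minimal, hypothetical driver ->mmap handler of the kind this stub has to keep working is sketched below; DEMO_PHYS_BASE is a made-up register base, not from the patch.

/* Illustrative only: hypothetical driver mmap using remap_pfn_range(). */
#include <linux/mm.h>
#include <linux/fs.h>

#define DEMO_PHYS_BASE	0x40000000UL	/* made-up device register base */

static int demo_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;
	unsigned long pfn = DEMO_PHYS_BASE >> PAGE_SHIFT;

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	/* on !MMU this now fails with -EINVAL unless vm_start == DEMO_PHYS_BASE */
	return remap_pfn_range(vma, vma->vm_start, pfn, size,
			       vma->vm_page_prot);
}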
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4f49535d4cd3..d036e59d302b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2310,7 +2310,8 @@ static bool pgdat_balanced(pg_data_t *pgdat, unsigned long balanced_pages,
 	for (i = 0; i <= classzone_idx; i++)
 		present_pages += pgdat->node_zones[i].present_pages;
 
-	return balanced_pages > (present_pages >> 2);
+	/* A special case here: if zone has no page, we think it's balanced */
+	return balanced_pages >= (present_pages >> 2);
 }
 
 /* is kswapd sleeping prematurely? */
@@ -2326,7 +2327,7 @@ static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining,
 		return true;
 
 	/* Check the watermark levels */
-	for (i = 0; i < pgdat->nr_zones; i++) {
+	for (i = 0; i <= classzone_idx; i++) {
 		struct zone *zone = pgdat->node_zones + i;
 
 		if (!populated_zone(zone))
@@ -2344,7 +2345,7 @@ static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining,
 		}
 
 		if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone),
-							classzone_idx, 0))
+							i, 0))
 			all_zones_ok = false;
 		else
 			balanced += zone->present_pages;
@@ -2451,7 +2452,6 @@ loop_again:
 			if (!zone_watermark_ok_safe(zone, order,
 					high_wmark_pages(zone), 0, 0)) {
 				end_zone = i;
-				*classzone_idx = i;
 				break;
 			}
 		}
@@ -2510,18 +2510,18 @@ loop_again:
 						KSWAPD_ZONE_BALANCE_GAP_RATIO);
 			if (!zone_watermark_ok_safe(zone, order,
 					high_wmark_pages(zone) + balance_gap,
-					end_zone, 0))
+					end_zone, 0)) {
 				shrink_zone(priority, zone, &sc);
-			reclaim_state->reclaimed_slab = 0;
-			nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages);
-			sc.nr_reclaimed += reclaim_state->reclaimed_slab;
-			total_scanned += sc.nr_scanned;
 
-			if (zone->all_unreclaimable)
-				continue;
-			if (nr_slab == 0 &&
-			    !zone_reclaimable(zone))
-				zone->all_unreclaimable = 1;
+				reclaim_state->reclaimed_slab = 0;
+				nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages);
+				sc.nr_reclaimed += reclaim_state->reclaimed_slab;
+				total_scanned += sc.nr_scanned;
+
+				if (nr_slab == 0 && !zone_reclaimable(zone))
+					zone->all_unreclaimable = 1;
+			}
+
 			/*
 			 * If we've done a decent amount of scanning and
 			 * the reclaim ratio is low, start doing writepage
@@ -2531,6 +2531,12 @@ loop_again:
 			    total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
 				sc.may_writepage = 1;
 
+			if (zone->all_unreclaimable) {
+				if (end_zone && end_zone == i)
+					end_zone--;
+				continue;
+			}
+
 			if (!zone_watermark_ok_safe(zone, order,
 					high_wmark_pages(zone), end_zone, 0)) {
 				all_zones_ok = 0;
@@ -2709,8 +2715,8 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
  */
 static int kswapd(void *p)
 {
-	unsigned long order;
-	int classzone_idx;
+	unsigned long order, new_order;
+	int classzone_idx, new_classzone_idx;
 	pg_data_t *pgdat = (pg_data_t*)p;
 	struct task_struct *tsk = current;
 
@@ -2740,17 +2746,23 @@ static int kswapd(void *p)
 	tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
 	set_freezable();
 
-	order = 0;
-	classzone_idx = MAX_NR_ZONES - 1;
+	order = new_order = 0;
+	classzone_idx = new_classzone_idx = pgdat->nr_zones - 1;
 	for ( ; ; ) {
-		unsigned long new_order;
-		int new_classzone_idx;
 		int ret;
 
-		new_order = pgdat->kswapd_max_order;
-		new_classzone_idx = pgdat->classzone_idx;
-		pgdat->kswapd_max_order = 0;
-		pgdat->classzone_idx = MAX_NR_ZONES - 1;
+		/*
+		 * If the last balance_pgdat was unsuccessful it's unlikely a
+		 * new request of a similar or harder type will succeed soon
+		 * so consider going to sleep on the basis we reclaimed at
+		 */
+		if (classzone_idx >= new_classzone_idx && order == new_order) {
+			new_order = pgdat->kswapd_max_order;
+			new_classzone_idx = pgdat->classzone_idx;
+			pgdat->kswapd_max_order = 0;
+			pgdat->classzone_idx = pgdat->nr_zones - 1;
+		}
+
 		if (order < new_order || classzone_idx > new_classzone_idx) {
 			/*
 			 * Don't sleep if someone wants a larger 'order'
@@ -2763,7 +2775,7 @@ static int kswapd(void *p)
 			order = pgdat->kswapd_max_order;
 			classzone_idx = pgdat->classzone_idx;
 			pgdat->kswapd_max_order = 0;
-			pgdat->classzone_idx = MAX_NR_ZONES - 1;
+			pgdat->classzone_idx = pgdat->nr_zones - 1;
 		}
 
 		ret = try_to_freeze();
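
(Aside, not part of the patch.) The first vmscan.c hunk changes the pgdat_balanced() comparison from > to >= so that, per the added comment, a node whose zones up to classzone_idx hold no pages at all counts as balanced. A freestanding restatement of that rule is sketched below; zone_pages[] is a hypothetical stand-in for node_zones[].present_pages, not the kernel interface.

/* Illustrative only: the 25% balance rule with the >= empty-node special case. */
#include <stdbool.h>

static bool node_balanced(const unsigned long *zone_pages, int classzone_idx,
			  unsigned long balanced_pages)
{
	unsigned long present_pages = 0;
	int i;

	for (i = 0; i <= classzone_idx; i++)
		present_pages += zone_pages[i];

	/* empty node: 0 >= 0 holds, so it is treated as balanced */
	return balanced_pages >= (present_pages >> 2);
}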