Diffstat (limited to 'mm')
-rw-r--r--  mm/hugetlb.c      2
-rw-r--r--  mm/mempolicy.c    2
-rw-r--r--  mm/mmap.c         6
-rw-r--r--  mm/nommu.c        2
-rw-r--r--  mm/page_alloc.c  13
-rw-r--r--  mm/slab.c        14
-rw-r--r--  mm/slub.c        15
-rw-r--r--  mm/sparse.c      14
-rw-r--r--  mm/vmscan.c      69
9 files changed, 113 insertions, 24 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d7ca59d66c59..de4cf458d6e1 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -643,7 +643,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			spin_unlock(&mm->page_table_lock);
 			ret = hugetlb_fault(mm, vma, vaddr, 0);
 			spin_lock(&mm->page_table_lock);
-			if (!(ret & VM_FAULT_MAJOR))
+			if (!(ret & VM_FAULT_ERROR))
 				continue;
 
 			remainder = 0;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 71b84b45154a..172abffeb2e3 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -149,7 +149,7 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
 	   lower zones etc. Avoid empty zones because the memory allocator
 	   doesn't like them. If you implement node hot removal you
 	   have to fix that. */
-	k = policy_zone;
+	k = MAX_NR_ZONES - 1;
 	while (1) {
 		for_each_node_mask(nd, *nodes) {
 			struct zone *z = &NODE_DATA(nd)->node_zones[k];
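
The mempolicy.c change starts the zone walk at MAX_NR_ZONES - 1 instead of policy_zone, so every zone index from the top down is considered, with empty zones skipped as the comment above asks. For a single node (ignoring the per-node iteration in bind_zonelist()), the walk has roughly this shape; struct zone and collect_zones() here are invented, simplified stand-ins, not the kernel's types:

#include <stdio.h>

/* Hypothetical stand-ins for the kernel's per-node zone array; only the
 * "is this zone empty?" question matters for the illustration. */
#define MAX_NR_ZONES 4

struct zone { unsigned long present_pages; };

/* Walk one node's zones from the highest index down, keeping only the
 * populated ones. */
static int collect_zones(const struct zone *node_zones,
                         const struct zone **out)
{
    int n = 0;

    for (int k = MAX_NR_ZONES - 1; k >= 0; k--) {
        if (node_zones[k].present_pages)     /* avoid empty zones */
            out[n++] = &node_zones[k];
    }
    return n;
}

int main(void)
{
    /* A node with two populated lower zones and two empty upper zones. */
    struct zone zones[MAX_NR_ZONES] = { {100}, {200}, {0}, {0} };
    const struct zone *picked[MAX_NR_ZONES];

    printf("%d zones picked\n", collect_zones(zones, picked));
    return 0;
}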
diff --git a/mm/mmap.c b/mm/mmap.c
index b6537211b9cc..0d40e66c841b 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -93,7 +93,7 @@ atomic_t vm_committed_space = ATOMIC_INIT(0);
  * Note this is a helper function intended to be used by LSMs which
  * wish to use this logic.
  */
-int __vm_enough_memory(long pages, int cap_sys_admin)
+int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 {
 	unsigned long free, allowed;
 
@@ -166,7 +166,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
 
 	/* Don't let a single process grow too big:
 	   leave 3% of the size of this process for other processes */
-	allowed -= current->mm->total_vm / 32;
+	allowed -= mm->total_vm / 32;
 
 	/*
 	 * cast `allowed' as a signed long because vm_committed_space
@@ -2077,7 +2077,7 @@ int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
 	if (__vma && __vma->vm_start < vma->vm_end)
 		return -ENOMEM;
 	if ((vma->vm_flags & VM_ACCOUNT) &&
-	     security_vm_enough_memory(vma_pages(vma)))
+	     security_vm_enough_memory_mm(mm, vma_pages(vma)))
 		return -ENOMEM;
 	vma_link(mm, vma, prev, rb_link, rb_parent);
 	return 0;
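
The __vm_enough_memory() hunks thread the mm_struct in explicitly instead of reaching for current->mm, and the overcommit heuristic keeps reserving about 3% of the process's own size by dividing total_vm by 32. A throwaway userspace sketch of just that arithmetic (apply_process_reserve() is a made-up name, not a kernel function):

#include <stdio.h>

/* The "leave 3% for other processes" step, pulled out on its own.
 * total_vm is in pages; dividing by 32 reserves roughly 3.1% of the
 * process's own size. */
static unsigned long apply_process_reserve(unsigned long allowed,
                                           unsigned long total_vm)
{
    return allowed - total_vm / 32;
}

int main(void)
{
    /* e.g. a 1,000,000-page process gives back 31,250 pages (~3.1%). */
    printf("%lu\n", apply_process_reserve(2000000UL, 1000000UL));
    return 0;
}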
diff --git a/mm/nommu.c b/mm/nommu.c
index 9eef6a398555..8ed0cb43118a 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1270,7 +1270,7 @@ EXPORT_SYMBOL(get_unmapped_area);
  * Note this is a helper function intended to be used by LSMs which
  * wish to use this logic.
  */
-int __vm_enough_memory(long pages, int cap_sys_admin)
+int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 {
 	unsigned long free, allowed;
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3da85b81dabb..6427653023aa 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1157,6 +1157,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
 	nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */
 	int zlc_active = 0;		/* set if using zonelist_cache */
 	int did_zlc_setup = 0;		/* just call zlc_setup() one time */
+	enum zone_type highest_zoneidx = -1; /* Gets set for policy zonelists */
 
 zonelist_scan:
 	/*
@@ -1166,6 +1167,18 @@ zonelist_scan:
 	z = zonelist->zones;
 
 	do {
+		/*
+		 * In NUMA, this could be a policy zonelist which contains
+		 * zones that may not be allowed by the current gfp_mask.
+		 * Check the zone is allowed by the current flags
+		 */
+		if (unlikely(alloc_should_filter_zonelist(zonelist))) {
+			if (highest_zoneidx == -1)
+				highest_zoneidx = gfp_zone(gfp_mask);
+			if (zone_idx(*z) > highest_zoneidx)
+				continue;
+		}
+
 		if (NUMA_BUILD && zlc_active &&
 			!zlc_zone_worth_trying(zonelist, z, allowednodes))
 				continue;
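
The new block in get_page_from_freelist() only runs for zonelists that need filtering, and it computes gfp_zone(gfp_mask) lazily into highest_zoneidx the first time a filtered zone is seen, skipping any zone whose index is above that limit. A compilable toy version of the same skip-too-high-zones loop, with zone indices as plain ints and all names invented:

#include <stdio.h>

enum { ZONE_DMA, ZONE_NORMAL, ZONE_HIGHMEM, NR_ZONES };

/* Plays the role of gfp_zone(gfp_mask): the highest zone index the
 * caller's flags permit. */
static int highest_allowed_idx(void)
{
    return ZONE_NORMAL;          /* pretend the flags forbid highmem */
}

static void scan_zonelist(const int *zones, int nr, int needs_filtering)
{
    int highest_zoneidx = -1;    /* computed lazily, as in the hunk above */

    for (int i = 0; i < nr; i++) {
        if (needs_filtering) {
            if (highest_zoneidx == -1)
                highest_zoneidx = highest_allowed_idx();
            if (zones[i] > highest_zoneidx)
                continue;        /* zone not allowed by these flags */
        }
        printf("trying zone index %d\n", zones[i]);
    }
}

int main(void)
{
    const int policy_zonelist[] = { ZONE_HIGHMEM, ZONE_NORMAL, ZONE_DMA };

    scan_zonelist(policy_zonelist, 3, 1);
    return 0;
}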
diff --git a/mm/slab.c b/mm/slab.c
index a684778b2b41..6f6abef83a1a 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -883,6 +883,7 @@ static void __slab_error(const char *function, struct kmem_cache *cachep,
  */
 
 static int use_alien_caches __read_mostly = 1;
+static int numa_platform __read_mostly = 1;
 static int __init noaliencache_setup(char *s)
 {
 	use_alien_caches = 0;
@@ -1399,8 +1400,10 @@ void __init kmem_cache_init(void)
 	int order;
 	int node;
 
-	if (num_possible_nodes() == 1)
+	if (num_possible_nodes() == 1) {
 		use_alien_caches = 0;
+		numa_platform = 0;
+	}
 
 	for (i = 0; i < NUM_INIT_LISTS; i++) {
 		kmem_list3_init(&initkmem_list3[i]);
@@ -3558,7 +3561,14 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
 	check_irq_off();
 	objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
 
-	if (cache_free_alien(cachep, objp))
+	/*
+	 * Skip calling cache_free_alien() when the platform is not numa.
+	 * This will avoid cache misses that happen while accessing slabp (which
+	 * is per page memory reference) to get nodeid. Instead use a global
+	 * variable to skip the call, which is mostly likely to be present in
+	 * the cache.
+	 */
+	if (numa_platform && cache_free_alien(cachep, objp))
 		return;
 
 	if (likely(ac->avail < ac->limit)) {
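
As the added comment says, numa_platform exists so that on non-NUMA machines the free path never calls cache_free_alien(), and therefore never touches per-page slab metadata just to learn a node id; the && short-circuits on a single hot, read-mostly global instead. A userspace sketch of that short-circuit pattern (free_to_alien_cache() is an invented stand-in for cache_free_alien()):

#include <stdbool.h>
#include <stdio.h>

/* Set once at init time, read on every free. */
static bool numa_platform = true;

/* Pretend this is the expensive path: it has to inspect per-object
 * metadata to find the owning node, which is what the patch avoids on
 * single-node machines. */
static bool free_to_alien_cache(void *obj)
{
    printf("looked up node for %p\n", obj);
    return false;                /* object belongs to the local node */
}

static void cache_free(void *obj)
{
    /* && short-circuits: when !numa_platform the lookup never runs. */
    if (numa_platform && free_to_alien_cache(obj))
        return;
    printf("freed %p to the local array cache\n", obj);
}

int main(void)
{
    int x;

    numa_platform = false;       /* e.g. only one possible node */
    cache_free(&x);
    return 0;
}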
diff --git a/mm/slub.c b/mm/slub.c
index 69d02e3e439e..04151da399c6 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1877,9 +1877,16 @@ static struct kmem_cache_node * __init early_kmem_cache_node_alloc(gfp_t gfpflag
 
 	BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node));
 
-	page = new_slab(kmalloc_caches, gfpflags | GFP_THISNODE, node);
+	page = new_slab(kmalloc_caches, gfpflags, node);
 
 	BUG_ON(!page);
+	if (page_to_nid(page) != node) {
+		printk(KERN_ERR "SLUB: Unable to allocate memory from "
+				"node %d\n", node);
+		printk(KERN_ERR "SLUB: Allocating a useless per node structure "
+				"in order to be able to continue\n");
+	}
+
 	n = page->freelist;
 	BUG_ON(!n);
 	page->freelist = get_freepointer(kmalloc_caches, n);
@@ -3112,7 +3119,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
 		unsigned long flags;
 		struct page *page;
 
-		if (!atomic_read(&n->nr_slabs))
+		if (!atomic_long_read(&n->nr_slabs))
 			continue;
 
 		spin_lock_irqsave(&n->list_lock, flags);
@@ -3247,7 +3254,7 @@ static unsigned long slab_objects(struct kmem_cache *s,
 		}
 
 		if (flags & SO_FULL) {
-			int full_slabs = atomic_read(&n->nr_slabs)
+			int full_slabs = atomic_long_read(&n->nr_slabs)
 					- per_cpu[node]
 					- n->nr_partial;
 
@@ -3283,7 +3290,7 @@ static int any_slab_objects(struct kmem_cache *s)
 	for_each_node(node) {
 		struct kmem_cache_node *n = get_node(s, node);
 
-		if (n->nr_partial || atomic_read(&n->nr_slabs))
+		if (n->nr_partial || atomic_long_read(&n->nr_slabs))
 			return 1;
 	}
 	return 0;
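
Dropping GFP_THISNODE from the bootstrap new_slab() call means the page may come from a different node than requested; the added printks report that and the code carries on rather than dying. A rough userspace sketch of that degrade-gracefully shape, with every name invented; the atomic_read() to atomic_long_read() switches in the later hunks simply read the nr_slabs counter through the matching long accessor:

#include <stdio.h>
#include <stdlib.h>

/* Invented stand-in for an allocator that may satisfy a node-specific
 * request from another node when the wanted node has no memory. */
static void *alloc_on_node(int wanted, int *got)
{
    (void)wanted;
    *got = 0;                    /* pretend only node 0 has memory */
    return malloc(64);
}

static void *early_node_struct(int node)
{
    int got;
    void *p = alloc_on_node(node, &got);

    if (!p)
        abort();                 /* the sketch's analogue of BUG_ON(!page) */
    if (got != node)
        fprintf(stderr, "wanted node %d, got node %d; continuing anyway\n",
                node, got);
    return p;
}

int main(void)
{
    free(early_node_struct(1));  /* node 1 is memoryless in this toy */
    return 0;
}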
diff --git a/mm/sparse.c b/mm/sparse.c
index 3047bf06c1f3..239f5a720d38 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -41,6 +41,15 @@ int page_to_nid(struct page *page)
 	return section_to_node_table[page_to_section(page)];
 }
 EXPORT_SYMBOL(page_to_nid);
+
+static void set_section_nid(unsigned long section_nr, int nid)
+{
+	section_to_node_table[section_nr] = nid;
+}
+#else /* !NODE_NOT_IN_PAGE_FLAGS */
+static inline void set_section_nid(unsigned long section_nr, int nid)
+{
+}
 #endif
 
 #ifdef CONFIG_SPARSEMEM_EXTREME
45 54
46#ifdef CONFIG_SPARSEMEM_EXTREME 55#ifdef CONFIG_SPARSEMEM_EXTREME
@@ -68,10 +77,6 @@ static int __meminit sparse_index_init(unsigned long section_nr, int nid)
 	struct mem_section *section;
 	int ret = 0;
 
-#ifdef NODE_NOT_IN_PAGE_FLAGS
-	section_to_node_table[section_nr] = nid;
-#endif
-
 	if (mem_section[root])
 		return -EEXIST;
 
@@ -148,6 +153,7 @@ void __init memory_present(int nid, unsigned long start, unsigned long end)
 		struct mem_section *ms;
 
 		sparse_index_init(section, nid);
+		set_section_nid(section, nid);
 
 		ms = __nr_to_section(section);
 		if (!ms->section_mem_map)
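
The sparse.c change moves the section_to_node_table update out of sparse_index_init() and into a set_section_nid() helper that memory_present() calls unconditionally; the #ifdef is confined to the helper by pairing the real version with an empty inline stub. The same shape in a standalone sketch, where FEATURE_X is an invented stand-in for NODE_NOT_IN_PAGE_FLAGS:

#include <stdio.h>

/* One real helper when the feature is configured in, one empty inline
 * stub when it is not, so the caller never needs an #ifdef of its own. */
#ifdef FEATURE_X
static int table[128];

static void set_entry(unsigned long nr, int val)
{
    table[nr] = val;
}
#else
static inline void set_entry(unsigned long nr, int val)
{
    (void)nr;
    (void)val;
}
#endif

int main(void)
{
    set_entry(3, 42);            /* caller stays ifdef-free either way */
    puts("done");
    return 0;
}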
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d419e10e3daa..a6e65d024995 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -271,6 +271,12 @@ static void handle_write_error(struct address_space *mapping,
 	unlock_page(page);
 }
 
+/* Request for sync pageout. */
+enum pageout_io {
+	PAGEOUT_IO_ASYNC,
+	PAGEOUT_IO_SYNC,
+};
+
 /* possible outcome of pageout() */
 typedef enum {
 	/* failed to write page out, page is locked */
@@ -287,7 +293,8 @@ typedef enum {
  * pageout is called by shrink_page_list() for each dirty page.
  * Calls ->writepage().
  */
-static pageout_t pageout(struct page *page, struct address_space *mapping)
+static pageout_t pageout(struct page *page, struct address_space *mapping,
+						enum pageout_io sync_writeback)
 {
 	/*
 	 * If the page is dirty, only perform writeback if that write
@@ -346,6 +353,15 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
 			ClearPageReclaim(page);
 			return PAGE_ACTIVATE;
 		}
+
+		/*
+		 * Wait on writeback if requested to. This happens when
+		 * direct reclaiming a large contiguous area and the
+		 * first attempt to free a range of pages fails.
+		 */
+		if (PageWriteback(page) && sync_writeback == PAGEOUT_IO_SYNC)
+			wait_on_page_writeback(page);
+
 		if (!PageWriteback(page)) {
 			/* synchronous write or broken a_ops? */
 			ClearPageReclaim(page);
@@ -423,7 +439,8 @@ cannot_free:
  * shrink_page_list() returns the number of reclaimed pages
  */
 static unsigned long shrink_page_list(struct list_head *page_list,
-					struct scan_control *sc)
+					struct scan_control *sc,
+					enum pageout_io sync_writeback)
 {
 	LIST_HEAD(ret_pages);
 	struct pagevec freed_pvec;
@@ -458,8 +475,23 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		if (page_mapped(page) || PageSwapCache(page))
 			sc->nr_scanned++;
 
-		if (PageWriteback(page))
-			goto keep_locked;
+		may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
+			(PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
+
+		if (PageWriteback(page)) {
+			/*
+			 * Synchronous reclaim is performed in two passes,
+			 * first an asynchronous pass over the list to
+			 * start parallel writeback, and a second synchronous
+			 * pass to wait for the IO to complete. Wait here
+			 * for any page for which writeback has already
+			 * started.
+			 */
+			if (sync_writeback == PAGEOUT_IO_SYNC && may_enter_fs)
+				wait_on_page_writeback(page);
+			else
+				goto keep_locked;
+		}
 
 		referenced = page_referenced(page, 1);
 		/* In active use or really unfreeable? Activate it. */
@@ -478,8 +510,6 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 #endif /* CONFIG_SWAP */
 
 		mapping = page_mapping(page);
-		may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
-			(PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
 
 		/*
 		 * The page is mapped into the page tables of one or more
@@ -505,7 +535,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 				goto keep_locked;
 
 			/* Page is dirty, try to write it out here */
-			switch(pageout(page, mapping)) {
+			switch (pageout(page, mapping, sync_writeback)) {
 			case PAGE_KEEP:
 				goto keep_locked;
 			case PAGE_ACTIVATE:
@@ -777,6 +807,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 					     (sc->order > PAGE_ALLOC_COSTLY_ORDER)?
 					     ISOLATE_BOTH : ISOLATE_INACTIVE);
 		nr_active = clear_active_flags(&page_list);
+		__count_vm_events(PGDEACTIVATE, nr_active);
 
 		__mod_zone_page_state(zone, NR_ACTIVE, -nr_active);
 		__mod_zone_page_state(zone, NR_INACTIVE,
@@ -785,7 +816,29 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
 		spin_unlock_irq(&zone->lru_lock);
 
 		nr_scanned += nr_scan;
-		nr_freed = shrink_page_list(&page_list, sc);
+		nr_freed = shrink_page_list(&page_list, sc, PAGEOUT_IO_ASYNC);
+
+		/*
+		 * If we are direct reclaiming for contiguous pages and we do
+		 * not reclaim everything in the list, try again and wait
+		 * for IO to complete. This will stall high-order allocations
+		 * but that should be acceptable to the caller
+		 */
+		if (nr_freed < nr_taken && !current_is_kswapd() &&
+					sc->order > PAGE_ALLOC_COSTLY_ORDER) {
+			congestion_wait(WRITE, HZ/10);
+
+			/*
+			 * The attempt at page out may have made some
+			 * of the pages active, mark them inactive again.
+			 */
+			nr_active = clear_active_flags(&page_list);
+			count_vm_events(PGDEACTIVATE, nr_active);
+
+			nr_freed += shrink_page_list(&page_list, sc,
+							PAGEOUT_IO_SYNC);
+		}
+
 		nr_reclaimed += nr_freed;
 		local_irq_disable();
 		if (current_is_kswapd()) {
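
Taken together, the vmscan.c hunks make reclaim a two-pass affair: shrink_page_list() first runs with PAGEOUT_IO_ASYNC so writeback is only started, and a high-order direct reclaim that still fell short waits for congestion and repeats the list with PAGEOUT_IO_SYNC, where pages under writeback are waited on instead of skipped. Stripped of the MM details, the control flow is roughly the following; the types and names are toys, not kernel code:

#include <stdbool.h>
#include <stdio.h>

enum pass_mode { PASS_ASYNC, PASS_SYNC };

/* Toy item: "dirty" items need a slow, waited-on step to complete. */
struct item { bool dirty; bool done; };

/* The async pass skips anything still in flight; the sync pass waits
 * instead of skipping, mirroring PAGEOUT_IO_ASYNC/PAGEOUT_IO_SYNC. */
static int process_list(struct item *items, int n, enum pass_mode mode)
{
    int freed = 0;

    for (int i = 0; i < n; i++) {
        if (items[i].done)
            continue;
        if (items[i].dirty && mode == PASS_ASYNC)
            continue;            /* work started elsewhere; skip for now */
        items[i].dirty = false;  /* "wait" for the work to finish */
        items[i].done = true;
        freed++;
    }
    return freed;
}

int main(void)
{
    struct item list[] = { {true, false}, {false, false}, {true, false} };
    int want = 3;

    int freed = process_list(list, 3, PASS_ASYNC);
    if (freed < want) {
        /* the expensive high-order case: stall, then retry synchronously */
        freed += process_list(list, 3, PASS_SYNC);
    }
    printf("freed %d of %d\n", freed, want);
    return 0;
}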