Diffstat (limited to 'mm')
-rw-r--r--  mm/hugetlb.c     |  2
-rw-r--r--  mm/mempolicy.c   |  2
-rw-r--r--  mm/mmap.c        |  6
-rw-r--r--  mm/nommu.c       |  2
-rw-r--r--  mm/page_alloc.c  | 13
-rw-r--r--  mm/slab.c        | 14
-rw-r--r--  mm/slub.c        | 15
-rw-r--r--  mm/sparse.c      | 14
-rw-r--r--  mm/vmscan.c      | 69
9 files changed, 113 insertions(+), 24 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d7ca59d66c59..de4cf458d6e1 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -643,7 +643,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 spin_unlock(&mm->page_table_lock);
                 ret = hugetlb_fault(mm, vma, vaddr, 0);
                 spin_lock(&mm->page_table_lock);
-                if (!(ret & VM_FAULT_MAJOR))
+                if (!(ret & VM_FAULT_ERROR))
                         continue;
 
                 remainder = 0;
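
The hunk above flips the retry test in follow_hugetlb_page() from the "did the fault do I/O" bit to the error mask, so the loop keeps faulting after a successful major fault and only bails out on a real failure. Below is a rough userspace sketch of that masking logic; the flag names and values are invented for the example, only the test mirrors the change.

/*
 * Illustrative model, not kernel code: FAULT_* values are made up,
 * only the bit test reflects the VM_FAULT_MAJOR -> VM_FAULT_ERROR change.
 */
#include <stdio.h>

#define FAULT_MAJOR     0x1     /* fault had to do I/O, but still succeeded */
#define FAULT_OOM       0x2     /* hypothetical error bits */
#define FAULT_SIGBUS    0x4
#define FAULT_ERROR     (FAULT_OOM | FAULT_SIGBUS)

static int should_retry(unsigned int ret)
{
        /* new behaviour: keep faulting unless an error bit is set */
        return !(ret & FAULT_ERROR);
}

int main(void)
{
        /* a major fault that succeeded: the old test gave up here, the new one retries */
        printf("major, no error -> retry=%d\n", should_retry(FAULT_MAJOR));
        /* a genuine failure: both tests stop */
        printf("oom             -> retry=%d\n", should_retry(FAULT_OOM));
        return 0;
}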
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 71b84b45154a..172abffeb2e3 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -149,7 +149,7 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
            lower zones etc. Avoid empty zones because the memory allocator
            doesn't like them. If you implement node hot removal you
            have to fix that. */
-        k = policy_zone;
+        k = MAX_NR_ZONES - 1;
         while (1) {
                 for_each_node_mask(nd, *nodes) {
                         struct zone *z = &NODE_DATA(nd)->node_zones[k];
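
With this change bind_zonelist() starts its scan at MAX_NR_ZONES - 1 rather than policy_zone, walking zones from the highest index downwards and skipping empty ones. The following toy userspace model shows that walk; the node/zone sizes and array layout are invented for the example and do not reproduce the kernel's zonelist structures.

/* Toy model of building a zonelist top-down while skipping empty zones. */
#include <stdio.h>

#define MAX_ZONES 4
#define MAX_NODES 2

/* present_pages[node][zone]: zero means the zone is empty on that node */
static unsigned long present_pages[MAX_NODES][MAX_ZONES] = {
        { 4096, 0, 262144, 0 },
        {    0, 0, 524288, 0 },
};

int main(void)
{
        int zonelist[MAX_NODES * MAX_ZONES];
        int n = 0;

        /* walk zones from highest index to lowest so high zones are preferred */
        for (int k = MAX_ZONES - 1; k >= 0; k--)
                for (int node = 0; node < MAX_NODES; node++)
                        if (present_pages[node][k])
                                zonelist[n++] = node * MAX_ZONES + k;

        for (int i = 0; i < n; i++)
                printf("zonelist[%d] = node %d zone %d\n",
                       i, zonelist[i] / MAX_ZONES, zonelist[i] % MAX_ZONES);
        return 0;
}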
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -93,7 +93,7 @@ atomic_t vm_committed_space = ATOMIC_INIT(0);
  * Note this is a helper function intended to be used by LSMs which
  * wish to use this logic.
  */
-int __vm_enough_memory(long pages, int cap_sys_admin)
+int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 {
         unsigned long free, allowed;
 
@@ -166,7 +166,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin)
 
         /* Don't let a single process grow too big:
            leave 3% of the size of this process for other processes */
-        allowed -= current->mm->total_vm / 32;
+        allowed -= mm->total_vm / 32;
 
         /*
          * cast `allowed' as a signed long because vm_committed_space
@@ -2077,7 +2077,7 @@ int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
         if (__vma && __vma->vm_start < vma->vm_end)
                 return -ENOMEM;
         if ((vma->vm_flags & VM_ACCOUNT) &&
-             security_vm_enough_memory(vma_pages(vma)))
+             security_vm_enough_memory_mm(mm, vma_pages(vma)))
                 return -ENOMEM;
         vma_link(mm, vma, prev, rb_link, rb_parent);
         return 0;
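
These mmap.c hunks (and the identical signature change in mm/nommu.c below) thread an explicit mm_struct through __vm_enough_memory(), so the 3% "leave room for other processes" reservation is charged against the mm being modified rather than against whoever happens to be current. A minimal sketch of that interface shape follows; the struct, limits and numbers are invented stand-ins, not the kernel's accounting.

/* Sketch: pass the address space being charged instead of using a global. */
#include <stdbool.h>
#include <stdio.h>

struct mm { unsigned long total_vm; };          /* stand-in for mm_struct */

static unsigned long commit_limit = 100000;     /* pages, made-up value */
static unsigned long committed;                 /* pages already committed */

static bool enough_memory(struct mm *mm, long pages)
{
        unsigned long allowed = commit_limit;

        /* reserve 3% of *this* mm, not of the caller's mm */
        allowed -= mm->total_vm / 32;
        return committed + pages <= allowed;
}

int main(void)
{
        struct mm target = { .total_vm = 3200 };

        printf("can map 500 pages: %d\n", enough_memory(&target, 500));
        return 0;
}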
diff --git a/mm/nommu.c b/mm/nommu.c
index 9eef6a398555..8ed0cb43118a 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1270,7 +1270,7 @@ EXPORT_SYMBOL(get_unmapped_area);
  * Note this is a helper function intended to be used by LSMs which
  * wish to use this logic.
  */
-int __vm_enough_memory(long pages, int cap_sys_admin)
+int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 {
         unsigned long free, allowed;
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3da85b81dabb..6427653023aa 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1157,6 +1157,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
         nodemask_t *allowednodes = NULL;        /* zonelist_cache approximation */
         int zlc_active = 0;                     /* set if using zonelist_cache */
         int did_zlc_setup = 0;                  /* just call zlc_setup() one time */
+        enum zone_type highest_zoneidx = -1;    /* Gets set for policy zonelists */
 
 zonelist_scan:
         /*
@@ -1166,6 +1167,18 @@ zonelist_scan:
         z = zonelist->zones;
 
         do {
+                /*
+                 * In NUMA, this could be a policy zonelist which contains
+                 * zones that may not be allowed by the current gfp_mask.
+                 * Check the zone is allowed by the current flags
+                 */
+                if (unlikely(alloc_should_filter_zonelist(zonelist))) {
+                        if (highest_zoneidx == -1)
+                                highest_zoneidx = gfp_zone(gfp_mask);
+                        if (zone_idx(*z) > highest_zoneidx)
+                                continue;
+                }
+
                 if (NUMA_BUILD && zlc_active &&
                         !zlc_zone_worth_trying(zonelist, z, allowednodes))
                                 continue;
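
The new filter skips zones in a policy zonelist that sit above the highest zone the gfp flags permit, with gfp_zone(gfp_mask) computed lazily the first time it is needed. A small userspace model of that comparison is below; it uses the familiar zone names but a deliberately simplified "highest allowed zone" lookup, not the kernel's gfp machinery.

/* Model: filter a zonelist against the highest zone the request may use. */
#include <stdio.h>

enum zone_type { ZONE_DMA, ZONE_NORMAL, ZONE_HIGHMEM, ZONE_MOVABLE, MAX_NR_ZONES };

static const char *zone_name[] = { "DMA", "Normal", "HighMem", "Movable" };

int main(void)
{
        /* a policy zonelist may contain zones the request cannot use */
        enum zone_type zonelist[] = { ZONE_MOVABLE, ZONE_HIGHMEM, ZONE_NORMAL, ZONE_DMA };
        enum zone_type highest = ZONE_NORMAL;   /* e.g. a kernel allocation */

        for (unsigned int i = 0; i < sizeof(zonelist) / sizeof(zonelist[0]); i++) {
                if (zonelist[i] > highest)
                        continue;               /* zone not allowed by the flags */
                printf("try zone %s\n", zone_name[zonelist[i]]);
        }
        return 0;
}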
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -883,6 +883,7 @@ static void __slab_error(const char *function, struct kmem_cache *cachep,
  */
 
 static int use_alien_caches __read_mostly = 1;
+static int numa_platform __read_mostly = 1;
 static int __init noaliencache_setup(char *s)
 {
         use_alien_caches = 0;
@@ -1399,8 +1400,10 @@ void __init kmem_cache_init(void)
         int order;
         int node;
 
-        if (num_possible_nodes() == 1)
+        if (num_possible_nodes() == 1) {
                 use_alien_caches = 0;
+                numa_platform = 0;
+        }
 
         for (i = 0; i < NUM_INIT_LISTS; i++) {
                 kmem_list3_init(&initkmem_list3[i]);
@@ -3558,7 +3561,14 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
         check_irq_off();
         objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
 
-        if (cache_free_alien(cachep, objp))
+        /*
+         * Skip calling cache_free_alien() when the platform is not numa.
+         * This will avoid cache misses that happen while accessing slabp (which
+         * is per page memory reference) to get nodeid. Instead use a global
+         * variable to skip the call, which is mostly likely to be present in
+         * the cache.
+         */
+        if (numa_platform && cache_free_alien(cachep, objp))
                 return;
 
         if (likely(ac->avail < ac->limit)) {
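
The slab.c change caches "is this a NUMA machine" in a global at init time so that the free fast path can skip cache_free_alien(), and with it the cold per-page metadata access, on single-node systems. Here is a sketch of that short-circuit pattern with stand-in functions; it illustrates the idea only and does not model the real slab internals.

/* Sketch: consult a cache-hot global flag before a costly per-object check. */
#include <stdbool.h>
#include <stdio.h>

static int numa_platform = 1;   /* cleared at init when only one node exists */

static bool free_to_remote_node(void *obj)
{
        /* stand-in for cache_free_alien(): imagine a cold metadata lookup */
        (void)obj;
        return false;
}

static void cache_free(void *obj)
{
        /* on a single-node machine the short-circuit skips the lookup entirely */
        if (numa_platform && free_to_remote_node(obj))
                return;
        printf("freed %p to the local array cache\n", obj);
}

int main(void)
{
        int x;

        numa_platform = 0;      /* pretend init found a single node */
        cache_free(&x);
        return 0;
}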
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1877,9 +1877,16 @@ static struct kmem_cache_node * __init early_kmem_cache_node_alloc(gfp_t gfpflag
 
         BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node));
 
-        page = new_slab(kmalloc_caches, gfpflags | GFP_THISNODE, node);
+        page = new_slab(kmalloc_caches, gfpflags, node);
 
         BUG_ON(!page);
+        if (page_to_nid(page) != node) {
+                printk(KERN_ERR "SLUB: Unable to allocate memory from "
+                                "node %d\n", node);
+                printk(KERN_ERR "SLUB: Allocating a useless per node structure "
+                                "in order to be able to continue\n");
+        }
+
         n = page->freelist;
         BUG_ON(!n);
         page->freelist = get_freepointer(kmalloc_caches, n);
@@ -3112,7 +3119,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
                 unsigned long flags;
                 struct page *page;
 
-                if (!atomic_read(&n->nr_slabs))
+                if (!atomic_long_read(&n->nr_slabs))
                         continue;
 
                 spin_lock_irqsave(&n->list_lock, flags);
@@ -3247,7 +3254,7 @@ static unsigned long slab_objects(struct kmem_cache *s,
                 }
 
                 if (flags & SO_FULL) {
-                        int full_slabs = atomic_read(&n->nr_slabs)
+                        int full_slabs = atomic_long_read(&n->nr_slabs)
                                         - per_cpu[node]
                                         - n->nr_partial;
 
@@ -3283,7 +3290,7 @@ static int any_slab_objects(struct kmem_cache *s)
         for_each_node(node) {
                 struct kmem_cache_node *n = get_node(s, node);
 
-                if (n->nr_partial || atomic_read(&n->nr_slabs))
+                if (n->nr_partial || atomic_long_read(&n->nr_slabs))
                         return 1;
         }
         return 0;
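
Besides letting the early per-node allocation fall back to a foreign node with a warning instead of insisting on GFP_THISNODE, the slub.c hunks switch the nr_slabs readers to atomic_long_read() so the accessor matches the counter's width. The short C11 illustration below shows why the width matters when counts can exceed 32 bits; it uses standard stdatomic types, not the kernel's atomic_long_t.

/* Illustration: read a wide atomic counter with a matching-width accessor. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_llong nr_slabs;

int main(void)
{
        atomic_store(&nr_slabs, 5000000000LL);          /* a count wider than 32 bits */

        long long full = atomic_load(&nr_slabs);        /* matching-width read */
        int narrow = (int)atomic_load(&nr_slabs);       /* what a 32-bit read would return */

        printf("full read: %lld, 32-bit read: %d\n", full, narrow);
        return 0;
}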
diff --git a/mm/sparse.c b/mm/sparse.c
index 3047bf06c1f3..239f5a720d38 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -41,6 +41,15 @@ int page_to_nid(struct page *page)
         return section_to_node_table[page_to_section(page)];
 }
 EXPORT_SYMBOL(page_to_nid);
+
+static void set_section_nid(unsigned long section_nr, int nid)
+{
+        section_to_node_table[section_nr] = nid;
+}
+#else /* !NODE_NOT_IN_PAGE_FLAGS */
+static inline void set_section_nid(unsigned long section_nr, int nid)
+{
+}
 #endif
 
 #ifdef CONFIG_SPARSEMEM_EXTREME
@@ -68,10 +77,6 @@ static int __meminit sparse_index_init(unsigned long section_nr, int nid)
         struct mem_section *section;
         int ret = 0;
 
-#ifdef NODE_NOT_IN_PAGE_FLAGS
-        section_to_node_table[section_nr] = nid;
-#endif
-
         if (mem_section[root])
                 return -EEXIST;
 
@@ -148,6 +153,7 @@ void __init memory_present(int nid, unsigned long start, unsigned long end)
         struct mem_section *ms;
 
         sparse_index_init(section, nid);
+        set_section_nid(section, nid);
 
         ms = __nr_to_section(section);
         if (!ms->section_mem_map)
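
Moving the node-table update out of sparse_index_init() and into memory_present() via the new set_section_nid() helper means the table is written for every present section, even when sparse_index_init() returns early (-EEXIST) because several sections map to an already-initialised root. The toy model below shows that ordering under an assumed 4-sections-per-root layout; all sizes and names are invented for the example.

/* Toy model: keep a section-to-node table updated even when init bails early. */
#include <stdio.h>

#define NR_SECTIONS 8

static int section_to_node_table[NR_SECTIONS];
static int root_initialised[NR_SECTIONS / 4];   /* assume 4 sections per root */

static int sparse_index_init(unsigned long section_nr, int nid)
{
        unsigned long root = section_nr / 4;

        (void)nid;
        if (root_initialised[root])
                return -1;                      /* the kernel returns -EEXIST here */
        root_initialised[root] = 1;
        return 0;
}

static void set_section_nid(unsigned long section_nr, int nid)
{
        section_to_node_table[section_nr] = nid;
}

int main(void)
{
        /* sections 0 and 1 share a root, so index init "fails" for the second,
         * yet the node table is still filled in because the setter is called
         * unconditionally from the present path */
        sparse_index_init(0, 0);
        set_section_nid(0, 0);
        sparse_index_init(1, 1);
        set_section_nid(1, 1);

        printf("section 0 -> node %d, section 1 -> node %d\n",
               section_to_node_table[0], section_to_node_table[1]);
        return 0;
}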
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d419e10e3daa..a6e65d024995 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -271,6 +271,12 @@ static void handle_write_error(struct address_space *mapping,
         unlock_page(page);
 }
 
+/* Request for sync pageout. */
+enum pageout_io {
+        PAGEOUT_IO_ASYNC,
+        PAGEOUT_IO_SYNC,
+};
+
 /* possible outcome of pageout() */
 typedef enum {
         /* failed to write page out, page is locked */
@@ -287,7 +293,8 @@ typedef enum {
  * pageout is called by shrink_page_list() for each dirty page.
  * Calls ->writepage().
  */
-static pageout_t pageout(struct page *page, struct address_space *mapping)
+static pageout_t pageout(struct page *page, struct address_space *mapping,
+                                                enum pageout_io sync_writeback)
 {
         /*
          * If the page is dirty, only perform writeback if that write
@@ -346,6 +353,15 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
                         ClearPageReclaim(page);
                         return PAGE_ACTIVATE;
                 }
+
+                /*
+                 * Wait on writeback if requested to. This happens when
+                 * direct reclaiming a large contiguous area and the
+                 * first attempt to free a range of pages fails.
+                 */
+                if (PageWriteback(page) && sync_writeback == PAGEOUT_IO_SYNC)
+                        wait_on_page_writeback(page);
+
                 if (!PageWriteback(page)) {
                         /* synchronous write or broken a_ops? */
                         ClearPageReclaim(page);
@@ -423,7 +439,8 @@ cannot_free:
  * shrink_page_list() returns the number of reclaimed pages
  */
 static unsigned long shrink_page_list(struct list_head *page_list,
-                                        struct scan_control *sc)
+                                        struct scan_control *sc,
+                                        enum pageout_io sync_writeback)
 {
         LIST_HEAD(ret_pages);
         struct pagevec freed_pvec;
@@ -458,8 +475,23 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                 if (page_mapped(page) || PageSwapCache(page))
                         sc->nr_scanned++;
 
-                if (PageWriteback(page))
-                        goto keep_locked;
+                may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
+                        (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
+
+                if (PageWriteback(page)) {
+                        /*
+                         * Synchronous reclaim is performed in two passes,
+                         * first an asynchronous pass over the list to
+                         * start parallel writeback, and a second synchronous
+                         * pass to wait for the IO to complete. Wait here
+                         * for any page for which writeback has already
+                         * started.
+                         */
+                        if (sync_writeback == PAGEOUT_IO_SYNC && may_enter_fs)
+                                wait_on_page_writeback(page);
+                        else
+                                goto keep_locked;
+                }
 
                 referenced = page_referenced(page, 1);
                 /* In active use or really unfreeable? Activate it. */
@@ -478,8 +510,6 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 #endif /* CONFIG_SWAP */
 
                 mapping = page_mapping(page);
-                may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
-                        (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
 
                 /*
                  * The page is mapped into the page tables of one or more
@@ -505,7 +535,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                         goto keep_locked;
 
                 /* Page is dirty, try to write it out here */
-                switch(pageout(page, mapping)) {
+                switch (pageout(page, mapping, sync_writeback)) {
                 case PAGE_KEEP:
                         goto keep_locked;
                 case PAGE_ACTIVATE:
@@ -777,6 +807,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
                              (sc->order > PAGE_ALLOC_COSTLY_ORDER)?
                                              ISOLATE_BOTH : ISOLATE_INACTIVE);
                 nr_active = clear_active_flags(&page_list);
+                __count_vm_events(PGDEACTIVATE, nr_active);
 
                 __mod_zone_page_state(zone, NR_ACTIVE, -nr_active);
                 __mod_zone_page_state(zone, NR_INACTIVE,
@@ -785,7 +816,29 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
                 spin_unlock_irq(&zone->lru_lock);
 
                 nr_scanned += nr_scan;
-                nr_freed = shrink_page_list(&page_list, sc);
+                nr_freed = shrink_page_list(&page_list, sc, PAGEOUT_IO_ASYNC);
+
+                /*
+                 * If we are direct reclaiming for contiguous pages and we do
+                 * not reclaim everything in the list, try again and wait
+                 * for IO to complete. This will stall high-order allocations
+                 * but that should be acceptable to the caller
+                 */
+                if (nr_freed < nr_taken && !current_is_kswapd() &&
+                                        sc->order > PAGE_ALLOC_COSTLY_ORDER) {
+                        congestion_wait(WRITE, HZ/10);
+
+                        /*
+                         * The attempt at page out may have made some
+                         * of the pages active, mark them inactive again.
+                         */
+                        nr_active = clear_active_flags(&page_list);
+                        count_vm_events(PGDEACTIVATE, nr_active);
+
+                        nr_freed += shrink_page_list(&page_list, sc,
+                                                        PAGEOUT_IO_SYNC);
+                }
+
                 nr_reclaimed += nr_freed;
                 local_irq_disable();
                 if (current_is_kswapd()) {
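
Taken together, the vmscan.c hunks implement the two-pass scheme the new comments describe: the first shrink_page_list() pass starts writeback asynchronously, and if a high-order direct reclaimer did not free everything it isolated, a second pass waits for that IO instead of stalling on each page as it is met. Below is a toy userspace model of the two passes; the fake page structs and the single-array "list" stand in for the real LRU machinery and are purely illustrative.

/* Toy model of async-then-sync reclaim over a list of fake pages. */
#include <stdbool.h>
#include <stdio.h>

enum pageout_io { PAGEOUT_IO_ASYNC, PAGEOUT_IO_SYNC };

struct fake_page { bool dirty; bool writeback; bool freed; };

static int shrink_list(struct fake_page *pages, int n, enum pageout_io sync)
{
        int freed = 0;

        for (int i = 0; i < n; i++) {
                if (pages[i].freed)
                        continue;
                if (pages[i].writeback) {
                        if (sync == PAGEOUT_IO_SYNC)
                                pages[i].writeback = false;     /* "wait" for the IO */
                        else
                                continue;                       /* keep the page for now */
                }
                if (pages[i].dirty) {
                        pages[i].dirty = false;
                        pages[i].writeback = true;              /* start asynchronous IO */
                        continue;
                }
                pages[i].freed = true;
                freed++;
        }
        return freed;
}

int main(void)
{
        struct fake_page pages[4] = {
                { true, false, false }, { true, false, false },
                { false, false, false }, { false, true, false },
        };
        int want = 4;

        int freed = shrink_list(pages, 4, PAGEOUT_IO_ASYNC);
        if (freed < want)       /* a high-order caller is willing to stall */
                freed += shrink_list(pages, 4, PAGEOUT_IO_SYNC);

        printf("freed %d of %d pages\n", freed, want);
        return 0;
}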