diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 6 | ||||
-rw-r--r-- | mm/bootmem.c | 195 | ||||
-rw-r--r-- | mm/failslab.c | 18 | ||||
-rw-r--r-- | mm/filemap.c | 103 | ||||
-rw-r--r-- | mm/hugetlb.c | 13 | ||||
-rw-r--r-- | mm/maccess.c | 11 | ||||
-rw-r--r-- | mm/memcontrol.c | 11 | ||||
-rw-r--r-- | mm/memory.c | 14 | ||||
-rw-r--r-- | mm/migrate.c | 41 | ||||
-rw-r--r-- | mm/mmap.c | 40 | ||||
-rw-r--r-- | mm/mmu_context.c | 3 | ||||
-rw-r--r-- | mm/nommu.c | 144 | ||||
-rw-r--r-- | mm/oom_kill.c | 2 | ||||
-rw-r--r-- | mm/page_alloc.c | 272 | ||||
-rw-r--r-- | mm/percpu.c | 40 | ||||
-rw-r--r-- | mm/slab.c | 11 | ||||
-rw-r--r-- | mm/slub.c | 337 | ||||
-rw-r--r-- | mm/sparse-vmemmap.c | 76 | ||||
-rw-r--r-- | mm/sparse.c | 196 | ||||
-rw-r--r-- | mm/truncate.c | 30 | ||||
-rw-r--r-- | mm/util.c | 46 | ||||
-rw-r--r-- | mm/vmalloc.c | 114 | ||||
-rw-r--r-- | mm/vmscan.c | 3 | ||||
-rw-r--r-- | mm/vmstat.c | 15 |
24 files changed, 1114 insertions, 627 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index 17b8947aa7da..9c61158308dc 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
@@ -115,6 +115,10 @@ config SPARSEMEM_EXTREME | |||
115 | config SPARSEMEM_VMEMMAP_ENABLE | 115 | config SPARSEMEM_VMEMMAP_ENABLE |
116 | bool | 116 | bool |
117 | 117 | ||
118 | config SPARSEMEM_ALLOC_MEM_MAP_TOGETHER | ||
119 | def_bool y | ||
120 | depends on SPARSEMEM && X86_64 | ||
121 | |||
118 | config SPARSEMEM_VMEMMAP | 122 | config SPARSEMEM_VMEMMAP |
119 | bool "Sparse Memory virtual memmap" | 123 | bool "Sparse Memory virtual memmap" |
120 | depends on SPARSEMEM && SPARSEMEM_VMEMMAP_ENABLE | 124 | depends on SPARSEMEM && SPARSEMEM_VMEMMAP_ENABLE |
@@ -195,7 +199,7 @@ config BOUNCE | |||
195 | config NR_QUICK | 199 | config NR_QUICK |
196 | int | 200 | int |
197 | depends on QUICKLIST | 201 | depends on QUICKLIST |
198 | default "2" if SUPERH || AVR32 | 202 | default "2" if AVR32 |
199 | default "1" | 203 | default "1" |
200 | 204 | ||
201 | config VIRT_TO_BUS | 205 | config VIRT_TO_BUS |
diff --git a/mm/bootmem.c b/mm/bootmem.c index 7d1486875e1c..d7c791ef0036 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/bootmem.h> | 13 | #include <linux/bootmem.h> |
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/kmemleak.h> | 15 | #include <linux/kmemleak.h> |
16 | #include <linux/range.h> | ||
16 | 17 | ||
17 | #include <asm/bug.h> | 18 | #include <asm/bug.h> |
18 | #include <asm/io.h> | 19 | #include <asm/io.h> |
@@ -32,6 +33,7 @@ unsigned long max_pfn; | |||
32 | unsigned long saved_max_pfn; | 33 | unsigned long saved_max_pfn; |
33 | #endif | 34 | #endif |
34 | 35 | ||
36 | #ifndef CONFIG_NO_BOOTMEM | ||
35 | bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata; | 37 | bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata; |
36 | 38 | ||
37 | static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list); | 39 | static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list); |
@@ -142,7 +144,7 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages) | |||
142 | min_low_pfn = start; | 144 | min_low_pfn = start; |
143 | return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages); | 145 | return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages); |
144 | } | 146 | } |
145 | 147 | #endif | |
146 | /* | 148 | /* |
147 | * free_bootmem_late - free bootmem pages directly to page allocator | 149 | * free_bootmem_late - free bootmem pages directly to page allocator |
148 | * @addr: starting address of the range | 150 | * @addr: starting address of the range |
@@ -167,6 +169,60 @@ void __init free_bootmem_late(unsigned long addr, unsigned long size) | |||
167 | } | 169 | } |
168 | } | 170 | } |
169 | 171 | ||
172 | #ifdef CONFIG_NO_BOOTMEM | ||
173 | static void __init __free_pages_memory(unsigned long start, unsigned long end) | ||
174 | { | ||
175 | int i; | ||
176 | unsigned long start_aligned, end_aligned; | ||
177 | int order = ilog2(BITS_PER_LONG); | ||
178 | |||
179 | start_aligned = (start + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1); | ||
180 | end_aligned = end & ~(BITS_PER_LONG - 1); | ||
181 | |||
182 | if (end_aligned <= start_aligned) { | ||
183 | #if 1 | ||
184 | printk(KERN_DEBUG " %lx - %lx\n", start, end); | ||
185 | #endif | ||
186 | for (i = start; i < end; i++) | ||
187 | __free_pages_bootmem(pfn_to_page(i), 0); | ||
188 | |||
189 | return; | ||
190 | } | ||
191 | |||
192 | #if 1 | ||
193 | printk(KERN_DEBUG " %lx %lx - %lx %lx\n", | ||
194 | start, start_aligned, end_aligned, end); | ||
195 | #endif | ||
196 | for (i = start; i < start_aligned; i++) | ||
197 | __free_pages_bootmem(pfn_to_page(i), 0); | ||
198 | |||
199 | for (i = start_aligned; i < end_aligned; i += BITS_PER_LONG) | ||
200 | __free_pages_bootmem(pfn_to_page(i), order); | ||
201 | |||
202 | for (i = end_aligned; i < end; i++) | ||
203 | __free_pages_bootmem(pfn_to_page(i), 0); | ||
204 | } | ||
205 | |||
206 | unsigned long __init free_all_memory_core_early(int nodeid) | ||
207 | { | ||
208 | int i; | ||
209 | u64 start, end; | ||
210 | unsigned long count = 0; | ||
211 | struct range *range = NULL; | ||
212 | int nr_range; | ||
213 | |||
214 | nr_range = get_free_all_memory_range(&range, nodeid); | ||
215 | |||
216 | for (i = 0; i < nr_range; i++) { | ||
217 | start = range[i].start; | ||
218 | end = range[i].end; | ||
219 | count += end - start; | ||
220 | __free_pages_memory(start, end); | ||
221 | } | ||
222 | |||
223 | return count; | ||
224 | } | ||
225 | #else | ||
170 | static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) | 226 | static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) |
171 | { | 227 | { |
172 | int aligned; | 228 | int aligned; |
@@ -227,6 +283,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) | |||
227 | 283 | ||
228 | return count; | 284 | return count; |
229 | } | 285 | } |
286 | #endif | ||
230 | 287 | ||
231 | /** | 288 | /** |
232 | * free_all_bootmem_node - release a node's free pages to the buddy allocator | 289 | * free_all_bootmem_node - release a node's free pages to the buddy allocator |
@@ -237,7 +294,12 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) | |||
237 | unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) | 294 | unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) |
238 | { | 295 | { |
239 | register_page_bootmem_info_node(pgdat); | 296 | register_page_bootmem_info_node(pgdat); |
297 | #ifdef CONFIG_NO_BOOTMEM | ||
298 | /* free_all_memory_core_early(MAX_NUMNODES) will be called later */ | ||
299 | return 0; | ||
300 | #else | ||
240 | return free_all_bootmem_core(pgdat->bdata); | 301 | return free_all_bootmem_core(pgdat->bdata); |
302 | #endif | ||
241 | } | 303 | } |
242 | 304 | ||
243 | /** | 305 | /** |
@@ -247,9 +309,14 @@ unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) | |||
247 | */ | 309 | */ |
248 | unsigned long __init free_all_bootmem(void) | 310 | unsigned long __init free_all_bootmem(void) |
249 | { | 311 | { |
312 | #ifdef CONFIG_NO_BOOTMEM | ||
313 | return free_all_memory_core_early(NODE_DATA(0)->node_id); | ||
314 | #else | ||
250 | return free_all_bootmem_core(NODE_DATA(0)->bdata); | 315 | return free_all_bootmem_core(NODE_DATA(0)->bdata); |
316 | #endif | ||
251 | } | 317 | } |
252 | 318 | ||
319 | #ifndef CONFIG_NO_BOOTMEM | ||
253 | static void __init __free(bootmem_data_t *bdata, | 320 | static void __init __free(bootmem_data_t *bdata, |
254 | unsigned long sidx, unsigned long eidx) | 321 | unsigned long sidx, unsigned long eidx) |
255 | { | 322 | { |
@@ -344,6 +411,7 @@ static int __init mark_bootmem(unsigned long start, unsigned long end, | |||
344 | } | 411 | } |
345 | BUG(); | 412 | BUG(); |
346 | } | 413 | } |
414 | #endif | ||
347 | 415 | ||
348 | /** | 416 | /** |
349 | * free_bootmem_node - mark a page range as usable | 417 | * free_bootmem_node - mark a page range as usable |
@@ -358,6 +426,12 @@ static int __init mark_bootmem(unsigned long start, unsigned long end, | |||
358 | void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, | 426 | void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, |
359 | unsigned long size) | 427 | unsigned long size) |
360 | { | 428 | { |
429 | #ifdef CONFIG_NO_BOOTMEM | ||
430 | free_early(physaddr, physaddr + size); | ||
431 | #if 0 | ||
432 | printk(KERN_DEBUG "free %lx %lx\n", physaddr, size); | ||
433 | #endif | ||
434 | #else | ||
361 | unsigned long start, end; | 435 | unsigned long start, end; |
362 | 436 | ||
363 | kmemleak_free_part(__va(physaddr), size); | 437 | kmemleak_free_part(__va(physaddr), size); |
@@ -366,6 +440,7 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, | |||
366 | end = PFN_DOWN(physaddr + size); | 440 | end = PFN_DOWN(physaddr + size); |
367 | 441 | ||
368 | mark_bootmem_node(pgdat->bdata, start, end, 0, 0); | 442 | mark_bootmem_node(pgdat->bdata, start, end, 0, 0); |
443 | #endif | ||
369 | } | 444 | } |
370 | 445 | ||
371 | /** | 446 | /** |
@@ -379,6 +454,12 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, | |||
379 | */ | 454 | */ |
380 | void __init free_bootmem(unsigned long addr, unsigned long size) | 455 | void __init free_bootmem(unsigned long addr, unsigned long size) |
381 | { | 456 | { |
457 | #ifdef CONFIG_NO_BOOTMEM | ||
458 | free_early(addr, addr + size); | ||
459 | #if 0 | ||
460 | printk(KERN_DEBUG "free %lx %lx\n", addr, size); | ||
461 | #endif | ||
462 | #else | ||
382 | unsigned long start, end; | 463 | unsigned long start, end; |
383 | 464 | ||
384 | kmemleak_free_part(__va(addr), size); | 465 | kmemleak_free_part(__va(addr), size); |
@@ -387,6 +468,7 @@ void __init free_bootmem(unsigned long addr, unsigned long size) | |||
387 | end = PFN_DOWN(addr + size); | 468 | end = PFN_DOWN(addr + size); |
388 | 469 | ||
389 | mark_bootmem(start, end, 0, 0); | 470 | mark_bootmem(start, end, 0, 0); |
471 | #endif | ||
390 | } | 472 | } |
391 | 473 | ||
392 | /** | 474 | /** |
@@ -403,12 +485,17 @@ void __init free_bootmem(unsigned long addr, unsigned long size) | |||
403 | int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, | 485 | int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, |
404 | unsigned long size, int flags) | 486 | unsigned long size, int flags) |
405 | { | 487 | { |
488 | #ifdef CONFIG_NO_BOOTMEM | ||
489 | panic("no bootmem"); | ||
490 | return 0; | ||
491 | #else | ||
406 | unsigned long start, end; | 492 | unsigned long start, end; |
407 | 493 | ||
408 | start = PFN_DOWN(physaddr); | 494 | start = PFN_DOWN(physaddr); |
409 | end = PFN_UP(physaddr + size); | 495 | end = PFN_UP(physaddr + size); |
410 | 496 | ||
411 | return mark_bootmem_node(pgdat->bdata, start, end, 1, flags); | 497 | return mark_bootmem_node(pgdat->bdata, start, end, 1, flags); |
498 | #endif | ||
412 | } | 499 | } |
413 | 500 | ||
414 | /** | 501 | /** |
@@ -424,14 +511,20 @@ int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, | |||
424 | int __init reserve_bootmem(unsigned long addr, unsigned long size, | 511 | int __init reserve_bootmem(unsigned long addr, unsigned long size, |
425 | int flags) | 512 | int flags) |
426 | { | 513 | { |
514 | #ifdef CONFIG_NO_BOOTMEM | ||
515 | panic("no bootmem"); | ||
516 | return 0; | ||
517 | #else | ||
427 | unsigned long start, end; | 518 | unsigned long start, end; |
428 | 519 | ||
429 | start = PFN_DOWN(addr); | 520 | start = PFN_DOWN(addr); |
430 | end = PFN_UP(addr + size); | 521 | end = PFN_UP(addr + size); |
431 | 522 | ||
432 | return mark_bootmem(start, end, 1, flags); | 523 | return mark_bootmem(start, end, 1, flags); |
524 | #endif | ||
433 | } | 525 | } |
434 | 526 | ||
527 | #ifndef CONFIG_NO_BOOTMEM | ||
435 | static unsigned long __init align_idx(struct bootmem_data *bdata, | 528 | static unsigned long __init align_idx(struct bootmem_data *bdata, |
436 | unsigned long idx, unsigned long step) | 529 | unsigned long idx, unsigned long step) |
437 | { | 530 | { |
@@ -582,12 +675,33 @@ static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata, | |||
582 | #endif | 675 | #endif |
583 | return NULL; | 676 | return NULL; |
584 | } | 677 | } |
678 | #endif | ||
585 | 679 | ||
586 | static void * __init ___alloc_bootmem_nopanic(unsigned long size, | 680 | static void * __init ___alloc_bootmem_nopanic(unsigned long size, |
587 | unsigned long align, | 681 | unsigned long align, |
588 | unsigned long goal, | 682 | unsigned long goal, |
589 | unsigned long limit) | 683 | unsigned long limit) |
590 | { | 684 | { |
685 | #ifdef CONFIG_NO_BOOTMEM | ||
686 | void *ptr; | ||
687 | |||
688 | if (WARN_ON_ONCE(slab_is_available())) | ||
689 | return kzalloc(size, GFP_NOWAIT); | ||
690 | |||
691 | restart: | ||
692 | |||
693 | ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, goal, limit); | ||
694 | |||
695 | if (ptr) | ||
696 | return ptr; | ||
697 | |||
698 | if (goal != 0) { | ||
699 | goal = 0; | ||
700 | goto restart; | ||
701 | } | ||
702 | |||
703 | return NULL; | ||
704 | #else | ||
591 | bootmem_data_t *bdata; | 705 | bootmem_data_t *bdata; |
592 | void *region; | 706 | void *region; |
593 | 707 | ||
@@ -613,6 +727,7 @@ restart: | |||
613 | } | 727 | } |
614 | 728 | ||
615 | return NULL; | 729 | return NULL; |
730 | #endif | ||
616 | } | 731 | } |
617 | 732 | ||
618 | /** | 733 | /** |
@@ -631,7 +746,13 @@ restart: | |||
631 | void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, | 746 | void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, |
632 | unsigned long goal) | 747 | unsigned long goal) |
633 | { | 748 | { |
634 | return ___alloc_bootmem_nopanic(size, align, goal, 0); | 749 | unsigned long limit = 0; |
750 | |||
751 | #ifdef CONFIG_NO_BOOTMEM | ||
752 | limit = -1UL; | ||
753 | #endif | ||
754 | |||
755 | return ___alloc_bootmem_nopanic(size, align, goal, limit); | ||
635 | } | 756 | } |
636 | 757 | ||
637 | static void * __init ___alloc_bootmem(unsigned long size, unsigned long align, | 758 | static void * __init ___alloc_bootmem(unsigned long size, unsigned long align, |
@@ -665,9 +786,16 @@ static void * __init ___alloc_bootmem(unsigned long size, unsigned long align, | |||
665 | void * __init __alloc_bootmem(unsigned long size, unsigned long align, | 786 | void * __init __alloc_bootmem(unsigned long size, unsigned long align, |
666 | unsigned long goal) | 787 | unsigned long goal) |
667 | { | 788 | { |
668 | return ___alloc_bootmem(size, align, goal, 0); | 789 | unsigned long limit = 0; |
790 | |||
791 | #ifdef CONFIG_NO_BOOTMEM | ||
792 | limit = -1UL; | ||
793 | #endif | ||
794 | |||
795 | return ___alloc_bootmem(size, align, goal, limit); | ||
669 | } | 796 | } |
670 | 797 | ||
798 | #ifndef CONFIG_NO_BOOTMEM | ||
671 | static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, | 799 | static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, |
672 | unsigned long size, unsigned long align, | 800 | unsigned long size, unsigned long align, |
673 | unsigned long goal, unsigned long limit) | 801 | unsigned long goal, unsigned long limit) |
@@ -684,6 +812,7 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, | |||
684 | 812 | ||
685 | return ___alloc_bootmem(size, align, goal, limit); | 813 | return ___alloc_bootmem(size, align, goal, limit); |
686 | } | 814 | } |
815 | #endif | ||
687 | 816 | ||
688 | /** | 817 | /** |
689 | * __alloc_bootmem_node - allocate boot memory from a specific node | 818 | * __alloc_bootmem_node - allocate boot memory from a specific node |
@@ -706,7 +835,46 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, | |||
706 | if (WARN_ON_ONCE(slab_is_available())) | 835 | if (WARN_ON_ONCE(slab_is_available())) |
707 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); | 836 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); |
708 | 837 | ||
838 | #ifdef CONFIG_NO_BOOTMEM | ||
839 | return __alloc_memory_core_early(pgdat->node_id, size, align, | ||
840 | goal, -1ULL); | ||
841 | #else | ||
709 | return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0); | 842 | return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0); |
843 | #endif | ||
844 | } | ||
845 | |||
846 | void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size, | ||
847 | unsigned long align, unsigned long goal) | ||
848 | { | ||
849 | #ifdef MAX_DMA32_PFN | ||
850 | unsigned long end_pfn; | ||
851 | |||
852 | if (WARN_ON_ONCE(slab_is_available())) | ||
853 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); | ||
854 | |||
855 | /* update goal according ...MAX_DMA32_PFN */ | ||
856 | end_pfn = pgdat->node_start_pfn + pgdat->node_spanned_pages; | ||
857 | |||
858 | if (end_pfn > MAX_DMA32_PFN + (128 >> (20 - PAGE_SHIFT)) && | ||
859 | (goal >> PAGE_SHIFT) < MAX_DMA32_PFN) { | ||
860 | void *ptr; | ||
861 | unsigned long new_goal; | ||
862 | |||
863 | new_goal = MAX_DMA32_PFN << PAGE_SHIFT; | ||
864 | #ifdef CONFIG_NO_BOOTMEM | ||
865 | ptr = __alloc_memory_core_early(pgdat->node_id, size, align, | ||
866 | new_goal, -1ULL); | ||
867 | #else | ||
868 | ptr = alloc_bootmem_core(pgdat->bdata, size, align, | ||
869 | new_goal, 0); | ||
870 | #endif | ||
871 | if (ptr) | ||
872 | return ptr; | ||
873 | } | ||
874 | #endif | ||
875 | |||
876 | return __alloc_bootmem_node(pgdat, size, align, goal); | ||
877 | |||
710 | } | 878 | } |
711 | 879 | ||
712 | #ifdef CONFIG_SPARSEMEM | 880 | #ifdef CONFIG_SPARSEMEM |
@@ -720,6 +888,16 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, | |||
720 | void * __init alloc_bootmem_section(unsigned long size, | 888 | void * __init alloc_bootmem_section(unsigned long size, |
721 | unsigned long section_nr) | 889 | unsigned long section_nr) |
722 | { | 890 | { |
891 | #ifdef CONFIG_NO_BOOTMEM | ||
892 | unsigned long pfn, goal, limit; | ||
893 | |||
894 | pfn = section_nr_to_pfn(section_nr); | ||
895 | goal = pfn << PAGE_SHIFT; | ||
896 | limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT; | ||
897 | |||
898 | return __alloc_memory_core_early(early_pfn_to_nid(pfn), size, | ||
899 | SMP_CACHE_BYTES, goal, limit); | ||
900 | #else | ||
723 | bootmem_data_t *bdata; | 901 | bootmem_data_t *bdata; |
724 | unsigned long pfn, goal, limit; | 902 | unsigned long pfn, goal, limit; |
725 | 903 | ||
@@ -729,6 +907,7 @@ void * __init alloc_bootmem_section(unsigned long size, | |||
729 | bdata = &bootmem_node_data[early_pfn_to_nid(pfn)]; | 907 | bdata = &bootmem_node_data[early_pfn_to_nid(pfn)]; |
730 | 908 | ||
731 | return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit); | 909 | return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit); |
910 | #endif | ||
732 | } | 911 | } |
733 | #endif | 912 | #endif |
734 | 913 | ||
@@ -740,11 +919,16 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, | |||
740 | if (WARN_ON_ONCE(slab_is_available())) | 919 | if (WARN_ON_ONCE(slab_is_available())) |
741 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); | 920 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); |
742 | 921 | ||
922 | #ifdef CONFIG_NO_BOOTMEM | ||
923 | ptr = __alloc_memory_core_early(pgdat->node_id, size, align, | ||
924 | goal, -1ULL); | ||
925 | #else | ||
743 | ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0); | 926 | ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0); |
744 | if (ptr) | 927 | if (ptr) |
745 | return ptr; | 928 | return ptr; |
746 | 929 | ||
747 | ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); | 930 | ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); |
931 | #endif | ||
748 | if (ptr) | 932 | if (ptr) |
749 | return ptr; | 933 | return ptr; |
750 | 934 | ||
@@ -795,6 +979,11 @@ void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, | |||
795 | if (WARN_ON_ONCE(slab_is_available())) | 979 | if (WARN_ON_ONCE(slab_is_available())) |
796 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); | 980 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); |
797 | 981 | ||
982 | #ifdef CONFIG_NO_BOOTMEM | ||
983 | return __alloc_memory_core_early(pgdat->node_id, size, align, | ||
984 | goal, ARCH_LOW_ADDRESS_LIMIT); | ||
985 | #else | ||
798 | return ___alloc_bootmem_node(pgdat->bdata, size, align, | 986 | return ___alloc_bootmem_node(pgdat->bdata, size, align, |
799 | goal, ARCH_LOW_ADDRESS_LIMIT); | 987 | goal, ARCH_LOW_ADDRESS_LIMIT); |
988 | #endif | ||
800 | } | 989 | } |
diff --git a/mm/failslab.c b/mm/failslab.c index 9339de5f0a91..bb41f98dd8b7 100644 --- a/mm/failslab.c +++ b/mm/failslab.c | |||
@@ -1,18 +1,22 @@ | |||
1 | #include <linux/fault-inject.h> | 1 | #include <linux/fault-inject.h> |
2 | #include <linux/gfp.h> | 2 | #include <linux/gfp.h> |
3 | #include <linux/slab.h> | ||
3 | 4 | ||
4 | static struct { | 5 | static struct { |
5 | struct fault_attr attr; | 6 | struct fault_attr attr; |
6 | u32 ignore_gfp_wait; | 7 | u32 ignore_gfp_wait; |
8 | int cache_filter; | ||
7 | #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS | 9 | #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS |
8 | struct dentry *ignore_gfp_wait_file; | 10 | struct dentry *ignore_gfp_wait_file; |
11 | struct dentry *cache_filter_file; | ||
9 | #endif | 12 | #endif |
10 | } failslab = { | 13 | } failslab = { |
11 | .attr = FAULT_ATTR_INITIALIZER, | 14 | .attr = FAULT_ATTR_INITIALIZER, |
12 | .ignore_gfp_wait = 1, | 15 | .ignore_gfp_wait = 1, |
16 | .cache_filter = 0, | ||
13 | }; | 17 | }; |
14 | 18 | ||
15 | bool should_failslab(size_t size, gfp_t gfpflags) | 19 | bool should_failslab(size_t size, gfp_t gfpflags, unsigned long cache_flags) |
16 | { | 20 | { |
17 | if (gfpflags & __GFP_NOFAIL) | 21 | if (gfpflags & __GFP_NOFAIL) |
18 | return false; | 22 | return false; |
@@ -20,6 +24,9 @@ bool should_failslab(size_t size, gfp_t gfpflags) | |||
20 | if (failslab.ignore_gfp_wait && (gfpflags & __GFP_WAIT)) | 24 | if (failslab.ignore_gfp_wait && (gfpflags & __GFP_WAIT)) |
21 | return false; | 25 | return false; |
22 | 26 | ||
27 | if (failslab.cache_filter && !(cache_flags & SLAB_FAILSLAB)) | ||
28 | return false; | ||
29 | |||
23 | return should_fail(&failslab.attr, size); | 30 | return should_fail(&failslab.attr, size); |
24 | } | 31 | } |
25 | 32 | ||
@@ -30,7 +37,6 @@ static int __init setup_failslab(char *str) | |||
30 | __setup("failslab=", setup_failslab); | 37 | __setup("failslab=", setup_failslab); |
31 | 38 | ||
32 | #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS | 39 | #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS |
33 | |||
34 | static int __init failslab_debugfs_init(void) | 40 | static int __init failslab_debugfs_init(void) |
35 | { | 41 | { |
36 | mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; | 42 | mode_t mode = S_IFREG | S_IRUSR | S_IWUSR; |
@@ -46,8 +52,14 @@ static int __init failslab_debugfs_init(void) | |||
46 | debugfs_create_bool("ignore-gfp-wait", mode, dir, | 52 | debugfs_create_bool("ignore-gfp-wait", mode, dir, |
47 | &failslab.ignore_gfp_wait); | 53 | &failslab.ignore_gfp_wait); |
48 | 54 | ||
49 | if (!failslab.ignore_gfp_wait_file) { | 55 | failslab.cache_filter_file = |
56 | debugfs_create_bool("cache-filter", mode, dir, | ||
57 | &failslab.cache_filter); | ||
58 | |||
59 | if (!failslab.ignore_gfp_wait_file || | ||
60 | !failslab.cache_filter_file) { | ||
50 | err = -ENOMEM; | 61 | err = -ENOMEM; |
62 | debugfs_remove(failslab.cache_filter_file); | ||
51 | debugfs_remove(failslab.ignore_gfp_wait_file); | 63 | debugfs_remove(failslab.ignore_gfp_wait_file); |
52 | cleanup_fault_attr_dentries(&failslab.attr); | 64 | cleanup_fault_attr_dentries(&failslab.attr); |
53 | } | 65 | } |
diff --git a/mm/filemap.c b/mm/filemap.c index 96ac6b0eb6cb..698ea80f2102 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -1634,14 +1634,15 @@ EXPORT_SYMBOL(generic_file_readonly_mmap); | |||
1634 | static struct page *__read_cache_page(struct address_space *mapping, | 1634 | static struct page *__read_cache_page(struct address_space *mapping, |
1635 | pgoff_t index, | 1635 | pgoff_t index, |
1636 | int (*filler)(void *,struct page*), | 1636 | int (*filler)(void *,struct page*), |
1637 | void *data) | 1637 | void *data, |
1638 | gfp_t gfp) | ||
1638 | { | 1639 | { |
1639 | struct page *page; | 1640 | struct page *page; |
1640 | int err; | 1641 | int err; |
1641 | repeat: | 1642 | repeat: |
1642 | page = find_get_page(mapping, index); | 1643 | page = find_get_page(mapping, index); |
1643 | if (!page) { | 1644 | if (!page) { |
1644 | page = page_cache_alloc_cold(mapping); | 1645 | page = __page_cache_alloc(gfp | __GFP_COLD); |
1645 | if (!page) | 1646 | if (!page) |
1646 | return ERR_PTR(-ENOMEM); | 1647 | return ERR_PTR(-ENOMEM); |
1647 | err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL); | 1648 | err = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL); |
@@ -1661,31 +1662,18 @@ repeat: | |||
1661 | return page; | 1662 | return page; |
1662 | } | 1663 | } |
1663 | 1664 | ||
1664 | /** | 1665 | static struct page *do_read_cache_page(struct address_space *mapping, |
1665 | * read_cache_page_async - read into page cache, fill it if needed | ||
1666 | * @mapping: the page's address_space | ||
1667 | * @index: the page index | ||
1668 | * @filler: function to perform the read | ||
1669 | * @data: destination for read data | ||
1670 | * | ||
1671 | * Same as read_cache_page, but don't wait for page to become unlocked | ||
1672 | * after submitting it to the filler. | ||
1673 | * | ||
1674 | * Read into the page cache. If a page already exists, and PageUptodate() is | ||
1675 | * not set, try to fill the page but don't wait for it to become unlocked. | ||
1676 | * | ||
1677 | * If the page does not get brought uptodate, return -EIO. | ||
1678 | */ | ||
1679 | struct page *read_cache_page_async(struct address_space *mapping, | ||
1680 | pgoff_t index, | 1666 | pgoff_t index, |
1681 | int (*filler)(void *,struct page*), | 1667 | int (*filler)(void *,struct page*), |
1682 | void *data) | 1668 | void *data, |
1669 | gfp_t gfp) | ||
1670 | |||
1683 | { | 1671 | { |
1684 | struct page *page; | 1672 | struct page *page; |
1685 | int err; | 1673 | int err; |
1686 | 1674 | ||
1687 | retry: | 1675 | retry: |
1688 | page = __read_cache_page(mapping, index, filler, data); | 1676 | page = __read_cache_page(mapping, index, filler, data, gfp); |
1689 | if (IS_ERR(page)) | 1677 | if (IS_ERR(page)) |
1690 | return page; | 1678 | return page; |
1691 | if (PageUptodate(page)) | 1679 | if (PageUptodate(page)) |
@@ -1710,8 +1698,67 @@ out: | |||
1710 | mark_page_accessed(page); | 1698 | mark_page_accessed(page); |
1711 | return page; | 1699 | return page; |
1712 | } | 1700 | } |
1701 | |||
1702 | /** | ||
1703 | * read_cache_page_async - read into page cache, fill it if needed | ||
1704 | * @mapping: the page's address_space | ||
1705 | * @index: the page index | ||
1706 | * @filler: function to perform the read | ||
1707 | * @data: destination for read data | ||
1708 | * | ||
1709 | * Same as read_cache_page, but don't wait for page to become unlocked | ||
1710 | * after submitting it to the filler. | ||
1711 | * | ||
1712 | * Read into the page cache. If a page already exists, and PageUptodate() is | ||
1713 | * not set, try to fill the page but don't wait for it to become unlocked. | ||
1714 | * | ||
1715 | * If the page does not get brought uptodate, return -EIO. | ||
1716 | */ | ||
1717 | struct page *read_cache_page_async(struct address_space *mapping, | ||
1718 | pgoff_t index, | ||
1719 | int (*filler)(void *,struct page*), | ||
1720 | void *data) | ||
1721 | { | ||
1722 | return do_read_cache_page(mapping, index, filler, data, mapping_gfp_mask(mapping)); | ||
1723 | } | ||
1713 | EXPORT_SYMBOL(read_cache_page_async); | 1724 | EXPORT_SYMBOL(read_cache_page_async); |
1714 | 1725 | ||
1726 | static struct page *wait_on_page_read(struct page *page) | ||
1727 | { | ||
1728 | if (!IS_ERR(page)) { | ||
1729 | wait_on_page_locked(page); | ||
1730 | if (!PageUptodate(page)) { | ||
1731 | page_cache_release(page); | ||
1732 | page = ERR_PTR(-EIO); | ||
1733 | } | ||
1734 | } | ||
1735 | return page; | ||
1736 | } | ||
1737 | |||
1738 | /** | ||
1739 | * read_cache_page_gfp - read into page cache, using specified page allocation flags. | ||
1740 | * @mapping: the page's address_space | ||
1741 | * @index: the page index | ||
1742 | * @gfp: the page allocator flags to use if allocating | ||
1743 | * | ||
1744 | * This is the same as "read_mapping_page(mapping, index, NULL)", but with | ||
1745 | * any new page allocations done using the specified allocation flags. Note | ||
1746 | * that the Radix tree operations will still use GFP_KERNEL, so you can't | ||
1747 | * expect to do this atomically or anything like that - but you can pass in | ||
1748 | * other page requirements. | ||
1749 | * | ||
1750 | * If the page does not get brought uptodate, return -EIO. | ||
1751 | */ | ||
1752 | struct page *read_cache_page_gfp(struct address_space *mapping, | ||
1753 | pgoff_t index, | ||
1754 | gfp_t gfp) | ||
1755 | { | ||
1756 | filler_t *filler = (filler_t *)mapping->a_ops->readpage; | ||
1757 | |||
1758 | return wait_on_page_read(do_read_cache_page(mapping, index, filler, NULL, gfp)); | ||
1759 | } | ||
1760 | EXPORT_SYMBOL(read_cache_page_gfp); | ||
1761 | |||
1715 | /** | 1762 | /** |
1716 | * read_cache_page - read into page cache, fill it if needed | 1763 | * read_cache_page - read into page cache, fill it if needed |
1717 | * @mapping: the page's address_space | 1764 | * @mapping: the page's address_space |
@@ -1729,18 +1776,7 @@ struct page *read_cache_page(struct address_space *mapping, | |||
1729 | int (*filler)(void *,struct page*), | 1776 | int (*filler)(void *,struct page*), |
1730 | void *data) | 1777 | void *data) |
1731 | { | 1778 | { |
1732 | struct page *page; | 1779 | return wait_on_page_read(read_cache_page_async(mapping, index, filler, data)); |
1733 | |||
1734 | page = read_cache_page_async(mapping, index, filler, data); | ||
1735 | if (IS_ERR(page)) | ||
1736 | goto out; | ||
1737 | wait_on_page_locked(page); | ||
1738 | if (!PageUptodate(page)) { | ||
1739 | page_cache_release(page); | ||
1740 | page = ERR_PTR(-EIO); | ||
1741 | } | ||
1742 | out: | ||
1743 | return page; | ||
1744 | } | 1780 | } |
1745 | EXPORT_SYMBOL(read_cache_page); | 1781 | EXPORT_SYMBOL(read_cache_page); |
1746 | 1782 | ||
@@ -2196,6 +2232,9 @@ again: | |||
2196 | if (unlikely(status)) | 2232 | if (unlikely(status)) |
2197 | break; | 2233 | break; |
2198 | 2234 | ||
2235 | if (mapping_writably_mapped(mapping)) | ||
2236 | flush_dcache_page(page); | ||
2237 | |||
2199 | pagefault_disable(); | 2238 | pagefault_disable(); |
2200 | copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); | 2239 | copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); |
2201 | pagefault_enable(); | 2240 | pagefault_enable(); |
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 65f38c218207..3a5aeb37c110 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -402,7 +402,7 @@ static void clear_huge_page(struct page *page, | |||
402 | { | 402 | { |
403 | int i; | 403 | int i; |
404 | 404 | ||
405 | if (unlikely(sz > MAX_ORDER_NR_PAGES)) { | 405 | if (unlikely(sz/PAGE_SIZE > MAX_ORDER_NR_PAGES)) { |
406 | clear_gigantic_page(page, addr, sz); | 406 | clear_gigantic_page(page, addr, sz); |
407 | return; | 407 | return; |
408 | } | 408 | } |
@@ -1515,10 +1515,9 @@ static struct attribute_group hstate_attr_group = { | |||
1515 | .attrs = hstate_attrs, | 1515 | .attrs = hstate_attrs, |
1516 | }; | 1516 | }; |
1517 | 1517 | ||
1518 | static int __init hugetlb_sysfs_add_hstate(struct hstate *h, | 1518 | static int hugetlb_sysfs_add_hstate(struct hstate *h, struct kobject *parent, |
1519 | struct kobject *parent, | 1519 | struct kobject **hstate_kobjs, |
1520 | struct kobject **hstate_kobjs, | 1520 | struct attribute_group *hstate_attr_group) |
1521 | struct attribute_group *hstate_attr_group) | ||
1522 | { | 1521 | { |
1523 | int retval; | 1522 | int retval; |
1524 | int hi = h - hstates; | 1523 | int hi = h - hstates; |
@@ -2088,7 +2087,7 @@ static void set_huge_ptep_writable(struct vm_area_struct *vma, | |||
2088 | 2087 | ||
2089 | entry = pte_mkwrite(pte_mkdirty(huge_ptep_get(ptep))); | 2088 | entry = pte_mkwrite(pte_mkdirty(huge_ptep_get(ptep))); |
2090 | if (huge_ptep_set_access_flags(vma, address, ptep, entry, 1)) { | 2089 | if (huge_ptep_set_access_flags(vma, address, ptep, entry, 1)) { |
2091 | update_mmu_cache(vma, address, entry); | 2090 | update_mmu_cache(vma, address, ptep); |
2092 | } | 2091 | } |
2093 | } | 2092 | } |
2094 | 2093 | ||
@@ -2559,7 +2558,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2559 | entry = pte_mkyoung(entry); | 2558 | entry = pte_mkyoung(entry); |
2560 | if (huge_ptep_set_access_flags(vma, address, ptep, entry, | 2559 | if (huge_ptep_set_access_flags(vma, address, ptep, entry, |
2561 | flags & FAULT_FLAG_WRITE)) | 2560 | flags & FAULT_FLAG_WRITE)) |
2562 | update_mmu_cache(vma, address, entry); | 2561 | update_mmu_cache(vma, address, ptep); |
2563 | 2562 | ||
2564 | out_page_table_lock: | 2563 | out_page_table_lock: |
2565 | spin_unlock(&mm->page_table_lock); | 2564 | spin_unlock(&mm->page_table_lock); |
diff --git a/mm/maccess.c b/mm/maccess.c index 9073695ff25f..4e348dbaecd7 100644 --- a/mm/maccess.c +++ b/mm/maccess.c | |||
@@ -14,7 +14,11 @@ | |||
14 | * Safely read from address @src to the buffer at @dst. If a kernel fault | 14 | * Safely read from address @src to the buffer at @dst. If a kernel fault |
15 | * happens, handle that and return -EFAULT. | 15 | * happens, handle that and return -EFAULT. |
16 | */ | 16 | */ |
17 | long probe_kernel_read(void *dst, void *src, size_t size) | 17 | |
18 | long __weak probe_kernel_read(void *dst, void *src, size_t size) | ||
19 | __attribute__((alias("__probe_kernel_read"))); | ||
20 | |||
21 | long __probe_kernel_read(void *dst, void *src, size_t size) | ||
18 | { | 22 | { |
19 | long ret; | 23 | long ret; |
20 | mm_segment_t old_fs = get_fs(); | 24 | mm_segment_t old_fs = get_fs(); |
@@ -39,7 +43,10 @@ EXPORT_SYMBOL_GPL(probe_kernel_read); | |||
39 | * Safely write to address @dst from the buffer at @src. If a kernel fault | 43 | * Safely write to address @dst from the buffer at @src. If a kernel fault |
40 | * happens, handle that and return -EFAULT. | 44 | * happens, handle that and return -EFAULT. |
41 | */ | 45 | */ |
42 | long notrace __weak probe_kernel_write(void *dst, void *src, size_t size) | 46 | long __weak probe_kernel_write(void *dst, void *src, size_t size) |
47 | __attribute__((alias("__probe_kernel_write"))); | ||
48 | |||
49 | long __probe_kernel_write(void *dst, void *src, size_t size) | ||
43 | { | 50 | { |
44 | long ret; | 51 | long ret; |
45 | mm_segment_t old_fs = get_fs(); | 52 | mm_segment_t old_fs = get_fs(); |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 488b644e0e8e..954032b80bed 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -2586,7 +2586,7 @@ static int mem_cgroup_force_empty(struct mem_cgroup *mem, bool free_all) | |||
2586 | if (free_all) | 2586 | if (free_all) |
2587 | goto try_to_free; | 2587 | goto try_to_free; |
2588 | move_account: | 2588 | move_account: |
2589 | while (mem->res.usage > 0) { | 2589 | do { |
2590 | ret = -EBUSY; | 2590 | ret = -EBUSY; |
2591 | if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children)) | 2591 | if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children)) |
2592 | goto out; | 2592 | goto out; |
@@ -2614,8 +2614,8 @@ move_account: | |||
2614 | if (ret == -ENOMEM) | 2614 | if (ret == -ENOMEM) |
2615 | goto try_to_free; | 2615 | goto try_to_free; |
2616 | cond_resched(); | 2616 | cond_resched(); |
2617 | } | 2617 | /* "ret" should also be checked to ensure all lists are empty. */ |
2618 | ret = 0; | 2618 | } while (mem->res.usage > 0 || ret); |
2619 | out: | 2619 | out: |
2620 | css_put(&mem->css); | 2620 | css_put(&mem->css); |
2621 | return ret; | 2621 | return ret; |
@@ -2648,10 +2648,7 @@ try_to_free: | |||
2648 | } | 2648 | } |
2649 | lru_add_drain(); | 2649 | lru_add_drain(); |
2650 | /* try move_account...there may be some *locked* pages. */ | 2650 | /* try move_account...there may be some *locked* pages. */ |
2651 | if (mem->res.usage) | 2651 | goto move_account; |
2652 | goto move_account; | ||
2653 | ret = 0; | ||
2654 | goto out; | ||
2655 | } | 2652 | } |
2656 | 2653 | ||
2657 | int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event) | 2654 | int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event) |
diff --git a/mm/memory.c b/mm/memory.c index 09e4b1be7b67..72fb5f39bccc 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -1593,7 +1593,7 @@ static int insert_pfn(struct vm_area_struct *vma, unsigned long addr, | |||
1593 | /* Ok, finally just insert the thing.. */ | 1593 | /* Ok, finally just insert the thing.. */ |
1594 | entry = pte_mkspecial(pfn_pte(pfn, prot)); | 1594 | entry = pte_mkspecial(pfn_pte(pfn, prot)); |
1595 | set_pte_at(mm, addr, pte, entry); | 1595 | set_pte_at(mm, addr, pte, entry); |
1596 | update_mmu_cache(vma, addr, entry); /* XXX: why not for insert_page? */ | 1596 | update_mmu_cache(vma, addr, pte); /* XXX: why not for insert_page? */ |
1597 | 1597 | ||
1598 | retval = 0; | 1598 | retval = 0; |
1599 | out_unlock: | 1599 | out_unlock: |
@@ -2116,7 +2116,7 @@ reuse: | |||
2116 | entry = pte_mkyoung(orig_pte); | 2116 | entry = pte_mkyoung(orig_pte); |
2117 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); | 2117 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); |
2118 | if (ptep_set_access_flags(vma, address, page_table, entry,1)) | 2118 | if (ptep_set_access_flags(vma, address, page_table, entry,1)) |
2119 | update_mmu_cache(vma, address, entry); | 2119 | update_mmu_cache(vma, address, page_table); |
2120 | ret |= VM_FAULT_WRITE; | 2120 | ret |= VM_FAULT_WRITE; |
2121 | goto unlock; | 2121 | goto unlock; |
2122 | } | 2122 | } |
@@ -2185,7 +2185,7 @@ gotten: | |||
2185 | * new page to be mapped directly into the secondary page table. | 2185 | * new page to be mapped directly into the secondary page table. |
2186 | */ | 2186 | */ |
2187 | set_pte_at_notify(mm, address, page_table, entry); | 2187 | set_pte_at_notify(mm, address, page_table, entry); |
2188 | update_mmu_cache(vma, address, entry); | 2188 | update_mmu_cache(vma, address, page_table); |
2189 | if (old_page) { | 2189 | if (old_page) { |
2190 | /* | 2190 | /* |
2191 | * Only after switching the pte to the new page may | 2191 | * Only after switching the pte to the new page may |
@@ -2629,7 +2629,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2629 | } | 2629 | } |
2630 | 2630 | ||
2631 | /* No need to invalidate - it was non-present before */ | 2631 | /* No need to invalidate - it was non-present before */ |
2632 | update_mmu_cache(vma, address, pte); | 2632 | update_mmu_cache(vma, address, page_table); |
2633 | unlock: | 2633 | unlock: |
2634 | pte_unmap_unlock(page_table, ptl); | 2634 | pte_unmap_unlock(page_table, ptl); |
2635 | out: | 2635 | out: |
@@ -2694,7 +2694,7 @@ setpte: | |||
2694 | set_pte_at(mm, address, page_table, entry); | 2694 | set_pte_at(mm, address, page_table, entry); |
2695 | 2695 | ||
2696 | /* No need to invalidate - it was non-present before */ | 2696 | /* No need to invalidate - it was non-present before */ |
2697 | update_mmu_cache(vma, address, entry); | 2697 | update_mmu_cache(vma, address, page_table); |
2698 | unlock: | 2698 | unlock: |
2699 | pte_unmap_unlock(page_table, ptl); | 2699 | pte_unmap_unlock(page_table, ptl); |
2700 | return 0; | 2700 | return 0; |
@@ -2855,7 +2855,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2855 | set_pte_at(mm, address, page_table, entry); | 2855 | set_pte_at(mm, address, page_table, entry); |
2856 | 2856 | ||
2857 | /* no need to invalidate: a not-present page won't be cached */ | 2857 | /* no need to invalidate: a not-present page won't be cached */ |
2858 | update_mmu_cache(vma, address, entry); | 2858 | update_mmu_cache(vma, address, page_table); |
2859 | } else { | 2859 | } else { |
2860 | if (charged) | 2860 | if (charged) |
2861 | mem_cgroup_uncharge_page(page); | 2861 | mem_cgroup_uncharge_page(page); |
@@ -2992,7 +2992,7 @@ static inline int handle_pte_fault(struct mm_struct *mm, | |||
2992 | } | 2992 | } |
2993 | entry = pte_mkyoung(entry); | 2993 | entry = pte_mkyoung(entry); |
2994 | if (ptep_set_access_flags(vma, address, pte, entry, flags & FAULT_FLAG_WRITE)) { | 2994 | if (ptep_set_access_flags(vma, address, pte, entry, flags & FAULT_FLAG_WRITE)) { |
2995 | update_mmu_cache(vma, address, entry); | 2995 | update_mmu_cache(vma, address, pte); |
2996 | } else { | 2996 | } else { |
2997 | /* | 2997 | /* |
2998 | * This is needed only for protection faults but the arch code | 2998 | * This is needed only for protection faults but the arch code |
diff --git a/mm/migrate.c b/mm/migrate.c index efddbf0926b2..edb6101ed774 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -134,7 +134,7 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma, | |||
134 | page_add_file_rmap(new); | 134 | page_add_file_rmap(new); |
135 | 135 | ||
136 | /* No need to invalidate - it was non-present before */ | 136 | /* No need to invalidate - it was non-present before */ |
137 | update_mmu_cache(vma, addr, pte); | 137 | update_mmu_cache(vma, addr, ptep); |
138 | unlock: | 138 | unlock: |
139 | pte_unmap_unlock(ptep, ptl); | 139 | pte_unmap_unlock(ptep, ptl); |
140 | out: | 140 | out: |
@@ -912,6 +912,9 @@ static int do_pages_move(struct mm_struct *mm, struct task_struct *task, | |||
912 | goto out_pm; | 912 | goto out_pm; |
913 | 913 | ||
914 | err = -ENODEV; | 914 | err = -ENODEV; |
915 | if (node < 0 || node >= MAX_NUMNODES) | ||
916 | goto out_pm; | ||
917 | |||
915 | if (!node_state(node, N_HIGH_MEMORY)) | 918 | if (!node_state(node, N_HIGH_MEMORY)) |
916 | goto out_pm; | 919 | goto out_pm; |
917 | 920 | ||
@@ -999,33 +1002,27 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages, | |||
999 | #define DO_PAGES_STAT_CHUNK_NR 16 | 1002 | #define DO_PAGES_STAT_CHUNK_NR 16 |
1000 | const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR]; | 1003 | const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR]; |
1001 | int chunk_status[DO_PAGES_STAT_CHUNK_NR]; | 1004 | int chunk_status[DO_PAGES_STAT_CHUNK_NR]; |
1002 | unsigned long i, chunk_nr = DO_PAGES_STAT_CHUNK_NR; | ||
1003 | int err; | ||
1004 | 1005 | ||
1005 | for (i = 0; i < nr_pages; i += chunk_nr) { | 1006 | while (nr_pages) { |
1006 | if (chunk_nr > nr_pages - i) | 1007 | unsigned long chunk_nr; |
1007 | chunk_nr = nr_pages - i; | ||
1008 | 1008 | ||
1009 | err = copy_from_user(chunk_pages, &pages[i], | 1009 | chunk_nr = nr_pages; |
1010 | chunk_nr * sizeof(*chunk_pages)); | 1010 | if (chunk_nr > DO_PAGES_STAT_CHUNK_NR) |
1011 | if (err) { | 1011 | chunk_nr = DO_PAGES_STAT_CHUNK_NR; |
1012 | err = -EFAULT; | 1012 | |
1013 | goto out; | 1013 | if (copy_from_user(chunk_pages, pages, chunk_nr * sizeof(*chunk_pages))) |
1014 | } | 1014 | break; |
1015 | 1015 | ||
1016 | do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status); | 1016 | do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status); |
1017 | 1017 | ||
1018 | err = copy_to_user(&status[i], chunk_status, | 1018 | if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status))) |
1019 | chunk_nr * sizeof(*chunk_status)); | 1019 | break; |
1020 | if (err) { | ||
1021 | err = -EFAULT; | ||
1022 | goto out; | ||
1023 | } | ||
1024 | } | ||
1025 | err = 0; | ||
1026 | 1020 | ||
1027 | out: | 1021 | pages += chunk_nr; |
1028 | return err; | 1022 | status += chunk_nr; |
1023 | nr_pages -= chunk_nr; | ||
1024 | } | ||
1025 | return nr_pages ? -EFAULT : 0; | ||
1029 | } | 1026 | } |
1030 | 1027 | ||
1031 | /* | 1028 | /* |
@@ -1043,6 +1043,46 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, | |||
1043 | } | 1043 | } |
1044 | EXPORT_SYMBOL(do_mmap_pgoff); | 1044 | EXPORT_SYMBOL(do_mmap_pgoff); |
1045 | 1045 | ||
1046 | SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, | ||
1047 | unsigned long, prot, unsigned long, flags, | ||
1048 | unsigned long, fd, unsigned long, pgoff) | ||
1049 | { | ||
1050 | struct file *file = NULL; | ||
1051 | unsigned long retval = -EBADF; | ||
1052 | |||
1053 | if (!(flags & MAP_ANONYMOUS)) { | ||
1054 | if (unlikely(flags & MAP_HUGETLB)) | ||
1055 | return -EINVAL; | ||
1056 | file = fget(fd); | ||
1057 | if (!file) | ||
1058 | goto out; | ||
1059 | } else if (flags & MAP_HUGETLB) { | ||
1060 | struct user_struct *user = NULL; | ||
1061 | /* | ||
1062 | * VM_NORESERVE is used because the reservations will be | ||
1063 | * taken when vm_ops->mmap() is called | ||
1064 | * A dummy user value is used because we are not locking | ||
1065 | * memory so no accounting is necessary | ||
1066 | */ | ||
1067 | len = ALIGN(len, huge_page_size(&default_hstate)); | ||
1068 | file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE, | ||
1069 | &user, HUGETLB_ANONHUGE_INODE); | ||
1070 | if (IS_ERR(file)) | ||
1071 | return PTR_ERR(file); | ||
1072 | } | ||
1073 | |||
1074 | flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); | ||
1075 | |||
1076 | down_write(&current->mm->mmap_sem); | ||
1077 | retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); | ||
1078 | up_write(&current->mm->mmap_sem); | ||
1079 | |||
1080 | if (file) | ||
1081 | fput(file); | ||
1082 | out: | ||
1083 | return retval; | ||
1084 | } | ||
1085 | |||
1046 | /* | 1086 | /* |
1047 | * Some shared mappings will want the pages marked read-only | 1087 | * Some shared mappings will want the pages marked read-only |
1048 | * to track write events. If so, we'll downgrade vm_page_prot | 1088 | * to track write events. If so, we'll downgrade vm_page_prot |
diff --git a/mm/mmu_context.c b/mm/mmu_context.c index ded9081f4021..0777654147c9 100644 --- a/mm/mmu_context.c +++ b/mm/mmu_context.c | |||
@@ -5,6 +5,7 @@ | |||
5 | 5 | ||
6 | #include <linux/mm.h> | 6 | #include <linux/mm.h> |
7 | #include <linux/mmu_context.h> | 7 | #include <linux/mmu_context.h> |
8 | #include <linux/module.h> | ||
8 | #include <linux/sched.h> | 9 | #include <linux/sched.h> |
9 | 10 | ||
10 | #include <asm/mmu_context.h> | 11 | #include <asm/mmu_context.h> |
@@ -37,6 +38,7 @@ void use_mm(struct mm_struct *mm) | |||
37 | if (active_mm != mm) | 38 | if (active_mm != mm) |
38 | mmdrop(active_mm); | 39 | mmdrop(active_mm); |
39 | } | 40 | } |
41 | EXPORT_SYMBOL_GPL(use_mm); | ||
40 | 42 | ||
41 | /* | 43 | /* |
42 | * unuse_mm | 44 | * unuse_mm |
@@ -56,3 +58,4 @@ void unuse_mm(struct mm_struct *mm) | |||
56 | enter_lazy_tlb(mm, tsk); | 58 | enter_lazy_tlb(mm, tsk); |
57 | task_unlock(tsk); | 59 | task_unlock(tsk); |
58 | } | 60 | } |
61 | EXPORT_SYMBOL_GPL(unuse_mm); | ||
diff --git a/mm/nommu.c b/mm/nommu.c index 8687973462bb..48a2ecfaf059 100644 --- a/mm/nommu.c +++ b/mm/nommu.c | |||
@@ -432,6 +432,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) | |||
432 | /* | 432 | /* |
433 | * Ok, looks good - let it rip. | 433 | * Ok, looks good - let it rip. |
434 | */ | 434 | */ |
435 | flush_icache_range(mm->brk, brk); | ||
435 | return mm->brk = brk; | 436 | return mm->brk = brk; |
436 | } | 437 | } |
437 | 438 | ||
@@ -551,11 +552,11 @@ static void free_page_series(unsigned long from, unsigned long to) | |||
551 | static void __put_nommu_region(struct vm_region *region) | 552 | static void __put_nommu_region(struct vm_region *region) |
552 | __releases(nommu_region_sem) | 553 | __releases(nommu_region_sem) |
553 | { | 554 | { |
554 | kenter("%p{%d}", region, atomic_read(&region->vm_usage)); | 555 | kenter("%p{%d}", region, region->vm_usage); |
555 | 556 | ||
556 | BUG_ON(!nommu_region_tree.rb_node); | 557 | BUG_ON(!nommu_region_tree.rb_node); |
557 | 558 | ||
558 | if (atomic_dec_and_test(&region->vm_usage)) { | 559 | if (--region->vm_usage == 0) { |
559 | if (region->vm_top > region->vm_start) | 560 | if (region->vm_top > region->vm_start) |
560 | delete_nommu_region(region); | 561 | delete_nommu_region(region); |
561 | up_write(&nommu_region_sem); | 562 | up_write(&nommu_region_sem); |
@@ -1204,7 +1205,7 @@ unsigned long do_mmap_pgoff(struct file *file, | |||
1204 | if (!vma) | 1205 | if (!vma) |
1205 | goto error_getting_vma; | 1206 | goto error_getting_vma; |
1206 | 1207 | ||
1207 | atomic_set(&region->vm_usage, 1); | 1208 | region->vm_usage = 1; |
1208 | region->vm_flags = vm_flags; | 1209 | region->vm_flags = vm_flags; |
1209 | region->vm_pgoff = pgoff; | 1210 | region->vm_pgoff = pgoff; |
1210 | 1211 | ||
@@ -1271,7 +1272,7 @@ unsigned long do_mmap_pgoff(struct file *file, | |||
1271 | } | 1272 | } |
1272 | 1273 | ||
1273 | /* we've found a region we can share */ | 1274 | /* we've found a region we can share */ |
1274 | atomic_inc(&pregion->vm_usage); | 1275 | pregion->vm_usage++; |
1275 | vma->vm_region = pregion; | 1276 | vma->vm_region = pregion; |
1276 | start = pregion->vm_start; | 1277 | start = pregion->vm_start; |
1277 | start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT; | 1278 | start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT; |
@@ -1288,7 +1289,7 @@ unsigned long do_mmap_pgoff(struct file *file, | |||
1288 | vma->vm_region = NULL; | 1289 | vma->vm_region = NULL; |
1289 | vma->vm_start = 0; | 1290 | vma->vm_start = 0; |
1290 | vma->vm_end = 0; | 1291 | vma->vm_end = 0; |
1291 | atomic_dec(&pregion->vm_usage); | 1292 | pregion->vm_usage--; |
1292 | pregion = NULL; | 1293 | pregion = NULL; |
1293 | goto error_just_free; | 1294 | goto error_just_free; |
1294 | } | 1295 | } |
@@ -1353,10 +1354,14 @@ unsigned long do_mmap_pgoff(struct file *file, | |||
1353 | share: | 1354 | share: |
1354 | add_vma_to_mm(current->mm, vma); | 1355 | add_vma_to_mm(current->mm, vma); |
1355 | 1356 | ||
1356 | up_write(&nommu_region_sem); | 1357 | /* we flush the region from the icache only when the first executable |
1358 | * mapping of it is made */ | ||
1359 | if (vma->vm_flags & VM_EXEC && !region->vm_icache_flushed) { | ||
1360 | flush_icache_range(region->vm_start, region->vm_end); | ||
1361 | region->vm_icache_flushed = true; | ||
1362 | } | ||
1357 | 1363 | ||
1358 | if (prot & PROT_EXEC) | 1364 | up_write(&nommu_region_sem); |
1359 | flush_icache_range(result, result + len); | ||
1360 | 1365 | ||
1361 | kleave(" = %lx", result); | 1366 | kleave(" = %lx", result); |
1362 | return result; | 1367 | return result; |
@@ -1398,6 +1403,31 @@ error_getting_region: | |||
1398 | } | 1403 | } |
1399 | EXPORT_SYMBOL(do_mmap_pgoff); | 1404 | EXPORT_SYMBOL(do_mmap_pgoff); |
1400 | 1405 | ||
1406 | SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, | ||
1407 | unsigned long, prot, unsigned long, flags, | ||
1408 | unsigned long, fd, unsigned long, pgoff) | ||
1409 | { | ||
1410 | struct file *file = NULL; | ||
1411 | unsigned long retval = -EBADF; | ||
1412 | |||
1413 | if (!(flags & MAP_ANONYMOUS)) { | ||
1414 | file = fget(fd); | ||
1415 | if (!file) | ||
1416 | goto out; | ||
1417 | } | ||
1418 | |||
1419 | flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); | ||
1420 | |||
1421 | down_write(&current->mm->mmap_sem); | ||
1422 | retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); | ||
1423 | up_write(&current->mm->mmap_sem); | ||
1424 | |||
1425 | if (file) | ||
1426 | fput(file); | ||
1427 | out: | ||
1428 | return retval; | ||
1429 | } | ||
1430 | |||
1401 | /* | 1431 | /* |
1402 | * split a vma into two pieces at address 'addr', a new vma is allocated either | 1432 | * split a vma into two pieces at address 'addr', a new vma is allocated either |
1403 | * for the first part or the tail. | 1433 | * for the first part or the tail. |
@@ -1411,10 +1441,9 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1411 | 1441 | ||
1412 | kenter(""); | 1442 | kenter(""); |
1413 | 1443 | ||
1414 | /* we're only permitted to split anonymous regions that have a single | 1444 | /* we're only permitted to split anonymous regions (these should have |
1415 | * owner */ | 1445 | * only a single usage on the region) */ |
1416 | if (vma->vm_file || | 1446 | if (vma->vm_file) |
1417 | atomic_read(&vma->vm_region->vm_usage) != 1) | ||
1418 | return -ENOMEM; | 1447 | return -ENOMEM; |
1419 | 1448 | ||
1420 | if (mm->map_count >= sysctl_max_map_count) | 1449 | if (mm->map_count >= sysctl_max_map_count) |
@@ -1488,7 +1517,7 @@ static int shrink_vma(struct mm_struct *mm, | |||
1488 | 1517 | ||
1489 | /* cut the backing region down to size */ | 1518 | /* cut the backing region down to size */ |
1490 | region = vma->vm_region; | 1519 | region = vma->vm_region; |
1491 | BUG_ON(atomic_read(&region->vm_usage) != 1); | 1520 | BUG_ON(region->vm_usage != 1); |
1492 | 1521 | ||
1493 | down_write(&nommu_region_sem); | 1522 | down_write(&nommu_region_sem); |
1494 | delete_nommu_region(region); | 1523 | delete_nommu_region(region); |
@@ -1732,27 +1761,6 @@ void unmap_mapping_range(struct address_space *mapping, | |||
1732 | EXPORT_SYMBOL(unmap_mapping_range); | 1761 | EXPORT_SYMBOL(unmap_mapping_range); |
1733 | 1762 | ||
1734 | /* | 1763 | /* |
1735 | * ask for an unmapped area at which to create a mapping on a file | ||
1736 | */ | ||
1737 | unsigned long get_unmapped_area(struct file *file, unsigned long addr, | ||
1738 | unsigned long len, unsigned long pgoff, | ||
1739 | unsigned long flags) | ||
1740 | { | ||
1741 | unsigned long (*get_area)(struct file *, unsigned long, unsigned long, | ||
1742 | unsigned long, unsigned long); | ||
1743 | |||
1744 | get_area = current->mm->get_unmapped_area; | ||
1745 | if (file && file->f_op && file->f_op->get_unmapped_area) | ||
1746 | get_area = file->f_op->get_unmapped_area; | ||
1747 | |||
1748 | if (!get_area) | ||
1749 | return -ENOSYS; | ||
1750 | |||
1751 | return get_area(file, addr, len, pgoff, flags); | ||
1752 | } | ||
1753 | EXPORT_SYMBOL(get_unmapped_area); | ||
1754 | |||
1755 | /* | ||
1756 | * Check that a process has enough memory to allocate a new virtual | 1764 | * Check that a process has enough memory to allocate a new virtual |
1757 | * mapping. 0 means there is enough memory for the allocation to | 1765 | * mapping. 0 means there is enough memory for the allocation to |
1758 | * succeed and -ENOMEM implies there is not. | 1766 | * succeed and -ENOMEM implies there is not. |
@@ -1891,9 +1899,11 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in | |||
1891 | 1899 | ||
1892 | /* only read or write mappings where it is permitted */ | 1900 | /* only read or write mappings where it is permitted */ |
1893 | if (write && vma->vm_flags & VM_MAYWRITE) | 1901 | if (write && vma->vm_flags & VM_MAYWRITE) |
1894 | len -= copy_to_user((void *) addr, buf, len); | 1902 | copy_to_user_page(vma, NULL, addr, |
1903 | (void *) addr, buf, len); | ||
1895 | else if (!write && vma->vm_flags & VM_MAYREAD) | 1904 | else if (!write && vma->vm_flags & VM_MAYREAD) |
1896 | len -= copy_from_user(buf, (void *) addr, len); | 1905 | copy_from_user_page(vma, NULL, addr, |
1906 | buf, (void *) addr, len); | ||
1897 | else | 1907 | else |
1898 | len = 0; | 1908 | len = 0; |
1899 | } else { | 1909 | } else { |
@@ -1904,3 +1914,65 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in | |||
1904 | mmput(mm); | 1914 | mmput(mm); |
1905 | return len; | 1915 | return len; |
1906 | } | 1916 | } |
1917 | |||
1918 | /** | ||
1919 | * nommu_shrink_inode_mappings - Shrink the shared mappings on an inode | ||
1920 | * @inode: The inode to check | ||
1921 | * @size: The current filesize of the inode | ||
1922 | * @newsize: The proposed filesize of the inode | ||
1923 | * | ||
1924 | * Check the shared mappings on an inode on behalf of a shrinking truncate to | ||
1925 | * make sure that any outstanding VMAs aren't broken and then shrink the | ||
1926 | * vm_regions that extend beyond the new size so that do_mmap_pgoff() doesn't | ||
1927 | * automatically grant mappings that are too large. | ||
1928 | */ | ||
1929 | int nommu_shrink_inode_mappings(struct inode *inode, size_t size, | ||
1930 | size_t newsize) | ||
1931 | { | ||
1932 | struct vm_area_struct *vma; | ||
1933 | struct prio_tree_iter iter; | ||
1934 | struct vm_region *region; | ||
1935 | pgoff_t low, high; | ||
1936 | size_t r_size, r_top; | ||
1937 | |||
1938 | low = newsize >> PAGE_SHIFT; | ||
1939 | high = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
1940 | |||
1941 | down_write(&nommu_region_sem); | ||
1942 | |||
1943 | /* search for VMAs that fall within the dead zone */ | ||
1944 | vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap, | ||
1945 | low, high) { | ||
1946 | /* found one - only interested if it's shared out of the page | ||
1947 | * cache */ | ||
1948 | if (vma->vm_flags & VM_SHARED) { | ||
1949 | up_write(&nommu_region_sem); | ||
1950 | return -ETXTBSY; /* not quite true, but near enough */ | ||
1951 | } | ||
1952 | } | ||
1953 | |||
1954 | /* reduce any regions that overlap the dead zone - if in existence, | ||
1955 | * these will be pointed to by VMAs that don't overlap the dead zone | ||
1956 | * | ||
1957 | * we don't check for any regions that start beyond the EOF as there | ||
1958 | * shouldn't be any | ||
1959 | */ | ||
1960 | vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap, | ||
1961 | 0, ULONG_MAX) { | ||
1962 | if (!(vma->vm_flags & VM_SHARED)) | ||
1963 | continue; | ||
1964 | |||
1965 | region = vma->vm_region; | ||
1966 | r_size = region->vm_top - region->vm_start; | ||
1967 | r_top = (region->vm_pgoff << PAGE_SHIFT) + r_size; | ||
1968 | |||
1969 | if (r_top > newsize) { | ||
1970 | region->vm_top -= r_top - newsize; | ||
1971 | if (region->vm_end > region->vm_top) | ||
1972 | region->vm_end = region->vm_top; | ||
1973 | } | ||
1974 | } | ||
1975 | |||
1976 | up_write(&nommu_region_sem); | ||
1977 | return 0; | ||
1978 | } | ||
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index f52481b1c1e5..237050478f28 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c | |||
@@ -459,6 +459,8 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, | |||
459 | list_for_each_entry(c, &p->children, sibling) { | 459 | list_for_each_entry(c, &p->children, sibling) { |
460 | if (c->mm == p->mm) | 460 | if (c->mm == p->mm) |
461 | continue; | 461 | continue; |
462 | if (mem && !task_in_mem_cgroup(c, mem)) | ||
463 | continue; | ||
462 | if (!oom_kill_task(c)) | 464 | if (!oom_kill_task(c)) |
463 | return 0; | 465 | return 0; |
464 | } | 466 | } |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4e9f5cc5fb59..a6b17aa4740b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -556,8 +556,9 @@ static void free_pcppages_bulk(struct zone *zone, int count, | |||
556 | page = list_entry(list->prev, struct page, lru); | 556 | page = list_entry(list->prev, struct page, lru); |
557 | /* must delete as __free_one_page list manipulates */ | 557 | /* must delete as __free_one_page list manipulates */ |
558 | list_del(&page->lru); | 558 | list_del(&page->lru); |
559 | __free_one_page(page, zone, 0, migratetype); | 559 | /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */ |
560 | trace_mm_page_pcpu_drain(page, 0, migratetype); | 560 | __free_one_page(page, zone, 0, page_private(page)); |
561 | trace_mm_page_pcpu_drain(page, 0, page_private(page)); | ||
561 | } while (--count && --batch_free && !list_empty(list)); | 562 | } while (--count && --batch_free && !list_empty(list)); |
562 | } | 563 | } |
563 | spin_unlock(&zone->lock); | 564 | spin_unlock(&zone->lock); |
@@ -1008,10 +1009,10 @@ static void drain_pages(unsigned int cpu) | |||
1008 | struct per_cpu_pageset *pset; | 1009 | struct per_cpu_pageset *pset; |
1009 | struct per_cpu_pages *pcp; | 1010 | struct per_cpu_pages *pcp; |
1010 | 1011 | ||
1011 | pset = zone_pcp(zone, cpu); | 1012 | local_irq_save(flags); |
1013 | pset = per_cpu_ptr(zone->pageset, cpu); | ||
1012 | 1014 | ||
1013 | pcp = &pset->pcp; | 1015 | pcp = &pset->pcp; |
1014 | local_irq_save(flags); | ||
1015 | free_pcppages_bulk(zone, pcp->count, pcp); | 1016 | free_pcppages_bulk(zone, pcp->count, pcp); |
1016 | pcp->count = 0; | 1017 | pcp->count = 0; |
1017 | local_irq_restore(flags); | 1018 | local_irq_restore(flags); |
@@ -1095,7 +1096,6 @@ static void free_hot_cold_page(struct page *page, int cold) | |||
1095 | arch_free_page(page, 0); | 1096 | arch_free_page(page, 0); |
1096 | kernel_map_pages(page, 1, 0); | 1097 | kernel_map_pages(page, 1, 0); |
1097 | 1098 | ||
1098 | pcp = &zone_pcp(zone, get_cpu())->pcp; | ||
1099 | migratetype = get_pageblock_migratetype(page); | 1099 | migratetype = get_pageblock_migratetype(page); |
1100 | set_page_private(page, migratetype); | 1100 | set_page_private(page, migratetype); |
1101 | local_irq_save(flags); | 1101 | local_irq_save(flags); |
@@ -1118,6 +1118,7 @@ static void free_hot_cold_page(struct page *page, int cold) | |||
1118 | migratetype = MIGRATE_MOVABLE; | 1118 | migratetype = MIGRATE_MOVABLE; |
1119 | } | 1119 | } |
1120 | 1120 | ||
1121 | pcp = &this_cpu_ptr(zone->pageset)->pcp; | ||
1121 | if (cold) | 1122 | if (cold) |
1122 | list_add_tail(&page->lru, &pcp->lists[migratetype]); | 1123 | list_add_tail(&page->lru, &pcp->lists[migratetype]); |
1123 | else | 1124 | else |
@@ -1130,7 +1131,6 @@ static void free_hot_cold_page(struct page *page, int cold) | |||
1130 | 1131 | ||
1131 | out: | 1132 | out: |
1132 | local_irq_restore(flags); | 1133 | local_irq_restore(flags); |
1133 | put_cpu(); | ||
1134 | } | 1134 | } |
1135 | 1135 | ||
1136 | void free_hot_page(struct page *page) | 1136 | void free_hot_page(struct page *page) |
@@ -1180,17 +1180,15 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, | |||
1180 | unsigned long flags; | 1180 | unsigned long flags; |
1181 | struct page *page; | 1181 | struct page *page; |
1182 | int cold = !!(gfp_flags & __GFP_COLD); | 1182 | int cold = !!(gfp_flags & __GFP_COLD); |
1183 | int cpu; | ||
1184 | 1183 | ||
1185 | again: | 1184 | again: |
1186 | cpu = get_cpu(); | ||
1187 | if (likely(order == 0)) { | 1185 | if (likely(order == 0)) { |
1188 | struct per_cpu_pages *pcp; | 1186 | struct per_cpu_pages *pcp; |
1189 | struct list_head *list; | 1187 | struct list_head *list; |
1190 | 1188 | ||
1191 | pcp = &zone_pcp(zone, cpu)->pcp; | ||
1192 | list = &pcp->lists[migratetype]; | ||
1193 | local_irq_save(flags); | 1189 | local_irq_save(flags); |
1190 | pcp = &this_cpu_ptr(zone->pageset)->pcp; | ||
1191 | list = &pcp->lists[migratetype]; | ||
1194 | if (list_empty(list)) { | 1192 | if (list_empty(list)) { |
1195 | pcp->count += rmqueue_bulk(zone, 0, | 1193 | pcp->count += rmqueue_bulk(zone, 0, |
1196 | pcp->batch, list, | 1194 | pcp->batch, list, |
@@ -1222,16 +1220,15 @@ again: | |||
1222 | } | 1220 | } |
1223 | spin_lock_irqsave(&zone->lock, flags); | 1221 | spin_lock_irqsave(&zone->lock, flags); |
1224 | page = __rmqueue(zone, order, migratetype); | 1222 | page = __rmqueue(zone, order, migratetype); |
1225 | __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order)); | ||
1226 | spin_unlock(&zone->lock); | 1223 | spin_unlock(&zone->lock); |
1227 | if (!page) | 1224 | if (!page) |
1228 | goto failed; | 1225 | goto failed; |
1226 | __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order)); | ||
1229 | } | 1227 | } |
1230 | 1228 | ||
1231 | __count_zone_vm_events(PGALLOC, zone, 1 << order); | 1229 | __count_zone_vm_events(PGALLOC, zone, 1 << order); |
1232 | zone_statistics(preferred_zone, zone); | 1230 | zone_statistics(preferred_zone, zone); |
1233 | local_irq_restore(flags); | 1231 | local_irq_restore(flags); |
1234 | put_cpu(); | ||
1235 | 1232 | ||
1236 | VM_BUG_ON(bad_range(zone, page)); | 1233 | VM_BUG_ON(bad_range(zone, page)); |
1237 | if (prep_new_page(page, order, gfp_flags)) | 1234 | if (prep_new_page(page, order, gfp_flags)) |
@@ -1240,7 +1237,6 @@ again: | |||
1240 | 1237 | ||
1241 | failed: | 1238 | failed: |
1242 | local_irq_restore(flags); | 1239 | local_irq_restore(flags); |
1243 | put_cpu(); | ||
1244 | return NULL; | 1240 | return NULL; |
1245 | } | 1241 | } |
1246 | 1242 | ||
@@ -2179,7 +2175,7 @@ void show_free_areas(void) | |||
2179 | for_each_online_cpu(cpu) { | 2175 | for_each_online_cpu(cpu) { |
2180 | struct per_cpu_pageset *pageset; | 2176 | struct per_cpu_pageset *pageset; |
2181 | 2177 | ||
2182 | pageset = zone_pcp(zone, cpu); | 2178 | pageset = per_cpu_ptr(zone->pageset, cpu); |
2183 | 2179 | ||
2184 | printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n", | 2180 | printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n", |
2185 | cpu, pageset->pcp.high, | 2181 | cpu, pageset->pcp.high, |
@@ -2744,10 +2740,29 @@ static void build_zonelist_cache(pg_data_t *pgdat) | |||
2744 | 2740 | ||
2745 | #endif /* CONFIG_NUMA */ | 2741 | #endif /* CONFIG_NUMA */ |
2746 | 2742 | ||
2743 | /* | ||
2744 | * Boot pageset table. One per cpu which is going to be used for all | ||
2745 | * zones and all nodes. The parameters will be set in such a way | ||
2746 | * that an item put on a list will immediately be handed over to | ||
2747 | * the buddy list. This is safe since pageset manipulation is done | ||
2748 | * with interrupts disabled. | ||
2749 | * | ||
2750 | * The boot_pagesets must be kept even after bootup is complete for | ||
2751 | * unused processors and/or zones. They do play a role for bootstrapping | ||
2752 | * hotplugged processors. | ||
2753 | * | ||
2754 | * zoneinfo_show() and maybe other functions do | ||
2755 | * not check if the processor is online before following the pageset pointer. | ||
2756 | * Other parts of the kernel may not check if the zone is available. | ||
2757 | */ | ||
2758 | static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch); | ||
2759 | static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset); | ||
2760 | |||
2747 | /* return values int ....just for stop_machine() */ | 2761 | /* return values int ....just for stop_machine() */ |
2748 | static int __build_all_zonelists(void *dummy) | 2762 | static int __build_all_zonelists(void *dummy) |
2749 | { | 2763 | { |
2750 | int nid; | 2764 | int nid; |
2765 | int cpu; | ||
2751 | 2766 | ||
2752 | #ifdef CONFIG_NUMA | 2767 | #ifdef CONFIG_NUMA |
2753 | memset(node_load, 0, sizeof(node_load)); | 2768 | memset(node_load, 0, sizeof(node_load)); |
@@ -2758,6 +2773,23 @@ static int __build_all_zonelists(void *dummy) | |||
2758 | build_zonelists(pgdat); | 2773 | build_zonelists(pgdat); |
2759 | build_zonelist_cache(pgdat); | 2774 | build_zonelist_cache(pgdat); |
2760 | } | 2775 | } |
2776 | |||
2777 | /* | ||
2778 | * Initialize the boot_pagesets that are going to be used | ||
2779 | * for bootstrapping processors. The real pagesets for | ||
2780 | * each zone will be allocated later when the per cpu | ||
2781 | * allocator is available. | ||
2782 | * | ||
2783 | * boot_pagesets are used also for bootstrapping offline | ||
2784 | * cpus if the system is already booted because the pagesets | ||
2785 | * are needed to initialize allocators on a specific cpu too. | ||
2786 | * F.e. the percpu allocator needs the page allocator which | ||
2787 | * needs the percpu allocator in order to allocate its pagesets | ||
2788 | * (a chicken-egg dilemma). | ||
2789 | */ | ||
2790 | for_each_possible_cpu(cpu) | ||
2791 | setup_pageset(&per_cpu(boot_pageset, cpu), 0); | ||
2792 | |||
2761 | return 0; | 2793 | return 0; |
2762 | } | 2794 | } |
2763 | 2795 | ||
@@ -3095,121 +3127,33 @@ static void setup_pagelist_highmark(struct per_cpu_pageset *p, | |||
3095 | pcp->batch = PAGE_SHIFT * 8; | 3127 | pcp->batch = PAGE_SHIFT * 8; |
3096 | } | 3128 | } |
3097 | 3129 | ||
3098 | |||
3099 | #ifdef CONFIG_NUMA | ||
3100 | /* | ||
3101 | * Boot pageset table. One per cpu which is going to be used for all | ||
3102 | * zones and all nodes. The parameters will be set in such a way | ||
3103 | * that an item put on a list will immediately be handed over to | ||
3104 | * the buddy list. This is safe since pageset manipulation is done | ||
3105 | * with interrupts disabled. | ||
3106 | * | ||
3107 | * Some NUMA counter updates may also be caught by the boot pagesets. | ||
3108 | * | ||
3109 | * The boot_pagesets must be kept even after bootup is complete for | ||
3110 | * unused processors and/or zones. They do play a role for bootstrapping | ||
3111 | * hotplugged processors. | ||
3112 | * | ||
3113 | * zoneinfo_show() and maybe other functions do | ||
3114 | * not check if the processor is online before following the pageset pointer. | ||
3115 | * Other parts of the kernel may not check if the zone is available. | ||
3116 | */ | ||
3117 | static struct per_cpu_pageset boot_pageset[NR_CPUS]; | ||
3118 | |||
3119 | /* | 3130 | /* |
3120 | * Dynamically allocate memory for the | 3131 | * Allocate per cpu pagesets and initialize them. |
3121 | * per cpu pageset array in struct zone. | 3132 | * Before this call only boot pagesets were available. |
3133 | * Boot pagesets will no longer be used by this processor | ||
3134 | * after setup_per_cpu_pageset(). | ||
3122 | */ | 3135 | */ |
3123 | static int __cpuinit process_zones(int cpu) | 3136 | void __init setup_per_cpu_pageset(void) |
3124 | { | 3137 | { |
3125 | struct zone *zone, *dzone; | 3138 | struct zone *zone; |
3126 | int node = cpu_to_node(cpu); | 3139 | int cpu; |
3127 | |||
3128 | node_set_state(node, N_CPU); /* this node has a cpu */ | ||
3129 | 3140 | ||
3130 | for_each_populated_zone(zone) { | 3141 | for_each_populated_zone(zone) { |
3131 | zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset), | 3142 | zone->pageset = alloc_percpu(struct per_cpu_pageset); |
3132 | GFP_KERNEL, node); | ||
3133 | if (!zone_pcp(zone, cpu)) | ||
3134 | goto bad; | ||
3135 | 3143 | ||
3136 | setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone)); | 3144 | for_each_possible_cpu(cpu) { |
3145 | struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu); | ||
3137 | 3146 | ||
3138 | if (percpu_pagelist_fraction) | 3147 | setup_pageset(pcp, zone_batchsize(zone)); |
3139 | setup_pagelist_highmark(zone_pcp(zone, cpu), | ||
3140 | (zone->present_pages / percpu_pagelist_fraction)); | ||
3141 | } | ||
3142 | 3148 | ||
3143 | return 0; | 3149 | if (percpu_pagelist_fraction) |
3144 | bad: | 3150 | setup_pagelist_highmark(pcp, |
3145 | for_each_zone(dzone) { | 3151 | (zone->present_pages / |
3146 | if (!populated_zone(dzone)) | 3152 | percpu_pagelist_fraction)); |
3147 | continue; | 3153 | } |
3148 | if (dzone == zone) | ||
3149 | break; | ||
3150 | kfree(zone_pcp(dzone, cpu)); | ||
3151 | zone_pcp(dzone, cpu) = &boot_pageset[cpu]; | ||
3152 | } | ||
3153 | return -ENOMEM; | ||
3154 | } | ||
3155 | |||
3156 | static inline void free_zone_pagesets(int cpu) | ||
3157 | { | ||
3158 | struct zone *zone; | ||
3159 | |||
3160 | for_each_zone(zone) { | ||
3161 | struct per_cpu_pageset *pset = zone_pcp(zone, cpu); | ||
3162 | |||
3163 | /* Free per_cpu_pageset if it is slab allocated */ | ||
3164 | if (pset != &boot_pageset[cpu]) | ||
3165 | kfree(pset); | ||
3166 | zone_pcp(zone, cpu) = &boot_pageset[cpu]; | ||
3167 | } | ||
3168 | } | ||
3169 | |||
3170 | static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb, | ||
3171 | unsigned long action, | ||
3172 | void *hcpu) | ||
3173 | { | ||
3174 | int cpu = (long)hcpu; | ||
3175 | int ret = NOTIFY_OK; | ||
3176 | |||
3177 | switch (action) { | ||
3178 | case CPU_UP_PREPARE: | ||
3179 | case CPU_UP_PREPARE_FROZEN: | ||
3180 | if (process_zones(cpu)) | ||
3181 | ret = NOTIFY_BAD; | ||
3182 | break; | ||
3183 | case CPU_UP_CANCELED: | ||
3184 | case CPU_UP_CANCELED_FROZEN: | ||
3185 | case CPU_DEAD: | ||
3186 | case CPU_DEAD_FROZEN: | ||
3187 | free_zone_pagesets(cpu); | ||
3188 | break; | ||
3189 | default: | ||
3190 | break; | ||
3191 | } | 3154 | } |
3192 | return ret; | ||
3193 | } | ||
3194 | |||
3195 | static struct notifier_block __cpuinitdata pageset_notifier = | ||
3196 | { &pageset_cpuup_callback, NULL, 0 }; | ||
3197 | |||
3198 | void __init setup_per_cpu_pageset(void) | ||
3199 | { | ||
3200 | int err; | ||
3201 | |||
3202 | /* Initialize per_cpu_pageset for cpu 0. | ||
3203 | * A cpuup callback will do this for every cpu | ||
3204 | * as it comes online | ||
3205 | */ | ||
3206 | err = process_zones(smp_processor_id()); | ||
3207 | BUG_ON(err); | ||
3208 | register_cpu_notifier(&pageset_notifier); | ||
3209 | } | 3155 | } |
3210 | 3156 | ||
3211 | #endif | ||
3212 | |||
3213 | static noinline __init_refok | 3157 | static noinline __init_refok |
3214 | int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) | 3158 | int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) |
3215 | { | 3159 | { |
@@ -3263,7 +3207,7 @@ static int __zone_pcp_update(void *data) | |||
3263 | struct per_cpu_pageset *pset; | 3207 | struct per_cpu_pageset *pset; |
3264 | struct per_cpu_pages *pcp; | 3208 | struct per_cpu_pages *pcp; |
3265 | 3209 | ||
3266 | pset = zone_pcp(zone, cpu); | 3210 | pset = per_cpu_ptr(zone->pageset, cpu); |
3267 | pcp = &pset->pcp; | 3211 | pcp = &pset->pcp; |
3268 | 3212 | ||
3269 | local_irq_save(flags); | 3213 | local_irq_save(flags); |
@@ -3281,21 +3225,17 @@ void zone_pcp_update(struct zone *zone) | |||
3281 | 3225 | ||
3282 | static __meminit void zone_pcp_init(struct zone *zone) | 3226 | static __meminit void zone_pcp_init(struct zone *zone) |
3283 | { | 3227 | { |
3284 | int cpu; | 3228 | /* |
3285 | unsigned long batch = zone_batchsize(zone); | 3229 | * per cpu subsystem is not up at this point. The following code |
3230 | * relies on the ability of the linker to provide the | ||
3231 | * offset of a (static) per cpu variable into the per cpu area. | ||
3232 | */ | ||
3233 | zone->pageset = &boot_pageset; | ||
3286 | 3234 | ||
3287 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | ||
3288 | #ifdef CONFIG_NUMA | ||
3289 | /* Early boot. Slab allocator not functional yet */ | ||
3290 | zone_pcp(zone, cpu) = &boot_pageset[cpu]; | ||
3291 | setup_pageset(&boot_pageset[cpu],0); | ||
3292 | #else | ||
3293 | setup_pageset(zone_pcp(zone,cpu), batch); | ||
3294 | #endif | ||
3295 | } | ||
3296 | if (zone->present_pages) | 3235 | if (zone->present_pages) |
3297 | printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n", | 3236 | printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%u\n", |
3298 | zone->name, zone->present_pages, batch); | 3237 | zone->name, zone->present_pages, |
3238 | zone_batchsize(zone)); | ||
3299 | } | 3239 | } |
3300 | 3240 | ||
3301 | __meminit int init_currently_empty_zone(struct zone *zone, | 3241 | __meminit int init_currently_empty_zone(struct zone *zone, |
@@ -3434,6 +3374,61 @@ void __init free_bootmem_with_active_regions(int nid, | |||
3434 | } | 3374 | } |
3435 | } | 3375 | } |
3436 | 3376 | ||
3377 | int __init add_from_early_node_map(struct range *range, int az, | ||
3378 | int nr_range, int nid) | ||
3379 | { | ||
3380 | int i; | ||
3381 | u64 start, end; | ||
3382 | |||
3383 | /* need to go over early_node_map to find out good range for node */ | ||
3384 | for_each_active_range_index_in_nid(i, nid) { | ||
3385 | start = early_node_map[i].start_pfn; | ||
3386 | end = early_node_map[i].end_pfn; | ||
3387 | nr_range = add_range(range, az, nr_range, start, end); | ||
3388 | } | ||
3389 | return nr_range; | ||
3390 | } | ||
3391 | |||
3392 | #ifdef CONFIG_NO_BOOTMEM | ||
3393 | void * __init __alloc_memory_core_early(int nid, u64 size, u64 align, | ||
3394 | u64 goal, u64 limit) | ||
3395 | { | ||
3396 | int i; | ||
3397 | void *ptr; | ||
3398 | |||
3399 | /* need to go over early_node_map to find out good range for node */ | ||
3400 | for_each_active_range_index_in_nid(i, nid) { | ||
3401 | u64 addr; | ||
3402 | u64 ei_start, ei_last; | ||
3403 | |||
3404 | ei_last = early_node_map[i].end_pfn; | ||
3405 | ei_last <<= PAGE_SHIFT; | ||
3406 | ei_start = early_node_map[i].start_pfn; | ||
3407 | ei_start <<= PAGE_SHIFT; | ||
3408 | addr = find_early_area(ei_start, ei_last, | ||
3409 | goal, limit, size, align); | ||
3410 | |||
3411 | if (addr == -1ULL) | ||
3412 | continue; | ||
3413 | |||
3414 | #if 0 | ||
3415 | printk(KERN_DEBUG "alloc (nid=%d %llx - %llx) (%llx - %llx) %llx %llx => %llx\n", | ||
3416 | nid, | ||
3417 | ei_start, ei_last, goal, limit, size, | ||
3418 | align, addr); | ||
3419 | #endif | ||
3420 | |||
3421 | ptr = phys_to_virt(addr); | ||
3422 | memset(ptr, 0, size); | ||
3423 | reserve_early_without_check(addr, addr + size, "BOOTMEM"); | ||
3424 | return ptr; | ||
3425 | } | ||
3426 | |||
3427 | return NULL; | ||
3428 | } | ||
3429 | #endif | ||
3430 | |||
3431 | |||
3437 | void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data) | 3432 | void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data) |
3438 | { | 3433 | { |
3439 | int i; | 3434 | int i; |
@@ -3998,7 +3993,7 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn, | |||
3998 | } | 3993 | } |
3999 | 3994 | ||
4000 | /* Merge backward if suitable */ | 3995 | /* Merge backward if suitable */ |
4001 | if (start_pfn < early_node_map[i].end_pfn && | 3996 | if (start_pfn < early_node_map[i].start_pfn && |
4002 | end_pfn >= early_node_map[i].start_pfn) { | 3997 | end_pfn >= early_node_map[i].start_pfn) { |
4003 | early_node_map[i].start_pfn = start_pfn; | 3998 | early_node_map[i].start_pfn = start_pfn; |
4004 | return; | 3999 | return; |
@@ -4466,7 +4461,11 @@ void __init set_dma_reserve(unsigned long new_dma_reserve) | |||
4466 | } | 4461 | } |
4467 | 4462 | ||
4468 | #ifndef CONFIG_NEED_MULTIPLE_NODES | 4463 | #ifndef CONFIG_NEED_MULTIPLE_NODES |
4469 | struct pglist_data __refdata contig_page_data = { .bdata = &bootmem_node_data[0] }; | 4464 | struct pglist_data __refdata contig_page_data = { |
4465 | #ifndef CONFIG_NO_BOOTMEM | ||
4466 | .bdata = &bootmem_node_data[0] | ||
4467 | #endif | ||
4468 | }; | ||
4470 | EXPORT_SYMBOL(contig_page_data); | 4469 | EXPORT_SYMBOL(contig_page_data); |
4471 | #endif | 4470 | #endif |
4472 | 4471 | ||
@@ -4809,10 +4808,11 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, | |||
4809 | if (!write || (ret == -EINVAL)) | 4808 | if (!write || (ret == -EINVAL)) |
4810 | return ret; | 4809 | return ret; |
4811 | for_each_populated_zone(zone) { | 4810 | for_each_populated_zone(zone) { |
4812 | for_each_online_cpu(cpu) { | 4811 | for_each_possible_cpu(cpu) { |
4813 | unsigned long high; | 4812 | unsigned long high; |
4814 | high = zone->present_pages / percpu_pagelist_fraction; | 4813 | high = zone->present_pages / percpu_pagelist_fraction; |
4815 | setup_pagelist_highmark(zone_pcp(zone, cpu), high); | 4814 | setup_pagelist_highmark( |
4815 | per_cpu_ptr(zone->pageset, cpu), high); | ||
4816 | } | 4816 | } |
4817 | } | 4817 | } |
4818 | return 0; | 4818 | return 0; |
diff --git a/mm/percpu.c b/mm/percpu.c index 442010cc91c6..768419d44ad7 100644 --- a/mm/percpu.c +++ b/mm/percpu.c | |||
@@ -80,13 +80,15 @@ | |||
80 | /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */ | 80 | /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */ |
81 | #ifndef __addr_to_pcpu_ptr | 81 | #ifndef __addr_to_pcpu_ptr |
82 | #define __addr_to_pcpu_ptr(addr) \ | 82 | #define __addr_to_pcpu_ptr(addr) \ |
83 | (void *)((unsigned long)(addr) - (unsigned long)pcpu_base_addr \ | 83 | (void __percpu *)((unsigned long)(addr) - \ |
84 | + (unsigned long)__per_cpu_start) | 84 | (unsigned long)pcpu_base_addr + \ |
85 | (unsigned long)__per_cpu_start) | ||
85 | #endif | 86 | #endif |
86 | #ifndef __pcpu_ptr_to_addr | 87 | #ifndef __pcpu_ptr_to_addr |
87 | #define __pcpu_ptr_to_addr(ptr) \ | 88 | #define __pcpu_ptr_to_addr(ptr) \ |
88 | (void *)((unsigned long)(ptr) + (unsigned long)pcpu_base_addr \ | 89 | (void __force *)((unsigned long)(ptr) + \ |
89 | - (unsigned long)__per_cpu_start) | 90 | (unsigned long)pcpu_base_addr - \ |
91 | (unsigned long)__per_cpu_start) | ||
90 | #endif | 92 | #endif |
91 | 93 | ||
92 | struct pcpu_chunk { | 94 | struct pcpu_chunk { |
@@ -913,11 +915,10 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size) | |||
913 | int rs, re; | 915 | int rs, re; |
914 | 916 | ||
915 | /* quick path, check whether it's empty already */ | 917 | /* quick path, check whether it's empty already */ |
916 | pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { | 918 | rs = page_start; |
917 | if (rs == page_start && re == page_end) | 919 | pcpu_next_unpop(chunk, &rs, &re, page_end); |
918 | return; | 920 | if (rs == page_start && re == page_end) |
919 | break; | 921 | return; |
920 | } | ||
921 | 922 | ||
922 | /* immutable chunks can't be depopulated */ | 923 | /* immutable chunks can't be depopulated */ |
923 | WARN_ON(chunk->immutable); | 924 | WARN_ON(chunk->immutable); |
@@ -968,11 +969,10 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) | |||
968 | int rs, re, rc; | 969 | int rs, re, rc; |
969 | 970 | ||
970 | /* quick path, check whether all pages are already there */ | 971 | /* quick path, check whether all pages are already there */ |
971 | pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) { | 972 | rs = page_start; |
972 | if (rs == page_start && re == page_end) | 973 | pcpu_next_pop(chunk, &rs, &re, page_end); |
973 | goto clear; | 974 | if (rs == page_start && re == page_end) |
974 | break; | 975 | goto clear; |
975 | } | ||
976 | 976 | ||
977 | /* need to allocate and map pages, this chunk can't be immutable */ | 977 | /* need to allocate and map pages, this chunk can't be immutable */ |
978 | WARN_ON(chunk->immutable); | 978 | WARN_ON(chunk->immutable); |
@@ -1067,7 +1067,7 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void) | |||
1067 | * RETURNS: | 1067 | * RETURNS: |
1068 | * Percpu pointer to the allocated area on success, NULL on failure. | 1068 | * Percpu pointer to the allocated area on success, NULL on failure. |
1069 | */ | 1069 | */ |
1070 | static void *pcpu_alloc(size_t size, size_t align, bool reserved) | 1070 | static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved) |
1071 | { | 1071 | { |
1072 | static int warn_limit = 10; | 1072 | static int warn_limit = 10; |
1073 | struct pcpu_chunk *chunk; | 1073 | struct pcpu_chunk *chunk; |
@@ -1196,7 +1196,7 @@ fail_unlock_mutex: | |||
1196 | * RETURNS: | 1196 | * RETURNS: |
1197 | * Percpu pointer to the allocated area on success, NULL on failure. | 1197 | * Percpu pointer to the allocated area on success, NULL on failure. |
1198 | */ | 1198 | */ |
1199 | void *__alloc_percpu(size_t size, size_t align) | 1199 | void __percpu *__alloc_percpu(size_t size, size_t align) |
1200 | { | 1200 | { |
1201 | return pcpu_alloc(size, align, false); | 1201 | return pcpu_alloc(size, align, false); |
1202 | } | 1202 | } |
@@ -1217,7 +1217,7 @@ EXPORT_SYMBOL_GPL(__alloc_percpu); | |||
1217 | * RETURNS: | 1217 | * RETURNS: |
1218 | * Percpu pointer to the allocated area on success, NULL on failure. | 1218 | * Percpu pointer to the allocated area on success, NULL on failure. |
1219 | */ | 1219 | */ |
1220 | void *__alloc_reserved_percpu(size_t size, size_t align) | 1220 | void __percpu *__alloc_reserved_percpu(size_t size, size_t align) |
1221 | { | 1221 | { |
1222 | return pcpu_alloc(size, align, true); | 1222 | return pcpu_alloc(size, align, true); |
1223 | } | 1223 | } |
@@ -1269,9 +1269,9 @@ static void pcpu_reclaim(struct work_struct *work) | |||
1269 | * CONTEXT: | 1269 | * CONTEXT: |
1270 | * Can be called from atomic context. | 1270 | * Can be called from atomic context. |
1271 | */ | 1271 | */ |
1272 | void free_percpu(void *ptr) | 1272 | void free_percpu(void __percpu *ptr) |
1273 | { | 1273 | { |
1274 | void *addr = __pcpu_ptr_to_addr(ptr); | 1274 | void *addr; |
1275 | struct pcpu_chunk *chunk; | 1275 | struct pcpu_chunk *chunk; |
1276 | unsigned long flags; | 1276 | unsigned long flags; |
1277 | int off; | 1277 | int off; |
@@ -1279,6 +1279,8 @@ void free_percpu(void *ptr) | |||
1279 | if (!ptr) | 1279 | if (!ptr) |
1280 | return; | 1280 | return; |
1281 | 1281 | ||
1282 | addr = __pcpu_ptr_to_addr(ptr); | ||
1283 | |||
1282 | spin_lock_irqsave(&pcpu_lock, flags); | 1284 | spin_lock_irqsave(&pcpu_lock, flags); |
1283 | 1285 | ||
1284 | chunk = pcpu_chunk_addr_search(addr); | 1286 | chunk = pcpu_chunk_addr_search(addr); |
@@ -654,7 +654,7 @@ static void init_node_lock_keys(int q) | |||
654 | 654 | ||
655 | l3 = s->cs_cachep->nodelists[q]; | 655 | l3 = s->cs_cachep->nodelists[q]; |
656 | if (!l3 || OFF_SLAB(s->cs_cachep)) | 656 | if (!l3 || OFF_SLAB(s->cs_cachep)) |
657 | return; | 657 | continue; |
658 | lockdep_set_class(&l3->list_lock, &on_slab_l3_key); | 658 | lockdep_set_class(&l3->list_lock, &on_slab_l3_key); |
659 | alc = l3->alien; | 659 | alc = l3->alien; |
660 | /* | 660 | /* |
@@ -665,7 +665,7 @@ static void init_node_lock_keys(int q) | |||
665 | * for alloc_alien_cache, | 665 | * for alloc_alien_cache, |
666 | */ | 666 | */ |
667 | if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC) | 667 | if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC) |
668 | return; | 668 | continue; |
669 | for_each_node(r) { | 669 | for_each_node(r) { |
670 | if (alc[r]) | 670 | if (alc[r]) |
671 | lockdep_set_class(&alc[r]->lock, | 671 | lockdep_set_class(&alc[r]->lock, |
@@ -935,7 +935,6 @@ static int transfer_objects(struct array_cache *to, | |||
935 | 935 | ||
936 | from->avail -= nr; | 936 | from->avail -= nr; |
937 | to->avail += nr; | 937 | to->avail += nr; |
938 | to->touched = 1; | ||
939 | return nr; | 938 | return nr; |
940 | } | 939 | } |
941 | 940 | ||
@@ -2961,8 +2960,10 @@ retry: | |||
2961 | spin_lock(&l3->list_lock); | 2960 | spin_lock(&l3->list_lock); |
2962 | 2961 | ||
2963 | /* See if we can refill from the shared array */ | 2962 | /* See if we can refill from the shared array */ |
2964 | if (l3->shared && transfer_objects(ac, l3->shared, batchcount)) | 2963 | if (l3->shared && transfer_objects(ac, l3->shared, batchcount)) { |
2964 | l3->shared->touched = 1; | ||
2965 | goto alloc_done; | 2965 | goto alloc_done; |
2966 | } | ||
2966 | 2967 | ||
2967 | while (batchcount > 0) { | 2968 | while (batchcount > 0) { |
2968 | struct list_head *entry; | 2969 | struct list_head *entry; |
@@ -3099,7 +3100,7 @@ static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags) | |||
3099 | if (cachep == &cache_cache) | 3100 | if (cachep == &cache_cache) |
3100 | return false; | 3101 | return false; |
3101 | 3102 | ||
3102 | return should_failslab(obj_size(cachep), flags); | 3103 | return should_failslab(obj_size(cachep), flags, cachep->flags); |
3103 | } | 3104 | } |
3104 | 3105 | ||
3105 | static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) | 3106 | static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) |
@@ -151,7 +151,8 @@ | |||
151 | * Set of flags that will prevent slab merging | 151 | * Set of flags that will prevent slab merging |
152 | */ | 152 | */ |
153 | #define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ | 153 | #define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ |
154 | SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE) | 154 | SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \ |
155 | SLAB_FAILSLAB) | ||
155 | 156 | ||
156 | #define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \ | 157 | #define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \ |
157 | SLAB_CACHE_DMA | SLAB_NOTRACK) | 158 | SLAB_CACHE_DMA | SLAB_NOTRACK) |
@@ -217,10 +218,10 @@ static inline void sysfs_slab_remove(struct kmem_cache *s) | |||
217 | 218 | ||
218 | #endif | 219 | #endif |
219 | 220 | ||
220 | static inline void stat(struct kmem_cache_cpu *c, enum stat_item si) | 221 | static inline void stat(struct kmem_cache *s, enum stat_item si) |
221 | { | 222 | { |
222 | #ifdef CONFIG_SLUB_STATS | 223 | #ifdef CONFIG_SLUB_STATS |
223 | c->stat[si]++; | 224 | __this_cpu_inc(s->cpu_slab->stat[si]); |
224 | #endif | 225 | #endif |
225 | } | 226 | } |
226 | 227 | ||
@@ -242,15 +243,6 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) | |||
242 | #endif | 243 | #endif |
243 | } | 244 | } |
244 | 245 | ||
245 | static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu) | ||
246 | { | ||
247 | #ifdef CONFIG_SMP | ||
248 | return s->cpu_slab[cpu]; | ||
249 | #else | ||
250 | return &s->cpu_slab; | ||
251 | #endif | ||
252 | } | ||
253 | |||
254 | /* Verify that a pointer has an address that is valid within a slab page */ | 246 | /* Verify that a pointer has an address that is valid within a slab page */ |
255 | static inline int check_valid_pointer(struct kmem_cache *s, | 247 | static inline int check_valid_pointer(struct kmem_cache *s, |
256 | struct page *page, const void *object) | 248 | struct page *page, const void *object) |
@@ -269,13 +261,6 @@ static inline int check_valid_pointer(struct kmem_cache *s, | |||
269 | return 1; | 261 | return 1; |
270 | } | 262 | } |
271 | 263 | ||
272 | /* | ||
273 | * Slow version of get and set free pointer. | ||
274 | * | ||
275 | * This version requires touching the cache lines of kmem_cache which | ||
276 | * we avoid to do in the fast alloc free paths. There we obtain the offset | ||
277 | * from the page struct. | ||
278 | */ | ||
279 | static inline void *get_freepointer(struct kmem_cache *s, void *object) | 264 | static inline void *get_freepointer(struct kmem_cache *s, void *object) |
280 | { | 265 | { |
281 | return *(void **)(object + s->offset); | 266 | return *(void **)(object + s->offset); |
@@ -1020,6 +1005,9 @@ static int __init setup_slub_debug(char *str) | |||
1020 | case 't': | 1005 | case 't': |
1021 | slub_debug |= SLAB_TRACE; | 1006 | slub_debug |= SLAB_TRACE; |
1022 | break; | 1007 | break; |
1008 | case 'a': | ||
1009 | slub_debug |= SLAB_FAILSLAB; | ||
1010 | break; | ||
1023 | default: | 1011 | default: |
1024 | printk(KERN_ERR "slub_debug option '%c' " | 1012 | printk(KERN_ERR "slub_debug option '%c' " |
1025 | "unknown. skipped\n", *str); | 1013 | "unknown. skipped\n", *str); |
@@ -1124,7 +1112,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
1124 | if (!page) | 1112 | if (!page) |
1125 | return NULL; | 1113 | return NULL; |
1126 | 1114 | ||
1127 | stat(get_cpu_slab(s, raw_smp_processor_id()), ORDER_FALLBACK); | 1115 | stat(s, ORDER_FALLBACK); |
1128 | } | 1116 | } |
1129 | 1117 | ||
1130 | if (kmemcheck_enabled | 1118 | if (kmemcheck_enabled |
@@ -1422,23 +1410,22 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node) | |||
1422 | static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) | 1410 | static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) |
1423 | { | 1411 | { |
1424 | struct kmem_cache_node *n = get_node(s, page_to_nid(page)); | 1412 | struct kmem_cache_node *n = get_node(s, page_to_nid(page)); |
1425 | struct kmem_cache_cpu *c = get_cpu_slab(s, smp_processor_id()); | ||
1426 | 1413 | ||
1427 | __ClearPageSlubFrozen(page); | 1414 | __ClearPageSlubFrozen(page); |
1428 | if (page->inuse) { | 1415 | if (page->inuse) { |
1429 | 1416 | ||
1430 | if (page->freelist) { | 1417 | if (page->freelist) { |
1431 | add_partial(n, page, tail); | 1418 | add_partial(n, page, tail); |
1432 | stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); | 1419 | stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); |
1433 | } else { | 1420 | } else { |
1434 | stat(c, DEACTIVATE_FULL); | 1421 | stat(s, DEACTIVATE_FULL); |
1435 | if (SLABDEBUG && PageSlubDebug(page) && | 1422 | if (SLABDEBUG && PageSlubDebug(page) && |
1436 | (s->flags & SLAB_STORE_USER)) | 1423 | (s->flags & SLAB_STORE_USER)) |
1437 | add_full(n, page); | 1424 | add_full(n, page); |
1438 | } | 1425 | } |
1439 | slab_unlock(page); | 1426 | slab_unlock(page); |
1440 | } else { | 1427 | } else { |
1441 | stat(c, DEACTIVATE_EMPTY); | 1428 | stat(s, DEACTIVATE_EMPTY); |
1442 | if (n->nr_partial < s->min_partial) { | 1429 | if (n->nr_partial < s->min_partial) { |
1443 | /* | 1430 | /* |
1444 | * Adding an empty slab to the partial slabs in order | 1431 | * Adding an empty slab to the partial slabs in order |
@@ -1454,7 +1441,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) | |||
1454 | slab_unlock(page); | 1441 | slab_unlock(page); |
1455 | } else { | 1442 | } else { |
1456 | slab_unlock(page); | 1443 | slab_unlock(page); |
1457 | stat(get_cpu_slab(s, raw_smp_processor_id()), FREE_SLAB); | 1444 | stat(s, FREE_SLAB); |
1458 | discard_slab(s, page); | 1445 | discard_slab(s, page); |
1459 | } | 1446 | } |
1460 | } | 1447 | } |
@@ -1469,7 +1456,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | |||
1469 | int tail = 1; | 1456 | int tail = 1; |
1470 | 1457 | ||
1471 | if (page->freelist) | 1458 | if (page->freelist) |
1472 | stat(c, DEACTIVATE_REMOTE_FREES); | 1459 | stat(s, DEACTIVATE_REMOTE_FREES); |
1473 | /* | 1460 | /* |
1474 | * Merge cpu freelist into slab freelist. Typically we get here | 1461 | * Merge cpu freelist into slab freelist. Typically we get here |
1475 | * because both freelists are empty. So this is unlikely | 1462 | * because both freelists are empty. So this is unlikely |
@@ -1482,10 +1469,10 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | |||
1482 | 1469 | ||
1483 | /* Retrieve object from cpu_freelist */ | 1470 | /* Retrieve object from cpu_freelist */ |
1484 | object = c->freelist; | 1471 | object = c->freelist; |
1485 | c->freelist = c->freelist[c->offset]; | 1472 | c->freelist = get_freepointer(s, c->freelist); |
1486 | 1473 | ||
1487 | /* And put onto the regular freelist */ | 1474 | /* And put onto the regular freelist */ |
1488 | object[c->offset] = page->freelist; | 1475 | set_freepointer(s, object, page->freelist); |
1489 | page->freelist = object; | 1476 | page->freelist = object; |
1490 | page->inuse--; | 1477 | page->inuse--; |
1491 | } | 1478 | } |
@@ -1495,7 +1482,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | |||
1495 | 1482 | ||
1496 | static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | 1483 | static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) |
1497 | { | 1484 | { |
1498 | stat(c, CPUSLAB_FLUSH); | 1485 | stat(s, CPUSLAB_FLUSH); |
1499 | slab_lock(c->page); | 1486 | slab_lock(c->page); |
1500 | deactivate_slab(s, c); | 1487 | deactivate_slab(s, c); |
1501 | } | 1488 | } |
@@ -1507,7 +1494,7 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | |||
1507 | */ | 1494 | */ |
1508 | static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) | 1495 | static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) |
1509 | { | 1496 | { |
1510 | struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); | 1497 | struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); |
1511 | 1498 | ||
1512 | if (likely(c && c->page)) | 1499 | if (likely(c && c->page)) |
1513 | flush_slab(s, c); | 1500 | flush_slab(s, c); |
@@ -1635,7 +1622,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | |||
1635 | if (unlikely(!node_match(c, node))) | 1622 | if (unlikely(!node_match(c, node))) |
1636 | goto another_slab; | 1623 | goto another_slab; |
1637 | 1624 | ||
1638 | stat(c, ALLOC_REFILL); | 1625 | stat(s, ALLOC_REFILL); |
1639 | 1626 | ||
1640 | load_freelist: | 1627 | load_freelist: |
1641 | object = c->page->freelist; | 1628 | object = c->page->freelist; |
@@ -1644,13 +1631,13 @@ load_freelist: | |||
1644 | if (unlikely(SLABDEBUG && PageSlubDebug(c->page))) | 1631 | if (unlikely(SLABDEBUG && PageSlubDebug(c->page))) |
1645 | goto debug; | 1632 | goto debug; |
1646 | 1633 | ||
1647 | c->freelist = object[c->offset]; | 1634 | c->freelist = get_freepointer(s, object); |
1648 | c->page->inuse = c->page->objects; | 1635 | c->page->inuse = c->page->objects; |
1649 | c->page->freelist = NULL; | 1636 | c->page->freelist = NULL; |
1650 | c->node = page_to_nid(c->page); | 1637 | c->node = page_to_nid(c->page); |
1651 | unlock_out: | 1638 | unlock_out: |
1652 | slab_unlock(c->page); | 1639 | slab_unlock(c->page); |
1653 | stat(c, ALLOC_SLOWPATH); | 1640 | stat(s, ALLOC_SLOWPATH); |
1654 | return object; | 1641 | return object; |
1655 | 1642 | ||
1656 | another_slab: | 1643 | another_slab: |
@@ -1660,7 +1647,7 @@ new_slab: | |||
1660 | new = get_partial(s, gfpflags, node); | 1647 | new = get_partial(s, gfpflags, node); |
1661 | if (new) { | 1648 | if (new) { |
1662 | c->page = new; | 1649 | c->page = new; |
1663 | stat(c, ALLOC_FROM_PARTIAL); | 1650 | stat(s, ALLOC_FROM_PARTIAL); |
1664 | goto load_freelist; | 1651 | goto load_freelist; |
1665 | } | 1652 | } |
1666 | 1653 | ||
@@ -1673,8 +1660,8 @@ new_slab: | |||
1673 | local_irq_disable(); | 1660 | local_irq_disable(); |
1674 | 1661 | ||
1675 | if (new) { | 1662 | if (new) { |
1676 | c = get_cpu_slab(s, smp_processor_id()); | 1663 | c = __this_cpu_ptr(s->cpu_slab); |
1677 | stat(c, ALLOC_SLAB); | 1664 | stat(s, ALLOC_SLAB); |
1678 | if (c->page) | 1665 | if (c->page) |
1679 | flush_slab(s, c); | 1666 | flush_slab(s, c); |
1680 | slab_lock(new); | 1667 | slab_lock(new); |
@@ -1690,7 +1677,7 @@ debug: | |||
1690 | goto another_slab; | 1677 | goto another_slab; |
1691 | 1678 | ||
1692 | c->page->inuse++; | 1679 | c->page->inuse++; |
1693 | c->page->freelist = object[c->offset]; | 1680 | c->page->freelist = get_freepointer(s, object); |
1694 | c->node = -1; | 1681 | c->node = -1; |
1695 | goto unlock_out; | 1682 | goto unlock_out; |
1696 | } | 1683 | } |
@@ -1711,35 +1698,33 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, | |||
1711 | void **object; | 1698 | void **object; |
1712 | struct kmem_cache_cpu *c; | 1699 | struct kmem_cache_cpu *c; |
1713 | unsigned long flags; | 1700 | unsigned long flags; |
1714 | unsigned int objsize; | ||
1715 | 1701 | ||
1716 | gfpflags &= gfp_allowed_mask; | 1702 | gfpflags &= gfp_allowed_mask; |
1717 | 1703 | ||
1718 | lockdep_trace_alloc(gfpflags); | 1704 | lockdep_trace_alloc(gfpflags); |
1719 | might_sleep_if(gfpflags & __GFP_WAIT); | 1705 | might_sleep_if(gfpflags & __GFP_WAIT); |
1720 | 1706 | ||
1721 | if (should_failslab(s->objsize, gfpflags)) | 1707 | if (should_failslab(s->objsize, gfpflags, s->flags)) |
1722 | return NULL; | 1708 | return NULL; |
1723 | 1709 | ||
1724 | local_irq_save(flags); | 1710 | local_irq_save(flags); |
1725 | c = get_cpu_slab(s, smp_processor_id()); | 1711 | c = __this_cpu_ptr(s->cpu_slab); |
1726 | objsize = c->objsize; | 1712 | object = c->freelist; |
1727 | if (unlikely(!c->freelist || !node_match(c, node))) | 1713 | if (unlikely(!object || !node_match(c, node))) |
1728 | 1714 | ||
1729 | object = __slab_alloc(s, gfpflags, node, addr, c); | 1715 | object = __slab_alloc(s, gfpflags, node, addr, c); |
1730 | 1716 | ||
1731 | else { | 1717 | else { |
1732 | object = c->freelist; | 1718 | c->freelist = get_freepointer(s, object); |
1733 | c->freelist = object[c->offset]; | 1719 | stat(s, ALLOC_FASTPATH); |
1734 | stat(c, ALLOC_FASTPATH); | ||
1735 | } | 1720 | } |
1736 | local_irq_restore(flags); | 1721 | local_irq_restore(flags); |
1737 | 1722 | ||
1738 | if (unlikely(gfpflags & __GFP_ZERO) && object) | 1723 | if (unlikely(gfpflags & __GFP_ZERO) && object) |
1739 | memset(object, 0, objsize); | 1724 | memset(object, 0, s->objsize); |
1740 | 1725 | ||
1741 | kmemcheck_slab_alloc(s, gfpflags, object, c->objsize); | 1726 | kmemcheck_slab_alloc(s, gfpflags, object, s->objsize); |
1742 | kmemleak_alloc_recursive(object, objsize, 1, s->flags, gfpflags); | 1727 | kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, gfpflags); |
1743 | 1728 | ||
1744 | return object; | 1729 | return object; |
1745 | } | 1730 | } |
@@ -1794,26 +1779,25 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_notrace); | |||
1794 | * handling required then we can return immediately. | 1779 | * handling required then we can return immediately. |
1795 | */ | 1780 | */ |
1796 | static void __slab_free(struct kmem_cache *s, struct page *page, | 1781 | static void __slab_free(struct kmem_cache *s, struct page *page, |
1797 | void *x, unsigned long addr, unsigned int offset) | 1782 | void *x, unsigned long addr) |
1798 | { | 1783 | { |
1799 | void *prior; | 1784 | void *prior; |
1800 | void **object = (void *)x; | 1785 | void **object = (void *)x; |
1801 | struct kmem_cache_cpu *c; | ||
1802 | 1786 | ||
1803 | c = get_cpu_slab(s, raw_smp_processor_id()); | 1787 | stat(s, FREE_SLOWPATH); |
1804 | stat(c, FREE_SLOWPATH); | ||
1805 | slab_lock(page); | 1788 | slab_lock(page); |
1806 | 1789 | ||
1807 | if (unlikely(SLABDEBUG && PageSlubDebug(page))) | 1790 | if (unlikely(SLABDEBUG && PageSlubDebug(page))) |
1808 | goto debug; | 1791 | goto debug; |
1809 | 1792 | ||
1810 | checks_ok: | 1793 | checks_ok: |
1811 | prior = object[offset] = page->freelist; | 1794 | prior = page->freelist; |
1795 | set_freepointer(s, object, prior); | ||
1812 | page->freelist = object; | 1796 | page->freelist = object; |
1813 | page->inuse--; | 1797 | page->inuse--; |
1814 | 1798 | ||
1815 | if (unlikely(PageSlubFrozen(page))) { | 1799 | if (unlikely(PageSlubFrozen(page))) { |
1816 | stat(c, FREE_FROZEN); | 1800 | stat(s, FREE_FROZEN); |
1817 | goto out_unlock; | 1801 | goto out_unlock; |
1818 | } | 1802 | } |
1819 | 1803 | ||
@@ -1826,7 +1810,7 @@ checks_ok: | |||
1826 | */ | 1810 | */ |
1827 | if (unlikely(!prior)) { | 1811 | if (unlikely(!prior)) { |
1828 | add_partial(get_node(s, page_to_nid(page)), page, 1); | 1812 | add_partial(get_node(s, page_to_nid(page)), page, 1); |
1829 | stat(c, FREE_ADD_PARTIAL); | 1813 | stat(s, FREE_ADD_PARTIAL); |
1830 | } | 1814 | } |
1831 | 1815 | ||
1832 | out_unlock: | 1816 | out_unlock: |
@@ -1839,10 +1823,10 @@ slab_empty: | |||
1839 | * Slab still on the partial list. | 1823 | * Slab still on the partial list. |
1840 | */ | 1824 | */ |
1841 | remove_partial(s, page); | 1825 | remove_partial(s, page); |
1842 | stat(c, FREE_REMOVE_PARTIAL); | 1826 | stat(s, FREE_REMOVE_PARTIAL); |
1843 | } | 1827 | } |
1844 | slab_unlock(page); | 1828 | slab_unlock(page); |
1845 | stat(c, FREE_SLAB); | 1829 | stat(s, FREE_SLAB); |
1846 | discard_slab(s, page); | 1830 | discard_slab(s, page); |
1847 | return; | 1831 | return; |
1848 | 1832 | ||
@@ -1872,17 +1856,17 @@ static __always_inline void slab_free(struct kmem_cache *s, | |||
1872 | 1856 | ||
1873 | kmemleak_free_recursive(x, s->flags); | 1857 | kmemleak_free_recursive(x, s->flags); |
1874 | local_irq_save(flags); | 1858 | local_irq_save(flags); |
1875 | c = get_cpu_slab(s, smp_processor_id()); | 1859 | c = __this_cpu_ptr(s->cpu_slab); |
1876 | kmemcheck_slab_free(s, object, c->objsize); | 1860 | kmemcheck_slab_free(s, object, s->objsize); |
1877 | debug_check_no_locks_freed(object, c->objsize); | 1861 | debug_check_no_locks_freed(object, s->objsize); |
1878 | if (!(s->flags & SLAB_DEBUG_OBJECTS)) | 1862 | if (!(s->flags & SLAB_DEBUG_OBJECTS)) |
1879 | debug_check_no_obj_freed(object, c->objsize); | 1863 | debug_check_no_obj_freed(object, s->objsize); |
1880 | if (likely(page == c->page && c->node >= 0)) { | 1864 | if (likely(page == c->page && c->node >= 0)) { |
1881 | object[c->offset] = c->freelist; | 1865 | set_freepointer(s, object, c->freelist); |
1882 | c->freelist = object; | 1866 | c->freelist = object; |
1883 | stat(c, FREE_FASTPATH); | 1867 | stat(s, FREE_FASTPATH); |
1884 | } else | 1868 | } else |
1885 | __slab_free(s, page, x, addr, c->offset); | 1869 | __slab_free(s, page, x, addr); |
1886 | 1870 | ||
1887 | local_irq_restore(flags); | 1871 | local_irq_restore(flags); |
1888 | } | 1872 | } |
@@ -2069,19 +2053,6 @@ static unsigned long calculate_alignment(unsigned long flags, | |||
2069 | return ALIGN(align, sizeof(void *)); | 2053 | return ALIGN(align, sizeof(void *)); |
2070 | } | 2054 | } |
2071 | 2055 | ||
2072 | static void init_kmem_cache_cpu(struct kmem_cache *s, | ||
2073 | struct kmem_cache_cpu *c) | ||
2074 | { | ||
2075 | c->page = NULL; | ||
2076 | c->freelist = NULL; | ||
2077 | c->node = 0; | ||
2078 | c->offset = s->offset / sizeof(void *); | ||
2079 | c->objsize = s->objsize; | ||
2080 | #ifdef CONFIG_SLUB_STATS | ||
2081 | memset(c->stat, 0, NR_SLUB_STAT_ITEMS * sizeof(unsigned)); | ||
2082 | #endif | ||
2083 | } | ||
2084 | |||
2085 | static void | 2056 | static void |
2086 | init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) | 2057 | init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) |
2087 | { | 2058 | { |
@@ -2095,130 +2066,24 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) | |||
2095 | #endif | 2066 | #endif |
2096 | } | 2067 | } |
2097 | 2068 | ||
2098 | #ifdef CONFIG_SMP | 2069 | static DEFINE_PER_CPU(struct kmem_cache_cpu, kmalloc_percpu[KMALLOC_CACHES]); |
2099 | /* | ||
2100 | * Per cpu array for per cpu structures. | ||
2101 | * | ||
2102 | * The per cpu array places all kmem_cache_cpu structures from one processor | ||
2103 | * close together meaning that it becomes possible that multiple per cpu | ||
2104 | * structures are contained in one cacheline. This may be particularly | ||
2105 | * beneficial for the kmalloc caches. | ||
2106 | * | ||
2107 | * A desktop system typically has around 60-80 slabs. With 100 here we are | ||
2108 | * likely able to get per cpu structures for all caches from the array defined | ||
2109 | * here. We must be able to cover all kmalloc caches during bootstrap. | ||
2110 | * | ||
2111 | * If the per cpu array is exhausted then fall back to kmalloc | ||
2112 | * of individual cachelines. No sharing is possible then. | ||
2113 | */ | ||
2114 | #define NR_KMEM_CACHE_CPU 100 | ||
2115 | |||
2116 | static DEFINE_PER_CPU(struct kmem_cache_cpu [NR_KMEM_CACHE_CPU], | ||
2117 | kmem_cache_cpu); | ||
2118 | |||
2119 | static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free); | ||
2120 | static DECLARE_BITMAP(kmem_cach_cpu_free_init_once, CONFIG_NR_CPUS); | ||
2121 | |||
2122 | static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s, | ||
2123 | int cpu, gfp_t flags) | ||
2124 | { | ||
2125 | struct kmem_cache_cpu *c = per_cpu(kmem_cache_cpu_free, cpu); | ||
2126 | |||
2127 | if (c) | ||
2128 | per_cpu(kmem_cache_cpu_free, cpu) = | ||
2129 | (void *)c->freelist; | ||
2130 | else { | ||
2131 | /* Table overflow: So allocate ourselves */ | ||
2132 | c = kmalloc_node( | ||
2133 | ALIGN(sizeof(struct kmem_cache_cpu), cache_line_size()), | ||
2134 | flags, cpu_to_node(cpu)); | ||
2135 | if (!c) | ||
2136 | return NULL; | ||
2137 | } | ||
2138 | |||
2139 | init_kmem_cache_cpu(s, c); | ||
2140 | return c; | ||
2141 | } | ||
2142 | |||
2143 | static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu) | ||
2144 | { | ||
2145 | if (c < per_cpu(kmem_cache_cpu, cpu) || | ||
2146 | c >= per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) { | ||
2147 | kfree(c); | ||
2148 | return; | ||
2149 | } | ||
2150 | c->freelist = (void *)per_cpu(kmem_cache_cpu_free, cpu); | ||
2151 | per_cpu(kmem_cache_cpu_free, cpu) = c; | ||
2152 | } | ||
2153 | |||
2154 | static void free_kmem_cache_cpus(struct kmem_cache *s) | ||
2155 | { | ||
2156 | int cpu; | ||
2157 | |||
2158 | for_each_online_cpu(cpu) { | ||
2159 | struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); | ||
2160 | |||
2161 | if (c) { | ||
2162 | s->cpu_slab[cpu] = NULL; | ||
2163 | free_kmem_cache_cpu(c, cpu); | ||
2164 | } | ||
2165 | } | ||
2166 | } | ||
2167 | |||
2168 | static int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) | ||
2169 | { | ||
2170 | int cpu; | ||
2171 | |||
2172 | for_each_online_cpu(cpu) { | ||
2173 | struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); | ||
2174 | |||
2175 | if (c) | ||
2176 | continue; | ||
2177 | |||
2178 | c = alloc_kmem_cache_cpu(s, cpu, flags); | ||
2179 | if (!c) { | ||
2180 | free_kmem_cache_cpus(s); | ||
2181 | return 0; | ||
2182 | } | ||
2183 | s->cpu_slab[cpu] = c; | ||
2184 | } | ||
2185 | return 1; | ||
2186 | } | ||
2187 | |||
2188 | /* | ||
2189 | * Initialize the per cpu array. | ||
2190 | */ | ||
2191 | static void init_alloc_cpu_cpu(int cpu) | ||
2192 | { | ||
2193 | int i; | ||
2194 | 2070 | ||
2195 | if (cpumask_test_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once))) | 2071 | static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) |
2196 | return; | ||
2197 | |||
2198 | for (i = NR_KMEM_CACHE_CPU - 1; i >= 0; i--) | ||
2199 | free_kmem_cache_cpu(&per_cpu(kmem_cache_cpu, cpu)[i], cpu); | ||
2200 | |||
2201 | cpumask_set_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once)); | ||
2202 | } | ||
2203 | |||
2204 | static void __init init_alloc_cpu(void) | ||
2205 | { | 2072 | { |
2206 | int cpu; | 2073 | if (s < kmalloc_caches + KMALLOC_CACHES && s >= kmalloc_caches) |
2207 | 2074 | /* | |
2208 | for_each_online_cpu(cpu) | 2075 | * Boot time creation of the kmalloc array. Use static per cpu data |
2209 | init_alloc_cpu_cpu(cpu); | 2076 | * since the per cpu allocator is not available yet. |
2210 | } | 2077 | */ |
2078 | s->cpu_slab = per_cpu_var(kmalloc_percpu) + (s - kmalloc_caches); | ||
2079 | else | ||
2080 | s->cpu_slab = alloc_percpu(struct kmem_cache_cpu); | ||
2211 | 2081 | ||
2212 | #else | 2082 | if (!s->cpu_slab) |
2213 | static inline void free_kmem_cache_cpus(struct kmem_cache *s) {} | 2083 | return 0; |
2214 | static inline void init_alloc_cpu(void) {} | ||
2215 | 2084 | ||
2216 | static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) | ||
2217 | { | ||
2218 | init_kmem_cache_cpu(s, &s->cpu_slab); | ||
2219 | return 1; | 2085 | return 1; |
2220 | } | 2086 | } |
2221 | #endif | ||
2222 | 2087 | ||
2223 | #ifdef CONFIG_NUMA | 2088 | #ifdef CONFIG_NUMA |
2224 | /* | 2089 | /* |
@@ -2287,7 +2152,8 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags) | |||
2287 | int node; | 2152 | int node; |
2288 | int local_node; | 2153 | int local_node; |
2289 | 2154 | ||
2290 | if (slab_state >= UP) | 2155 | if (slab_state >= UP && (s < kmalloc_caches || |
2156 | s > kmalloc_caches + KMALLOC_CACHES)) | ||
2291 | local_node = page_to_nid(virt_to_page(s)); | 2157 | local_node = page_to_nid(virt_to_page(s)); |
2292 | else | 2158 | else |
2293 | local_node = 0; | 2159 | local_node = 0; |
@@ -2502,6 +2368,7 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, | |||
2502 | 2368 | ||
2503 | if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA)) | 2369 | if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA)) |
2504 | return 1; | 2370 | return 1; |
2371 | |||
2505 | free_kmem_cache_nodes(s); | 2372 | free_kmem_cache_nodes(s); |
2506 | error: | 2373 | error: |
2507 | if (flags & SLAB_PANIC) | 2374 | if (flags & SLAB_PANIC) |
@@ -2609,9 +2476,8 @@ static inline int kmem_cache_close(struct kmem_cache *s) | |||
2609 | int node; | 2476 | int node; |
2610 | 2477 | ||
2611 | flush_all(s); | 2478 | flush_all(s); |
2612 | 2479 | free_percpu(s->cpu_slab); | |
2613 | /* Attempt to free all objects */ | 2480 | /* Attempt to free all objects */ |
2614 | free_kmem_cache_cpus(s); | ||
2615 | for_each_node_state(node, N_NORMAL_MEMORY) { | 2481 | for_each_node_state(node, N_NORMAL_MEMORY) { |
2616 | struct kmem_cache_node *n = get_node(s, node); | 2482 | struct kmem_cache_node *n = get_node(s, node); |
2617 | 2483 | ||
@@ -2651,7 +2517,7 @@ EXPORT_SYMBOL(kmem_cache_destroy); | |||
2651 | * Kmalloc subsystem | 2517 | * Kmalloc subsystem |
2652 | *******************************************************************/ | 2518 | *******************************************************************/ |
2653 | 2519 | ||
2654 | struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT] __cacheline_aligned; | 2520 | struct kmem_cache kmalloc_caches[KMALLOC_CACHES] __cacheline_aligned; |
2655 | EXPORT_SYMBOL(kmalloc_caches); | 2521 | EXPORT_SYMBOL(kmalloc_caches); |
2656 | 2522 | ||
2657 | static int __init setup_slub_min_order(char *str) | 2523 | static int __init setup_slub_min_order(char *str) |
@@ -2741,6 +2607,7 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) | |||
2741 | char *text; | 2607 | char *text; |
2742 | size_t realsize; | 2608 | size_t realsize; |
2743 | unsigned long slabflags; | 2609 | unsigned long slabflags; |
2610 | int i; | ||
2744 | 2611 | ||
2745 | s = kmalloc_caches_dma[index]; | 2612 | s = kmalloc_caches_dma[index]; |
2746 | if (s) | 2613 | if (s) |
@@ -2760,7 +2627,14 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) | |||
2760 | realsize = kmalloc_caches[index].objsize; | 2627 | realsize = kmalloc_caches[index].objsize; |
2761 | text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", | 2628 | text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", |
2762 | (unsigned int)realsize); | 2629 | (unsigned int)realsize); |
2763 | s = kmalloc(kmem_size, flags & ~SLUB_DMA); | 2630 | |
2631 | s = NULL; | ||
2632 | for (i = 0; i < KMALLOC_CACHES; i++) | ||
2633 | if (!kmalloc_caches[i].size) | ||
2634 | break; | ||
2635 | |||
2636 | BUG_ON(i >= KMALLOC_CACHES); | ||
2637 | s = kmalloc_caches + i; | ||
2764 | 2638 | ||
2765 | /* | 2639 | /* |
2766 | * Must defer sysfs creation to a workqueue because we don't know | 2640 | * Must defer sysfs creation to a workqueue because we don't know |
@@ -2772,9 +2646,9 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) | |||
2772 | if (slab_state >= SYSFS) | 2646 | if (slab_state >= SYSFS) |
2773 | slabflags |= __SYSFS_ADD_DEFERRED; | 2647 | slabflags |= __SYSFS_ADD_DEFERRED; |
2774 | 2648 | ||
2775 | if (!s || !text || !kmem_cache_open(s, flags, text, | 2649 | if (!text || !kmem_cache_open(s, flags, text, |
2776 | realsize, ARCH_KMALLOC_MINALIGN, slabflags, NULL)) { | 2650 | realsize, ARCH_KMALLOC_MINALIGN, slabflags, NULL)) { |
2777 | kfree(s); | 2651 | s->size = 0; |
2778 | kfree(text); | 2652 | kfree(text); |
2779 | goto unlock_out; | 2653 | goto unlock_out; |
2780 | } | 2654 | } |
@@ -3176,8 +3050,6 @@ void __init kmem_cache_init(void) | |||
3176 | int i; | 3050 | int i; |
3177 | int caches = 0; | 3051 | int caches = 0; |
3178 | 3052 | ||
3179 | init_alloc_cpu(); | ||
3180 | |||
3181 | #ifdef CONFIG_NUMA | 3053 | #ifdef CONFIG_NUMA |
3182 | /* | 3054 | /* |
3183 | * Must first have the slab cache available for the allocations of the | 3055 | * Must first have the slab cache available for the allocations of the |
@@ -3261,8 +3133,10 @@ void __init kmem_cache_init(void) | |||
3261 | 3133 | ||
3262 | #ifdef CONFIG_SMP | 3134 | #ifdef CONFIG_SMP |
3263 | register_cpu_notifier(&slab_notifier); | 3135 | register_cpu_notifier(&slab_notifier); |
3264 | kmem_size = offsetof(struct kmem_cache, cpu_slab) + | 3136 | #endif |
3265 | nr_cpu_ids * sizeof(struct kmem_cache_cpu *); | 3137 | #ifdef CONFIG_NUMA |
3138 | kmem_size = offsetof(struct kmem_cache, node) + | ||
3139 | nr_node_ids * sizeof(struct kmem_cache_node *); | ||
3266 | #else | 3140 | #else |
3267 | kmem_size = sizeof(struct kmem_cache); | 3141 | kmem_size = sizeof(struct kmem_cache); |
3268 | #endif | 3142 | #endif |
@@ -3351,22 +3225,12 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, | |||
3351 | down_write(&slub_lock); | 3225 | down_write(&slub_lock); |
3352 | s = find_mergeable(size, align, flags, name, ctor); | 3226 | s = find_mergeable(size, align, flags, name, ctor); |
3353 | if (s) { | 3227 | if (s) { |
3354 | int cpu; | ||
3355 | |||
3356 | s->refcount++; | 3228 | s->refcount++; |
3357 | /* | 3229 | /* |
3358 | * Adjust the object sizes so that we clear | 3230 | * Adjust the object sizes so that we clear |
3359 | * the complete object on kzalloc. | 3231 | * the complete object on kzalloc. |
3360 | */ | 3232 | */ |
3361 | s->objsize = max(s->objsize, (int)size); | 3233 | s->objsize = max(s->objsize, (int)size); |
3362 | |||
3363 | /* | ||
3364 | * And then we need to update the object size in the | ||
3365 | * per cpu structures | ||
3366 | */ | ||
3367 | for_each_online_cpu(cpu) | ||
3368 | get_cpu_slab(s, cpu)->objsize = s->objsize; | ||
3369 | |||
3370 | s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); | 3234 | s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); |
3371 | up_write(&slub_lock); | 3235 | up_write(&slub_lock); |
3372 | 3236 | ||
@@ -3420,29 +3284,15 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, | |||
3420 | unsigned long flags; | 3284 | unsigned long flags; |
3421 | 3285 | ||
3422 | switch (action) { | 3286 | switch (action) { |
3423 | case CPU_UP_PREPARE: | ||
3424 | case CPU_UP_PREPARE_FROZEN: | ||
3425 | init_alloc_cpu_cpu(cpu); | ||
3426 | down_read(&slub_lock); | ||
3427 | list_for_each_entry(s, &slab_caches, list) | ||
3428 | s->cpu_slab[cpu] = alloc_kmem_cache_cpu(s, cpu, | ||
3429 | GFP_KERNEL); | ||
3430 | up_read(&slub_lock); | ||
3431 | break; | ||
3432 | |||
3433 | case CPU_UP_CANCELED: | 3287 | case CPU_UP_CANCELED: |
3434 | case CPU_UP_CANCELED_FROZEN: | 3288 | case CPU_UP_CANCELED_FROZEN: |
3435 | case CPU_DEAD: | 3289 | case CPU_DEAD: |
3436 | case CPU_DEAD_FROZEN: | 3290 | case CPU_DEAD_FROZEN: |
3437 | down_read(&slub_lock); | 3291 | down_read(&slub_lock); |
3438 | list_for_each_entry(s, &slab_caches, list) { | 3292 | list_for_each_entry(s, &slab_caches, list) { |
3439 | struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); | ||
3440 | |||
3441 | local_irq_save(flags); | 3293 | local_irq_save(flags); |
3442 | __flush_cpu_slab(s, cpu); | 3294 | __flush_cpu_slab(s, cpu); |
3443 | local_irq_restore(flags); | 3295 | local_irq_restore(flags); |
3444 | free_kmem_cache_cpu(c, cpu); | ||
3445 | s->cpu_slab[cpu] = NULL; | ||
3446 | } | 3296 | } |
3447 | up_read(&slub_lock); | 3297 | up_read(&slub_lock); |
3448 | break; | 3298 | break; |
@@ -3928,7 +3778,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s, | |||
3928 | int cpu; | 3778 | int cpu; |
3929 | 3779 | ||
3930 | for_each_possible_cpu(cpu) { | 3780 | for_each_possible_cpu(cpu) { |
3931 | struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); | 3781 | struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); |
3932 | 3782 | ||
3933 | if (!c || c->node < 0) | 3783 | if (!c || c->node < 0) |
3934 | continue; | 3784 | continue; |
@@ -4171,6 +4021,23 @@ static ssize_t trace_store(struct kmem_cache *s, const char *buf, | |||
4171 | } | 4021 | } |
4172 | SLAB_ATTR(trace); | 4022 | SLAB_ATTR(trace); |
4173 | 4023 | ||
4024 | #ifdef CONFIG_FAILSLAB | ||
4025 | static ssize_t failslab_show(struct kmem_cache *s, char *buf) | ||
4026 | { | ||
4027 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB)); | ||
4028 | } | ||
4029 | |||
4030 | static ssize_t failslab_store(struct kmem_cache *s, const char *buf, | ||
4031 | size_t length) | ||
4032 | { | ||
4033 | s->flags &= ~SLAB_FAILSLAB; | ||
4034 | if (buf[0] == '1') | ||
4035 | s->flags |= SLAB_FAILSLAB; | ||
4036 | return length; | ||
4037 | } | ||
4038 | SLAB_ATTR(failslab); | ||
4039 | #endif | ||
4040 | |||
4174 | static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf) | 4041 | static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf) |
4175 | { | 4042 | { |
4176 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT)); | 4043 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT)); |
@@ -4353,7 +4220,7 @@ static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) | |||
4353 | return -ENOMEM; | 4220 | return -ENOMEM; |
4354 | 4221 | ||
4355 | for_each_online_cpu(cpu) { | 4222 | for_each_online_cpu(cpu) { |
4356 | unsigned x = get_cpu_slab(s, cpu)->stat[si]; | 4223 | unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si]; |
4357 | 4224 | ||
4358 | data[cpu] = x; | 4225 | data[cpu] = x; |
4359 | sum += x; | 4226 | sum += x; |
@@ -4376,7 +4243,7 @@ static void clear_stat(struct kmem_cache *s, enum stat_item si) | |||
4376 | int cpu; | 4243 | int cpu; |
4377 | 4244 | ||
4378 | for_each_online_cpu(cpu) | 4245 | for_each_online_cpu(cpu) |
4379 | get_cpu_slab(s, cpu)->stat[si] = 0; | 4246 | per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0; |
4380 | } | 4247 | } |
4381 | 4248 | ||
4382 | #define STAT_ATTR(si, text) \ | 4249 | #define STAT_ATTR(si, text) \ |
@@ -4467,6 +4334,10 @@ static struct attribute *slab_attrs[] = { | |||
4467 | &deactivate_remote_frees_attr.attr, | 4334 | &deactivate_remote_frees_attr.attr, |
4468 | &order_fallback_attr.attr, | 4335 | &order_fallback_attr.attr, |
4469 | #endif | 4336 | #endif |
4337 | #ifdef CONFIG_FAILSLAB | ||
4338 | &failslab_attr.attr, | ||
4339 | #endif | ||
4340 | |||
4470 | NULL | 4341 | NULL |
4471 | }; | 4342 | }; |
4472 | 4343 | ||
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index d9714bdcb4a3..392b9bb5bc01 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c | |||
@@ -40,9 +40,11 @@ static void * __init_refok __earlyonly_bootmem_alloc(int node, | |||
40 | unsigned long align, | 40 | unsigned long align, |
41 | unsigned long goal) | 41 | unsigned long goal) |
42 | { | 42 | { |
43 | return __alloc_bootmem_node(NODE_DATA(node), size, align, goal); | 43 | return __alloc_bootmem_node_high(NODE_DATA(node), size, align, goal); |
44 | } | 44 | } |
45 | 45 | ||
46 | static void *vmemmap_buf; | ||
47 | static void *vmemmap_buf_end; | ||
46 | 48 | ||
47 | void * __meminit vmemmap_alloc_block(unsigned long size, int node) | 49 | void * __meminit vmemmap_alloc_block(unsigned long size, int node) |
48 | { | 50 | { |
@@ -64,6 +66,24 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node) | |||
64 | __pa(MAX_DMA_ADDRESS)); | 66 | __pa(MAX_DMA_ADDRESS)); |
65 | } | 67 | } |
66 | 68 | ||
69 | /* need to make sure size is all the same during early stage */ | ||
70 | void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node) | ||
71 | { | ||
72 | void *ptr; | ||
73 | |||
74 | if (!vmemmap_buf) | ||
75 | return vmemmap_alloc_block(size, node); | ||
76 | |||
77 | /* take the from buf */ | ||
78 | ptr = (void *)ALIGN((unsigned long)vmemmap_buf, size); | ||
79 | if (ptr + size > vmemmap_buf_end) | ||
80 | return vmemmap_alloc_block(size, node); | ||
81 | |||
82 | vmemmap_buf = ptr + size; | ||
83 | |||
84 | return ptr; | ||
85 | } | ||
86 | |||
67 | void __meminit vmemmap_verify(pte_t *pte, int node, | 87 | void __meminit vmemmap_verify(pte_t *pte, int node, |
68 | unsigned long start, unsigned long end) | 88 | unsigned long start, unsigned long end) |
69 | { | 89 | { |
@@ -80,7 +100,7 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node) | |||
80 | pte_t *pte = pte_offset_kernel(pmd, addr); | 100 | pte_t *pte = pte_offset_kernel(pmd, addr); |
81 | if (pte_none(*pte)) { | 101 | if (pte_none(*pte)) { |
82 | pte_t entry; | 102 | pte_t entry; |
83 | void *p = vmemmap_alloc_block(PAGE_SIZE, node); | 103 | void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node); |
84 | if (!p) | 104 | if (!p) |
85 | return NULL; | 105 | return NULL; |
86 | entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); | 106 | entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); |
@@ -163,3 +183,55 @@ struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid) | |||
163 | 183 | ||
164 | return map; | 184 | return map; |
165 | } | 185 | } |
186 | |||
187 | void __init sparse_mem_maps_populate_node(struct page **map_map, | ||
188 | unsigned long pnum_begin, | ||
189 | unsigned long pnum_end, | ||
190 | unsigned long map_count, int nodeid) | ||
191 | { | ||
192 | unsigned long pnum; | ||
193 | unsigned long size = sizeof(struct page) * PAGES_PER_SECTION; | ||
194 | void *vmemmap_buf_start; | ||
195 | |||
196 | size = ALIGN(size, PMD_SIZE); | ||
197 | vmemmap_buf_start = __earlyonly_bootmem_alloc(nodeid, size * map_count, | ||
198 | PMD_SIZE, __pa(MAX_DMA_ADDRESS)); | ||
199 | |||
200 | if (vmemmap_buf_start) { | ||
201 | vmemmap_buf = vmemmap_buf_start; | ||
202 | vmemmap_buf_end = vmemmap_buf_start + size * map_count; | ||
203 | } | ||
204 | |||
205 | for (pnum = pnum_begin; pnum < pnum_end; pnum++) { | ||
206 | struct mem_section *ms; | ||
207 | |||
208 | if (!present_section_nr(pnum)) | ||
209 | continue; | ||
210 | |||
211 | map_map[pnum] = sparse_mem_map_populate(pnum, nodeid); | ||
212 | if (map_map[pnum]) | ||
213 | continue; | ||
214 | ms = __nr_to_section(pnum); | ||
215 | printk(KERN_ERR "%s: sparsemem memory map backing failed " | ||
216 | "some memory will not be available.\n", __func__); | ||
217 | ms->section_mem_map = 0; | ||
218 | } | ||
219 | |||
220 | if (vmemmap_buf_start) { | ||
221 | /* need to free left buf */ | ||
222 | #ifdef CONFIG_NO_BOOTMEM | ||
223 | free_early(__pa(vmemmap_buf_start), __pa(vmemmap_buf_end)); | ||
224 | if (vmemmap_buf_start < vmemmap_buf) { | ||
225 | char name[15]; | ||
226 | |||
227 | snprintf(name, sizeof(name), "MEMMAP %d", nodeid); | ||
228 | reserve_early_without_check(__pa(vmemmap_buf_start), | ||
229 | __pa(vmemmap_buf), name); | ||
230 | } | ||
231 | #else | ||
232 | free_bootmem(__pa(vmemmap_buf), vmemmap_buf_end - vmemmap_buf); | ||
233 | #endif | ||
234 | vmemmap_buf = NULL; | ||
235 | vmemmap_buf_end = NULL; | ||
236 | } | ||
237 | } | ||
diff --git a/mm/sparse.c b/mm/sparse.c index 6ce4aab69e99..22896d589133 100644 --- a/mm/sparse.c +++ b/mm/sparse.c | |||
@@ -271,7 +271,8 @@ static unsigned long *__kmalloc_section_usemap(void) | |||
271 | 271 | ||
272 | #ifdef CONFIG_MEMORY_HOTREMOVE | 272 | #ifdef CONFIG_MEMORY_HOTREMOVE |
273 | static unsigned long * __init | 273 | static unsigned long * __init |
274 | sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat) | 274 | sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, |
275 | unsigned long count) | ||
275 | { | 276 | { |
276 | unsigned long section_nr; | 277 | unsigned long section_nr; |
277 | 278 | ||
@@ -286,7 +287,7 @@ sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat) | |||
286 | * this problem. | 287 | * this problem. |
287 | */ | 288 | */ |
288 | section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT); | 289 | section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT); |
289 | return alloc_bootmem_section(usemap_size(), section_nr); | 290 | return alloc_bootmem_section(usemap_size() * count, section_nr); |
290 | } | 291 | } |
291 | 292 | ||
292 | static void __init check_usemap_section_nr(int nid, unsigned long *usemap) | 293 | static void __init check_usemap_section_nr(int nid, unsigned long *usemap) |
@@ -329,7 +330,8 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap) | |||
329 | } | 330 | } |
330 | #else | 331 | #else |
331 | static unsigned long * __init | 332 | static unsigned long * __init |
332 | sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat) | 333 | sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, |
334 | unsigned long count) | ||
333 | { | 335 | { |
334 | return NULL; | 336 | return NULL; |
335 | } | 337 | } |
@@ -339,27 +341,40 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap) | |||
339 | } | 341 | } |
340 | #endif /* CONFIG_MEMORY_HOTREMOVE */ | 342 | #endif /* CONFIG_MEMORY_HOTREMOVE */ |
341 | 343 | ||
342 | static unsigned long *__init sparse_early_usemap_alloc(unsigned long pnum) | 344 | static void __init sparse_early_usemaps_alloc_node(unsigned long**usemap_map, |
345 | unsigned long pnum_begin, | ||
346 | unsigned long pnum_end, | ||
347 | unsigned long usemap_count, int nodeid) | ||
343 | { | 348 | { |
344 | unsigned long *usemap; | 349 | void *usemap; |
345 | struct mem_section *ms = __nr_to_section(pnum); | 350 | unsigned long pnum; |
346 | int nid = sparse_early_nid(ms); | 351 | int size = usemap_size(); |
347 | |||
348 | usemap = sparse_early_usemap_alloc_pgdat_section(NODE_DATA(nid)); | ||
349 | if (usemap) | ||
350 | return usemap; | ||
351 | 352 | ||
352 | usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size()); | 353 | usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid), |
354 | usemap_count); | ||
353 | if (usemap) { | 355 | if (usemap) { |
354 | check_usemap_section_nr(nid, usemap); | 356 | for (pnum = pnum_begin; pnum < pnum_end; pnum++) { |
355 | return usemap; | 357 | if (!present_section_nr(pnum)) |
358 | continue; | ||
359 | usemap_map[pnum] = usemap; | ||
360 | usemap += size; | ||
361 | } | ||
362 | return; | ||
356 | } | 363 | } |
357 | 364 | ||
358 | /* Stupid: suppress gcc warning for SPARSEMEM && !NUMA */ | 365 | usemap = alloc_bootmem_node(NODE_DATA(nodeid), size * usemap_count); |
359 | nid = 0; | 366 | if (usemap) { |
367 | for (pnum = pnum_begin; pnum < pnum_end; pnum++) { | ||
368 | if (!present_section_nr(pnum)) | ||
369 | continue; | ||
370 | usemap_map[pnum] = usemap; | ||
371 | usemap += size; | ||
372 | check_usemap_section_nr(nodeid, usemap_map[pnum]); | ||
373 | } | ||
374 | return; | ||
375 | } | ||
360 | 376 | ||
361 | printk(KERN_WARNING "%s: allocation failed\n", __func__); | 377 | printk(KERN_WARNING "%s: allocation failed\n", __func__); |
362 | return NULL; | ||
363 | } | 378 | } |
364 | 379 | ||
365 | #ifndef CONFIG_SPARSEMEM_VMEMMAP | 380 | #ifndef CONFIG_SPARSEMEM_VMEMMAP |
@@ -375,8 +390,65 @@ struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid) | |||
375 | PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION)); | 390 | PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION)); |
376 | return map; | 391 | return map; |
377 | } | 392 | } |
393 | void __init sparse_mem_maps_populate_node(struct page **map_map, | ||
394 | unsigned long pnum_begin, | ||
395 | unsigned long pnum_end, | ||
396 | unsigned long map_count, int nodeid) | ||
397 | { | ||
398 | void *map; | ||
399 | unsigned long pnum; | ||
400 | unsigned long size = sizeof(struct page) * PAGES_PER_SECTION; | ||
401 | |||
402 | map = alloc_remap(nodeid, size * map_count); | ||
403 | if (map) { | ||
404 | for (pnum = pnum_begin; pnum < pnum_end; pnum++) { | ||
405 | if (!present_section_nr(pnum)) | ||
406 | continue; | ||
407 | map_map[pnum] = map; | ||
408 | map += size; | ||
409 | } | ||
410 | return; | ||
411 | } | ||
412 | |||
413 | size = PAGE_ALIGN(size); | ||
414 | map = alloc_bootmem_pages_node(NODE_DATA(nodeid), size * map_count); | ||
415 | if (map) { | ||
416 | for (pnum = pnum_begin; pnum < pnum_end; pnum++) { | ||
417 | if (!present_section_nr(pnum)) | ||
418 | continue; | ||
419 | map_map[pnum] = map; | ||
420 | map += size; | ||
421 | } | ||
422 | return; | ||
423 | } | ||
424 | |||
425 | /* fallback */ | ||
426 | for (pnum = pnum_begin; pnum < pnum_end; pnum++) { | ||
427 | struct mem_section *ms; | ||
428 | |||
429 | if (!present_section_nr(pnum)) | ||
430 | continue; | ||
431 | map_map[pnum] = sparse_mem_map_populate(pnum, nodeid); | ||
432 | if (map_map[pnum]) | ||
433 | continue; | ||
434 | ms = __nr_to_section(pnum); | ||
435 | printk(KERN_ERR "%s: sparsemem memory map backing failed " | ||
436 | "some memory will not be available.\n", __func__); | ||
437 | ms->section_mem_map = 0; | ||
438 | } | ||
439 | } | ||
378 | #endif /* !CONFIG_SPARSEMEM_VMEMMAP */ | 440 | #endif /* !CONFIG_SPARSEMEM_VMEMMAP */ |
379 | 441 | ||
442 | #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER | ||
443 | static void __init sparse_early_mem_maps_alloc_node(struct page **map_map, | ||
444 | unsigned long pnum_begin, | ||
445 | unsigned long pnum_end, | ||
446 | unsigned long map_count, int nodeid) | ||
447 | { | ||
448 | sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end, | ||
449 | map_count, nodeid); | ||
450 | } | ||
451 | #else | ||
380 | static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) | 452 | static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) |
381 | { | 453 | { |
382 | struct page *map; | 454 | struct page *map; |
@@ -392,10 +464,12 @@ static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) | |||
392 | ms->section_mem_map = 0; | 464 | ms->section_mem_map = 0; |
393 | return NULL; | 465 | return NULL; |
394 | } | 466 | } |
467 | #endif | ||
395 | 468 | ||
396 | void __attribute__((weak)) __meminit vmemmap_populate_print_last(void) | 469 | void __attribute__((weak)) __meminit vmemmap_populate_print_last(void) |
397 | { | 470 | { |
398 | } | 471 | } |
472 | |||
399 | /* | 473 | /* |
400 | * Allocate the accumulated non-linear sections, allocate a mem_map | 474 | * Allocate the accumulated non-linear sections, allocate a mem_map |
401 | * for each and record the physical to section mapping. | 475 | * for each and record the physical to section mapping. |
@@ -407,6 +481,14 @@ void __init sparse_init(void) | |||
407 | unsigned long *usemap; | 481 | unsigned long *usemap; |
408 | unsigned long **usemap_map; | 482 | unsigned long **usemap_map; |
409 | int size; | 483 | int size; |
484 | int nodeid_begin = 0; | ||
485 | unsigned long pnum_begin = 0; | ||
486 | unsigned long usemap_count; | ||
487 | #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER | ||
488 | unsigned long map_count; | ||
489 | int size2; | ||
490 | struct page **map_map; | ||
491 | #endif | ||
410 | 492 | ||
411 | /* | 493 | /* |
412 | * map is using big page (aka 2M in x86 64 bit) | 494 | * map is using big page (aka 2M in x86 64 bit) |
@@ -425,10 +507,81 @@ void __init sparse_init(void) | |||
425 | panic("can not allocate usemap_map\n"); | 507 | panic("can not allocate usemap_map\n"); |
426 | 508 | ||
427 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { | 509 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { |
510 | struct mem_section *ms; | ||
511 | |||
428 | if (!present_section_nr(pnum)) | 512 | if (!present_section_nr(pnum)) |
429 | continue; | 513 | continue; |
430 | usemap_map[pnum] = sparse_early_usemap_alloc(pnum); | 514 | ms = __nr_to_section(pnum); |
515 | nodeid_begin = sparse_early_nid(ms); | ||
516 | pnum_begin = pnum; | ||
517 | break; | ||
431 | } | 518 | } |
519 | usemap_count = 1; | ||
520 | for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) { | ||
521 | struct mem_section *ms; | ||
522 | int nodeid; | ||
523 | |||
524 | if (!present_section_nr(pnum)) | ||
525 | continue; | ||
526 | ms = __nr_to_section(pnum); | ||
527 | nodeid = sparse_early_nid(ms); | ||
528 | if (nodeid == nodeid_begin) { | ||
529 | usemap_count++; | ||
530 | continue; | ||
531 | } | ||
532 | /* ok, we need to take care of from pnum_begin to pnum - 1*/ | ||
533 | sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, pnum, | ||
534 | usemap_count, nodeid_begin); | ||
535 | /* new start, update count etc*/ | ||
536 | nodeid_begin = nodeid; | ||
537 | pnum_begin = pnum; | ||
538 | usemap_count = 1; | ||
539 | } | ||
540 | /* ok, last chunk */ | ||
541 | sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, NR_MEM_SECTIONS, | ||
542 | usemap_count, nodeid_begin); | ||
543 | |||
544 | #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER | ||
545 | size2 = sizeof(struct page *) * NR_MEM_SECTIONS; | ||
546 | map_map = alloc_bootmem(size2); | ||
547 | if (!map_map) | ||
548 | panic("can not allocate map_map\n"); | ||
549 | |||
550 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { | ||
551 | struct mem_section *ms; | ||
552 | |||
553 | if (!present_section_nr(pnum)) | ||
554 | continue; | ||
555 | ms = __nr_to_section(pnum); | ||
556 | nodeid_begin = sparse_early_nid(ms); | ||
557 | pnum_begin = pnum; | ||
558 | break; | ||
559 | } | ||
560 | map_count = 1; | ||
561 | for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) { | ||
562 | struct mem_section *ms; | ||
563 | int nodeid; | ||
564 | |||
565 | if (!present_section_nr(pnum)) | ||
566 | continue; | ||
567 | ms = __nr_to_section(pnum); | ||
568 | nodeid = sparse_early_nid(ms); | ||
569 | if (nodeid == nodeid_begin) { | ||
570 | map_count++; | ||
571 | continue; | ||
572 | } | ||
573 | /* ok, we need to take care of from pnum_begin to pnum - 1*/ | ||
574 | sparse_early_mem_maps_alloc_node(map_map, pnum_begin, pnum, | ||
575 | map_count, nodeid_begin); | ||
576 | /* new start, update count etc*/ | ||
577 | nodeid_begin = nodeid; | ||
578 | pnum_begin = pnum; | ||
579 | map_count = 1; | ||
580 | } | ||
581 | /* ok, last chunk */ | ||
582 | sparse_early_mem_maps_alloc_node(map_map, pnum_begin, NR_MEM_SECTIONS, | ||
583 | map_count, nodeid_begin); | ||
584 | #endif | ||
432 | 585 | ||
433 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { | 586 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { |
434 | if (!present_section_nr(pnum)) | 587 | if (!present_section_nr(pnum)) |
@@ -438,7 +591,11 @@ void __init sparse_init(void) | |||
438 | if (!usemap) | 591 | if (!usemap) |
439 | continue; | 592 | continue; |
440 | 593 | ||
594 | #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER | ||
595 | map = map_map[pnum]; | ||
596 | #else | ||
441 | map = sparse_early_mem_map_alloc(pnum); | 597 | map = sparse_early_mem_map_alloc(pnum); |
598 | #endif | ||
442 | if (!map) | 599 | if (!map) |
443 | continue; | 600 | continue; |
444 | 601 | ||
@@ -448,6 +605,9 @@ void __init sparse_init(void) | |||
448 | 605 | ||
449 | vmemmap_populate_print_last(); | 606 | vmemmap_populate_print_last(); |
450 | 607 | ||
608 | #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER | ||
609 | free_bootmem(__pa(map_map), size2); | ||
610 | #endif | ||
451 | free_bootmem(__pa(usemap_map), size); | 611 | free_bootmem(__pa(usemap_map), size); |
452 | } | 612 | } |
453 | 613 | ||
diff --git a/mm/truncate.c b/mm/truncate.c index 342deee22684..e87e37244829 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
@@ -522,22 +522,20 @@ EXPORT_SYMBOL_GPL(invalidate_inode_pages2); | |||
522 | */ | 522 | */ |
523 | void truncate_pagecache(struct inode *inode, loff_t old, loff_t new) | 523 | void truncate_pagecache(struct inode *inode, loff_t old, loff_t new) |
524 | { | 524 | { |
525 | if (new < old) { | 525 | struct address_space *mapping = inode->i_mapping; |
526 | struct address_space *mapping = inode->i_mapping; | 526 | |
527 | 527 | /* | |
528 | /* | 528 | * unmap_mapping_range is called twice, first simply for |
529 | * unmap_mapping_range is called twice, first simply for | 529 | * efficiency so that truncate_inode_pages does fewer |
530 | * efficiency so that truncate_inode_pages does fewer | 530 | * single-page unmaps. However after this first call, and |
531 | * single-page unmaps. However after this first call, and | 531 | * before truncate_inode_pages finishes, it is possible for |
532 | * before truncate_inode_pages finishes, it is possible for | 532 | * private pages to be COWed, which remain after |
533 | * private pages to be COWed, which remain after | 533 | * truncate_inode_pages finishes, hence the second |
534 | * truncate_inode_pages finishes, hence the second | 534 | * unmap_mapping_range call must be made for correctness. |
535 | * unmap_mapping_range call must be made for correctness. | 535 | */ |
536 | */ | 536 | unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1); |
537 | unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1); | 537 | truncate_inode_pages(mapping, new); |
538 | truncate_inode_pages(mapping, new); | 538 | unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1); |
539 | unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1); | ||
540 | } | ||
541 | } | 539 | } |
542 | EXPORT_SYMBOL(truncate_pagecache); | 540 | EXPORT_SYMBOL(truncate_pagecache); |
543 | 541 | ||
@@ -4,10 +4,6 @@ | |||
4 | #include <linux/module.h> | 4 | #include <linux/module.h> |
5 | #include <linux/err.h> | 5 | #include <linux/err.h> |
6 | #include <linux/sched.h> | 6 | #include <linux/sched.h> |
7 | #include <linux/hugetlb.h> | ||
8 | #include <linux/syscalls.h> | ||
9 | #include <linux/mman.h> | ||
10 | #include <linux/file.h> | ||
11 | #include <asm/uaccess.h> | 7 | #include <asm/uaccess.h> |
12 | 8 | ||
13 | #define CREATE_TRACE_POINTS | 9 | #define CREATE_TRACE_POINTS |
@@ -224,7 +220,7 @@ char *strndup_user(const char __user *s, long n) | |||
224 | } | 220 | } |
225 | EXPORT_SYMBOL(strndup_user); | 221 | EXPORT_SYMBOL(strndup_user); |
226 | 222 | ||
227 | #ifndef HAVE_ARCH_PICK_MMAP_LAYOUT | 223 | #if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT) |
228 | void arch_pick_mmap_layout(struct mm_struct *mm) | 224 | void arch_pick_mmap_layout(struct mm_struct *mm) |
229 | { | 225 | { |
230 | mm->mmap_base = TASK_UNMAPPED_BASE; | 226 | mm->mmap_base = TASK_UNMAPPED_BASE; |
@@ -272,46 +268,6 @@ int __attribute__((weak)) get_user_pages_fast(unsigned long start, | |||
272 | } | 268 | } |
273 | EXPORT_SYMBOL_GPL(get_user_pages_fast); | 269 | EXPORT_SYMBOL_GPL(get_user_pages_fast); |
274 | 270 | ||
275 | SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, | ||
276 | unsigned long, prot, unsigned long, flags, | ||
277 | unsigned long, fd, unsigned long, pgoff) | ||
278 | { | ||
279 | struct file * file = NULL; | ||
280 | unsigned long retval = -EBADF; | ||
281 | |||
282 | if (!(flags & MAP_ANONYMOUS)) { | ||
283 | if (unlikely(flags & MAP_HUGETLB)) | ||
284 | return -EINVAL; | ||
285 | file = fget(fd); | ||
286 | if (!file) | ||
287 | goto out; | ||
288 | } else if (flags & MAP_HUGETLB) { | ||
289 | struct user_struct *user = NULL; | ||
290 | /* | ||
291 | * VM_NORESERVE is used because the reservations will be | ||
292 | * taken when vm_ops->mmap() is called | ||
293 | * A dummy user value is used because we are not locking | ||
294 | * memory so no accounting is necessary | ||
295 | */ | ||
296 | len = ALIGN(len, huge_page_size(&default_hstate)); | ||
297 | file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE, | ||
298 | &user, HUGETLB_ANONHUGE_INODE); | ||
299 | if (IS_ERR(file)) | ||
300 | return PTR_ERR(file); | ||
301 | } | ||
302 | |||
303 | flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); | ||
304 | |||
305 | down_write(&current->mm->mmap_sem); | ||
306 | retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); | ||
307 | up_write(&current->mm->mmap_sem); | ||
308 | |||
309 | if (file) | ||
310 | fput(file); | ||
311 | out: | ||
312 | return retval; | ||
313 | } | ||
314 | |||
315 | /* Tracepoints definitions. */ | 271 | /* Tracepoints definitions. */ |
316 | EXPORT_TRACEPOINT_SYMBOL(kmalloc); | 272 | EXPORT_TRACEPOINT_SYMBOL(kmalloc); |
317 | EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); | 273 | EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); |
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 37e69295f250..ae007462b7f6 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c | |||
@@ -509,6 +509,9 @@ static unsigned long lazy_max_pages(void) | |||
509 | 509 | ||
510 | static atomic_t vmap_lazy_nr = ATOMIC_INIT(0); | 510 | static atomic_t vmap_lazy_nr = ATOMIC_INIT(0); |
511 | 511 | ||
512 | /* for per-CPU blocks */ | ||
513 | static void purge_fragmented_blocks_allcpus(void); | ||
514 | |||
512 | /* | 515 | /* |
513 | * Purges all lazily-freed vmap areas. | 516 | * Purges all lazily-freed vmap areas. |
514 | * | 517 | * |
@@ -539,6 +542,9 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, | |||
539 | } else | 542 | } else |
540 | spin_lock(&purge_lock); | 543 | spin_lock(&purge_lock); |
541 | 544 | ||
545 | if (sync) | ||
546 | purge_fragmented_blocks_allcpus(); | ||
547 | |||
542 | rcu_read_lock(); | 548 | rcu_read_lock(); |
543 | list_for_each_entry_rcu(va, &vmap_area_list, list) { | 549 | list_for_each_entry_rcu(va, &vmap_area_list, list) { |
544 | if (va->flags & VM_LAZY_FREE) { | 550 | if (va->flags & VM_LAZY_FREE) { |
@@ -555,10 +561,8 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, | |||
555 | } | 561 | } |
556 | rcu_read_unlock(); | 562 | rcu_read_unlock(); |
557 | 563 | ||
558 | if (nr) { | 564 | if (nr) |
559 | BUG_ON(nr > atomic_read(&vmap_lazy_nr)); | ||
560 | atomic_sub(nr, &vmap_lazy_nr); | 565 | atomic_sub(nr, &vmap_lazy_nr); |
561 | } | ||
562 | 566 | ||
563 | if (nr || force_flush) | 567 | if (nr || force_flush) |
564 | flush_tlb_kernel_range(*start, *end); | 568 | flush_tlb_kernel_range(*start, *end); |
@@ -669,8 +673,6 @@ static bool vmap_initialized __read_mostly = false; | |||
669 | struct vmap_block_queue { | 673 | struct vmap_block_queue { |
670 | spinlock_t lock; | 674 | spinlock_t lock; |
671 | struct list_head free; | 675 | struct list_head free; |
672 | struct list_head dirty; | ||
673 | unsigned int nr_dirty; | ||
674 | }; | 676 | }; |
675 | 677 | ||
676 | struct vmap_block { | 678 | struct vmap_block { |
@@ -680,10 +682,9 @@ struct vmap_block { | |||
680 | unsigned long free, dirty; | 682 | unsigned long free, dirty; |
681 | DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS); | 683 | DECLARE_BITMAP(alloc_map, VMAP_BBMAP_BITS); |
682 | DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS); | 684 | DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS); |
683 | union { | 685 | struct list_head free_list; |
684 | struct list_head free_list; | 686 | struct rcu_head rcu_head; |
685 | struct rcu_head rcu_head; | 687 | struct list_head purge; |
686 | }; | ||
687 | }; | 688 | }; |
688 | 689 | ||
689 | /* Queue of free and dirty vmap blocks, for allocation and flushing purposes */ | 690 | /* Queue of free and dirty vmap blocks, for allocation and flushing purposes */ |
@@ -759,7 +760,7 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask) | |||
759 | vbq = &get_cpu_var(vmap_block_queue); | 760 | vbq = &get_cpu_var(vmap_block_queue); |
760 | vb->vbq = vbq; | 761 | vb->vbq = vbq; |
761 | spin_lock(&vbq->lock); | 762 | spin_lock(&vbq->lock); |
762 | list_add(&vb->free_list, &vbq->free); | 763 | list_add_rcu(&vb->free_list, &vbq->free); |
763 | spin_unlock(&vbq->lock); | 764 | spin_unlock(&vbq->lock); |
764 | put_cpu_var(vmap_block_queue); | 765 | put_cpu_var(vmap_block_queue); |
765 | 766 | ||
@@ -778,8 +779,6 @@ static void free_vmap_block(struct vmap_block *vb) | |||
778 | struct vmap_block *tmp; | 779 | struct vmap_block *tmp; |
779 | unsigned long vb_idx; | 780 | unsigned long vb_idx; |
780 | 781 | ||
781 | BUG_ON(!list_empty(&vb->free_list)); | ||
782 | |||
783 | vb_idx = addr_to_vb_idx(vb->va->va_start); | 782 | vb_idx = addr_to_vb_idx(vb->va->va_start); |
784 | spin_lock(&vmap_block_tree_lock); | 783 | spin_lock(&vmap_block_tree_lock); |
785 | tmp = radix_tree_delete(&vmap_block_tree, vb_idx); | 784 | tmp = radix_tree_delete(&vmap_block_tree, vb_idx); |
@@ -790,12 +789,61 @@ static void free_vmap_block(struct vmap_block *vb) | |||
790 | call_rcu(&vb->rcu_head, rcu_free_vb); | 789 | call_rcu(&vb->rcu_head, rcu_free_vb); |
791 | } | 790 | } |
792 | 791 | ||
792 | static void purge_fragmented_blocks(int cpu) | ||
793 | { | ||
794 | LIST_HEAD(purge); | ||
795 | struct vmap_block *vb; | ||
796 | struct vmap_block *n_vb; | ||
797 | struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu); | ||
798 | |||
799 | rcu_read_lock(); | ||
800 | list_for_each_entry_rcu(vb, &vbq->free, free_list) { | ||
801 | |||
802 | if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS)) | ||
803 | continue; | ||
804 | |||
805 | spin_lock(&vb->lock); | ||
806 | if (vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS) { | ||
807 | vb->free = 0; /* prevent further allocs after releasing lock */ | ||
808 | vb->dirty = VMAP_BBMAP_BITS; /* prevent purging it again */ | ||
809 | bitmap_fill(vb->alloc_map, VMAP_BBMAP_BITS); | ||
810 | bitmap_fill(vb->dirty_map, VMAP_BBMAP_BITS); | ||
811 | spin_lock(&vbq->lock); | ||
812 | list_del_rcu(&vb->free_list); | ||
813 | spin_unlock(&vbq->lock); | ||
814 | spin_unlock(&vb->lock); | ||
815 | list_add_tail(&vb->purge, &purge); | ||
816 | } else | ||
817 | spin_unlock(&vb->lock); | ||
818 | } | ||
819 | rcu_read_unlock(); | ||
820 | |||
821 | list_for_each_entry_safe(vb, n_vb, &purge, purge) { | ||
822 | list_del(&vb->purge); | ||
823 | free_vmap_block(vb); | ||
824 | } | ||
825 | } | ||
826 | |||
827 | static void purge_fragmented_blocks_thiscpu(void) | ||
828 | { | ||
829 | purge_fragmented_blocks(smp_processor_id()); | ||
830 | } | ||
831 | |||
832 | static void purge_fragmented_blocks_allcpus(void) | ||
833 | { | ||
834 | int cpu; | ||
835 | |||
836 | for_each_possible_cpu(cpu) | ||
837 | purge_fragmented_blocks(cpu); | ||
838 | } | ||
839 | |||
793 | static void *vb_alloc(unsigned long size, gfp_t gfp_mask) | 840 | static void *vb_alloc(unsigned long size, gfp_t gfp_mask) |
794 | { | 841 | { |
795 | struct vmap_block_queue *vbq; | 842 | struct vmap_block_queue *vbq; |
796 | struct vmap_block *vb; | 843 | struct vmap_block *vb; |
797 | unsigned long addr = 0; | 844 | unsigned long addr = 0; |
798 | unsigned int order; | 845 | unsigned int order; |
846 | int purge = 0; | ||
799 | 847 | ||
800 | BUG_ON(size & ~PAGE_MASK); | 848 | BUG_ON(size & ~PAGE_MASK); |
801 | BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); | 849 | BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC); |
@@ -808,24 +856,38 @@ again: | |||
808 | int i; | 856 | int i; |
809 | 857 | ||
810 | spin_lock(&vb->lock); | 858 | spin_lock(&vb->lock); |
859 | if (vb->free < 1UL << order) | ||
860 | goto next; | ||
861 | |||
811 | i = bitmap_find_free_region(vb->alloc_map, | 862 | i = bitmap_find_free_region(vb->alloc_map, |
812 | VMAP_BBMAP_BITS, order); | 863 | VMAP_BBMAP_BITS, order); |
813 | 864 | ||
814 | if (i >= 0) { | 865 | if (i < 0) { |
815 | addr = vb->va->va_start + (i << PAGE_SHIFT); | 866 | if (vb->free + vb->dirty == VMAP_BBMAP_BITS) { |
816 | BUG_ON(addr_to_vb_idx(addr) != | 867 | /* fragmented and no outstanding allocations */ |
817 | addr_to_vb_idx(vb->va->va_start)); | 868 | BUG_ON(vb->dirty != VMAP_BBMAP_BITS); |
818 | vb->free -= 1UL << order; | 869 | purge = 1; |
819 | if (vb->free == 0) { | ||
820 | spin_lock(&vbq->lock); | ||
821 | list_del_init(&vb->free_list); | ||
822 | spin_unlock(&vbq->lock); | ||
823 | } | 870 | } |
824 | spin_unlock(&vb->lock); | 871 | goto next; |
825 | break; | 872 | } |
873 | addr = vb->va->va_start + (i << PAGE_SHIFT); | ||
874 | BUG_ON(addr_to_vb_idx(addr) != | ||
875 | addr_to_vb_idx(vb->va->va_start)); | ||
876 | vb->free -= 1UL << order; | ||
877 | if (vb->free == 0) { | ||
878 | spin_lock(&vbq->lock); | ||
879 | list_del_rcu(&vb->free_list); | ||
880 | spin_unlock(&vbq->lock); | ||
826 | } | 881 | } |
827 | spin_unlock(&vb->lock); | 882 | spin_unlock(&vb->lock); |
883 | break; | ||
884 | next: | ||
885 | spin_unlock(&vb->lock); | ||
828 | } | 886 | } |
887 | |||
888 | if (purge) | ||
889 | purge_fragmented_blocks_thiscpu(); | ||
890 | |||
829 | put_cpu_var(vmap_block_queue); | 891 | put_cpu_var(vmap_block_queue); |
830 | rcu_read_unlock(); | 892 | rcu_read_unlock(); |
831 | 893 | ||
@@ -862,11 +924,11 @@ static void vb_free(const void *addr, unsigned long size) | |||
862 | BUG_ON(!vb); | 924 | BUG_ON(!vb); |
863 | 925 | ||
864 | spin_lock(&vb->lock); | 926 | spin_lock(&vb->lock); |
865 | bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order); | 927 | BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order)); |
866 | 928 | ||
867 | vb->dirty += 1UL << order; | 929 | vb->dirty += 1UL << order; |
868 | if (vb->dirty == VMAP_BBMAP_BITS) { | 930 | if (vb->dirty == VMAP_BBMAP_BITS) { |
869 | BUG_ON(vb->free || !list_empty(&vb->free_list)); | 931 | BUG_ON(vb->free); |
870 | spin_unlock(&vb->lock); | 932 | spin_unlock(&vb->lock); |
871 | free_vmap_block(vb); | 933 | free_vmap_block(vb); |
872 | } else | 934 | } else |
@@ -1035,8 +1097,6 @@ void __init vmalloc_init(void) | |||
1035 | vbq = &per_cpu(vmap_block_queue, i); | 1097 | vbq = &per_cpu(vmap_block_queue, i); |
1036 | spin_lock_init(&vbq->lock); | 1098 | spin_lock_init(&vbq->lock); |
1037 | INIT_LIST_HEAD(&vbq->free); | 1099 | INIT_LIST_HEAD(&vbq->free); |
1038 | INIT_LIST_HEAD(&vbq->dirty); | ||
1039 | vbq->nr_dirty = 0; | ||
1040 | } | 1100 | } |
1041 | 1101 | ||
1042 | /* Import existing vmlist entries. */ | 1102 | /* Import existing vmlist entries. */ |
diff --git a/mm/vmscan.c b/mm/vmscan.c index 885207a6b6b7..c26986c85ce0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -1922,6 +1922,9 @@ static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining) | |||
1922 | if (!populated_zone(zone)) | 1922 | if (!populated_zone(zone)) |
1923 | continue; | 1923 | continue; |
1924 | 1924 | ||
1925 | if (zone_is_all_unreclaimable(zone)) | ||
1926 | continue; | ||
1927 | |||
1925 | if (!zone_watermark_ok(zone, order, high_wmark_pages(zone), | 1928 | if (!zone_watermark_ok(zone, order, high_wmark_pages(zone), |
1926 | 0, 0)) | 1929 | 0, 0)) |
1927 | return 1; | 1930 | return 1; |
diff --git a/mm/vmstat.c b/mm/vmstat.c index 6051fbab67ba..fc5aa183bc45 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c | |||
@@ -139,7 +139,8 @@ static void refresh_zone_stat_thresholds(void) | |||
139 | threshold = calculate_threshold(zone); | 139 | threshold = calculate_threshold(zone); |
140 | 140 | ||
141 | for_each_online_cpu(cpu) | 141 | for_each_online_cpu(cpu) |
142 | zone_pcp(zone, cpu)->stat_threshold = threshold; | 142 | per_cpu_ptr(zone->pageset, cpu)->stat_threshold |
143 | = threshold; | ||
143 | } | 144 | } |
144 | } | 145 | } |
145 | 146 | ||
@@ -149,7 +150,8 @@ static void refresh_zone_stat_thresholds(void) | |||
149 | void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, | 150 | void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, |
150 | int delta) | 151 | int delta) |
151 | { | 152 | { |
152 | struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); | 153 | struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset); |
154 | |||
153 | s8 *p = pcp->vm_stat_diff + item; | 155 | s8 *p = pcp->vm_stat_diff + item; |
154 | long x; | 156 | long x; |
155 | 157 | ||
@@ -202,7 +204,7 @@ EXPORT_SYMBOL(mod_zone_page_state); | |||
202 | */ | 204 | */ |
203 | void __inc_zone_state(struct zone *zone, enum zone_stat_item item) | 205 | void __inc_zone_state(struct zone *zone, enum zone_stat_item item) |
204 | { | 206 | { |
205 | struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); | 207 | struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset); |
206 | s8 *p = pcp->vm_stat_diff + item; | 208 | s8 *p = pcp->vm_stat_diff + item; |
207 | 209 | ||
208 | (*p)++; | 210 | (*p)++; |
@@ -223,7 +225,7 @@ EXPORT_SYMBOL(__inc_zone_page_state); | |||
223 | 225 | ||
224 | void __dec_zone_state(struct zone *zone, enum zone_stat_item item) | 226 | void __dec_zone_state(struct zone *zone, enum zone_stat_item item) |
225 | { | 227 | { |
226 | struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); | 228 | struct per_cpu_pageset *pcp = this_cpu_ptr(zone->pageset); |
227 | s8 *p = pcp->vm_stat_diff + item; | 229 | s8 *p = pcp->vm_stat_diff + item; |
228 | 230 | ||
229 | (*p)--; | 231 | (*p)--; |
@@ -300,7 +302,7 @@ void refresh_cpu_vm_stats(int cpu) | |||
300 | for_each_populated_zone(zone) { | 302 | for_each_populated_zone(zone) { |
301 | struct per_cpu_pageset *p; | 303 | struct per_cpu_pageset *p; |
302 | 304 | ||
303 | p = zone_pcp(zone, cpu); | 305 | p = per_cpu_ptr(zone->pageset, cpu); |
304 | 306 | ||
305 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) | 307 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) |
306 | if (p->vm_stat_diff[i]) { | 308 | if (p->vm_stat_diff[i]) { |
@@ -741,7 +743,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, | |||
741 | for_each_online_cpu(i) { | 743 | for_each_online_cpu(i) { |
742 | struct per_cpu_pageset *pageset; | 744 | struct per_cpu_pageset *pageset; |
743 | 745 | ||
744 | pageset = zone_pcp(zone, i); | 746 | pageset = per_cpu_ptr(zone->pageset, i); |
745 | seq_printf(m, | 747 | seq_printf(m, |
746 | "\n cpu: %i" | 748 | "\n cpu: %i" |
747 | "\n count: %i" | 749 | "\n count: %i" |
@@ -906,6 +908,7 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb, | |||
906 | case CPU_ONLINE: | 908 | case CPU_ONLINE: |
907 | case CPU_ONLINE_FROZEN: | 909 | case CPU_ONLINE_FROZEN: |
908 | start_cpu_timer(cpu); | 910 | start_cpu_timer(cpu); |
911 | node_set_state(cpu_to_node(cpu), N_CPU); | ||
909 | break; | 912 | break; |
910 | case CPU_DOWN_PREPARE: | 913 | case CPU_DOWN_PREPARE: |
911 | case CPU_DOWN_PREPARE_FROZEN: | 914 | case CPU_DOWN_PREPARE_FROZEN: |