Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c | 180 ++++++++++++++++++++++++++++++++----------------
 1 file changed, 118 insertions(+), 62 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 234bd4895d14..dc523a1f270d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -49,13 +49,11 @@ nodemask_t node_online_map __read_mostly = { { [0] = 1UL } };
 EXPORT_SYMBOL(node_online_map);
 nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
 EXPORT_SYMBOL(node_possible_map);
-struct pglist_data *pgdat_list __read_mostly;
 unsigned long totalram_pages __read_mostly;
 unsigned long totalhigh_pages __read_mostly;
 long nr_swap_pages;
 int percpu_pagelist_fraction;
 
-static void fastcall free_hot_cold_page(struct page *page, int cold);
 static void __free_pages_ok(struct page *page, unsigned int order);
 
 /*
@@ -190,7 +188,7 @@ static void prep_compound_page(struct page *page, unsigned long order)
 	for (i = 0; i < nr_pages; i++) {
 		struct page *p = page + i;
 
-		SetPageCompound(p);
+		__SetPageCompound(p);
 		set_page_private(p, (unsigned long)page);
 	}
 }
@@ -209,10 +207,24 @@ static void destroy_compound_page(struct page *page, unsigned long order)
 		if (unlikely(!PageCompound(p) |
 				(page_private(p) != (unsigned long)page)))
 			bad_page(page);
-		ClearPageCompound(p);
+		__ClearPageCompound(p);
 	}
 }
 
+static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
+{
+	int i;
+
+	BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM);
+	/*
+	 * clear_highpage() will use KM_USER0, so it's a bug to use __GFP_ZERO
+	 * and __GFP_HIGHMEM from hard or soft interrupt context.
+	 */
+	BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt());
+	for (i = 0; i < (1 << order); i++)
+		clear_highpage(page + i);
+}
+
 /*
  * function for dealing with page's order in buddy system.
  * zone->lock is already acquired when we use these.
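
prep_zero_page() is moved up here, next to the compound-page helpers, so that prep_new_page() can call it (the old copy further down is deleted in a later hunk), and it gains an in_interrupt() check: as the new comment says, clear_highpage() uses the KM_USER0 kmap_atomic slot, so zeroing a highmem page from interrupt context would be a bug. A minimal sketch of the caller-visible contract, assuming process context:

	/* __GFP_ZERO is honoured inside the allocator itself */
	struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);

	if (page) {
		/* page contents are guaranteed zero here */
		__free_page(page);
	}
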
@@ -423,11 +435,6 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 	mutex_debug_check_no_locks_freed(page_address(page),
 					 PAGE_SIZE<<order);
 
-#ifndef CONFIG_MMU
-	for (i = 1 ; i < (1 << order) ; ++i)
-		__put_page(page + i);
-#endif
-
 	for (i = 0 ; i < (1 << order) ; ++i)
 		reserved += free_pages_check(page + i);
 	if (reserved)
@@ -448,28 +455,23 @@ void fastcall __init __free_pages_bootmem(struct page *page, unsigned int order)
 	if (order == 0) {
 		__ClearPageReserved(page);
 		set_page_count(page, 0);
-
-		free_hot_cold_page(page, 0);
+		set_page_refcounted(page);
+		__free_page(page);
 	} else {
-		LIST_HEAD(list);
 		int loop;
 
+		prefetchw(page);
 		for (loop = 0; loop < BITS_PER_LONG; loop++) {
 			struct page *p = &page[loop];
 
-			if (loop + 16 < BITS_PER_LONG)
-				prefetchw(p + 16);
+			if (loop + 1 < BITS_PER_LONG)
+				prefetchw(p + 1);
 			__ClearPageReserved(p);
 			set_page_count(p, 0);
 		}
 
-		arch_free_page(page, order);
-
-		mod_page_state(pgfree, 1 << order);
-
-		list_add(&page->lru, &list);
-		kernel_map_pages(page, 1 << order, 0);
-		free_pages_bulk(page_zone(page), 1, &list, order);
+		set_page_refcounted(page);
+		__free_pages(page, order);
 	}
 }
 
@@ -507,7 +509,7 @@ static inline void expand(struct zone *zone, struct page *page,
 /*
  * This page is about to be returned from the page allocator
  */
-static int prep_new_page(struct page *page, int order)
+static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
 {
 	if (unlikely(page_mapcount(page) |
 		(page->mapping != NULL) |
@@ -536,8 +538,15 @@ static int prep_new_page(struct page *page, int order)
 			1 << PG_referenced | 1 << PG_arch_1 |
 			1 << PG_checked | 1 << PG_mappedtodisk);
 	set_page_private(page, 0);
-	set_page_refs(page, order);
+	set_page_refcounted(page);
 	kernel_map_pages(page, 1 << order, 1);
+
+	if (gfp_flags & __GFP_ZERO)
+		prep_zero_page(page, order, gfp_flags);
+
+	if (order && (gfp_flags & __GFP_COMP))
+		prep_compound_page(page, order);
+
 	return 0;
 }
 
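
prep_new_page() now receives the gfp mask and finishes the page itself: zeroing for __GFP_ZERO and compound setup for __GFP_COMP happen in the same step as the flag and refcount initialisation, so the call site no longer post-processes the page (the @@ -795,14 hunk below removes that code). A sketch of both flags together, assuming process context:

	/* an order-2 (4-page) compound allocation, zeroed by the allocator */
	struct page *page = alloc_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO, 2);

	if (page)
		__free_pages(page, 2);
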
@@ -593,13 +602,14 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 /*
  * Called from the slab reaper to drain pagesets on a particular node that
  * belong to the currently executing processor.
+ * Note that this function must be called with the thread pinned to
+ * a single processor.
 */
 void drain_node_pages(int nodeid)
 {
 	int i, z;
 	unsigned long flags;
 
-	local_irq_save(flags);
 	for (z = 0; z < MAX_NR_ZONES; z++) {
 		struct zone *zone = NODE_DATA(nodeid)->node_zones + z;
 		struct per_cpu_pageset *pset;
@@ -609,11 +619,14 @@ void drain_node_pages(int nodeid)
 			struct per_cpu_pages *pcp;
 
 			pcp = &pset->pcp[i];
-			free_pages_bulk(zone, pcp->count, &pcp->list, 0);
-			pcp->count = 0;
+			if (pcp->count) {
+				local_irq_save(flags);
+				free_pages_bulk(zone, pcp->count, &pcp->list, 0);
+				pcp->count = 0;
+				local_irq_restore(flags);
+			}
 		}
 	}
-	local_irq_restore(flags);
 }
 #endif
 
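
Interrupts are now disabled per pageset list, and only when that list has pages to free, rather than across the whole zone scan; the new comment spells out the pre-existing requirement that the caller stay on one CPU, since the pagesets drained belong to the current processor. One way a caller could satisfy that rule, as a hedged sketch (the in-tree caller is the slab reaper, which already runs pinned):

	preempt_disable();		/* stay on this CPU for the whole drain */
	drain_node_pages(numa_node_id());
	preempt_enable();
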
@@ -743,13 +756,22 @@ void fastcall free_cold_page(struct page *page)
 	free_hot_cold_page(page, 1);
 }
 
-static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
+/*
+ * split_page takes a non-compound higher-order page, and splits it into
+ * n (1<<order) sub-pages: page[0..n]
+ * Each sub-page must be freed individually.
+ *
+ * Note: this is probably too low level an operation for use in drivers.
+ * Please consult with lkml before using this in your driver.
+ */
+void split_page(struct page *page, unsigned int order)
 {
 	int i;
 
-	BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM);
-	for(i = 0; i < (1 << order); i++)
-		clear_highpage(page + i);
+	BUG_ON(PageCompound(page));
+	BUG_ON(!page_count(page));
+	for (i = 1; i < (1 << order); i++)
+		set_page_refcounted(page + i);
 }
 
 /*
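
prep_zero_page() disappears from here (it moved up earlier in the file) and split_page() takes its place: given a non-compound order-n allocation, it gives each of the 2^n constituent pages its own reference count, so page[0] keeps the existing reference and pages 1 onward get fresh ones via set_page_refcounted(). A usage sketch under the rules the comment states:

	struct page *page = alloc_pages(GFP_KERNEL, 2);	/* 4 pages, no __GFP_COMP */
	int i;

	if (page) {
		split_page(page, 2);
		/* each sub-page must now be freed individually */
		for (i = 0; i < 4; i++)
			__free_page(page + i);
	}
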
@@ -795,14 +817,8 @@ again:
 	put_cpu();
 
 	BUG_ON(bad_range(zone, page));
-	if (prep_new_page(page, order))
+	if (prep_new_page(page, order, gfp_flags))
 		goto again;
-
-	if (gfp_flags & __GFP_ZERO)
-		prep_zero_page(page, order, gfp_flags);
-
-	if (order && (gfp_flags & __GFP_COMP))
-		prep_compound_page(page, order);
 	return page;
 
 failed:
@@ -926,7 +942,8 @@ restart:
 		goto got_pg;
 
 	do {
-		wakeup_kswapd(*z, order);
+		if (cpuset_zone_allowed(*z, gfp_mask))
+			wakeup_kswapd(*z, order);
 	} while (*(++z));
 
 	/*
@@ -1183,7 +1200,7 @@ unsigned int nr_free_highpages (void)
 	pg_data_t *pgdat;
 	unsigned int pages = 0;
 
-	for_each_pgdat(pgdat)
+	for_each_online_pgdat(pgdat)
 		pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
 
 	return pages;
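
This is the first of several for_each_pgdat() conversions in this patch; together with the deletion of the global pgdat_list in the first hunk, node traversal now goes through first_online_pgdat()/next_online_pgdat() (driven by the online-node mask) instead of a hand-maintained pgdat_next linked list. The iterator's shape, as a minimal sketch:

	pg_data_t *pgdat;

	for_each_online_pgdat(pgdat)
		printk(KERN_DEBUG "node %d: %lu pages present\n",
			pgdat->node_id, pgdat->node_present_pages);
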
@@ -1214,24 +1231,22 @@ DEFINE_PER_CPU(long, nr_pagecache_local) = 0;
 
 static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
 {
-	int cpu = 0;
+	unsigned cpu;
 
 	memset(ret, 0, nr * sizeof(unsigned long));
 	cpus_and(*cpumask, *cpumask, cpu_online_map);
 
-	cpu = first_cpu(*cpumask);
-	while (cpu < NR_CPUS) {
-		unsigned long *in, *out, off;
-
-		if (!cpu_isset(cpu, *cpumask))
-			continue;
+	for_each_cpu_mask(cpu, *cpumask) {
+		unsigned long *in;
+		unsigned long *out;
+		unsigned off;
+		unsigned next_cpu;
 
 		in = (unsigned long *)&per_cpu(page_states, cpu);
 
-		cpu = next_cpu(cpu, *cpumask);
-
-		if (likely(cpu < NR_CPUS))
-			prefetch(&per_cpu(page_states, cpu));
+		next_cpu = next_cpu(cpu, *cpumask);
+		if (likely(next_cpu < NR_CPUS))
+			prefetch(&per_cpu(page_states, next_cpu));
 
 		out = (unsigned long *)ret;
 		for (off = 0; off < nr; off++)
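
The open-coded first_cpu()/next_cpu() walk becomes a for_each_cpu_mask() loop. The old version only advanced cpu partway through the body, so its continue path relied on first_cpu()/next_cpu() never returning a clear bit; the iterator owns the advancement now, and next_cpu() survives solely to prefetch the following CPU's data. For reference, the iterator in isolation, reusing this file's per-CPU page_states as the example datum:

	unsigned cpu;
	unsigned long total = 0;

	/* sum one page_state field across all online CPUs */
	for_each_cpu_mask(cpu, cpu_online_map)
		total += per_cpu(page_states, cpu).pgfree;
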
@@ -1327,7 +1342,7 @@ void get_zone_counts(unsigned long *active,
 	*active = 0;
 	*inactive = 0;
 	*free = 0;
-	for_each_pgdat(pgdat) {
+	for_each_online_pgdat(pgdat) {
 		unsigned long l, m, n;
 		__get_zone_counts(&l, &m, &n, pgdat);
 		*active += l;
@@ -1764,7 +1779,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 			continue;
 		page = pfn_to_page(pfn);
 		set_page_links(page, zone, nid, pfn);
-		set_page_count(page, 1);
+		init_page_count(page);
 		reset_page_mapcount(page);
 		SetPageReserved(page);
 		INIT_LIST_HEAD(&page->lru);
@@ -2013,8 +2028,9 @@ static __meminit void zone_pcp_init(struct zone *zone)
 		setup_pageset(zone_pcp(zone,cpu), batch);
 #endif
 	}
-	printk(KERN_DEBUG "  %s zone: %lu pages, LIFO batch:%lu\n",
-		zone->name, zone->present_pages, batch);
+	if (zone->present_pages)
+		printk(KERN_DEBUG "  %s zone: %lu pages, LIFO batch:%lu\n",
+			zone->name, zone->present_pages, batch);
 }
 
 static __meminit void init_currently_empty_zone(struct zone *zone,
@@ -2025,7 +2041,6 @@ static __meminit void init_currently_empty_zone(struct zone *zone,
 	zone_wait_table_init(zone, size);
 	pgdat->nr_zones = zone_idx(zone) + 1;
 
-	zone->zone_mem_map = pfn_to_page(zone_start_pfn);
 	zone->zone_start_pfn = zone_start_pfn;
 
 	memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn);
@@ -2153,8 +2168,9 @@ static void *frag_start(struct seq_file *m, loff_t *pos)
 {
 	pg_data_t *pgdat;
 	loff_t node = *pos;
-
-	for (pgdat = pgdat_list; pgdat && node; pgdat = pgdat->pgdat_next)
+	for (pgdat = first_online_pgdat();
+	     pgdat && node;
+	     pgdat = next_online_pgdat(pgdat))
 		--node;
 
 	return pgdat;
@@ -2165,7 +2181,7 @@ static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
 	pg_data_t *pgdat = (pg_data_t *)arg;
 
 	(*pos)++;
-	return pgdat->pgdat_next;
+	return next_online_pgdat(pgdat);
 }
 
 static void frag_stop(struct seq_file *m, void *arg)
@@ -2466,7 +2482,7 @@ static void setup_per_zone_lowmem_reserve(void)
 	struct pglist_data *pgdat;
 	int j, idx;
 
-	for_each_pgdat(pgdat) {
+	for_each_online_pgdat(pgdat) {
 		for (j = 0; j < MAX_NR_ZONES; j++) {
 			struct zone *zone = pgdat->node_zones + j;
 			unsigned long present_pages = zone->present_pages;
@@ -2685,8 +2701,7 @@ void *__init alloc_large_system_hash(const char *tablename,
 		else
 			numentries <<= (PAGE_SHIFT - scale);
 	}
-	/* rounded up to nearest power of 2 in size */
-	numentries = 1UL << (long_log2(numentries) + 1);
+	numentries = roundup_pow_of_two(numentries);
 
 	/* limit allocation size to 1/16 total memory by default */
 	if (max == 0) {
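
This also fixes the rounding: long_log2() takes the floor, so the old expression 1UL << (long_log2(numentries) + 1) turned numentries = 1024 into 1UL << 11 = 2048, doubling a size that was already a power of two, while a non-power like 1000 correctly became 1UL << 10 = 1024. roundup_pow_of_two() returns 1024 in both cases, so exact powers of two are no longer inflated.
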
@@ -2729,3 +2744,44 @@ void *__init alloc_large_system_hash(const char *tablename,
 
 	return table;
 }
+
+#ifdef CONFIG_OUT_OF_LINE_PFN_TO_PAGE
+/*
+ * pfn <-> page translation. out-of-line version.
+ * (see asm-generic/memory_model.h)
+ */
+#if defined(CONFIG_FLATMEM)
+struct page *pfn_to_page(unsigned long pfn)
+{
+	return mem_map + (pfn - ARCH_PFN_OFFSET);
+}
+unsigned long page_to_pfn(struct page *page)
+{
+	return (page - mem_map) + ARCH_PFN_OFFSET;
+}
+#elif defined(CONFIG_DISCONTIGMEM)
+struct page *pfn_to_page(unsigned long pfn)
+{
+	int nid = arch_pfn_to_nid(pfn);
+	return NODE_DATA(nid)->node_mem_map + arch_local_page_offset(pfn,nid);
+}
+unsigned long page_to_pfn(struct page *page)
+{
+	struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+	return (page - pgdat->node_mem_map) + pgdat->node_start_pfn;
+}
+#elif defined(CONFIG_SPARSEMEM)
+struct page *pfn_to_page(unsigned long pfn)
+{
+	return __section_mem_map_addr(__pfn_to_section(pfn)) + pfn;
+}
+
+unsigned long page_to_pfn(struct page *page)
+{
+	long section_id = page_to_section(page);
+	return page - __section_mem_map_addr(__nr_to_section(section_id));
+}
+#endif /* CONFIG_FLATMEM/DISCONTIGMEM/SPARSEMEM */
+EXPORT_SYMBOL(pfn_to_page);
+EXPORT_SYMBOL(page_to_pfn);
+#endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */
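
These definitions mirror the inline versions in asm-generic/memory_model.h; an architecture where the conversion is expensive in code size can select CONFIG_OUT_OF_LINE_PFN_TO_PAGE and pay a function call instead of expanding the memory-model arithmetic at every pfn_to_page()/page_to_pfn() site. Under any of the three models the two functions remain inverses; a sketch, with some_page standing for any valid struct page pointer (hypothetical placeholder):

	unsigned long pfn = page_to_pfn(some_page);

	BUG_ON(pfn_to_page(pfn) != some_page);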