Diffstat (limited to 'mm/page_alloc.c'):
 mm/page_alloc.c | 180 ++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 118 insertions(+), 62 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 234bd4895d14..dc523a1f270d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -49,13 +49,11 @@ nodemask_t node_online_map __read_mostly = { { [0] = 1UL } };
 EXPORT_SYMBOL(node_online_map);
 nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
 EXPORT_SYMBOL(node_possible_map);
-struct pglist_data *pgdat_list __read_mostly;
 unsigned long totalram_pages __read_mostly;
 unsigned long totalhigh_pages __read_mostly;
 long nr_swap_pages;
 int percpu_pagelist_fraction;
 
-static void fastcall free_hot_cold_page(struct page *page, int cold);
 static void __free_pages_ok(struct page *page, unsigned int order);
 
 /*
@@ -190,7 +188,7 @@ static void prep_compound_page(struct page *page, unsigned long order)
         for (i = 0; i < nr_pages; i++) {
                 struct page *p = page + i;
 
-                SetPageCompound(p);
+                __SetPageCompound(p);
                 set_page_private(p, (unsigned long)page);
         }
 }
@@ -209,10 +207,24 @@ static void destroy_compound_page(struct page *page, unsigned long order)
                 if (unlikely(!PageCompound(p) |
                                 (page_private(p) != (unsigned long)page)))
                         bad_page(page);
-                ClearPageCompound(p);
+                __ClearPageCompound(p);
         }
 }
 
+static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
+{
+        int i;
+
+        BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM);
+        /*
+         * clear_highpage() will use KM_USER0, so it's a bug to use __GFP_ZERO
+         * and __GFP_HIGHMEM from hard or soft interrupt context.
+         */
+        BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt());
+        for (i = 0; i < (1 << order); i++)
+                clear_highpage(page + i);
+}
+
 /*
  * function for dealing with page's order in buddy system.
  * zone->lock is already acquired when we use these.
@@ -423,11 +435,6 @@ static void __free_pages_ok(struct page *page, unsigned int order)
         mutex_debug_check_no_locks_freed(page_address(page),
                                          PAGE_SIZE<<order);
 
-#ifndef CONFIG_MMU
-        for (i = 1 ; i < (1 << order) ; ++i)
-                __put_page(page + i);
-#endif
-
         for (i = 0 ; i < (1 << order) ; ++i)
                 reserved += free_pages_check(page + i);
         if (reserved)
@@ -448,28 +455,23 @@ void fastcall __init __free_pages_bootmem(struct page *page, unsigned int order)
         if (order == 0) {
                 __ClearPageReserved(page);
                 set_page_count(page, 0);
-
-                free_hot_cold_page(page, 0);
+                set_page_refcounted(page);
+                __free_page(page);
         } else {
-                LIST_HEAD(list);
                 int loop;
 
+                prefetchw(page);
                 for (loop = 0; loop < BITS_PER_LONG; loop++) {
                         struct page *p = &page[loop];
 
-                        if (loop + 16 < BITS_PER_LONG)
-                                prefetchw(p + 16);
+                        if (loop + 1 < BITS_PER_LONG)
+                                prefetchw(p + 1);
                         __ClearPageReserved(p);
                         set_page_count(p, 0);
                 }
 
-                arch_free_page(page, order);
-
-                mod_page_state(pgfree, 1 << order);
-
-                list_add(&page->lru, &list);
-                kernel_map_pages(page, 1 << order, 0);
-                free_pages_bulk(page_zone(page), 1, &list, order);
+                set_page_refcounted(page);
+                __free_pages(page, order);
         }
 }
 
@@ -507,7 +509,7 @@ static inline void expand(struct zone *zone, struct page *page,
 /*
  * This page is about to be returned from the page allocator
  */
-static int prep_new_page(struct page *page, int order)
+static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
 {
         if (unlikely(page_mapcount(page) |
                 (page->mapping != NULL) |
@@ -536,8 +538,15 @@ static int prep_new_page(struct page *page, int order)
                         1 << PG_referenced | 1 << PG_arch_1 |
                         1 << PG_checked | 1 << PG_mappedtodisk);
         set_page_private(page, 0);
-        set_page_refs(page, order);
+        set_page_refcounted(page);
         kernel_map_pages(page, 1 << order, 1);
+
+        if (gfp_flags & __GFP_ZERO)
+                prep_zero_page(page, order, gfp_flags);
+
+        if (order && (gfp_flags & __GFP_COMP))
+                prep_compound_page(page, order);
+
         return 0;
 }
 
@@ -593,13 +602,14 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 /*
  * Called from the slab reaper to drain pagesets on a particular node that
  * belong to the currently executing processor.
+ * Note that this function must be called with the thread pinned to
+ * a single processor.
  */
 void drain_node_pages(int nodeid)
 {
         int i, z;
         unsigned long flags;
 
-        local_irq_save(flags);
         for (z = 0; z < MAX_NR_ZONES; z++) {
                 struct zone *zone = NODE_DATA(nodeid)->node_zones + z;
                 struct per_cpu_pageset *pset;
@@ -609,11 +619,14 @@ void drain_node_pages(int nodeid)
                         struct per_cpu_pages *pcp;
 
                         pcp = &pset->pcp[i];
-                        free_pages_bulk(zone, pcp->count, &pcp->list, 0);
-                        pcp->count = 0;
+                        if (pcp->count) {
+                                local_irq_save(flags);
+                                free_pages_bulk(zone, pcp->count, &pcp->list, 0);
+                                pcp->count = 0;
+                                local_irq_restore(flags);
+                        }
                 }
         }
-        local_irq_restore(flags);
 }
 #endif
 
@@ -743,13 +756,22 @@ void fastcall free_cold_page(struct page *page)
         free_hot_cold_page(page, 1);
 }
 
-static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
+/*
+ * split_page takes a non-compound higher-order page, and splits it into
+ * n (1<<order) sub-pages: page[0..n]
+ * Each sub-page must be freed individually.
+ *
+ * Note: this is probably too low level an operation for use in drivers.
+ * Please consult with lkml before using this in your driver.
+ */
+void split_page(struct page *page, unsigned int order)
 {
         int i;
 
-        BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM);
-        for(i = 0; i < (1 << order); i++)
-                clear_highpage(page + i);
+        BUG_ON(PageCompound(page));
+        BUG_ON(!page_count(page));
+        for (i = 1; i < (1 << order); i++)
+                set_page_refcounted(page + i);
 }
 
 /*
@@ -795,14 +817,8 @@ again:
         put_cpu();
 
         BUG_ON(bad_range(zone, page));
-        if (prep_new_page(page, order))
+        if (prep_new_page(page, order, gfp_flags))
                 goto again;
-
-        if (gfp_flags & __GFP_ZERO)
-                prep_zero_page(page, order, gfp_flags);
-
-        if (order && (gfp_flags & __GFP_COMP))
-                prep_compound_page(page, order);
         return page;
 
 failed:
@@ -926,7 +942,8 @@ restart:
                 goto got_pg;
 
         do {
-                wakeup_kswapd(*z, order);
+                if (cpuset_zone_allowed(*z, gfp_mask))
+                        wakeup_kswapd(*z, order);
         } while (*(++z));
 
         /*
@@ -1183,7 +1200,7 @@ unsigned int nr_free_highpages (void)
         pg_data_t *pgdat;
         unsigned int pages = 0;
 
-        for_each_pgdat(pgdat)
+        for_each_online_pgdat(pgdat)
                 pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
 
         return pages;
@@ -1214,24 +1231,22 @@ DEFINE_PER_CPU(long, nr_pagecache_local) = 0;
 
 static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
 {
-        int cpu = 0;
+        unsigned cpu;
 
         memset(ret, 0, nr * sizeof(unsigned long));
         cpus_and(*cpumask, *cpumask, cpu_online_map);
 
-        cpu = first_cpu(*cpumask);
-        while (cpu < NR_CPUS) {
-                unsigned long *in, *out, off;
-
-                if (!cpu_isset(cpu, *cpumask))
-                        continue;
+        for_each_cpu_mask(cpu, *cpumask) {
+                unsigned long *in;
+                unsigned long *out;
+                unsigned off;
+                unsigned next_cpu;
 
                 in = (unsigned long *)&per_cpu(page_states, cpu);
 
-                cpu = next_cpu(cpu, *cpumask);
-
-                if (likely(cpu < NR_CPUS))
-                        prefetch(&per_cpu(page_states, cpu));
+                next_cpu = next_cpu(cpu, *cpumask);
+                if (likely(next_cpu < NR_CPUS))
+                        prefetch(&per_cpu(page_states, next_cpu));
 
                 out = (unsigned long *)ret;
                 for (off = 0; off < nr; off++)
@@ -1327,7 +1342,7 @@ void get_zone_counts(unsigned long *active,
         *active = 0;
         *inactive = 0;
         *free = 0;
-        for_each_pgdat(pgdat) {
+        for_each_online_pgdat(pgdat) {
                 unsigned long l, m, n;
                 __get_zone_counts(&l, &m, &n, pgdat);
                 *active += l;
@@ -1764,7 +1779,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                         continue;
                 page = pfn_to_page(pfn);
                 set_page_links(page, zone, nid, pfn);
-                set_page_count(page, 1);
+                init_page_count(page);
                 reset_page_mapcount(page);
                 SetPageReserved(page);
                 INIT_LIST_HEAD(&page->lru);
@@ -2013,8 +2028,9 @@ static __meminit void zone_pcp_init(struct zone *zone)
                 setup_pageset(zone_pcp(zone,cpu), batch);
 #endif
         }
-        printk(KERN_DEBUG "  %s zone: %lu pages, LIFO batch:%lu\n",
-                zone->name, zone->present_pages, batch);
+        if (zone->present_pages)
+                printk(KERN_DEBUG "  %s zone: %lu pages, LIFO batch:%lu\n",
+                        zone->name, zone->present_pages, batch);
 }
 
 static __meminit void init_currently_empty_zone(struct zone *zone,
@@ -2025,7 +2041,6 @@ static __meminit void init_currently_empty_zone(struct zone *zone,
         zone_wait_table_init(zone, size);
         pgdat->nr_zones = zone_idx(zone) + 1;
 
-        zone->zone_mem_map = pfn_to_page(zone_start_pfn);
         zone->zone_start_pfn = zone_start_pfn;
 
         memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn);
@@ -2153,8 +2168,9 @@ static void *frag_start(struct seq_file *m, loff_t *pos)
 {
         pg_data_t *pgdat;
         loff_t node = *pos;
-
-        for (pgdat = pgdat_list; pgdat && node; pgdat = pgdat->pgdat_next)
+        for (pgdat = first_online_pgdat();
+             pgdat && node;
+             pgdat = next_online_pgdat(pgdat))
                 --node;
 
         return pgdat;
@@ -2165,7 +2181,7 @@ static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
         pg_data_t *pgdat = (pg_data_t *)arg;
 
         (*pos)++;
-        return pgdat->pgdat_next;
+        return next_online_pgdat(pgdat);
 }
 
 static void frag_stop(struct seq_file *m, void *arg)
@@ -2466,7 +2482,7 @@ static void setup_per_zone_lowmem_reserve(void)
         struct pglist_data *pgdat;
         int j, idx;
 
-        for_each_pgdat(pgdat) {
+        for_each_online_pgdat(pgdat) {
                 for (j = 0; j < MAX_NR_ZONES; j++) {
                         struct zone *zone = pgdat->node_zones + j;
                         unsigned long present_pages = zone->present_pages;
@@ -2685,8 +2701,7 @@ void *__init alloc_large_system_hash(const char *tablename,
         else
                 numentries <<= (PAGE_SHIFT - scale);
         }
-        /* rounded up to nearest power of 2 in size */
-        numentries = 1UL << (long_log2(numentries) + 1);
+        numentries = roundup_pow_of_two(numentries);
 
         /* limit allocation size to 1/16 total memory by default */
         if (max == 0) {
@@ -2729,3 +2744,44 @@ void *__init alloc_large_system_hash(const char *tablename,
 
         return table;
 }
+
+#ifdef CONFIG_OUT_OF_LINE_PFN_TO_PAGE
+/*
+ * pfn <-> page translation. out-of-line version.
+ * (see asm-generic/memory_model.h)
+ */
+#if defined(CONFIG_FLATMEM)
+struct page *pfn_to_page(unsigned long pfn)
+{
+        return mem_map + (pfn - ARCH_PFN_OFFSET);
+}
+unsigned long page_to_pfn(struct page *page)
+{
+        return (page - mem_map) + ARCH_PFN_OFFSET;
+}
+#elif defined(CONFIG_DISCONTIGMEM)
+struct page *pfn_to_page(unsigned long pfn)
+{
+        int nid = arch_pfn_to_nid(pfn);
+        return NODE_DATA(nid)->node_mem_map + arch_local_page_offset(pfn,nid);
+}
+unsigned long page_to_pfn(struct page *page)
+{
+        struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+        return (page - pgdat->node_mem_map) + pgdat->node_start_pfn;
+}
+#elif defined(CONFIG_SPARSEMEM)
+struct page *pfn_to_page(unsigned long pfn)
+{
+        return __section_mem_map_addr(__pfn_to_section(pfn)) + pfn;
+}
+
+unsigned long page_to_pfn(struct page *page)
+{
+        long section_id = page_to_section(page);
+        return page - __section_mem_map_addr(__nr_to_section(section_id));
+}
+#endif /* CONFIG_FLATMEM/DISCONTIGMME/SPARSEMEM */
+EXPORT_SYMBOL(pfn_to_page);
+EXPORT_SYMBOL(page_to_pfn);
+#endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */
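
For reference, a minimal usage sketch of the split_page() interface introduced in this diff; it is illustrative only and not part of the patch (the order and gfp flags are arbitrary choices for the example):

/* Illustrative sketch, not part of the patch: allocate a non-compound
 * order-2 block (4 pages), split it, and free each sub-page on its own,
 * as the split_page() comment requires.
 */
struct page *page = alloc_pages(GFP_KERNEL, 2);     /* no __GFP_COMP */

if (page) {
        int i;

        split_page(page, 2);            /* pages 1..3 gain their own refcount */
        for (i = 0; i < (1 << 2); i++)
                __free_page(page + i);  /* each sub-page freed individually */
}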
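The out-of-line pfn_to_page()/page_to_pfn() pair added at the end mirrors the inline helpers in asm-generic/memory_model.h; each is the inverse of the other for every memory model, so a valid pfn survives the round trip unchanged. A minimal sketch of that invariant (illustrative only, not part of the patch; check_pfn_round_trip is a hypothetical helper):

/* Illustrative only: pfn_to_page() and page_to_pfn() are inverse
 * translations, so a valid, mapped pfn round-trips to itself.
 */
static void check_pfn_round_trip(unsigned long pfn)
{
        struct page *page = pfn_to_page(pfn);

        BUG_ON(page_to_pfn(page) != pfn);
}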