Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  129
1 file changed, 90 insertions(+), 39 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fd47494cb989..e0e84924171b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -53,6 +53,7 @@ struct pglist_data *pgdat_list __read_mostly;
 unsigned long totalram_pages __read_mostly;
 unsigned long totalhigh_pages __read_mostly;
 long nr_swap_pages;
+int percpu_pagelist_fraction;
 
 static void fastcall free_hot_cold_page(struct page *page, int cold);
 
@@ -307,7 +308,7 @@ static inline int page_is_buddy(struct page *page, int order)
  * -- wli
  */
 
-static inline void __free_pages_bulk (struct page *page,
+static inline void __free_one_page(struct page *page,
                struct zone *zone, unsigned int order)
 {
        unsigned long page_idx;
@@ -382,40 +383,42 @@ static inline int free_pages_check(struct page *page)
  * And clear the zone's pages_scanned counter, to hold off the "all pages are
  * pinned" detection logic.
  */
-static int
-free_pages_bulk(struct zone *zone, int count,
-               struct list_head *list, unsigned int order)
+static void free_pages_bulk(struct zone *zone, int count,
+                                       struct list_head *list, int order)
 {
-       struct page *page = NULL;
-       int ret = 0;
-
        spin_lock(&zone->lock);
        zone->all_unreclaimable = 0;
        zone->pages_scanned = 0;
-       while (!list_empty(list) && count--) {
+       while (count--) {
+               struct page *page;
+
+               BUG_ON(list_empty(list));
                page = list_entry(list->prev, struct page, lru);
-               /* have to delete it as __free_pages_bulk list manipulates */
+               /* have to delete it as __free_one_page list manipulates */
                list_del(&page->lru);
-               __free_pages_bulk(page, zone, order);
-               ret++;
+               __free_one_page(page, zone, order);
        }
        spin_unlock(&zone->lock);
-       return ret;
 }
 
-void __free_pages_ok(struct page *page, unsigned int order)
+static void free_one_page(struct zone *zone, struct page *page, int order)
 {
-       unsigned long flags;
        LIST_HEAD(list);
+       list_add(&page->lru, &list);
+       free_pages_bulk(zone, 1, &list, order);
+}
+
+static void __free_pages_ok(struct page *page, unsigned int order)
+{
+       unsigned long flags;
        int i;
        int reserved = 0;
 
        arch_free_page(page, order);
 
 #ifndef CONFIG_MMU
-       if (order > 0)
-               for (i = 1 ; i < (1 << order) ; ++i)
-                       __put_page(page + i);
+       for (i = 1 ; i < (1 << order) ; ++i)
+               __put_page(page + i);
 #endif
 
        for (i = 0 ; i < (1 << order) ; ++i)
@@ -423,11 +426,10 @@ void __free_pages_ok(struct page *page, unsigned int order)
        if (reserved)
                return;
 
-       list_add(&page->lru, &list);
-       kernel_map_pages(page, 1<<order, 0);
+       kernel_map_pages(page, 1 << order, 0);
        local_irq_save(flags);
        __mod_page_state(pgfree, 1 << order);
-       free_pages_bulk(page_zone(page), 1, &list, order);
+       free_one_page(page_zone(page), page, order);
        local_irq_restore(flags);
 }
 
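A note on the two hunks above: single-page frees now go through the new free_one_page() wrapper instead of building a LIST_HEAD at the __free_pages_ok() call site, and free_pages_bulk() stops returning a freed-page count because it now frees exactly the number of pages asked for. Dropping the `if (order > 0)` guard in the !CONFIG_MMU block is also safe: at order == 0 the loop bounds are `for (i = 1; i < (1 << 0); ++i)`, i.e. 1 < 1, so the body never executed anyway.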
@@ -596,14 +598,13 @@ void drain_remote_pages(void)
                if (zone->zone_pgdat->node_id == numa_node_id())
                        continue;
 
-               pset = zone->pageset[smp_processor_id()];
+               pset = zone_pcp(zone, smp_processor_id());
                for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
                        struct per_cpu_pages *pcp;
 
                        pcp = &pset->pcp[i];
-                       if (pcp->count)
-                               pcp->count -= free_pages_bulk(zone, pcp->count,
-                                               &pcp->list, 0);
+                       free_pages_bulk(zone, pcp->count, &pcp->list, 0);
+                       pcp->count = 0;
                }
        }
        local_irq_restore(flags);
@@ -626,8 +627,8 @@ static void __drain_pages(unsigned int cpu)
 
                pcp = &pset->pcp[i];
                local_irq_save(flags);
-               pcp->count -= free_pages_bulk(zone, pcp->count,
-                               &pcp->list, 0);
+               free_pages_bulk(zone, pcp->count, &pcp->list, 0);
+               pcp->count = 0;
                local_irq_restore(flags);
        }
 }
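Both drain paths here depend on the new free_pages_bulk() contract: the caller must guarantee the list holds at least `count` pages (the BUG_ON in free_pages_bulk() enforces it), exactly `count` pages are freed, and the caller resets its own bookkeeping. The resulting pattern, extracted from the hunks as a standalone sketch:

        free_pages_bulk(zone, pcp->count, &pcp->list, 0); /* frees exactly pcp->count pages */
        pcp->count = 0;                                   /* counter is now the caller's job */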
@@ -718,8 +719,10 @@ static void fastcall free_hot_cold_page(struct page *page, int cold)
        __inc_page_state(pgfree);
        list_add(&page->lru, &pcp->list);
        pcp->count++;
-       if (pcp->count >= pcp->high)
-               pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
+       if (pcp->count >= pcp->high) {
+               free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
+               pcp->count -= pcp->batch;
+       }
        local_irq_restore(flags);
        put_cpu();
 }
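With the list/counter split, the hot-path flush also becomes explicit: once pcp->count reaches pcp->high, exactly pcp->batch pages go back to the buddy allocator and the counter drops by the same amount. With illustrative values (hypothetical, not set by this patch) of pcp->high == 96 and pcp->batch == 32, the list oscillates between 64 and 96 cached pages, amortizing one zone->lock acquisition over 32 frees.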
@@ -758,7 +761,7 @@ static struct page *buffered_rmqueue(struct zonelist *zonelist,
 
 again:
        cpu = get_cpu();
-       if (order == 0) {
+       if (likely(order == 0)) {
                struct per_cpu_pages *pcp;
 
                pcp = &zone_pcp(zone, cpu)->pcp[cold];
@@ -973,6 +976,7 @@ rebalance:
        cond_resched();
 
        /* We now go into synchronous reclaim */
+       cpuset_memory_pressure_bump();
        p->flags |= PF_MEMALLOC;
        reclaim_state.reclaimed_slab = 0;
        p->reclaim_state = &reclaim_state;
@@ -1204,6 +1208,7 @@ static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
        int cpu = 0;
 
        memset(ret, 0, sizeof(*ret));
+       cpus_and(*cpumask, *cpumask, cpu_online_map);
 
        cpu = first_cpu(*cpumask);
        while (cpu < NR_CPUS) {
@@ -1256,7 +1261,7 @@ unsigned long read_page_state_offset(unsigned long offset)
        unsigned long ret = 0;
        int cpu;
 
-       for_each_cpu(cpu) {
+       for_each_online_cpu(cpu) {
                unsigned long in;
 
                in = (unsigned long)&per_cpu(page_states, cpu) + offset;
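These two hunks make the page-state accounting skip offline CPUs: __get_page_state() masks the caller's cpumask with cpu_online_map up front, and read_page_state_offset() walks only online CPUs. The summation being guarded looks roughly like this (a sketch reconstructed from the surrounding context, where `offset` is a byte offset into struct page_state):

        unsigned long ret = 0;
        int cpu;

        for_each_online_cpu(cpu)
                ret += *(unsigned long *)
                        ((unsigned long)&per_cpu(page_states, cpu) + offset);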
@@ -1830,6 +1835,24 @@ inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
        INIT_LIST_HEAD(&pcp->list);
 }
 
+/*
+ * setup_pagelist_highmark() sets the high water mark for hot per_cpu_pagelist
+ * to the value high for the pageset p.
+ */
+
+static void setup_pagelist_highmark(struct per_cpu_pageset *p,
+                               unsigned long high)
+{
+       struct per_cpu_pages *pcp;
+
+       pcp = &p->pcp[0]; /* hot list */
+       pcp->high = high;
+       pcp->batch = max(1UL, high/4);
+       if ((high/4) > (PAGE_SHIFT * 8))
+               pcp->batch = PAGE_SHIFT * 8;
+}
+
+
 #ifdef CONFIG_NUMA
 /*
  * Boot pageset table. One per cpu which is going to be used for all
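In the new setup_pagelist_highmark() helper, the batch is a quarter of the requested high-water mark, floored at one page and capped at PAGE_SHIFT * 8 (96 pages where PAGE_SHIFT == 12, i.e. 4KB pages). Worked examples under that 4KB assumption:

        high = 1024:  high/4 = 256 > 96, so batch is capped at 96
        high =  100:  batch = 25
        high =    2:  max(1UL, 2/4) = 1, so batch = 1

Note that only the hot list (p->pcp[0]) is retuned; the cold list keeps whatever setup_pageset() chose.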
@@ -1861,12 +1884,16 @@ static int __devinit process_zones(int cpu)
 
        for_each_zone(zone) {
 
-               zone->pageset[cpu] = kmalloc_node(sizeof(struct per_cpu_pageset),
+               zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset),
                                         GFP_KERNEL, cpu_to_node(cpu));
-               if (!zone->pageset[cpu])
+               if (!zone_pcp(zone, cpu))
                        goto bad;
 
-               setup_pageset(zone->pageset[cpu], zone_batchsize(zone));
+               setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone));
+
+               if (percpu_pagelist_fraction)
+                       setup_pagelist_highmark(zone_pcp(zone, cpu),
+                               (zone->present_pages / percpu_pagelist_fraction));
        }
 
        return 0;
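process_zones() now also applies the tunable at pageset-allocation time: if percpu_pagelist_fraction is nonzero, each CPU's hot list in each zone gets a high mark of zone->present_pages / percpu_pagelist_fraction. As an illustration with hypothetical numbers, a zone with 262144 present pages and a fraction of 8 yields a per-CPU high mark of 32768 pages, i.e. 128MB of 4KB pages cached per CPU before the list is flushed back to the buddy allocator.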
@@ -1874,15 +1901,14 @@ bad:
        for_each_zone(dzone) {
                if (dzone == zone)
                        break;
-               kfree(dzone->pageset[cpu]);
-               dzone->pageset[cpu] = NULL;
+               kfree(zone_pcp(dzone, cpu));
+               zone_pcp(dzone, cpu) = NULL;
        }
        return -ENOMEM;
 }
 
 static inline void free_zone_pagesets(int cpu)
 {
-#ifdef CONFIG_NUMA
        struct zone *zone;
 
        for_each_zone(zone) {
@@ -1891,7 +1917,6 @@ static inline void free_zone_pagesets(int cpu)
                zone_pcp(zone, cpu) = NULL;
                kfree(pset);
        }
-#endif
 }
 
 static int __devinit pageset_cpuup_callback(struct notifier_block *nfb,
@@ -1962,7 +1987,7 @@ static __devinit void zone_pcp_init(struct zone *zone)
        for (cpu = 0; cpu < NR_CPUS; cpu++) {
 #ifdef CONFIG_NUMA
                /* Early boot. Slab allocator not functional yet */
-               zone->pageset[cpu] = &boot_pageset[cpu];
+               zone_pcp(zone, cpu) = &boot_pageset[cpu];
                setup_pageset(&boot_pageset[cpu],0);
 #else
                setup_pageset(zone_pcp(zone,cpu), batch);
@@ -2205,7 +2230,7 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
                seq_printf(m,
                           ")"
                           "\n pagesets");
-               for (i = 0; i < ARRAY_SIZE(zone->pageset); i++) {
+               for_each_online_cpu(i) {
                        struct per_cpu_pageset *pageset;
                        int j;
 
@@ -2568,6 +2593,32 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
        return 0;
 }
 
+/*
+ * percpu_pagelist_fraction - changes the pcp->high for each zone on each
+ * cpu.  It is the fraction of total pages in each zone that a hot per cpu pagelist
+ * can have before it gets flushed back to buddy allocator.
+ */
+
+int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
+       struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+{
+       struct zone *zone;
+       unsigned int cpu;
+       int ret;
+
+       ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+       if (!write || (ret == -EINVAL))
+               return ret;
+       for_each_zone(zone) {
+               for_each_online_cpu(cpu) {
+                       unsigned long high;
+                       high = zone->present_pages / percpu_pagelist_fraction;
+                       setup_pagelist_highmark(zone_pcp(zone, cpu), high);
+               }
+       }
+       return 0;
+}
+
 __initdata int hashdist = HASHDIST_DEFAULT;
 
 #ifdef CONFIG_NUMA
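The handler above re-tunes every online CPU's hot list whenever the value is written; plain reads and parse failures fall straight out through the proc_dointvec_minmax() result. This file only defines the handler: making it reachable as /proc/sys/vm/percpu_pagelist_fraction requires a companion ctl_table entry in kernel/sysctl.c, roughly along these lines (a hypothetical sketch; the ctl_name constant and the minimum-value variable are assumptions, not part of this diff):

        {
                .ctl_name       = VM_PERCPU_PAGELIST_FRACTION, /* assumed sysctl id */
                .procname       = "percpu_pagelist_fraction",
                .data           = &percpu_pagelist_fraction,
                .maxlen         = sizeof(percpu_pagelist_fraction),
                .mode           = 0644,
                .proc_handler   = &percpu_pagelist_fraction_sysctl_handler,
                .extra1         = &min_percpu_pagelist_fract, /* assumed lower bound */
        },

After that hookup, e.g. `echo 8 > /proc/sys/vm/percpu_pagelist_fraction` lets each per-CPU hot list grow to one eighth of its zone.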