path: root/mm/page_alloc.c
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--  mm/page_alloc.c  | 129
1 file changed, 90 insertions(+), 39 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fd47494cb989..e0e84924171b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -53,6 +53,7 @@ struct pglist_data *pgdat_list __read_mostly;
 unsigned long totalram_pages __read_mostly;
 unsigned long totalhigh_pages __read_mostly;
 long nr_swap_pages;
+int percpu_pagelist_fraction;
 
 static void fastcall free_hot_cold_page(struct page *page, int cold);
 
@@ -307,7 +308,7 @@ static inline int page_is_buddy(struct page *page, int order)
  * -- wli
  */
 
-static inline void __free_pages_bulk (struct page *page,
+static inline void __free_one_page(struct page *page,
 		struct zone *zone, unsigned int order)
 {
 	unsigned long page_idx;
@@ -382,40 +383,42 @@ static inline int free_pages_check(struct page *page)
  * And clear the zone's pages_scanned counter, to hold off the "all pages are
  * pinned" detection logic.
  */
-static int
-free_pages_bulk(struct zone *zone, int count,
-		struct list_head *list, unsigned int order)
+static void free_pages_bulk(struct zone *zone, int count,
+					struct list_head *list, int order)
 {
-	struct page *page = NULL;
-	int ret = 0;
-
 	spin_lock(&zone->lock);
 	zone->all_unreclaimable = 0;
 	zone->pages_scanned = 0;
-	while (!list_empty(list) && count--) {
+	while (count--) {
+		struct page *page;
+
+		BUG_ON(list_empty(list));
 		page = list_entry(list->prev, struct page, lru);
-		/* have to delete it as __free_pages_bulk list manipulates */
+		/* have to delete it as __free_one_page list manipulates */
 		list_del(&page->lru);
-		__free_pages_bulk(page, zone, order);
-		ret++;
+		__free_one_page(page, zone, order);
 	}
 	spin_unlock(&zone->lock);
-	return ret;
 }
 
-void __free_pages_ok(struct page *page, unsigned int order)
+static void free_one_page(struct zone *zone, struct page *page, int order)
 {
-	unsigned long flags;
 	LIST_HEAD(list);
+	list_add(&page->lru, &list);
+	free_pages_bulk(zone, 1, &list, order);
+}
+
+static void __free_pages_ok(struct page *page, unsigned int order)
+{
+	unsigned long flags;
 	int i;
 	int reserved = 0;
 
 	arch_free_page(page, order);
 
 #ifndef CONFIG_MMU
-	if (order > 0)
-		for (i = 1 ; i < (1 << order) ; ++i)
-			__put_page(page + i);
+	for (i = 1 ; i < (1 << order) ; ++i)
+		__put_page(page + i);
 #endif
 
 	for (i = 0 ; i < (1 << order) ; ++i)
@@ -423,11 +426,10 @@ void __free_pages_ok(struct page *page, unsigned int order)
 	if (reserved)
 		return;
 
-	list_add(&page->lru, &list);
-	kernel_map_pages(page, 1<<order, 0);
+	kernel_map_pages(page, 1 << order, 0);
 	local_irq_save(flags);
 	__mod_page_state(pgfree, 1 << order);
-	free_pages_bulk(page_zone(page), 1, &list, order);
+	free_one_page(page_zone(page), page, order);
 	local_irq_restore(flags);
 }
 
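A note on the new contract, since every caller below has to follow it: free_pages_bulk() no longer returns the number of pages it freed and now BUG()s if the list runs dry, so callers must pass an accurate count and update their own pcp->count afterwards. A minimal caller sketch (the helper name is hypothetical; it simply mirrors the pattern the drain paths below adopt):

/* Hypothetical helper illustrating the reworked free_pages_bulk() contract:
 * the caller disables interrupts, asks for exactly pcp->count pages to be
 * freed (free_pages_bulk() takes zone->lock internally), and then resets
 * its own counter, since no freed-page count is returned any more.
 */
static void drain_pcp_example(struct zone *zone, struct per_cpu_pages *pcp)
{
	unsigned long flags;

	local_irq_save(flags);
	free_pages_bulk(zone, pcp->count, &pcp->list, 0);
	pcp->count = 0;
	local_irq_restore(flags);
}
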
@@ -596,14 +598,13 @@ void drain_remote_pages(void)
 		if (zone->zone_pgdat->node_id == numa_node_id())
 			continue;
 
-		pset = zone->pageset[smp_processor_id()];
+		pset = zone_pcp(zone, smp_processor_id());
 		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
 			struct per_cpu_pages *pcp;
 
 			pcp = &pset->pcp[i];
-			if (pcp->count)
-				pcp->count -= free_pages_bulk(zone, pcp->count,
-						&pcp->list, 0);
+			free_pages_bulk(zone, pcp->count, &pcp->list, 0);
+			pcp->count = 0;
 		}
 	}
 	local_irq_restore(flags);
@@ -626,8 +627,8 @@ static void __drain_pages(unsigned int cpu)
 
 			pcp = &pset->pcp[i];
 			local_irq_save(flags);
-			pcp->count -= free_pages_bulk(zone, pcp->count,
-						&pcp->list, 0);
+			free_pages_bulk(zone, pcp->count, &pcp->list, 0);
+			pcp->count = 0;
 			local_irq_restore(flags);
 		}
 	}
@@ -718,8 +719,10 @@ static void fastcall free_hot_cold_page(struct page *page, int cold)
 	__inc_page_state(pgfree);
 	list_add(&page->lru, &pcp->list);
 	pcp->count++;
-	if (pcp->count >= pcp->high)
-		pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
+	if (pcp->count >= pcp->high) {
+		free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
+		pcp->count -= pcp->batch;
+	}
 	local_irq_restore(flags);
 	put_cpu();
 }
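In free_hot_cold_page() the high-watermark check is now explicit about the arithmetic: when the per-cpu list reaches pcp->high, exactly pcp->batch pages are handed back to the buddy allocator and the count is reduced by the same amount, rather than by whatever free_pages_bulk() used to report. A worked example with illustrative values (the real defaults come from setup_pageset()/zone_batchsize() and are not part of this hunk):

/*
 * Illustrative numbers only: assume pcp->batch == 31 and pcp->high == 186.
 * Freeing the page that makes pcp->count reach 186 triggers
 *
 *	free_pages_bulk(zone, 31, &pcp->list, 0);
 *	pcp->count -= 31;	/* 186 - 31 = 155 pages stay cached */
 *
 * The old code derived the same 155 from free_pages_bulk()'s return value.
 */
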
@@ -758,7 +761,7 @@ static struct page *buffered_rmqueue(struct zonelist *zonelist,
 
 again:
 	cpu = get_cpu();
-	if (order == 0) {
+	if (likely(order == 0)) {
 		struct per_cpu_pages *pcp;
 
 		pcp = &zone_pcp(zone, cpu)->pcp[cold];
@@ -973,6 +976,7 @@ rebalance:
 	cond_resched();
 
 	/* We now go into synchronous reclaim */
+	cpuset_memory_pressure_bump();
 	p->flags |= PF_MEMALLOC;
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
@@ -1204,6 +1208,7 @@ static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
 	int cpu = 0;
 
 	memset(ret, 0, sizeof(*ret));
+	cpus_and(*cpumask, *cpumask, cpu_online_map);
 
 	cpu = first_cpu(*cpumask);
 	while (cpu < NR_CPUS) {
@@ -1256,7 +1261,7 @@ unsigned long read_page_state_offset(unsigned long offset)
 	unsigned long ret = 0;
 	int cpu;
 
-	for_each_cpu(cpu) {
+	for_each_online_cpu(cpu) {
 		unsigned long in;
 
 		in = (unsigned long)&per_cpu(page_states, cpu) + offset;
@@ -1830,6 +1835,24 @@ inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 	INIT_LIST_HEAD(&pcp->list);
 }
 
+/*
+ * setup_pagelist_highmark() sets the high water mark for hot per_cpu_pagelist
+ * to the value high for the pageset p.
+ */
+
+static void setup_pagelist_highmark(struct per_cpu_pageset *p,
+				unsigned long high)
+{
+	struct per_cpu_pages *pcp;
+
+	pcp = &p->pcp[0]; /* hot list */
+	pcp->high = high;
+	pcp->batch = max(1UL, high/4);
+	if ((high/4) > (PAGE_SHIFT * 8))
+		pcp->batch = PAGE_SHIFT * 8;
+}
+
+
 #ifdef CONFIG_NUMA
 /*
  * Boot pageset table. One per cpu which is going to be used for all
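setup_pagelist_highmark() only touches the hot list (pcp[0]): it stores the requested high watermark and derives the flush batch as a quarter of it, clamped to at most PAGE_SHIFT * 8 pages. A small worked example, assuming 4 KB pages (PAGE_SHIFT == 12, so the ceiling is 96):

/*
 * Worked example, assuming PAGE_SHIFT == 12 (4 KB pages):
 *
 *	setup_pagelist_highmark(p, 1024);  /* 1024/4 = 256 > 96, batch clamped to 96 */
 *	setup_pagelist_highmark(p, 200);   /* batch = 200/4 = 50 */
 *	setup_pagelist_highmark(p, 2);     /* batch = max(1, 0) = 1 */
 *
 * i.e. batch ends up as min(max(1, high / 4), PAGE_SHIFT * 8).
 */
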
@@ -1861,12 +1884,16 @@ static int __devinit process_zones(int cpu)
 
 	for_each_zone(zone) {
 
-		zone->pageset[cpu] = kmalloc_node(sizeof(struct per_cpu_pageset),
+		zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset),
 			 GFP_KERNEL, cpu_to_node(cpu));
-		if (!zone->pageset[cpu])
+		if (!zone_pcp(zone, cpu))
 			goto bad;
 
-		setup_pageset(zone->pageset[cpu], zone_batchsize(zone));
+		setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone));
+
+		if (percpu_pagelist_fraction)
+			setup_pagelist_highmark(zone_pcp(zone, cpu),
+				(zone->present_pages / percpu_pagelist_fraction));
 	}
 
 	return 0;
@@ -1874,15 +1901,14 @@ bad:
 	for_each_zone(dzone) {
 		if (dzone == zone)
 			break;
-		kfree(dzone->pageset[cpu]);
-		dzone->pageset[cpu] = NULL;
+		kfree(zone_pcp(dzone, cpu));
+		zone_pcp(dzone, cpu) = NULL;
 	}
 	return -ENOMEM;
 }
 
 static inline void free_zone_pagesets(int cpu)
 {
-#ifdef CONFIG_NUMA
 	struct zone *zone;
 
 	for_each_zone(zone) {
@@ -1891,7 +1917,6 @@ static inline void free_zone_pagesets(int cpu)
 		zone_pcp(zone, cpu) = NULL;
 		kfree(pset);
 	}
-#endif
 }
 
 static int __devinit pageset_cpuup_callback(struct notifier_block *nfb,
@@ -1962,7 +1987,7 @@ static __devinit void zone_pcp_init(struct zone *zone)
 	for (cpu = 0; cpu < NR_CPUS; cpu++) {
 #ifdef CONFIG_NUMA
 		/* Early boot. Slab allocator not functional yet */
-		zone->pageset[cpu] = &boot_pageset[cpu];
+		zone_pcp(zone, cpu) = &boot_pageset[cpu];
 		setup_pageset(&boot_pageset[cpu],0);
 #else
 		setup_pageset(zone_pcp(zone,cpu), batch);
@@ -2205,7 +2230,7 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
 	seq_printf(m,
 		   ")"
 		   "\n pagesets");
-	for (i = 0; i < ARRAY_SIZE(zone->pageset); i++) {
+	for_each_online_cpu(i) {
 		struct per_cpu_pageset *pageset;
 		int j;
 
@@ -2568,6 +2593,32 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
 	return 0;
 }
 
+/*
+ * percpu_pagelist_fraction - changes the pcp->high for each zone on each
+ * cpu. It is the fraction of total pages in each zone that a hot per cpu pagelist
+ * can have before it gets flushed back to buddy allocator.
+ */
+
+int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
+	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+{
+	struct zone *zone;
+	unsigned int cpu;
+	int ret;
+
+	ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+	if (!write || (ret == -EINVAL))
+		return ret;
+	for_each_zone(zone) {
+		for_each_online_cpu(cpu) {
+			unsigned long high;
+			high = zone->present_pages / percpu_pagelist_fraction;
+			setup_pagelist_highmark(zone_pcp(zone, cpu), high);
+		}
+	}
+	return 0;
+}
+
 __initdata int hashdist = HASHDIST_DEFAULT;
 
 #ifdef CONFIG_NUMA
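The handler re-derives the high watermark for every zone on every online cpu each time the value is written. The sysctl table entry itself is not part of this diff (it lives in kernel/sysctl.c); assuming it is exposed as vm.percpu_pagelist_fraction, the effect of a write looks roughly like this:

/*
 * Illustrative effect of "sysctl -w vm.percpu_pagelist_fraction=8"
 * (sysctl name assumed, wired up outside this file), for a zone with
 * present_pages == 262144 (1 GB of 4 KB pages):
 *
 *	high  = 262144 / 8 = 32768 pages per cpu hot list
 *	batch = min(max(1, 32768 / 4), PAGE_SHIFT * 8) = 96
 *
 * so each cpu may cache up to 1/8 of the zone before free_hot_cold_page()
 * flushes a 96-page batch back to the buddy lists.
 */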