diff options
| author | Christoph Lameter <christoph@graphe.net> | 2005-06-21 20:15:00 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-06-21 21:46:18 -0400 |
| commit | 2caaad41e4aa8f5dd999695b4ddeaa0e7f3912a4 (patch) | |
| tree | 4ce8426bf3a85d92efc5a0f6c981f54963d472e8 | |
| parent | 4ae7c03943fca73f23bc0cdb938070f41b98101f (diff) | |
[PATCH] Reduce size of huge boot per_cpu_pageset
Reduce the size of the huge per_cpu_pageset structure in __initdata that was
introduced into mm1 by the pageset localization patchset. Use one specially
configured pageset per cpu for all zones and nodes during bootup.
- Avoid duplication of pageset initialization code.
- Do the adding to the pageset list before the potential free_pages_bulk
in free_hot_cold_page (otherwise we would have to hold a page
in a pageset during the period that the boot pagesets are in use).
- Remove the mistaken __cpuinitdata attribute and revert to __initdata
for the boot pageset. A boot pageset is not necessary for cpu hotplug.
Tested on x86_64 (2.6.12-rc6-mm1): UP, SMP, NUMA.
Tested on IA64 (2.6.12-rc5-mm2): NUMA (2.6.12-rc6-mm1 is broken for IA64
because of the sparsemem patches).
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
| -rw-r--r-- | mm/page_alloc.c | 108 |
1 files changed, 42 insertions, 66 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a95e72d7f945..418102a02921 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
| @@ -71,11 +71,6 @@ EXPORT_SYMBOL(nr_swap_pages); | |||
| 71 | struct zone *zone_table[1 << (ZONES_SHIFT + NODES_SHIFT)]; | 71 | struct zone *zone_table[1 << (ZONES_SHIFT + NODES_SHIFT)]; |
| 72 | EXPORT_SYMBOL(zone_table); | 72 | EXPORT_SYMBOL(zone_table); |
| 73 | 73 | ||
| 74 | #ifdef CONFIG_NUMA | ||
| 75 | static struct per_cpu_pageset | ||
| 76 | pageset_table[MAX_NR_ZONES*MAX_NUMNODES*NR_CPUS] __initdata; | ||
| 77 | #endif | ||
| 78 | |||
| 79 | static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" }; | 74 | static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" }; |
| 80 | int min_free_kbytes = 1024; | 75 | int min_free_kbytes = 1024; |
| 81 | 76 | ||
| @@ -652,10 +647,10 @@ static void fastcall free_hot_cold_page(struct page *page, int cold) | |||
| 652 | free_pages_check(__FUNCTION__, page); | 647 | free_pages_check(__FUNCTION__, page); |
| 653 | pcp = &zone_pcp(zone, get_cpu())->pcp[cold]; | 648 | pcp = &zone_pcp(zone, get_cpu())->pcp[cold]; |
| 654 | local_irq_save(flags); | 649 | local_irq_save(flags); |
| 655 | if (pcp->count >= pcp->high) | ||
| 656 | pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0); | ||
| 657 | list_add(&page->lru, &pcp->list); | 650 | list_add(&page->lru, &pcp->list); |
| 658 | pcp->count++; | 651 | pcp->count++; |
| 652 | if (pcp->count >= pcp->high) | ||
| 653 | pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0); | ||
| 659 | local_irq_restore(flags); | 654 | local_irq_restore(flags); |
| 660 | put_cpu(); | 655 | put_cpu(); |
| 661 | } | 656 | } |
| @@ -1714,57 +1709,55 @@ static int __devinit zone_batchsize(struct zone *zone) | |||
| 1714 | return batch; | 1709 | return batch; |
| 1715 | } | 1710 | } |
| 1716 | 1711 | ||
| 1712 | inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch) | ||
| 1713 | { | ||
| 1714 | struct per_cpu_pages *pcp; | ||
| 1715 | |||
| 1716 | pcp = &p->pcp[0]; /* hot */ | ||
| 1717 | pcp->count = 0; | ||
| 1718 | pcp->low = 2 * batch; | ||
| 1719 | pcp->high = 6 * batch; | ||
| 1720 | pcp->batch = max(1UL, 1 * batch); | ||
| 1721 | INIT_LIST_HEAD(&pcp->list); | ||
| 1722 | |||
| 1723 | pcp = &p->pcp[1]; /* cold*/ | ||
| 1724 | pcp->count = 0; | ||
| 1725 | pcp->low = 0; | ||
| 1726 | pcp->high = 2 * batch; | ||
| 1727 | pcp->batch = max(1UL, 1 * batch); | ||
| 1728 | INIT_LIST_HEAD(&pcp->list); | ||
| 1729 | } | ||
| 1730 | |||
| 1717 | #ifdef CONFIG_NUMA | 1731 | #ifdef CONFIG_NUMA |
| 1718 | /* | 1732 | /* |
| 1719 | * Dynamicaly allocate memory for the | 1733 | * Boot pageset table. One per cpu which is going to be used for all |
| 1734 | * zones and all nodes. The parameters will be set in such a way | ||
| 1735 | * that an item put on a list will immediately be handed over to | ||
| 1736 | * the buddy list. This is safe since pageset manipulation is done | ||
| 1737 | * with interrupts disabled. | ||
| 1738 | * | ||
| 1739 | * Some NUMA counter updates may also be caught by the boot pagesets. | ||
| 1740 | * These will be discarded when bootup is complete. | ||
| 1741 | */ | ||
| 1742 | static struct per_cpu_pageset | ||
| 1743 | boot_pageset[NR_CPUS] __initdata; | ||
| 1744 | |||
| 1745 | /* | ||
| 1746 | * Dynamically allocate memory for the | ||
| 1720 | * per cpu pageset array in struct zone. | 1747 | * per cpu pageset array in struct zone. |
| 1721 | */ | 1748 | */ |
| 1722 | static int __devinit process_zones(int cpu) | 1749 | static int __devinit process_zones(int cpu) |
| 1723 | { | 1750 | { |
| 1724 | struct zone *zone, *dzone; | 1751 | struct zone *zone, *dzone; |
| 1725 | int i; | ||
| 1726 | 1752 | ||
| 1727 | for_each_zone(zone) { | 1753 | for_each_zone(zone) { |
| 1728 | struct per_cpu_pageset *npageset = NULL; | ||
| 1729 | 1754 | ||
| 1730 | npageset = kmalloc_node(sizeof(struct per_cpu_pageset), | 1755 | zone->pageset[cpu] = kmalloc_node(sizeof(struct per_cpu_pageset), |
| 1731 | GFP_KERNEL, cpu_to_node(cpu)); | 1756 | GFP_KERNEL, cpu_to_node(cpu)); |
| 1732 | if (!npageset) { | 1757 | if (!zone->pageset[cpu]) |
| 1733 | zone->pageset[cpu] = NULL; | ||
| 1734 | goto bad; | 1758 | goto bad; |
| 1735 | } | ||
| 1736 | 1759 | ||
| 1737 | if (zone->pageset[cpu]) { | 1760 | setup_pageset(zone->pageset[cpu], zone_batchsize(zone)); |
| 1738 | memcpy(npageset, zone->pageset[cpu], | ||
| 1739 | sizeof(struct per_cpu_pageset)); | ||
| 1740 | |||
| 1741 | /* Relocate lists */ | ||
| 1742 | for (i = 0; i < 2; i++) { | ||
| 1743 | INIT_LIST_HEAD(&npageset->pcp[i].list); | ||
| 1744 | list_splice(&zone->pageset[cpu]->pcp[i].list, | ||
| 1745 | &npageset->pcp[i].list); | ||
| 1746 | } | ||
| 1747 | } else { | ||
| 1748 | struct per_cpu_pages *pcp; | ||
| 1749 | unsigned long batch; | ||
| 1750 | |||
| 1751 | batch = zone_batchsize(zone); | ||
| 1752 | |||
| 1753 | pcp = &npageset->pcp[0]; /* hot */ | ||
| 1754 | pcp->count = 0; | ||
| 1755 | pcp->low = 2 * batch; | ||
| 1756 | pcp->high = 6 * batch; | ||
| 1757 | pcp->batch = 1 * batch; | ||
| 1758 | INIT_LIST_HEAD(&pcp->list); | ||
| 1759 | |||
| 1760 | pcp = &npageset->pcp[1]; /* cold*/ | ||
| 1761 | pcp->count = 0; | ||
| 1762 | pcp->low = 0; | ||
| 1763 | pcp->high = 2 * batch; | ||
| 1764 | pcp->batch = 1 * batch; | ||
| 1765 | INIT_LIST_HEAD(&pcp->list); | ||
| 1766 | } | ||
| 1767 | zone->pageset[cpu] = npageset; | ||
| 1768 | } | 1761 | } |
| 1769 | 1762 | ||
| 1770 | return 0; | 1763 | return 0; |
| @@ -1878,30 +1871,13 @@ static void __init free_area_init_core(struct pglist_data *pgdat, | |||
| 1878 | batch = zone_batchsize(zone); | 1871 | batch = zone_batchsize(zone); |
| 1879 | 1872 | ||
| 1880 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | 1873 | for (cpu = 0; cpu < NR_CPUS; cpu++) { |
| 1881 | struct per_cpu_pages *pcp; | ||
| 1882 | #ifdef CONFIG_NUMA | 1874 | #ifdef CONFIG_NUMA |
| 1883 | struct per_cpu_pageset *pgset; | 1875 | /* Early boot. Slab allocator not functional yet */ |
| 1884 | pgset = &pageset_table[nid*MAX_NR_ZONES*NR_CPUS + | 1876 | zone->pageset[cpu] = &boot_pageset[cpu]; |
| 1885 | (j * NR_CPUS) + cpu]; | 1877 | setup_pageset(&boot_pageset[cpu],0); |
| 1886 | |||
| 1887 | zone->pageset[cpu] = pgset; | ||
| 1888 | #else | 1878 | #else |
| 1889 | struct per_cpu_pageset *pgset = zone_pcp(zone, cpu); | 1879 | setup_pageset(zone_pcp(zone,cpu), batch); |
| 1890 | #endif | 1880 | #endif |
| 1891 | |||
| 1892 | pcp = &pgset->pcp[0]; /* hot */ | ||
| 1893 | pcp->count = 0; | ||
| 1894 | pcp->low = 2 * batch; | ||
| 1895 | pcp->high = 6 * batch; | ||
| 1896 | pcp->batch = 1 * batch; | ||
| 1897 | INIT_LIST_HEAD(&pcp->list); | ||
| 1898 | |||
| 1899 | pcp = &pgset->pcp[1]; /* cold */ | ||
| 1900 | pcp->count = 0; | ||
| 1901 | pcp->low = 0; | ||
| 1902 | pcp->high = 2 * batch; | ||
| 1903 | pcp->batch = 1 * batch; | ||
| 1904 | INIT_LIST_HEAD(&pcp->list); | ||
| 1905 | } | 1881 | } |
| 1906 | printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n", | 1882 | printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n", |
| 1907 | zone_names[j], realsize, batch); | 1883 | zone_names[j], realsize, batch); |
