diff options
author | Christoph Lameter <christoph@graphe.net> | 2005-06-21 20:15:00 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-06-21 21:46:18 -0400 |
commit | 2caaad41e4aa8f5dd999695b4ddeaa0e7f3912a4 (patch) | |
tree | 4ce8426bf3a85d92efc5a0f6c981f54963d472e8 | |
parent | 4ae7c03943fca73f23bc0cdb938070f41b98101f (diff) |
[PATCH] Reduce size of huge boot per_cpu_pageset
Reduce size of the huge per_cpu_pageset structure in __initdata introduced
into mm1 with the pageset localization patchset. Use one specially
configured pageset per cpu for all zones and nodes during bootup.
- Avoid duplication of the pageset initialization code.
- Do the adding to the pageset list before the potential free_pages_bulk
  in free_hot_cold_page (otherwise we would have to hold a page
  in a pageset during the period that the boot pagesets are in use).
- Remove the mistaken __cpuinitdata attribute and revert back to __initdata
  for the boot pageset. A boot pageset is not necessary for cpu hotplug.
Tested for UP, SMP and NUMA on x86_64 (2.6.12-rc6-mm1).
Tested for NUMA on IA64 (2.6.12-rc5-mm2); 2.6.12-rc6-mm1 is broken for
IA64 because of the sparsemem patches.
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | mm/page_alloc.c | 108 |
1 files changed, 42 insertions, 66 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a95e72d7f945..418102a02921 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -71,11 +71,6 @@ EXPORT_SYMBOL(nr_swap_pages); | |||
71 | struct zone *zone_table[1 << (ZONES_SHIFT + NODES_SHIFT)]; | 71 | struct zone *zone_table[1 << (ZONES_SHIFT + NODES_SHIFT)]; |
72 | EXPORT_SYMBOL(zone_table); | 72 | EXPORT_SYMBOL(zone_table); |
73 | 73 | ||
74 | #ifdef CONFIG_NUMA | ||
75 | static struct per_cpu_pageset | ||
76 | pageset_table[MAX_NR_ZONES*MAX_NUMNODES*NR_CPUS] __initdata; | ||
77 | #endif | ||
78 | |||
79 | static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" }; | 74 | static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" }; |
80 | int min_free_kbytes = 1024; | 75 | int min_free_kbytes = 1024; |
81 | 76 | ||
@@ -652,10 +647,10 @@ static void fastcall free_hot_cold_page(struct page *page, int cold) | |||
652 | free_pages_check(__FUNCTION__, page); | 647 | free_pages_check(__FUNCTION__, page); |
653 | pcp = &zone_pcp(zone, get_cpu())->pcp[cold]; | 648 | pcp = &zone_pcp(zone, get_cpu())->pcp[cold]; |
654 | local_irq_save(flags); | 649 | local_irq_save(flags); |
655 | if (pcp->count >= pcp->high) | ||
656 | pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0); | ||
657 | list_add(&page->lru, &pcp->list); | 650 | list_add(&page->lru, &pcp->list); |
658 | pcp->count++; | 651 | pcp->count++; |
652 | if (pcp->count >= pcp->high) | ||
653 | pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0); | ||
659 | local_irq_restore(flags); | 654 | local_irq_restore(flags); |
660 | put_cpu(); | 655 | put_cpu(); |
661 | } | 656 | } |
@@ -1714,57 +1709,55 @@ static int __devinit zone_batchsize(struct zone *zone) | |||
1714 | return batch; | 1709 | return batch; |
1715 | } | 1710 | } |
1716 | 1711 | ||
1712 | inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch) | ||
1713 | { | ||
1714 | struct per_cpu_pages *pcp; | ||
1715 | |||
1716 | pcp = &p->pcp[0]; /* hot */ | ||
1717 | pcp->count = 0; | ||
1718 | pcp->low = 2 * batch; | ||
1719 | pcp->high = 6 * batch; | ||
1720 | pcp->batch = max(1UL, 1 * batch); | ||
1721 | INIT_LIST_HEAD(&pcp->list); | ||
1722 | |||
1723 | pcp = &p->pcp[1]; /* cold*/ | ||
1724 | pcp->count = 0; | ||
1725 | pcp->low = 0; | ||
1726 | pcp->high = 2 * batch; | ||
1727 | pcp->batch = max(1UL, 1 * batch); | ||
1728 | INIT_LIST_HEAD(&pcp->list); | ||
1729 | } | ||
1730 | |||
1717 | #ifdef CONFIG_NUMA | 1731 | #ifdef CONFIG_NUMA |
1718 | /* | 1732 | /* |
1719 | * Dynamicaly allocate memory for the | 1733 | * Boot pageset table. One per cpu which is going to be used for all |
1734 | * zones and all nodes. The parameters will be set in such a way | ||
1735 | * that an item put on a list will immediately be handed over to | ||
1736 | * the buddy list. This is safe since pageset manipulation is done | ||
1737 | * with interrupts disabled. | ||
1738 | * | ||
1739 | * Some NUMA counter updates may also be caught by the boot pagesets. | ||
1740 | * These will be discarded when bootup is complete. | ||
1741 | */ | ||
1742 | static struct per_cpu_pageset | ||
1743 | boot_pageset[NR_CPUS] __initdata; | ||
1744 | |||
1745 | /* | ||
1746 | * Dynamically allocate memory for the | ||
1720 | * per cpu pageset array in struct zone. | 1747 | * per cpu pageset array in struct zone. |
1721 | */ | 1748 | */ |
1722 | static int __devinit process_zones(int cpu) | 1749 | static int __devinit process_zones(int cpu) |
1723 | { | 1750 | { |
1724 | struct zone *zone, *dzone; | 1751 | struct zone *zone, *dzone; |
1725 | int i; | ||
1726 | 1752 | ||
1727 | for_each_zone(zone) { | 1753 | for_each_zone(zone) { |
1728 | struct per_cpu_pageset *npageset = NULL; | ||
1729 | 1754 | ||
1730 | npageset = kmalloc_node(sizeof(struct per_cpu_pageset), | 1755 | zone->pageset[cpu] = kmalloc_node(sizeof(struct per_cpu_pageset), |
1731 | GFP_KERNEL, cpu_to_node(cpu)); | 1756 | GFP_KERNEL, cpu_to_node(cpu)); |
1732 | if (!npageset) { | 1757 | if (!zone->pageset[cpu]) |
1733 | zone->pageset[cpu] = NULL; | ||
1734 | goto bad; | 1758 | goto bad; |
1735 | } | ||
1736 | 1759 | ||
1737 | if (zone->pageset[cpu]) { | 1760 | setup_pageset(zone->pageset[cpu], zone_batchsize(zone)); |
1738 | memcpy(npageset, zone->pageset[cpu], | ||
1739 | sizeof(struct per_cpu_pageset)); | ||
1740 | |||
1741 | /* Relocate lists */ | ||
1742 | for (i = 0; i < 2; i++) { | ||
1743 | INIT_LIST_HEAD(&npageset->pcp[i].list); | ||
1744 | list_splice(&zone->pageset[cpu]->pcp[i].list, | ||
1745 | &npageset->pcp[i].list); | ||
1746 | } | ||
1747 | } else { | ||
1748 | struct per_cpu_pages *pcp; | ||
1749 | unsigned long batch; | ||
1750 | |||
1751 | batch = zone_batchsize(zone); | ||
1752 | |||
1753 | pcp = &npageset->pcp[0]; /* hot */ | ||
1754 | pcp->count = 0; | ||
1755 | pcp->low = 2 * batch; | ||
1756 | pcp->high = 6 * batch; | ||
1757 | pcp->batch = 1 * batch; | ||
1758 | INIT_LIST_HEAD(&pcp->list); | ||
1759 | |||
1760 | pcp = &npageset->pcp[1]; /* cold*/ | ||
1761 | pcp->count = 0; | ||
1762 | pcp->low = 0; | ||
1763 | pcp->high = 2 * batch; | ||
1764 | pcp->batch = 1 * batch; | ||
1765 | INIT_LIST_HEAD(&pcp->list); | ||
1766 | } | ||
1767 | zone->pageset[cpu] = npageset; | ||
1768 | } | 1761 | } |
1769 | 1762 | ||
1770 | return 0; | 1763 | return 0; |
@@ -1878,30 +1871,13 @@ static void __init free_area_init_core(struct pglist_data *pgdat, | |||
1878 | batch = zone_batchsize(zone); | 1871 | batch = zone_batchsize(zone); |
1879 | 1872 | ||
1880 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | 1873 | for (cpu = 0; cpu < NR_CPUS; cpu++) { |
1881 | struct per_cpu_pages *pcp; | ||
1882 | #ifdef CONFIG_NUMA | 1874 | #ifdef CONFIG_NUMA |
1883 | struct per_cpu_pageset *pgset; | 1875 | /* Early boot. Slab allocator not functional yet */ |
1884 | pgset = &pageset_table[nid*MAX_NR_ZONES*NR_CPUS + | 1876 | zone->pageset[cpu] = &boot_pageset[cpu]; |
1885 | (j * NR_CPUS) + cpu]; | 1877 | setup_pageset(&boot_pageset[cpu],0); |
1886 | |||
1887 | zone->pageset[cpu] = pgset; | ||
1888 | #else | 1878 | #else |
1889 | struct per_cpu_pageset *pgset = zone_pcp(zone, cpu); | 1879 | setup_pageset(zone_pcp(zone,cpu), batch); |
1890 | #endif | 1880 | #endif |
1891 | |||
1892 | pcp = &pgset->pcp[0]; /* hot */ | ||
1893 | pcp->count = 0; | ||
1894 | pcp->low = 2 * batch; | ||
1895 | pcp->high = 6 * batch; | ||
1896 | pcp->batch = 1 * batch; | ||
1897 | INIT_LIST_HEAD(&pcp->list); | ||
1898 | |||
1899 | pcp = &pgset->pcp[1]; /* cold */ | ||
1900 | pcp->count = 0; | ||
1901 | pcp->low = 0; | ||
1902 | pcp->high = 2 * batch; | ||
1903 | pcp->batch = 1 * batch; | ||
1904 | INIT_LIST_HEAD(&pcp->list); | ||
1905 | } | 1881 | } |
1906 | printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n", | 1882 | printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n", |
1907 | zone_names[j], realsize, batch); | 1883 | zone_names[j], realsize, batch); |