Diffstat (limited to 'mm')
-rw-r--r--  mm/Kconfig       38
-rw-r--r--  mm/Makefile       1
-rw-r--r--  mm/bootmem.c      9
-rw-r--r--  mm/memory.c       2
-rw-r--r--  mm/page_alloc.c  39
-rw-r--r--  mm/sparse.c      85
6 files changed, 159 insertions, 15 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 5127441561b4..cd379936cac6 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -6,6 +6,7 @@ choice
 	prompt "Memory model"
 	depends on SELECT_MEMORY_MODEL
 	default DISCONTIGMEM_MANUAL if ARCH_DISCONTIGMEM_DEFAULT
+	default SPARSEMEM_MANUAL if ARCH_SPARSEMEM_DEFAULT
 	default FLATMEM_MANUAL
 
 config FLATMEM_MANUAL
@@ -17,7 +18,15 @@ config FLATMEM_MANUAL
 	  only have one option here: FLATMEM.  This is normal
 	  and a correct option.
 
-	  If unsure, choose this option over any other.
+	  Some users of more advanced features like NUMA and
+	  memory hotplug may have different options here.
+	  DISCONTIGMEM is an more mature, better tested system,
+	  but is incompatible with memory hotplug and may suffer
+	  decreased performance over SPARSEMEM.  If unsure between
+	  "Sparse Memory" and "Discontiguous Memory", choose
+	  "Discontiguous Memory".
+
+	  If unsure, choose this option (Flat Memory) over any other.
 
 config DISCONTIGMEM_MANUAL
 	bool "Discontigious Memory"
@@ -35,15 +44,38 @@ config DISCONTIGMEM_MANUAL
 
 	  If unsure, choose "Flat Memory" over this option.
 
+config SPARSEMEM_MANUAL
+	bool "Sparse Memory"
+	depends on ARCH_SPARSEMEM_ENABLE
+	help
+	  This will be the only option for some systems, including
+	  memory hotplug systems.  This is normal.
+
+	  For many other systems, this will be an alternative to
+	  "Discontigious Memory".  This option provides some potential
+	  performance benefits, along with decreased code complexity,
+	  but it is newer, and more experimental.
+
+	  If unsure, choose "Discontiguous Memory" or "Flat Memory"
+	  over this option.
+
 endchoice
 
 config DISCONTIGMEM
 	def_bool y
 	depends on (!SELECT_MEMORY_MODEL && ARCH_DISCONTIGMEM_ENABLE) || DISCONTIGMEM_MANUAL
 
+config SPARSEMEM
+	def_bool y
+	depends on SPARSEMEM_MANUAL
+
 config FLATMEM
 	def_bool y
-	depends on !DISCONTIGMEM || FLATMEM_MANUAL
+	depends on (!DISCONTIGMEM && !SPARSEMEM) || FLATMEM_MANUAL
+
+config FLAT_NODE_MEM_MAP
+	def_bool y
+	depends on !SPARSEMEM
 
 #
 # Both the NUMA code and DISCONTIGMEM use arrays of pg_data_t's
@@ -56,4 +88,4 @@ config NEED_MULTIPLE_NODES
 
 config HAVE_MEMORY_PRESENT
 	def_bool y
-	depends on ARCH_HAVE_MEMORY_PRESENT
+	depends on ARCH_HAVE_MEMORY_PRESENT || SPARSEMEM
diff --git a/mm/Makefile b/mm/Makefile
index 097408064f6a..8f70ffd763c8 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -15,6 +15,7 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
 obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o thrash.o
 obj-$(CONFIG_HUGETLBFS)	+= hugetlb.o
 obj-$(CONFIG_NUMA) 	+= mempolicy.o
+obj-$(CONFIG_SPARSEMEM)	+= sparse.o
 obj-$(CONFIG_SHMEM) += shmem.o
 obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
 
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 260e703850d8..f82f7aebbee3 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -256,6 +256,7 @@ found:
 static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
 {
 	struct page *page;
+	unsigned long pfn;
 	bootmem_data_t *bdata = pgdat->bdata;
 	unsigned long i, count, total = 0;
 	unsigned long idx;
@@ -266,7 +267,7 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
 
 	count = 0;
 	/* first extant page of the node */
-	page = virt_to_page(phys_to_virt(bdata->node_boot_start));
+	pfn = bdata->node_boot_start >> PAGE_SHIFT;
 	idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
 	map = bdata->node_bootmem_map;
 	/* Check physaddr is O(LOG2(BITS_PER_LONG)) page aligned */
@@ -275,9 +276,11 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
 		gofast = 1;
 	for (i = 0; i < idx; ) {
 		unsigned long v = ~map[i / BITS_PER_LONG];
+
 		if (gofast && v == ~0UL) {
 			int j, order;
 
+			page = pfn_to_page(pfn);
 			count += BITS_PER_LONG;
 			__ClearPageReserved(page);
 			order = ffs(BITS_PER_LONG) - 1;
@@ -292,6 +295,8 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
 			page += BITS_PER_LONG;
 		} else if (v) {
 			unsigned long m;
+
+			page = pfn_to_page(pfn);
 			for (m = 1; m && i < idx; m<<=1, page++, i++) {
 				if (v & m) {
 					count++;
@@ -302,8 +307,8 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
 			}
 		} else {
 			i+=BITS_PER_LONG;
-			page += BITS_PER_LONG;
 		}
+		pfn += BITS_PER_LONG;
 	}
 	total += count;
 
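The bootmem change is easiest to read as a pattern: the old loop stepped a single struct page pointer across the whole node, which assumes one flat, contiguous mem_map. With SPARSEMEM that assumption can break at section boundaries, so the loop now carries a pfn and re-derives the page pointer for each word of the bootmem bitmap. Below is a minimal sketch of that pattern only, with a hypothetical process_page() standing in for the real clear-and-free logic:

/* Sketch only: process_page() is a hypothetical placeholder, and the
 * real loop above also consults the bootmem bitmap word by word. */
static void walk_node_pages(unsigned long start_pfn, unsigned long nr_pages)
{
	unsigned long pfn = start_pfn;
	unsigned long done = 0;

	while (done < nr_pages) {
		/* Re-derive the pointer for each word-sized block: the
		 * mem_map is not virtually contiguous across sections. */
		struct page *page = pfn_to_page(pfn);
		unsigned long i;

		for (i = 0; i < BITS_PER_LONG && done < nr_pages; i++, done++)
			process_page(page + i);

		pfn += BITS_PER_LONG;	/* advance by pfn, not by pointer */
	}
}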
diff --git a/mm/memory.c b/mm/memory.c
index da91b7bf9986..30975ef48722 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -58,7 +58,7 @@
 #include <linux/swapops.h>
 #include <linux/elf.h>
 
-#ifndef CONFIG_DISCONTIGMEM
+#ifndef CONFIG_NEED_MULTIPLE_NODES
 /* use the per-pgdat data instead for discontigmem - mbligh */
 unsigned long max_mapnr;
 struct page *mem_map;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 20e239599db0..5c1b8982a6da 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -68,7 +68,7 @@ EXPORT_SYMBOL(nr_swap_pages);
  * Used by page_zone() to look up the address of the struct zone whose
  * id is encoded in the upper bits of page->flags
  */
-struct zone *zone_table[1 << (ZONES_SHIFT + NODES_SHIFT)];
+struct zone *zone_table[1 << ZONETABLE_SHIFT];
 EXPORT_SYMBOL(zone_table);
 
 static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
@@ -1649,11 +1649,15 @@ static void __init calculate_zone_totalpages(struct pglist_data *pgdat,
 void __init memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		unsigned long start_pfn)
 {
-	struct page *start = pfn_to_page(start_pfn);
 	struct page *page;
+	int end_pfn = start_pfn + size;
+	int pfn;
 
-	for (page = start; page < (start + size); page++) {
-		set_page_links(page, zone, nid);
+	for (pfn = start_pfn; pfn < end_pfn; pfn++, page++) {
+		if (!early_pfn_valid(pfn))
+			continue;
+		page = pfn_to_page(pfn);
+		set_page_links(page, zone, nid, pfn);
 		set_page_count(page, 0);
 		reset_page_mapcount(page);
 		SetPageReserved(page);
@@ -1677,6 +1681,20 @@ void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone,
 	}
 }
 
+#define ZONETABLE_INDEX(x, zone_nr)	((x << ZONES_SHIFT) | zone_nr)
+void zonetable_add(struct zone *zone, int nid, int zid, unsigned long pfn,
+		unsigned long size)
+{
+	unsigned long snum = pfn_to_section_nr(pfn);
+	unsigned long end = pfn_to_section_nr(pfn + size);
+
+	if (FLAGS_HAS_NODE)
+		zone_table[ZONETABLE_INDEX(nid, zid)] = zone;
+	else
+		for (; snum <= end; snum++)
+			zone_table[ZONETABLE_INDEX(snum, zid)] = zone;
+}
+
 #ifndef __HAVE_ARCH_MEMMAP_INIT
 #define memmap_init(size, nid, zone, start_pfn) \
 	memmap_init_zone((size), (nid), (zone), (start_pfn))
@@ -1861,7 +1879,6 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
 		unsigned long size, realsize;
 		unsigned long batch;
 
-		zone_table[NODEZONE(nid, j)] = zone;
 		realsize = size = zones_size[j];
 		if (zholes_size)
 			realsize -= zholes_size[j];
@@ -1927,6 +1944,8 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
 
 		memmap_init(size, nid, j, zone_start_pfn);
 
+		zonetable_add(zone, nid, j, zone_start_pfn, size);
+
 		zone_start_pfn += size;
 
 		zone_init_free_lists(pgdat, zone, zone->spanned_pages);
@@ -1935,28 +1954,30 @@ static void __init free_area_init_core(struct pglist_data *pgdat,
 
 static void __init alloc_node_mem_map(struct pglist_data *pgdat)
 {
-	unsigned long size;
-	struct page *map;
-
 	/* Skip empty nodes */
 	if (!pgdat->node_spanned_pages)
 		return;
 
+#ifdef CONFIG_FLAT_NODE_MEM_MAP
 	/* ia64 gets its own node_mem_map, before this, without bootmem */
 	if (!pgdat->node_mem_map) {
+		unsigned long size;
+		struct page *map;
+
 		size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
 		map = alloc_remap(pgdat->node_id, size);
 		if (!map)
 			map = alloc_bootmem_node(pgdat, size);
 		pgdat->node_mem_map = map;
 	}
-#ifndef CONFIG_DISCONTIGMEM
+#ifdef CONFIG_FLATMEM
 	/*
 	 * With no DISCONTIG, the global mem_map is just set as node 0's
 	 */
 	if (pgdat == NODE_DATA(0))
 		mem_map = NODE_DATA(0)->node_mem_map;
 #endif
+#endif /* CONFIG_FLAT_NODE_MEM_MAP */
 }
 
 void __init free_area_init_node(int nid, struct pglist_data *pgdat,
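For reference, ZONETABLE_INDEX() simply packs a node number (or, when page->flags has no node field, a section number) above the zone bits, and zonetable_add() fills one zone_table[] slot per key the zone can be looked up under. A small standalone illustration of that packing follows; the ZONES_SHIFT width is just an example value for the sketch, not necessarily the kernel's constant:

#include <stdio.h>

#define ZONES_SHIFT 2	/* example width only */
#define ZONETABLE_INDEX(x, zone_nr) (((x) << ZONES_SHIFT) | (zone_nr))

int main(void)
{
	unsigned long zid = 1;	/* e.g. "Normal" in the zone_names[] above */
	unsigned long snum;

	/* FLAGS_HAS_NODE case: one entry keyed by (nid, zid) */
	printf("node key: %lu\n", ZONETABLE_INDEX(3UL, zid));

	/* otherwise: one entry per (section, zid) the zone spans */
	for (snum = 8; snum <= 10; snum++)
		printf("section key: %lu\n", ZONETABLE_INDEX(snum, zid));

	return 0;
}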
diff --git a/mm/sparse.c b/mm/sparse.c
new file mode 100644
index 000000000000..f888385b9e14
--- /dev/null
+++ b/mm/sparse.c
@@ -0,0 +1,85 @@
+/*
+ * sparse memory mappings.
+ */
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/bootmem.h>
+#include <linux/module.h>
+#include <asm/dma.h>
+
+/*
+ * Permanent SPARSEMEM data:
+ *
+ * 1) mem_section	- memory sections, mem_map's for valid memory
+ */
+struct mem_section mem_section[NR_MEM_SECTIONS];
+EXPORT_SYMBOL(mem_section);
+
+/* Record a memory area against a node. */
+void memory_present(int nid, unsigned long start, unsigned long end)
+{
+	unsigned long pfn;
+
+	start &= PAGE_SECTION_MASK;
+	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
+		unsigned long section = pfn_to_section_nr(pfn);
+		if (!mem_section[section].section_mem_map)
+			mem_section[section].section_mem_map = (void *) -1;
+	}
+}
+
+/*
+ * Only used by the i386 NUMA architecures, but relatively
+ * generic code.
+ */
+unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn,
+						     unsigned long end_pfn)
+{
+	unsigned long pfn;
+	unsigned long nr_pages = 0;
+
+	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+		if (nid != early_pfn_to_nid(pfn))
+			continue;
+
+		if (pfn_valid(pfn))
+			nr_pages += PAGES_PER_SECTION;
+	}
+
+	return nr_pages * sizeof(struct page);
+}
+
+/*
+ * Allocate the accumulated non-linear sections, allocate a mem_map
+ * for each and record the physical to section mapping.
+ */
+void sparse_init(void)
+{
+	unsigned long pnum;
+	struct page *map;
+	int nid;
+
+	for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
+		if (!mem_section[pnum].section_mem_map)
+			continue;
+
+		nid = early_pfn_to_nid(section_nr_to_pfn(pnum));
+		map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION);
+		if (!map)
+			map = alloc_bootmem_node(NODE_DATA(nid),
+				sizeof(struct page) * PAGES_PER_SECTION);
+		if (!map) {
+			mem_section[pnum].section_mem_map = 0;
+			continue;
+		}
+
+		/*
+		 * Subtle, we encode the real pfn into the mem_map such that
+		 * the identity pfn - section_mem_map will return the actual
+		 * physical page frame number.
+		 */
+		mem_section[pnum].section_mem_map = map -
+			section_nr_to_pfn(pnum);
+	}
+}
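The "Subtle" comment at the end of sparse_init() is the heart of the model: because section_mem_map is stored as the allocated map minus the section's first pfn, converting between a pfn and its struct page is a single add or subtract. A sketch of the decode side this implies; the helper names here are illustrative, not the exact ones in the headers:

/* Illustrative decode of the encoded section_mem_map (assumes the
 * struct mem_section declarations from the accompanying header
 * changes, which are outside this mm/-only diff, are visible). */
static inline struct page *sparse_pfn_to_page(unsigned long pfn)
{
	/* section_mem_map == map - section_start_pfn, so adding the full
	 * pfn lands on that pfn's struct page within the section's map. */
	return mem_section[pfn_to_section_nr(pfn)].section_mem_map + pfn;
}

static inline unsigned long sparse_page_to_pfn(unsigned long section_nr,
					       struct page *page)
{
	/* The reverse subtraction recovers the physical frame number. */
	return page - mem_section[section_nr].section_mem_map;
}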