diff options
-rw-r--r-- | arch/i386/Kconfig | 1 | ||||
-rw-r--r-- | include/linux/mm.h | 92 | ||||
-rw-r--r-- | include/linux/mmzone.h | 96 | ||||
-rw-r--r-- | include/linux/numa.h | 2 | ||||
-rw-r--r-- | mm/Kconfig | 38 | ||||
-rw-r--r-- | mm/Makefile | 1 | ||||
-rw-r--r-- | mm/bootmem.c | 9 | ||||
-rw-r--r-- | mm/memory.c | 2 | ||||
-rw-r--r-- | mm/page_alloc.c | 39 | ||||
-rw-r--r-- | mm/sparse.c | 85 |
10 files changed, 332 insertions, 33 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 3b7248126d29..f0064b5e3702 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig | |||
@@ -813,6 +813,7 @@ source "mm/Kconfig" | |||
813 | config HAVE_ARCH_EARLY_PFN_TO_NID | 813 | config HAVE_ARCH_EARLY_PFN_TO_NID |
814 | bool | 814 | bool |
815 | default y | 815 | default y |
816 | depends on NUMA | ||
816 | 817 | ||
817 | config HIGHPTE | 818 | config HIGHPTE |
818 | bool "Allocate 3rd-level pagetables from highmem" | 819 | bool "Allocate 3rd-level pagetables from highmem" |
diff --git a/include/linux/mm.h b/include/linux/mm.h index 57b2ead51dba..6eb7f48317f8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -397,40 +397,80 @@ static inline void put_page(struct page *page) | |||
397 | * sets it, so none of the operations on it need to be atomic. | 397 | * sets it, so none of the operations on it need to be atomic. |
398 | */ | 398 | */ |
399 | 399 | ||
400 | /* Page flags: | NODE | ZONE | ... | FLAGS | */ | 400 | |
401 | #define NODES_PGOFF ((sizeof(page_flags_t)*8) - NODES_SHIFT) | 401 | /* |
402 | #define ZONES_PGOFF (NODES_PGOFF - ZONES_SHIFT) | 402 | * page->flags layout: |
403 | * | ||
404 | * There are three possibilities for how page->flags get | ||
405 | * laid out. The first is for the normal case, without | ||
406 | * sparsemem. The second is for sparsemem when there is | ||
407 | * plenty of space for node and section. The last is when | ||
408 | * we have run out of space and have to fall back to an | ||
409 | * alternate (slower) way of determining the node. | ||
410 | * | ||
411 | * No sparsemem: | NODE | ZONE | ... | FLAGS | | ||
412 | * with space for node: | SECTION | NODE | ZONE | ... | FLAGS | | ||
413 | * no space for node: | SECTION | ZONE | ... | FLAGS | | ||
414 | */ | ||
415 | #ifdef CONFIG_SPARSEMEM | ||
416 | #define SECTIONS_WIDTH SECTIONS_SHIFT | ||
417 | #else | ||
418 | #define SECTIONS_WIDTH 0 | ||
419 | #endif | ||
420 | |||
421 | #define ZONES_WIDTH ZONES_SHIFT | ||
422 | |||
423 | #if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT <= FLAGS_RESERVED | ||
424 | #define NODES_WIDTH NODES_SHIFT | ||
425 | #else | ||
426 | #define NODES_WIDTH 0 | ||
427 | #endif | ||
428 | |||
429 | /* Page flags: | [SECTION] | [NODE] | ZONE | ... | FLAGS | */ | ||
430 | #define SECTIONS_PGOFF ((sizeof(page_flags_t)*8) - SECTIONS_WIDTH) | ||
431 | #define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH) | ||
432 | #define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) | ||
433 | |||
434 | /* | ||
435 | * We are going to use the flags for the page to node mapping if it's in | ||
436 | * there. This includes the case where there is no node, so it is implicit. | ||
437 | */ | ||
438 | #define FLAGS_HAS_NODE (NODES_WIDTH > 0 || NODES_SHIFT == 0) | ||
439 | |||
440 | #ifndef PFN_SECTION_SHIFT | ||
441 | #define PFN_SECTION_SHIFT 0 | ||
442 | #endif | ||
403 | 443 | ||
404 | /* | 444 | /* |
405 | * Define the bit shifts to access each section. For non-existent | 445 | * Define the bit shifts to access each section. For non-existent |
406 | * sections we define the shift as 0; that plus a 0 mask ensures | 446 | * sections we define the shift as 0; that plus a 0 mask ensures |
407 | * the compiler will optimise away reference to them. | 447 | * the compiler will optimise away reference to them. |
408 | */ | 448 | */ |
409 | #define NODES_PGSHIFT (NODES_PGOFF * (NODES_SHIFT != 0)) | 449 | #define SECTIONS_PGSHIFT (SECTIONS_PGOFF * (SECTIONS_WIDTH != 0)) |
410 | #define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_SHIFT != 0)) | 450 | #define NODES_PGSHIFT (NODES_PGOFF * (NODES_WIDTH != 0)) |
451 | #define ZONES_PGSHIFT (ZONES_PGOFF * (ZONES_WIDTH != 0)) | ||
411 | 452 | ||
412 | /* NODE:ZONE is used to lookup the zone from a page. */ | 453 | /* NODE:ZONE or SECTION:ZONE is used to lookup the zone from a page. */ |
454 | #if FLAGS_HAS_NODE | ||
413 | #define ZONETABLE_SHIFT (NODES_SHIFT + ZONES_SHIFT) | 455 | #define ZONETABLE_SHIFT (NODES_SHIFT + ZONES_SHIFT) |
456 | #else | ||
457 | #define ZONETABLE_SHIFT (SECTIONS_SHIFT + ZONES_SHIFT) | ||
458 | #endif | ||
414 | #define ZONETABLE_PGSHIFT ZONES_PGSHIFT | 459 | #define ZONETABLE_PGSHIFT ZONES_PGSHIFT |
415 | 460 | ||
416 | #if NODES_SHIFT+ZONES_SHIFT > FLAGS_RESERVED | 461 | #if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED |
417 | #error NODES_SHIFT+ZONES_SHIFT > FLAGS_RESERVED | 462 | #error SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > FLAGS_RESERVED |
418 | #endif | 463 | #endif |
419 | 464 | ||
420 | #define NODEZONE(node, zone) ((node << ZONES_SHIFT) | zone) | 465 | #define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) |
421 | 466 | #define NODES_MASK ((1UL << NODES_WIDTH) - 1) | |
422 | #define ZONES_MASK ((1UL << ZONES_SHIFT) - 1) | 467 | #define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) |
423 | #define NODES_MASK ((1UL << NODES_SHIFT) - 1) | ||
424 | #define ZONETABLE_MASK ((1UL << ZONETABLE_SHIFT) - 1) | 468 | #define ZONETABLE_MASK ((1UL << ZONETABLE_SHIFT) - 1) |
425 | 469 | ||
426 | static inline unsigned long page_zonenum(struct page *page) | 470 | static inline unsigned long page_zonenum(struct page *page) |
427 | { | 471 | { |
428 | return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; | 472 | return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; |
429 | } | 473 | } |
430 | static inline unsigned long page_to_nid(struct page *page) | ||
431 | { | ||
432 | return (page->flags >> NODES_PGSHIFT) & NODES_MASK; | ||
433 | } | ||
434 | 474 | ||
435 | struct zone; | 475 | struct zone; |
436 | extern struct zone *zone_table[]; | 476 | extern struct zone *zone_table[]; |
@@ -441,6 +481,18 @@ static inline struct zone *page_zone(struct page *page) | |||
441 | ZONETABLE_MASK]; | 481 | ZONETABLE_MASK]; |
442 | } | 482 | } |
443 | 483 | ||
484 | static inline unsigned long page_to_nid(struct page *page) | ||
485 | { | ||
486 | if (FLAGS_HAS_NODE) | ||
487 | return (page->flags >> NODES_PGSHIFT) & NODES_MASK; | ||
488 | else | ||
489 | return page_zone(page)->zone_pgdat->node_id; | ||
490 | } | ||
491 | static inline unsigned long page_to_section(struct page *page) | ||
492 | { | ||
493 | return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK; | ||
494 | } | ||
495 | |||
444 | static inline void set_page_zone(struct page *page, unsigned long zone) | 496 | static inline void set_page_zone(struct page *page, unsigned long zone) |
445 | { | 497 | { |
446 | page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT); | 498 | page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT); |
@@ -451,12 +503,18 @@ static inline void set_page_node(struct page *page, unsigned long node) | |||
451 | page->flags &= ~(NODES_MASK << NODES_PGSHIFT); | 503 | page->flags &= ~(NODES_MASK << NODES_PGSHIFT); |
452 | page->flags |= (node & NODES_MASK) << NODES_PGSHIFT; | 504 | page->flags |= (node & NODES_MASK) << NODES_PGSHIFT; |
453 | } | 505 | } |
506 | static inline void set_page_section(struct page *page, unsigned long section) | ||
507 | { | ||
508 | page->flags &= ~(SECTIONS_MASK << SECTIONS_PGSHIFT); | ||
509 | page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; | ||
510 | } | ||
454 | 511 | ||
455 | static inline void set_page_links(struct page *page, unsigned long zone, | 512 | static inline void set_page_links(struct page *page, unsigned long zone, |
456 | unsigned long node) | 513 | unsigned long node, unsigned long pfn) |
457 | { | 514 | { |
458 | set_page_zone(page, zone); | 515 | set_page_zone(page, zone); |
459 | set_page_node(page, node); | 516 | set_page_node(page, node); |
517 | set_page_section(page, pfn_to_section_nr(pfn)); | ||
460 | } | 518 | } |
461 | 519 | ||
462 | #ifndef CONFIG_DISCONTIGMEM | 520 | #ifndef CONFIG_DISCONTIGMEM |
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6ef07de98d69..19860d317ec2 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
@@ -269,7 +269,9 @@ typedef struct pglist_data { | |||
269 | struct zone node_zones[MAX_NR_ZONES]; | 269 | struct zone node_zones[MAX_NR_ZONES]; |
270 | struct zonelist node_zonelists[GFP_ZONETYPES]; | 270 | struct zonelist node_zonelists[GFP_ZONETYPES]; |
271 | int nr_zones; | 271 | int nr_zones; |
272 | #ifdef CONFIG_FLAT_NODE_MEM_MAP | ||
272 | struct page *node_mem_map; | 273 | struct page *node_mem_map; |
274 | #endif | ||
273 | struct bootmem_data *bdata; | 275 | struct bootmem_data *bdata; |
274 | unsigned long node_start_pfn; | 276 | unsigned long node_start_pfn; |
275 | unsigned long node_present_pages; /* total number of physical pages */ | 277 | unsigned long node_present_pages; /* total number of physical pages */ |
@@ -284,7 +286,11 @@ typedef struct pglist_data { | |||
284 | 286 | ||
285 | #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) | 287 | #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) |
286 | #define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages) | 288 | #define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages) |
289 | #ifdef CONFIG_FLAT_NODE_MEM_MAP | ||
287 | #define pgdat_page_nr(pgdat, pagenr) ((pgdat)->node_mem_map + (pagenr)) | 290 | #define pgdat_page_nr(pgdat, pagenr) ((pgdat)->node_mem_map + (pagenr)) |
291 | #else | ||
292 | #define pgdat_page_nr(pgdat, pagenr) pfn_to_page((pgdat)->node_start_pfn + (pagenr)) | ||
293 | #endif | ||
288 | #define nid_page_nr(nid, pagenr) pgdat_page_nr(NODE_DATA(nid),(pagenr)) | 294 | #define nid_page_nr(nid, pagenr) pgdat_page_nr(NODE_DATA(nid),(pagenr)) |
289 | 295 | ||
290 | extern struct pglist_data *pgdat_list; | 296 | extern struct pglist_data *pgdat_list; |
@@ -416,6 +422,10 @@ extern struct pglist_data contig_page_data; | |||
416 | 422 | ||
417 | #endif /* !CONFIG_NEED_MULTIPLE_NODES */ | 423 | #endif /* !CONFIG_NEED_MULTIPLE_NODES */ |
418 | 424 | ||
425 | #ifdef CONFIG_SPARSEMEM | ||
426 | #include <asm/sparsemem.h> | ||
427 | #endif | ||
428 | |||
419 | #if BITS_PER_LONG == 32 || defined(ARCH_HAS_ATOMIC_UNSIGNED) | 429 | #if BITS_PER_LONG == 32 || defined(ARCH_HAS_ATOMIC_UNSIGNED) |
420 | /* | 430 | /* |
421 | * with 32 bit page->flags field, we reserve 8 bits for node/zone info. | 431 | * with 32 bit page->flags field, we reserve 8 bits for node/zone info. |
@@ -439,6 +449,92 @@ extern struct pglist_data contig_page_data; | |||
439 | #define early_pfn_to_nid(nid) (0UL) | 449 | #define early_pfn_to_nid(nid) (0UL) |
440 | #endif | 450 | #endif |
441 | 451 | ||
452 | #define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT) | ||
453 | #define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT) | ||
454 | |||
455 | #ifdef CONFIG_SPARSEMEM | ||
456 | |||
457 | /* | ||
458 | * SECTIONS_SHIFT #bits space required to store a section # | ||
459 | * | ||
460 | * PA_SECTION_SHIFT physical address to/from section number | ||
461 | * PFN_SECTION_SHIFT pfn to/from section number | ||
462 | */ | ||
463 | #define SECTIONS_SHIFT (MAX_PHYSMEM_BITS - SECTION_SIZE_BITS) | ||
464 | |||
465 | #define PA_SECTION_SHIFT (SECTION_SIZE_BITS) | ||
466 | #define PFN_SECTION_SHIFT (SECTION_SIZE_BITS - PAGE_SHIFT) | ||
467 | |||
468 | #define NR_MEM_SECTIONS (1UL << SECTIONS_SHIFT) | ||
469 | |||
470 | #define PAGES_PER_SECTION (1UL << PFN_SECTION_SHIFT) | ||
471 | #define PAGE_SECTION_MASK (~(PAGES_PER_SECTION-1)) | ||
472 | |||
473 | #if (MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS | ||
474 | #error Allocator MAX_ORDER exceeds SECTION_SIZE | ||
475 | #endif | ||
476 | |||
477 | struct page; | ||
478 | struct mem_section { | ||
479 | struct page *section_mem_map; | ||
480 | }; | ||
481 | |||
482 | extern struct mem_section mem_section[NR_MEM_SECTIONS]; | ||
483 | |||
484 | /* | ||
485 | * Given a kernel address, find the home node of the underlying memory. | ||
486 | */ | ||
487 | #define kvaddr_to_nid(kaddr) pfn_to_nid(__pa(kaddr) >> PAGE_SHIFT) | ||
488 | |||
489 | static inline struct mem_section *__pfn_to_section(unsigned long pfn) | ||
490 | { | ||
491 | return &mem_section[pfn_to_section_nr(pfn)]; | ||
492 | } | ||
493 | |||
494 | #define pfn_to_page(pfn) \ | ||
495 | ({ \ | ||
496 | unsigned long __pfn = (pfn); \ | ||
497 | __pfn_to_section(__pfn)->section_mem_map + __pfn; \ | ||
498 | }) | ||
499 | #define page_to_pfn(page) \ | ||
500 | ({ \ | ||
501 | page - mem_section[page_to_section(page)].section_mem_map; \ | ||
502 | }) | ||
503 | |||
504 | static inline int pfn_valid(unsigned long pfn) | ||
505 | { | ||
506 | if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) | ||
507 | return 0; | ||
508 | return mem_section[pfn_to_section_nr(pfn)].section_mem_map != 0; | ||
509 | } | ||
510 | |||
511 | /* | ||
512 | * These are _only_ used during initialisation, therefore they | ||
513 | * can use __initdata ... They could have names to indicate | ||
514 | * this restriction. | ||
515 | */ | ||
516 | #ifdef CONFIG_NUMA | ||
517 | #define pfn_to_nid early_pfn_to_nid | ||
518 | #endif | ||
519 | |||
520 | #define pfn_to_pgdat(pfn) \ | ||
521 | ({ \ | ||
522 | NODE_DATA(pfn_to_nid(pfn)); \ | ||
523 | }) | ||
524 | |||
525 | #define early_pfn_valid(pfn) pfn_valid(pfn) | ||
526 | void sparse_init(void); | ||
527 | #else | ||
528 | #define sparse_init() do {} while (0) | ||
529 | #endif /* CONFIG_SPARSEMEM */ | ||
530 | |||
531 | #ifndef early_pfn_valid | ||
532 | #define early_pfn_valid(pfn) (1) | ||
533 | #endif | ||
534 | |||
535 | void memory_present(int nid, unsigned long start, unsigned long end); | ||
536 | unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); | ||
537 | |||
442 | #endif /* !__ASSEMBLY__ */ | 538 | #endif /* !__ASSEMBLY__ */ |
443 | #endif /* __KERNEL__ */ | 539 | #endif /* __KERNEL__ */ |
444 | #endif /* _LINUX_MMZONE_H */ | 540 | #endif /* _LINUX_MMZONE_H */ |
diff --git a/include/linux/numa.h b/include/linux/numa.h index bd0c8c4e9a95..f0c539bd3cfc 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h | |||
@@ -3,7 +3,7 @@ | |||
3 | 3 | ||
4 | #include <linux/config.h> | 4 | #include <linux/config.h> |
5 | 5 | ||
6 | #ifdef CONFIG_DISCONTIGMEM | 6 | #ifndef CONFIG_FLATMEM |
7 | #include <asm/numnodes.h> | 7 | #include <asm/numnodes.h> |
8 | #endif | 8 | #endif |
9 | 9 | ||
diff --git a/mm/Kconfig b/mm/Kconfig index 5127441561b4..cd379936cac6 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
@@ -6,6 +6,7 @@ choice | |||
6 | prompt "Memory model" | 6 | prompt "Memory model" |
7 | depends on SELECT_MEMORY_MODEL | 7 | depends on SELECT_MEMORY_MODEL |
8 | default DISCONTIGMEM_MANUAL if ARCH_DISCONTIGMEM_DEFAULT | 8 | default DISCONTIGMEM_MANUAL if ARCH_DISCONTIGMEM_DEFAULT |
9 | default SPARSEMEM_MANUAL if ARCH_SPARSEMEM_DEFAULT | ||
9 | default FLATMEM_MANUAL | 10 | default FLATMEM_MANUAL |
10 | 11 | ||
11 | config FLATMEM_MANUAL | 12 | config FLATMEM_MANUAL |
@@ -17,7 +18,15 @@ config FLATMEM_MANUAL | |||
17 | only have one option here: FLATMEM. This is normal | 18 | only have one option here: FLATMEM. This is normal |
18 | and a correct option. | 19 | and a correct option. |
19 | 20 | ||
20 | If unsure, choose this option over any other. | 21 | Some users of more advanced features like NUMA and |
22 | memory hotplug may have different options here. | ||
23 | DISCONTIGMEM is a more mature, better tested system, | ||
24 | but is incompatible with memory hotplug and may suffer | ||
25 | decreased performance over SPARSEMEM. If unsure between | ||
26 | "Sparse Memory" and "Discontiguous Memory", choose | ||
27 | "Discontiguous Memory". | ||
28 | |||
29 | If unsure, choose this option (Flat Memory) over any other. | ||
21 | 30 | ||
22 | config DISCONTIGMEM_MANUAL | 31 | config DISCONTIGMEM_MANUAL |
23 | bool "Discontigious Memory" | 32 | bool "Discontigious Memory" |
@@ -35,15 +44,38 @@ config DISCONTIGMEM_MANUAL | |||
35 | 44 | ||
36 | If unsure, choose "Flat Memory" over this option. | 45 | If unsure, choose "Flat Memory" over this option. |
37 | 46 | ||
47 | config SPARSEMEM_MANUAL | ||
48 | bool "Sparse Memory" | ||
49 | depends on ARCH_SPARSEMEM_ENABLE | ||
50 | help | ||
51 | This will be the only option for some systems, including | ||
52 | memory hotplug systems. This is normal. | ||
53 | |||
54 | For many other systems, this will be an alternative to | ||
55 | "Discontiguous Memory". This option provides some potential | ||
56 | performance benefits, along with decreased code complexity, | ||
57 | but it is newer, and more experimental. | ||
58 | |||
59 | If unsure, choose "Discontiguous Memory" or "Flat Memory" | ||
60 | over this option. | ||
61 | |||
38 | endchoice | 62 | endchoice |
39 | 63 | ||
40 | config DISCONTIGMEM | 64 | config DISCONTIGMEM |
41 | def_bool y | 65 | def_bool y |
42 | depends on (!SELECT_MEMORY_MODEL && ARCH_DISCONTIGMEM_ENABLE) || DISCONTIGMEM_MANUAL | 66 | depends on (!SELECT_MEMORY_MODEL && ARCH_DISCONTIGMEM_ENABLE) || DISCONTIGMEM_MANUAL |
43 | 67 | ||
68 | config SPARSEMEM | ||
69 | def_bool y | ||
70 | depends on SPARSEMEM_MANUAL | ||
71 | |||
44 | config FLATMEM | 72 | config FLATMEM |
45 | def_bool y | 73 | def_bool y |
46 | depends on !DISCONTIGMEM || FLATMEM_MANUAL | 74 | depends on (!DISCONTIGMEM && !SPARSEMEM) || FLATMEM_MANUAL |
75 | |||
76 | config FLAT_NODE_MEM_MAP | ||
77 | def_bool y | ||
78 | depends on !SPARSEMEM | ||
47 | 79 | ||
48 | # | 80 | # |
49 | # Both the NUMA code and DISCONTIGMEM use arrays of pg_data_t's | 81 | # Both the NUMA code and DISCONTIGMEM use arrays of pg_data_t's |
@@ -56,4 +88,4 @@ config NEED_MULTIPLE_NODES | |||
56 | 88 | ||
57 | config HAVE_MEMORY_PRESENT | 89 | config HAVE_MEMORY_PRESENT |
58 | def_bool y | 90 | def_bool y |
59 | depends on ARCH_HAVE_MEMORY_PRESENT | 91 | depends on ARCH_HAVE_MEMORY_PRESENT || SPARSEMEM |
diff --git a/mm/Makefile b/mm/Makefile index 097408064f6a..8f70ffd763c8 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
@@ -15,6 +15,7 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ | |||
15 | obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o | 15 | obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o |
16 | obj-$(CONFIG_HUGETLBFS) += hugetlb.o | 16 | obj-$(CONFIG_HUGETLBFS) += hugetlb.o |
17 | obj-$(CONFIG_NUMA) += mempolicy.o | 17 | obj-$(CONFIG_NUMA) += mempolicy.o |
18 | obj-$(CONFIG_SPARSEMEM) += sparse.o | ||
18 | obj-$(CONFIG_SHMEM) += shmem.o | 19 | obj-$(CONFIG_SHMEM) += shmem.o |
19 | obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o | 20 | obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o |
20 | 21 | ||
diff --git a/mm/bootmem.c b/mm/bootmem.c index 260e703850d8..f82f7aebbee3 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c | |||
@@ -256,6 +256,7 @@ found: | |||
256 | static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) | 256 | static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) |
257 | { | 257 | { |
258 | struct page *page; | 258 | struct page *page; |
259 | unsigned long pfn; | ||
259 | bootmem_data_t *bdata = pgdat->bdata; | 260 | bootmem_data_t *bdata = pgdat->bdata; |
260 | unsigned long i, count, total = 0; | 261 | unsigned long i, count, total = 0; |
261 | unsigned long idx; | 262 | unsigned long idx; |
@@ -266,7 +267,7 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) | |||
266 | 267 | ||
267 | count = 0; | 268 | count = 0; |
268 | /* first extant page of the node */ | 269 | /* first extant page of the node */ |
269 | page = virt_to_page(phys_to_virt(bdata->node_boot_start)); | 270 | pfn = bdata->node_boot_start >> PAGE_SHIFT; |
270 | idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT); | 271 | idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT); |
271 | map = bdata->node_bootmem_map; | 272 | map = bdata->node_bootmem_map; |
272 | /* Check physaddr is O(LOG2(BITS_PER_LONG)) page aligned */ | 273 | /* Check physaddr is O(LOG2(BITS_PER_LONG)) page aligned */ |
@@ -275,9 +276,11 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) | |||
275 | gofast = 1; | 276 | gofast = 1; |
276 | for (i = 0; i < idx; ) { | 277 | for (i = 0; i < idx; ) { |
277 | unsigned long v = ~map[i / BITS_PER_LONG]; | 278 | unsigned long v = ~map[i / BITS_PER_LONG]; |
279 | |||
278 | if (gofast && v == ~0UL) { | 280 | if (gofast && v == ~0UL) { |
279 | int j, order; | 281 | int j, order; |
280 | 282 | ||
283 | page = pfn_to_page(pfn); | ||
281 | count += BITS_PER_LONG; | 284 | count += BITS_PER_LONG; |
282 | __ClearPageReserved(page); | 285 | __ClearPageReserved(page); |
283 | order = ffs(BITS_PER_LONG) - 1; | 286 | order = ffs(BITS_PER_LONG) - 1; |
@@ -292,6 +295,8 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) | |||
292 | page += BITS_PER_LONG; | 295 | page += BITS_PER_LONG; |
293 | } else if (v) { | 296 | } else if (v) { |
294 | unsigned long m; | 297 | unsigned long m; |
298 | |||
299 | page = pfn_to_page(pfn); | ||
295 | for (m = 1; m && i < idx; m<<=1, page++, i++) { | 300 | for (m = 1; m && i < idx; m<<=1, page++, i++) { |
296 | if (v & m) { | 301 | if (v & m) { |
297 | count++; | 302 | count++; |
@@ -302,8 +307,8 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) | |||
302 | } | 307 | } |
303 | } else { | 308 | } else { |
304 | i+=BITS_PER_LONG; | 309 | i+=BITS_PER_LONG; |
305 | page += BITS_PER_LONG; | ||
306 | } | 310 | } |
311 | pfn += BITS_PER_LONG; | ||
307 | } | 312 | } |
308 | total += count; | 313 | total += count; |
309 | 314 | ||
diff --git a/mm/memory.c b/mm/memory.c index da91b7bf9986..30975ef48722 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -58,7 +58,7 @@ | |||
58 | #include <linux/swapops.h> | 58 | #include <linux/swapops.h> |
59 | #include <linux/elf.h> | 59 | #include <linux/elf.h> |
60 | 60 | ||
61 | #ifndef CONFIG_DISCONTIGMEM | 61 | #ifndef CONFIG_NEED_MULTIPLE_NODES |
62 | /* use the per-pgdat data instead for discontigmem - mbligh */ | 62 | /* use the per-pgdat data instead for discontigmem - mbligh */ |
63 | unsigned long max_mapnr; | 63 | unsigned long max_mapnr; |
64 | struct page *mem_map; | 64 | struct page *mem_map; |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 20e239599db0..5c1b8982a6da 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -68,7 +68,7 @@ EXPORT_SYMBOL(nr_swap_pages); | |||
68 | * Used by page_zone() to look up the address of the struct zone whose | 68 | * Used by page_zone() to look up the address of the struct zone whose |
69 | * id is encoded in the upper bits of page->flags | 69 | * id is encoded in the upper bits of page->flags |
70 | */ | 70 | */ |
71 | struct zone *zone_table[1 << (ZONES_SHIFT + NODES_SHIFT)]; | 71 | struct zone *zone_table[1 << ZONETABLE_SHIFT]; |
72 | EXPORT_SYMBOL(zone_table); | 72 | EXPORT_SYMBOL(zone_table); |
73 | 73 | ||
74 | static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" }; | 74 | static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" }; |
@@ -1649,11 +1649,15 @@ static void __init calculate_zone_totalpages(struct pglist_data *pgdat, | |||
1649 | void __init memmap_init_zone(unsigned long size, int nid, unsigned long zone, | 1649 | void __init memmap_init_zone(unsigned long size, int nid, unsigned long zone, |
1650 | unsigned long start_pfn) | 1650 | unsigned long start_pfn) |
1651 | { | 1651 | { |
1652 | struct page *start = pfn_to_page(start_pfn); | ||
1653 | struct page *page; | 1652 | struct page *page; |
1653 | int end_pfn = start_pfn + size; | ||
1654 | int pfn; | ||
1654 | 1655 | ||
1655 | for (page = start; page < (start + size); page++) { | 1656 | for (pfn = start_pfn; pfn < end_pfn; pfn++, page++) { |
1656 | set_page_links(page, zone, nid); | 1657 | if (!early_pfn_valid(pfn)) |
1658 | continue; | ||
1659 | page = pfn_to_page(pfn); | ||
1660 | set_page_links(page, zone, nid, pfn); | ||
1657 | set_page_count(page, 0); | 1661 | set_page_count(page, 0); |
1658 | reset_page_mapcount(page); | 1662 | reset_page_mapcount(page); |
1659 | SetPageReserved(page); | 1663 | SetPageReserved(page); |
@@ -1677,6 +1681,20 @@ void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone, | |||
1677 | } | 1681 | } |
1678 | } | 1682 | } |
1679 | 1683 | ||
1684 | #define ZONETABLE_INDEX(x, zone_nr) ((x << ZONES_SHIFT) | zone_nr) | ||
1685 | void zonetable_add(struct zone *zone, int nid, int zid, unsigned long pfn, | ||
1686 | unsigned long size) | ||
1687 | { | ||
1688 | unsigned long snum = pfn_to_section_nr(pfn); | ||
1689 | unsigned long end = pfn_to_section_nr(pfn + size); | ||
1690 | |||
1691 | if (FLAGS_HAS_NODE) | ||
1692 | zone_table[ZONETABLE_INDEX(nid, zid)] = zone; | ||
1693 | else | ||
1694 | for (; snum <= end; snum++) | ||
1695 | zone_table[ZONETABLE_INDEX(snum, zid)] = zone; | ||
1696 | } | ||
1697 | |||
1680 | #ifndef __HAVE_ARCH_MEMMAP_INIT | 1698 | #ifndef __HAVE_ARCH_MEMMAP_INIT |
1681 | #define memmap_init(size, nid, zone, start_pfn) \ | 1699 | #define memmap_init(size, nid, zone, start_pfn) \ |
1682 | memmap_init_zone((size), (nid), (zone), (start_pfn)) | 1700 | memmap_init_zone((size), (nid), (zone), (start_pfn)) |
@@ -1861,7 +1879,6 @@ static void __init free_area_init_core(struct pglist_data *pgdat, | |||
1861 | unsigned long size, realsize; | 1879 | unsigned long size, realsize; |
1862 | unsigned long batch; | 1880 | unsigned long batch; |
1863 | 1881 | ||
1864 | zone_table[NODEZONE(nid, j)] = zone; | ||
1865 | realsize = size = zones_size[j]; | 1882 | realsize = size = zones_size[j]; |
1866 | if (zholes_size) | 1883 | if (zholes_size) |
1867 | realsize -= zholes_size[j]; | 1884 | realsize -= zholes_size[j]; |
@@ -1927,6 +1944,8 @@ static void __init free_area_init_core(struct pglist_data *pgdat, | |||
1927 | 1944 | ||
1928 | memmap_init(size, nid, j, zone_start_pfn); | 1945 | memmap_init(size, nid, j, zone_start_pfn); |
1929 | 1946 | ||
1947 | zonetable_add(zone, nid, j, zone_start_pfn, size); | ||
1948 | |||
1930 | zone_start_pfn += size; | 1949 | zone_start_pfn += size; |
1931 | 1950 | ||
1932 | zone_init_free_lists(pgdat, zone, zone->spanned_pages); | 1951 | zone_init_free_lists(pgdat, zone, zone->spanned_pages); |
@@ -1935,28 +1954,30 @@ static void __init free_area_init_core(struct pglist_data *pgdat, | |||
1935 | 1954 | ||
1936 | static void __init alloc_node_mem_map(struct pglist_data *pgdat) | 1955 | static void __init alloc_node_mem_map(struct pglist_data *pgdat) |
1937 | { | 1956 | { |
1938 | unsigned long size; | ||
1939 | struct page *map; | ||
1940 | |||
1941 | /* Skip empty nodes */ | 1957 | /* Skip empty nodes */ |
1942 | if (!pgdat->node_spanned_pages) | 1958 | if (!pgdat->node_spanned_pages) |
1943 | return; | 1959 | return; |
1944 | 1960 | ||
1961 | #ifdef CONFIG_FLAT_NODE_MEM_MAP | ||
1945 | /* ia64 gets its own node_mem_map, before this, without bootmem */ | 1962 | /* ia64 gets its own node_mem_map, before this, without bootmem */ |
1946 | if (!pgdat->node_mem_map) { | 1963 | if (!pgdat->node_mem_map) { |
1964 | unsigned long size; | ||
1965 | struct page *map; | ||
1966 | |||
1947 | size = (pgdat->node_spanned_pages + 1) * sizeof(struct page); | 1967 | size = (pgdat->node_spanned_pages + 1) * sizeof(struct page); |
1948 | map = alloc_remap(pgdat->node_id, size); | 1968 | map = alloc_remap(pgdat->node_id, size); |
1949 | if (!map) | 1969 | if (!map) |
1950 | map = alloc_bootmem_node(pgdat, size); | 1970 | map = alloc_bootmem_node(pgdat, size); |
1951 | pgdat->node_mem_map = map; | 1971 | pgdat->node_mem_map = map; |
1952 | } | 1972 | } |
1953 | #ifndef CONFIG_DISCONTIGMEM | 1973 | #ifdef CONFIG_FLATMEM |
1954 | /* | 1974 | /* |
1955 | * With no DISCONTIG, the global mem_map is just set as node 0's | 1975 | * With no DISCONTIG, the global mem_map is just set as node 0's |
1956 | */ | 1976 | */ |
1957 | if (pgdat == NODE_DATA(0)) | 1977 | if (pgdat == NODE_DATA(0)) |
1958 | mem_map = NODE_DATA(0)->node_mem_map; | 1978 | mem_map = NODE_DATA(0)->node_mem_map; |
1959 | #endif | 1979 | #endif |
1980 | #endif /* CONFIG_FLAT_NODE_MEM_MAP */ | ||
1960 | } | 1981 | } |
1961 | 1982 | ||
1962 | void __init free_area_init_node(int nid, struct pglist_data *pgdat, | 1983 | void __init free_area_init_node(int nid, struct pglist_data *pgdat, |
diff --git a/mm/sparse.c b/mm/sparse.c new file mode 100644 index 000000000000..f888385b9e14 --- /dev/null +++ b/mm/sparse.c | |||
@@ -0,0 +1,85 @@ | |||
1 | /* | ||
2 | * sparse memory mappings. | ||
3 | */ | ||
4 | #include <linux/config.h> | ||
5 | #include <linux/mm.h> | ||
6 | #include <linux/mmzone.h> | ||
7 | #include <linux/bootmem.h> | ||
8 | #include <linux/module.h> | ||
9 | #include <asm/dma.h> | ||
10 | |||
11 | /* | ||
12 | * Permanent SPARSEMEM data: | ||
13 | * | ||
14 | * 1) mem_section - memory sections, mem_map's for valid memory | ||
15 | */ | ||
16 | struct mem_section mem_section[NR_MEM_SECTIONS]; | ||
17 | EXPORT_SYMBOL(mem_section); | ||
18 | |||
19 | /* Record a memory area against a node. */ | ||
20 | void memory_present(int nid, unsigned long start, unsigned long end) | ||
21 | { | ||
22 | unsigned long pfn; | ||
23 | |||
24 | start &= PAGE_SECTION_MASK; | ||
25 | for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { | ||
26 | unsigned long section = pfn_to_section_nr(pfn); | ||
27 | if (!mem_section[section].section_mem_map) | ||
28 | mem_section[section].section_mem_map = (void *) -1; | ||
29 | } | ||
30 | } | ||
31 | |||
32 | /* | ||
33 | * Only used by the i386 NUMA architectures, but relatively | ||
34 | * generic code. | ||
35 | */ | ||
36 | unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn, | ||
37 | unsigned long end_pfn) | ||
38 | { | ||
39 | unsigned long pfn; | ||
40 | unsigned long nr_pages = 0; | ||
41 | |||
42 | for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { | ||
43 | if (nid != early_pfn_to_nid(pfn)) | ||
44 | continue; | ||
45 | |||
46 | if (pfn_valid(pfn)) | ||
47 | nr_pages += PAGES_PER_SECTION; | ||
48 | } | ||
49 | |||
50 | return nr_pages * sizeof(struct page); | ||
51 | } | ||
52 | |||
53 | /* | ||
54 | * Allocate the accumulated non-linear sections, allocate a mem_map | ||
55 | * for each and record the physical to section mapping. | ||
56 | */ | ||
57 | void sparse_init(void) | ||
58 | { | ||
59 | unsigned long pnum; | ||
60 | struct page *map; | ||
61 | int nid; | ||
62 | |||
63 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { | ||
64 | if (!mem_section[pnum].section_mem_map) | ||
65 | continue; | ||
66 | |||
67 | nid = early_pfn_to_nid(section_nr_to_pfn(pnum)); | ||
68 | map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION); | ||
69 | if (!map) | ||
70 | map = alloc_bootmem_node(NODE_DATA(nid), | ||
71 | sizeof(struct page) * PAGES_PER_SECTION); | ||
72 | if (!map) { | ||
73 | mem_section[pnum].section_mem_map = 0; | ||
74 | continue; | ||
75 | } | ||
76 | |||
77 | /* | ||
78 | * Subtle, we encode the real pfn into the mem_map such that | ||
79 | * the identity page - section_mem_map will return the actual | ||
80 | * physical page frame number. | ||
81 | */ | ||
82 | mem_section[pnum].section_mem_map = map - | ||
83 | section_nr_to_pfn(pnum); | ||
84 | } | ||
85 | } | ||