diff options
author | Andy Whitcroft <apw@shadowen.org> | 2005-06-23 03:08:00 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-06-23 12:45:05 -0400 |
commit | 29751f6991e845f7d002a6ae520bf996b38c8dcd (patch) | |
tree | f76c4c660ac4d204436f68851979343d2a9ba224 | |
parent | 641c767389b19859a45e6de46d8e18cd935bdb60 (diff) |
[PATCH] sparsemem hotplug base
Make sparse's initialization be accessible at runtime. This allows sparse
mappings to be created after boot in a hotplug situation.
This patch is separated from the previous one just to give an indication how
much of the sparse infrastructure is *just* for hotplug memory.
The section_mem_map doesn't really store a pointer. It stores something that
is convenient to do some math against to get a pointer. It isn't valid to
just do *section_mem_map, so I don't think it should be stored as a pointer.
There are a couple of things I'd like to store about a section. First of all,
the fact that it is !NULL does not mean that it is present. Conversely, there
could be a combination where section_mem_map *is* NULL, but the math still
gets you to a real mem_map. So, I don't think that check is safe.
Since we're storing 32-bit-aligned structures, we have a few bits in the
bottom of the pointer to play with. Use one bit to encode whether there's
really a mem_map there, and the other one to tell whether there's a valid
section there. We need to distinguish between the two because sometimes
there's a gap between when a section is discovered to be present and when we
can get the mem_map for it.
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Jack Steiner <steiner@sgi.com>
Signed-off-by: Bob Picco <bob.picco@hp.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | include/linux/mmzone.h | 56 | ||||
-rw-r--r-- | mm/page_alloc.c | 4 | ||||
-rw-r--r-- | mm/sparse.c | 92 |
3 files changed, 125 insertions, 27 deletions
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 746b57e3d370..6c90461ed99f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
@@ -476,11 +476,56 @@ extern struct pglist_data contig_page_data; | |||
476 | 476 | ||
477 | struct page; | 477 | struct page; |
478 | struct mem_section { | 478 | struct mem_section { |
479 | struct page *section_mem_map; | 479 | /* |
480 | * This is, logically, a pointer to an array of struct | ||
481 | * pages. However, it is stored with some other magic. | ||
482 | * (see sparse.c::sparse_init_one_section()) | ||
483 | * | ||
484 | * Making it a UL at least makes someone do a cast | ||
485 | * before using it wrong. | ||
486 | */ | ||
487 | unsigned long section_mem_map; | ||
480 | }; | 488 | }; |
481 | 489 | ||
482 | extern struct mem_section mem_section[NR_MEM_SECTIONS]; | 490 | extern struct mem_section mem_section[NR_MEM_SECTIONS]; |
483 | 491 | ||
492 | static inline struct mem_section *__nr_to_section(unsigned long nr) | ||
493 | { | ||
494 | return &mem_section[nr]; | ||
495 | } | ||
496 | |||
497 | /* | ||
498 | * We use the lower bits of the mem_map pointer to store | ||
499 | * a little bit of information. There should be at least | ||
500 | * 3 bits here due to 32-bit alignment. | ||
501 | */ | ||
502 | #define SECTION_MARKED_PRESENT (1UL<<0) | ||
503 | #define SECTION_HAS_MEM_MAP (1UL<<1) | ||
504 | #define SECTION_MAP_LAST_BIT (1UL<<2) | ||
505 | #define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1)) | ||
506 | |||
507 | static inline struct page *__section_mem_map_addr(struct mem_section *section) | ||
508 | { | ||
509 | unsigned long map = section->section_mem_map; | ||
510 | map &= SECTION_MAP_MASK; | ||
511 | return (struct page *)map; | ||
512 | } | ||
513 | |||
514 | static inline int valid_section(struct mem_section *section) | ||
515 | { | ||
516 | return (section->section_mem_map & SECTION_MARKED_PRESENT); | ||
517 | } | ||
518 | |||
519 | static inline int section_has_mem_map(struct mem_section *section) | ||
520 | { | ||
521 | return (section->section_mem_map & SECTION_HAS_MEM_MAP); | ||
522 | } | ||
523 | |||
524 | static inline int valid_section_nr(unsigned long nr) | ||
525 | { | ||
526 | return valid_section(__nr_to_section(nr)); | ||
527 | } | ||
528 | |||
484 | /* | 529 | /* |
485 | * Given a kernel address, find the home node of the underlying memory. | 530 | * Given a kernel address, find the home node of the underlying memory. |
486 | */ | 531 | */ |
@@ -488,24 +533,25 @@ extern struct mem_section mem_section[NR_MEM_SECTIONS]; | |||
488 | 533 | ||
489 | static inline struct mem_section *__pfn_to_section(unsigned long pfn) | 534 | static inline struct mem_section *__pfn_to_section(unsigned long pfn) |
490 | { | 535 | { |
491 | return &mem_section[pfn_to_section_nr(pfn)]; | 536 | return __nr_to_section(pfn_to_section_nr(pfn)); |
492 | } | 537 | } |
493 | 538 | ||
494 | #define pfn_to_page(pfn) \ | 539 | #define pfn_to_page(pfn) \ |
495 | ({ \ | 540 | ({ \ |
496 | unsigned long __pfn = (pfn); \ | 541 | unsigned long __pfn = (pfn); \ |
497 | __pfn_to_section(__pfn)->section_mem_map + __pfn; \ | 542 | __section_mem_map_addr(__pfn_to_section(__pfn)) + __pfn; \ |
498 | }) | 543 | }) |
499 | #define page_to_pfn(page) \ | 544 | #define page_to_pfn(page) \ |
500 | ({ \ | 545 | ({ \ |
501 | page - mem_section[page_to_section(page)].section_mem_map; \ | 546 | page - __section_mem_map_addr(__nr_to_section( \ |
547 | page_to_section(page))); \ | ||
502 | }) | 548 | }) |
503 | 549 | ||
504 | static inline int pfn_valid(unsigned long pfn) | 550 | static inline int pfn_valid(unsigned long pfn) |
505 | { | 551 | { |
506 | if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) | 552 | if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) |
507 | return 0; | 553 | return 0; |
508 | return mem_section[pfn_to_section_nr(pfn)].section_mem_map != 0; | 554 | return valid_section(__nr_to_section(pfn_to_section_nr(pfn))); |
509 | } | 555 | } |
510 | 556 | ||
511 | /* | 557 | /* |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1eb683f9b3af..7ee675ad101e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -1650,8 +1650,8 @@ void __init memmap_init_zone(unsigned long size, int nid, unsigned long zone, | |||
1650 | unsigned long start_pfn) | 1650 | unsigned long start_pfn) |
1651 | { | 1651 | { |
1652 | struct page *page; | 1652 | struct page *page; |
1653 | int end_pfn = start_pfn + size; | 1653 | unsigned long end_pfn = start_pfn + size; |
1654 | int pfn; | 1654 | unsigned long pfn; |
1655 | 1655 | ||
1656 | for (pfn = start_pfn; pfn < end_pfn; pfn++, page++) { | 1656 | for (pfn = start_pfn; pfn < end_pfn; pfn++, page++) { |
1657 | if (!early_pfn_valid(pfn)) | 1657 | if (!early_pfn_valid(pfn)) |
diff --git a/mm/sparse.c b/mm/sparse.c index f888385b9e14..b54e304df4a7 100644 --- a/mm/sparse.c +++ b/mm/sparse.c | |||
@@ -25,7 +25,7 @@ void memory_present(int nid, unsigned long start, unsigned long end) | |||
25 | for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { | 25 | for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { |
26 | unsigned long section = pfn_to_section_nr(pfn); | 26 | unsigned long section = pfn_to_section_nr(pfn); |
27 | if (!mem_section[section].section_mem_map) | 27 | if (!mem_section[section].section_mem_map) |
28 | mem_section[section].section_mem_map = (void *) -1; | 28 | mem_section[section].section_mem_map = SECTION_MARKED_PRESENT; |
29 | } | 29 | } |
30 | } | 30 | } |
31 | 31 | ||
@@ -51,6 +51,56 @@ unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn, | |||
51 | } | 51 | } |
52 | 52 | ||
53 | /* | 53 | /* |
54 | * Subtle, we encode the real pfn into the mem_map such that | ||
55 | * the identity pfn - section_mem_map will return the actual | ||
56 | * physical page frame number. | ||
57 | */ | ||
58 | static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum) | ||
59 | { | ||
60 | return (unsigned long)(mem_map - (section_nr_to_pfn(pnum))); | ||
61 | } | ||
62 | |||
63 | /* | ||
64 | * We need this if we ever free the mem_maps. While not implemented yet, | ||
65 | * this function is included for parity with its sibling. | ||
66 | */ | ||
67 | static __attribute((unused)) | ||
68 | struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum) | ||
69 | { | ||
70 | return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum); | ||
71 | } | ||
72 | |||
73 | static int sparse_init_one_section(struct mem_section *ms, | ||
74 | unsigned long pnum, struct page *mem_map) | ||
75 | { | ||
76 | if (!valid_section(ms)) | ||
77 | return -EINVAL; | ||
78 | |||
79 | ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum); | ||
80 | |||
81 | return 1; | ||
82 | } | ||
83 | |||
84 | static struct page *sparse_early_mem_map_alloc(unsigned long pnum) | ||
85 | { | ||
86 | struct page *map; | ||
87 | int nid = early_pfn_to_nid(section_nr_to_pfn(pnum)); | ||
88 | |||
89 | map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION); | ||
90 | if (map) | ||
91 | return map; | ||
92 | |||
93 | map = alloc_bootmem_node(NODE_DATA(nid), | ||
94 | sizeof(struct page) * PAGES_PER_SECTION); | ||
95 | if (map) | ||
96 | return map; | ||
97 | |||
98 | printk(KERN_WARNING "%s: allocation failed\n", __FUNCTION__); | ||
99 | mem_section[pnum].section_mem_map = 0; | ||
100 | return NULL; | ||
101 | } | ||
102 | |||
103 | /* | ||
54 | * Allocate the accumulated non-linear sections, allocate a mem_map | 104 | * Allocate the accumulated non-linear sections, allocate a mem_map |
55 | * for each and record the physical to section mapping. | 105 | * for each and record the physical to section mapping. |
56 | */ | 106 | */ |
@@ -58,28 +108,30 @@ void sparse_init(void) | |||
58 | { | 108 | { |
59 | unsigned long pnum; | 109 | unsigned long pnum; |
60 | struct page *map; | 110 | struct page *map; |
61 | int nid; | ||
62 | 111 | ||
63 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { | 112 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { |
64 | if (!mem_section[pnum].section_mem_map) | 113 | if (!valid_section_nr(pnum)) |
65 | continue; | 114 | continue; |
66 | 115 | ||
67 | nid = early_pfn_to_nid(section_nr_to_pfn(pnum)); | 116 | map = sparse_early_mem_map_alloc(pnum); |
68 | map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION); | 117 | if (map) |
69 | if (!map) | 118 | sparse_init_one_section(&mem_section[pnum], pnum, map); |
70 | map = alloc_bootmem_node(NODE_DATA(nid), | ||
71 | sizeof(struct page) * PAGES_PER_SECTION); | ||
72 | if (!map) { | ||
73 | mem_section[pnum].section_mem_map = 0; | ||
74 | continue; | ||
75 | } | ||
76 | |||
77 | /* | ||
78 | * Subtle, we encode the real pfn into the mem_map such that | ||
79 | * the identity pfn - section_mem_map will return the actual | ||
80 | * physical page frame number. | ||
81 | */ | ||
82 | mem_section[pnum].section_mem_map = map - | ||
83 | section_nr_to_pfn(pnum); | ||
84 | } | 119 | } |
85 | } | 120 | } |
121 | |||
122 | /* | ||
123 | * returns the number of sections whose mem_maps were properly | ||
124 | * set. If this is <=0, then that means that the passed-in | ||
125 | * map was not consumed and must be freed. | ||
126 | */ | ||
127 | int sparse_add_one_section(unsigned long start_pfn, int nr_pages, struct page *map) | ||
128 | { | ||
129 | struct mem_section *ms = __pfn_to_section(start_pfn); | ||
130 | |||
131 | if (ms->section_mem_map & SECTION_MARKED_PRESENT) | ||
132 | return -EEXIST; | ||
133 | |||
134 | ms->section_mem_map |= SECTION_MARKED_PRESENT; | ||
135 | |||
136 | return sparse_init_one_section(ms, pfn_to_section_nr(start_pfn), map); | ||
137 | } | ||