diff options
author | Dave Hansen <haveblue@us.ibm.com> | 2005-10-29 21:16:52 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-10-30 00:40:44 -0400 |
commit | 208d54e5513c0c02d85af0990901354c74364d5c (patch) | |
tree | 83922f1d4a83f19bffcbff299044f421bd7e9c73 | |
parent | c6a57e19e464db118dc4ab9cfe9e9748c6d630a0 (diff) |
[PATCH] memory hotplug locking: node_size_lock
pgdat->node_size_lock is basically only needed in one place in the normal
code: show_mem(), which is the arch-specific sysrq-m printing function.
Strictly speaking, the architectures not doing memory hotplug do not need this
locking in show_mem(). However, they are all included for completeness. This
should also make any future consolidation of all of the implementations a
little more straightforward.
This lock is also held in the sparsemem code during a memory removal, as
sections are invalidated. This is the place where pfn_valid() is made false
for a memory area that's being removed. The lock is only required when doing
pfn_valid() operations on memory for which the caller does not already hold
a reference on the page, such as in show_mem().
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | arch/alpha/mm/numa.c | 3 | ||||
-rw-r--r-- | arch/i386/mm/pgtable.c | 3 | ||||
-rw-r--r-- | arch/ia64/mm/discontig.c | 7 | ||||
-rw-r--r-- | arch/m32r/mm/init.c | 9 | ||||
-rw-r--r-- | arch/parisc/mm/init.c | 3 | ||||
-rw-r--r-- | arch/ppc64/mm/init.c | 6 | ||||
-rw-r--r-- | include/linux/memory_hotplug.h | 34 | ||||
-rw-r--r-- | include/linux/mmzone.h | 12 | ||||
-rw-r--r-- | mm/page_alloc.c | 1 |
9 files changed, 76 insertions, 2 deletions
diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c index c7481d59b6df..6d5251254f68 100644 --- a/arch/alpha/mm/numa.c +++ b/arch/alpha/mm/numa.c | |||
@@ -371,6 +371,8 @@ show_mem(void) | |||
371 | show_free_areas(); | 371 | show_free_areas(); |
372 | printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); | 372 | printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); |
373 | for_each_online_node(nid) { | 373 | for_each_online_node(nid) { |
374 | unsigned long flags; | ||
375 | pgdat_resize_lock(NODE_DATA(nid), &flags); | ||
374 | i = node_spanned_pages(nid); | 376 | i = node_spanned_pages(nid); |
375 | while (i-- > 0) { | 377 | while (i-- > 0) { |
376 | struct page *page = nid_page_nr(nid, i); | 378 | struct page *page = nid_page_nr(nid, i); |
@@ -384,6 +386,7 @@ show_mem(void) | |||
384 | else | 386 | else |
385 | shared += page_count(page) - 1; | 387 | shared += page_count(page) - 1; |
386 | } | 388 | } |
389 | pgdat_resize_unlock(NODE_DATA(nid), &flags); | ||
387 | } | 390 | } |
388 | printk("%ld pages of RAM\n",total); | 391 | printk("%ld pages of RAM\n",total); |
389 | printk("%ld free pages\n",free); | 392 | printk("%ld free pages\n",free); |
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index 39c099f15b5f..9db3242103be 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c | |||
@@ -31,11 +31,13 @@ void show_mem(void) | |||
31 | pg_data_t *pgdat; | 31 | pg_data_t *pgdat; |
32 | unsigned long i; | 32 | unsigned long i; |
33 | struct page_state ps; | 33 | struct page_state ps; |
34 | unsigned long flags; | ||
34 | 35 | ||
35 | printk(KERN_INFO "Mem-info:\n"); | 36 | printk(KERN_INFO "Mem-info:\n"); |
36 | show_free_areas(); | 37 | show_free_areas(); |
37 | printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); | 38 | printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); |
38 | for_each_pgdat(pgdat) { | 39 | for_each_pgdat(pgdat) { |
40 | pgdat_resize_lock(pgdat, &flags); | ||
39 | for (i = 0; i < pgdat->node_spanned_pages; ++i) { | 41 | for (i = 0; i < pgdat->node_spanned_pages; ++i) { |
40 | page = pgdat_page_nr(pgdat, i); | 42 | page = pgdat_page_nr(pgdat, i); |
41 | total++; | 43 | total++; |
@@ -48,6 +50,7 @@ void show_mem(void) | |||
48 | else if (page_count(page)) | 50 | else if (page_count(page)) |
49 | shared += page_count(page) - 1; | 51 | shared += page_count(page) - 1; |
50 | } | 52 | } |
53 | pgdat_resize_unlock(pgdat, &flags); | ||
51 | } | 54 | } |
52 | printk(KERN_INFO "%d pages of RAM\n", total); | 55 | printk(KERN_INFO "%d pages of RAM\n", total); |
53 | printk(KERN_INFO "%d pages of HIGHMEM\n", highmem); | 56 | printk(KERN_INFO "%d pages of HIGHMEM\n", highmem); |
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index a3788fb84809..a88cdb7232f8 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c | |||
@@ -555,9 +555,13 @@ void show_mem(void) | |||
555 | show_free_areas(); | 555 | show_free_areas(); |
556 | printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); | 556 | printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); |
557 | for_each_pgdat(pgdat) { | 557 | for_each_pgdat(pgdat) { |
558 | unsigned long present = pgdat->node_present_pages; | 558 | unsigned long present; |
559 | unsigned long flags; | ||
559 | int shared = 0, cached = 0, reserved = 0; | 560 | int shared = 0, cached = 0, reserved = 0; |
561 | |||
560 | printk("Node ID: %d\n", pgdat->node_id); | 562 | printk("Node ID: %d\n", pgdat->node_id); |
563 | pgdat_resize_lock(pgdat, &flags); | ||
564 | present = pgdat->node_present_pages; | ||
561 | for(i = 0; i < pgdat->node_spanned_pages; i++) { | 565 | for(i = 0; i < pgdat->node_spanned_pages; i++) { |
562 | struct page *page; | 566 | struct page *page; |
563 | if (pfn_valid(pgdat->node_start_pfn + i)) | 567 | if (pfn_valid(pgdat->node_start_pfn + i)) |
@@ -571,6 +575,7 @@ void show_mem(void) | |||
571 | else if (page_count(page)) | 575 | else if (page_count(page)) |
572 | shared += page_count(page)-1; | 576 | shared += page_count(page)-1; |
573 | } | 577 | } |
578 | pgdat_resize_unlock(pgdat, &flags); | ||
574 | total_present += present; | 579 | total_present += present; |
575 | total_reserved += reserved; | 580 | total_reserved += reserved; |
576 | total_cached += cached; | 581 | total_cached += cached; |
diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c index d9a40b1fe8ba..6facf15b04f3 100644 --- a/arch/m32r/mm/init.c +++ b/arch/m32r/mm/init.c | |||
@@ -48,6 +48,8 @@ void show_mem(void) | |||
48 | show_free_areas(); | 48 | show_free_areas(); |
49 | printk("Free swap: %6ldkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); | 49 | printk("Free swap: %6ldkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); |
50 | for_each_pgdat(pgdat) { | 50 | for_each_pgdat(pgdat) { |
51 | unsigned long flags; | ||
52 | pgdat_resize_lock(pgdat, &flags); | ||
51 | for (i = 0; i < pgdat->node_spanned_pages; ++i) { | 53 | for (i = 0; i < pgdat->node_spanned_pages; ++i) { |
52 | page = pgdat_page_nr(pgdat, i); | 54 | page = pgdat_page_nr(pgdat, i); |
53 | total++; | 55 | total++; |
@@ -60,6 +62,7 @@ void show_mem(void) | |||
60 | else if (page_count(page)) | 62 | else if (page_count(page)) |
61 | shared += page_count(page) - 1; | 63 | shared += page_count(page) - 1; |
62 | } | 64 | } |
65 | pgdat_resize_unlock(pgdat, &flags); | ||
63 | } | 66 | } |
64 | printk("%d pages of RAM\n", total); | 67 | printk("%d pages of RAM\n", total); |
65 | printk("%d pages of HIGHMEM\n",highmem); | 68 | printk("%d pages of HIGHMEM\n",highmem); |
@@ -150,10 +153,14 @@ int __init reservedpages_count(void) | |||
150 | int reservedpages, nid, i; | 153 | int reservedpages, nid, i; |
151 | 154 | ||
152 | reservedpages = 0; | 155 | reservedpages = 0; |
153 | for_each_online_node(nid) | 156 | for_each_online_node(nid) { |
157 | unsigned long flags; | ||
158 | pgdat_resize_lock(NODE_DATA(nid), &flags); | ||
154 | for (i = 0 ; i < MAX_LOW_PFN(nid) - START_PFN(nid) ; i++) | 159 | for (i = 0 ; i < MAX_LOW_PFN(nid) - START_PFN(nid) ; i++) |
155 | if (PageReserved(nid_page_nr(nid, i))) | 160 | if (PageReserved(nid_page_nr(nid, i))) |
156 | reservedpages++; | 161 | reservedpages++; |
162 | pgdat_resize_unlock(NODE_DATA(nid), &flags); | ||
163 | } | ||
157 | 164 | ||
158 | return reservedpages; | 165 | return reservedpages; |
159 | } | 166 | } |
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 2886ad70db48..29b998e430e6 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c | |||
@@ -505,7 +505,9 @@ void show_mem(void) | |||
505 | 505 | ||
506 | for (j = node_start_pfn(i); j < node_end_pfn(i); j++) { | 506 | for (j = node_start_pfn(i); j < node_end_pfn(i); j++) { |
507 | struct page *p; | 507 | struct page *p; |
508 | unsigned long flags; | ||
508 | 509 | ||
510 | pgdat_resize_lock(NODE_DATA(i), &flags); | ||
509 | p = nid_page_nr(i, j) - node_start_pfn(i); | 511 | p = nid_page_nr(i, j) - node_start_pfn(i); |
510 | 512 | ||
511 | total++; | 513 | total++; |
@@ -517,6 +519,7 @@ void show_mem(void) | |||
517 | free++; | 519 | free++; |
518 | else | 520 | else |
519 | shared += page_count(p) - 1; | 521 | shared += page_count(p) - 1; |
522 | pgdat_resize_unlock(NODE_DATA(i), &flags); | ||
520 | } | 523 | } |
521 | } | 524 | } |
522 | #endif | 525 | #endif |
diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c index a45584b3440c..975b26de34d6 100644 --- a/arch/ppc64/mm/init.c +++ b/arch/ppc64/mm/init.c | |||
@@ -104,6 +104,8 @@ void show_mem(void) | |||
104 | show_free_areas(); | 104 | show_free_areas(); |
105 | printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); | 105 | printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); |
106 | for_each_pgdat(pgdat) { | 106 | for_each_pgdat(pgdat) { |
107 | unsigned long flags; | ||
108 | pgdat_resize_lock(pgdat, &flags); | ||
107 | for (i = 0; i < pgdat->node_spanned_pages; i++) { | 109 | for (i = 0; i < pgdat->node_spanned_pages; i++) { |
108 | page = pgdat_page_nr(pgdat, i); | 110 | page = pgdat_page_nr(pgdat, i); |
109 | total++; | 111 | total++; |
@@ -114,6 +116,7 @@ void show_mem(void) | |||
114 | else if (page_count(page)) | 116 | else if (page_count(page)) |
115 | shared += page_count(page) - 1; | 117 | shared += page_count(page) - 1; |
116 | } | 118 | } |
119 | pgdat_resize_unlock(pgdat, &flags); | ||
117 | } | 120 | } |
118 | printk("%ld pages of RAM\n", total); | 121 | printk("%ld pages of RAM\n", total); |
119 | printk("%ld reserved pages\n", reserved); | 122 | printk("%ld reserved pages\n", reserved); |
@@ -647,11 +650,14 @@ void __init mem_init(void) | |||
647 | #endif | 650 | #endif |
648 | 651 | ||
649 | for_each_pgdat(pgdat) { | 652 | for_each_pgdat(pgdat) { |
653 | unsigned long flags; | ||
654 | pgdat_resize_lock(pgdat, &flags); | ||
650 | for (i = 0; i < pgdat->node_spanned_pages; i++) { | 655 | for (i = 0; i < pgdat->node_spanned_pages; i++) { |
651 | page = pgdat_page_nr(pgdat, i); | 656 | page = pgdat_page_nr(pgdat, i); |
652 | if (PageReserved(page)) | 657 | if (PageReserved(page)) |
653 | reservedpages++; | 658 | reservedpages++; |
654 | } | 659 | } |
660 | pgdat_resize_unlock(pgdat, &flags); | ||
655 | } | 661 | } |
656 | 662 | ||
657 | codesize = (unsigned long)&_etext - (unsigned long)&_stext; | 663 | codesize = (unsigned long)&_etext - (unsigned long)&_stext; |
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h new file mode 100644 index 000000000000..e8103be9d528 --- /dev/null +++ b/include/linux/memory_hotplug.h | |||
@@ -0,0 +1,34 @@ | |||
1 | #ifndef __LINUX_MEMORY_HOTPLUG_H | ||
2 | #define __LINUX_MEMORY_HOTPLUG_H | ||
3 | |||
4 | #include <linux/mmzone.h> | ||
5 | #include <linux/spinlock.h> | ||
6 | |||
7 | #ifdef CONFIG_MEMORY_HOTPLUG | ||
8 | /* | ||
9 | * pgdat resizing functions | ||
10 | */ | ||
11 | static inline | ||
12 | void pgdat_resize_lock(struct pglist_data *pgdat, unsigned long *flags) | ||
13 | { | ||
14 | spin_lock_irqsave(&pgdat->node_size_lock, *flags); | ||
15 | } | ||
16 | static inline | ||
17 | void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags) | ||
18 | { | ||
19 | spin_unlock_irqrestore(&pgdat->node_size_lock, *flags); /* pairs with spin_lock_irqsave() in pgdat_resize_lock() */ | ||
20 | } | ||
21 | static inline | ||
22 | void pgdat_resize_init(struct pglist_data *pgdat) | ||
23 | { | ||
24 | spin_lock_init(&pgdat->node_size_lock); | ||
25 | } | ||
26 | #else /* ! CONFIG_MEMORY_HOTPLUG */ | ||
27 | /* | ||
28 | * Stub functions for when hotplug is off | ||
29 | */ | ||
30 | static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {} | ||
31 | static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {} | ||
32 | static inline void pgdat_resize_init(struct pglist_data *pgdat) {} | ||
33 | #endif | ||
34 | #endif /* __LINUX_MEMORY_HOTPLUG_H */ | ||
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 4674145bb63d..e050d68963a1 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
@@ -273,6 +273,16 @@ typedef struct pglist_data { | |||
273 | struct page *node_mem_map; | 273 | struct page *node_mem_map; |
274 | #endif | 274 | #endif |
275 | struct bootmem_data *bdata; | 275 | struct bootmem_data *bdata; |
276 | #ifdef CONFIG_MEMORY_HOTPLUG | ||
277 | /* | ||
278 | * Must be held any time you expect node_start_pfn, node_present_pages | ||
279 | * or node_spanned_pages stay constant. Holding this will also | ||
280 | * guarantee that any pfn_valid() stays that way. | ||
281 | * | ||
282 | * Nests above zone->lock and zone->size_seqlock. | ||
283 | */ | ||
284 | spinlock_t node_size_lock; | ||
285 | #endif | ||
276 | unsigned long node_start_pfn; | 286 | unsigned long node_start_pfn; |
277 | unsigned long node_present_pages; /* total number of physical pages */ | 287 | unsigned long node_present_pages; /* total number of physical pages */ |
278 | unsigned long node_spanned_pages; /* total size of physical page | 288 | unsigned long node_spanned_pages; /* total size of physical page |
@@ -293,6 +303,8 @@ typedef struct pglist_data { | |||
293 | #endif | 303 | #endif |
294 | #define nid_page_nr(nid, pagenr) pgdat_page_nr(NODE_DATA(nid),(pagenr)) | 304 | #define nid_page_nr(nid, pagenr) pgdat_page_nr(NODE_DATA(nid),(pagenr)) |
295 | 305 | ||
306 | #include <linux/memory_hotplug.h> | ||
307 | |||
296 | extern struct pglist_data *pgdat_list; | 308 | extern struct pglist_data *pgdat_list; |
297 | 309 | ||
298 | void __get_zone_counts(unsigned long *active, unsigned long *inactive, | 310 | void __get_zone_counts(unsigned long *active, unsigned long *inactive, |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a51ef94eec33..32fad6d23200 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -1958,6 +1958,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat, | |||
1958 | int nid = pgdat->node_id; | 1958 | int nid = pgdat->node_id; |
1959 | unsigned long zone_start_pfn = pgdat->node_start_pfn; | 1959 | unsigned long zone_start_pfn = pgdat->node_start_pfn; |
1960 | 1960 | ||
1961 | pgdat_resize_init(pgdat); | ||
1961 | pgdat->nr_zones = 0; | 1962 | pgdat->nr_zones = 0; |
1962 | init_waitqueue_head(&pgdat->kswapd_wait); | 1963 | init_waitqueue_head(&pgdat->kswapd_wait); |
1963 | pgdat->kswapd_max_order = 0; | 1964 | pgdat->kswapd_max_order = 0; |