diff options
author | Andrea Arcangeli <aarcange@redhat.com> | 2011-01-13 18:47:00 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-01-13 20:32:43 -0500 |
commit | 5f24ce5fd34c3ca1b3d10d30da754732da64d5c0 (patch) | |
tree | c82d27461f2adda210e77808b7dd04eaec017f2f | |
parent | 21ae5b01750f14140809508a478a4413792e0261 (diff) |
thp: remove PG_buddy
PG_buddy can be converted to _mapcount == -2. Freeing that page flag lets
PG_compound_lock be added to page->flags without overflowing (because of the
sparse section bits increasing) with CONFIG_X86_PAE=y and CONFIG_X86_PAT=y.
This also requires moving the memory hotplug code from _mapcount to lru.next
to avoid any risk of clashes. We can't use lru.next for the PG_buddy removal
itself, but memory hotplug can switch to lru.next even more easily than it
could keep using _mapcount.
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | fs/proc/page.c | 14 | ||||
-rw-r--r-- | include/linux/memory_hotplug.h | 14 | ||||
-rw-r--r-- | include/linux/mm.h | 21 | ||||
-rw-r--r-- | include/linux/page-flags.h | 7 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 14 | ||||
-rw-r--r-- | mm/page_alloc.c | 7 | ||||
-rw-r--r-- | mm/sparse.c | 4 |
7 files changed, 52 insertions, 29 deletions
diff --git a/fs/proc/page.c b/fs/proc/page.c index b06c674624e6..6d8e6a9e93ab 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c | |||
@@ -116,15 +116,17 @@ u64 stable_page_flags(struct page *page) | |||
116 | if (PageHuge(page)) | 116 | if (PageHuge(page)) |
117 | u |= 1 << KPF_HUGE; | 117 | u |= 1 << KPF_HUGE; |
118 | 118 | ||
119 | u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked); | ||
120 | |||
121 | /* | 119 | /* |
122 | * Caveats on high order pages: | 120 | * Caveats on high order pages: page->_count will only be set |
123 | * PG_buddy will only be set on the head page; SLUB/SLQB do the same | 121 | * -1 on the head page; SLUB/SLQB do the same for PG_slab; |
124 | * for PG_slab; SLOB won't set PG_slab at all on compound pages. | 122 | * SLOB won't set PG_slab at all on compound pages. |
125 | */ | 123 | */ |
124 | if (PageBuddy(page)) | ||
125 | u |= 1 << KPF_BUDDY; | ||
126 | |||
127 | u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked); | ||
128 | |||
126 | u |= kpf_copy_bit(k, KPF_SLAB, PG_slab); | 129 | u |= kpf_copy_bit(k, KPF_SLAB, PG_slab); |
127 | u |= kpf_copy_bit(k, KPF_BUDDY, PG_buddy); | ||
128 | 130 | ||
129 | u |= kpf_copy_bit(k, KPF_ERROR, PG_error); | 131 | u |= kpf_copy_bit(k, KPF_ERROR, PG_error); |
130 | u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty); | 132 | u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty); |
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 31c237a00c48..24376fe7ee68 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h | |||
@@ -13,12 +13,16 @@ struct mem_section; | |||
13 | #ifdef CONFIG_MEMORY_HOTPLUG | 13 | #ifdef CONFIG_MEMORY_HOTPLUG |
14 | 14 | ||
15 | /* | 15 | /* |
16 | * Types for free bootmem. | 16 | * Types for free bootmem stored in page->lru.next. These have to be in |
17 | * The normal smallest mapcount is -1. Here is smaller value than it. | 17 | * some random range in unsigned long space for debugging purposes. |
18 | */ | 18 | */ |
19 | #define SECTION_INFO (-1 - 1) | 19 | enum { |
20 | #define MIX_SECTION_INFO (-1 - 2) | 20 | MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE = 12, |
21 | #define NODE_INFO (-1 - 3) | 21 | SECTION_INFO = MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE, |
22 | MIX_SECTION_INFO, | ||
23 | NODE_INFO, | ||
24 | MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO, | ||
25 | }; | ||
22 | 26 | ||
23 | /* | 27 | /* |
24 | * pgdat resizing functions | 28 | * pgdat resizing functions |
diff --git a/include/linux/mm.h b/include/linux/mm.h index 2ec5138badab..7ab7d2b60041 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -397,6 +397,27 @@ static inline void init_page_count(struct page *page) | |||
397 | atomic_set(&page->_count, 1); | 397 | atomic_set(&page->_count, 1); |
398 | } | 398 | } |
399 | 399 | ||
400 | /* | ||
401 | * PageBuddy() indicate that the page is free and in the buddy system | ||
402 | * (see mm/page_alloc.c). | ||
403 | */ | ||
404 | static inline int PageBuddy(struct page *page) | ||
405 | { | ||
406 | return atomic_read(&page->_mapcount) == -2; | ||
407 | } | ||
408 | |||
409 | static inline void __SetPageBuddy(struct page *page) | ||
410 | { | ||
411 | VM_BUG_ON(atomic_read(&page->_mapcount) != -1); | ||
412 | atomic_set(&page->_mapcount, -2); | ||
413 | } | ||
414 | |||
415 | static inline void __ClearPageBuddy(struct page *page) | ||
416 | { | ||
417 | VM_BUG_ON(!PageBuddy(page)); | ||
418 | atomic_set(&page->_mapcount, -1); | ||
419 | } | ||
420 | |||
400 | void put_page(struct page *page); | 421 | void put_page(struct page *page); |
401 | void put_pages_list(struct list_head *pages); | 422 | void put_pages_list(struct list_head *pages); |
402 | 423 | ||
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 4ca1241ef94e..0db8037e2725 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h | |||
@@ -48,9 +48,6 @@ | |||
48 | * struct page (these bits with information) are always mapped into kernel | 48 | * struct page (these bits with information) are always mapped into kernel |
49 | * address space... | 49 | * address space... |
50 | * | 50 | * |
51 | * PG_buddy is set to indicate that the page is free and in the buddy system | ||
52 | * (see mm/page_alloc.c). | ||
53 | * | ||
54 | * PG_hwpoison indicates that a page got corrupted in hardware and contains | 51 | * PG_hwpoison indicates that a page got corrupted in hardware and contains |
55 | * data with incorrect ECC bits that triggered a machine check. Accessing is | 52 | * data with incorrect ECC bits that triggered a machine check. Accessing is |
56 | * not safe since it may cause another machine check. Don't touch! | 53 | * not safe since it may cause another machine check. Don't touch! |
@@ -96,7 +93,6 @@ enum pageflags { | |||
96 | PG_swapcache, /* Swap page: swp_entry_t in private */ | 93 | PG_swapcache, /* Swap page: swp_entry_t in private */ |
97 | PG_mappedtodisk, /* Has blocks allocated on-disk */ | 94 | PG_mappedtodisk, /* Has blocks allocated on-disk */ |
98 | PG_reclaim, /* To be reclaimed asap */ | 95 | PG_reclaim, /* To be reclaimed asap */ |
99 | PG_buddy, /* Page is free, on buddy lists */ | ||
100 | PG_swapbacked, /* Page is backed by RAM/swap */ | 96 | PG_swapbacked, /* Page is backed by RAM/swap */ |
101 | PG_unevictable, /* Page is "unevictable" */ | 97 | PG_unevictable, /* Page is "unevictable" */ |
102 | #ifdef CONFIG_MMU | 98 | #ifdef CONFIG_MMU |
@@ -233,7 +229,6 @@ PAGEFLAG(OwnerPriv1, owner_priv_1) TESTCLEARFLAG(OwnerPriv1, owner_priv_1) | |||
233 | * risky: they bypass page accounting. | 229 | * risky: they bypass page accounting. |
234 | */ | 230 | */ |
235 | TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback) | 231 | TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback) |
236 | __PAGEFLAG(Buddy, buddy) | ||
237 | PAGEFLAG(MappedToDisk, mappedtodisk) | 232 | PAGEFLAG(MappedToDisk, mappedtodisk) |
238 | 233 | ||
239 | /* PG_readahead is only used for file reads; PG_reclaim is only for writes */ | 234 | /* PG_readahead is only used for file reads; PG_reclaim is only for writes */ |
@@ -461,7 +456,7 @@ static inline int PageTransCompound(struct page *page) | |||
461 | #define PAGE_FLAGS_CHECK_AT_FREE \ | 456 | #define PAGE_FLAGS_CHECK_AT_FREE \ |
462 | (1 << PG_lru | 1 << PG_locked | \ | 457 | (1 << PG_lru | 1 << PG_locked | \ |
463 | 1 << PG_private | 1 << PG_private_2 | \ | 458 | 1 << PG_private | 1 << PG_private_2 | \ |
464 | 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \ | 459 | 1 << PG_writeback | 1 << PG_reserved | \ |
465 | 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ | 460 | 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ |
466 | 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON | \ | 461 | 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON | \ |
467 | __PG_COMPOUND_LOCK) | 462 | __PG_COMPOUND_LOCK) |
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index a2832c092509..e92f04749fcb 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
@@ -82,9 +82,10 @@ static void release_memory_resource(struct resource *res) | |||
82 | 82 | ||
83 | #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE | 83 | #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE |
84 | #ifndef CONFIG_SPARSEMEM_VMEMMAP | 84 | #ifndef CONFIG_SPARSEMEM_VMEMMAP |
85 | static void get_page_bootmem(unsigned long info, struct page *page, int type) | 85 | static void get_page_bootmem(unsigned long info, struct page *page, |
86 | unsigned long type) | ||
86 | { | 87 | { |
87 | atomic_set(&page->_mapcount, type); | 88 | page->lru.next = (struct list_head *) type; |
88 | SetPagePrivate(page); | 89 | SetPagePrivate(page); |
89 | set_page_private(page, info); | 90 | set_page_private(page, info); |
90 | atomic_inc(&page->_count); | 91 | atomic_inc(&page->_count); |
@@ -94,15 +95,16 @@ static void get_page_bootmem(unsigned long info, struct page *page, int type) | |||
94 | * so use __ref to tell modpost not to generate a warning */ | 95 | * so use __ref to tell modpost not to generate a warning */ |
95 | void __ref put_page_bootmem(struct page *page) | 96 | void __ref put_page_bootmem(struct page *page) |
96 | { | 97 | { |
97 | int type; | 98 | unsigned long type; |
98 | 99 | ||
99 | type = atomic_read(&page->_mapcount); | 100 | type = (unsigned long) page->lru.next; |
100 | BUG_ON(type >= -1); | 101 | BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE || |
102 | type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE); | ||
101 | 103 | ||
102 | if (atomic_dec_return(&page->_count) == 1) { | 104 | if (atomic_dec_return(&page->_count) == 1) { |
103 | ClearPagePrivate(page); | 105 | ClearPagePrivate(page); |
104 | set_page_private(page, 0); | 106 | set_page_private(page, 0); |
105 | reset_page_mapcount(page); | 107 | INIT_LIST_HEAD(&page->lru); |
106 | __free_pages_bootmem(page, 0); | 108 | __free_pages_bootmem(page, 0); |
107 | } | 109 | } |
108 | 110 | ||
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e7664b9f706c..9dfe49bceff4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -449,8 +449,8 @@ __find_combined_index(unsigned long page_idx, unsigned int order) | |||
449 | * (c) a page and its buddy have the same order && | 449 | * (c) a page and its buddy have the same order && |
450 | * (d) a page and its buddy are in the same zone. | 450 | * (d) a page and its buddy are in the same zone. |
451 | * | 451 | * |
452 | * For recording whether a page is in the buddy system, we use PG_buddy. | 452 | * For recording whether a page is in the buddy system, we set ->_mapcount -2. |
453 | * Setting, clearing, and testing PG_buddy is serialized by zone->lock. | 453 | * Setting, clearing, and testing _mapcount -2 is serialized by zone->lock. |
454 | * | 454 | * |
455 | * For recording page's order, we use page_private(page). | 455 | * For recording page's order, we use page_private(page). |
456 | */ | 456 | */ |
@@ -483,7 +483,7 @@ static inline int page_is_buddy(struct page *page, struct page *buddy, | |||
483 | * as necessary, plus some accounting needed to play nicely with other | 483 | * as necessary, plus some accounting needed to play nicely with other |
484 | * parts of the VM system. | 484 | * parts of the VM system. |
485 | * At each level, we keep a list of pages, which are heads of continuous | 485 | * At each level, we keep a list of pages, which are heads of continuous |
486 | * free pages of length of (1 << order) and marked with PG_buddy. Page's | 486 | * free pages of length of (1 << order) and marked with _mapcount -2. Page's |
487 | * order is recorded in page_private(page) field. | 487 | * order is recorded in page_private(page) field. |
488 | * So when we are allocating or freeing one, we can derive the state of the | 488 | * So when we are allocating or freeing one, we can derive the state of the |
489 | * other. That is, if we allocate a small block, and both were | 489 | * other. That is, if we allocate a small block, and both were |
@@ -5574,7 +5574,6 @@ static struct trace_print_flags pageflag_names[] = { | |||
5574 | {1UL << PG_swapcache, "swapcache" }, | 5574 | {1UL << PG_swapcache, "swapcache" }, |
5575 | {1UL << PG_mappedtodisk, "mappedtodisk" }, | 5575 | {1UL << PG_mappedtodisk, "mappedtodisk" }, |
5576 | {1UL << PG_reclaim, "reclaim" }, | 5576 | {1UL << PG_reclaim, "reclaim" }, |
5577 | {1UL << PG_buddy, "buddy" }, | ||
5578 | {1UL << PG_swapbacked, "swapbacked" }, | 5577 | {1UL << PG_swapbacked, "swapbacked" }, |
5579 | {1UL << PG_unevictable, "unevictable" }, | 5578 | {1UL << PG_unevictable, "unevictable" }, |
5580 | #ifdef CONFIG_MMU | 5579 | #ifdef CONFIG_MMU |
diff --git a/mm/sparse.c b/mm/sparse.c index 95ac219af379..93250207c5cf 100644 --- a/mm/sparse.c +++ b/mm/sparse.c | |||
@@ -671,10 +671,10 @@ static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages) | |||
671 | static void free_map_bootmem(struct page *page, unsigned long nr_pages) | 671 | static void free_map_bootmem(struct page *page, unsigned long nr_pages) |
672 | { | 672 | { |
673 | unsigned long maps_section_nr, removing_section_nr, i; | 673 | unsigned long maps_section_nr, removing_section_nr, i; |
674 | int magic; | 674 | unsigned long magic; |
675 | 675 | ||
676 | for (i = 0; i < nr_pages; i++, page++) { | 676 | for (i = 0; i < nr_pages; i++, page++) { |
677 | magic = atomic_read(&page->_mapcount); | 677 | magic = (unsigned long) page->lru.next; |
678 | 678 | ||
679 | BUG_ON(magic == NODE_INFO); | 679 | BUG_ON(magic == NODE_INFO); |
680 | 680 | ||