aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Andrea Arcangeli <aarcange@redhat.com> 2011-01-13 18:47:00 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2011-01-13 20:32:43 -0500
commit: 5f24ce5fd34c3ca1b3d10d30da754732da64d5c0 (patch)
tree: c82d27461f2adda210e77808b7dd04eaec017f2f
parent: 21ae5b01750f14140809508a478a4413792e0261 (diff)
thp: remove PG_buddy
PG_buddy can be converted to _mapcount == -2, so PG_compound_lock can be added to page->flags without overflowing (because of the sparse section bits increasing) with CONFIG_X86_PAE=y and CONFIG_X86_PAT=y. This also requires moving the memory hotplug code from _mapcount to lru.next to avoid any risk of clashes. We can't use lru.next for PG_buddy removal, but memory hotplug can use lru.next instead of the mapcount, and even more easily.
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- fs/proc/page.c 14
-rw-r--r-- include/linux/memory_hotplug.h 14
-rw-r--r-- include/linux/mm.h 21
-rw-r--r-- include/linux/page-flags.h 7
-rw-r--r-- mm/memory_hotplug.c 14
-rw-r--r-- mm/page_alloc.c 7
-rw-r--r-- mm/sparse.c 4
7 files changed, 52 insertions, 29 deletions
diff --git a/fs/proc/page.c b/fs/proc/page.c
index b06c674624e6..6d8e6a9e93ab 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -116,15 +116,17 @@ u64 stable_page_flags(struct page *page)
116 if (PageHuge(page)) 116 if (PageHuge(page))
117 u |= 1 << KPF_HUGE; 117 u |= 1 << KPF_HUGE;
118 118
119 u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked);
120
121 /* 119 /*
122 * Caveats on high order pages: 120 * Caveats on high order pages: page->_mapcount will only be set
123 * PG_buddy will only be set on the head page; SLUB/SLQB do the same 121 * to -2 on the head page; SLUB/SLQB do the same for PG_slab;
124 * for PG_slab; SLOB won't set PG_slab at all on compound pages. 122 * SLOB won't set PG_slab at all on compound pages.
125 */ 123 */
124 if (PageBuddy(page))
125 u |= 1 << KPF_BUDDY;
126
127 u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked);
128
126 u |= kpf_copy_bit(k, KPF_SLAB, PG_slab); 129 u |= kpf_copy_bit(k, KPF_SLAB, PG_slab);
127 u |= kpf_copy_bit(k, KPF_BUDDY, PG_buddy);
128 130
129 u |= kpf_copy_bit(k, KPF_ERROR, PG_error); 131 u |= kpf_copy_bit(k, KPF_ERROR, PG_error);
130 u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty); 132 u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty);
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 31c237a00c48..24376fe7ee68 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -13,12 +13,16 @@ struct mem_section;
13#ifdef CONFIG_MEMORY_HOTPLUG 13#ifdef CONFIG_MEMORY_HOTPLUG
14 14
15/* 15/*
16 * Types for free bootmem. 16 * Types for free bootmem stored in page->lru.next. These have to be in
17 * The normal smallest mapcount is -1. Here is smaller value than it. 17 * some random range in unsigned long space for debugging purposes.
18 */ 18 */
19#define SECTION_INFO (-1 - 1) 19enum {
20#define MIX_SECTION_INFO (-1 - 2) 20 MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE = 12,
21#define NODE_INFO (-1 - 3) 21 SECTION_INFO = MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE,
22 MIX_SECTION_INFO,
23 NODE_INFO,
24 MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE = NODE_INFO,
25};
22 26
23/* 27/*
24 * pgdat resizing functions 28 * pgdat resizing functions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2ec5138badab..7ab7d2b60041 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -397,6 +397,27 @@ static inline void init_page_count(struct page *page)
397 atomic_set(&page->_count, 1); 397 atomic_set(&page->_count, 1);
398} 398}
399 399
400/*
401 * PageBuddy() indicates that the page is free and in the buddy system
402 * (see mm/page_alloc.c).
403 */
404static inline int PageBuddy(struct page *page)
405{
406 return atomic_read(&page->_mapcount) == -2;
407}
408
409static inline void __SetPageBuddy(struct page *page)
410{
411 VM_BUG_ON(atomic_read(&page->_mapcount) != -1);
412 atomic_set(&page->_mapcount, -2);
413}
414
415static inline void __ClearPageBuddy(struct page *page)
416{
417 VM_BUG_ON(!PageBuddy(page));
418 atomic_set(&page->_mapcount, -1);
419}
420
400void put_page(struct page *page); 421void put_page(struct page *page);
401void put_pages_list(struct list_head *pages); 422void put_pages_list(struct list_head *pages);
402 423
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 4ca1241ef94e..0db8037e2725 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -48,9 +48,6 @@
48 * struct page (these bits with information) are always mapped into kernel 48 * struct page (these bits with information) are always mapped into kernel
49 * address space... 49 * address space...
50 * 50 *
51 * PG_buddy is set to indicate that the page is free and in the buddy system
52 * (see mm/page_alloc.c).
53 *
54 * PG_hwpoison indicates that a page got corrupted in hardware and contains 51 * PG_hwpoison indicates that a page got corrupted in hardware and contains
55 * data with incorrect ECC bits that triggered a machine check. Accessing is 52 * data with incorrect ECC bits that triggered a machine check. Accessing is
56 * not safe since it may cause another machine check. Don't touch! 53 * not safe since it may cause another machine check. Don't touch!
@@ -96,7 +93,6 @@ enum pageflags {
96 PG_swapcache, /* Swap page: swp_entry_t in private */ 93 PG_swapcache, /* Swap page: swp_entry_t in private */
97 PG_mappedtodisk, /* Has blocks allocated on-disk */ 94 PG_mappedtodisk, /* Has blocks allocated on-disk */
98 PG_reclaim, /* To be reclaimed asap */ 95 PG_reclaim, /* To be reclaimed asap */
99 PG_buddy, /* Page is free, on buddy lists */
100 PG_swapbacked, /* Page is backed by RAM/swap */ 96 PG_swapbacked, /* Page is backed by RAM/swap */
101 PG_unevictable, /* Page is "unevictable" */ 97 PG_unevictable, /* Page is "unevictable" */
102#ifdef CONFIG_MMU 98#ifdef CONFIG_MMU
@@ -233,7 +229,6 @@ PAGEFLAG(OwnerPriv1, owner_priv_1) TESTCLEARFLAG(OwnerPriv1, owner_priv_1)
233 * risky: they bypass page accounting. 229 * risky: they bypass page accounting.
234 */ 230 */
235TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback) 231TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback)
236__PAGEFLAG(Buddy, buddy)
237PAGEFLAG(MappedToDisk, mappedtodisk) 232PAGEFLAG(MappedToDisk, mappedtodisk)
238 233
239/* PG_readahead is only used for file reads; PG_reclaim is only for writes */ 234/* PG_readahead is only used for file reads; PG_reclaim is only for writes */
@@ -461,7 +456,7 @@ static inline int PageTransCompound(struct page *page)
461#define PAGE_FLAGS_CHECK_AT_FREE \ 456#define PAGE_FLAGS_CHECK_AT_FREE \
462 (1 << PG_lru | 1 << PG_locked | \ 457 (1 << PG_lru | 1 << PG_locked | \
463 1 << PG_private | 1 << PG_private_2 | \ 458 1 << PG_private | 1 << PG_private_2 | \
464 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \ 459 1 << PG_writeback | 1 << PG_reserved | \
465 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ 460 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \
466 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON | \ 461 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON | \
467 __PG_COMPOUND_LOCK) 462 __PG_COMPOUND_LOCK)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index a2832c092509..e92f04749fcb 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -82,9 +82,10 @@ static void release_memory_resource(struct resource *res)
82 82
83#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE 83#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
84#ifndef CONFIG_SPARSEMEM_VMEMMAP 84#ifndef CONFIG_SPARSEMEM_VMEMMAP
85static void get_page_bootmem(unsigned long info, struct page *page, int type) 85static void get_page_bootmem(unsigned long info, struct page *page,
86 unsigned long type)
86{ 87{
87 atomic_set(&page->_mapcount, type); 88 page->lru.next = (struct list_head *) type;
88 SetPagePrivate(page); 89 SetPagePrivate(page);
89 set_page_private(page, info); 90 set_page_private(page, info);
90 atomic_inc(&page->_count); 91 atomic_inc(&page->_count);
@@ -94,15 +95,16 @@ static void get_page_bootmem(unsigned long info, struct page *page, int type)
94 * so use __ref to tell modpost not to generate a warning */ 95 * so use __ref to tell modpost not to generate a warning */
95void __ref put_page_bootmem(struct page *page) 96void __ref put_page_bootmem(struct page *page)
96{ 97{
97 int type; 98 unsigned long type;
98 99
99 type = atomic_read(&page->_mapcount); 100 type = (unsigned long) page->lru.next;
100 BUG_ON(type >= -1); 101 BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
102 type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE);
101 103
102 if (atomic_dec_return(&page->_count) == 1) { 104 if (atomic_dec_return(&page->_count) == 1) {
103 ClearPagePrivate(page); 105 ClearPagePrivate(page);
104 set_page_private(page, 0); 106 set_page_private(page, 0);
105 reset_page_mapcount(page); 107 INIT_LIST_HEAD(&page->lru);
106 __free_pages_bootmem(page, 0); 108 __free_pages_bootmem(page, 0);
107 } 109 }
108 110
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e7664b9f706c..9dfe49bceff4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -449,8 +449,8 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
449 * (c) a page and its buddy have the same order && 449 * (c) a page and its buddy have the same order &&
450 * (d) a page and its buddy are in the same zone. 450 * (d) a page and its buddy are in the same zone.
451 * 451 *
452 * For recording whether a page is in the buddy system, we use PG_buddy. 452 * For recording whether a page is in the buddy system, we set ->_mapcount to -2.
453 * Setting, clearing, and testing PG_buddy is serialized by zone->lock. 453 * Setting, clearing, and testing _mapcount == -2 is serialized by zone->lock.
454 * 454 *
455 * For recording page's order, we use page_private(page). 455 * For recording page's order, we use page_private(page).
456 */ 456 */
@@ -483,7 +483,7 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
483 * as necessary, plus some accounting needed to play nicely with other 483 * as necessary, plus some accounting needed to play nicely with other
484 * parts of the VM system. 484 * parts of the VM system.
485 * At each level, we keep a list of pages, which are heads of continuous 485 * At each level, we keep a list of pages, which are heads of continuous
486 * free pages of length of (1 << order) and marked with PG_buddy. Page's 486 * free pages of length of (1 << order) and marked with _mapcount -2. Page's
487 * order is recorded in page_private(page) field. 487 * order is recorded in page_private(page) field.
488 * So when we are allocating or freeing one, we can derive the state of the 488 * So when we are allocating or freeing one, we can derive the state of the
489 * other. That is, if we allocate a small block, and both were 489 * other. That is, if we allocate a small block, and both were
@@ -5574,7 +5574,6 @@ static struct trace_print_flags pageflag_names[] = {
5574 {1UL << PG_swapcache, "swapcache" }, 5574 {1UL << PG_swapcache, "swapcache" },
5575 {1UL << PG_mappedtodisk, "mappedtodisk" }, 5575 {1UL << PG_mappedtodisk, "mappedtodisk" },
5576 {1UL << PG_reclaim, "reclaim" }, 5576 {1UL << PG_reclaim, "reclaim" },
5577 {1UL << PG_buddy, "buddy" },
5578 {1UL << PG_swapbacked, "swapbacked" }, 5577 {1UL << PG_swapbacked, "swapbacked" },
5579 {1UL << PG_unevictable, "unevictable" }, 5578 {1UL << PG_unevictable, "unevictable" },
5580#ifdef CONFIG_MMU 5579#ifdef CONFIG_MMU
diff --git a/mm/sparse.c b/mm/sparse.c
index 95ac219af379..93250207c5cf 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -671,10 +671,10 @@ static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
671static void free_map_bootmem(struct page *page, unsigned long nr_pages) 671static void free_map_bootmem(struct page *page, unsigned long nr_pages)
672{ 672{
673 unsigned long maps_section_nr, removing_section_nr, i; 673 unsigned long maps_section_nr, removing_section_nr, i;
674 int magic; 674 unsigned long magic;
675 675
676 for (i = 0; i < nr_pages; i++, page++) { 676 for (i = 0; i < nr_pages; i++, page++) {
677 magic = atomic_read(&page->_mapcount); 677 magic = (unsigned long) page->lru.next;
678 678
679 BUG_ON(magic == NODE_INFO); 679 BUG_ON(magic == NODE_INFO);
680 680