author		Benjamin Herrenschmidt <benh@kernel.crashing.org>	2008-04-30 01:41:48 -0400
committer	Paul Mackerras <paulus@samba.org>	2008-05-15 06:49:25 -0400
commit		cec08e7a948326b01555be6311480aa08e637de2 (patch)
tree		a08e1d91c9a0c5eaa4e81036a3d7f992b2de1745
parent		08fcf1d61193d7b7779aa6d7388535e26e064a0b (diff)
[POWERPC] vmemmap fixes to use smaller pages
This changes vmemmap to use a different region (region 0xf) of the
address space, and to configure the page size of that region
dynamically at boot.
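Concretely, the vmemmap array now sits at the base of region 0xf; a
minimal sketch of the new layout, mirroring the pgtable-ppc64.h hunk
below (the 0xf000000000000000 value assumes the usual 64-bit split
where REGION_SHIFT is 60):

	#define VMEMMAP_REGION_ID	(0xfUL)
	#define VMEMMAP_BASE		(VMEMMAP_REGION_ID << REGION_SHIFT)
	#define vmemmap			((struct page *)VMEMMAP_BASE)
	/* i.e. vmemmap starts at 0xf000000000000000 if REGION_SHIFT == 60 */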
The problem with the current approach of always using 16M pages is that
it's not well suited to machines that have small amounts of memory, such
as small partitions on pseries or the PS3.
In fact, on the PS3, failure to allocate the 16M page backing vmemmap
tends to prevent hotplugging the HV's "additional" memory, thus limiting
the available memory even more; in my experience, down to something
like 80M total, which makes it really not very usable.
The logic used by my patch to choose the vmemmap page size is as
follows (the corresponding snippet from the patch is shown after the list):
- If 16M pages are available and there's 1G or more RAM at boot,
use that size.
- Else if 64K pages are available, use that
- Else use 4K pages
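In code form, this is the htab_init_page_sizes() hunk below;
lmb_phys_mem_size() is the boot-time memory size reported by the LMB
code, and 0x40000000 is 1G:

	#ifdef CONFIG_SPARSEMEM_VMEMMAP
		if (mmu_psize_defs[MMU_PAGE_16M].shift &&
		    lmb_phys_mem_size() >= 0x40000000)
			mmu_vmemmap_psize = MMU_PAGE_16M;
		else if (mmu_psize_defs[MMU_PAGE_64K].shift)
			mmu_vmemmap_psize = MMU_PAGE_64K;
		else
			mmu_vmemmap_psize = MMU_PAGE_4K;
	#endif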
I've tested on a POWER6 (16M pages) and on an iSeries POWER3 (4K pages)
and it seems to work fine.
Note that I intend to change the way we organize the kernel regions &
SLBs so the actual region will change from 0xf back to something else at
one point, as I simplify the SLB miss handler, but that will be for a
later patch.
Signed-off-by: Paul Mackerras <paulus@samba.org>
-rw-r--r--	arch/powerpc/mm/hash_utils_64.c		28
-rw-r--r--	arch/powerpc/mm/init_64.c		10
-rw-r--r--	arch/powerpc/mm/slb.c			16
-rw-r--r--	arch/powerpc/mm/slb_low.S		16
-rw-r--r--	include/asm-powerpc/mmu-hash64.h	1
-rw-r--r--	include/asm-powerpc/pgtable-ppc64.h	10
6 files changed, 65 insertions, 16 deletions
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 2b5a399f6fa6..0f2d239d94c4 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -94,6 +94,9 @@ unsigned long htab_hash_mask;
 int mmu_linear_psize = MMU_PAGE_4K;
 int mmu_virtual_psize = MMU_PAGE_4K;
 int mmu_vmalloc_psize = MMU_PAGE_4K;
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+int mmu_vmemmap_psize = MMU_PAGE_4K;
+#endif
 int mmu_io_psize = MMU_PAGE_4K;
 int mmu_kernel_ssize = MMU_SEGSIZE_256M;
 int mmu_highuser_ssize = MMU_SEGSIZE_256M;
@@ -387,11 +390,32 @@ static void __init htab_init_page_sizes(void)
 	}
 #endif /* CONFIG_PPC_64K_PAGES */
 
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	/* We try to use 16M pages for vmemmap if that is supported
+	 * and we have at least 1G of RAM at boot
+	 */
+	if (mmu_psize_defs[MMU_PAGE_16M].shift &&
+	    lmb_phys_mem_size() >= 0x40000000)
+		mmu_vmemmap_psize = MMU_PAGE_16M;
+	else if (mmu_psize_defs[MMU_PAGE_64K].shift)
+		mmu_vmemmap_psize = MMU_PAGE_64K;
+	else
+		mmu_vmemmap_psize = MMU_PAGE_4K;
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
 	printk(KERN_DEBUG "Page orders: linear mapping = %d, "
-	       "virtual = %d, io = %d\n",
+	       "virtual = %d, io = %d"
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	       ", vmemmap = %d"
+#endif
+	       "\n",
 	       mmu_psize_defs[mmu_linear_psize].shift,
 	       mmu_psize_defs[mmu_virtual_psize].shift,
-	       mmu_psize_defs[mmu_io_psize].shift);
+	       mmu_psize_defs[mmu_io_psize].shift
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	       ,mmu_psize_defs[mmu_vmemmap_psize].shift
+#endif
+	       );
 
 #ifdef CONFIG_HUGETLB_PAGE
 	/* Init large page size. Currently, we pick 16M or 1M depending
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index c5ac532a0161..6aa65375abf5 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -19,6 +19,8 @@
  *
  */
 
+#undef DEBUG
+
 #include <linux/signal.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
@@ -208,12 +210,12 @@ int __meminit vmemmap_populated(unsigned long start, int page_size)
 }
 
 int __meminit vmemmap_populate(struct page *start_page,
 			       unsigned long nr_pages, int node)
 {
 	unsigned long mode_rw;
 	unsigned long start = (unsigned long)start_page;
 	unsigned long end = (unsigned long)(start_page + nr_pages);
-	unsigned long page_size = 1 << mmu_psize_defs[mmu_linear_psize].shift;
+	unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
 
 	mode_rw = _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX;
 
@@ -235,11 +237,11 @@ int __meminit vmemmap_populate(struct page *start_page,
 				    start, p, __pa(p));
 
 		mapped = htab_bolt_mapping(start, start + page_size,
-					   __pa(p), mode_rw, mmu_linear_psize,
+					   __pa(p), mode_rw, mmu_vmemmap_psize,
 					   mmu_kernel_ssize);
 		BUG_ON(mapped < 0);
 	}
 
 	return 0;
 }
-#endif
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index cf8705e32d60..89497fb04280 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -28,7 +28,7 @@
 #include <asm/udbg.h>
 
 #ifdef DEBUG
-#define DBG(fmt...) udbg_printf(fmt)
+#define DBG(fmt...) printk(fmt)
 #else
 #define DBG pr_debug
 #endif
@@ -263,13 +263,19 @@ void slb_initialize(void)
 	extern unsigned int *slb_miss_kernel_load_linear;
 	extern unsigned int *slb_miss_kernel_load_io;
 	extern unsigned int *slb_compare_rr_to_size;
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	extern unsigned int *slb_miss_kernel_load_vmemmap;
+	unsigned long vmemmap_llp;
+#endif
 
 	/* Prepare our SLB miss handler based on our page size */
 	linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
 	io_llp = mmu_psize_defs[mmu_io_psize].sllp;
 	vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp;
 	get_paca()->vmalloc_sllp = SLB_VSID_KERNEL | vmalloc_llp;
-
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	vmemmap_llp = mmu_psize_defs[mmu_vmemmap_psize].sllp;
+#endif
 	if (!slb_encoding_inited) {
 		slb_encoding_inited = 1;
 		patch_slb_encoding(slb_miss_kernel_load_linear,
@@ -281,6 +287,12 @@ void slb_initialize(void)
 
 		DBG("SLB: linear LLP = %04lx\n", linear_llp);
 		DBG("SLB: io LLP = %04lx\n", io_llp);
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+		patch_slb_encoding(slb_miss_kernel_load_vmemmap,
+				   SLB_VSID_KERNEL | vmemmap_llp);
+		DBG("SLB: vmemmap LLP = %04lx\n", vmemmap_llp);
+#endif
 	}
 
 	get_paca()->stab_rr = SLB_NUM_BOLTED;
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 657f6b37e9df..bc44dc4b5c67 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -47,8 +47,7 @@ _GLOBAL(slb_allocate_realmode)
 	 * it to VSID 0, which is reserved as a bad VSID - one which
 	 * will never have any pages in it. */
 
-	/* Check if hitting the linear mapping of the vmalloc/ioremap
-	 * kernel space
+	/* Check if hitting the linear mapping or some other kernel space
 	*/
 	bne	cr7,1f
 
@@ -62,7 +61,18 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFCLR(CPU_FTR_1T_SEGMENT)
 	b	slb_finish_load_1T
 
-1:	/* vmalloc/ioremap mapping encoding bits, the "li" instructions below
+1:
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	/* Check virtual memmap region. To be patched at kernel boot */
+	cmpldi	cr0,r9,0xf
+	bne	1f
+_GLOBAL(slb_miss_kernel_load_vmemmap)
+	li	r11,0
+	b	6f
+1:
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
+	/* vmalloc/ioremap mapping encoding bits, the "li" instructions below
 	 * will be patched by the kernel at boot
 	 */
 BEGIN_FTR_SECTION
diff --git a/include/asm-powerpc/mmu-hash64.h b/include/asm-powerpc/mmu-hash64.h
index 0dff76776044..39c5c5f62bf5 100644
--- a/include/asm-powerpc/mmu-hash64.h
+++ b/include/asm-powerpc/mmu-hash64.h
@@ -177,6 +177,7 @@ extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
 extern int mmu_linear_psize;
 extern int mmu_virtual_psize;
 extern int mmu_vmalloc_psize;
+extern int mmu_vmemmap_psize;
 extern int mmu_io_psize;
 extern int mmu_kernel_ssize;
 extern int mmu_highuser_ssize;
diff --git a/include/asm-powerpc/pgtable-ppc64.h b/include/asm-powerpc/pgtable-ppc64.h
index 27f18695f7d6..cc6a43ba41d0 100644
--- a/include/asm-powerpc/pgtable-ppc64.h
+++ b/include/asm-powerpc/pgtable-ppc64.h
@@ -65,15 +65,15 @@
 
 #define VMALLOC_REGION_ID (REGION_ID(VMALLOC_START))
 #define KERNEL_REGION_ID (REGION_ID(PAGE_OFFSET))
+#define VMEMMAP_REGION_ID (0xfUL)
 #define USER_REGION_ID (0UL)
 
 /*
- * Defines the address of the vmemap area, in the top 16th of the
- * kernel region.
+ * Defines the address of the vmemap area, in its own region
  */
-#define VMEMMAP_BASE (ASM_CONST(CONFIG_KERNEL_START) + \
-		      (0xfUL << (REGION_SHIFT - 4)))
-#define vmemmap ((struct page *)VMEMMAP_BASE)
+#define VMEMMAP_BASE (VMEMMAP_REGION_ID << REGION_SHIFT)
+#define vmemmap ((struct page *)VMEMMAP_BASE)
+
 
 /*
  * Common bits in a linux-style PTE. These match the bits in the