author     Benjamin Herrenschmidt <benh@kernel.crashing.org>   2008-04-30 01:41:48 -0400
committer  Paul Mackerras <paulus@samba.org>                   2008-05-15 06:49:25 -0400
commit     cec08e7a948326b01555be6311480aa08e637de2
tree       a08e1d91c9a0c5eaa4e81036a3d7f992b2de1745
parent     08fcf1d61193d7b7779aa6d7388535e26e064a0b
[POWERPC] vmemmap fixes to use smaller pages
This changes vmemmap to use a different region (region 0xf) of the
address space, and to configure the page size of that region
dynamically at boot.

The problem with the current approach of always using 16M pages is
that it's not well suited to machines that have small amounts of
memory, such as small partitions on pseries or PS3s. In fact, on the
PS3, failure to allocate the 16M page backing vmemmap tends to prevent
hotplugging the HV's "additional" memory, thus limiting the available
memory even more; in my experience, down to something like 80M total,
which makes it really not very usable.

The logic used by my patch to choose the vmemmap page size is:

 - If 16M pages are available and there's 1G or more RAM at boot,
   use that size.
 - Else, if 64K pages are available, use those.
 - Else, use 4K pages.

I've tested on a POWER6 (16M pages) and on an iSeries POWER3 (4K
pages), and it seems to work fine.

Note that I intend to change the way we organize the kernel regions
and SLBs, so the actual region will change from 0xf back to something
else at one point, as I simplify the SLB miss handler; but that will
be for a later patch.

Signed-off-by: Paul Mackerras <paulus@samba.org>
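To make the memory cost concrete, here is a rough userspace sketch (not part of the patch): it estimates the vmemmap backing store for a small 128M partition with 64K base pages. The 128M figure and the 64-byte sizeof(struct page) are assumptions chosen purely for illustration.

/* Illustrative only: vmemmap backing cost for a small partition.
 * Assumes sizeof(struct page) == 64 bytes; real values vary by config. */
#include <stdio.h>

int main(void)
{
        unsigned long ram        = 128UL << 20;  /* 128M of RAM (assumed)    */
        unsigned long base_page  = 64UL << 10;   /* 64K base page size       */
        unsigned long struct_pg  = 64;           /* assumed struct page size */

        unsigned long nr_pages   = ram / base_page;
        unsigned long vmemmap_sz = nr_pages * struct_pg;   /* array size */

        /* Backing store once the array is rounded up to one mapping granule */
        unsigned long with_16m = (vmemmap_sz + (16UL << 20) - 1) & ~((16UL << 20) - 1);
        unsigned long with_64k = (vmemmap_sz + (64UL << 10) - 1) & ~((64UL << 10) - 1);

        printf("vmemmap array: %lu KB; backed by %lu KB with 16M pages "
               "vs %lu KB with 64K pages\n",
               vmemmap_sz >> 10, with_16m >> 10, with_64k >> 10);
        return 0;
}

The roughly 128K array forces a full 16M allocation when vmemmap is always backed by 16M pages; that waste is exactly what the boot-time page-size choice avoids on small machines.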
-rw-r--r--   arch/powerpc/mm/hash_utils_64.c       28
-rw-r--r--   arch/powerpc/mm/init_64.c             10
-rw-r--r--   arch/powerpc/mm/slb.c                 16
-rw-r--r--   arch/powerpc/mm/slb_low.S             16
-rw-r--r--   include/asm-powerpc/mmu-hash64.h       1
-rw-r--r--   include/asm-powerpc/pgtable-ppc64.h   10
6 files changed, 65 insertions(+), 16 deletions(-)
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 2b5a399f6fa6..0f2d239d94c4 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -94,6 +94,9 @@ unsigned long htab_hash_mask;
 int mmu_linear_psize = MMU_PAGE_4K;
 int mmu_virtual_psize = MMU_PAGE_4K;
 int mmu_vmalloc_psize = MMU_PAGE_4K;
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+int mmu_vmemmap_psize = MMU_PAGE_4K;
+#endif
 int mmu_io_psize = MMU_PAGE_4K;
 int mmu_kernel_ssize = MMU_SEGSIZE_256M;
 int mmu_highuser_ssize = MMU_SEGSIZE_256M;
@@ -387,11 +390,32 @@ static void __init htab_init_page_sizes(void)
        }
 #endif /* CONFIG_PPC_64K_PAGES */
 
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+       /* We try to use 16M pages for vmemmap if that is supported
+        * and we have at least 1G of RAM at boot
+        */
+       if (mmu_psize_defs[MMU_PAGE_16M].shift &&
+           lmb_phys_mem_size() >= 0x40000000)
+               mmu_vmemmap_psize = MMU_PAGE_16M;
+       else if (mmu_psize_defs[MMU_PAGE_64K].shift)
+               mmu_vmemmap_psize = MMU_PAGE_64K;
+       else
+               mmu_vmemmap_psize = MMU_PAGE_4K;
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
        printk(KERN_DEBUG "Page orders: linear mapping = %d, "
-              "virtual = %d, io = %d\n",
+              "virtual = %d, io = %d"
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+              ", vmemmap = %d"
+#endif
+              "\n",
               mmu_psize_defs[mmu_linear_psize].shift,
               mmu_psize_defs[mmu_virtual_psize].shift,
-              mmu_psize_defs[mmu_io_psize].shift);
+              mmu_psize_defs[mmu_io_psize].shift
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+              ,mmu_psize_defs[mmu_vmemmap_psize].shift
+#endif
+              );
 
 #ifdef CONFIG_HUGETLB_PAGE
        /* Init large page size. Currently, we pick 16M or 1M depending
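For readers skimming the hunk above, the page-size choice can be restated outside the kernel as the sketch below. The enum, struct and helper name here are made up for illustration; in the kernel the decision reads mmu_psize_defs[] directly, and 0x40000000 is simply 1G expressed in bytes.

/* Standalone restatement of the mmu_vmemmap_psize choice above
 * (illustrative types and names; shift == 0 means "size unsupported",
 * mirroring the kernel's mmu_psize_defs[] convention). */
enum { MMU_PAGE_4K, MMU_PAGE_64K, MMU_PAGE_16M, MMU_PAGE_COUNT };

struct psize_def { unsigned int shift; };  /* log2 of the page size, 0 if absent */

static int pick_vmemmap_psize(const struct psize_def defs[MMU_PAGE_COUNT],
                              unsigned long long ram_bytes)
{
        if (defs[MMU_PAGE_16M].shift && ram_bytes >= 0x40000000ULL)
                return MMU_PAGE_16M;    /* 16M pages exist and >= 1G of RAM */
        if (defs[MMU_PAGE_64K].shift)
                return MMU_PAGE_64K;    /* otherwise prefer 64K if supported */
        return MMU_PAGE_4K;             /* 4K is always available */
}

With the printk change in the same hunk, the chosen size also shows up in the boot log, e.g. a line of the form "Page orders: linear mapping = 24, virtual = 16, io = 16, vmemmap = 24" on a machine that ends up with 16M vmemmap pages (the numbers are an assumed example, not captured output).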
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index c5ac532a0161..6aa65375abf5 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -19,6 +19,8 @@
  *
  */
 
+#undef DEBUG
+
 #include <linux/signal.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
@@ -208,12 +210,12 @@ int __meminit vmemmap_populated(unsigned long start, int page_size)
 }
 
 int __meminit vmemmap_populate(struct page *start_page,
                               unsigned long nr_pages, int node)
 {
        unsigned long mode_rw;
        unsigned long start = (unsigned long)start_page;
        unsigned long end = (unsigned long)(start_page + nr_pages);
-       unsigned long page_size = 1 << mmu_psize_defs[mmu_linear_psize].shift;
+       unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
 
        mode_rw = _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX;
 
@@ -235,11 +237,11 @@ int __meminit vmemmap_populate(struct page *start_page,
                        start, p, __pa(p));
 
                mapped = htab_bolt_mapping(start, start + page_size,
-                                          __pa(p), mode_rw, mmu_linear_psize,
+                                          __pa(p), mode_rw, mmu_vmemmap_psize,
                                           mmu_kernel_ssize);
                BUG_ON(mapped < 0);
        }
 
        return 0;
 }
-#endif
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
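The two hunks above only swap mmu_linear_psize for mmu_vmemmap_psize, but it helps to see them in context. The following is a condensed paraphrase of the vmemmap_populate() loop as it reads after this patch, not a verbatim copy; declarations and the debug print are trimmed.

        /* page_size and the psize handed to htab_bolt_mapping() now both
         * come from mmu_vmemmap_psize instead of mmu_linear_psize. */
        for (; start < end; start += page_size) {
                void *p;
                int mapped;

                if (vmemmap_populated(start, page_size))   /* already backed */
                        continue;

                p = vmemmap_alloc_block(page_size, node);  /* physical backing */
                if (!p)
                        return -ENOMEM;

                mapped = htab_bolt_mapping(start, start + page_size,
                                           __pa(p), mode_rw, mmu_vmemmap_psize,
                                           mmu_kernel_ssize);
                BUG_ON(mapped < 0);
        }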
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index cf8705e32d60..89497fb04280 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -28,7 +28,7 @@
 #include <asm/udbg.h>
 
 #ifdef DEBUG
-#define DBG(fmt...) udbg_printf(fmt)
+#define DBG(fmt...) printk(fmt)
 #else
 #define DBG pr_debug
 #endif
@@ -263,13 +263,19 @@ void slb_initialize(void)
        extern unsigned int *slb_miss_kernel_load_linear;
        extern unsigned int *slb_miss_kernel_load_io;
        extern unsigned int *slb_compare_rr_to_size;
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+       extern unsigned int *slb_miss_kernel_load_vmemmap;
+       unsigned long vmemmap_llp;
+#endif
 
        /* Prepare our SLB miss handler based on our page size */
        linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
        io_llp = mmu_psize_defs[mmu_io_psize].sllp;
        vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp;
        get_paca()->vmalloc_sllp = SLB_VSID_KERNEL | vmalloc_llp;
-
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+       vmemmap_llp = mmu_psize_defs[mmu_vmemmap_psize].sllp;
+#endif
        if (!slb_encoding_inited) {
                slb_encoding_inited = 1;
                patch_slb_encoding(slb_miss_kernel_load_linear,
@@ -281,6 +287,12 @@ void slb_initialize(void)
 
                DBG("SLB: linear  LLP = %04lx\n", linear_llp);
                DBG("SLB: io      LLP = %04lx\n", io_llp);
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+               patch_slb_encoding(slb_miss_kernel_load_vmemmap,
+                                  SLB_VSID_KERNEL | vmemmap_llp);
+               DBG("SLB: vmemmap LLP = %04lx\n", vmemmap_llp);
+#endif
        }
 
        get_paca()->stab_rr = SLB_NUM_BOLTED;
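patch_slb_encoding() itself is not shown in this diff, so as context: the new slb_miss_kernel_load_vmemmap symbol points at a "li r11,0" placeholder in slb_low.S, and at boot its 16-bit immediate is overwritten with SLB_VSID_KERNEL | vmemmap_llp. A sketch of that mechanism (illustrative helper name, not the kernel's exact source):

/* Sketch only: splice a 16-bit immediate into a PowerPC "li rD,imm"
 * instruction, which keeps its immediate in the low 16 bits, then make
 * sure the CPU refetches the modified instruction. */
static void patch_li_immediate(unsigned int *insn, unsigned int imm16)
{
        *insn = (*insn & 0xffff0000) | (imm16 & 0xffff);
        flush_icache_range((unsigned long)insn, (unsigned long)insn + 4);
}

This is why the SLB miss handler can use a different segment encoding per kernel region without any extra memory loads on the fast path.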
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 657f6b37e9df..bc44dc4b5c67 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -47,8 +47,7 @@ _GLOBAL(slb_allocate_realmode)
         * it to VSID 0, which is reserved as a bad VSID - one which
         * will never have any pages in it. */
 
-       /* Check if hitting the linear mapping of the vmalloc/ioremap
-        * kernel space
+       /* Check if hitting the linear mapping or some other kernel space
        */
        bne     cr7,1f
 
@@ -62,7 +61,18 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFCLR(CPU_FTR_1T_SEGMENT)
        b       slb_finish_load_1T
 
-1:     /* vmalloc/ioremap mapping encoding bits, the "li" instructions below
+1:
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+       /* Check virtual memmap region. To be patched at kernel boot */
+       cmpldi  cr0,r9,0xf
+       bne     1f
+_GLOBAL(slb_miss_kernel_load_vmemmap)
+       li      r11,0
+       b       6f
+1:
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
+       /* vmalloc/ioremap mapping encoding bits, the "li" instructions below
         * will be patched by the kernel at boot
         */
 BEGIN_FTR_SECTION
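In C terms, the new assembly block does the following before falling through to the existing vmalloc/ioremap cases: r9 holds the top four bits of the faulting effective address, i.e. its region ID. The sketch below is illustrative (made-up flow, userspace includes); the authoritative logic is the assembly above, with REGION_SHIFT == 60 being how pgtable-ppc64.h carves out the region nibble.

/* Illustrative restatement of the new region check (not kernel code). */
#include <stdio.h>

#define REGION_SHIFT       60
#define REGION_ID(ea)      ((unsigned long long)(ea) >> REGION_SHIFT)
#define VMEMMAP_REGION_ID  0xfULL

int main(void)
{
        unsigned long long ea = 0xf000000000001000ULL;  /* an assumed vmemmap address */

        if (REGION_ID(ea) == VMEMMAP_REGION_ID)
                /* corresponds to: cmpldi cr0,r9,0xf -> use the patched vmemmap LLP */
                printf("0x%llx: vmemmap region, slb_miss_kernel_load_vmemmap encoding\n", ea);
        else
                /* bne 1f: fall through to the vmalloc/ioremap and linear cases */
                printf("0x%llx: not vmemmap, handled by the existing paths\n", ea);
        return 0;
}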
diff --git a/include/asm-powerpc/mmu-hash64.h b/include/asm-powerpc/mmu-hash64.h
index 0dff76776044..39c5c5f62bf5 100644
--- a/include/asm-powerpc/mmu-hash64.h
+++ b/include/asm-powerpc/mmu-hash64.h
@@ -177,6 +177,7 @@ extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
 extern int mmu_linear_psize;
 extern int mmu_virtual_psize;
 extern int mmu_vmalloc_psize;
+extern int mmu_vmemmap_psize;
 extern int mmu_io_psize;
 extern int mmu_kernel_ssize;
 extern int mmu_highuser_ssize;
diff --git a/include/asm-powerpc/pgtable-ppc64.h b/include/asm-powerpc/pgtable-ppc64.h
index 27f18695f7d6..cc6a43ba41d0 100644
--- a/include/asm-powerpc/pgtable-ppc64.h
+++ b/include/asm-powerpc/pgtable-ppc64.h
@@ -65,15 +65,15 @@
 
 #define VMALLOC_REGION_ID  (REGION_ID(VMALLOC_START))
 #define KERNEL_REGION_ID   (REGION_ID(PAGE_OFFSET))
+#define VMEMMAP_REGION_ID  (0xfUL)
 #define USER_REGION_ID     (0UL)
 
 /*
- * Defines the address of the vmemap area, in the top 16th of the
- * kernel region.
+ * Defines the address of the vmemap area, in its own region
  */
-#define VMEMMAP_BASE (ASM_CONST(CONFIG_KERNEL_START) + \
-                     (0xfUL << (REGION_SHIFT - 4)))
-#define vmemmap ((struct page *)VMEMMAP_BASE)
+#define VMEMMAP_BASE (VMEMMAP_REGION_ID << REGION_SHIFT)
+#define vmemmap ((struct page *)VMEMMAP_BASE)
+
 
 /*
  * Common bits in a linux-style PTE. These match the bits in the
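With REGION_SHIFT defined as 60 elsewhere in this header (unchanged by the patch), the new definition places vmemmap at 0xf000000000000000, so the region ID that the SLB miss handler compares against really is the literal 0xf patched into slb_low.S. A quick standalone check of that arithmetic (illustrative, not kernel code):

#include <assert.h>

#define REGION_SHIFT       60                      /* as in pgtable-ppc64.h */
#define VMEMMAP_REGION_ID  0xfULL
#define VMEMMAP_BASE       (VMEMMAP_REGION_ID << REGION_SHIFT)

int main(void)
{
        assert(VMEMMAP_BASE == 0xf000000000000000ULL);
        assert((VMEMMAP_BASE >> REGION_SHIFT) == VMEMMAP_REGION_ID);
        return 0;
}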