author		Vivek Goyal <vgoyal@in.ibm.com>		2007-05-02 13:27:07 -0400
committer	Andi Kleen <andi@basil.nowhere.org>	2007-05-02 13:27:07 -0400
commit		0dbf7028c0c1f266c9631139450a1502d3cd457e (patch)
tree		2616edcd32d92b6539d2810fd3043b054baabb92 /arch/x86_64
parent		1b29c1643c0d82512477ccd97dc290198fe23e22 (diff)
[PATCH] x86: __pa and __pa_symbol address space separation
Currently __pa_symbol is for use with symbols in the kernel address map and __pa is for use with pointers into the physical memory map. But the code is implemented so you can usually interchange the two.

__pa, which is much more common, can be implemented much more cheaply if it doesn't have to worry about any other kernel address spaces. This is especially true with a relocatable kernel, as __pa_symbol needs to perform an extra variable read to resolve the address.

A third macro, __pa_vsymbol, is added for finding the physical addresses of vsyscall pages.

Most of this patch is simply sorting through the references to __pa or __pa_symbol and using the proper one. A little of it is continuing to use a physical address when we have it instead of recalculating it several times.

swapper_pgd is now NULL. leave_mm now uses init_mm.pgd, and init_mm.pgd is initialized at boot (instead of compile time) to the physmem virtual mapping of init_level4_pgt. The physical address changed.

Except for the EMPTY_ZERO page, all of the remaining references to __pa_symbol appear to be during kernel initialization. So this should reduce the cost of __pa in the common case, even on a relocated kernel.

As this is technically a semantic change we need to be on the lookout for anything I missed. But it works for me (tm).

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
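For context, the cheap vs. expensive translation described above comes down to roughly the following macro shapes. This is a simplified sketch of the post-series x86_64 definitions, not a verbatim copy of include/asm-x86_64/page.h; phys_base names the run-time load offset that a relocatable kernel must read:

/*
 * Simplified sketch, not verbatim kernel source.  __pa() only has to
 * translate pointers in the direct physical-memory mapping, so it is
 * a single subtraction.  __pa_symbol() translates symbol addresses in
 * the kernel text mapping at __START_KERNEL_map, and on a relocatable
 * kernel must also add phys_base, the extra variable read mentioned
 * above.  The empty asm keeps gcc from folding the symbol address
 * into a link-time constant.
 */
#define __pa(x)		((unsigned long)(x) - PAGE_OFFSET)

#define __pa_symbol(x)					\
	({unsigned long v;				\
	  asm("" : "=r" (v) : "0" (x));			\
	  ((v - __START_KERNEL_map) + phys_base); })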
Diffstat (limited to 'arch/x86_64')
-rw-r--r--	arch/x86_64/kernel/machine_kexec.c	14
-rw-r--r--	arch/x86_64/kernel/setup.c		9
-rw-r--r--	arch/x86_64/kernel/smp.c		2
-rw-r--r--	arch/x86_64/kernel/vsyscall.c		9
-rw-r--r--	arch/x86_64/mm/init.c			21
-rw-r--r--	arch/x86_64/mm/pageattr.c		16
6 files changed, 39 insertions, 32 deletions
diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c
index 0497e3bd5bff..a8bb33c1a8f2 100644
--- a/arch/x86_64/kernel/machine_kexec.c
+++ b/arch/x86_64/kernel/machine_kexec.c
@@ -191,19 +191,19 @@ NORET_TYPE void machine_kexec(struct kimage *image)
 
 	page_list[PA_CONTROL_PAGE] = __pa(control_page);
 	page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
-	page_list[PA_PGD] = __pa(kexec_pgd);
+	page_list[PA_PGD] = __pa_symbol(&kexec_pgd);
 	page_list[VA_PGD] = (unsigned long)kexec_pgd;
-	page_list[PA_PUD_0] = __pa(kexec_pud0);
+	page_list[PA_PUD_0] = __pa_symbol(&kexec_pud0);
 	page_list[VA_PUD_0] = (unsigned long)kexec_pud0;
-	page_list[PA_PMD_0] = __pa(kexec_pmd0);
+	page_list[PA_PMD_0] = __pa_symbol(&kexec_pmd0);
 	page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
-	page_list[PA_PTE_0] = __pa(kexec_pte0);
+	page_list[PA_PTE_0] = __pa_symbol(&kexec_pte0);
 	page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
-	page_list[PA_PUD_1] = __pa(kexec_pud1);
+	page_list[PA_PUD_1] = __pa_symbol(&kexec_pud1);
 	page_list[VA_PUD_1] = (unsigned long)kexec_pud1;
-	page_list[PA_PMD_1] = __pa(kexec_pmd1);
+	page_list[PA_PMD_1] = __pa_symbol(&kexec_pmd1);
 	page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
-	page_list[PA_PTE_1] = __pa(kexec_pte1);
+	page_list[PA_PTE_1] = __pa_symbol(&kexec_pte1);
 	page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
 
 	page_list[PA_TABLE_PAGE] =
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 0e2b8df0ea64..b9bdfc1b54f8 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -243,11 +243,12 @@ void __init setup_arch(char **cmdline_p)
 	init_mm.end_code = (unsigned long) &_etext;
 	init_mm.end_data = (unsigned long) &_edata;
 	init_mm.brk = (unsigned long) &_end;
+	init_mm.pgd = __va(__pa_symbol(&init_level4_pgt));
 
-	code_resource.start = virt_to_phys(&_text);
-	code_resource.end = virt_to_phys(&_etext)-1;
-	data_resource.start = virt_to_phys(&_etext);
-	data_resource.end = virt_to_phys(&_edata)-1;
+	code_resource.start = __pa_symbol(&_text);
+	code_resource.end = __pa_symbol(&_etext)-1;
+	data_resource.start = __pa_symbol(&_etext);
+	data_resource.end = __pa_symbol(&_edata)-1;
 
 	early_identify_cpu(&boot_cpu_data);
 
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index bd1d123947ce..22abae4e9f39 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -76,7 +76,7 @@ static inline void leave_mm(int cpu)
 	if (read_pda(mmu_state) == TLBSTATE_OK)
 		BUG();
 	cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
-	load_cr3(swapper_pg_dir);
+	load_cr3(init_mm.pgd);
 }
 
 /*
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index b43c698cf7d3..d14cbb3e0ebe 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -45,6 +45,11 @@
 
 #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
 #define __syscall_clobber "r11","rcx","memory"
+#define __pa_vsymbol(x)				\
+	({unsigned long v;			\
+	  extern char __vsyscall_0;		\
+	  asm("" : "=r" (v) : "0" (x));		\
+	  ((v - VSYSCALL_FIRST_PAGE) + __pa_symbol(&__vsyscall_0)); })
 
 struct vsyscall_gtod_data_t {
 	seqlock_t lock;
@@ -224,10 +229,10 @@ static int vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
 		return ret;
 	/* gcc has some trouble with __va(__pa()), so just do it this
 	   way. */
-	map1 = ioremap(__pa_symbol(&vsysc1), 2);
+	map1 = ioremap(__pa_vsymbol(&vsysc1), 2);
 	if (!map1)
 		return -ENOMEM;
-	map2 = ioremap(__pa_symbol(&vsysc2), 2);
+	map2 = ioremap(__pa_vsymbol(&vsysc2), 2);
 	if (!map2) {
 		ret = -ENOMEM;
 		goto out;
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index b0a607892183..69e22d3c9238 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -565,11 +565,11 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
 
 	printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
 	for (addr = begin; addr < end; addr += PAGE_SIZE) {
-		ClearPageReserved(virt_to_page(addr));
-		init_page_count(virt_to_page(addr));
-		memset((void *)(addr & ~(PAGE_SIZE-1)),
-			POISON_FREE_INITMEM, PAGE_SIZE);
-		free_page(addr);
+		struct page *page = pfn_to_page(addr >> PAGE_SHIFT);
+		ClearPageReserved(page);
+		init_page_count(page);
+		memset(page_address(page), POISON_FREE_INITMEM, PAGE_SIZE);
+		__free_page(page);
 		totalram_pages++;
 	}
 }
@@ -579,17 +579,18 @@ void free_initmem(void)
 	memset(__initdata_begin, POISON_FREE_INITDATA,
 		__initdata_end - __initdata_begin);
 	free_init_pages("unused kernel memory",
-			(unsigned long)(&__init_begin),
-			(unsigned long)(&__init_end));
+			__pa_symbol(&__init_begin),
+			__pa_symbol(&__init_end));
 }
 
 #ifdef CONFIG_DEBUG_RODATA
 
 void mark_rodata_ro(void)
 {
-	unsigned long addr = (unsigned long)__start_rodata;
+	unsigned long addr = (unsigned long)__va(__pa_symbol(&__start_rodata));
+	unsigned long end = (unsigned long)__va(__pa_symbol(&__end_rodata));
 
-	for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE)
+	for (; addr < end; addr += PAGE_SIZE)
 		change_page_attr_addr(addr, 1, PAGE_KERNEL_RO);
 
 	printk ("Write protecting the kernel read-only data: %luk\n",
@@ -608,7 +609,7 @@ void mark_rodata_ro(void)
 #ifdef CONFIG_BLK_DEV_INITRD
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-	free_init_pages("initrd memory", start, end);
+	free_init_pages("initrd memory", __pa(start), __pa(end));
 }
 #endif
 
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
index 081409aa3452..76ee90a5abe0 100644
--- a/arch/x86_64/mm/pageattr.c
+++ b/arch/x86_64/mm/pageattr.c
@@ -51,7 +51,6 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot,
 	SetPagePrivate(base);
 	page_private(base) = 0;
 
-	address = __pa(address);
 	addr = address & LARGE_PAGE_MASK;
 	pbase = (pte_t *)page_address(base);
 	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
@@ -101,13 +100,12 @@ static inline void save_page(struct page *fpage)
  * No more special protections in this 2/4MB area - revert to a
  * large page again.
  */
-static void revert_page(unsigned long address, pgprot_t ref_prot)
+static void revert_page(unsigned long address, unsigned long pfn, pgprot_t ref_prot)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t large_pte;
-	unsigned long pfn;
 
 	pgd = pgd_offset_k(address);
 	BUG_ON(pgd_none(*pgd));
@@ -115,7 +113,6 @@ static void revert_page(unsigned long address, pgprot_t ref_prot)
 	BUG_ON(pud_none(*pud));
 	pmd = pmd_offset(pud, address);
 	BUG_ON(pmd_val(*pmd) & _PAGE_PSE);
-	pfn = (__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT;
 	large_pte = pfn_pte(pfn, ref_prot);
 	large_pte = pte_mkhuge(large_pte);
 	set_pte((pte_t *)pmd, large_pte);
@@ -141,7 +138,8 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
 	 */
 	struct page *split;
 	ref_prot2 = pte_pgprot(pte_clrhuge(*kpte));
-	split = split_large_page(address, prot, ref_prot2);
+	split = split_large_page(pfn << PAGE_SHIFT, prot,
+				 ref_prot2);
 	if (!split)
 		return -ENOMEM;
 	set_pte(kpte, mk_pte(split, ref_prot2));
@@ -160,7 +158,7 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
 
 	if (page_private(kpte_page) == 0) {
 		save_page(kpte_page);
-		revert_page(address, ref_prot);
+		revert_page(address, pfn, ref_prot);
 	}
 	return 0;
 }
@@ -180,6 +178,7 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
  */
 int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
 {
+	unsigned long phys_base_pfn = __pa_symbol(__START_KERNEL_map) >> PAGE_SHIFT;
 	int err = 0;
 	int i;
 
@@ -192,10 +191,11 @@ int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
 			break;
 		/* Handle kernel mapping too which aliases part of the
 		 * lowmem */
-		if (__pa(address) < KERNEL_TEXT_SIZE) {
+		if ((pfn >= phys_base_pfn) &&
+			((pfn - phys_base_pfn) < (KERNEL_TEXT_SIZE >> PAGE_SHIFT))) {
 			unsigned long addr2;
 			pgprot_t prot2;
-			addr2 = __START_KERNEL_map + __pa(address);
+			addr2 = __START_KERNEL_map + ((pfn - phys_base_pfn) << PAGE_SHIFT);
 			/* Make sure the kernel mappings stay executable */
 			prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
 			err = __change_page_attr(addr2, pfn, prot2,