diff options
author | Vivek Goyal <vgoyal@in.ibm.com> | 2007-05-02 13:27:07 -0400 |
---|---|---|
committer | Andi Kleen <andi@basil.nowhere.org> | 2007-05-02 13:27:07 -0400 |
commit | 0dbf7028c0c1f266c9631139450a1502d3cd457e (patch) | |
tree | 2616edcd32d92b6539d2810fd3043b054baabb92 /arch/x86_64 | |
parent | 1b29c1643c0d82512477ccd97dc290198fe23e22 (diff) |
[PATCH] x86: __pa and __pa_symbol address space separation
Currently __pa_symbol is for use with symbols in the kernel address
map and __pa is for use with pointers into the physical memory map.
But the code is implemented so you can usually interchange the two.
__pa, which is much more common, can be implemented much more cheaply
if it doesn't have to worry about any other kernel address
spaces. This is especially true with a relocatable kernel as
__pa_symbol needs to perform an extra variable read to resolve
the address.
There is a third macro that is added for the vsyscall data,
__pa_vsymbol, for finding the physical addresses of vsyscall pages.
Most of this patch is simply sorting through the references to
__pa or __pa_symbol and using the proper one. A little of
it is continuing to use a physical address when we have it
instead of recalculating it several times.
swapper_pgd is now NULL. leave_mm now uses init_mm.pgd
and init_mm.pgd is initialized at boot (instead of compile time)
to the physmem virtual mapping of init_level4_pgt. The
physical address changed.
Except for the EMPTY_ZERO page, all of the remaining references
to __pa_symbol appear to be during kernel initialization. So this
should reduce the cost of __pa in the common case, even on a relocated
kernel.
As this is technically a semantic change we need to be on the lookout
for anything I missed. But it works for me (tm).
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Diffstat (limited to 'arch/x86_64')
-rw-r--r-- | arch/x86_64/kernel/machine_kexec.c | 14 | ||||
-rw-r--r-- | arch/x86_64/kernel/setup.c | 9 | ||||
-rw-r--r-- | arch/x86_64/kernel/smp.c | 2 | ||||
-rw-r--r-- | arch/x86_64/kernel/vsyscall.c | 9 | ||||
-rw-r--r-- | arch/x86_64/mm/init.c | 21 | ||||
-rw-r--r-- | arch/x86_64/mm/pageattr.c | 16 |
6 files changed, 39 insertions, 32 deletions
diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c index 0497e3bd5bff..a8bb33c1a8f2 100644 --- a/arch/x86_64/kernel/machine_kexec.c +++ b/arch/x86_64/kernel/machine_kexec.c | |||
@@ -191,19 +191,19 @@ NORET_TYPE void machine_kexec(struct kimage *image) | |||
191 | 191 | ||
192 | page_list[PA_CONTROL_PAGE] = __pa(control_page); | 192 | page_list[PA_CONTROL_PAGE] = __pa(control_page); |
193 | page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; | 193 | page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; |
194 | page_list[PA_PGD] = __pa(kexec_pgd); | 194 | page_list[PA_PGD] = __pa_symbol(&kexec_pgd); |
195 | page_list[VA_PGD] = (unsigned long)kexec_pgd; | 195 | page_list[VA_PGD] = (unsigned long)kexec_pgd; |
196 | page_list[PA_PUD_0] = __pa(kexec_pud0); | 196 | page_list[PA_PUD_0] = __pa_symbol(&kexec_pud0); |
197 | page_list[VA_PUD_0] = (unsigned long)kexec_pud0; | 197 | page_list[VA_PUD_0] = (unsigned long)kexec_pud0; |
198 | page_list[PA_PMD_0] = __pa(kexec_pmd0); | 198 | page_list[PA_PMD_0] = __pa_symbol(&kexec_pmd0); |
199 | page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; | 199 | page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; |
200 | page_list[PA_PTE_0] = __pa(kexec_pte0); | 200 | page_list[PA_PTE_0] = __pa_symbol(&kexec_pte0); |
201 | page_list[VA_PTE_0] = (unsigned long)kexec_pte0; | 201 | page_list[VA_PTE_0] = (unsigned long)kexec_pte0; |
202 | page_list[PA_PUD_1] = __pa(kexec_pud1); | 202 | page_list[PA_PUD_1] = __pa_symbol(&kexec_pud1); |
203 | page_list[VA_PUD_1] = (unsigned long)kexec_pud1; | 203 | page_list[VA_PUD_1] = (unsigned long)kexec_pud1; |
204 | page_list[PA_PMD_1] = __pa(kexec_pmd1); | 204 | page_list[PA_PMD_1] = __pa_symbol(&kexec_pmd1); |
205 | page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; | 205 | page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; |
206 | page_list[PA_PTE_1] = __pa(kexec_pte1); | 206 | page_list[PA_PTE_1] = __pa_symbol(&kexec_pte1); |
207 | page_list[VA_PTE_1] = (unsigned long)kexec_pte1; | 207 | page_list[VA_PTE_1] = (unsigned long)kexec_pte1; |
208 | 208 | ||
209 | page_list[PA_TABLE_PAGE] = | 209 | page_list[PA_TABLE_PAGE] = |
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 0e2b8df0ea64..b9bdfc1b54f8 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c | |||
@@ -243,11 +243,12 @@ void __init setup_arch(char **cmdline_p) | |||
243 | init_mm.end_code = (unsigned long) &_etext; | 243 | init_mm.end_code = (unsigned long) &_etext; |
244 | init_mm.end_data = (unsigned long) &_edata; | 244 | init_mm.end_data = (unsigned long) &_edata; |
245 | init_mm.brk = (unsigned long) &_end; | 245 | init_mm.brk = (unsigned long) &_end; |
246 | init_mm.pgd = __va(__pa_symbol(&init_level4_pgt)); | ||
246 | 247 | ||
247 | code_resource.start = virt_to_phys(&_text); | 248 | code_resource.start = __pa_symbol(&_text); |
248 | code_resource.end = virt_to_phys(&_etext)-1; | 249 | code_resource.end = __pa_symbol(&_etext)-1; |
249 | data_resource.start = virt_to_phys(&_etext); | 250 | data_resource.start = __pa_symbol(&_etext); |
250 | data_resource.end = virt_to_phys(&_edata)-1; | 251 | data_resource.end = __pa_symbol(&_edata)-1; |
251 | 252 | ||
252 | early_identify_cpu(&boot_cpu_data); | 253 | early_identify_cpu(&boot_cpu_data); |
253 | 254 | ||
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c index bd1d123947ce..22abae4e9f39 100644 --- a/arch/x86_64/kernel/smp.c +++ b/arch/x86_64/kernel/smp.c | |||
@@ -76,7 +76,7 @@ static inline void leave_mm(int cpu) | |||
76 | if (read_pda(mmu_state) == TLBSTATE_OK) | 76 | if (read_pda(mmu_state) == TLBSTATE_OK) |
77 | BUG(); | 77 | BUG(); |
78 | cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); | 78 | cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); |
79 | load_cr3(swapper_pg_dir); | 79 | load_cr3(init_mm.pgd); |
80 | } | 80 | } |
81 | 81 | ||
82 | /* | 82 | /* |
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index b43c698cf7d3..d14cbb3e0ebe 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c | |||
@@ -45,6 +45,11 @@ | |||
45 | 45 | ||
46 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) | 46 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) |
47 | #define __syscall_clobber "r11","rcx","memory" | 47 | #define __syscall_clobber "r11","rcx","memory" |
48 | #define __pa_vsymbol(x) \ | ||
49 | ({unsigned long v; \ | ||
50 | extern char __vsyscall_0; \ | ||
51 | asm("" : "=r" (v) : "0" (x)); \ | ||
52 | ((v - VSYSCALL_FIRST_PAGE) + __pa_symbol(&__vsyscall_0)); }) | ||
48 | 53 | ||
49 | struct vsyscall_gtod_data_t { | 54 | struct vsyscall_gtod_data_t { |
50 | seqlock_t lock; | 55 | seqlock_t lock; |
@@ -224,10 +229,10 @@ static int vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp, | |||
224 | return ret; | 229 | return ret; |
225 | /* gcc has some trouble with __va(__pa()), so just do it this | 230 | /* gcc has some trouble with __va(__pa()), so just do it this |
226 | way. */ | 231 | way. */ |
227 | map1 = ioremap(__pa_symbol(&vsysc1), 2); | 232 | map1 = ioremap(__pa_vsymbol(&vsysc1), 2); |
228 | if (!map1) | 233 | if (!map1) |
229 | return -ENOMEM; | 234 | return -ENOMEM; |
230 | map2 = ioremap(__pa_symbol(&vsysc2), 2); | 235 | map2 = ioremap(__pa_vsymbol(&vsysc2), 2); |
231 | if (!map2) { | 236 | if (!map2) { |
232 | ret = -ENOMEM; | 237 | ret = -ENOMEM; |
233 | goto out; | 238 | goto out; |
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index b0a607892183..69e22d3c9238 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c | |||
@@ -565,11 +565,11 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) | |||
565 | 565 | ||
566 | printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); | 566 | printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); |
567 | for (addr = begin; addr < end; addr += PAGE_SIZE) { | 567 | for (addr = begin; addr < end; addr += PAGE_SIZE) { |
568 | ClearPageReserved(virt_to_page(addr)); | 568 | struct page *page = pfn_to_page(addr >> PAGE_SHIFT); |
569 | init_page_count(virt_to_page(addr)); | 569 | ClearPageReserved(page); |
570 | memset((void *)(addr & ~(PAGE_SIZE-1)), | 570 | init_page_count(page); |
571 | POISON_FREE_INITMEM, PAGE_SIZE); | 571 | memset(page_address(page), POISON_FREE_INITMEM, PAGE_SIZE); |
572 | free_page(addr); | 572 | __free_page(page); |
573 | totalram_pages++; | 573 | totalram_pages++; |
574 | } | 574 | } |
575 | } | 575 | } |
@@ -579,17 +579,18 @@ void free_initmem(void) | |||
579 | memset(__initdata_begin, POISON_FREE_INITDATA, | 579 | memset(__initdata_begin, POISON_FREE_INITDATA, |
580 | __initdata_end - __initdata_begin); | 580 | __initdata_end - __initdata_begin); |
581 | free_init_pages("unused kernel memory", | 581 | free_init_pages("unused kernel memory", |
582 | (unsigned long)(&__init_begin), | 582 | __pa_symbol(&__init_begin), |
583 | (unsigned long)(&__init_end)); | 583 | __pa_symbol(&__init_end)); |
584 | } | 584 | } |
585 | 585 | ||
586 | #ifdef CONFIG_DEBUG_RODATA | 586 | #ifdef CONFIG_DEBUG_RODATA |
587 | 587 | ||
588 | void mark_rodata_ro(void) | 588 | void mark_rodata_ro(void) |
589 | { | 589 | { |
590 | unsigned long addr = (unsigned long)__start_rodata; | 590 | unsigned long addr = (unsigned long)__va(__pa_symbol(&__start_rodata)); |
591 | unsigned long end = (unsigned long)__va(__pa_symbol(&__end_rodata)); | ||
591 | 592 | ||
592 | for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE) | 593 | for (; addr < end; addr += PAGE_SIZE) |
593 | change_page_attr_addr(addr, 1, PAGE_KERNEL_RO); | 594 | change_page_attr_addr(addr, 1, PAGE_KERNEL_RO); |
594 | 595 | ||
595 | printk ("Write protecting the kernel read-only data: %luk\n", | 596 | printk ("Write protecting the kernel read-only data: %luk\n", |
@@ -608,7 +609,7 @@ void mark_rodata_ro(void) | |||
608 | #ifdef CONFIG_BLK_DEV_INITRD | 609 | #ifdef CONFIG_BLK_DEV_INITRD |
609 | void free_initrd_mem(unsigned long start, unsigned long end) | 610 | void free_initrd_mem(unsigned long start, unsigned long end) |
610 | { | 611 | { |
611 | free_init_pages("initrd memory", start, end); | 612 | free_init_pages("initrd memory", __pa(start), __pa(end)); |
612 | } | 613 | } |
613 | #endif | 614 | #endif |
614 | 615 | ||
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c index 081409aa3452..76ee90a5abe0 100644 --- a/arch/x86_64/mm/pageattr.c +++ b/arch/x86_64/mm/pageattr.c | |||
@@ -51,7 +51,6 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot, | |||
51 | SetPagePrivate(base); | 51 | SetPagePrivate(base); |
52 | page_private(base) = 0; | 52 | page_private(base) = 0; |
53 | 53 | ||
54 | address = __pa(address); | ||
55 | addr = address & LARGE_PAGE_MASK; | 54 | addr = address & LARGE_PAGE_MASK; |
56 | pbase = (pte_t *)page_address(base); | 55 | pbase = (pte_t *)page_address(base); |
57 | for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { | 56 | for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { |
@@ -101,13 +100,12 @@ static inline void save_page(struct page *fpage) | |||
101 | * No more special protections in this 2/4MB area - revert to a | 100 | * No more special protections in this 2/4MB area - revert to a |
102 | * large page again. | 101 | * large page again. |
103 | */ | 102 | */ |
104 | static void revert_page(unsigned long address, pgprot_t ref_prot) | 103 | static void revert_page(unsigned long address, unsigned long pfn, pgprot_t ref_prot) |
105 | { | 104 | { |
106 | pgd_t *pgd; | 105 | pgd_t *pgd; |
107 | pud_t *pud; | 106 | pud_t *pud; |
108 | pmd_t *pmd; | 107 | pmd_t *pmd; |
109 | pte_t large_pte; | 108 | pte_t large_pte; |
110 | unsigned long pfn; | ||
111 | 109 | ||
112 | pgd = pgd_offset_k(address); | 110 | pgd = pgd_offset_k(address); |
113 | BUG_ON(pgd_none(*pgd)); | 111 | BUG_ON(pgd_none(*pgd)); |
@@ -115,7 +113,6 @@ static void revert_page(unsigned long address, pgprot_t ref_prot) | |||
115 | BUG_ON(pud_none(*pud)); | 113 | BUG_ON(pud_none(*pud)); |
116 | pmd = pmd_offset(pud, address); | 114 | pmd = pmd_offset(pud, address); |
117 | BUG_ON(pmd_val(*pmd) & _PAGE_PSE); | 115 | BUG_ON(pmd_val(*pmd) & _PAGE_PSE); |
118 | pfn = (__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT; | ||
119 | large_pte = pfn_pte(pfn, ref_prot); | 116 | large_pte = pfn_pte(pfn, ref_prot); |
120 | large_pte = pte_mkhuge(large_pte); | 117 | large_pte = pte_mkhuge(large_pte); |
121 | set_pte((pte_t *)pmd, large_pte); | 118 | set_pte((pte_t *)pmd, large_pte); |
@@ -141,7 +138,8 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, | |||
141 | */ | 138 | */ |
142 | struct page *split; | 139 | struct page *split; |
143 | ref_prot2 = pte_pgprot(pte_clrhuge(*kpte)); | 140 | ref_prot2 = pte_pgprot(pte_clrhuge(*kpte)); |
144 | split = split_large_page(address, prot, ref_prot2); | 141 | split = split_large_page(pfn << PAGE_SHIFT, prot, |
142 | ref_prot2); | ||
145 | if (!split) | 143 | if (!split) |
146 | return -ENOMEM; | 144 | return -ENOMEM; |
147 | set_pte(kpte, mk_pte(split, ref_prot2)); | 145 | set_pte(kpte, mk_pte(split, ref_prot2)); |
@@ -160,7 +158,7 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, | |||
160 | 158 | ||
161 | if (page_private(kpte_page) == 0) { | 159 | if (page_private(kpte_page) == 0) { |
162 | save_page(kpte_page); | 160 | save_page(kpte_page); |
163 | revert_page(address, ref_prot); | 161 | revert_page(address, pfn, ref_prot); |
164 | } | 162 | } |
165 | return 0; | 163 | return 0; |
166 | } | 164 | } |
@@ -180,6 +178,7 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, | |||
180 | */ | 178 | */ |
181 | int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot) | 179 | int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot) |
182 | { | 180 | { |
181 | unsigned long phys_base_pfn = __pa_symbol(__START_KERNEL_map) >> PAGE_SHIFT; | ||
183 | int err = 0; | 182 | int err = 0; |
184 | int i; | 183 | int i; |
185 | 184 | ||
@@ -192,10 +191,11 @@ int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot) | |||
192 | break; | 191 | break; |
193 | /* Handle kernel mapping too which aliases part of the | 192 | /* Handle kernel mapping too which aliases part of the |
194 | * lowmem */ | 193 | * lowmem */ |
195 | if (__pa(address) < KERNEL_TEXT_SIZE) { | 194 | if ((pfn >= phys_base_pfn) && |
195 | ((pfn - phys_base_pfn) < (KERNEL_TEXT_SIZE >> PAGE_SHIFT))) { | ||
196 | unsigned long addr2; | 196 | unsigned long addr2; |
197 | pgprot_t prot2; | 197 | pgprot_t prot2; |
198 | addr2 = __START_KERNEL_map + __pa(address); | 198 | addr2 = __START_KERNEL_map + ((pfn - phys_base_pfn) << PAGE_SHIFT); |
199 | /* Make sure the kernel mappings stay executable */ | 199 | /* Make sure the kernel mappings stay executable */ |
200 | prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot))); | 200 | prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot))); |
201 | err = __change_page_attr(addr2, pfn, prot2, | 201 | err = __change_page_attr(addr2, pfn, prot2, |