aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRusty Russell <rusty@rustcorp.com.au>2006-12-06 20:14:08 -0500
committerAndi Kleen <andi@basil.nowhere.org>2006-12-06 20:14:08 -0500
commitda181a8b3916aa7f2e3c5775d2bd2fe3454cf82d (patch)
treec5be6c957d57563b9854732df1210aad97027b03
parent13623d79309dd82e1964458fa017979d16f33fa8 (diff)
[PATCH] paravirt: Add MMU virtualization to paravirt_ops
Add the three bare TLB accessor functions to paravirt-ops. Most amusingly, flush_tlb is redefined on SMP, so I can't call the paravirt op flush_tlb. Instead, I chose to indicate the actual flush type, kernel (global) vs. user (non-global). Global in this sense means using the global bit in the page table entry, which makes TLB entries persistent across CR3 reloads, not global as in the SMP sense of invoking remote shootdowns, so the term is confusingly overloaded. AK: folded in fix from Zach for PAE compilation Signed-off-by: Zachary Amsden <zach@vmware.com> Signed-off-by: Chris Wright <chrisw@sous-sol.org> Signed-off-by: Andi Kleen <ak@suse.de> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Jeremy Fitzhardinge <jeremy@goop.org> Signed-off-by: Andrew Morton <akpm@osdl.org>
-rw-r--r--arch/i386/kernel/paravirt.c109
-rw-r--r--arch/i386/mm/boot_ioremap.c1
-rw-r--r--include/asm-i386/paravirt.h75
-rw-r--r--include/asm-i386/pgtable-2level.h5
-rw-r--r--include/asm-i386/pgtable-3level.h40
-rw-r--r--include/asm-i386/pgtable.h4
-rw-r--r--include/asm-i386/tlbflush.h18
7 files changed, 226 insertions, 26 deletions
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c
index fe82eb3adf42..3dceab5828f1 100644
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -31,6 +31,7 @@
31#include <asm/delay.h> 31#include <asm/delay.h>
32#include <asm/fixmap.h> 32#include <asm/fixmap.h>
33#include <asm/apic.h> 33#include <asm/apic.h>
34#include <asm/tlbflush.h>
34 35
35/* nop stub */ 36/* nop stub */
36static void native_nop(void) 37static void native_nop(void)
@@ -379,6 +380,97 @@ static fastcall void native_io_delay(void)
379 asm volatile("outb %al,$0x80"); 380 asm volatile("outb %al,$0x80");
380} 381}
381 382
383static fastcall void native_flush_tlb(void)
384{
385 __native_flush_tlb();
386}
387
388/*
389 * Global pages have to be flushed a bit differently. Not a real
390 * performance problem because this does not happen often.
391 */
392static fastcall void native_flush_tlb_global(void)
393{
394 __native_flush_tlb_global();
395}
396
397static fastcall void native_flush_tlb_single(u32 addr)
398{
399 __native_flush_tlb_single(addr);
400}
401
402#ifndef CONFIG_X86_PAE
403static fastcall void native_set_pte(pte_t *ptep, pte_t pteval)
404{
405 *ptep = pteval;
406}
407
408static fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval)
409{
410 *ptep = pteval;
411}
412
413static fastcall void native_set_pmd(pmd_t *pmdp, pmd_t pmdval)
414{
415 *pmdp = pmdval;
416}
417
418#else /* CONFIG_X86_PAE */
419
420static fastcall void native_set_pte(pte_t *ptep, pte_t pte)
421{
422 ptep->pte_high = pte.pte_high;
423 smp_wmb();
424 ptep->pte_low = pte.pte_low;
425}
426
427static fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte)
428{
429 ptep->pte_high = pte.pte_high;
430 smp_wmb();
431 ptep->pte_low = pte.pte_low;
432}
433
434static fastcall void native_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
435{
436 ptep->pte_low = 0;
437 smp_wmb();
438 ptep->pte_high = pte.pte_high;
439 smp_wmb();
440 ptep->pte_low = pte.pte_low;
441}
442
443static fastcall void native_set_pte_atomic(pte_t *ptep, pte_t pteval)
444{
445 set_64bit((unsigned long long *)ptep,pte_val(pteval));
446}
447
448static fastcall void native_set_pmd(pmd_t *pmdp, pmd_t pmdval)
449{
450 set_64bit((unsigned long long *)pmdp,pmd_val(pmdval));
451}
452
453static fastcall void native_set_pud(pud_t *pudp, pud_t pudval)
454{
455 *pudp = pudval;
456}
457
458static fastcall void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
459{
460 ptep->pte_low = 0;
461 smp_wmb();
462 ptep->pte_high = 0;
463}
464
465static fastcall void native_pmd_clear(pmd_t *pmd)
466{
467 u32 *tmp = (u32 *)pmd;
468 *tmp = 0;
469 smp_wmb();
470 *(tmp + 1) = 0;
471}
472#endif /* CONFIG_X86_PAE */
473
382/* These are in entry.S */ 474/* These are in entry.S */
383extern fastcall void native_iret(void); 475extern fastcall void native_iret(void);
384extern fastcall void native_irq_enable_sysexit(void); 476extern fastcall void native_irq_enable_sysexit(void);
@@ -454,6 +546,23 @@ struct paravirt_ops paravirt_ops = {
454 .apic_read = native_apic_read, 546 .apic_read = native_apic_read,
455#endif 547#endif
456 548
549 .flush_tlb_user = native_flush_tlb,
550 .flush_tlb_kernel = native_flush_tlb_global,
551 .flush_tlb_single = native_flush_tlb_single,
552
553 .set_pte = native_set_pte,
554 .set_pte_at = native_set_pte_at,
555 .set_pmd = native_set_pmd,
556 .pte_update = (void *)native_nop,
557 .pte_update_defer = (void *)native_nop,
558#ifdef CONFIG_X86_PAE
559 .set_pte_atomic = native_set_pte_atomic,
560 .set_pte_present = native_set_pte_present,
561 .set_pud = native_set_pud,
562 .pte_clear = native_pte_clear,
563 .pmd_clear = native_pmd_clear,
564#endif
565
457 .irq_enable_sysexit = native_irq_enable_sysexit, 566 .irq_enable_sysexit = native_irq_enable_sysexit,
458 .iret = native_iret, 567 .iret = native_iret,
459}; 568};
diff --git a/arch/i386/mm/boot_ioremap.c b/arch/i386/mm/boot_ioremap.c
index 4de11f508c3a..4de95a17a7d4 100644
--- a/arch/i386/mm/boot_ioremap.c
+++ b/arch/i386/mm/boot_ioremap.c
@@ -16,6 +16,7 @@
16 */ 16 */
17 17
18#undef CONFIG_X86_PAE 18#undef CONFIG_X86_PAE
19#undef CONFIG_PARAVIRT
19#include <asm/page.h> 20#include <asm/page.h>
20#include <asm/pgtable.h> 21#include <asm/pgtable.h>
21#include <asm/tlbflush.h> 22#include <asm/tlbflush.h>
diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h
index e2c803fadb14..9f06265065f4 100644
--- a/include/asm-i386/paravirt.h
+++ b/include/asm-i386/paravirt.h
@@ -4,6 +4,7 @@
4 * para-virtualization: those hooks are defined here. */ 4 * para-virtualization: those hooks are defined here. */
5#include <linux/linkage.h> 5#include <linux/linkage.h>
6#include <linux/stringify.h> 6#include <linux/stringify.h>
7#include <asm/page.h>
7 8
8#ifdef CONFIG_PARAVIRT 9#ifdef CONFIG_PARAVIRT
9/* These are the most performance critical ops, so we want to be able to patch 10/* These are the most performance critical ops, so we want to be able to patch
@@ -27,6 +28,7 @@
27struct thread_struct; 28struct thread_struct;
28struct Xgt_desc_struct; 29struct Xgt_desc_struct;
29struct tss_struct; 30struct tss_struct;
31struct mm_struct;
30struct paravirt_ops 32struct paravirt_ops
31{ 33{
32 unsigned int kernel_rpl; 34 unsigned int kernel_rpl;
@@ -121,6 +123,23 @@ struct paravirt_ops
121 unsigned long (fastcall *apic_read)(unsigned long reg); 123 unsigned long (fastcall *apic_read)(unsigned long reg);
122#endif 124#endif
123 125
126 void (fastcall *flush_tlb_user)(void);
127 void (fastcall *flush_tlb_kernel)(void);
128 void (fastcall *flush_tlb_single)(u32 addr);
129
130 void (fastcall *set_pte)(pte_t *ptep, pte_t pteval);
131 void (fastcall *set_pte_at)(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval);
132 void (fastcall *set_pmd)(pmd_t *pmdp, pmd_t pmdval);
133 void (fastcall *pte_update)(struct mm_struct *mm, u32 addr, pte_t *ptep);
134 void (fastcall *pte_update_defer)(struct mm_struct *mm, u32 addr, pte_t *ptep);
135#ifdef CONFIG_X86_PAE
136 void (fastcall *set_pte_atomic)(pte_t *ptep, pte_t pteval);
137 void (fastcall *set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte);
138 void (fastcall *set_pud)(pud_t *pudp, pud_t pudval);
139 void (fastcall *pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
140 void (fastcall *pmd_clear)(pmd_t *pmdp);
141#endif
142
124 /* These two are jmp to, not actually called. */ 143 /* These two are jmp to, not actually called. */
125 void (fastcall *irq_enable_sysexit)(void); 144 void (fastcall *irq_enable_sysexit)(void);
126 void (fastcall *iret)(void); 145 void (fastcall *iret)(void);
@@ -297,6 +316,62 @@ static inline unsigned long apic_read(unsigned long reg)
297#endif 316#endif
298 317
299 318
319#define __flush_tlb() paravirt_ops.flush_tlb_user()
320#define __flush_tlb_global() paravirt_ops.flush_tlb_kernel()
321#define __flush_tlb_single(addr) paravirt_ops.flush_tlb_single(addr)
322
323static inline void set_pte(pte_t *ptep, pte_t pteval)
324{
325 paravirt_ops.set_pte(ptep, pteval);
326}
327
328static inline void set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval)
329{
330 paravirt_ops.set_pte_at(mm, addr, ptep, pteval);
331}
332
333static inline void set_pmd(pmd_t *pmdp, pmd_t pmdval)
334{
335 paravirt_ops.set_pmd(pmdp, pmdval);
336}
337
338static inline void pte_update(struct mm_struct *mm, u32 addr, pte_t *ptep)
339{
340 paravirt_ops.pte_update(mm, addr, ptep);
341}
342
343static inline void pte_update_defer(struct mm_struct *mm, u32 addr, pte_t *ptep)
344{
345 paravirt_ops.pte_update_defer(mm, addr, ptep);
346}
347
348#ifdef CONFIG_X86_PAE
349static inline void set_pte_atomic(pte_t *ptep, pte_t pteval)
350{
351 paravirt_ops.set_pte_atomic(ptep, pteval);
352}
353
354static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
355{
356 paravirt_ops.set_pte_present(mm, addr, ptep, pte);
357}
358
359static inline void set_pud(pud_t *pudp, pud_t pudval)
360{
361 paravirt_ops.set_pud(pudp, pudval);
362}
363
364static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
365{
366 paravirt_ops.pte_clear(mm, addr, ptep);
367}
368
369static inline void pmd_clear(pmd_t *pmdp)
370{
371 paravirt_ops.pmd_clear(pmdp);
372}
373#endif
374
300/* These all sit in the .parainstructions section to tell us what to patch. */ 375/* These all sit in the .parainstructions section to tell us what to patch. */
301struct paravirt_patch { 376struct paravirt_patch {
302 u8 *instr; /* original instructions */ 377 u8 *instr; /* original instructions */
diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h
index 8d8d3b9ecdb0..04d6186abc22 100644
--- a/include/asm-i386/pgtable-2level.h
+++ b/include/asm-i386/pgtable-2level.h
@@ -13,11 +13,14 @@
13 * within a page table are directly modified. Thus, the following 13 * within a page table are directly modified. Thus, the following
14 * hook is made available. 14 * hook is made available.
15 */ 15 */
16#ifndef CONFIG_PARAVIRT
16#define set_pte(pteptr, pteval) (*(pteptr) = pteval) 17#define set_pte(pteptr, pteval) (*(pteptr) = pteval)
17#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) 18#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
19#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
20#endif
21
18#define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval) 22#define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
19#define set_pte_present(mm,addr,ptep,pteval) set_pte_at(mm,addr,ptep,pteval) 23#define set_pte_present(mm,addr,ptep,pteval) set_pte_at(mm,addr,ptep,pteval)
20#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
21 24
22#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0) 25#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
23#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) 26#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h
index c2d701ea35be..2a6e67db8bc3 100644
--- a/include/asm-i386/pgtable-3level.h
+++ b/include/asm-i386/pgtable-3level.h
@@ -44,6 +44,7 @@ static inline int pte_exec_kernel(pte_t pte)
44 return pte_x(pte); 44 return pte_x(pte);
45} 45}
46 46
47#ifndef CONFIG_PARAVIRT
47/* Rules for using set_pte: the pte being assigned *must* be 48/* Rules for using set_pte: the pte being assigned *must* be
48 * either not present or in a state where the hardware will 49 * either not present or in a state where the hardware will
49 * not attempt to update the pte. In places where this is 50 * not attempt to update the pte. In places where this is
@@ -81,25 +82,6 @@ static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, pte
81 (*(pudptr) = (pudval)) 82 (*(pudptr) = (pudval))
82 83
83/* 84/*
84 * Pentium-II erratum A13: in PAE mode we explicitly have to flush
85 * the TLB via cr3 if the top-level pgd is changed...
86 * We do not let the generic code free and clear pgd entries due to
87 * this erratum.
88 */
89static inline void pud_clear (pud_t * pud) { }
90
91#define pud_page(pud) \
92((struct page *) __va(pud_val(pud) & PAGE_MASK))
93
94#define pud_page_vaddr(pud) \
95((unsigned long) __va(pud_val(pud) & PAGE_MASK))
96
97
98/* Find an entry in the second-level page table.. */
99#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
100 pmd_index(address))
101
102/*
103 * For PTEs and PDEs, we must clear the P-bit first when clearing a page table 85 * For PTEs and PDEs, we must clear the P-bit first when clearing a page table
104 * entry, so clear the bottom half first and enforce ordering with a compiler 86 * entry, so clear the bottom half first and enforce ordering with a compiler
105 * barrier. 87 * barrier.
@@ -118,6 +100,26 @@ static inline void pmd_clear(pmd_t *pmd)
118 smp_wmb(); 100 smp_wmb();
119 *(tmp + 1) = 0; 101 *(tmp + 1) = 0;
120} 102}
103#endif
104
105/*
106 * Pentium-II erratum A13: in PAE mode we explicitly have to flush
107 * the TLB via cr3 if the top-level pgd is changed...
108 * We do not let the generic code free and clear pgd entries due to
109 * this erratum.
110 */
111static inline void pud_clear (pud_t * pud) { }
112
113#define pud_page(pud) \
114((struct page *) __va(pud_val(pud) & PAGE_MASK))
115
116#define pud_page_vaddr(pud) \
117((unsigned long) __va(pud_val(pud) & PAGE_MASK))
118
119
120/* Find an entry in the second-level page table.. */
121#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
122 pmd_index(address))
121 123
122#define __HAVE_ARCH_PTEP_GET_AND_CLEAR 124#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
123static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 125static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h
index 7d398f493dde..efd7d90789d0 100644
--- a/include/asm-i386/pgtable.h
+++ b/include/asm-i386/pgtable.h
@@ -15,6 +15,7 @@
15#include <asm/processor.h> 15#include <asm/processor.h>
16#include <asm/fixmap.h> 16#include <asm/fixmap.h>
17#include <linux/threads.h> 17#include <linux/threads.h>
18#include <asm/paravirt.h>
18 19
19#ifndef _I386_BITOPS_H 20#ifndef _I386_BITOPS_H
20#include <asm/bitops.h> 21#include <asm/bitops.h>
@@ -246,6 +247,7 @@ static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return p
246# include <asm/pgtable-2level.h> 247# include <asm/pgtable-2level.h>
247#endif 248#endif
248 249
250#ifndef CONFIG_PARAVIRT
249/* 251/*
250 * Rules for using pte_update - it must be called after any PTE update which 252 * Rules for using pte_update - it must be called after any PTE update which
251 * has not been done using the set_pte / clear_pte interfaces. It is used by 253 * has not been done using the set_pte / clear_pte interfaces. It is used by
@@ -261,7 +263,7 @@ static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return p
261 */ 263 */
262#define pte_update(mm, addr, ptep) do { } while (0) 264#define pte_update(mm, addr, ptep) do { } while (0)
263#define pte_update_defer(mm, addr, ptep) do { } while (0) 265#define pte_update_defer(mm, addr, ptep) do { } while (0)
264 266#endif
265 267
266/* 268/*
267 * We only update the dirty/accessed state if we set 269 * We only update the dirty/accessed state if we set
diff --git a/include/asm-i386/tlbflush.h b/include/asm-i386/tlbflush.h
index 360648b0f2b3..4dd82840d53b 100644
--- a/include/asm-i386/tlbflush.h
+++ b/include/asm-i386/tlbflush.h
@@ -4,7 +4,15 @@
4#include <linux/mm.h> 4#include <linux/mm.h>
5#include <asm/processor.h> 5#include <asm/processor.h>
6 6
7#define __flush_tlb() \ 7#ifdef CONFIG_PARAVIRT
8#include <asm/paravirt.h>
9#else
10#define __flush_tlb() __native_flush_tlb()
11#define __flush_tlb_global() __native_flush_tlb_global()
12#define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
13#endif
14
15#define __native_flush_tlb() \
8 do { \ 16 do { \
9 unsigned int tmpreg; \ 17 unsigned int tmpreg; \
10 \ 18 \
@@ -19,7 +27,7 @@
19 * Global pages have to be flushed a bit differently. Not a real 27 * Global pages have to be flushed a bit differently. Not a real
20 * performance problem because this does not happen often. 28 * performance problem because this does not happen often.
21 */ 29 */
22#define __flush_tlb_global() \ 30#define __native_flush_tlb_global() \
23 do { \ 31 do { \
24 unsigned int tmpreg, cr4, cr4_orig; \ 32 unsigned int tmpreg, cr4, cr4_orig; \
25 \ 33 \
@@ -36,6 +44,9 @@
36 : "memory"); \ 44 : "memory"); \
37 } while (0) 45 } while (0)
38 46
47#define __native_flush_tlb_single(addr) \
48 __asm__ __volatile__("invlpg (%0)" ::"r" (addr) : "memory")
49
39# define __flush_tlb_all() \ 50# define __flush_tlb_all() \
40 do { \ 51 do { \
41 if (cpu_has_pge) \ 52 if (cpu_has_pge) \
@@ -46,9 +57,6 @@
46 57
47#define cpu_has_invlpg (boot_cpu_data.x86 > 3) 58#define cpu_has_invlpg (boot_cpu_data.x86 > 3)
48 59
49#define __flush_tlb_single(addr) \
50 __asm__ __volatile__("invlpg (%0)" ::"r" (addr) : "memory")
51
52#ifdef CONFIG_X86_INVLPG 60#ifdef CONFIG_X86_INVLPG
53# define __flush_tlb_one(addr) __flush_tlb_single(addr) 61# define __flush_tlb_one(addr) __flush_tlb_single(addr)
54#else 62#else