author    Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>  2010-09-21 15:01:51 -0400
committer H. Peter Anvin <hpa@linux.intel.com>                  2010-10-19 16:57:08 -0400
commit    617d34d9e5d8326ec8f188c616aa06ac59d083fe
tree      763d02b7713bad65ba819a8334bb0e95d4370352 /arch/x86/mm
parent    44235dcde416104b8e1db7606c283f4c0149c760
x86, mm: Hold mm->page_table_lock while doing vmalloc_sync
Take mm->page_table_lock while syncing the vmalloc region. This prevents
a race with the Xen pagetable pin/unpin code, which expects that the
page_table_lock is already held. If this race occurs, then Xen can see
an inconsistent page type (a page can either be read/write or a pagetable
page, and pin/unpin converts it between them), which will cause either
the pin or the set_p[gm]d to fail; either will crash the kernel.
vmalloc_sync_all() should be called rarely, so this extra use of
page_table_lock should not interfere with its normal users.
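In outline, the locking pattern this establishes nests each pgd's owning
mm->page_table_lock inside the existing pgd_lock. A condensed sketch of the
vmalloc_sync_all() change from the diff below (not the full patch):

    spin_lock_irqsave(&pgd_lock, flags);        /* outer: guards pgd_list */
    list_for_each_entry(page, &pgd_list, lru) {
        /* inner: the same lock the Xen pin/unpin path holds while it
         * converts pages between read/write and pagetable type */
        spinlock_t *pgt_lock = &pgd_page_get_mm(page)->page_table_lock;

        spin_lock(pgt_lock);
        /* ... sync this pgd while pin/unpin is excluded ... */
        spin_unlock(pgt_lock);
    }
    spin_unlock_irqrestore(&pgd_lock, flags);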
The mm pointer is stashed in the pgd page's index field, as that won't
be otherwise used for pgds.
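Concretely, the stash/recover pair added in pgtable.c (quoted from the diff
below) relies on a pgd page never being part of the page cache, which leaves
the struct page index field free to carry the back-pointer:

    static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
    {
        /* index must be wide enough to hold the pointer */
        BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm));
        virt_to_page(pgd)->index = (pgoff_t)mm;
    }

    struct mm_struct *pgd_page_get_mm(struct page *page)
    {
        return (struct mm_struct *)page->index;
    }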
Reported-by: Ian Campbell <ian.campbell@eu.citrix.com>
Originally-by: Jan Beulich <jbeulich@novell.com>
LKML-Reference: <4CB88A4C.1080305@goop.org>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86/mm')
 arch/x86/mm/fault.c   | 11 ++++++++++-
 arch/x86/mm/init_64.c |  7 +++++++
 arch/x86/mm/pgtable.c | 20 +++++++++++++++++---
 3 files changed, 34 insertions(+), 4 deletions(-)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index caec22906d7c..6c27c39f8a37 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -229,7 +229,16 @@ void vmalloc_sync_all(void)
 
 	spin_lock_irqsave(&pgd_lock, flags);
 	list_for_each_entry(page, &pgd_list, lru) {
-		if (!vmalloc_sync_one(page_address(page), address))
+		spinlock_t *pgt_lock;
+		int ret;
+
+		pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
+
+		spin_lock(pgt_lock);
+		ret = vmalloc_sync_one(page_address(page), address);
+		spin_unlock(pgt_lock);
+
+		if (!ret)
 			break;
 	}
 	spin_unlock_irqrestore(&pgd_lock, flags);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 1ad7c0ff5d2b..4d323fb770c2 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -116,12 +116,19 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 	spin_lock_irqsave(&pgd_lock, flags);
 	list_for_each_entry(page, &pgd_list, lru) {
 		pgd_t *pgd;
+		spinlock_t *pgt_lock;
+
 		pgd = (pgd_t *)page_address(page) + pgd_index(address);
+		pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
+		spin_lock(pgt_lock);
+
 		if (pgd_none(*pgd))
 			set_pgd(pgd, *pgd_ref);
 		else
 			BUG_ON(pgd_page_vaddr(*pgd)
 			       != pgd_page_vaddr(*pgd_ref));
+
+		spin_unlock(pgt_lock);
 	}
 	spin_unlock_irqrestore(&pgd_lock, flags);
 }
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 5c4ee422590e..c70e57dbb491 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -87,7 +87,19 @@ static inline void pgd_list_del(pgd_t *pgd)
 #define UNSHARED_PTRS_PER_PGD \
 	(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
 
-static void pgd_ctor(pgd_t *pgd)
+
+static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
+{
+	BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm));
+	virt_to_page(pgd)->index = (pgoff_t)mm;
+}
+
+struct mm_struct *pgd_page_get_mm(struct page *page)
+{
+	return (struct mm_struct *)page->index;
+}
+
+static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
 {
 	/* If the pgd points to a shared pagetable level (either the
 	   ptes in non-PAE, or shared PMD in PAE), then just copy the
@@ -105,8 +117,10 @@ static void pgd_ctor(pgd_t *pgd)
 	}
 
 	/* list required to sync kernel mapping updates */
-	if (!SHARED_KERNEL_PMD)
+	if (!SHARED_KERNEL_PMD) {
+		pgd_set_mm(pgd, mm);
 		pgd_list_add(pgd);
+	}
 }
 
 static void pgd_dtor(pgd_t *pgd)
@@ -272,7 +286,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	 */
 	spin_lock_irqsave(&pgd_lock, flags);
 
-	pgd_ctor(pgd);
+	pgd_ctor(mm, pgd);
 	pgd_prepopulate_pmd(mm, pgd, pmds);
 
 	spin_unlock_irqrestore(&pgd_lock, flags);