diff options
author | Andrea Arcangeli <aarcange@redhat.com> | 2011-02-16 18:45:22 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-03-10 03:41:57 -0500 |
commit | a79e53d85683c6dd9f99c90511028adc2043031f (patch) | |
tree | 191f423331b608a5f7f134f484ce62a9ee7fb4a6 /arch/x86/mm | |
parent | f86268549f424f83b9eb0963989270e14fbfc3de (diff) |
x86/mm: Fix pgd_lock deadlock
It's forbidden to take the page_table_lock with the irq disabled
or if there's contention the IPIs (for tlb flushes) sent with
the page_table_lock held will never run leading to a deadlock.
Nobody takes the pgd_lock from irq context so the _irqsave can be
removed.
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Tested-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: <stable@kernel.org>
LKML-Reference: <201102162345.p1GNjMjm021738@imap1.linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/fault.c | 7 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 6 | ||||
-rw-r--r-- | arch/x86/mm/pageattr.c | 18 | ||||
-rw-r--r-- | arch/x86/mm/pgtable.c | 11 |
4 files changed, 18 insertions, 24 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index ffc7be104fc2..20e3f8702d1e 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -229,15 +229,14 @@ void vmalloc_sync_all(void) | |||
229 | for (address = VMALLOC_START & PMD_MASK; | 229 | for (address = VMALLOC_START & PMD_MASK; |
230 | address >= TASK_SIZE && address < FIXADDR_TOP; | 230 | address >= TASK_SIZE && address < FIXADDR_TOP; |
231 | address += PMD_SIZE) { | 231 | address += PMD_SIZE) { |
232 | |||
233 | unsigned long flags; | ||
234 | struct page *page; | 232 | struct page *page; |
235 | 233 | ||
236 | spin_lock_irqsave(&pgd_lock, flags); | 234 | spin_lock(&pgd_lock); |
237 | list_for_each_entry(page, &pgd_list, lru) { | 235 | list_for_each_entry(page, &pgd_list, lru) { |
238 | spinlock_t *pgt_lock; | 236 | spinlock_t *pgt_lock; |
239 | pmd_t *ret; | 237 | pmd_t *ret; |
240 | 238 | ||
239 | /* the pgt_lock only for Xen */ | ||
241 | pgt_lock = &pgd_page_get_mm(page)->page_table_lock; | 240 | pgt_lock = &pgd_page_get_mm(page)->page_table_lock; |
242 | 241 | ||
243 | spin_lock(pgt_lock); | 242 | spin_lock(pgt_lock); |
@@ -247,7 +246,7 @@ void vmalloc_sync_all(void) | |||
247 | if (!ret) | 246 | if (!ret) |
248 | break; | 247 | break; |
249 | } | 248 | } |
250 | spin_unlock_irqrestore(&pgd_lock, flags); | 249 | spin_unlock(&pgd_lock); |
251 | } | 250 | } |
252 | } | 251 | } |
253 | 252 | ||
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 71a59296af80..c14a5422e152 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -105,18 +105,18 @@ void sync_global_pgds(unsigned long start, unsigned long end) | |||
105 | 105 | ||
106 | for (address = start; address <= end; address += PGDIR_SIZE) { | 106 | for (address = start; address <= end; address += PGDIR_SIZE) { |
107 | const pgd_t *pgd_ref = pgd_offset_k(address); | 107 | const pgd_t *pgd_ref = pgd_offset_k(address); |
108 | unsigned long flags; | ||
109 | struct page *page; | 108 | struct page *page; |
110 | 109 | ||
111 | if (pgd_none(*pgd_ref)) | 110 | if (pgd_none(*pgd_ref)) |
112 | continue; | 111 | continue; |
113 | 112 | ||
114 | spin_lock_irqsave(&pgd_lock, flags); | 113 | spin_lock(&pgd_lock); |
115 | list_for_each_entry(page, &pgd_list, lru) { | 114 | list_for_each_entry(page, &pgd_list, lru) { |
116 | pgd_t *pgd; | 115 | pgd_t *pgd; |
117 | spinlock_t *pgt_lock; | 116 | spinlock_t *pgt_lock; |
118 | 117 | ||
119 | pgd = (pgd_t *)page_address(page) + pgd_index(address); | 118 | pgd = (pgd_t *)page_address(page) + pgd_index(address); |
119 | /* the pgt_lock only for Xen */ | ||
120 | pgt_lock = &pgd_page_get_mm(page)->page_table_lock; | 120 | pgt_lock = &pgd_page_get_mm(page)->page_table_lock; |
121 | spin_lock(pgt_lock); | 121 | spin_lock(pgt_lock); |
122 | 122 | ||
@@ -128,7 +128,7 @@ void sync_global_pgds(unsigned long start, unsigned long end) | |||
128 | 128 | ||
129 | spin_unlock(pgt_lock); | 129 | spin_unlock(pgt_lock); |
130 | } | 130 | } |
131 | spin_unlock_irqrestore(&pgd_lock, flags); | 131 | spin_unlock(&pgd_lock); |
132 | } | 132 | } |
133 | } | 133 | } |
134 | 134 | ||
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index d343b3c81f3c..90825f2eb0f4 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -57,12 +57,10 @@ static unsigned long direct_pages_count[PG_LEVEL_NUM]; | |||
57 | 57 | ||
58 | void update_page_count(int level, unsigned long pages) | 58 | void update_page_count(int level, unsigned long pages) |
59 | { | 59 | { |
60 | unsigned long flags; | ||
61 | |||
62 | /* Protect against CPA */ | 60 | /* Protect against CPA */ |
63 | spin_lock_irqsave(&pgd_lock, flags); | 61 | spin_lock(&pgd_lock); |
64 | direct_pages_count[level] += pages; | 62 | direct_pages_count[level] += pages; |
65 | spin_unlock_irqrestore(&pgd_lock, flags); | 63 | spin_unlock(&pgd_lock); |
66 | } | 64 | } |
67 | 65 | ||
68 | static void split_page_count(int level) | 66 | static void split_page_count(int level) |
@@ -394,7 +392,7 @@ static int | |||
394 | try_preserve_large_page(pte_t *kpte, unsigned long address, | 392 | try_preserve_large_page(pte_t *kpte, unsigned long address, |
395 | struct cpa_data *cpa) | 393 | struct cpa_data *cpa) |
396 | { | 394 | { |
397 | unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn; | 395 | unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn; |
398 | pte_t new_pte, old_pte, *tmp; | 396 | pte_t new_pte, old_pte, *tmp; |
399 | pgprot_t old_prot, new_prot, req_prot; | 397 | pgprot_t old_prot, new_prot, req_prot; |
400 | int i, do_split = 1; | 398 | int i, do_split = 1; |
@@ -403,7 +401,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, | |||
403 | if (cpa->force_split) | 401 | if (cpa->force_split) |
404 | return 1; | 402 | return 1; |
405 | 403 | ||
406 | spin_lock_irqsave(&pgd_lock, flags); | 404 | spin_lock(&pgd_lock); |
407 | /* | 405 | /* |
408 | * Check for races, another CPU might have split this page | 406 | * Check for races, another CPU might have split this page |
409 | * up already: | 407 | * up already: |
@@ -498,14 +496,14 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, | |||
498 | } | 496 | } |
499 | 497 | ||
500 | out_unlock: | 498 | out_unlock: |
501 | spin_unlock_irqrestore(&pgd_lock, flags); | 499 | spin_unlock(&pgd_lock); |
502 | 500 | ||
503 | return do_split; | 501 | return do_split; |
504 | } | 502 | } |
505 | 503 | ||
506 | static int split_large_page(pte_t *kpte, unsigned long address) | 504 | static int split_large_page(pte_t *kpte, unsigned long address) |
507 | { | 505 | { |
508 | unsigned long flags, pfn, pfninc = 1; | 506 | unsigned long pfn, pfninc = 1; |
509 | unsigned int i, level; | 507 | unsigned int i, level; |
510 | pte_t *pbase, *tmp; | 508 | pte_t *pbase, *tmp; |
511 | pgprot_t ref_prot; | 509 | pgprot_t ref_prot; |
@@ -519,7 +517,7 @@ static int split_large_page(pte_t *kpte, unsigned long address) | |||
519 | if (!base) | 517 | if (!base) |
520 | return -ENOMEM; | 518 | return -ENOMEM; |
521 | 519 | ||
522 | spin_lock_irqsave(&pgd_lock, flags); | 520 | spin_lock(&pgd_lock); |
523 | /* | 521 | /* |
524 | * Check for races, another CPU might have split this page | 522 | * Check for races, another CPU might have split this page |
525 | * up for us already: | 523 | * up for us already: |
@@ -591,7 +589,7 @@ out_unlock: | |||
591 | */ | 589 | */ |
592 | if (base) | 590 | if (base) |
593 | __free_page(base); | 591 | __free_page(base); |
594 | spin_unlock_irqrestore(&pgd_lock, flags); | 592 | spin_unlock(&pgd_lock); |
595 | 593 | ||
596 | return 0; | 594 | return 0; |
597 | } | 595 | } |
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 500242d3c96d..0113d19c8aa6 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
@@ -121,14 +121,12 @@ static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd) | |||
121 | 121 | ||
122 | static void pgd_dtor(pgd_t *pgd) | 122 | static void pgd_dtor(pgd_t *pgd) |
123 | { | 123 | { |
124 | unsigned long flags; /* can be called from interrupt context */ | ||
125 | |||
126 | if (SHARED_KERNEL_PMD) | 124 | if (SHARED_KERNEL_PMD) |
127 | return; | 125 | return; |
128 | 126 | ||
129 | spin_lock_irqsave(&pgd_lock, flags); | 127 | spin_lock(&pgd_lock); |
130 | pgd_list_del(pgd); | 128 | pgd_list_del(pgd); |
131 | spin_unlock_irqrestore(&pgd_lock, flags); | 129 | spin_unlock(&pgd_lock); |
132 | } | 130 | } |
133 | 131 | ||
134 | /* | 132 | /* |
@@ -260,7 +258,6 @@ pgd_t *pgd_alloc(struct mm_struct *mm) | |||
260 | { | 258 | { |
261 | pgd_t *pgd; | 259 | pgd_t *pgd; |
262 | pmd_t *pmds[PREALLOCATED_PMDS]; | 260 | pmd_t *pmds[PREALLOCATED_PMDS]; |
263 | unsigned long flags; | ||
264 | 261 | ||
265 | pgd = (pgd_t *)__get_free_page(PGALLOC_GFP); | 262 | pgd = (pgd_t *)__get_free_page(PGALLOC_GFP); |
266 | 263 | ||
@@ -280,12 +277,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm) | |||
280 | * respect to anything walking the pgd_list, so that they | 277 | * respect to anything walking the pgd_list, so that they |
281 | * never see a partially populated pgd. | 278 | * never see a partially populated pgd. |
282 | */ | 279 | */ |
283 | spin_lock_irqsave(&pgd_lock, flags); | 280 | spin_lock(&pgd_lock); |
284 | 281 | ||
285 | pgd_ctor(mm, pgd); | 282 | pgd_ctor(mm, pgd); |
286 | pgd_prepopulate_pmd(mm, pgd, pmds); | 283 | pgd_prepopulate_pmd(mm, pgd, pmds); |
287 | 284 | ||
288 | spin_unlock_irqrestore(&pgd_lock, flags); | 285 | spin_unlock(&pgd_lock); |
289 | 286 | ||
290 | return pgd; | 287 | return pgd; |
291 | 288 | ||