| field | value | date |
|---|---|---|
| author | Andrea Arcangeli <aarcange@redhat.com> | 2011-02-16 18:45:22 -0500 |
| committer | Ingo Molnar <mingo@elte.hu> | 2011-03-10 03:41:57 -0500 |
| commit | a79e53d85683c6dd9f99c90511028adc2043031f | |
| tree | 191f423331b608a5f7f134f484ce62a9ee7fb4a6 | |
| parent | f86268549f424f83b9eb0963989270e14fbfc3de | |
x86/mm: Fix pgd_lock deadlock
It's forbidden to take the page_table_lock with irqs disabled: if there's contention, the IPIs (for TLB flushes) sent while the page_table_lock is held will never run, leading to a deadlock.

Nobody takes the pgd_lock from irq context, so the _irqsave variant can be removed.
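To make the failure mode concrete, here is a two-CPU timeline of the deadlock. This is an illustrative sketch, not the exact call chain; `flush_tlb_others()` stands in for whichever TLB-flush path ends up sending the IPI:

```c
/*
 * Illustrative sketch of the deadlock; not actual kernel code paths.
 *
 *   CPU 0                                  CPU 1
 *   -----                                  -----
 *   spin_lock_irqsave(&pgd_lock, flags);
 *   // lock held, irqs off
 *                                          spin_lock_irqsave(&pgd_lock, flags);
 *                                          // spins on pgd_lock with irqs off
 *   flush_tlb_others(mask, mm, va);
 *   // sends a TLB-flush IPI to CPU 1
 *   // and waits for it to be acknowledged
 *
 * CPU 1 is spinning with irqs disabled, so it can never service the
 * IPI; CPU 0 never gets its acknowledgement, so it never releases
 * pgd_lock.  With plain spin_lock(), a contending CPU keeps irqs
 * enabled while it spins, the IPI is delivered, and both CPUs make
 * progress.
 */
```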
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Tested-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: <stable@kernel.org>
LKML-Reference: <201102162345.p1GNjMjm021738@imap1.linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
```
 arch/x86/mm/fault.c    |  7
 arch/x86/mm/init_64.c  |  6
 arch/x86/mm/pageattr.c | 18
 arch/x86/mm/pgtable.c  | 11
 arch/x86/xen/mmu.c     | 10

 5 files changed, 22 insertions(+), 30 deletions(-)
```
```diff
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index ffc7be104fc..20e3f8702d1 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -229,15 +229,14 @@ void vmalloc_sync_all(void)
 	for (address = VMALLOC_START & PMD_MASK;
 	     address >= TASK_SIZE && address < FIXADDR_TOP;
 	     address += PMD_SIZE) {
-
-		unsigned long flags;
 		struct page *page;
 
-		spin_lock_irqsave(&pgd_lock, flags);
+		spin_lock(&pgd_lock);
 		list_for_each_entry(page, &pgd_list, lru) {
 			spinlock_t *pgt_lock;
 			pmd_t *ret;
 
+			/* the pgt_lock only for Xen */
 			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
 
 			spin_lock(pgt_lock);
@@ -247,7 +246,7 @@ void vmalloc_sync_all(void)
 			if (!ret)
 				break;
 		}
-		spin_unlock_irqrestore(&pgd_lock, flags);
+		spin_unlock(&pgd_lock);
 	}
 }
 
```
```diff
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 71a59296af8..c14a5422e15 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -105,18 +105,18 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 
 	for (address = start; address <= end; address += PGDIR_SIZE) {
 		const pgd_t *pgd_ref = pgd_offset_k(address);
-		unsigned long flags;
 		struct page *page;
 
 		if (pgd_none(*pgd_ref))
 			continue;
 
-		spin_lock_irqsave(&pgd_lock, flags);
+		spin_lock(&pgd_lock);
 		list_for_each_entry(page, &pgd_list, lru) {
 			pgd_t *pgd;
 			spinlock_t *pgt_lock;
 
 			pgd = (pgd_t *)page_address(page) + pgd_index(address);
+			/* the pgt_lock only for Xen */
 			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
 			spin_lock(pgt_lock);
 
@@ -128,7 +128,7 @@ void sync_global_pgds(unsigned long start, unsigned long end)
 
 			spin_unlock(pgt_lock);
 		}
-		spin_unlock_irqrestore(&pgd_lock, flags);
+		spin_unlock(&pgd_lock);
 	}
 }
 
```
```diff
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index d343b3c81f3..90825f2eb0f 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -57,12 +57,10 @@ static unsigned long direct_pages_count[PG_LEVEL_NUM];
 
 void update_page_count(int level, unsigned long pages)
 {
-	unsigned long flags;
-
 	/* Protect against CPA */
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock(&pgd_lock);
 	direct_pages_count[level] += pages;
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock(&pgd_lock);
 }
 
 static void split_page_count(int level)
@@ -394,7 +392,7 @@ static int
 try_preserve_large_page(pte_t *kpte, unsigned long address,
 			struct cpa_data *cpa)
 {
-	unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn;
+	unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn;
 	pte_t new_pte, old_pte, *tmp;
 	pgprot_t old_prot, new_prot, req_prot;
 	int i, do_split = 1;
@@ -403,7 +401,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	if (cpa->force_split)
 		return 1;
 
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock(&pgd_lock);
 	/*
 	 * Check for races, another CPU might have split this page
 	 * up already:
@@ -498,14 +496,14 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	}
 
 out_unlock:
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock(&pgd_lock);
 
 	return do_split;
 }
 
 static int split_large_page(pte_t *kpte, unsigned long address)
 {
-	unsigned long flags, pfn, pfninc = 1;
+	unsigned long pfn, pfninc = 1;
 	unsigned int i, level;
 	pte_t *pbase, *tmp;
 	pgprot_t ref_prot;
@@ -519,7 +517,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	if (!base)
 		return -ENOMEM;
 
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock(&pgd_lock);
 	/*
 	 * Check for races, another CPU might have split this page
 	 * up for us already:
@@ -591,7 +589,7 @@ out_unlock:
 	 */
 	if (base)
 		__free_page(base);
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock(&pgd_lock);
 
 	return 0;
 }
```
```diff
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 500242d3c96..0113d19c8aa 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -121,14 +121,12 @@ static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
 
 static void pgd_dtor(pgd_t *pgd)
 {
-	unsigned long flags; /* can be called from interrupt context */
-
 	if (SHARED_KERNEL_PMD)
 		return;
 
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock(&pgd_lock);
 	pgd_list_del(pgd);
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock(&pgd_lock);
 }
 
 /*
@@ -260,7 +258,6 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *pgd;
 	pmd_t *pmds[PREALLOCATED_PMDS];
-	unsigned long flags;
 
 	pgd = (pgd_t *)__get_free_page(PGALLOC_GFP);
 
@@ -280,12 +277,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	 * respect to anything walking the pgd_list, so that they
 	 * never see a partially populated pgd.
 	 */
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock(&pgd_lock);
 
 	pgd_ctor(mm, pgd);
 	pgd_prepopulate_pmd(mm, pgd, pmds);
 
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock(&pgd_lock);
 
 	return pgd;
 
```
```diff
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 5e92b61ad57..f6089421147 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -986,10 +986,9 @@ static void xen_pgd_pin(struct mm_struct *mm)
  */
 void xen_mm_pin_all(void)
 {
-	unsigned long flags;
 	struct page *page;
 
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock(&pgd_lock);
 
 	list_for_each_entry(page, &pgd_list, lru) {
 		if (!PagePinned(page)) {
@@ -998,7 +997,7 @@ void xen_mm_pin_all(void)
 		}
 	}
 
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock(&pgd_lock);
 }
 
 /*
@@ -1099,10 +1098,9 @@ static void xen_pgd_unpin(struct mm_struct *mm)
  */
 void xen_mm_unpin_all(void)
 {
-	unsigned long flags;
 	struct page *page;
 
-	spin_lock_irqsave(&pgd_lock, flags);
+	spin_lock(&pgd_lock);
 
 	list_for_each_entry(page, &pgd_list, lru) {
 		if (PageSavePinned(page)) {
@@ -1112,7 +1110,7 @@ void xen_mm_unpin_all(void)
 		}
 	}
 
-	spin_unlock_irqrestore(&pgd_lock, flags);
+	spin_unlock(&pgd_lock);
 }
 
 void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
```
