diff options
author | Andrea Arcangeli <aarcange@redhat.com> | 2011-02-16 18:45:22 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-03-10 03:41:57 -0500 |
commit | a79e53d85683c6dd9f99c90511028adc2043031f (patch) | |
tree | 191f423331b608a5f7f134f484ce62a9ee7fb4a6 | |
parent | f86268549f424f83b9eb0963989270e14fbfc3de (diff) |
x86/mm: Fix pgd_lock deadlock
It's forbidden to take the page_table_lock with the irq disabled
or if there's contention the IPIs (for tlb flushes) sent with
the page_table_lock held will never run leading to a deadlock.
Nobody takes the pgd_lock from irq context so the _irqsave can be
removed.
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Tested-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: <stable@kernel.org>
LKML-Reference: <201102162345.p1GNjMjm021738@imap1.linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | arch/x86/mm/fault.c | 7 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 6 | ||||
-rw-r--r-- | arch/x86/mm/pageattr.c | 18 | ||||
-rw-r--r-- | arch/x86/mm/pgtable.c | 11 | ||||
-rw-r--r-- | arch/x86/xen/mmu.c | 10 |
5 files changed, 22 insertions, 30 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index ffc7be104fc2..20e3f8702d1e 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -229,15 +229,14 @@ void vmalloc_sync_all(void) | |||
229 | for (address = VMALLOC_START & PMD_MASK; | 229 | for (address = VMALLOC_START & PMD_MASK; |
230 | address >= TASK_SIZE && address < FIXADDR_TOP; | 230 | address >= TASK_SIZE && address < FIXADDR_TOP; |
231 | address += PMD_SIZE) { | 231 | address += PMD_SIZE) { |
232 | |||
233 | unsigned long flags; | ||
234 | struct page *page; | 232 | struct page *page; |
235 | 233 | ||
236 | spin_lock_irqsave(&pgd_lock, flags); | 234 | spin_lock(&pgd_lock); |
237 | list_for_each_entry(page, &pgd_list, lru) { | 235 | list_for_each_entry(page, &pgd_list, lru) { |
238 | spinlock_t *pgt_lock; | 236 | spinlock_t *pgt_lock; |
239 | pmd_t *ret; | 237 | pmd_t *ret; |
240 | 238 | ||
239 | /* the pgt_lock only for Xen */ | ||
241 | pgt_lock = &pgd_page_get_mm(page)->page_table_lock; | 240 | pgt_lock = &pgd_page_get_mm(page)->page_table_lock; |
242 | 241 | ||
243 | spin_lock(pgt_lock); | 242 | spin_lock(pgt_lock); |
@@ -247,7 +246,7 @@ void vmalloc_sync_all(void) | |||
247 | if (!ret) | 246 | if (!ret) |
248 | break; | 247 | break; |
249 | } | 248 | } |
250 | spin_unlock_irqrestore(&pgd_lock, flags); | 249 | spin_unlock(&pgd_lock); |
251 | } | 250 | } |
252 | } | 251 | } |
253 | 252 | ||
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 71a59296af80..c14a5422e152 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -105,18 +105,18 @@ void sync_global_pgds(unsigned long start, unsigned long end) | |||
105 | 105 | ||
106 | for (address = start; address <= end; address += PGDIR_SIZE) { | 106 | for (address = start; address <= end; address += PGDIR_SIZE) { |
107 | const pgd_t *pgd_ref = pgd_offset_k(address); | 107 | const pgd_t *pgd_ref = pgd_offset_k(address); |
108 | unsigned long flags; | ||
109 | struct page *page; | 108 | struct page *page; |
110 | 109 | ||
111 | if (pgd_none(*pgd_ref)) | 110 | if (pgd_none(*pgd_ref)) |
112 | continue; | 111 | continue; |
113 | 112 | ||
114 | spin_lock_irqsave(&pgd_lock, flags); | 113 | spin_lock(&pgd_lock); |
115 | list_for_each_entry(page, &pgd_list, lru) { | 114 | list_for_each_entry(page, &pgd_list, lru) { |
116 | pgd_t *pgd; | 115 | pgd_t *pgd; |
117 | spinlock_t *pgt_lock; | 116 | spinlock_t *pgt_lock; |
118 | 117 | ||
119 | pgd = (pgd_t *)page_address(page) + pgd_index(address); | 118 | pgd = (pgd_t *)page_address(page) + pgd_index(address); |
119 | /* the pgt_lock only for Xen */ | ||
120 | pgt_lock = &pgd_page_get_mm(page)->page_table_lock; | 120 | pgt_lock = &pgd_page_get_mm(page)->page_table_lock; |
121 | spin_lock(pgt_lock); | 121 | spin_lock(pgt_lock); |
122 | 122 | ||
@@ -128,7 +128,7 @@ void sync_global_pgds(unsigned long start, unsigned long end) | |||
128 | 128 | ||
129 | spin_unlock(pgt_lock); | 129 | spin_unlock(pgt_lock); |
130 | } | 130 | } |
131 | spin_unlock_irqrestore(&pgd_lock, flags); | 131 | spin_unlock(&pgd_lock); |
132 | } | 132 | } |
133 | } | 133 | } |
134 | 134 | ||
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index d343b3c81f3c..90825f2eb0f4 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -57,12 +57,10 @@ static unsigned long direct_pages_count[PG_LEVEL_NUM]; | |||
57 | 57 | ||
58 | void update_page_count(int level, unsigned long pages) | 58 | void update_page_count(int level, unsigned long pages) |
59 | { | 59 | { |
60 | unsigned long flags; | ||
61 | |||
62 | /* Protect against CPA */ | 60 | /* Protect against CPA */ |
63 | spin_lock_irqsave(&pgd_lock, flags); | 61 | spin_lock(&pgd_lock); |
64 | direct_pages_count[level] += pages; | 62 | direct_pages_count[level] += pages; |
65 | spin_unlock_irqrestore(&pgd_lock, flags); | 63 | spin_unlock(&pgd_lock); |
66 | } | 64 | } |
67 | 65 | ||
68 | static void split_page_count(int level) | 66 | static void split_page_count(int level) |
@@ -394,7 +392,7 @@ static int | |||
394 | try_preserve_large_page(pte_t *kpte, unsigned long address, | 392 | try_preserve_large_page(pte_t *kpte, unsigned long address, |
395 | struct cpa_data *cpa) | 393 | struct cpa_data *cpa) |
396 | { | 394 | { |
397 | unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn; | 395 | unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn; |
398 | pte_t new_pte, old_pte, *tmp; | 396 | pte_t new_pte, old_pte, *tmp; |
399 | pgprot_t old_prot, new_prot, req_prot; | 397 | pgprot_t old_prot, new_prot, req_prot; |
400 | int i, do_split = 1; | 398 | int i, do_split = 1; |
@@ -403,7 +401,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, | |||
403 | if (cpa->force_split) | 401 | if (cpa->force_split) |
404 | return 1; | 402 | return 1; |
405 | 403 | ||
406 | spin_lock_irqsave(&pgd_lock, flags); | 404 | spin_lock(&pgd_lock); |
407 | /* | 405 | /* |
408 | * Check for races, another CPU might have split this page | 406 | * Check for races, another CPU might have split this page |
409 | * up already: | 407 | * up already: |
@@ -498,14 +496,14 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, | |||
498 | } | 496 | } |
499 | 497 | ||
500 | out_unlock: | 498 | out_unlock: |
501 | spin_unlock_irqrestore(&pgd_lock, flags); | 499 | spin_unlock(&pgd_lock); |
502 | 500 | ||
503 | return do_split; | 501 | return do_split; |
504 | } | 502 | } |
505 | 503 | ||
506 | static int split_large_page(pte_t *kpte, unsigned long address) | 504 | static int split_large_page(pte_t *kpte, unsigned long address) |
507 | { | 505 | { |
508 | unsigned long flags, pfn, pfninc = 1; | 506 | unsigned long pfn, pfninc = 1; |
509 | unsigned int i, level; | 507 | unsigned int i, level; |
510 | pte_t *pbase, *tmp; | 508 | pte_t *pbase, *tmp; |
511 | pgprot_t ref_prot; | 509 | pgprot_t ref_prot; |
@@ -519,7 +517,7 @@ static int split_large_page(pte_t *kpte, unsigned long address) | |||
519 | if (!base) | 517 | if (!base) |
520 | return -ENOMEM; | 518 | return -ENOMEM; |
521 | 519 | ||
522 | spin_lock_irqsave(&pgd_lock, flags); | 520 | spin_lock(&pgd_lock); |
523 | /* | 521 | /* |
524 | * Check for races, another CPU might have split this page | 522 | * Check for races, another CPU might have split this page |
525 | * up for us already: | 523 | * up for us already: |
@@ -591,7 +589,7 @@ out_unlock: | |||
591 | */ | 589 | */ |
592 | if (base) | 590 | if (base) |
593 | __free_page(base); | 591 | __free_page(base); |
594 | spin_unlock_irqrestore(&pgd_lock, flags); | 592 | spin_unlock(&pgd_lock); |
595 | 593 | ||
596 | return 0; | 594 | return 0; |
597 | } | 595 | } |
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 500242d3c96d..0113d19c8aa6 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
@@ -121,14 +121,12 @@ static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd) | |||
121 | 121 | ||
122 | static void pgd_dtor(pgd_t *pgd) | 122 | static void pgd_dtor(pgd_t *pgd) |
123 | { | 123 | { |
124 | unsigned long flags; /* can be called from interrupt context */ | ||
125 | |||
126 | if (SHARED_KERNEL_PMD) | 124 | if (SHARED_KERNEL_PMD) |
127 | return; | 125 | return; |
128 | 126 | ||
129 | spin_lock_irqsave(&pgd_lock, flags); | 127 | spin_lock(&pgd_lock); |
130 | pgd_list_del(pgd); | 128 | pgd_list_del(pgd); |
131 | spin_unlock_irqrestore(&pgd_lock, flags); | 129 | spin_unlock(&pgd_lock); |
132 | } | 130 | } |
133 | 131 | ||
134 | /* | 132 | /* |
@@ -260,7 +258,6 @@ pgd_t *pgd_alloc(struct mm_struct *mm) | |||
260 | { | 258 | { |
261 | pgd_t *pgd; | 259 | pgd_t *pgd; |
262 | pmd_t *pmds[PREALLOCATED_PMDS]; | 260 | pmd_t *pmds[PREALLOCATED_PMDS]; |
263 | unsigned long flags; | ||
264 | 261 | ||
265 | pgd = (pgd_t *)__get_free_page(PGALLOC_GFP); | 262 | pgd = (pgd_t *)__get_free_page(PGALLOC_GFP); |
266 | 263 | ||
@@ -280,12 +277,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm) | |||
280 | * respect to anything walking the pgd_list, so that they | 277 | * respect to anything walking the pgd_list, so that they |
281 | * never see a partially populated pgd. | 278 | * never see a partially populated pgd. |
282 | */ | 279 | */ |
283 | spin_lock_irqsave(&pgd_lock, flags); | 280 | spin_lock(&pgd_lock); |
284 | 281 | ||
285 | pgd_ctor(mm, pgd); | 282 | pgd_ctor(mm, pgd); |
286 | pgd_prepopulate_pmd(mm, pgd, pmds); | 283 | pgd_prepopulate_pmd(mm, pgd, pmds); |
287 | 284 | ||
288 | spin_unlock_irqrestore(&pgd_lock, flags); | 285 | spin_unlock(&pgd_lock); |
289 | 286 | ||
290 | return pgd; | 287 | return pgd; |
291 | 288 | ||
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 5e92b61ad574..f6089421147a 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -986,10 +986,9 @@ static void xen_pgd_pin(struct mm_struct *mm) | |||
986 | */ | 986 | */ |
987 | void xen_mm_pin_all(void) | 987 | void xen_mm_pin_all(void) |
988 | { | 988 | { |
989 | unsigned long flags; | ||
990 | struct page *page; | 989 | struct page *page; |
991 | 990 | ||
992 | spin_lock_irqsave(&pgd_lock, flags); | 991 | spin_lock(&pgd_lock); |
993 | 992 | ||
994 | list_for_each_entry(page, &pgd_list, lru) { | 993 | list_for_each_entry(page, &pgd_list, lru) { |
995 | if (!PagePinned(page)) { | 994 | if (!PagePinned(page)) { |
@@ -998,7 +997,7 @@ void xen_mm_pin_all(void) | |||
998 | } | 997 | } |
999 | } | 998 | } |
1000 | 999 | ||
1001 | spin_unlock_irqrestore(&pgd_lock, flags); | 1000 | spin_unlock(&pgd_lock); |
1002 | } | 1001 | } |
1003 | 1002 | ||
1004 | /* | 1003 | /* |
@@ -1099,10 +1098,9 @@ static void xen_pgd_unpin(struct mm_struct *mm) | |||
1099 | */ | 1098 | */ |
1100 | void xen_mm_unpin_all(void) | 1099 | void xen_mm_unpin_all(void) |
1101 | { | 1100 | { |
1102 | unsigned long flags; | ||
1103 | struct page *page; | 1101 | struct page *page; |
1104 | 1102 | ||
1105 | spin_lock_irqsave(&pgd_lock, flags); | 1103 | spin_lock(&pgd_lock); |
1106 | 1104 | ||
1107 | list_for_each_entry(page, &pgd_list, lru) { | 1105 | list_for_each_entry(page, &pgd_list, lru) { |
1108 | if (PageSavePinned(page)) { | 1106 | if (PageSavePinned(page)) { |
@@ -1112,7 +1110,7 @@ void xen_mm_unpin_all(void) | |||
1112 | } | 1110 | } |
1113 | } | 1111 | } |
1114 | 1112 | ||
1115 | spin_unlock_irqrestore(&pgd_lock, flags); | 1113 | spin_unlock(&pgd_lock); |
1116 | } | 1114 | } |
1117 | 1115 | ||
1118 | void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) | 1116 | void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) |