diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-29 14:32:28 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-29 14:32:28 -0400 |
commit | 6345d24daf0c1fffe6642081d783cdf653ebaa5c (patch) | |
tree | 415a253621279111bd481d48cbb86174c70b952a | |
parent | cab0d85c8dfcad4d799f9c294571440c6f1db091 (diff) |
mm: Fix boot crash in mm_alloc()
Thomas Gleixner reports that we now have a boot crash triggered by
CONFIG_CPUMASK_OFFSTACK=y:
BUG: unable to handle kernel NULL pointer dereference at (null)
IP: [<c11ae035>] find_next_bit+0x55/0xb0
Call Trace:
[<c11addda>] cpumask_any_but+0x2a/0x70
[<c102396b>] flush_tlb_mm+0x2b/0x80
[<c1022705>] pud_populate+0x35/0x50
[<c10227ba>] pgd_alloc+0x9a/0xf0
[<c103a3fc>] mm_init+0xec/0x120
[<c103a7a3>] mm_alloc+0x53/0xd0
which was introduced by commit de03c72cfce5 ("mm: convert
mm->cpu_vm_cpumask into cpumask_var_t"), and is due to wrong ordering of
mm_init() vs mm_init_cpumask().
Thomas wrote a patch to just fix the ordering of initialization, but I
hate the new double allocation in the fork path, so I ended up instead
doing some more radical surgery to clean it all up.
Reported-by: Thomas Gleixner <tglx@linutronix.de>
Reported-by: Ingo Molnar <mingo@elte.hu>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/mm_types.h | 14 | ||||
-rw-r--r-- | include/linux/sched.h | 1 | ||||
-rw-r--r-- | init/main.c | 2 | ||||
-rw-r--r-- | kernel/fork.c | 42 |
4 files changed, 23 insertions, 36 deletions
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 2a78aae78c69..027935c86c68 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h | |||
@@ -264,6 +264,8 @@ struct mm_struct { | |||
264 | 264 | ||
265 | struct linux_binfmt *binfmt; | 265 | struct linux_binfmt *binfmt; |
266 | 266 | ||
267 | cpumask_var_t cpu_vm_mask_var; | ||
268 | |||
267 | /* Architecture-specific MM context */ | 269 | /* Architecture-specific MM context */ |
268 | mm_context_t context; | 270 | mm_context_t context; |
269 | 271 | ||
@@ -311,10 +313,18 @@ struct mm_struct { | |||
311 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 313 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
312 | pgtable_t pmd_huge_pte; /* protected by page_table_lock */ | 314 | pgtable_t pmd_huge_pte; /* protected by page_table_lock */ |
313 | #endif | 315 | #endif |
314 | 316 | #ifdef CONFIG_CPUMASK_OFFSTACK | |
315 | cpumask_var_t cpu_vm_mask_var; | 317 | struct cpumask cpumask_allocation; |
318 | #endif | ||
316 | }; | 319 | }; |
317 | 320 | ||
321 | static inline void mm_init_cpumask(struct mm_struct *mm) | ||
322 | { | ||
323 | #ifdef CONFIG_CPUMASK_OFFSTACK | ||
324 | mm->cpu_vm_mask_var = &mm->cpumask_allocation; | ||
325 | #endif | ||
326 | } | ||
327 | |||
318 | /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */ | 328 | /* Future-safe accessor for struct mm_struct's cpu_vm_mask. */ |
319 | static inline cpumask_t *mm_cpumask(struct mm_struct *mm) | 329 | static inline cpumask_t *mm_cpumask(struct mm_struct *mm) |
320 | { | 330 | { |
diff --git a/include/linux/sched.h b/include/linux/sched.h index bcddd0138105..2a8621c4be1e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -2194,7 +2194,6 @@ static inline void mmdrop(struct mm_struct * mm) | |||
2194 | if (unlikely(atomic_dec_and_test(&mm->mm_count))) | 2194 | if (unlikely(atomic_dec_and_test(&mm->mm_count))) |
2195 | __mmdrop(mm); | 2195 | __mmdrop(mm); |
2196 | } | 2196 | } |
2197 | extern int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm); | ||
2198 | 2197 | ||
2199 | /* mmput gets rid of the mappings and all user-space */ | 2198 | /* mmput gets rid of the mappings and all user-space */ |
2200 | extern void mmput(struct mm_struct *); | 2199 | extern void mmput(struct mm_struct *); |
diff --git a/init/main.c b/init/main.c index d2f1e086bf33..cafba67c13bf 100644 --- a/init/main.c +++ b/init/main.c | |||
@@ -487,6 +487,7 @@ asmlinkage void __init start_kernel(void) | |||
487 | printk(KERN_NOTICE "%s", linux_banner); | 487 | printk(KERN_NOTICE "%s", linux_banner); |
488 | setup_arch(&command_line); | 488 | setup_arch(&command_line); |
489 | mm_init_owner(&init_mm, &init_task); | 489 | mm_init_owner(&init_mm, &init_task); |
490 | mm_init_cpumask(&init_mm); | ||
490 | setup_command_line(command_line); | 491 | setup_command_line(command_line); |
491 | setup_nr_cpu_ids(); | 492 | setup_nr_cpu_ids(); |
492 | setup_per_cpu_areas(); | 493 | setup_per_cpu_areas(); |
@@ -510,7 +511,6 @@ asmlinkage void __init start_kernel(void) | |||
510 | sort_main_extable(); | 511 | sort_main_extable(); |
511 | trap_init(); | 512 | trap_init(); |
512 | mm_init(); | 513 | mm_init(); |
513 | BUG_ON(mm_init_cpumask(&init_mm, 0)); | ||
514 | 514 | ||
515 | /* | 515 | /* |
516 | * Set up the scheduler prior starting any interrupts (such as the | 516 | * Set up the scheduler prior starting any interrupts (such as the |
diff --git a/kernel/fork.c b/kernel/fork.c index ca406d916713..0276c30401a0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -484,20 +484,6 @@ static void mm_init_aio(struct mm_struct *mm) | |||
484 | #endif | 484 | #endif |
485 | } | 485 | } |
486 | 486 | ||
487 | int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm) | ||
488 | { | ||
489 | #ifdef CONFIG_CPUMASK_OFFSTACK | ||
490 | if (!alloc_cpumask_var(&mm->cpu_vm_mask_var, GFP_KERNEL)) | ||
491 | return -ENOMEM; | ||
492 | |||
493 | if (oldmm) | ||
494 | cpumask_copy(mm_cpumask(mm), mm_cpumask(oldmm)); | ||
495 | else | ||
496 | memset(mm_cpumask(mm), 0, cpumask_size()); | ||
497 | #endif | ||
498 | return 0; | ||
499 | } | ||
500 | |||
501 | static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) | 487 | static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) |
502 | { | 488 | { |
503 | atomic_set(&mm->mm_users, 1); | 489 | atomic_set(&mm->mm_users, 1); |
@@ -538,17 +524,8 @@ struct mm_struct * mm_alloc(void) | |||
538 | return NULL; | 524 | return NULL; |
539 | 525 | ||
540 | memset(mm, 0, sizeof(*mm)); | 526 | memset(mm, 0, sizeof(*mm)); |
541 | mm = mm_init(mm, current); | 527 | mm_init_cpumask(mm); |
542 | if (!mm) | 528 | return mm_init(mm, current); |
543 | return NULL; | ||
544 | |||
545 | if (mm_init_cpumask(mm, NULL)) { | ||
546 | mm_free_pgd(mm); | ||
547 | free_mm(mm); | ||
548 | return NULL; | ||
549 | } | ||
550 | |||
551 | return mm; | ||
552 | } | 529 | } |
553 | 530 | ||
554 | /* | 531 | /* |
@@ -559,7 +536,6 @@ struct mm_struct * mm_alloc(void) | |||
559 | void __mmdrop(struct mm_struct *mm) | 536 | void __mmdrop(struct mm_struct *mm) |
560 | { | 537 | { |
561 | BUG_ON(mm == &init_mm); | 538 | BUG_ON(mm == &init_mm); |
562 | free_cpumask_var(mm->cpu_vm_mask_var); | ||
563 | mm_free_pgd(mm); | 539 | mm_free_pgd(mm); |
564 | destroy_context(mm); | 540 | destroy_context(mm); |
565 | mmu_notifier_mm_destroy(mm); | 541 | mmu_notifier_mm_destroy(mm); |
@@ -753,6 +729,7 @@ struct mm_struct *dup_mm(struct task_struct *tsk) | |||
753 | goto fail_nomem; | 729 | goto fail_nomem; |
754 | 730 | ||
755 | memcpy(mm, oldmm, sizeof(*mm)); | 731 | memcpy(mm, oldmm, sizeof(*mm)); |
732 | mm_init_cpumask(mm); | ||
756 | 733 | ||
757 | /* Initializing for Swap token stuff */ | 734 | /* Initializing for Swap token stuff */ |
758 | mm->token_priority = 0; | 735 | mm->token_priority = 0; |
@@ -765,9 +742,6 @@ struct mm_struct *dup_mm(struct task_struct *tsk) | |||
765 | if (!mm_init(mm, tsk)) | 742 | if (!mm_init(mm, tsk)) |
766 | goto fail_nomem; | 743 | goto fail_nomem; |
767 | 744 | ||
768 | if (mm_init_cpumask(mm, oldmm)) | ||
769 | goto fail_nocpumask; | ||
770 | |||
771 | if (init_new_context(tsk, mm)) | 745 | if (init_new_context(tsk, mm)) |
772 | goto fail_nocontext; | 746 | goto fail_nocontext; |
773 | 747 | ||
@@ -794,9 +768,6 @@ fail_nomem: | |||
794 | return NULL; | 768 | return NULL; |
795 | 769 | ||
796 | fail_nocontext: | 770 | fail_nocontext: |
797 | free_cpumask_var(mm->cpu_vm_mask_var); | ||
798 | |||
799 | fail_nocpumask: | ||
800 | /* | 771 | /* |
801 | * If init_new_context() failed, we cannot use mmput() to free the mm | 772 | * If init_new_context() failed, we cannot use mmput() to free the mm |
802 | * because it calls destroy_context() | 773 | * because it calls destroy_context() |
@@ -1591,6 +1562,13 @@ void __init proc_caches_init(void) | |||
1591 | fs_cachep = kmem_cache_create("fs_cache", | 1562 | fs_cachep = kmem_cache_create("fs_cache", |
1592 | sizeof(struct fs_struct), 0, | 1563 | sizeof(struct fs_struct), 0, |
1593 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); | 1564 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); |
1565 | /* | ||
1566 | * FIXME! The "sizeof(struct mm_struct)" currently includes the | ||
1567 | * whole struct cpumask for the OFFSTACK case. We could change | ||
1568 | * this to *only* allocate as much of it as required by the | ||
1569 | * maximum number of CPU's we can ever have. The cpumask_allocation | ||
1570 | * is at the end of the structure, exactly for that reason. | ||
1571 | */ | ||
1594 | mm_cachep = kmem_cache_create("mm_struct", | 1572 | mm_cachep = kmem_cache_create("mm_struct", |
1595 | sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, | 1573 | sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, |
1596 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); | 1574 | SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); |