From 6a242909b01120f6f3d571c0b75e20ec61f0d8d3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:58 +0900 Subject: percpu: clean up percpu constants Impact: cleaup Make the following cleanups. * There isn't much arch-specific about PERCPU_MODULE_RESERVE. Always define it whether arch overrides PERCPU_ENOUGH_ROOM or not. * blackfin overrides PERCPU_ENOUGH_ROOM to align static area size. Do it by default. * percpu allocation sizes doesn't have much to do with the page size. Don't use PAGE_SHIFT in their definition. Signed-off-by: Tejun Heo Cc: Bryan Wu --- arch/blackfin/include/asm/percpu.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'arch') diff --git a/arch/blackfin/include/asm/percpu.h b/arch/blackfin/include/asm/percpu.h index 797c0c165069..c94c7bc88c71 100644 --- a/arch/blackfin/include/asm/percpu.h +++ b/arch/blackfin/include/asm/percpu.h @@ -3,14 +3,4 @@ #include -#ifdef CONFIG_MODULES -#define PERCPU_MODULE_RESERVE 8192 -#else -#define PERCPU_MODULE_RESERVE 0 -#endif - -#define PERCPU_ENOUGH_ROOM \ - (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \ - PERCPU_MODULE_RESERVE) - #endif /* __ARCH_BLACKFIN_PERCPU__ */ -- cgit v1.2.2 From cafe8816b217b98dc3f268d3b77445da498beb4f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:59 +0900 Subject: percpu: use negative for auto for pcpu_setup_first_chunk() arguments Impact: argument semantic cleanup In pcpu_setup_first_chunk(), zero @unit_size and @dyn_size meant auto-sizing. It's okay for @unit_size as 0 doesn't make sense but 0 dynamic reserve size is valid. Alos, if arch @dyn_size is calculated from other parameters, it might end up passing in 0 @dyn_size and malfunction when the size is automatically adjusted. This patch makes both @unit_size and @dyn_size ssize_t and use -1 for auto sizing. Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index c29f301d3885..ef3a2cd3fe64 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -344,7 +344,7 @@ static ssize_t __init setup_pcpu_4k(size_t static_size) pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n", pcpu4k_nr_static_pages, static_size); - ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, 0, 0, NULL, + ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, -1, -1, NULL, pcpu4k_populate_pte); goto out_free_ar; -- cgit v1.2.2 From 9a4f8a878b68d5a5d9ee60908a52cf6a55e1b823 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:59 +0900 Subject: x86: make embedding percpu allocator return excessive free space Impact: reduce unnecessary memory usage on certain configurations Embedding percpu allocator allocates unit_size * smp_num_possible_cpus() bytes consecutively and use it for the first chunk. However, if the static area is small, this can result in excessive prellocated free space in the first chunk due to PCPU_MIN_UNIT_SIZE restriction. This patch makes embedding percpu allocator preallocate only what's necessary as described by PERPCU_DYNAMIC_RESERVE and return the leftover to the bootmem allocator. Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 44 +++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index ef3a2cd3fe64..38e2b2a470a5 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -241,24 +241,31 @@ static ssize_t __init setup_pcpu_remap(size_t static_size) * Embedding allocator * * The first chunk is sized to just contain the static area plus - * PERCPU_DYNAMIC_RESERVE and allocated as a contiguous area using - * bootmem allocator and used as-is without being mapped into vmalloc - * area. This enables the first chunk to piggy back on the linear - * physical PMD mapping and doesn't add any additional pressure to - * TLB. + * module and dynamic reserves, and allocated as a contiguous area + * using bootmem allocator and used as-is without being mapped into + * vmalloc area. This enables the first chunk to piggy back on the + * linear physical PMD mapping and doesn't add any additional pressure + * to TLB. Note that if the needed size is smaller than the minimum + * unit size, the leftover is returned to the bootmem allocator. */ static void *pcpue_ptr __initdata; +static size_t pcpue_size __initdata; static size_t pcpue_unit_size __initdata; static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) { - return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size - + ((size_t)pageno << PAGE_SHIFT)); + size_t off = (size_t)pageno << PAGE_SHIFT; + + if (off >= pcpue_size) + return NULL; + + return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size + off); } static ssize_t __init setup_pcpu_embed(size_t static_size) { unsigned int cpu; + size_t dyn_size; /* * If large page isn't supported, there's no benefit in doing @@ -269,25 +276,30 @@ static ssize_t __init setup_pcpu_embed(size_t static_size) return -EINVAL; /* allocate and copy */ - pcpue_unit_size = PFN_ALIGN(static_size + PERCPU_DYNAMIC_RESERVE); - pcpue_unit_size = max_t(size_t, pcpue_unit_size, PCPU_MIN_UNIT_SIZE); + pcpue_size = PFN_ALIGN(static_size + PERCPU_DYNAMIC_RESERVE); + pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); + dyn_size = pcpue_size - static_size; + pcpue_ptr = pcpu_alloc_bootmem(0, num_possible_cpus() * pcpue_unit_size, PAGE_SIZE); if (!pcpue_ptr) return -ENOMEM; - for_each_possible_cpu(cpu) - memcpy(pcpue_ptr + cpu * pcpue_unit_size, __per_cpu_load, - static_size); + for_each_possible_cpu(cpu) { + void *ptr = pcpue_ptr + cpu * pcpue_unit_size; + + free_bootmem(__pa(ptr + pcpue_size), + pcpue_unit_size - pcpue_size); + memcpy(ptr, __per_cpu_load, static_size); + } /* we're ready, commit */ pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n", - pcpue_unit_size >> PAGE_SHIFT, pcpue_ptr, static_size); + pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size); return pcpu_setup_first_chunk(pcpue_get_page, static_size, - pcpue_unit_size, - pcpue_unit_size - static_size, pcpue_ptr, - NULL); + pcpue_unit_size, dyn_size, + pcpue_ptr, NULL); } /* -- cgit v1.2.2 From edcb463997ed7b2ffa3bac76e3e75957318f2e01 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:59 +0900 Subject: percpu, module: implement reserved allocation and use it for module percpu variables Impact: add reserved allocation functionality and use it for module percpu variables This patch implements reserved allocation from the first chunk. When setting up the first chunk, arch can ask to set aside certain number of bytes right after the core static area which is available only through a separate reserved allocator. This will be used primarily for module static percpu variables on architectures with limited relocation range to ensure that the module perpcu symbols are inside the relocatable range. If reserved area is requested, the first chunk becomes reserved and isn't available for regular allocation. If the first chunk also includes piggy-back dynamic allocation area, a separate chunk mapping the same region is created to serve dynamic allocation. The first one is called static first chunk and the second dynamic first chunk. Although they share the page map, their different area map initializations guarantee they serve disjoint areas according to their purposes. If arch doesn't setup reserved area, reserved allocation is handled like any other allocation. Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 38e2b2a470a5..dd4eabc747c8 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -217,7 +217,7 @@ proceed: pr_info("PERCPU: Remapped at %p with large pages, static data " "%zu bytes\n", vm.addr, static_size); - ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, PMD_SIZE, + ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, 0, PMD_SIZE, pcpur_size - static_size, vm.addr, NULL); goto out_free_ar; @@ -297,7 +297,7 @@ static ssize_t __init setup_pcpu_embed(size_t static_size) pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n", pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size); - return pcpu_setup_first_chunk(pcpue_get_page, static_size, + return pcpu_setup_first_chunk(pcpue_get_page, static_size, 0, pcpue_unit_size, dyn_size, pcpue_ptr, NULL); } @@ -356,8 +356,8 @@ static ssize_t __init setup_pcpu_4k(size_t static_size) pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n", pcpu4k_nr_static_pages, static_size); - ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, -1, -1, NULL, - pcpu4k_populate_pte); + ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, 0, -1, -1, + NULL, pcpu4k_populate_pte); goto out_free_ar; enomem: -- cgit v1.2.2 From 6b19b0c2400437a3c10059ede0e59b517092e1bd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:59 +0900 Subject: x86, percpu: setup reserved percpu area for x86_64 Impact: fix relocation overflow during module load x86_64 uses 32bit relocations for symbol access and static percpu symbols whether in core or modules must be inside 2GB of the percpu segement base which the dynamic percpu allocator doesn't guarantee. This patch makes x86_64 reserve PERCPU_MODULE_RESERVE bytes in the first chunk so that module percpu areas are always allocated from the first chunk which is always inside the relocatable range. This problem exists for any percpu allocator but is easily triggered when using the embedding allocator because the second chunk is located beyond 2GB on it. This patch also changes the meaning of PERCPU_DYNAMIC_RESERVE such that it only indicates the size of the area to reserve for dynamic allocation as static and dynamic areas can be separate. New PERCPU_DYNAMIC_RESERVED is increased by 4k for both 32 and 64bits as the reserved area separation eats away some allocatable space and having slightly more headroom (currently between 4 and 8k after minimal boot sans module area) makes sense for common case performance. x86_32 can address anywhere from anywhere and doesn't need reserving. Mike Galbraith first reported the problem first and bisected it to the embedding percpu allocator commit. Signed-off-by: Tejun Heo Reported-by: Mike Galbraith Reported-by: Jaswinder Singh Rajput --- arch/x86/kernel/setup_percpu.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index dd4eabc747c8..efa615f2bf43 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -42,6 +42,19 @@ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { }; EXPORT_SYMBOL(__per_cpu_offset); +/* + * On x86_64 symbols referenced from code should be reachable using + * 32bit relocations. Reserve space for static percpu variables in + * modules so that they are always served from the first chunk which + * is located at the percpu segment base. On x86_32, anything can + * address anywhere. No need to reserve space in the first chunk. + */ +#ifdef CONFIG_X86_64 +#define PERCPU_FIRST_CHUNK_RESERVE PERCPU_MODULE_RESERVE +#else +#define PERCPU_FIRST_CHUNK_RESERVE 0 +#endif + /** * pcpu_need_numa - determine percpu allocation needs to consider NUMA * @@ -141,7 +154,7 @@ static ssize_t __init setup_pcpu_remap(size_t static_size) { static struct vm_struct vm; pg_data_t *last; - size_t ptrs_size; + size_t ptrs_size, dyn_size; unsigned int cpu; ssize_t ret; @@ -169,12 +182,14 @@ proceed: * Currently supports only single page. Supporting multiple * pages won't be too difficult if it ever becomes necessary. */ - pcpur_size = PFN_ALIGN(static_size + PERCPU_DYNAMIC_RESERVE); + pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + + PERCPU_DYNAMIC_RESERVE); if (pcpur_size > PMD_SIZE) { pr_warning("PERCPU: static data is larger than large page, " "can't use large page\n"); return -EINVAL; } + dyn_size = pcpur_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; /* allocate pointer array and alloc large pages */ ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0])); @@ -217,8 +232,9 @@ proceed: pr_info("PERCPU: Remapped at %p with large pages, static data " "%zu bytes\n", vm.addr, static_size); - ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, 0, PMD_SIZE, - pcpur_size - static_size, vm.addr, NULL); + ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, + PERCPU_FIRST_CHUNK_RESERVE, + PMD_SIZE, dyn_size, vm.addr, NULL); goto out_free_ar; enomem: @@ -276,9 +292,10 @@ static ssize_t __init setup_pcpu_embed(size_t static_size) return -EINVAL; /* allocate and copy */ - pcpue_size = PFN_ALIGN(static_size + PERCPU_DYNAMIC_RESERVE); + pcpue_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + + PERCPU_DYNAMIC_RESERVE); pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); - dyn_size = pcpue_size - static_size; + dyn_size = pcpue_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; pcpue_ptr = pcpu_alloc_bootmem(0, num_possible_cpus() * pcpue_unit_size, PAGE_SIZE); @@ -297,7 +314,8 @@ static ssize_t __init setup_pcpu_embed(size_t static_size) pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n", pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size); - return pcpu_setup_first_chunk(pcpue_get_page, static_size, 0, + return pcpu_setup_first_chunk(pcpue_get_page, static_size, + PERCPU_FIRST_CHUNK_RESERVE, pcpue_unit_size, dyn_size, pcpue_ptr, NULL); } @@ -356,8 +374,9 @@ static ssize_t __init setup_pcpu_4k(size_t static_size) pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n", pcpu4k_nr_static_pages, static_size); - ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, 0, -1, -1, - NULL, pcpu4k_populate_pte); + ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, + PERCPU_FIRST_CHUNK_RESERVE, -1, -1, NULL, + pcpu4k_populate_pte); goto out_free_ar; enomem: -- cgit v1.2.2 From d1a8e7792047f7dca7eb5759250e2c12800bf262 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 6 Mar 2009 03:12:50 -0800 Subject: x86: make "memtest" like "memtest=17" Impact: make boot command line "memtest" do one loop by default So don't need to guess many patterns in one loop. Signed-off-by: Yinghai Lu LKML-Reference: <49B10532.3020105@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/mm/memtest.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index 0bcd7883d036..605c8be06217 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c @@ -100,6 +100,9 @@ static int __init parse_memtest(char *arg) { if (arg) memtest_pattern = simple_strtoul(arg, NULL, 0); + else + memtest_pattern = ARRAY_SIZE(patterns); + return 0; } -- cgit v1.2.2 From c77a3b59c624454c501cbfa1a3611d5a00bf9532 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 5 Mar 2009 17:04:26 +0200 Subject: x86: fix uninitialized variable in init_memory_mapping() Signed-off-by: Pekka Enberg LKML-Reference: <1236265466.31324.9.camel@penberg-laptop> Signed-off-by: Ingo Molnar --- arch/x86/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 6d63e3d1253d..15219e0d1243 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -134,8 +134,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, { unsigned long page_size_mask = 0; unsigned long start_pfn, end_pfn; + unsigned long ret = 0; unsigned long pos; - unsigned long ret; struct map_range mr[NR_RANGE_MR]; int nr_range, i; -- cgit v1.2.2 From 5dd61dfabcaa5bfb67afb8a2d255bd1e156562e3 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 5 Mar 2009 17:04:57 +0200 Subject: x86: rename do_not_nx to disable_nx in mm/init_64.c As a preparational step for unifying noexec handling on 32-bit and 64-bit, rename the do_not_nx variable to disable_nx on 64-bit. Signed-off-by: Pekka Enberg LKML-Reference: <1236265497.31324.11.camel@penberg-laptop> Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 8a853bc3b287..54efa57d1c03 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -85,7 +85,7 @@ early_param("gbpages", parse_direct_gbpages_on); pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; EXPORT_SYMBOL_GPL(__supported_pte_mask); -static int do_not_nx __cpuinitdata; +static int disable_nx __cpuinitdata; /* * noexec=on|off @@ -100,9 +100,9 @@ static int __init nonx_setup(char *str) return -EINVAL; if (!strncmp(str, "on", 2)) { __supported_pte_mask |= _PAGE_NX; - do_not_nx = 0; + disable_nx = 0; } else if (!strncmp(str, "off", 3)) { - do_not_nx = 1; + disable_nx = 1; __supported_pte_mask &= ~_PAGE_NX; } return 0; @@ -114,7 +114,7 @@ void __cpuinit check_efer(void) unsigned long efer; rdmsrl(MSR_EFER, efer); - if (!(efer & EFER_NX) || do_not_nx) + if (!(efer & EFER_NX) || disable_nx) __supported_pte_mask &= ~_PAGE_NX; } -- cgit v1.2.2 From 7ab152470e8416ef2a44c800fdc157e2192f2974 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 6 Mar 2009 19:08:34 +0300 Subject: x86: linkage.h - guard assembler specifics by __ASSEMBLY__ Stephen Rothwell reported: |Today's linux-next build (x86_64 allmodconfig) produced this warning: | |In file included from drivers/char/epca.c:49: |drivers/char/digiFep1.h:7:1: warning: "GLOBAL" redefined |In file included from include/linux/linkage.h:5, | from include/linux/kernel.h:11, | from arch/x86/include/asm/system.h:10, | from arch/x86/include/asm/processor.h:17, | from include/linux/prefetch.h:14, | from include/linux/list.h:6, | from include/linux/module.h:9, | from drivers/char/epca.c:29: |arch/x86/include/asm/linkage.h:55:1: warning: this is the location of the previous definition | |Probably introduced by commit 95695547a7db44b88a7ee36cf5df188de267e99e |("x86: asm linkage - introduce GLOBAL macro") from the x86 tree. Any assembler specific snippets being placed in headers are to be protected by __ASSEMBLY__. Fixed. Also move __ALIGN definition under the same protection as well. Reported-by: Stephen Rothwell Signed-off-by: Cyrill Gorcunov LKML-Reference: <20090306160833.GB7420@localhost> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/linkage.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 9320e2a8a26a..a0d70b46c27c 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -4,11 +4,6 @@ #undef notrace #define notrace __attribute__((no_instrument_function)) -#ifdef CONFIG_X86_64 -#define __ALIGN .p2align 4,,15 -#define __ALIGN_STR ".p2align 4,,15" -#endif - #ifdef CONFIG_X86_32 #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0))) /* @@ -50,16 +45,25 @@ __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ "g" (arg4), "g" (arg5), "g" (arg6)) -#endif +#endif /* CONFIG_X86_32 */ + +#ifdef __ASSEMBLY__ #define GLOBAL(name) \ .globl name; \ name: +#ifdef CONFIG_X86_64 +#define __ALIGN .p2align 4,,15 +#define __ALIGN_STR ".p2align 4,,15" +#endif + #ifdef CONFIG_X86_ALIGNMENT_16 #define __ALIGN .align 16,0x90 #define __ALIGN_STR ".align 16,0x90" #endif +#endif /* __ASSEMBLY__ */ + #endif /* _ASM_X86_LINKAGE_H */ -- cgit v1.2.2 From 3a450de1365d20afde406f0d9b2931a5e4a4fd6a Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Fri, 6 Mar 2009 17:30:56 -0600 Subject: x86: UV: remove uv_flush_tlb_others() WARN_ON In uv_flush_tlb_others() (arch/x86/kernel/tlb_uv.c), the "WARN_ON(!in_atomic())" fails if CONFIG_PREEMPT is not enabled. And CONFIG_PREEMPT is not enabled by default in the distribution that most UV owners will use. We could #ifdef CONFIG_PREEMPT the warning, but that is not good form. And there seems to be no suitable fix to in_atomic() when CONFIG_PREMPT is not on. As Ingo commented: > and we have no proper primitive to test for atomicity. (mainly > because we dont know about atomicity on a non-preempt kernel) So we drop the WARN_ON. Signed-off-by: Cliff Wickman Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/tlb_uv.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index f04549afcfe9..d038b9c45cf8 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -314,8 +314,6 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, int locals = 0; struct bau_desc *bau_desc; - WARN_ON(!in_atomic()); - cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); uv_cpu = uv_blade_processor_id(); -- cgit v1.2.2 From 1f442d70c84aa798e243e721eba728a98434cd86 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 7 Mar 2009 23:46:26 -0800 Subject: x86: remove smp_apply_quirks()/smp_checks() Impact: cleanup and code size reduction on 64-bit This code is only applied to Intel Pentium and AMD K7 32-bit cpus. Move those checks to intel_init()/amd_init() for 32-bit so 64-bit will not build this code. Also change to use cpu_index check to see if we need to emit warning. Signed-off-by: Yinghai Lu LKML-Reference: <49B377D2.8030108@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 52 ++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/intel.c | 25 +++++++++++++++ arch/x86/kernel/smpboot.c | 78 --------------------------------------------- 3 files changed, 77 insertions(+), 78 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 25423a5b80ed..f47df59016c5 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -5,6 +5,7 @@ #include #include #include +#include #ifdef CONFIG_X86_64 # include @@ -141,6 +142,55 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) } } +static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + /* calling is from identify_secondary_cpu() ? */ + if (c->cpu_index == boot_cpu_id) + return; + + /* + * Certain Athlons might work (for various values of 'work') in SMP + * but they are not certified as MP capable. + */ + /* Athlon 660/661 is valid. */ + if ((c->x86_model == 6) && ((c->x86_mask == 0) || + (c->x86_mask == 1))) + goto valid_k7; + + /* Duron 670 is valid */ + if ((c->x86_model == 7) && (c->x86_mask == 0)) + goto valid_k7; + + /* + * Athlon 662, Duron 671, and Athlon >model 7 have capability + * bit. It's worth noting that the A5 stepping (662) of some + * Athlon XP's have the MP bit set. + * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for + * more. + */ + if (((c->x86_model == 6) && (c->x86_mask >= 2)) || + ((c->x86_model == 7) && (c->x86_mask >= 1)) || + (c->x86_model > 7)) + if (cpu_has_mp) + goto valid_k7; + + /* If we get here, not a certified SMP capable AMD system. */ + + /* + * Don't taint if we are running SMP kernel on a single non-MP + * approved Athlon + */ + WARN_ONCE(1, "WARNING: This combination of AMD" + "processors is not suitable for SMP.\n"); + if (!test_taint(TAINT_UNSAFE_SMP)) + add_taint(TAINT_UNSAFE_SMP); + +valid_k7: + ; +#endif +} + static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) { u32 l, h; @@ -175,6 +225,8 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) } set_cpu_cap(c, X86_FEATURE_K7); + + amd_k7_smp_check(c); } #endif diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 25c559ba8d54..191117f1ad51 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -13,6 +13,7 @@ #include #include #include +#include #ifdef CONFIG_X86_64 #include @@ -110,6 +111,28 @@ static void __cpuinit trap_init_f00f_bug(void) } #endif +static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + /* calling is from identify_secondary_cpu() ? */ + if (c->cpu_index == boot_cpu_id) + return; + + /* + * Mask B, Pentium, but not Pentium MMX + */ + if (c->x86 == 5 && + c->x86_mask >= 1 && c->x86_mask <= 4 && + c->x86_model <= 3) { + /* + * Remember we have B step Pentia with bugs + */ + WARN_ONCE(1, "WARNING: SMP operation may be unreliable" + "with B stepping processors.\n"); + } +#endif +} + static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) { unsigned long lo, hi; @@ -186,6 +209,8 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_NUMAQ numaq_tsc_disable(); #endif + + intel_smp_check(c); } #else static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 249334f5080a..ef7d10170c30 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -114,10 +114,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); atomic_t init_deasserted; - -/* Set if we find a B stepping CPU */ -static int __cpuinitdata smp_b_stepping; - #if defined(CONFIG_NUMA) && defined(CONFIG_X86_32) /* which logical CPUs are on which nodes */ @@ -271,8 +267,6 @@ static void __cpuinit smp_callin(void) cpumask_set_cpu(cpuid, cpu_callin_mask); } -static int __cpuinitdata unsafe_smp; - /* * Activate a secondary processor. */ @@ -340,76 +334,6 @@ notrace static void __cpuinit start_secondary(void *unused) cpu_idle(); } -static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c) -{ - /* - * Mask B, Pentium, but not Pentium MMX - */ - if (c->x86_vendor == X86_VENDOR_INTEL && - c->x86 == 5 && - c->x86_mask >= 1 && c->x86_mask <= 4 && - c->x86_model <= 3) - /* - * Remember we have B step Pentia with bugs - */ - smp_b_stepping = 1; - - /* - * Certain Athlons might work (for various values of 'work') in SMP - * but they are not certified as MP capable. - */ - if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { - - if (num_possible_cpus() == 1) - goto valid_k7; - - /* Athlon 660/661 is valid. */ - if ((c->x86_model == 6) && ((c->x86_mask == 0) || - (c->x86_mask == 1))) - goto valid_k7; - - /* Duron 670 is valid */ - if ((c->x86_model == 7) && (c->x86_mask == 0)) - goto valid_k7; - - /* - * Athlon 662, Duron 671, and Athlon >model 7 have capability - * bit. It's worth noting that the A5 stepping (662) of some - * Athlon XP's have the MP bit set. - * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for - * more. - */ - if (((c->x86_model == 6) && (c->x86_mask >= 2)) || - ((c->x86_model == 7) && (c->x86_mask >= 1)) || - (c->x86_model > 7)) - if (cpu_has_mp) - goto valid_k7; - - /* If we get here, not a certified SMP capable AMD system. */ - unsafe_smp = 1; - } - -valid_k7: - ; -} - -static void __cpuinit smp_checks(void) -{ - if (smp_b_stepping) - printk(KERN_WARNING "WARNING: SMP operation may be unreliable" - "with B stepping processors.\n"); - - /* - * Don't taint if we are running SMP kernel on a single non-MP - * approved Athlon - */ - if (unsafe_smp && num_online_cpus() > 1) { - printk(KERN_INFO "WARNING: This combination of AMD" - "processors is not suitable for SMP.\n"); - add_taint(TAINT_UNSAFE_SMP); - } -} - /* * The bootstrap kernel entry code has set these up. Save them for * a given CPU @@ -423,7 +347,6 @@ void __cpuinit smp_store_cpu_info(int id) c->cpu_index = id; if (id != 0) identify_secondary_cpu(c); - smp_apply_quirks(c); } @@ -1193,7 +1116,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) pr_debug("Boot done.\n"); impress_friends(); - smp_checks(); #ifdef CONFIG_X86_IO_APIC setup_ioapic_dest(); #endif -- cgit v1.2.2 From 8827247ffcc9e880cbe4705655065cf011265157 Mon Sep 17 00:00:00 2001 From: Wang Chen Date: Sat, 7 Mar 2009 13:34:19 +0800 Subject: x86: don't define __this_fixmap_does_not_exist() Impact: improve out-of-range fixmap index debugging Commit "1b42f51630c7eebce6fb780b480731eb81afd325" defined the __this_fixmap_does_not_exist() function with a WARN_ON(1) in it. This causes the linker to not report an error when __this_fixmap_does_not_exist() is called with a non-constant parameter. Ingo defined __this_fixmap_does_not_exist() because he wanted to get virt addresses of fix memory of nest level by non-constant index. But we can fix this and still keep the link-time check: We can get the four slot virt addresses on link time and store them to array slot_virt[]. Then we can then refer the slot_virt with non-constant index, in the ioremap-leak detection code. Signed-off-by: Wang Chen LKML-Reference: <49B2075B.4070509@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 62773abdf088..96786ef2c9a9 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -504,13 +504,19 @@ static inline pte_t * __init early_ioremap_pte(unsigned long addr) return &bm_pte[pte_index(addr)]; } +static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata; + void __init early_ioremap_init(void) { pmd_t *pmd; + int i; if (early_ioremap_debug) printk(KERN_INFO "early_ioremap_init()\n"); + for (i = 0; i < FIX_BTMAPS_SLOTS; i++) + slot_virt[i] = fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); + pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); memset(bm_pte, 0, sizeof(bm_pte)); pmd_populate_kernel(&init_mm, pmd, bm_pte); @@ -577,6 +583,7 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx) static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; + static int __init check_early_ioremap_leak(void) { int count = 0; @@ -598,7 +605,8 @@ static int __init check_early_ioremap_leak(void) } late_initcall(check_early_ioremap_leak); -static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) +static void __init __iomem * +__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) { unsigned long offset, last_addr; unsigned int nrpages; @@ -664,9 +672,9 @@ static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned lo --nrpages; } if (early_ioremap_debug) - printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0)); + printk(KERN_CONT "%08lx + %08lx\n", offset, slot_virt[slot]); - prev_map[slot] = (void __iomem *)(offset + fix_to_virt(idx0)); + prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]); return prev_map[slot]; } @@ -734,8 +742,3 @@ void __init early_iounmap(void __iomem *addr, unsigned long size) } prev_map[slot] = NULL; } - -void __this_fixmap_does_not_exist(void) -{ - WARN_ON(1); -} -- cgit v1.2.2 From e954ef20c29b7af07a8cb5452f14fb69e3d9d2b2 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 5 Mar 2009 12:04:57 -0800 Subject: x86: fix warning about nodeid Impact: cleanup Ingo found there warning about nodeid with some configs. try to use for_each_online_node for non numa too. in that case nodeid will be 0. also move out boundary checking from setup_node_bootmem(), so non-numa config will not check it. Signed-off-by: Yinghai Lu Cc: Andrew Morton LKML-Reference: <49B03069.80001@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 2966c6b8d304..db81e9a8556b 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -806,11 +806,6 @@ static unsigned long __init setup_node_bootmem(int nodeid, { unsigned long bootmap_size; - if (start_pfn > max_low_pfn) - return bootmap; - if (end_pfn > max_low_pfn) - end_pfn = max_low_pfn; - /* don't touch min_low_pfn */ bootmap_size = init_bootmem_node(NODE_DATA(nodeid), bootmap >> PAGE_SHIFT, @@ -843,13 +838,23 @@ void __init setup_bootmem_allocator(void) max_pfn_mapped< max_low_pfn) + continue; + if (end_pfn > max_low_pfn) + end_pfn = max_low_pfn; #else - bootmap = setup_node_bootmem(0, 0, max_low_pfn, bootmap); + start_pfn = 0; + end_pfn = max_low_pfn; #endif + bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn, + bootmap); + } after_bootmem = 1; } -- cgit v1.2.2 From d0fc63f7bd07cb779a06dc1cdd0c5a14e7f5d562 Mon Sep 17 00:00:00 2001 From: Stuart Bennett Date: Sun, 8 Mar 2009 20:21:35 +0200 Subject: x86 mmiotrace: fix remove_kmmio_fault_pages() Impact: fix race+crash in mmiotrace The list manipulation in remove_kmmio_fault_pages() was broken. If more than one consecutive kmmio_fault_page was re-added during the grace period between unregister_kmmio_probe() and remove_kmmio_fault_pages(), the list manipulation failed to remove pages from the release list. After a second grace period the pages get into rcu_free_kmmio_fault_pages() and raise a BUG_ON() kernel crash. The list manipulation is fixed to properly remove pages from the release list. This bug has been present from the very beginning of mmiotrace in the mainline kernel. It was introduced in 0fd0e3da ("x86: mmiotrace full patch, preview 1"); An urgent fix for Linus. Tested by Stuart (on 32-bit) and Pekka (on amd and intel 64-bit systems, nouveau and nvidia proprietary). Signed-off-by: Stuart Bennett Signed-off-by: Pekka Paalanen LKML-Reference: <20090308202135.34933feb@daedalus.pq.iki.fi> Signed-off-by: Ingo Molnar --- arch/x86/mm/kmmio.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 9f205030d9aa..6a518dd08a36 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -451,23 +451,24 @@ static void rcu_free_kmmio_fault_pages(struct rcu_head *head) static void remove_kmmio_fault_pages(struct rcu_head *head) { - struct kmmio_delayed_release *dr = container_of( - head, - struct kmmio_delayed_release, - rcu); + struct kmmio_delayed_release *dr = + container_of(head, struct kmmio_delayed_release, rcu); struct kmmio_fault_page *p = dr->release_list; struct kmmio_fault_page **prevp = &dr->release_list; unsigned long flags; + spin_lock_irqsave(&kmmio_lock, flags); while (p) { - if (!p->count) + if (!p->count) { list_del_rcu(&p->list); - else + prevp = &p->release_next; + } else { *prevp = p->release_next; - prevp = &p->release_next; + } p = p->release_next; } spin_unlock_irqrestore(&kmmio_lock, flags); + /* This is the real RCU destroy call. */ call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages); } -- cgit v1.2.2 From 0feca851c1b3cb4ebfa3149144b3d5de0879ebaa Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 6 Mar 2009 10:09:26 -0800 Subject: x86-32: make sure virt_addr_valid() returns false for fixmap addresses I found that virt_addr_valid() was returning true for fixmap addresses. I'm not sure whether pfn_valid() is supposed to include this test, but there's no harm in being explicit. Signed-off-by: Jeremy Fitzhardinge Cc: Jiri Slaby Cc: Yinghai Lu LKML-Reference: <49B166D6.2080505@goop.org> Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 62773abdf088..62def5795730 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -87,6 +87,8 @@ bool __virt_addr_valid(unsigned long x) return false; if (__vmalloc_start_set && is_vmalloc_addr((void *) x)) return false; + if (x >= FIXADDR_START) + return false; return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); } EXPORT_SYMBOL(__virt_addr_valid); -- cgit v1.2.2