From 26f80bd6a9ab17bc8a60b6092e7c0d05c5927ce5 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 00:38:58 +0900 Subject: x86-64: Convert irqstacks to per-cpu Move the irqstackptr variable from the PDA to per-cpu. Make the stacks themselves per-cpu, removing some specific allocation code. Add a seperate flag (is_boot_cpu) to simplify the per-cpu boot adjustments. tj: * sprinkle some underbars around. * irq_stack_ptr is not used till traps_init(), no reason to initialize it early. On SMP, just leaving it NULL till proper initialization in setup_per_cpu_areas() works. Dropped is_boot_cpu and early irq_stack_ptr initialization. * do DECLARE/DEFINE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack) instead of (char, irq_stack[IRQ_STACK_SIZE]). Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/processor.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 091cd8855f2..f511246fa6c 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -378,6 +378,9 @@ union thread_xstate { #ifdef CONFIG_X86_64 DECLARE_PER_CPU(struct orig_ist, orig_ist); + +DECLARE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack); +DECLARE_PER_CPU(char *, irq_stack_ptr); #endif extern void print_cpu_info(struct cpuinfo_x86 *); -- cgit v1.2.2 From 947e76cdc34c782fc947313d4331380686eebbad Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 12:21:28 +0900 Subject: x86: move stack_canary into irq_stack Impact: x86_64 percpu area layout change, irq_stack now at the beginning Now that the PDA is empty except for the stack canary, it can be removed. The irqstack is moved to the start of the per-cpu section. If the stack protector is enabled, the canary overlaps the bottom 48 bytes of the irqstack. tj: * updated subject * dropped asm relocation of irq_stack_ptr * updated comments a bit * rebased on top of stack canary changes Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/processor.h | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index f511246fa6c..48676b943b9 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -379,8 +379,29 @@ union thread_xstate { #ifdef CONFIG_X86_64 DECLARE_PER_CPU(struct orig_ist, orig_ist); -DECLARE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack); +union irq_stack_union { + char irq_stack[IRQ_STACK_SIZE]; + /* + * GCC hardcodes the stack canary as %gs:40. Since the + * irq_stack is the object at %gs:0, we reserve the bottom + * 48 bytes of the irq stack for the canary. + */ + struct { + char gs_base[40]; + unsigned long stack_canary; + }; +}; + +DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); DECLARE_PER_CPU(char *, irq_stack_ptr); + +static inline void load_gs_base(int cpu) +{ + /* Memory clobbers used to order pda/percpu accesses */ + mb(); + wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); + mb(); +} #endif extern void print_cpu_info(struct cpuinfo_x86 *); -- cgit v1.2.2 From b1882e68d17a93b523dce09c3a181319aace2f0e Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 23 Jan 2009 17:18:52 -0800 Subject: x86: clean up stray space in Impact: Whitespace cleanup only Clean up a stray space character in arch/x86/include/asm/processor.h. Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 091cd8855f2..ac8fab3b868 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -73,7 +73,7 @@ struct cpuinfo_x86 { char pad0; #else /* Number of 4K pages in DTLB/ITLB combined(in pages): */ - int x86_tlbsize; + int x86_tlbsize; __u8 x86_virt_bits; __u8 x86_phys_bits; #endif -- cgit v1.2.2 From b2d2f4312b117a6cc647c8521e2643a88771f757 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:48 +0900 Subject: x86: initialize per-cpu GDT segment in per-cpu setup Impact: cleanup Rename init_gdt() to setup_percpu_segment(), and move it to setup_percpu.c. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/processor.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 48676b943b9..32c30b02b51 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -778,7 +778,6 @@ extern struct desc_ptr early_gdt_descr; extern void cpu_set_gdt(int); extern void switch_to_new_gdt(void); extern void cpu_init(void); -extern void init_gdt(int cpu); static inline unsigned long get_debugctlmsr(void) { -- cgit v1.2.2 From 1825b8edc2034c012ae48f797d74efd1bd9d4f72 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:48 +0900 Subject: x86: remove extra barriers from load_gs_base() Impact: optimization mb() generates an mfence instruction, which is not needed here. Only a compiler barrier is needed, and that is handled by the memory clobber in the wrmsrl function. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/processor.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 32c30b02b51..794234eba31 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -397,10 +397,7 @@ DECLARE_PER_CPU(char *, irq_stack_ptr); static inline void load_gs_base(int cpu) { - /* Memory clobbers used to order pda/percpu accesses */ - mb(); wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); - mb(); } #endif -- cgit v1.2.2 From 2697fbd5faf19c84c17441b1752bdcbdcfd1248c Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Jan 2009 12:56:48 +0900 Subject: x86: load new GDT after setting up boot cpu per-cpu area Impact: sync 32 and 64-bit code Merge load_gs_base() into switch_to_new_gdt(). Load the GDT and per-cpu state for the boot cpu when its new area is set up. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/processor.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 794234eba31..befa20b4a68 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -394,11 +394,6 @@ union irq_stack_union { DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); DECLARE_PER_CPU(char *, irq_stack_ptr); - -static inline void load_gs_base(int cpu) -{ - wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); -} #endif extern void print_cpu_info(struct cpuinfo_x86 *); -- cgit v1.2.2 From 552be871e67ff577ed36beb2f53d078b42304739 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 30 Jan 2009 17:47:53 +0900 Subject: x86: pass in cpu number to switch_to_new_gdt() Impact: cleanup, prepare for xen boot fix. Xen needs to call this function very early to setup the GDT and per-cpu segments. Remove the call to smp_processor_id() and just pass in the cpu number. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- arch/x86/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index befa20b4a68..1c25eb69ea8 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -768,7 +768,7 @@ extern int sysenter_setup(void); extern struct desc_ptr early_gdt_descr; extern void cpu_set_gdt(int); -extern void switch_to_new_gdt(void); +extern void switch_to_new_gdt(int); extern void cpu_init(void); static inline unsigned long get_debugctlmsr(void) -- cgit v1.2.2 From 11e3a840cd5b731cdd8f6f956dfae78a8046d09c Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 30 Jan 2009 17:47:54 +0900 Subject: x86: split loading percpu segments from loading gdt Impact: split out a function, no functional change Xen needs to be able to access percpu data from very early on. For various reasons, it cannot also load the gdt at that time. It does, however, have a pefectly functional gdt at that point, so there's no pressing need to reload the gdt. Split the function to load the segment registers off, so Xen can call it directly. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Tejun Heo --- arch/x86/include/asm/processor.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 1c25eb69ea8..656d02ea509 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -769,6 +769,7 @@ extern struct desc_ptr early_gdt_descr; extern void cpu_set_gdt(int); extern void switch_to_new_gdt(int); +extern void load_percpu_segment(int); extern void cpu_init(void); static inline unsigned long get_debugctlmsr(void) -- cgit v1.2.2 From 2add8e235cbe0dcd672c33fc322754e15500238c Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sun, 8 Feb 2009 09:58:39 -0500 Subject: x86: use linker to offset symbols by __per_cpu_load Impact: cleanup and bug fix Use the linker to create symbols for certain per-cpu variables that are offset by __per_cpu_load. This allows the removal of the runtime fixup of the GDT pointer, which fixes a bug with resume reported by Jiri Slaby. Reported-by: Jiri Slaby Signed-off-by: Brian Gerst Acked-by: Jiri Slaby Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 656d02ea509..373d3f628d2 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -393,6 +393,8 @@ union irq_stack_union { }; DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); +DECLARE_INIT_PER_CPU(irq_stack_union); + DECLARE_PER_CPU(char *, irq_stack_ptr); #endif -- cgit v1.2.2 From 60a5317ff0f42dd313094b88f809f63041568b08 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:40 +0900 Subject: x86: implement x86_32 stack protector Impact: stack protector for x86_32 Implement stack protector for x86_32. GDT entry 28 is used for it. It's set to point to stack_canary-20 and have the length of 24 bytes. CONFIG_CC_STACKPROTECTOR turns off CONFIG_X86_32_LAZY_GS and sets %gs to the stack canary segment on entry. As %gs is otherwise unused by the kernel, the canary can be anywhere. It's defined as a percpu variable. x86_32 exception handlers take register frame on stack directly as struct pt_regs. With -fstack-protector turned on, gcc copies the whole structure after the stack canary and (of course) doesn't copy back on return thus losing all changed. For now, -fno-stack-protector is added to all files which contain those functions. We definitely need something better. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 9763eb70013..5a947210425 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -396,7 +396,11 @@ DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); DECLARE_INIT_PER_CPU(irq_stack_union); DECLARE_PER_CPU(char *, irq_stack_ptr); +#else /* X86_64 */ +#ifdef CONFIG_CC_STACKPROTECTOR +DECLARE_PER_CPU(unsigned long, stack_canary); #endif +#endif /* X86_64 */ extern void print_cpu_info(struct cpuinfo_x86 *); extern unsigned int xstate_size; -- cgit v1.2.2 From 54321d947ae9d6a051b81e3eccaf2d8658aeecc6 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 11 Feb 2009 10:20:05 -0800 Subject: x86: move pte types into pgtable*.h pgtable*.h is intended for definitions relating to actual pagetables and their entries, so move all the definitions for (pte|pmd|pud|pgd)(val)?_t to the appropriate pgtable*.h headers. Signed-off-by: Jeremy Fitzhardinge --- arch/x86/include/asm/processor.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 656d02ea509..96302880774 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -16,6 +16,7 @@ struct mm_struct; #include #include #include +#include #include #include #include -- cgit v1.2.2 From de5483029b8f18e23395d8fd4f4ef6ae15beb809 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 19 Feb 2009 12:20:17 +0530 Subject: x86: include/asm/processor.h remove double declaration of print_cpu_info Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index dabab1a19dd..72914d0315e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -403,7 +403,6 @@ DECLARE_PER_CPU(unsigned long, stack_canary); #endif #endif /* X86_64 */ -extern void print_cpu_info(struct cpuinfo_x86 *); extern unsigned int xstate_size; extern void free_thread_xstate(struct task_struct *); extern struct kmem_cache *task_xstate_cachep; -- cgit v1.2.2 From d951734654f76a370a89b4e531af9b765bd13541 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 20 Feb 2009 23:32:28 +0100 Subject: x86, mm: rename TASK_SIZE64 => TASK_SIZE_MAX Impact: cleanup Rename TASK_SIZE64 to TASK_SIZE_MAX, and provide the define on 32-bit too. (mapped to TASK_SIZE) This allows 32-bit code to make use of the (former-) TASK_SIZE64 symbol as well, in a clean way. Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 72914d0315e..c7a98f73821 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -861,6 +861,7 @@ static inline void spin_lock_prefetch(const void *x) * User space process size: 3GB (default). */ #define TASK_SIZE PAGE_OFFSET +#define TASK_SIZE_MAX TASK_SIZE #define STACK_TOP TASK_SIZE #define STACK_TOP_MAX STACK_TOP @@ -920,7 +921,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); /* * User space process size. 47bits minus one guard page. */ -#define TASK_SIZE64 ((1UL << 47) - PAGE_SIZE) +#define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE) /* This decides where the kernel will search for a free chunk of vm * space during mmap's. @@ -929,12 +930,12 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); 0xc0000000 : 0xFFFFe000) #define TASK_SIZE (test_thread_flag(TIF_IA32) ? \ - IA32_PAGE_OFFSET : TASK_SIZE64) + IA32_PAGE_OFFSET : TASK_SIZE_MAX) #define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? \ - IA32_PAGE_OFFSET : TASK_SIZE64) + IA32_PAGE_OFFSET : TASK_SIZE_MAX) #define STACK_TOP TASK_SIZE -#define STACK_TOP_MAX TASK_SIZE64 +#define STACK_TOP_MAX TASK_SIZE_MAX #define INIT_THREAD { \ .sp0 = (unsigned long)&init_stack + sizeof(init_stack) \ -- cgit v1.2.2 From db949bba3c7cf2e664ac12e237c6d4c914f0c69d Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 27 Feb 2009 13:25:21 -0800 Subject: x86-32: use non-lazy io bitmap context switching Impact: remove 32-bit optimization to prepare unification x86-32 and -64 differ in the way they context-switch tasks with io permission bitmaps. x86-64 simply copies the next tasks io bitmap into place (if any) on context switch. x86-32 invalidates the bitmap on context switch, so that the next IO instruction will fault; at that point it installs the appropriate IO bitmap. This makes context switching IO-bitmap-using tasks a bit more less expensive, at the cost of making the next IO instruction slower due to the extra fault. This tradeoff only makes sense if IO-bitmap-using processes are relatively common, but they don't actually use IO instructions very often. However, in a typical desktop system, the only process likely to be using IO bitmaps is the X server, and nothing at all on a server. Therefore the lazy context switch doesn't really win all that much, and its just a gratuitious difference from 64-bit code. This patch removes the lazy context switch, with a view to unifying this code in a later change. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c7a98f73821..76139506c3e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -248,7 +248,6 @@ struct x86_hw_tss { #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) #define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap) #define INVALID_IO_BITMAP_OFFSET 0x8000 -#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000 struct tss_struct { /* @@ -263,11 +262,6 @@ struct tss_struct { * be within the limit. */ unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; - /* - * Cache the current maximum and the last task that used the bitmap: - */ - unsigned long io_bitmap_max; - struct thread_struct *io_bitmap_owner; /* * .. and then another 0x100 bytes for the emergency kernel stack: -- cgit v1.2.2 From 155dd720d06a219ddf5a56b473cb3325441fc879 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 13 Mar 2009 14:49:53 +1030 Subject: cpumask: convert struct cpuinfo_x86's llc_shared_map to cpumask_var_t Impact: reduce kernel memory usage when CONFIG_CPUMASK_OFFSTACK=y Signed-off-by: Rusty Russell --- arch/x86/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 76139506c3e..d794d9483c5 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -94,7 +94,7 @@ struct cpuinfo_x86 { unsigned long loops_per_jiffy; #ifdef CONFIG_SMP /* cpus sharing the last level cache: */ - cpumask_t llc_shared_map; + cpumask_var_t llc_shared_map; #endif /* cpuid returned max cores value: */ u16 x86_max_cores; -- cgit v1.2.2 From 30e1e6d1af2b67558bccf322af2b3e0676b209ae Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 17 Mar 2009 14:50:34 +1030 Subject: cpumask: fix CONFIG_CPUMASK_OFFSTACK=y cpu hotunplug crash Impact: Fix cpu offline when CONFIG_MAXSMP=y Changeset bc9b83dd1f66402b870301c3c7117b9c1484abb4 "cpumask: convert c1e_mask in arch/x86/kernel/process.c to cpumask_var_t" contained a bug: c1e_mask is manipulated even if C1E isn't detected (and hence not allocated). This is simply fixed by checking for NULL (which gcc optimizes out anyway of CONFIG_CPUMASK_OFFSTACK=n, since it knows ce1_mask can never be NULL). In addition, fix a leak where select_idle_routine re-allocates (and re-clears) c1e_mask on every cpu init. Reported-by: Ingo Molnar Signed-off-by: Rusty Russell Cc: Mike Travis LKML-Reference: <200903171450.34549.rusty@rustcorp.com.au> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index d794d9483c5..9874dd98a29 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -733,6 +733,7 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx) extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); extern void select_idle_routine(const struct cpuinfo_x86 *c); +extern void init_c1e_mask(void); extern unsigned long boot_option_idle_override; extern unsigned long idle_halt; -- cgit v1.2.2