author		Ingo Molnar <mingo@elte.hu>	2008-08-15 12:15:17 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-08-15 12:15:17 -0400
commit		f3efbe582b5396d134024c03a5fa253f2a85d9a6
tree		e4e15b7567b82d24cb1e7327398286a2b88df04c /arch/x86
parent		05d3ed0a1fe3ea05ab9f3b8d32576a0bc2e19660
parent		b635acec48bcaa9183fcbf4e3955616b0d4119b5
Merge branch 'linus' into x86/gart
Diffstat (limited to 'arch/x86')
82 files changed, 1587 insertions(+), 841 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 03980cb04291..ac2fb0641a04 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -21,12 +21,16 @@ config X86
 	select HAVE_UNSTABLE_SCHED_CLOCK
 	select HAVE_IDE
 	select HAVE_OPROFILE
+	select HAVE_IOREMAP_PROT
 	select HAVE_KPROBES
+	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select HAVE_KRETPROBES
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FTRACE
 	select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
 	select HAVE_ARCH_KGDB if !X86_VOYAGER
+	select HAVE_GENERIC_DMA_COHERENT if X86_32
+	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 
 config ARCH_DEFCONFIG
 	string
@@ -329,20 +333,6 @@ config X86_BIGSMP
 
 endif
 
-config X86_RDC321X
-	bool "RDC R-321x SoC"
-	depends on X86_32
-	select M486
-	select X86_REBOOTFIXUPS
-	select GENERIC_GPIO
-	select LEDS_CLASS
-	select LEDS_GPIO
-	select NEW_LEDS
-	help
-	  This option is needed for RDC R-321x system-on-chip, also known
-	  as R-8610-(G).
-	  If you don't have one of these chips, you should say N here.
-
 config X86_VSMP
 	bool "Support for ScaleMP vSMP"
 	select PARAVIRT
@@ -366,6 +356,16 @@ config X86_VISWS
 	  A kernel compiled for the Visual Workstation will run on general
 	  PCs as well. See <file:Documentation/sgi-visws.txt> for details.
 
+config X86_RDC321X
+	bool "RDC R-321x SoC"
+	depends on X86_32
+	select M486
+	select X86_REBOOTFIXUPS
+	help
+	  This option is needed for RDC R-321x system-on-chip, also known
+	  as R-8610-(G).
+	  If you don't have one of these chips, you should say N here.
+
 config SCHED_NO_NO_OMIT_FRAME_POINTER
 	def_bool y
 	prompt "Single-depth WCHAN output"
@@ -1276,6 +1276,14 @@ config CRASH_DUMP
 	  (CONFIG_RELOCATABLE=y).
 	  For more details see Documentation/kdump/kdump.txt
 
+config KEXEC_JUMP
+	bool "kexec jump (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	depends on KEXEC && HIBERNATION && X86_32
+	help
+	  Jump between original kernel and kexeced kernel and invoke
+	  code in physical address mode via KEXEC
+
 config PHYSICAL_START
 	hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
 	default "0x1000000" if X86_NUMAQ
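Note: the new KEXEC_JUMP help text is terse. As I understand the feature (background, not part of this diff), user space loads the second kernel while asking for the first kernel's context to be preserved, then triggers the jump through reboot(). A hedged C sketch follows; KEXEC_PRESERVE_CONTEXT and LINUX_REBOOT_CMD_KEXEC are my assumptions about the era's ABI names and should be checked against <linux/kexec.h> and <linux/reboot.h>:

/* Hedged sketch: driving kexec jump from user space.
 * Flag and command names are assumed, not taken from this diff. */
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/kexec.h>	/* struct kexec_segment, KEXEC_* flags */
#include <linux/reboot.h>	/* LINUX_REBOOT_* magic and commands */

static long load_preserve_context(unsigned long entry,
				  unsigned long nr_segments,
				  struct kexec_segment *segments)
{
	/* KEXEC_PRESERVE_CONTEXT (assumed) asks the kernel to save enough
	 * state that the kexeced kernel can later jump back. */
	return syscall(SYS_kexec_load, entry, nr_segments, segments,
		       KEXEC_ARCH_DEFAULT | KEXEC_PRESERVE_CONTEXT);
}

static long do_jump(void)
{
	/* The jump itself is triggered like a kexec reboot. */
	return syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2,
		       LINUX_REBOOT_CMD_KEXEC, 0);
}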
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 919ce21ea654..f5631da585b6 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -118,11 +118,6 @@ mflags-$(CONFIG_X86_GENERICARCH):= -Iinclude/asm-x86/mach-generic
 fcore-$(CONFIG_X86_GENERICARCH)	+= arch/x86/mach-generic/
 mcore-$(CONFIG_X86_GENERICARCH)	:= arch/x86/mach-default/
 
-# RDC R-321x subarch support
-mflags-$(CONFIG_X86_RDC321X)	:= -Iinclude/asm-x86/mach-rdc321x
-mcore-$(CONFIG_X86_RDC321X)	:= arch/x86/mach-default/
-core-$(CONFIG_X86_RDC321X)	+= arch/x86/mach-rdc321x/
-
 # default subarch .h files
 mflags-y += -Iinclude/asm-x86/mach-default
 
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index bc5553b496f7..9fea73706479 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -182,8 +182,6 @@ static unsigned outcnt;
 static int fill_inbuf(void);
 static void flush_window(void);
 static void error(char *m);
-static void gzip_mark(void **);
-static void gzip_release(void **);
 
 /*
  * This is set up by the setup-routine at boot-time
@@ -196,9 +194,6 @@ extern int input_len;
 
 static long bytes_out;
 
-static void *malloc(int size);
-static void free(void *where);
-
 static void *memset(void *s, int c, unsigned n);
 static void *memcpy(void *dest, const void *src, unsigned n);
 
@@ -220,40 +215,6 @@ static int lines, cols;
 
 #include "../../../../lib/inflate.c"
 
-static void *malloc(int size)
-{
-	void *p;
-
-	if (size < 0)
-		error("Malloc error");
-	if (free_mem_ptr <= 0)
-		error("Memory error");
-
-	free_mem_ptr = (free_mem_ptr + 3) & ~3;	/* Align */
-
-	p = (void *)free_mem_ptr;
-	free_mem_ptr += size;
-
-	if (free_mem_ptr >= free_mem_end_ptr)
-		error("Out of memory");
-
-	return p;
-}
-
-static void free(void *where)
-{	/* Don't care */
-}
-
-static void gzip_mark(void **ptr)
-{
-	*ptr = (void *) free_mem_ptr;
-}
-
-static void gzip_release(void **ptr)
-{
-	free_mem_ptr = (memptr) *ptr;
-}
-
 static void scroll(void)
 {
 	int i;
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 58cccb6483b0..a0e1dbe67dc1 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -441,12 +441,6 @@ beyond_if:
 	regs->r8 = regs->r9 = regs->r10 = regs->r11 =
 	regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
 	set_fs(USER_DS);
-	if (unlikely(current->ptrace & PT_PTRACED)) {
-		if (current->ptrace & PT_TRACE_EXEC)
-			ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
-		else
-			send_sig(SIGTRAP, current, 0);
-	}
 	return 0;
 }
 
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 23d146ce676b..ffc1bb4fed7d 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -15,6 +15,16 @@
 #include <asm/irqflags.h>
 #include <linux/linkage.h>
 
+/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
+#include <linux/elf-em.h>
+#define AUDIT_ARCH_I386		(EM_386|__AUDIT_ARCH_LE)
+#define __AUDIT_ARCH_LE		0x40000000
+
+#ifndef CONFIG_AUDITSYSCALL
+#define sysexit_audit int_ret_from_sys_call
+#define sysretl_audit int_ret_from_sys_call
+#endif
+
 #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
 
 .macro IA32_ARG_FIXUP noebp=0
@@ -148,13 +158,15 @@ ENTRY(ia32_sysenter_target)
 	ja ia32_badsys
sysenter_do_call:
 	IA32_ARG_FIXUP 1
+sysenter_dispatch:
 	call *ia32_sys_call_table(,%rax,8)
 	movq %rax,RAX-ARGOFFSET(%rsp)
 	GET_THREAD_INFO(%r10)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	testl $_TIF_ALLWORK_MASK,TI_flags(%r10)
-	jnz int_ret_from_sys_call
+	jnz sysexit_audit
+sysexit_from_sys_call:
 	andl $~TS_COMPAT,TI_status(%r10)
 	/* clear IF, that popfq doesn't enable interrupts early */
 	andl $~0x200,EFLAGS-R11(%rsp)
@@ -170,9 +182,63 @@ sysenter_do_call:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS_SYSEXIT32
 
-sysenter_tracesys:
+#ifdef CONFIG_AUDITSYSCALL
+	.macro auditsys_entry_common
+	movl %esi,%r9d			/* 6th arg: 4th syscall arg */
+	movl %edx,%r8d			/* 5th arg: 3rd syscall arg */
+	/* (already in %ecx)		   4th arg: 2nd syscall arg */
+	movl %ebx,%edx			/* 3rd arg: 1st syscall arg */
+	movl %eax,%esi			/* 2nd arg: syscall number */
+	movl $AUDIT_ARCH_I386,%edi	/* 1st arg: audit arch */
+	call audit_syscall_entry
+	movl RAX-ARGOFFSET(%rsp),%eax	/* reload syscall number */
+	cmpl $(IA32_NR_syscalls-1),%eax
+	ja ia32_badsys
+	movl %ebx,%edi			/* reload 1st syscall arg */
+	movl RCX-ARGOFFSET(%rsp),%esi	/* reload 2nd syscall arg */
+	movl RDX-ARGOFFSET(%rsp),%edx	/* reload 3rd syscall arg */
+	movl RSI-ARGOFFSET(%rsp),%ecx	/* reload 4th syscall arg */
+	movl RDI-ARGOFFSET(%rsp),%r8d	/* reload 5th syscall arg */
+	.endm
+
+	.macro auditsys_exit exit,ebpsave=RBP
+	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
+	jnz int_ret_from_sys_call
+	TRACE_IRQS_ON
+	sti
+	movl %eax,%esi		/* second arg, syscall return value */
+	cmpl $0,%eax		/* is it < 0? */
+	setl %al		/* 1 if so, 0 if not */
+	movzbl %al,%edi		/* zero-extend that into %edi */
+	inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
+	call audit_syscall_exit
+	GET_THREAD_INFO(%r10)
+	movl RAX-ARGOFFSET(%rsp),%eax	/* reload syscall return value */
+	movl \ebpsave-ARGOFFSET(%rsp),%ebp /* reload user register value */
+	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
+	cli
+	TRACE_IRQS_OFF
+	testl %edi,TI_flags(%r10)
+	jnz int_with_check
+	jmp \exit
+	.endm
+
+sysenter_auditsys:
 	CFI_RESTORE_STATE
+	auditsys_entry_common
+	movl %ebp,%r9d		/* reload 6th syscall arg */
+	jmp sysenter_dispatch
+
+sysexit_audit:
+	auditsys_exit sysexit_from_sys_call
+#endif
+
+sysenter_tracesys:
 	xchgl %r9d,%ebp
+#ifdef CONFIG_AUDITSYSCALL
+	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
+	jz sysenter_auditsys
+#endif
 	SAVE_REST
 	CLEAR_RREGS
 	movq %r9,R9(%rsp)
@@ -252,13 +318,15 @@ cstar_do_call:
 	cmpl $IA32_NR_syscalls-1,%eax
 	ja ia32_badsys
 	IA32_ARG_FIXUP 1
+cstar_dispatch:
 	call *ia32_sys_call_table(,%rax,8)
 	movq %rax,RAX-ARGOFFSET(%rsp)
 	GET_THREAD_INFO(%r10)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	testl $_TIF_ALLWORK_MASK,TI_flags(%r10)
-	jnz int_ret_from_sys_call
+	jnz sysretl_audit
+sysretl_from_sys_call:
 	andl $~TS_COMPAT,TI_status(%r10)
 	RESTORE_ARGS 1,-ARG_SKIP,1,1,1
 	movl RIP-ARGOFFSET(%rsp),%ecx
@@ -270,8 +338,23 @@ cstar_do_call:
 	CFI_RESTORE rsp
 	USERGS_SYSRET32
 
-cstar_tracesys:
+#ifdef CONFIG_AUDITSYSCALL
+cstar_auditsys:
 	CFI_RESTORE_STATE
+	movl %r9d,R9-ARGOFFSET(%rsp)	/* register to be clobbered by call */
+	auditsys_entry_common
+	movl R9-ARGOFFSET(%rsp),%r9d	/* reload 6th syscall arg */
+	jmp cstar_dispatch
+
+sysretl_audit:
+	auditsys_exit sysretl_from_sys_call, RCX /* user %ebp in RCX slot */
+#endif
+
+cstar_tracesys:
+#ifdef CONFIG_AUDITSYSCALL
+	testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
+	jz cstar_auditsys
+#endif
 	xchgl %r9d,%ebp
 	SAVE_REST
 	CLEAR_RREGS
@@ -743,4 +826,10 @@ ia32_sys_call_table:
 	.quad sys32_fallocate
 	.quad compat_sys_timerfd_settime	/* 325 */
 	.quad compat_sys_timerfd_gettime
+	.quad compat_sys_signalfd4
+	.quad sys_eventfd2
+	.quad sys_epoll_create1
+	.quad sys_dup3			/* 330 */
+	.quad sys_pipe2
+	.quad sys_inotify_init1
ia32_syscall_end:
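Note: the register shuffling in auditsys_entry_common follows the x86-64 C calling convention (%rdi, %rsi, %rdx, %rcx, %r8, %r9), so the audit hooks it calls can be read back as ordinary C prototypes. The signatures below are reconstructed from the macro comments and are an assumption about the 2.6.27-era API, to be verified against <linux/audit.h>:

/* Assumed prototypes of the hooks called by the macros above. */
extern void audit_syscall_entry(int arch,	   /* %edi: AUDIT_ARCH_I386 */
				int major,	   /* %esi: syscall number  */
				unsigned long a0,  /* %edx: 1st syscall arg */
				unsigned long a1,  /* %ecx: 2nd syscall arg */
				unsigned long a2,  /* %r8d: 3rd syscall arg */
				unsigned long a3); /* %r9d: 4th syscall arg */

/* On the exit side, the first argument is 1 (AUDITSC_SUCCESS) or
 * 2 (AUDITSC_FAILURE), derived from the sign of the return value;
 * the second is the return value itself. */
extern void audit_syscall_exit(int valid, long return_code);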
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index f00afdf61e67..d3c64088b981 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -238,7 +238,7 @@ asmlinkage long sys32_pipe(int __user *fd)
 	int retval;
 	int fds[2];
 
-	retval = do_pipe(fds);
+	retval = do_pipe_flags(fds, 0);
 	if (retval)
 		goto out;
 	if (copy_to_user(fd, fds, sizeof(fds)))
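Note: do_pipe_flags() is the flags-aware successor of do_pipe(); legacy entry points such as sys32_pipe() pass 0, while the new sys_pipe2 (added to the ia32 syscall table above) can thread its flags argument through. A minimal illustrative sketch, not kernel code:

/* Illustrative only: both pipe() and pipe2() funnel into one helper. */
extern int do_pipe_flags(int *fd, int flags);

static int demo_pipe_paths(void)
{
	int fds[2];

	/* legacy pipe(): no flags */
	int err = do_pipe_flags(fds, 0);

	/* sys_pipe2 would instead pass its flags straight through,
	 * e.g. O_NONBLOCK | O_CLOEXEC. */
	return err;
}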
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index a3ddad18aaa3..fa2161d5003b 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -150,6 +150,10 @@ static int __init acpi_sleep_setup(char *str)
 			acpi_realmode_flags |= 2;
 		if (strncmp(str, "s3_beep", 7) == 0)
 			acpi_realmode_flags |= 4;
+#ifdef CONFIG_HIBERNATION
+		if (strncmp(str, "s4_nohwsig", 10) == 0)
+			acpi_no_s4_hw_signature();
+#endif
 		if (strncmp(str, "old_ordering", 12) == 0)
 			acpi_old_suspend_ordering();
 		str = strchr(str, ',');
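Note: judging by the handler's name this is the parser for the acpi_sleep= boot option (the __setup line is outside this hunk, so treat the option name as an assumption). The flags are comma-separated, as the strchr(str, ',') loop shows, so the new flag composes with the existing ones, e.g. acpi_sleep=s4_nohwsig,old_ordering.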
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index c25210e6ac88..22d7d050905d 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -29,9 +29,6 @@
 
 #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
 
-#define to_pages(addr, size) \
-	 (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
-
 #define EXIT_LOOP_COUNT 10000000
 
 static DEFINE_RWLOCK(amd_iommu_devtable_lock);
@@ -185,7 +182,7 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
 		u64 address, size_t size)
 {
 	int s = 0;
-	unsigned pages = to_pages(address, size);
+	unsigned pages = iommu_num_pages(address, size);
 
 	address &= PAGE_MASK;
 
@@ -557,8 +554,8 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
 	if (iommu->exclusion_start &&
 	    iommu->exclusion_start < dma_dom->aperture_size) {
 		unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
-		int pages = to_pages(iommu->exclusion_start,
-				     iommu->exclusion_length);
+		int pages = iommu_num_pages(iommu->exclusion_start,
+					    iommu->exclusion_length);
 		dma_ops_reserve_addresses(dma_dom, startpage, pages);
 	}
 
@@ -667,7 +664,7 @@ static int get_device_resources(struct device *dev,
 	_bdf = calc_devid(pcidev->bus->number, pcidev->devfn);
 
 	/* device not translated by any IOMMU in the system? */
-	if (_bdf >= amd_iommu_last_bdf) {
+	if (_bdf > amd_iommu_last_bdf) {
 		*iommu = NULL;
 		*domain = NULL;
 		*bdf = 0xffff;
@@ -767,7 +764,7 @@ static dma_addr_t __map_single(struct device *dev,
 	unsigned int pages;
 	int i;
 
-	pages = to_pages(paddr, size);
+	pages = iommu_num_pages(paddr, size);
 	paddr &= PAGE_MASK;
 
 	address = dma_ops_alloc_addresses(dev, dma_dom, pages);
@@ -802,7 +799,7 @@ static void __unmap_single(struct amd_iommu *iommu,
 	if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size))
 		return;
 
-	pages = to_pages(dma_addr, size);
+	pages = iommu_num_pages(dma_addr, size);
 	dma_addr &= PAGE_MASK;
 	start = dma_addr;
 
@@ -1085,7 +1082,7 @@ void prealloc_protection_domains(void)
 
 	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
 		devid = (dev->bus->number << 8) | dev->devfn;
-		if (devid >= amd_iommu_last_bdf)
+		if (devid > amd_iommu_last_bdf)
 			continue;
 		devid = amd_iommu_alias_table[devid];
 		if (domain_for_device(devid))
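Note: the replacement is one-for-one, so iommu_num_pages() can be assumed to compute exactly what the deleted to_pages() macro did: how many pages a possibly unaligned buffer touches. Restated as a standalone function for clarity:

/* Restatement of the deleted to_pages() macro; the generic
 * iommu_num_pages() helper should be equivalent for these callers. */
static unsigned long buffer_num_pages(unsigned long addr, unsigned long size)
{
	/* Page offset of the start plus the length, rounded up to whole
	 * pages: an unaligned one-byte buffer still occupies one page. */
	return round_up((addr & ~PAGE_MASK) + size, PAGE_SIZE) >> PAGE_SHIFT;
}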
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index c9d8ff2eb130..d9a9da597e79 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -732,7 +732,7 @@ static int __init init_exclusion_range(struct ivmd_header *m)
 		set_device_exclusion_range(m->devid, m);
 		break;
 	case ACPI_IVMD_TYPE_ALL:
-		for (i = 0; i < amd_iommu_last_bdf; ++i)
+		for (i = 0; i <= amd_iommu_last_bdf; ++i)
 			set_device_exclusion_range(i, m);
 		break;
 	case ACPI_IVMD_TYPE_RANGE:
@@ -934,7 +934,7 @@ int __init amd_iommu_init(void)
 	/*
 	 * let all alias entries point to itself
 	 */
-	for (i = 0; i < amd_iommu_last_bdf; ++i)
+	for (i = 0; i <= amd_iommu_last_bdf; ++i)
 		amd_iommu_alias_table[i] = i;
 
 	/*
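Note: these two hunks and the >= to > changes in amd_iommu.c above are one logical fix: amd_iommu_last_bdf is an inclusive bound (the last valid device ID, not a count), so loops over all devices must use <= and the "not handled by any IOMMU" rejection test must use >.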
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index d6c898358371..039a8d4aaf62 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1720,15 +1720,19 @@ static int __init parse_lapic_timer_c2_ok(char *arg)
 }
 early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
 
-static int __init apic_set_verbosity(char *str)
+static int __init apic_set_verbosity(char *arg)
 {
-	if (strcmp("debug", str) == 0)
+	if (!arg)
+		return -EINVAL;
+
+	if (strcmp(arg, "debug") == 0)
 		apic_verbosity = APIC_DEBUG;
-	else if (strcmp("verbose", str) == 0)
+	else if (strcmp(arg, "verbose") == 0)
 		apic_verbosity = APIC_VERBOSE;
-	return 1;
+
+	return 0;
 }
-__setup("apic=", apic_set_verbosity);
+early_param("apic", apic_set_verbosity);
 
 static int __init lapic_insert_resource(void)
 {
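Note: switching from __setup() to early_param() means the option is parsed early enough in boot to affect early APIC bring-up, and the handler now explicitly rejects a bare "apic" with -EINVAL. Usage is otherwise unchanged: boot with apic=verbose or apic=debug on the kernel command line.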
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index bf9b441331e9..9ee24e6bc4b0 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -219,7 +219,6 @@
 #include <linux/time.h>
 #include <linux/sched.h>
 #include <linux/pm.h>
-#include <linux/pm_legacy.h>
 #include <linux/capability.h>
 #include <linux/device.h>
 #include <linux/kernel.h>
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index c9b58a806e85..c8e315f1aa83 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -50,6 +50,8 @@ static double __initdata y = 3145727.0;
  */
 static void __init check_fpu(void)
 {
+	s32 fdiv_bug;
+
 	if (!boot_cpu_data.hard_math) {
 #ifndef CONFIG_MATH_EMULATION
 		printk(KERN_EMERG "No coprocessor found and no math emulation present.\n");
@@ -74,8 +76,10 @@ static void __init check_fpu(void)
 		"fistpl %0\n\t"
 		"fwait\n\t"
 		"fninit"
-		: "=m" (*&boot_cpu_data.fdiv_bug)
+		: "=m" (*&fdiv_bug)
 		: "m" (*&x), "m" (*&y));
+
+	boot_cpu_data.fdiv_bug = fdiv_bug;
 	if (boot_cpu_data.fdiv_bug)
 		printk("Hmm, FPU with FDIV bug.\n");
 }
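Note: this is the classic Pentium FDIV-bug probe. With y = 3145727.0 as defined at the top of the file (x is presumably the canonical companion value 4195835.0), x - (x/y)*y is exactly 0 on a correct FPU, while the flawed Pentium divider famously returns 256. A user-space restatement, using long double to approximate the x87 extended precision the kernel's inline asm relies on:

/* User-space sketch of the check_fpu() probe above; on FPUs where
 * long double is not x87 80-bit, treat this as illustrative only. */
#include <stdio.h>

int main(void)
{
	volatile long double x = 4195835.0L, y = 3145727.0L;
	long double z = x - (x / y) * y;

	printf("fdiv residue: %Lf\n", z);	/* 0 on a good FPU */
	return z != 0.0L;
}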
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index cb7a5715596d..efae3b22a0ff 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -235,9 +235,9 @@ config X86_LONGHAUL
 	  If in doubt, say N.
 
 config X86_E_POWERSAVER
-	tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)"
+	tristate "VIA C7 Enhanced PowerSaver"
 	select CPU_FREQ_TABLE
-	depends on X86_32 && EXPERIMENTAL
+	depends on X86_32
 	help
 	  This adds the CPUFreq driver for VIA C7 processors.
 
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index b0c8208df9fa..dd097b835839 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -202,7 +202,7 @@ static void drv_write(struct drv_cmd *cmd)
 	cpumask_t saved_mask = current->cpus_allowed;
 	unsigned int i;
 
-	for_each_cpu_mask(i, cmd->mask) {
+	for_each_cpu_mask_nr(i, cmd->mask) {
 		set_cpus_allowed_ptr(current, &cpumask_of_cpu(i));
 		do_drv_write(cmd);
 	}
@@ -451,7 +451,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 
 	freqs.old = perf->states[perf->state].core_frequency * 1000;
 	freqs.new = data->freq_table[next_state].frequency;
-	for_each_cpu_mask(i, cmd.mask) {
+	for_each_cpu_mask_nr(i, cmd.mask) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 	}
@@ -466,7 +466,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 		}
 	}
 
-	for_each_cpu_mask(i, cmd.mask) {
+	for_each_cpu_mask_nr(i, cmd.mask) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 	}
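Note: these conversions (repeated in p4-clockmod.c and powernow-k8.c below) are mechanical. for_each_cpu_mask_nr() stops scanning at the runtime CPU count (nr_cpu_ids) instead of the compile-time NR_CPUS, which saves work when NR_CPUS is configured much larger than the actual machine. Roughly, as I paraphrase the 2.6.27-era macros (not a verbatim copy; check <linux/cpumask.h>):

/* Paraphrased semantics of the two iterators, for orientation only. */
#define for_each_cpu_mask(cpu, mask)				\
	for ((cpu) = first_cpu(mask);				\
	     (cpu) < NR_CPUS;		/* compile-time bound */\
	     (cpu) = next_cpu((cpu), (mask)))

#define for_each_cpu_mask_nr(cpu, mask)				\
	for ((cpu) = first_cpu(mask);				\
	     (cpu) < nr_cpu_ids;	/* runtime bound */	\
	     (cpu) = next_cpu_nr((cpu), (mask)))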
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
index 94619c22f563..e4a4bf870e94 100644
--- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c
@@ -44,7 +44,7 @@ struct s_elan_multiplier {
  * It is important that the frequencies
  * are listed in ascending order here!
  */
-struct s_elan_multiplier elan_multiplier[] = {
+static struct s_elan_multiplier elan_multiplier[] = {
 	{1000,	0x02,	0x18},
 	{2000,	0x02,	0x10},
 	{4000,	0x02,	0x08},
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index 199e4e05e5dc..f1685fb91fbd 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -122,7 +122,7 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy,
 		return 0;
 
 	/* notifiers */
-	for_each_cpu_mask(i, policy->cpus) {
+	for_each_cpu_mask_nr(i, policy->cpus) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 	}
@@ -130,11 +130,11 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy,
 	/* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software
 	 * Developer's Manual, Volume 3
 	 */
-	for_each_cpu_mask(i, policy->cpus)
+	for_each_cpu_mask_nr(i, policy->cpus)
 		cpufreq_p4_setdc(i, p4clockmod_table[newstate].index);
 
 	/* notifiers */
-	for_each_cpu_mask(i, policy->cpus) {
+	for_each_cpu_mask_nr(i, policy->cpus) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 	}
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 206791eb46e3..4e7271999a74 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -66,7 +66,6 @@ static u32 find_freq_from_fid(u32 fid)
 	return 800 + (fid * 100);
 }
 
-
 /* Return a frequency in KHz, given an input fid */
 static u32 find_khz_freq_from_fid(u32 fid)
 {
@@ -78,7 +77,6 @@ static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 p
 	return data[pstate].frequency;
 }
 
-
 /* Return the vco fid for an input fid
  *
  * Each "low" fid has corresponding "high" fid, and you can get to "low" fids
@@ -166,7 +164,6 @@ static void fidvid_msr_init(void)
 	wrmsr(MSR_FIDVID_CTL, lo, hi);
 }
 
-
 /* write the new fid value along with the other control fields to the msr */
 static int write_new_fid(struct powernow_k8_data *data, u32 fid)
 {
@@ -740,44 +737,63 @@ static int find_psb_table(struct powernow_k8_data *data)
 #ifdef CONFIG_X86_POWERNOW_K8_ACPI
 static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index)
 {
-	if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
+	if (!data->acpi_data->state_count || (cpu_family == CPU_HW_PSTATE))
 		return;
 
-	data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK;
-	data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK;
-	data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
-	data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK;
-	data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK);
-	data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK;
+	data->irt = (data->acpi_data->states[index].control >> IRT_SHIFT) & IRT_MASK;
+	data->rvo = (data->acpi_data->states[index].control >> RVO_SHIFT) & RVO_MASK;
+	data->exttype = (data->acpi_data->states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
+	data->plllock = (data->acpi_data->states[index].control >> PLL_L_SHIFT) & PLL_L_MASK;
+	data->vidmvs = 1 << ((data->acpi_data->states[index].control >> MVS_SHIFT) & MVS_MASK);
+	data->vstable = (data->acpi_data->states[index].control >> VST_SHIFT) & VST_MASK;
+}
+
+
+static struct acpi_processor_performance *acpi_perf_data;
+static int preregister_valid;
+
+static int powernow_k8_cpu_preinit_acpi(void)
+{
+	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
+	if (!acpi_perf_data)
+		return -ENODEV;
+
+	if (acpi_processor_preregister_performance(acpi_perf_data))
+		return -ENODEV;
+	else
+		preregister_valid = 1;
+	return 0;
 }
 
 static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 {
 	struct cpufreq_frequency_table *powernow_table;
 	int ret_val;
+	int cpu = 0;
 
-	if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
+	data->acpi_data = percpu_ptr(acpi_perf_data, cpu);
+	if (acpi_processor_register_performance(data->acpi_data, data->cpu)) {
 		dprintk("register performance failed: bad ACPI data\n");
 		return -EIO;
 	}
 
 	/* verify the data contained in the ACPI structures */
-	if (data->acpi_data.state_count <= 1) {
+	if (data->acpi_data->state_count <= 1) {
 		dprintk("No ACPI P-States\n");
 		goto err_out;
 	}
 
-	if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
-		(data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
+	if ((data->acpi_data->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
+		(data->acpi_data->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
 		dprintk("Invalid control/status registers (%x - %x)\n",
-			data->acpi_data.control_register.space_id,
-			data->acpi_data.status_register.space_id);
+			data->acpi_data->control_register.space_id,
+			data->acpi_data->status_register.space_id);
 		goto err_out;
 	}
 
 	/* fill in data->powernow_table */
 	powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
-		* (data->acpi_data.state_count + 1)), GFP_KERNEL);
+		* (data->acpi_data->state_count + 1)), GFP_KERNEL);
 	if (!powernow_table) {
 		dprintk("powernow_table memory alloc failure\n");
 		goto err_out;
@@ -790,12 +806,12 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 	if (ret_val)
 		goto err_out_mem;
 
-	powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END;
-	powernow_table[data->acpi_data.state_count].index = 0;
+	powernow_table[data->acpi_data->state_count].frequency = CPUFREQ_TABLE_END;
+	powernow_table[data->acpi_data->state_count].index = 0;
 	data->powernow_table = powernow_table;
 
 	/* fill in data */
-	data->numps = data->acpi_data.state_count;
+	data->numps = data->acpi_data->state_count;
 	if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu)
 		print_basics(data);
 	powernow_k8_acpi_pst_values(data, 0);
@@ -803,16 +819,31 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 	/* notify BIOS that we exist */
 	acpi_processor_notify_smm(THIS_MODULE);
 
+	/* determine affinity, from ACPI if available */
+	if (preregister_valid) {
+		if ((data->acpi_data->shared_type == CPUFREQ_SHARED_TYPE_ALL) ||
+		    (data->acpi_data->shared_type == CPUFREQ_SHARED_TYPE_ANY))
+			data->starting_core_affinity = data->acpi_data->shared_cpu_map;
+		else
+			data->starting_core_affinity = cpumask_of_cpu(data->cpu);
+	} else {
+		/* best guess from family if not */
+		if (cpu_family == CPU_HW_PSTATE)
+			data->starting_core_affinity = cpumask_of_cpu(data->cpu);
+		else
+			data->starting_core_affinity = per_cpu(cpu_core_map, data->cpu);
+	}
+
 	return 0;
 
 err_out_mem:
 	kfree(powernow_table);
 
 err_out:
-	acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+	acpi_processor_unregister_performance(data->acpi_data, data->cpu);
 
 	/* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */
-	data->acpi_data.state_count = 0;
+	data->acpi_data->state_count = 0;
 
 	return -ENODEV;
 }
@@ -824,10 +855,10 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf
 	rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo);
 	data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT;
 
-	for (i = 0; i < data->acpi_data.state_count; i++) {
+	for (i = 0; i < data->acpi_data->state_count; i++) {
 		u32 index;
 
-		index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
+		index = data->acpi_data->states[i].control & HW_PSTATE_MASK;
 		if (index > data->max_hw_pstate) {
 			printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index);
 			printk(KERN_ERR PFX "Please report to BIOS manufacturer\n");
@@ -843,7 +874,7 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf
 
 		powernow_table[i].index = index;
 
-		powernow_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000;
+		powernow_table[i].frequency = data->acpi_data->states[i].core_frequency * 1000;
 	}
 	return 0;
 }
@@ -852,16 +883,16 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf
 {
 	int i;
 	int cntlofreq = 0;
-	for (i = 0; i < data->acpi_data.state_count; i++) {
+	for (i = 0; i < data->acpi_data->state_count; i++) {
 		u32 fid;
 		u32 vid;
 
 		if (data->exttype) {
-			fid = data->acpi_data.states[i].status & EXT_FID_MASK;
-			vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK;
+			fid = data->acpi_data->states[i].status & EXT_FID_MASK;
+			vid = (data->acpi_data->states[i].status >> VID_SHIFT) & EXT_VID_MASK;
 		} else {
-			fid = data->acpi_data.states[i].control & FID_MASK;
-			vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK;
+			fid = data->acpi_data->states[i].control & FID_MASK;
+			vid = (data->acpi_data->states[i].control >> VID_SHIFT) & VID_MASK;
 		}
 
 		dprintk("   %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
@@ -902,10 +933,10 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf
 			cntlofreq = i;
 		}
 
-		if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) {
+		if (powernow_table[i].frequency != (data->acpi_data->states[i].core_frequency * 1000)) {
 			printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n",
 				powernow_table[i].frequency,
-				(unsigned int) (data->acpi_data.states[i].core_frequency * 1000));
+				(unsigned int) (data->acpi_data->states[i].core_frequency * 1000));
 			powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
 			continue;
 		}
@@ -915,11 +946,12 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf
 
 static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
 {
-	if (data->acpi_data.state_count)
-		acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+	if (data->acpi_data->state_count)
+		acpi_processor_unregister_performance(data->acpi_data, data->cpu);
 }
 
 #else
+static int powernow_k8_cpu_preinit_acpi(void) { return -ENODEV; }
 static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; }
 static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; }
 static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; }
@@ -966,7 +998,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i
 	freqs.old = find_khz_freq_from_fid(data->currfid);
 	freqs.new = find_khz_freq_from_fid(fid);
 
-	for_each_cpu_mask(i, *(data->available_cores)) {
+	for_each_cpu_mask_nr(i, *(data->available_cores)) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 	}
@@ -974,7 +1006,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i
 	res = transition_fid_vid(data, fid, vid);
 	freqs.new = find_khz_freq_from_fid(data->currfid);
 
-	for_each_cpu_mask(i, *(data->available_cores)) {
+	for_each_cpu_mask_nr(i, *(data->available_cores)) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 	}
@@ -997,7 +1029,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
 	freqs.old = find_khz_freq_from_pstate(data->powernow_table, data->currpstate);
 	freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
 
-	for_each_cpu_mask(i, *(data->available_cores)) {
+	for_each_cpu_mask_nr(i, *(data->available_cores)) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 	}
@@ -1005,7 +1037,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i
 	res = transition_pstate(data, pstate);
 	freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate);
 
-	for_each_cpu_mask(i, *(data->available_cores)) {
+	for_each_cpu_mask_nr(i, *(data->available_cores)) {
 		freqs.cpu = i;
 		cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
 	}
@@ -1104,7 +1136,7 @@ static int powernowk8_verify(struct cpufreq_policy *pol)
 static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 {
 	struct powernow_k8_data *data;
-	cpumask_t oldmask;
+	cpumask_t oldmask = CPU_MASK_ALL;
 	int rc;
 
 	if (!cpu_online(pol->cpu))
@@ -1177,10 +1209,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 	/* run on any CPU again */
 	set_cpus_allowed_ptr(current, &oldmask);
 
-	if (cpu_family == CPU_HW_PSTATE)
-		pol->cpus = cpumask_of_cpu(pol->cpu);
-	else
-		pol->cpus = per_cpu(cpu_core_map, pol->cpu);
+	pol->cpus = data->starting_core_affinity;
 	data->available_cores = &(pol->cpus);
 
 	/* Take a crude guess here.
@@ -1303,6 +1332,7 @@ static int __cpuinit powernowk8_init(void)
 	}
 
 	if (supported_cpus == num_online_cpus()) {
+		powernow_k8_cpu_preinit_acpi();
 		printk(KERN_INFO PFX "Found %d %s "
 			"processors (%d cpu cores) (" VERSION ")\n",
 			num_online_nodes(),
@@ -1319,6 +1349,10 @@ static void __exit powernowk8_exit(void)
 	dprintk("exit\n");
 
 	cpufreq_unregister_driver(&cpufreq_amd64_driver);
+
+#ifdef CONFIG_X86_POWERNOW_K8_ACPI
+	free_percpu(acpi_perf_data);
+#endif
 }
 
 MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com>");
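Note: the new preinit step exists so acpi_processor_preregister_performance() can fill in ACPI _PSD coordination data (shared_type/shared_cpu_map) for every CPU before any single policy is initialized; starting_core_affinity is then derived from it in powernow_k8_cpu_init_acpi(). The allocation pattern, reduced to a minimal sketch with a made-up payload type:

/* Minimal sketch of the alloc_percpu()/free_percpu() lifecycle used by
 * powernow_k8_cpu_preinit_acpi(); struct demo_slot is hypothetical. */
struct demo_slot { int state_count; };

static struct demo_slot *demo_data;

static int demo_preinit(void)
{
	demo_data = alloc_percpu(struct demo_slot);	/* one slot per CPU */
	if (!demo_data)
		return -ENODEV;
	return 0;
}

static void demo_exit(void)
{
	free_percpu(demo_data);		/* mirrors powernowk8_exit() above */
}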
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index ab48cfed4d96..a62612cd4be8 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -33,12 +33,13 @@ struct powernow_k8_data {
 #ifdef CONFIG_X86_POWERNOW_K8_ACPI
 	/* the acpi table needs to be kept. it's only available if ACPI was
 	 * used to determine valid frequency/vid/fid states */
-	struct acpi_processor_performance acpi_data;
+	struct acpi_processor_performance *acpi_data;
 #endif
 	/* we need to keep track of associated cores, but let cpufreq
 	 * handle hotplug events - so just point at cpufreq pol->cpus
 	 * structure */
 	cpumask_t *available_cores;
+	cpumask_t starting_core_affinity;
 };
 
 
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index 908dd347c67e..15e13c01cc36 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -28,7 +28,8 @@
 #define PFX		"speedstep-centrino: "
 #define MAINTAINER	"cpufreq@lists.linux.org.uk"
 
-#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg)
+#define dprintk(msg...) \
+	cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg)
 
 #define INTEL_MSR_RANGE	(0xffff)
 
@@ -66,11 +67,12 @@ struct cpu_model
 
 	struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */
 };
-static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x);
+static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c,
+				  const struct cpu_id *x);
 
 /* Operating points for current CPU */
-static struct cpu_model *centrino_model[NR_CPUS];
-static const struct cpu_id *centrino_cpu[NR_CPUS];
+static DEFINE_PER_CPU(struct cpu_model *, centrino_model);
+static DEFINE_PER_CPU(const struct cpu_id *, centrino_cpu);
 
 static struct cpufreq_driver centrino_driver;
 
@@ -255,7 +257,7 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy)
 		return -ENOENT;
 	}
 
-	centrino_model[policy->cpu] = model;
+	per_cpu(centrino_model, policy->cpu) = model;
 
 	dprintk("found \"%s\": max frequency: %dkHz\n",
 	       model->model_name, model->max_freq);
@@ -264,10 +266,14 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy)
 }
 
 #else
-static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) { return -ENODEV; }
+static inline int centrino_cpu_init_table(struct cpufreq_policy *policy)
+{
+	return -ENODEV;
+}
 #endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */
 
-static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x)
+static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c,
+				  const struct cpu_id *x)
 {
 	if ((c->x86 == x->x86) &&
 	    (c->x86_model == x->x86_model) &&
@@ -286,23 +292,28 @@ static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe)
 	 * for centrino, as some DSDTs are buggy.
 	 * Ideally, this can be done using the acpi_data structure.
 	 */
-	if ((centrino_cpu[cpu] == &cpu_ids[CPU_BANIAS]) ||
-	    (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_A1]) ||
-	    (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_B0])) {
+	if ((per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_BANIAS]) ||
+	    (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_A1]) ||
+	    (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_B0])) {
 		msr = (msr >> 8) & 0xff;
 		return msr * 100000;
 	}
 
-	if ((!centrino_model[cpu]) || (!centrino_model[cpu]->op_points))
+	if ((!per_cpu(centrino_model, cpu)) ||
+	    (!per_cpu(centrino_model, cpu)->op_points))
 		return 0;
 
 	msr &= 0xffff;
-	for (i=0;centrino_model[cpu]->op_points[i].frequency != CPUFREQ_TABLE_END; i++) {
-		if (msr == centrino_model[cpu]->op_points[i].index)
-			return centrino_model[cpu]->op_points[i].frequency;
+	for (i = 0;
+	     per_cpu(centrino_model, cpu)->op_points[i].frequency
+							!= CPUFREQ_TABLE_END;
+	     i++) {
+		if (msr == per_cpu(centrino_model, cpu)->op_points[i].index)
+			return per_cpu(centrino_model, cpu)->
+							op_points[i].frequency;
 	}
 	if (failsafe)
-		return centrino_model[cpu]->op_points[i-1].frequency;
+		return per_cpu(centrino_model, cpu)->op_points[i-1].frequency;
 	else
 		return 0;
 }
| @@ -347,7 +358,8 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) | |||
| 347 | int i; | 358 | int i; |
| 348 | 359 | ||
| 349 | /* Only Intel makes Enhanced Speedstep-capable CPUs */ | 360 | /* Only Intel makes Enhanced Speedstep-capable CPUs */ |
| 350 | if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST)) | 361 | if (cpu->x86_vendor != X86_VENDOR_INTEL || |
| 362 | !cpu_has(cpu, X86_FEATURE_EST)) | ||
| 351 | return -ENODEV; | 363 | return -ENODEV; |
| 352 | 364 | ||
| 353 | if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC)) | 365 | if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC)) |
| @@ -361,9 +373,9 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) | |||
| 361 | break; | 373 | break; |
| 362 | 374 | ||
| 363 | if (i != N_IDS) | 375 | if (i != N_IDS) |
| 364 | centrino_cpu[policy->cpu] = &cpu_ids[i]; | 376 | per_cpu(centrino_cpu, policy->cpu) = &cpu_ids[i]; |
| 365 | 377 | ||
| 366 | if (!centrino_cpu[policy->cpu]) { | 378 | if (!per_cpu(centrino_cpu, policy->cpu)) { |
| 367 | dprintk("found unsupported CPU with " | 379 | dprintk("found unsupported CPU with " |
| 368 | "Enhanced SpeedStep: send /proc/cpuinfo to " | 380 | "Enhanced SpeedStep: send /proc/cpuinfo to " |
| 369 | MAINTAINER "\n"); | 381 | MAINTAINER "\n"); |
| @@ -386,23 +398,26 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) | |||
| 386 | /* check to see if it stuck */ | 398 | /* check to see if it stuck */ |
| 387 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | 399 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); |
| 388 | if (!(l & (1<<16))) { | 400 | if (!(l & (1<<16))) { |
| 389 | printk(KERN_INFO PFX "couldn't enable Enhanced SpeedStep\n"); | 401 | printk(KERN_INFO PFX |
| 402 | "couldn't enable Enhanced SpeedStep\n"); | ||
| 390 | return -ENODEV; | 403 | return -ENODEV; |
| 391 | } | 404 | } |
| 392 | } | 405 | } |
| 393 | 406 | ||
| 394 | freq = get_cur_freq(policy->cpu); | 407 | freq = get_cur_freq(policy->cpu); |
| 395 | 408 | policy->cpuinfo.transition_latency = 10000; | |
| 396 | policy->cpuinfo.transition_latency = 10000; /* 10uS transition latency */ | 409 | /* 10uS transition latency */ |
| 397 | policy->cur = freq; | 410 | policy->cur = freq; |
| 398 | 411 | ||
| 399 | dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur); | 412 | dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur); |
| 400 | 413 | ||
| 401 | ret = cpufreq_frequency_table_cpuinfo(policy, centrino_model[policy->cpu]->op_points); | 414 | ret = cpufreq_frequency_table_cpuinfo(policy, |
| 415 | per_cpu(centrino_model, policy->cpu)->op_points); | ||
| 402 | if (ret) | 416 | if (ret) |
| 403 | return (ret); | 417 | return (ret); |
| 404 | 418 | ||
| 405 | cpufreq_frequency_table_get_attr(centrino_model[policy->cpu]->op_points, policy->cpu); | 419 | cpufreq_frequency_table_get_attr( |
| 420 | per_cpu(centrino_model, policy->cpu)->op_points, policy->cpu); | ||
| 406 | 421 | ||
| 407 | return 0; | 422 | return 0; |
| 408 | } | 423 | } |
| @@ -411,12 +426,12 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy) | |||
| 411 | { | 426 | { |
| 412 | unsigned int cpu = policy->cpu; | 427 | unsigned int cpu = policy->cpu; |
| 413 | 428 | ||
| 414 | if (!centrino_model[cpu]) | 429 | if (!per_cpu(centrino_model, cpu)) |
| 415 | return -ENODEV; | 430 | return -ENODEV; |
| 416 | 431 | ||
| 417 | cpufreq_frequency_table_put_attr(cpu); | 432 | cpufreq_frequency_table_put_attr(cpu); |
| 418 | 433 | ||
| 419 | centrino_model[cpu] = NULL; | 434 | per_cpu(centrino_model, cpu) = NULL; |
| 420 | 435 | ||
| 421 | return 0; | 436 | return 0; |
| 422 | } | 437 | } |
| @@ -430,17 +445,26 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy) | |||
| 430 | */ | 445 | */ |
| 431 | static int centrino_verify (struct cpufreq_policy *policy) | 446 | static int centrino_verify (struct cpufreq_policy *policy) |
| 432 | { | 447 | { |
| 433 | return cpufreq_frequency_table_verify(policy, centrino_model[policy->cpu]->op_points); | 448 | return cpufreq_frequency_table_verify(policy, |
| 449 | per_cpu(centrino_model, policy->cpu)->op_points); | ||
| 434 | } | 450 | } |
| 435 | 451 | ||
| 436 | /** | 452 | /** |
| 437 | * centrino_setpolicy - set a new CPUFreq policy | 453 | * centrino_setpolicy - set a new CPUFreq policy |
| 438 | * @policy: new policy | 454 | * @policy: new policy |
| 439 | * @target_freq: the target frequency | 455 | * @target_freq: the target frequency |
| 440 | * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) | 456 | * @relation: how that frequency relates to achieved frequency |
| 457 | * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) | ||
| 441 | * | 458 | * |
| 442 | * Sets a new CPUFreq policy. | 459 | * Sets a new CPUFreq policy. |
| 443 | */ | 460 | */ |
| 461 | struct allmasks { | ||
| 462 | cpumask_t online_policy_cpus; | ||
| 463 | cpumask_t saved_mask; | ||
| 464 | cpumask_t set_mask; | ||
| 465 | cpumask_t covered_cpus; | ||
| 466 | }; | ||
| 467 | |||
| 444 | static int centrino_target (struct cpufreq_policy *policy, | 468 | static int centrino_target (struct cpufreq_policy *policy, |
| 445 | unsigned int target_freq, | 469 | unsigned int target_freq, |
| 446 | unsigned int relation) | 470 | unsigned int relation) |
| @@ -448,48 +472,55 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
| 448 | unsigned int newstate = 0; | 472 | unsigned int newstate = 0; |
| 449 | unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu; | 473 | unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu; |
| 450 | struct cpufreq_freqs freqs; | 474 | struct cpufreq_freqs freqs; |
| 451 | cpumask_t online_policy_cpus; | ||
| 452 | cpumask_t saved_mask; | ||
| 453 | cpumask_t set_mask; | ||
| 454 | cpumask_t covered_cpus; | ||
| 455 | int retval = 0; | 475 | int retval = 0; |
| 456 | unsigned int j, k, first_cpu, tmp; | 476 | unsigned int j, k, first_cpu, tmp; |
| 457 | 477 | CPUMASK_ALLOC(allmasks); | |
| 458 | if (unlikely(centrino_model[cpu] == NULL)) | 478 | CPUMASK_PTR(online_policy_cpus, allmasks); |
| 459 | return -ENODEV; | 479 | CPUMASK_PTR(saved_mask, allmasks); |
| 480 | CPUMASK_PTR(set_mask, allmasks); | ||
| 481 | CPUMASK_PTR(covered_cpus, allmasks); | ||
| 482 | |||
| 483 | if (unlikely(allmasks == NULL)) | ||
| 484 | return -ENOMEM; | ||
| 485 | |||
| 486 | if (unlikely(per_cpu(centrino_model, cpu) == NULL)) { | ||
| 487 | retval = -ENODEV; | ||
| 488 | goto out; | ||
| 489 | } | ||
| 460 | 490 | ||
| 461 | if (unlikely(cpufreq_frequency_table_target(policy, | 491 | if (unlikely(cpufreq_frequency_table_target(policy, |
| 462 | centrino_model[cpu]->op_points, | 492 | per_cpu(centrino_model, cpu)->op_points, |
| 463 | target_freq, | 493 | target_freq, |
| 464 | relation, | 494 | relation, |
| 465 | &newstate))) { | 495 | &newstate))) { |
| 466 | return -EINVAL; | 496 | retval = -EINVAL; |
| 497 | goto out; | ||
| 467 | } | 498 | } |
| 468 | 499 | ||
| 469 | #ifdef CONFIG_HOTPLUG_CPU | 500 | #ifdef CONFIG_HOTPLUG_CPU |
| 470 | /* cpufreq holds the hotplug lock, so we are safe from here on */ | 501 | /* cpufreq holds the hotplug lock, so we are safe from here on */ |
| 471 | cpus_and(online_policy_cpus, cpu_online_map, policy->cpus); | 502 | cpus_and(*online_policy_cpus, cpu_online_map, policy->cpus); |
| 472 | #else | 503 | #else |
| 473 | online_policy_cpus = policy->cpus; | 504 | *online_policy_cpus = policy->cpus; |
| 474 | #endif | 505 | #endif |
| 475 | 506 | ||
| 476 | saved_mask = current->cpus_allowed; | 507 | *saved_mask = current->cpus_allowed; |
| 477 | first_cpu = 1; | 508 | first_cpu = 1; |
| 478 | cpus_clear(covered_cpus); | 509 | cpus_clear(*covered_cpus); |
| 479 | for_each_cpu_mask(j, online_policy_cpus) { | 510 | for_each_cpu_mask_nr(j, *online_policy_cpus) { |
| 480 | /* | 511 | /* |
| 481 | * Support for SMP systems. | 512 | * Support for SMP systems. |
| 482 | * Make sure we are running on the CPU that wants to change freq | 513 |
| 483 | */ | 514 | */ |
| 484 | cpus_clear(set_mask); | 515 | cpus_clear(*set_mask); |
| 485 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) | 516 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) |
| 486 | cpus_or(set_mask, set_mask, online_policy_cpus); | 517 | cpus_or(*set_mask, *set_mask, *online_policy_cpus); |
| 487 | else | 518 | else |
| 488 | cpu_set(j, set_mask); | 519 | cpu_set(j, *set_mask); |
| 489 | 520 | ||
| 490 | set_cpus_allowed_ptr(current, &set_mask); | 521 | set_cpus_allowed_ptr(current, set_mask); |
| 491 | preempt_disable(); | 522 | preempt_disable(); |
| 492 | if (unlikely(!cpu_isset(smp_processor_id(), set_mask))) { | 523 | if (unlikely(!cpu_isset(smp_processor_id(), *set_mask))) { |
| 493 | dprintk("couldn't limit to CPUs in this domain\n"); | 524 | dprintk("couldn't limit to CPUs in this domain\n"); |
| 494 | retval = -EAGAIN; | 525 | retval = -EAGAIN; |
| 495 | if (first_cpu) { | 526 | if (first_cpu) { |
| @@ -500,7 +531,7 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
| 500 | break; | 531 | break; |
| 501 | } | 532 | } |
| 502 | 533 | ||
| 503 | msr = centrino_model[cpu]->op_points[newstate].index; | 534 | msr = per_cpu(centrino_model, cpu)->op_points[newstate].index; |
| 504 | 535 | ||
| 505 | if (first_cpu) { | 536 | if (first_cpu) { |
| 506 | rdmsr(MSR_IA32_PERF_CTL, oldmsr, h); | 537 | rdmsr(MSR_IA32_PERF_CTL, oldmsr, h); |
| @@ -517,7 +548,7 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
| 517 | dprintk("target=%dkHz old=%d new=%d msr=%04x\n", | 548 | dprintk("target=%dkHz old=%d new=%d msr=%04x\n", |
| 518 | target_freq, freqs.old, freqs.new, msr); | 549 | target_freq, freqs.old, freqs.new, msr); |
| 519 | 550 | ||
| 520 | for_each_cpu_mask(k, online_policy_cpus) { | 551 | for_each_cpu_mask_nr(k, *online_policy_cpus) { |
| 521 | freqs.cpu = k; | 552 | freqs.cpu = k; |
| 522 | cpufreq_notify_transition(&freqs, | 553 | cpufreq_notify_transition(&freqs, |
| 523 | CPUFREQ_PRECHANGE); | 554 | CPUFREQ_PRECHANGE); |
| @@ -536,11 +567,11 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
| 536 | break; | 567 | break; |
| 537 | } | 568 | } |
| 538 | 569 | ||
| 539 | cpu_set(j, covered_cpus); | 570 | cpu_set(j, *covered_cpus); |
| 540 | preempt_enable(); | 571 | preempt_enable(); |
| 541 | } | 572 | } |
| 542 | 573 | ||
| 543 | for_each_cpu_mask(k, online_policy_cpus) { | 574 | for_each_cpu_mask_nr(k, *online_policy_cpus) { |
| 544 | freqs.cpu = k; | 575 | freqs.cpu = k; |
| 545 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 576 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
| 546 | } | 577 | } |
| @@ -553,30 +584,32 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
| 553 | * Best-effort undo. | 584 |
| 554 | */ | 585 | */ |
| 555 | 586 | ||
| 556 | if (!cpus_empty(covered_cpus)) { | 587 | if (!cpus_empty(*covered_cpus)) |
| 557 | for_each_cpu_mask(j, covered_cpus) { | 588 | for_each_cpu_mask_nr(j, *covered_cpus) { |
| 558 | set_cpus_allowed_ptr(current, | 589 | set_cpus_allowed_ptr(current, |
| 559 | &cpumask_of_cpu(j)); | 590 | &cpumask_of_cpu(j)); |
| 560 | wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); | 591 | wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); |
| 561 | } | 592 | } |
| 562 | } | ||
| 563 | 593 | ||
| 564 | tmp = freqs.new; | 594 | tmp = freqs.new; |
| 565 | freqs.new = freqs.old; | 595 | freqs.new = freqs.old; |
| 566 | freqs.old = tmp; | 596 | freqs.old = tmp; |
| 567 | for_each_cpu_mask(j, online_policy_cpus) { | 597 | for_each_cpu_mask_nr(j, *online_policy_cpus) { |
| 568 | freqs.cpu = j; | 598 | freqs.cpu = j; |
| 569 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 599 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
| 570 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 600 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
| 571 | } | 601 | } |
| 572 | } | 602 | } |
| 573 | set_cpus_allowed_ptr(current, &saved_mask); | 603 | set_cpus_allowed_ptr(current, saved_mask); |
| 574 | return 0; | 604 | retval = 0; |
| 605 | goto out; | ||
| 575 | 606 | ||
| 576 | migrate_end: | 607 | migrate_end: |
| 577 | preempt_enable(); | 608 | preempt_enable(); |
| 578 | set_cpus_allowed_ptr(current, &saved_mask); | 609 | set_cpus_allowed_ptr(current, saved_mask); |
| 579 | return 0; | 610 | out: |
| 611 | CPUMASK_FREE(allmasks); | ||
| 612 | return retval; | ||
| 580 | } | 613 | } |
| 581 | 614 | ||
| 582 | static struct freq_attr* centrino_attr[] = { | 615 | static struct freq_attr* centrino_attr[] = { |
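centrino_target() previously kept four cpumask_t variables on the kernel stack; with NR_CPUS=4096 each mask is 512 bytes, so the rework moves them into one heap-allocated struct. A minimal sketch of the pattern, assuming the CPUMASK_ALLOC()/CPUMASK_PTR()/CPUMASK_FREE() helpers behave as in this kernel's <linux/cpumask.h> (falling back to an on-stack struct when NR_CPUS is small):

    #include <linux/cpumask.h>
    #include <linux/sched.h>
    #include <linux/slab.h>

    struct allmasks {
            cpumask_t saved_mask;
            cpumask_t set_mask;
    };

    static int example(void)
    {
            CPUMASK_ALLOC(allmasks);            /* declares struct allmasks *allmasks */
            CPUMASK_PTR(saved_mask, allmasks);  /* cpumask_t *saved_mask = &allmasks->saved_mask */
            CPUMASK_PTR(set_mask, allmasks);

            if (unlikely(allmasks == NULL))
                    return -ENOMEM;

            *saved_mask = current->cpus_allowed;
            cpus_clear(*set_mask);
            /* ... masks are used through the pointers, as in the hunks above ... */
            CPUMASK_FREE(allmasks);             /* no-op in the on-stack case */
            return 0;
    }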
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 1b50244b1fdf..191f7263c61d 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | |||
| @@ -279,7 +279,7 @@ static int speedstep_target (struct cpufreq_policy *policy, | |||
| 279 | 279 | ||
| 280 | cpus_allowed = current->cpus_allowed; | 280 | cpus_allowed = current->cpus_allowed; |
| 281 | 281 | ||
| 282 | for_each_cpu_mask(i, policy->cpus) { | 282 | for_each_cpu_mask_nr(i, policy->cpus) { |
| 283 | freqs.cpu = i; | 283 | freqs.cpu = i; |
| 284 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 284 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
| 285 | } | 285 | } |
| @@ -292,7 +292,7 @@ static int speedstep_target (struct cpufreq_policy *policy, | |||
| 292 | /* allow to be run on all CPUs */ | 292 | /* allow to be run on all CPUs */ |
| 293 | set_cpus_allowed_ptr(current, &cpus_allowed); | 293 | set_cpus_allowed_ptr(current, &cpus_allowed); |
| 294 | 294 | ||
| 295 | for_each_cpu_mask(i, policy->cpus) { | 295 | for_each_cpu_mask_nr(i, policy->cpus) { |
| 296 | freqs.cpu = i; | 296 | freqs.cpu = i; |
| 297 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 297 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
| 298 | } | 298 | } |
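for_each_cpu_mask() walks bit positions all the way up to NR_CPUS; the _nr variant adopted across these files stops at nr_cpu_ids (one past the highest possible CPU), which saves scanning when NR_CPUS is configured far larger than the actual machine. A small sketch:

    #include <linux/cpumask.h>
    #include <linux/kernel.h>

    static void example_walk(void)
    {
            unsigned int i;

            /* visits only set bits below nr_cpu_ids, not all NR_CPUS positions */
            for_each_cpu_mask_nr(i, cpu_online_map)
                    printk(KERN_INFO "cpu %u is online\n", i);
    }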
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index ff517f0b8cc4..6b0a10b002f1 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
| @@ -489,7 +489,7 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) | |||
| 489 | int sibling; | 489 | int sibling; |
| 490 | 490 | ||
| 491 | this_leaf = CPUID4_INFO_IDX(cpu, index); | 491 | this_leaf = CPUID4_INFO_IDX(cpu, index); |
| 492 | for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) { | 492 | for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) { |
| 493 | sibling_leaf = CPUID4_INFO_IDX(sibling, index); | 493 | sibling_leaf = CPUID4_INFO_IDX(sibling, index); |
| 494 | cpu_clear(cpu, sibling_leaf->shared_cpu_map); | 494 | cpu_clear(cpu, sibling_leaf->shared_cpu_map); |
| 495 | } | 495 | } |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 9ab65be82427..65a339678ece 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c | |||
| @@ -580,7 +580,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | |||
| 580 | char __user *buf = ubuf; | 580 | char __user *buf = ubuf; |
| 581 | int i, err; | 581 | int i, err; |
| 582 | 582 | ||
| 583 | cpu_tsc = kmalloc(NR_CPUS * sizeof(long), GFP_KERNEL); | 583 | cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL); |
| 584 | if (!cpu_tsc) | 584 | if (!cpu_tsc) |
| 585 | return -ENOMEM; | 585 | return -ENOMEM; |
| 586 | 586 | ||
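The same idea applied to allocation sizing: nr_cpu_ids * sizeof(long) allocates only for CPUs that can actually exist, so a 4-CPU box with NR_CPUS=4096 allocates 32 bytes instead of 32 KB. Sketch:

    #include <linux/slab.h>

    static int example_alloc(void)
    {
            unsigned long *cpu_tsc;

            /* sized by possible CPUs, not the compile-time NR_CPUS maximum */
            cpu_tsc = kmalloc(nr_cpu_ids * sizeof(*cpu_tsc), GFP_KERNEL);
            if (!cpu_tsc)
                    return -ENOMEM;
            kfree(cpu_tsc);
            return 0;
    }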
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 7c9a813e1193..88736cadbaa6 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c | |||
| @@ -527,7 +527,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
| 527 | if (err) | 527 | if (err) |
| 528 | goto out_free; | 528 | goto out_free; |
| 529 | 529 | ||
| 530 | for_each_cpu_mask(i, b->cpus) { | 530 | for_each_cpu_mask_nr(i, b->cpus) { |
| 531 | if (i == cpu) | 531 | if (i == cpu) |
| 532 | continue; | 532 | continue; |
| 533 | 533 | ||
| @@ -617,7 +617,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
| 617 | #endif | 617 | #endif |
| 618 | 618 | ||
| 619 | /* remove all sibling symlinks before unregistering */ | 619 | /* remove all sibling symlinks before unregistering */ |
| 620 | for_each_cpu_mask(i, b->cpus) { | 620 | for_each_cpu_mask_nr(i, b->cpus) { |
| 621 | if (i == cpu) | 621 | if (i == cpu) |
| 622 | continue; | 622 | continue; |
| 623 | 623 | ||
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 0d0d9057e7c0..a26c480b9491 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c | |||
| @@ -160,7 +160,7 @@ static void *c_start(struct seq_file *m, loff_t *pos) | |||
| 160 | { | 160 | { |
| 161 | if (*pos == 0) /* just in case, cpu 0 is not the first */ | 161 | if (*pos == 0) /* just in case, cpu 0 is not the first */ |
| 162 | *pos = first_cpu(cpu_online_map); | 162 | *pos = first_cpu(cpu_online_map); |
| 163 | if ((*pos) < NR_CPUS && cpu_online(*pos)) | 163 | if ((*pos) < nr_cpu_ids && cpu_online(*pos)) |
| 164 | return &cpu_data(*pos); | 164 | return &cpu_data(*pos); |
| 165 | return NULL; | 165 | return NULL; |
| 166 | } | 166 | } |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index cdfd94cc6b14..109792bc7cfa 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
| @@ -54,6 +54,16 @@ | |||
| 54 | #include <asm/ftrace.h> | 54 | #include <asm/ftrace.h> |
| 55 | #include <asm/irq_vectors.h> | 55 | #include <asm/irq_vectors.h> |
| 56 | 56 | ||
| 57 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | ||
| 58 | #include <linux/elf-em.h> | ||
| 59 | #define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE) | ||
| 60 | #define __AUDIT_ARCH_LE 0x40000000 | ||
| 61 | |||
| 62 | #ifndef CONFIG_AUDITSYSCALL | ||
| 63 | #define sysenter_audit syscall_trace_entry | ||
| 64 | #define sysexit_audit syscall_exit_work | ||
| 65 | #endif | ||
| 66 | |||
| 57 | /* | 67 | /* |
| 58 | * We use macros for low-level operations which need to be overridden | 68 | * We use macros for low-level operations which need to be overridden |
| 59 | * for paravirtualization. The following will never clobber any registers: | 69 | * for paravirtualization. The following will never clobber any registers: |
| @@ -333,7 +343,8 @@ sysenter_past_esp: | |||
| 333 | 343 | ||
| 334 | /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ | 344 | /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ |
| 335 | testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) | 345 | testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) |
| 336 | jnz syscall_trace_entry | 346 | jnz sysenter_audit |
| 347 | sysenter_do_call: | ||
| 337 | cmpl $(nr_syscalls), %eax | 348 | cmpl $(nr_syscalls), %eax |
| 338 | jae syscall_badsys | 349 | jae syscall_badsys |
| 339 | call *sys_call_table(,%eax,4) | 350 | call *sys_call_table(,%eax,4) |
| @@ -343,7 +354,8 @@ sysenter_past_esp: | |||
| 343 | TRACE_IRQS_OFF | 354 | TRACE_IRQS_OFF |
| 344 | movl TI_flags(%ebp), %ecx | 355 | movl TI_flags(%ebp), %ecx |
| 345 | testw $_TIF_ALLWORK_MASK, %cx | 356 | testw $_TIF_ALLWORK_MASK, %cx |
| 346 | jne syscall_exit_work | 357 | jne sysexit_audit |
| 358 | sysenter_exit: | ||
| 347 | /* if something modifies registers it must also disable sysexit */ | 359 | /* if something modifies registers it must also disable sysexit */ |
| 348 | movl PT_EIP(%esp), %edx | 360 | movl PT_EIP(%esp), %edx |
| 349 | movl PT_OLDESP(%esp), %ecx | 361 | movl PT_OLDESP(%esp), %ecx |
| @@ -351,6 +363,45 @@ sysenter_past_esp: | |||
| 351 | TRACE_IRQS_ON | 363 | TRACE_IRQS_ON |
| 352 | 1: mov PT_FS(%esp), %fs | 364 | 1: mov PT_FS(%esp), %fs |
| 353 | ENABLE_INTERRUPTS_SYSEXIT | 365 | ENABLE_INTERRUPTS_SYSEXIT |
| 366 | |||
| 367 | #ifdef CONFIG_AUDITSYSCALL | ||
| 368 | sysenter_audit: | ||
| 369 | testw $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp) | ||
| 370 | jnz syscall_trace_entry | ||
| 371 | addl $4,%esp | ||
| 372 | CFI_ADJUST_CFA_OFFSET -4 | ||
| 373 | /* %esi already in 8(%esp) 6th arg: 4th syscall arg */ | ||
| 374 | /* %edx already in 4(%esp) 5th arg: 3rd syscall arg */ | ||
| 375 | /* %ecx already in 0(%esp) 4th arg: 2nd syscall arg */ | ||
| 376 | movl %ebx,%ecx /* 3rd arg: 1st syscall arg */ | ||
| 377 | movl %eax,%edx /* 2nd arg: syscall number */ | ||
| 378 | movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */ | ||
| 379 | call audit_syscall_entry | ||
| 380 | pushl %ebx | ||
| 381 | CFI_ADJUST_CFA_OFFSET 4 | ||
| 382 | movl PT_EAX(%esp),%eax /* reload syscall number */ | ||
| 383 | jmp sysenter_do_call | ||
| 384 | |||
| 385 | sysexit_audit: | ||
| 386 | testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx | ||
| 387 | jne syscall_exit_work | ||
| 388 | TRACE_IRQS_ON | ||
| 389 | ENABLE_INTERRUPTS(CLBR_ANY) | ||
| 390 | movl %eax,%edx /* second arg, syscall return value */ | ||
| 391 | cmpl $0,%eax /* is it < 0? */ | ||
| 392 | setl %al /* 1 if so, 0 if not */ | ||
| 393 | movzbl %al,%eax /* zero-extend that */ | ||
| 394 | inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ | ||
| 395 | call audit_syscall_exit | ||
| 396 | DISABLE_INTERRUPTS(CLBR_ANY) | ||
| 397 | TRACE_IRQS_OFF | ||
| 398 | movl TI_flags(%ebp), %ecx | ||
| 399 | testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx | ||
| 400 | jne syscall_exit_work | ||
| 401 | movl PT_EAX(%esp),%eax /* reload syscall return value */ | ||
| 402 | jmp sysenter_exit | ||
| 403 | #endif | ||
| 404 | |||
| 354 | CFI_ENDPROC | 405 | CFI_ENDPROC |
| 355 | .pushsection .fixup,"ax" | 406 | .pushsection .fixup,"ax" |
| 356 | 2: movl $0,PT_FS(%esp) | 407 | 2: movl $0,PT_FS(%esp) |
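The new sysenter_audit/sysexit_audit labels call the audit helpers straight from assembly, skipping the full syscall_trace_entry/syscall_exit_work path when only TIF_SYSCALL_AUDIT is set. In C terms the fast path does roughly the following — a sketch assuming the audit_syscall_entry()/audit_syscall_exit() signatures of this kernel and the i386 pt_regs field names:

    #include <linux/audit.h>
    #include <asm/ptrace.h>

    static void example_audit(struct pt_regs *regs)
    {
            /* entry: audit arch, syscall number, then the first four
             * syscall arguments (ebx, ecx, edx, esi on i386) */
            audit_syscall_entry(AUDIT_ARCH_I386, regs->ax,
                                regs->bx, regs->cx, regs->dx, regs->si);

            /* exit: AUDITSC_SUCCESS (1) or AUDITSC_FAILURE (2) plus the raw
             * return value -- matching the setl/inc sequence in sysexit_audit */
            audit_syscall_exit((long)regs->ax < 0 ?
                                       AUDITSC_FAILURE : AUDITSC_SUCCESS,
                               regs->ax);
    }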
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 8410e26f4183..89434d439605 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
| @@ -53,6 +53,12 @@ | |||
| 53 | #include <asm/paravirt.h> | 53 | #include <asm/paravirt.h> |
| 54 | #include <asm/ftrace.h> | 54 | #include <asm/ftrace.h> |
| 55 | 55 | ||
| 56 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | ||
| 57 | #include <linux/elf-em.h> | ||
| 58 | #define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) | ||
| 59 | #define __AUDIT_ARCH_64BIT 0x80000000 | ||
| 60 | #define __AUDIT_ARCH_LE 0x40000000 | ||
| 61 | |||
| 56 | .code64 | 62 | .code64 |
| 57 | 63 | ||
| 58 | #ifdef CONFIG_FTRACE | 64 | #ifdef CONFIG_FTRACE |
| @@ -351,6 +357,7 @@ ENTRY(system_call_after_swapgs) | |||
| 351 | GET_THREAD_INFO(%rcx) | 357 | GET_THREAD_INFO(%rcx) |
| 352 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx) | 358 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx) |
| 353 | jnz tracesys | 359 | jnz tracesys |
| 360 | system_call_fastpath: | ||
| 354 | cmpq $__NR_syscall_max,%rax | 361 | cmpq $__NR_syscall_max,%rax |
| 355 | ja badsys | 362 | ja badsys |
| 356 | movq %r10,%rcx | 363 | movq %r10,%rcx |
| @@ -402,16 +409,16 @@ sysret_careful: | |||
| 402 | sysret_signal: | 409 | sysret_signal: |
| 403 | TRACE_IRQS_ON | 410 | TRACE_IRQS_ON |
| 404 | ENABLE_INTERRUPTS(CLBR_NONE) | 411 | ENABLE_INTERRUPTS(CLBR_NONE) |
| 405 | testl $_TIF_DO_NOTIFY_MASK,%edx | 412 | #ifdef CONFIG_AUDITSYSCALL |
| 406 | jz 1f | 413 | bt $TIF_SYSCALL_AUDIT,%edx |
| 407 | 414 | jc sysret_audit | |
| 408 | /* Really a signal */ | 415 | #endif |
| 409 | /* edx: work flags (arg3) */ | 416 | /* edx: work flags (arg3) */ |
| 410 | leaq do_notify_resume(%rip),%rax | 417 | leaq do_notify_resume(%rip),%rax |
| 411 | leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 | 418 | leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 |
| 412 | xorl %esi,%esi # oldset -> arg2 | 419 | xorl %esi,%esi # oldset -> arg2 |
| 413 | call ptregscall_common | 420 | call ptregscall_common |
| 414 | 1: movl $_TIF_WORK_MASK,%edi | 421 | movl $_TIF_WORK_MASK,%edi |
| 415 | /* Use IRET because user could have changed frame. This | 422 | /* Use IRET because user could have changed frame. This |
| 416 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ | 423 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ |
| 417 | DISABLE_INTERRUPTS(CLBR_NONE) | 424 | DISABLE_INTERRUPTS(CLBR_NONE) |
| @@ -422,8 +429,45 @@ badsys: | |||
| 422 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) | 429 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) |
| 423 | jmp ret_from_sys_call | 430 | jmp ret_from_sys_call |
| 424 | 431 | ||
| 432 | #ifdef CONFIG_AUDITSYSCALL | ||
| 433 | /* | ||
| 434 | * Fast path for syscall audit without full syscall trace. | ||
| 435 | * We just call audit_syscall_entry() directly, and then | ||
| 436 | * jump back to the normal fast path. | ||
| 437 | */ | ||
| 438 | auditsys: | ||
| 439 | movq %r10,%r9 /* 6th arg: 4th syscall arg */ | ||
| 440 | movq %rdx,%r8 /* 5th arg: 3rd syscall arg */ | ||
| 441 | movq %rsi,%rcx /* 4th arg: 2nd syscall arg */ | ||
| 442 | movq %rdi,%rdx /* 3rd arg: 1st syscall arg */ | ||
| 443 | movq %rax,%rsi /* 2nd arg: syscall number */ | ||
| 444 | movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */ | ||
| 445 | call audit_syscall_entry | ||
| 446 | LOAD_ARGS 0 /* reload call-clobbered registers */ | ||
| 447 | jmp system_call_fastpath | ||
| 448 | |||
| 449 | /* | ||
| 450 | * Return fast path for syscall audit. Call audit_syscall_exit() | ||
| 451 | * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT | ||
| 452 | * masked off. | ||
| 453 | */ | ||
| 454 | sysret_audit: | ||
| 455 | movq %rax,%rsi /* second arg, syscall return value */ | ||
| 456 | cmpq $0,%rax /* is it < 0? */ | ||
| 457 | setl %al /* 1 if so, 0 if not */ | ||
| 458 | movzbl %al,%edi /* zero-extend that into %edi */ | ||
| 459 | inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ | ||
| 460 | call audit_syscall_exit | ||
| 461 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi | ||
| 462 | jmp sysret_check | ||
| 463 | #endif /* CONFIG_AUDITSYSCALL */ | ||
| 464 | |||
| 425 | /* Do syscall tracing */ | 465 | /* Do syscall tracing */ |
| 426 | tracesys: | 466 | tracesys: |
| 467 | #ifdef CONFIG_AUDITSYSCALL | ||
| 468 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx) | ||
| 469 | jz auditsys | ||
| 470 | #endif | ||
| 427 | SAVE_REST | 471 | SAVE_REST |
| 428 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ | 472 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ |
| 429 | FIXUP_TOP_OF_STACK %rdi | 473 | FIXUP_TOP_OF_STACK %rdi |
| @@ -448,6 +492,7 @@ tracesys: | |||
| 448 | * Has correct top of stack, but partial stack frame. | 492 | * Has correct top of stack, but partial stack frame. |
| 449 | */ | 493 | */ |
| 450 | .globl int_ret_from_sys_call | 494 | .globl int_ret_from_sys_call |
| 495 | .globl int_with_check | ||
| 451 | int_ret_from_sys_call: | 496 | int_ret_from_sys_call: |
| 452 | DISABLE_INTERRUPTS(CLBR_NONE) | 497 | DISABLE_INTERRUPTS(CLBR_NONE) |
| 453 | TRACE_IRQS_OFF | 498 | TRACE_IRQS_OFF |
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 1fa8be5bd217..eaff0bbb1444 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c | |||
| @@ -99,3 +99,4 @@ int is_uv_system(void) | |||
| 99 | { | 99 | { |
| 100 | return uv_system_type != UV_NONE; | 100 | return uv_system_type != UV_NONE; |
| 101 | } | 101 | } |
| 102 | EXPORT_SYMBOL_GPL(is_uv_system); | ||
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 1a9c68845ee8..786548a62d38 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c | |||
| @@ -168,7 +168,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) | |||
| 168 | * May as well be the first. | 168 | * May as well be the first. |
| 169 | */ | 169 | */ |
| 170 | cpu = first_cpu(cpumask); | 170 | cpu = first_cpu(cpumask); |
| 171 | if ((unsigned)cpu < NR_CPUS) | 171 | if ((unsigned)cpu < nr_cpu_ids) |
| 172 | return per_cpu(x86_cpu_to_apicid, cpu); | 172 | return per_cpu(x86_cpu_to_apicid, cpu); |
| 173 | else | 173 | else |
| 174 | return BAD_APICID; | 174 | return BAD_APICID; |
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 3c3929340692..2cfcbded888a 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c | |||
| @@ -98,7 +98,7 @@ static void uv_send_IPI_mask(cpumask_t mask, int vector) | |||
| 98 | { | 98 | { |
| 99 | unsigned int cpu; | 99 | unsigned int cpu; |
| 100 | 100 | ||
| 101 | for (cpu = 0; cpu < NR_CPUS; ++cpu) | 101 | for_each_possible_cpu(cpu) |
| 102 | if (cpu_isset(cpu, mask)) | 102 | if (cpu_isset(cpu, mask)) |
| 103 | uv_send_IPI_one(cpu, vector); | 103 | uv_send_IPI_one(cpu, vector); |
| 104 | } | 104 | } |
| @@ -132,7 +132,7 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) | |||
| 132 | * May as well be the first. | 132 | * May as well be the first. |
| 133 | */ | 133 | */ |
| 134 | cpu = first_cpu(cpumask); | 134 | cpu = first_cpu(cpumask); |
| 135 | if ((unsigned)cpu < NR_CPUS) | 135 | if ((unsigned)cpu < nr_cpu_ids) |
| 136 | return per_cpu(x86_cpu_to_apicid, cpu); | 136 | return per_cpu(x86_cpu_to_apicid, cpu); |
| 137 | else | 137 | else |
| 138 | return BAD_APICID; | 138 | return BAD_APICID; |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index f67e93441caf..a7010c3a377a 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
| @@ -456,9 +456,6 @@ is386: movl $2,%ecx # set MP | |||
| 456 | 1: | 456 | 1: |
| 457 | #endif /* CONFIG_SMP */ | 457 | #endif /* CONFIG_SMP */ |
| 458 | jmp *(initial_code) | 458 | jmp *(initial_code) |
| 459 | .align 4 | ||
| 460 | ENTRY(initial_code) | ||
| 461 | .long i386_start_kernel | ||
| 462 | 459 | ||
| 463 | /* | 460 | /* |
| 464 | * We depend on ET to be correct. This checks for 287/387. | 461 | * We depend on ET to be correct. This checks for 287/387. |
| @@ -601,6 +598,11 @@ ignore_int: | |||
| 601 | #endif | 598 | #endif |
| 602 | iret | 599 | iret |
| 603 | 600 | ||
| 601 | .section .cpuinit.data,"wa" | ||
| 602 | .align 4 | ||
| 603 | ENTRY(initial_code) | ||
| 604 | .long i386_start_kernel | ||
| 605 | |||
| 604 | .section .text | 606 | .section .text |
| 605 | /* | 607 | /* |
| 606 | * Real beginning of normal "text" segment | 608 | * Real beginning of normal "text" segment |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 0ea6a19bfdfe..ad2b15a1334d 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
| @@ -468,7 +468,7 @@ void hpet_disable(void) | |||
| 468 | #define RTC_NUM_INTS 1 | 468 | #define RTC_NUM_INTS 1 |
| 469 | 469 | ||
| 470 | static unsigned long hpet_rtc_flags; | 470 | static unsigned long hpet_rtc_flags; |
| 471 | static unsigned long hpet_prev_update_sec; | 471 | static int hpet_prev_update_sec; |
| 472 | static struct rtc_time hpet_alarm_time; | 472 | static struct rtc_time hpet_alarm_time; |
| 473 | static unsigned long hpet_pie_count; | 473 | static unsigned long hpet_pie_count; |
| 474 | static unsigned long hpet_t1_cmp; | 474 | static unsigned long hpet_t1_cmp; |
| @@ -575,6 +575,9 @@ int hpet_set_rtc_irq_bit(unsigned long bit_mask) | |||
| 575 | 575 | ||
| 576 | hpet_rtc_flags |= bit_mask; | 576 | hpet_rtc_flags |= bit_mask; |
| 577 | 577 | ||
| 578 | if ((bit_mask & RTC_UIE) && !(oldbits & RTC_UIE)) | ||
| 579 | hpet_prev_update_sec = -1; | ||
| 580 | |||
| 578 | if (!oldbits) | 581 | if (!oldbits) |
| 579 | hpet_rtc_timer_init(); | 582 | hpet_rtc_timer_init(); |
| 580 | 583 | ||
| @@ -652,7 +655,7 @@ static void hpet_rtc_timer_reinit(void) | |||
| 652 | if (hpet_rtc_flags & RTC_PIE) | 655 | if (hpet_rtc_flags & RTC_PIE) |
| 653 | hpet_pie_count += lost_ints; | 656 | hpet_pie_count += lost_ints; |
| 654 | if (printk_ratelimit()) | 657 | if (printk_ratelimit()) |
| 655 | printk(KERN_WARNING "rtc: lost %d interrupts\n", | 658 | printk(KERN_WARNING "hpet1: lost %d rtc interrupts\n", |
| 656 | lost_ints); | 659 | lost_ints); |
| 657 | } | 660 | } |
| 658 | } | 661 | } |
| @@ -670,7 +673,8 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) | |||
| 670 | 673 | ||
| 671 | if (hpet_rtc_flags & RTC_UIE && | 674 | if (hpet_rtc_flags & RTC_UIE && |
| 672 | curr_time.tm_sec != hpet_prev_update_sec) { | 675 | curr_time.tm_sec != hpet_prev_update_sec) { |
| 673 | rtc_int_flag = RTC_UF; | 676 | if (hpet_prev_update_sec >= 0) |
| 677 | rtc_int_flag = RTC_UF; | ||
| 674 | hpet_prev_update_sec = curr_time.tm_sec; | 678 | hpet_prev_update_sec = curr_time.tm_sec; |
| 675 | } | 679 | } |
| 676 | 680 | ||
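Turning hpet_prev_update_sec into a signed int allows -1 as a "no second observed yet" sentinel: when RTC_UIE is (re)enabled, the first interrupt only records the current second instead of signaling a possibly bogus update event. A self-contained sketch of the sentinel logic:

    /* tm_sec is 0..59, so -1 can never match a real second */
    static int hpet_prev_update_sec = -1;

    static int update_event_pending(int curr_sec)
    {
            int pending = 0;

            if (curr_sec != hpet_prev_update_sec) {
                    if (hpet_prev_update_sec >= 0)  /* skip the first tick */
                            pending = 1;
                    hpet_prev_update_sec = curr_sec;
            }
            return pending;
    }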
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index de9aa0e3a9c5..09cddb57bec4 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c | |||
| @@ -57,7 +57,7 @@ atomic_t irq_mis_count; | |||
| 57 | static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; | 57 | static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; |
| 58 | 58 | ||
| 59 | static DEFINE_SPINLOCK(ioapic_lock); | 59 | static DEFINE_SPINLOCK(ioapic_lock); |
| 60 | static DEFINE_SPINLOCK(vector_lock); | 60 | DEFINE_SPINLOCK(vector_lock); |
| 61 | 61 | ||
| 62 | int timer_through_8259 __initdata; | 62 | int timer_through_8259 __initdata; |
| 63 | 63 | ||
| @@ -1209,10 +1209,6 @@ static int assign_irq_vector(int irq) | |||
| 1209 | return vector; | 1209 | return vector; |
| 1210 | } | 1210 | } |
| 1211 | 1211 | ||
| 1212 | void setup_vector_irq(int cpu) | ||
| 1213 | { | ||
| 1214 | } | ||
| 1215 | |||
| 1216 | static struct irq_chip ioapic_chip; | 1212 | static struct irq_chip ioapic_chip; |
| 1217 | 1213 | ||
| 1218 | #define IOAPIC_AUTO -1 | 1214 | #define IOAPIC_AUTO -1 |
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 64a46affd858..61a83b70c18f 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c | |||
| @@ -101,7 +101,7 @@ int timer_through_8259 __initdata; | |||
| 101 | static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; | 101 | static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; |
| 102 | 102 | ||
| 103 | static DEFINE_SPINLOCK(ioapic_lock); | 103 | static DEFINE_SPINLOCK(ioapic_lock); |
| 104 | DEFINE_SPINLOCK(vector_lock); | 104 | static DEFINE_SPINLOCK(vector_lock); |
| 105 | 105 | ||
| 106 | /* | 106 | /* |
| 107 | * # of IRQ routing registers | 107 | * # of IRQ routing registers |
| @@ -697,6 +697,19 @@ static int pin_2_irq(int idx, int apic, int pin) | |||
| 697 | return irq; | 697 | return irq; |
| 698 | } | 698 | } |
| 699 | 699 | ||
| 700 | void lock_vector_lock(void) | ||
| 701 | { | ||
| 702 | /* Used to ensure the online set of cpus does not change | ||
| 703 | * during assign_irq_vector. | ||
| 704 | */ | ||
| 705 | spin_lock(&vector_lock); | ||
| 706 | } | ||
| 707 | |||
| 708 | void unlock_vector_lock(void) | ||
| 709 | { | ||
| 710 | spin_unlock(&vector_lock); | ||
| 711 | } | ||
| 712 | |||
| 700 | static int __assign_irq_vector(int irq, cpumask_t mask) | 713 | static int __assign_irq_vector(int irq, cpumask_t mask) |
| 701 | { | 714 | { |
| 702 | /* | 715 | /* |
| @@ -732,7 +745,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask) | |||
| 732 | return 0; | 745 | return 0; |
| 733 | } | 746 | } |
| 734 | 747 | ||
| 735 | for_each_cpu_mask(cpu, mask) { | 748 | for_each_cpu_mask_nr(cpu, mask) { |
| 736 | cpumask_t domain, new_mask; | 749 | cpumask_t domain, new_mask; |
| 737 | int new_cpu; | 750 | int new_cpu; |
| 738 | int vector, offset; | 751 | int vector, offset; |
| @@ -753,7 +766,7 @@ next: | |||
| 753 | continue; | 766 | continue; |
| 754 | if (vector == IA32_SYSCALL_VECTOR) | 767 | if (vector == IA32_SYSCALL_VECTOR) |
| 755 | goto next; | 768 | goto next; |
| 756 | for_each_cpu_mask(new_cpu, new_mask) | 769 | for_each_cpu_mask_nr(new_cpu, new_mask) |
| 757 | if (per_cpu(vector_irq, new_cpu)[vector] != -1) | 770 | if (per_cpu(vector_irq, new_cpu)[vector] != -1) |
| 758 | goto next; | 771 | goto next; |
| 759 | /* Found one! */ | 772 | /* Found one! */ |
| @@ -763,7 +776,7 @@ next: | |||
| 763 | cfg->move_in_progress = 1; | 776 | cfg->move_in_progress = 1; |
| 764 | cfg->old_domain = cfg->domain; | 777 | cfg->old_domain = cfg->domain; |
| 765 | } | 778 | } |
| 766 | for_each_cpu_mask(new_cpu, new_mask) | 779 | for_each_cpu_mask_nr(new_cpu, new_mask) |
| 767 | per_cpu(vector_irq, new_cpu)[vector] = irq; | 780 | per_cpu(vector_irq, new_cpu)[vector] = irq; |
| 768 | cfg->vector = vector; | 781 | cfg->vector = vector; |
| 769 | cfg->domain = domain; | 782 | cfg->domain = domain; |
| @@ -795,14 +808,14 @@ static void __clear_irq_vector(int irq) | |||
| 795 | 808 | ||
| 796 | vector = cfg->vector; | 809 | vector = cfg->vector; |
| 797 | cpus_and(mask, cfg->domain, cpu_online_map); | 810 | cpus_and(mask, cfg->domain, cpu_online_map); |
| 798 | for_each_cpu_mask(cpu, mask) | 811 | for_each_cpu_mask_nr(cpu, mask) |
| 799 | per_cpu(vector_irq, cpu)[vector] = -1; | 812 | per_cpu(vector_irq, cpu)[vector] = -1; |
| 800 | 813 | ||
| 801 | cfg->vector = 0; | 814 | cfg->vector = 0; |
| 802 | cpus_clear(cfg->domain); | 815 | cpus_clear(cfg->domain); |
| 803 | } | 816 | } |
| 804 | 817 | ||
| 805 | static void __setup_vector_irq(int cpu) | 818 | void __setup_vector_irq(int cpu) |
| 806 | { | 819 | { |
| 807 | /* Initialize vector_irq on a new cpu */ | 820 | /* Initialize vector_irq on a new cpu */ |
| 808 | /* This function must be called with vector_lock held */ | 821 | /* This function must be called with vector_lock held */ |
| @@ -825,14 +838,6 @@ static void __setup_vector_irq(int cpu) | |||
| 825 | } | 838 | } |
| 826 | } | 839 | } |
| 827 | 840 | ||
| 828 | void setup_vector_irq(int cpu) | ||
| 829 | { | ||
| 830 | spin_lock(&vector_lock); | ||
| 831 | __setup_vector_irq(smp_processor_id()); | ||
| 832 | spin_unlock(&vector_lock); | ||
| 833 | } | ||
| 834 | |||
| 835 | |||
| 836 | static struct irq_chip ioapic_chip; | 841 | static struct irq_chip ioapic_chip; |
| 837 | 842 | ||
| 838 | static void ioapic_register_intr(int irq, unsigned long trigger) | 843 | static void ioapic_register_intr(int irq, unsigned long trigger) |
| @@ -1373,12 +1378,10 @@ static unsigned int startup_ioapic_irq(unsigned int irq) | |||
| 1373 | static int ioapic_retrigger_irq(unsigned int irq) | 1378 | static int ioapic_retrigger_irq(unsigned int irq) |
| 1374 | { | 1379 | { |
| 1375 | struct irq_cfg *cfg = &irq_cfg[irq]; | 1380 | struct irq_cfg *cfg = &irq_cfg[irq]; |
| 1376 | cpumask_t mask; | ||
| 1377 | unsigned long flags; | 1381 | unsigned long flags; |
| 1378 | 1382 | ||
| 1379 | spin_lock_irqsave(&vector_lock, flags); | 1383 | spin_lock_irqsave(&vector_lock, flags); |
| 1380 | mask = cpumask_of_cpu(first_cpu(cfg->domain)); | 1384 | send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); |
| 1381 | send_IPI_mask(mask, cfg->vector); | ||
| 1382 | spin_unlock_irqrestore(&vector_lock, flags); | 1385 | spin_unlock_irqrestore(&vector_lock, flags); |
| 1383 | 1386 | ||
| 1384 | return 1; | 1387 | return 1; |
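The lock_vector_lock()/unlock_vector_lock() accessors added above let the CPU bring-up path bracket __setup_vector_irq() (now non-static) while keeping vector_lock file-static on 64-bit. A sketch of the intended call site; the function name here is hypothetical, standing in for the secondary-CPU startup code:

    static void example_cpu_online(int cpu)
    {
            lock_vector_lock();       /* keep assign_irq_vector() out */
            __setup_vector_irq(cpu);  /* must run with vector_lock held */
            unlock_vector_lock();
    }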
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 0373e88de95a..1f26fd9ec4f4 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c | |||
| @@ -43,10 +43,11 @@ | |||
| 43 | 43 | ||
| 44 | #define BUILD_IRQ(nr) \ | 44 | #define BUILD_IRQ(nr) \ |
| 45 | asmlinkage void IRQ_NAME(nr); \ | 45 | asmlinkage void IRQ_NAME(nr); \ |
| 46 | asm("\n.p2align\n" \ | 46 | asm("\n.text\n.p2align\n" \ |
| 47 | "IRQ" #nr "_interrupt:\n\t" \ | 47 | "IRQ" #nr "_interrupt:\n\t" \ |
| 48 | "push $~(" #nr ") ; " \ | 48 | "push $~(" #nr ") ; " \ |
| 49 | "jmp common_interrupt"); | 49 | "jmp common_interrupt\n" \ |
| 50 | ".previous"); | ||
| 50 | 51 | ||
| 51 | #define BI(x,y) \ | 52 | #define BI(x,y) \ |
| 52 | BUILD_IRQ(x##y) | 53 | BUILD_IRQ(x##y) |
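The added ".text"/".previous" directives matter because a top-level asm() is emitted into whatever section the compiler happens to be in at that point; without them, the generated IRQ stub could land in a non-executable section. A minimal standalone illustration of the pattern:

    /* force the stub into .text, then restore the previous section so
     * later compiler-emitted code or data is unaffected */
    asm(".text\n"
        ".p2align\n"
        "example_stub:\n\t"
        "ret\n"
        ".previous");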
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 43c019f85f0d..6c27679ec6aa 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
| @@ -431,7 +431,6 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) | |||
| 431 | regs->ip = (unsigned long)p->ainsn.insn; | 431 | regs->ip = (unsigned long)p->ainsn.insn; |
| 432 | } | 432 | } |
| 433 | 433 | ||
| 434 | /* Called with kretprobe_lock held */ | ||
| 435 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, | 434 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, |
| 436 | struct pt_regs *regs) | 435 | struct pt_regs *regs) |
| 437 | { | 436 | { |
| @@ -682,8 +681,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
| 682 | unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; | 681 | unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; |
| 683 | 682 | ||
| 684 | INIT_HLIST_HEAD(&empty_rp); | 683 | INIT_HLIST_HEAD(&empty_rp); |
| 685 | spin_lock_irqsave(&kretprobe_lock, flags); | 684 | kretprobe_hash_lock(current, &head, &flags); |
| 686 | head = kretprobe_inst_table_head(current); | ||
| 687 | /* fixup registers */ | 685 | /* fixup registers */ |
| 688 | #ifdef CONFIG_X86_64 | 686 | #ifdef CONFIG_X86_64 |
| 689 | regs->cs = __KERNEL_CS; | 687 | regs->cs = __KERNEL_CS; |
| @@ -732,7 +730,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
| 732 | 730 | ||
| 733 | kretprobe_assert(ri, orig_ret_address, trampoline_address); | 731 | kretprobe_assert(ri, orig_ret_address, trampoline_address); |
| 734 | 732 | ||
| 735 | spin_unlock_irqrestore(&kretprobe_lock, flags); | 733 | kretprobe_hash_unlock(current, &flags); |
| 736 | 734 | ||
| 737 | hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { | 735 | hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { |
| 738 | hlist_del(&ri->hlist); | 736 | hlist_del(&ri->hlist); |
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index a8449571858a..b68e21f06f4f 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c | |||
| @@ -62,12 +62,10 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) | |||
| 62 | 62 | ||
| 63 | if (reload) { | 63 | if (reload) { |
| 64 | #ifdef CONFIG_SMP | 64 | #ifdef CONFIG_SMP |
| 65 | cpumask_t mask; | ||
| 66 | |||
| 67 | preempt_disable(); | 65 | preempt_disable(); |
| 68 | load_LDT(pc); | 66 | load_LDT(pc); |
| 69 | mask = cpumask_of_cpu(smp_processor_id()); | 67 | if (!cpus_equal(current->mm->cpu_vm_mask, |
| 70 | if (!cpus_equal(current->mm->cpu_vm_mask, mask)) | 68 | cpumask_of_cpu(smp_processor_id()))) |
| 71 | smp_call_function(flush_ldt, current->mm, 1); | 69 | smp_call_function(flush_ldt, current->mm, 1); |
| 72 | preempt_enable(); | 70 | preempt_enable(); |
| 73 | #else | 71 | #else |
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 8864230d55af..9fe478d98406 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | #include <asm/cpufeature.h> | 22 | #include <asm/cpufeature.h> |
| 23 | #include <asm/desc.h> | 23 | #include <asm/desc.h> |
| 24 | #include <asm/system.h> | 24 | #include <asm/system.h> |
| 25 | #include <asm/cacheflush.h> | ||
| 25 | 26 | ||
| 26 | #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) | 27 | #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) |
| 27 | static u32 kexec_pgd[1024] PAGE_ALIGNED; | 28 | static u32 kexec_pgd[1024] PAGE_ALIGNED; |
| @@ -85,10 +86,12 @@ static void load_segments(void) | |||
| 85 | * reboot code buffer to allow us to avoid allocations | 86 | * reboot code buffer to allow us to avoid allocations |
| 86 | * later. | 87 | * later. |
| 87 | * | 88 | * |
| 88 | * Currently nothing. | 89 | * Make control page executable. |
| 89 | */ | 90 | */ |
| 90 | int machine_kexec_prepare(struct kimage *image) | 91 | int machine_kexec_prepare(struct kimage *image) |
| 91 | { | 92 | { |
| 93 | if (nx_enabled) | ||
| 94 | set_pages_x(image->control_code_page, 1); | ||
| 92 | return 0; | 95 | return 0; |
| 93 | } | 96 | } |
| 94 | 97 | ||
| @@ -98,27 +101,48 @@ int machine_kexec_prepare(struct kimage *image) | |||
| 98 | */ | 101 | */ |
| 99 | void machine_kexec_cleanup(struct kimage *image) | 102 | void machine_kexec_cleanup(struct kimage *image) |
| 100 | { | 103 | { |
| 104 | if (nx_enabled) | ||
| 105 | set_pages_nx(image->control_code_page, 1); | ||
| 101 | } | 106 | } |
| 102 | 107 | ||
| 103 | /* | 108 | /* |
| 104 | * Do not allocate memory (or fail in any way) in machine_kexec(). | 109 | * Do not allocate memory (or fail in any way) in machine_kexec(). |
| 105 | * We are past the point of no return, committed to rebooting now. | 110 | * We are past the point of no return, committed to rebooting now. |
| 106 | */ | 111 | */ |
| 107 | NORET_TYPE void machine_kexec(struct kimage *image) | 112 | void machine_kexec(struct kimage *image) |
| 108 | { | 113 | { |
| 109 | unsigned long page_list[PAGES_NR]; | 114 | unsigned long page_list[PAGES_NR]; |
| 110 | void *control_page; | 115 | void *control_page; |
| 116 | asmlinkage unsigned long | ||
| 117 | (*relocate_kernel_ptr)(unsigned long indirection_page, | ||
| 118 | unsigned long control_page, | ||
| 119 | unsigned long start_address, | ||
| 120 | unsigned int has_pae, | ||
| 121 | unsigned int preserve_context); | ||
| 111 | 122 | ||
| 112 | tracer_disable(); | 123 | tracer_disable(); |
| 113 | 124 | ||
| 114 | /* Interrupts aren't acceptable while we reboot */ | 125 | /* Interrupts aren't acceptable while we reboot */ |
| 115 | local_irq_disable(); | 126 | local_irq_disable(); |
| 116 | 127 | ||
| 128 | if (image->preserve_context) { | ||
| 129 | #ifdef CONFIG_X86_IO_APIC | ||
| 130 | /* We need to put APICs in legacy mode so that we can | ||
| 131 | * get timer interrupts in the second kernel. kexec/kdump | ||
| 132 | * paths already have calls to disable_IO_APIC() in | ||
| 133 | * one form or another. The kexec jump path also | ||
| 134 | * needs one. | ||
| 135 | */ | ||
| 136 | disable_IO_APIC(); | ||
| 137 | #endif | ||
| 138 | } | ||
| 139 | |||
| 117 | control_page = page_address(image->control_code_page); | 140 | control_page = page_address(image->control_code_page); |
| 118 | memcpy(control_page, relocate_kernel, PAGE_SIZE); | 141 | memcpy(control_page, relocate_kernel, PAGE_SIZE/2); |
| 119 | 142 | ||
| 143 | relocate_kernel_ptr = control_page; | ||
| 120 | page_list[PA_CONTROL_PAGE] = __pa(control_page); | 144 | page_list[PA_CONTROL_PAGE] = __pa(control_page); |
| 121 | page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; | 145 | page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; |
| 122 | page_list[PA_PGD] = __pa(kexec_pgd); | 146 | page_list[PA_PGD] = __pa(kexec_pgd); |
| 123 | page_list[VA_PGD] = (unsigned long)kexec_pgd; | 147 | page_list[VA_PGD] = (unsigned long)kexec_pgd; |
| 124 | #ifdef CONFIG_X86_PAE | 148 | #ifdef CONFIG_X86_PAE |
| @@ -131,6 +155,7 @@ NORET_TYPE void machine_kexec(struct kimage *image) | |||
| 131 | page_list[VA_PTE_0] = (unsigned long)kexec_pte0; | 155 | page_list[VA_PTE_0] = (unsigned long)kexec_pte0; |
| 132 | page_list[PA_PTE_1] = __pa(kexec_pte1); | 156 | page_list[PA_PTE_1] = __pa(kexec_pte1); |
| 133 | page_list[VA_PTE_1] = (unsigned long)kexec_pte1; | 157 | page_list[VA_PTE_1] = (unsigned long)kexec_pte1; |
| 158 | page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) << PAGE_SHIFT); | ||
| 134 | 159 | ||
| 135 | /* The segment registers are funny things, they have both a | 160 | /* The segment registers are funny things, they have both a |
| 136 | * visible and an invisible part. Whenever the visible part is | 161 | * visible and an invisible part. Whenever the visible part is |
| @@ -149,8 +174,10 @@ NORET_TYPE void machine_kexec(struct kimage *image) | |||
| 149 | set_idt(phys_to_virt(0),0); | 174 | set_idt(phys_to_virt(0),0); |
| 150 | 175 | ||
| 151 | /* now call it */ | 176 | /* now call it */ |
| 152 | relocate_kernel((unsigned long)image->head, (unsigned long)page_list, | 177 | image->start = relocate_kernel_ptr((unsigned long)image->head, |
| 153 | image->start, cpu_has_pae); | 178 | (unsigned long)page_list, |
| 179 | image->start, cpu_has_pae, | ||
| 180 | image->preserve_context); | ||
| 154 | } | 181 | } |
| 155 | 182 | ||
| 156 | void arch_crash_save_vmcoreinfo(void) | 183 | void arch_crash_save_vmcoreinfo(void) |
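For kexec jump (image->preserve_context), relocate_kernel can now return, so machine_kexec() copies it into the control page and calls it through a typed pointer carrying the new return value and extra argument. Only the first half of the page is copied, which suggests the second half is reserved for data used during the jump (an assumption on my part, not stated in the hunk). A condensed sketch of the cast-and-call pattern from the diff above:

    asmlinkage unsigned long (*reloc)(unsigned long indirection_page,
                                      unsigned long control_page,
                                      unsigned long start_address,
                                      unsigned int has_pae,
                                      unsigned int preserve_context);
    void *control_page = page_address(image->control_code_page);

    memcpy(control_page, relocate_kernel, PAGE_SIZE/2);
    reloc = control_page;                 /* enter the relocated copy */
    image->start = reloc((unsigned long)image->head,
                         (unsigned long)page_list,
                         image->start, cpu_has_pae,
                         image->preserve_context);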
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 9dd9262693a3..c43caa3a91f3 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
| @@ -181,7 +181,7 @@ void machine_kexec_cleanup(struct kimage *image) | |||
| 181 | * Do not allocate memory (or fail in any way) in machine_kexec(). | 181 | * Do not allocate memory (or fail in any way) in machine_kexec(). |
| 182 | * We are past the point of no return, committed to rebooting now. | 182 | * We are past the point of no return, committed to rebooting now. |
| 183 | */ | 183 | */ |
| 184 | NORET_TYPE void machine_kexec(struct kimage *image) | 184 | void machine_kexec(struct kimage *image) |
| 185 | { | 185 | { |
| 186 | unsigned long page_list[PAGES_NR]; | 186 | unsigned long page_list[PAGES_NR]; |
| 187 | void *control_page; | 187 | void *control_page; |
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index fc4790638b69..652fa5c38ebe 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c | |||
| @@ -657,9 +657,7 @@ static ssize_t reload_store(struct sys_device *dev, | |||
| 657 | if (end == buf) | 657 | if (end == buf) |
| 658 | return -EINVAL; | 658 | return -EINVAL; |
| 659 | if (val == 1) { | 659 | if (val == 1) { |
| 660 | cpumask_t old; | 660 | cpumask_t old = current->cpus_allowed; |
| 661 | |||
| 662 | old = current->cpus_allowed; | ||
| 663 | 661 | ||
| 664 | get_online_cpus(); | 662 | get_online_cpus(); |
| 665 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | 663 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); |
diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c index 0e867676b5a5..6ba87830d4b1 100644 --- a/arch/x86/kernel/module_64.c +++ b/arch/x86/kernel/module_64.c | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | #include <linux/fs.h> | 22 | #include <linux/fs.h> |
| 23 | #include <linux/string.h> | 23 | #include <linux/string.h> |
| 24 | #include <linux/kernel.h> | 24 | #include <linux/kernel.h> |
| 25 | #include <linux/mm.h> | ||
| 25 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
| 26 | #include <linux/bug.h> | 27 | #include <linux/bug.h> |
| 27 | 28 | ||
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 6ae005ccaed8..678090508a62 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
| @@ -83,7 +83,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m) | |||
| 83 | if (x86_quirks->mpc_oem_bus_info) | 83 | if (x86_quirks->mpc_oem_bus_info) |
| 84 | x86_quirks->mpc_oem_bus_info(m, str); | 84 | x86_quirks->mpc_oem_bus_info(m, str); |
| 85 | else | 85 | else |
| 86 | printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str); | 86 | apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->mpc_busid, str); |
| 87 | 87 | ||
| 88 | #if MAX_MP_BUSSES < 256 | 88 | #if MAX_MP_BUSSES < 256 |
| 89 | if (m->mpc_busid >= MAX_MP_BUSSES) { | 89 | if (m->mpc_busid >= MAX_MP_BUSSES) { |
| @@ -154,7 +154,7 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m) | |||
| 154 | 154 | ||
| 155 | static void print_MP_intsrc_info(struct mpc_config_intsrc *m) | 155 | static void print_MP_intsrc_info(struct mpc_config_intsrc *m) |
| 156 | { | 156 | { |
| 157 | printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," | 157 | apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," |
| 158 | " IRQ %02x, APIC ID %x, APIC INT %02x\n", | 158 | " IRQ %02x, APIC ID %x, APIC INT %02x\n", |
| 159 | m->mpc_irqtype, m->mpc_irqflag & 3, | 159 | m->mpc_irqtype, m->mpc_irqflag & 3, |
| 160 | (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, | 160 | (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, |
| @@ -163,7 +163,7 @@ static void print_MP_intsrc_info(struct mpc_config_intsrc *m) | |||
| 163 | 163 | ||
| 164 | static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) | 164 | static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) |
| 165 | { | 165 | { |
| 166 | printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," | 166 | apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," |
| 167 | " IRQ %02x, APIC ID %x, APIC INT %02x\n", | 167 | " IRQ %02x, APIC ID %x, APIC INT %02x\n", |
| 168 | mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, | 168 | mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, |
| 169 | (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, | 169 | (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, |
| @@ -235,7 +235,7 @@ static void __init MP_intsrc_info(struct mpc_config_intsrc *m) | |||
| 235 | 235 | ||
| 236 | static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) | 236 | static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) |
| 237 | { | 237 | { |
| 238 | printk(KERN_INFO "Lint: type %d, pol %d, trig %d, bus %02x," | 238 | apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x," |
| 239 | " IRQ %02x, APIC ID %x, APIC LINT %02x\n", | 239 | " IRQ %02x, APIC ID %x, APIC LINT %02x\n", |
| 240 | m->mpc_irqtype, m->mpc_irqflag & 3, | 240 | m->mpc_irqtype, m->mpc_irqflag & 3, |
| 241 | (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, | 241 | (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, |
| @@ -695,7 +695,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, | |||
| 695 | unsigned int *bp = phys_to_virt(base); | 695 | unsigned int *bp = phys_to_virt(base); |
| 696 | struct intel_mp_floating *mpf; | 696 | struct intel_mp_floating *mpf; |
| 697 | 697 | ||
| 698 | printk(KERN_DEBUG "Scan SMP from %p for %ld bytes.\n", bp, length); | 698 | apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", |
| 699 | bp, length); | ||
| 699 | BUILD_BUG_ON(sizeof(*mpf) != 16); | 700 | BUILD_BUG_ON(sizeof(*mpf) != 16); |
| 700 | 701 | ||
| 701 | while (length > 0) { | 702 | while (length > 0) { |
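These conversions route chatty MP-table lines through apic_printk(), which gates on apic_verbosity so they only appear when the kernel is booted with apic=verbose or apic=debug. A sketch of the gating; the macro body is reproduced from memory of this era's headers, so treat it as an assumption, and busid/str are placeholders:

    extern int apic_verbosity;

    /* assumed definition: print only at or below the current verbosity */
    #define apic_printk(v, s, a...)                 \
    do {                                            \
            if ((v) <= apic_verbosity)              \
                    printk(s, ##a);                 \
    } while (0)

    /* usage mirroring the hunks above */
    apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", busid, str);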
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 151f2d171f7c..02d19328525d 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c | |||
| @@ -29,6 +29,7 @@ | |||
| 29 | #include <linux/mm.h> | 29 | #include <linux/mm.h> |
| 30 | #include <linux/spinlock.h> | 30 | #include <linux/spinlock.h> |
| 31 | #include <linux/string.h> | 31 | #include <linux/string.h> |
| 32 | #include <linux/crash_dump.h> | ||
| 32 | #include <linux/dma-mapping.h> | 33 | #include <linux/dma-mapping.h> |
| 33 | #include <linux/bitops.h> | 34 | #include <linux/bitops.h> |
| 34 | #include <linux/pci_ids.h> | 35 | #include <linux/pci_ids.h> |
| @@ -36,6 +37,7 @@ | |||
| 36 | #include <linux/delay.h> | 37 | #include <linux/delay.h> |
| 37 | #include <linux/scatterlist.h> | 38 | #include <linux/scatterlist.h> |
| 38 | #include <linux/iommu-helper.h> | 39 | #include <linux/iommu-helper.h> |
| 40 | |||
| 39 | #include <asm/iommu.h> | 41 | #include <asm/iommu.h> |
| 40 | #include <asm/calgary.h> | 42 | #include <asm/calgary.h> |
| 41 | #include <asm/tce.h> | 43 | #include <asm/tce.h> |
| @@ -167,6 +169,8 @@ static void calgary_dump_error_regs(struct iommu_table *tbl); | |||
| 167 | static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev); | 169 | static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev); |
| 168 | static void calioc2_tce_cache_blast(struct iommu_table *tbl); | 170 | static void calioc2_tce_cache_blast(struct iommu_table *tbl); |
| 169 | static void calioc2_dump_error_regs(struct iommu_table *tbl); | 171 | static void calioc2_dump_error_regs(struct iommu_table *tbl); |
| 172 | static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl); | ||
| 173 | static void get_tce_space_from_tar(void); | ||
| 170 | 174 | ||
| 171 | static struct cal_chipset_ops calgary_chip_ops = { | 175 | static struct cal_chipset_ops calgary_chip_ops = { |
| 172 | .handle_quirks = calgary_handle_quirks, | 176 | .handle_quirks = calgary_handle_quirks, |
| @@ -410,22 +414,6 @@ static void calgary_unmap_sg(struct device *dev, | |||
| 410 | } | 414 | } |
| 411 | } | 415 | } |
| 412 | 416 | ||
| 413 | static int calgary_nontranslate_map_sg(struct device* dev, | ||
| 414 | struct scatterlist *sg, int nelems, int direction) | ||
| 415 | { | ||
| 416 | struct scatterlist *s; | ||
| 417 | int i; | ||
| 418 | |||
| 419 | for_each_sg(sg, s, nelems, i) { | ||
| 420 | struct page *p = sg_page(s); | ||
| 421 | |||
| 422 | BUG_ON(!p); | ||
| 423 | s->dma_address = virt_to_bus(sg_virt(s)); | ||
| 424 | s->dma_length = s->length; | ||
| 425 | } | ||
| 426 | return nelems; | ||
| 427 | } | ||
| 428 | |||
| 429 | static int calgary_map_sg(struct device *dev, struct scatterlist *sg, | 417 | static int calgary_map_sg(struct device *dev, struct scatterlist *sg, |
| 430 | int nelems, int direction) | 418 | int nelems, int direction) |
| 431 | { | 419 | { |
| @@ -436,9 +424,6 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, | |||
| 436 | unsigned long entry; | 424 | unsigned long entry; |
| 437 | int i; | 425 | int i; |
| 438 | 426 | ||
| 439 | if (!translation_enabled(tbl)) | ||
| 440 | return calgary_nontranslate_map_sg(dev, sg, nelems, direction); | ||
| 441 | |||
| 442 | for_each_sg(sg, s, nelems, i) { | 427 | for_each_sg(sg, s, nelems, i) { |
| 443 | BUG_ON(!sg_page(s)); | 428 | BUG_ON(!sg_page(s)); |
| 444 | 429 | ||
| @@ -474,7 +459,6 @@ error: | |||
| 474 | static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, | 459 | static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, |
| 475 | size_t size, int direction) | 460 | size_t size, int direction) |
| 476 | { | 461 | { |
| 477 | dma_addr_t dma_handle = bad_dma_address; | ||
| 478 | void *vaddr = phys_to_virt(paddr); | 462 | void *vaddr = phys_to_virt(paddr); |
| 479 | unsigned long uaddr; | 463 | unsigned long uaddr; |
| 480 | unsigned int npages; | 464 | unsigned int npages; |
| @@ -483,12 +467,7 @@ static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, | |||
| 483 | uaddr = (unsigned long)vaddr; | 467 | uaddr = (unsigned long)vaddr; |
| 484 | npages = num_dma_pages(uaddr, size); | 468 | npages = num_dma_pages(uaddr, size); |
| 485 | 469 | ||
| 486 | if (translation_enabled(tbl)) | 470 | return iommu_alloc(dev, tbl, vaddr, npages, direction); |
| 487 | dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction); | ||
| 488 | else | ||
| 489 | dma_handle = virt_to_bus(vaddr); | ||
| 490 | |||
| 491 | return dma_handle; | ||
| 492 | } | 471 | } |
| 493 | 472 | ||
| 494 | static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, | 473 | static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, |
| @@ -497,9 +476,6 @@ static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, | |||
| 497 | struct iommu_table *tbl = find_iommu_table(dev); | 476 | struct iommu_table *tbl = find_iommu_table(dev); |
| 498 | unsigned int npages; | 477 | unsigned int npages; |
| 499 | 478 | ||
| 500 | if (!translation_enabled(tbl)) | ||
| 501 | return; | ||
| 502 | |||
| 503 | npages = num_dma_pages(dma_handle, size); | 479 | npages = num_dma_pages(dma_handle, size); |
| 504 | iommu_free(tbl, dma_handle, npages); | 480 | iommu_free(tbl, dma_handle, npages); |
| 505 | } | 481 | } |
| @@ -522,18 +498,12 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, | |||
| 522 | goto error; | 498 | goto error; |
| 523 | memset(ret, 0, size); | 499 | memset(ret, 0, size); |
| 524 | 500 | ||
| 525 | if (translation_enabled(tbl)) { | 501 | /* set up tces to cover the allocated range */ |
| 526 | /* set up tces to cover the allocated range */ | 502 | mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); |
| 527 | mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); | 503 | if (mapping == bad_dma_address) |
| 528 | if (mapping == bad_dma_address) | 504 | goto free; |
| 529 | goto free; | 505 | *dma_handle = mapping; |
| 530 | |||
| 531 | *dma_handle = mapping; | ||
| 532 | } else /* non translated slot */ | ||
| 533 | *dma_handle = virt_to_bus(ret); | ||
| 534 | |||
| 535 | return ret; | 506 | return ret; |
| 536 | |||
| 537 | free: | 507 | free: |
| 538 | free_pages((unsigned long)ret, get_order(size)); | 508 | free_pages((unsigned long)ret, get_order(size)); |
| 539 | ret = NULL; | 509 | ret = NULL; |
| @@ -541,7 +511,7 @@ error: | |||
| 541 | return ret; | 511 | return ret; |
| 542 | } | 512 | } |
| 543 | 513 | ||
| 544 | static const struct dma_mapping_ops calgary_dma_ops = { | 514 | static struct dma_mapping_ops calgary_dma_ops = { |
| 545 | .alloc_coherent = calgary_alloc_coherent, | 515 | .alloc_coherent = calgary_alloc_coherent, |
| 546 | .map_single = calgary_map_single, | 516 | .map_single = calgary_map_single, |
| 547 | .unmap_single = calgary_unmap_single, | 517 | .unmap_single = calgary_unmap_single, |
| @@ -830,7 +800,11 @@ static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar) | |||
| 830 | 800 | ||
| 831 | tbl = pci_iommu(dev->bus); | 801 | tbl = pci_iommu(dev->bus); |
| 832 | tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space; | 802 | tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space; |
| 833 | tce_free(tbl, 0, tbl->it_size); | 803 | |
| 804 | if (is_kdump_kernel()) | ||
| 805 | calgary_init_bitmap_from_tce_table(tbl); | ||
| 806 | else | ||
| 807 | tce_free(tbl, 0, tbl->it_size); | ||
| 834 | 808 | ||
| 835 | if (is_calgary(dev->device)) | 809 | if (is_calgary(dev->device)) |
| 836 | tbl->chip_ops = &calgary_chip_ops; | 810 | tbl->chip_ops = &calgary_chip_ops; |
| @@ -1209,6 +1183,10 @@ static int __init calgary_init(void) | |||
| 1209 | if (ret) | 1183 | if (ret) |
| 1210 | return ret; | 1184 | return ret; |
| 1211 | 1185 | ||
| 1186 | /* Purely for kdump kernel case */ | ||
| 1187 | if (is_kdump_kernel()) | ||
| 1188 | get_tce_space_from_tar(); | ||
| 1189 | |||
| 1212 | do { | 1190 | do { |
| 1213 | dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev); | 1191 | dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev); |
| 1214 | if (!dev) | 1192 | if (!dev) |
| @@ -1230,6 +1208,16 @@ static int __init calgary_init(void) | |||
| 1230 | goto error; | 1208 | goto error; |
| 1231 | } while (1); | 1209 | } while (1); |
| 1232 | 1210 | ||
| 1211 | dev = NULL; | ||
| 1212 | for_each_pci_dev(dev) { | ||
| 1213 | struct iommu_table *tbl; | ||
| 1214 | |||
| 1215 | tbl = find_iommu_table(&dev->dev); | ||
| 1216 | |||
| 1217 | if (translation_enabled(tbl)) | ||
| 1218 | dev->dev.archdata.dma_ops = &calgary_dma_ops; | ||
| 1219 | } | ||
| 1220 | |||
| 1233 | return ret; | 1221 | return ret; |
| 1234 | 1222 | ||
| 1235 | error: | 1223 | error: |
| @@ -1251,6 +1239,7 @@ error: | |||
| 1251 | calgary_disable_translation(dev); | 1239 | calgary_disable_translation(dev); |
| 1252 | calgary_free_bus(dev); | 1240 | calgary_free_bus(dev); |
| 1253 | pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */ | 1241 | pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */ |
| 1242 | dev->dev.archdata.dma_ops = NULL; | ||
| 1254 | } while (1); | 1243 | } while (1); |
| 1255 | 1244 | ||
| 1256 | return ret; | 1245 | return ret; |
| @@ -1339,6 +1328,61 @@ static int __init calgary_bus_has_devices(int bus, unsigned short pci_dev) | |||
| 1339 | return (val != 0xffffffff); | 1328 | return (val != 0xffffffff); |
| 1340 | } | 1329 | } |
| 1341 | 1330 | ||
| 1331 | /* | ||
| 1332 | * calgary_init_bitmap_from_tce_table(): | ||
| 1333 | * Function for kdump case. In the second/kdump kernel, initialize | ||
| 1334 | * the bitmap based on the TCE table entries obtained from the first kernel | ||
| 1335 | */ | ||
| 1336 | static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl) | ||
| 1337 | { | ||
| 1338 | u64 *tp; | ||
| 1339 | unsigned int index; | ||
| 1340 | tp = ((u64 *)tbl->it_base); | ||
| 1341 | for (index = 0 ; index < tbl->it_size; index++) { | ||
| 1342 | if (*tp != 0x0) | ||
| 1343 | set_bit(index, tbl->it_map); | ||
| 1344 | tp++; | ||
| 1345 | } | ||
| 1346 | } | ||
| 1347 | |||
| 1348 | /* | ||
| 1349 | * get_tce_space_from_tar(): | ||
| 1350 | * Function for kdump case. Get the TCE tables from the first kernel | ||
| 1351 | * by reading the contents of the base address register of the Calgary IOMMU | ||
| 1352 | */ | ||
| 1353 | static void get_tce_space_from_tar(void) | ||
| 1354 | { | ||
| 1355 | int bus; | ||
| 1356 | void __iomem *target; | ||
| 1357 | unsigned long tce_space; | ||
| 1358 | |||
| 1359 | for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { | ||
| 1360 | struct calgary_bus_info *info = &bus_info[bus]; | ||
| 1361 | unsigned short pci_device; | ||
| 1362 | u32 val; | ||
| 1363 | |||
| 1364 | val = read_pci_config(bus, 0, 0, 0); | ||
| 1365 | pci_device = (val & 0xFFFF0000) >> 16; | ||
| 1366 | |||
| 1367 | if (!is_cal_pci_dev(pci_device)) | ||
| 1368 | continue; | ||
| 1369 | if (info->translation_disabled) | ||
| 1370 | continue; | ||
| 1371 | |||
| 1372 | if (calgary_bus_has_devices(bus, pci_device) || | ||
| 1373 | translate_empty_slots) { | ||
| 1374 | target = calgary_reg(bus_info[bus].bbar, | ||
| 1375 | tar_offset(bus)); | ||
| 1376 | tce_space = be64_to_cpu(readq(target)); | ||
| 1377 | tce_space = tce_space & TAR_SW_BITS; | ||
| 1378 | |||
| 1379 | tce_space = tce_space & (~specified_table_size); | ||
| 1380 | info->tce_space = (u64 *)__va(tce_space); | ||
| 1381 | } | ||
| 1382 | } | ||
| 1383 | return; | ||
| 1384 | } | ||
| 1385 | |||
| 1342 | void __init detect_calgary(void) | 1386 | void __init detect_calgary(void) |
| 1343 | { | 1387 | { |
| 1344 | int bus; | 1388 | int bus; |
| @@ -1394,7 +1438,8 @@ void __init detect_calgary(void) | |||
| 1394 | return; | 1438 | return; |
| 1395 | } | 1439 | } |
| 1396 | 1440 | ||
| 1397 | specified_table_size = determine_tce_table_size(max_pfn * PAGE_SIZE); | 1441 | specified_table_size = determine_tce_table_size((is_kdump_kernel() ? |
| 1442 | saved_max_pfn : max_pfn) * PAGE_SIZE); | ||
| 1398 | 1443 | ||
| 1399 | for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { | 1444 | for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { |
| 1400 | struct calgary_bus_info *info = &bus_info[bus]; | 1445 | struct calgary_bus_info *info = &bus_info[bus]; |
| @@ -1412,10 +1457,16 @@ void __init detect_calgary(void) | |||
| 1412 | 1457 | ||
| 1413 | if (calgary_bus_has_devices(bus, pci_device) || | 1458 | if (calgary_bus_has_devices(bus, pci_device) || |
| 1414 | translate_empty_slots) { | 1459 | translate_empty_slots) { |
| 1415 | tbl = alloc_tce_table(); | 1460 | /* |
| 1416 | if (!tbl) | 1461 | * If this is a kdump kernel, find and use the TCE tables | ||
| 1417 | goto cleanup; | 1462 | * from the first kernel; otherwise allocate TCE tables here | ||
| 1418 | info->tce_space = tbl; | 1463 | */ |
| 1464 | if (!is_kdump_kernel()) { | ||
| 1465 | tbl = alloc_tce_table(); | ||
| 1466 | if (!tbl) | ||
| 1467 | goto cleanup; | ||
| 1468 | info->tce_space = tbl; | ||
| 1469 | } | ||
| 1419 | calgary_found = 1; | 1470 | calgary_found = 1; |
| 1420 | } | 1471 | } |
| 1421 | } | 1472 | } |
| @@ -1430,6 +1481,10 @@ void __init detect_calgary(void) | |||
| 1430 | printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, " | 1481 | printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, " |
| 1431 | "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size, | 1482 | "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size, |
| 1432 | debugging ? "enabled" : "disabled"); | 1483 | debugging ? "enabled" : "disabled"); |
| 1484 | |||
| 1485 | /* swiotlb for devices that aren't behind the Calgary. */ | ||
| 1486 | if (max_pfn > MAX_DMA32_PFN) | ||
| 1487 | swiotlb = 1; | ||
| 1433 | } | 1488 | } |
| 1434 | return; | 1489 | return; |
| 1435 | 1490 | ||
| @@ -1446,7 +1501,7 @@ int __init calgary_iommu_init(void) | |||
| 1446 | { | 1501 | { |
| 1447 | int ret; | 1502 | int ret; |
| 1448 | 1503 | ||
| 1449 | if (no_iommu || swiotlb) | 1504 | if (no_iommu || (swiotlb && !calgary_detected)) |
| 1450 | return -ENODEV; | 1505 | return -ENODEV; |
| 1451 | 1506 | ||
| 1452 | if (!calgary_detected) | 1507 | if (!calgary_detected) |
| @@ -1459,15 +1514,14 @@ int __init calgary_iommu_init(void) | |||
| 1459 | if (ret) { | 1514 | if (ret) { |
| 1460 | printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " | 1515 | printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " |
| 1461 | "falling back to no_iommu\n", ret); | 1516 | "falling back to no_iommu\n", ret); |
| 1462 | if (max_pfn > MAX_DMA32_PFN) | ||
| 1463 | printk(KERN_ERR "WARNING more than 4GB of memory, " | ||
| 1464 | "32bit PCI may malfunction.\n"); | ||
| 1465 | return ret; | 1517 | return ret; |
| 1466 | } | 1518 | } |
| 1467 | 1519 | ||
| 1468 | force_iommu = 1; | 1520 | force_iommu = 1; |
| 1469 | bad_dma_address = 0x0; | 1521 | bad_dma_address = 0x0; |
| 1470 | dma_ops = &calgary_dma_ops; | 1522 | /* dma_ops is set to swiotlb or nommu */ |
| 1523 | if (!dma_ops) | ||
| 1524 | dma_ops = &nommu_dma_ops; | ||
| 1471 | 1525 | ||
| 1472 | return 0; | 1526 | return 0; |
| 1473 | } | 1527 | } |
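For illustration, a minimal user-space sketch of the bitmap reconstruction the kdump path above relies on: every non-zero TCE inherited from the first kernel marks an in-flight DMA mapping, so its slot is flagged busy instead of being freed. The helper and table size below are simplified stand-ins, not the kernel's set_bit() or struct iommu_table.

```c
#include <stdint.h>
#include <limits.h>

#define TCE_ENTRIES   4096UL                        /* illustrative table size */
#define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

/* Simplified stand-in for the kernel's set_bit(). */
static void set_bit_simple(unsigned int index, unsigned long *map)
{
	map[index / BITS_PER_LONG] |= 1UL << (index % BITS_PER_LONG);
}

/*
 * Mirrors the idea of calgary_init_bitmap_from_tce_table(): walk the
 * inherited TCE table and mark every live entry as allocated, so the
 * kdump kernel never hands out a slot the crashed kernel is still using.
 */
static void init_bitmap_from_tce_table(const uint64_t *tce_base,
				       unsigned long *it_map)
{
	unsigned int index;

	for (index = 0; index < TCE_ENTRIES; index++)
		if (tce_base[index] != 0)
			set_bit_simple(index, it_map);
}
```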
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index cbecb05551bb..87d4d6964ec2 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
| @@ -11,7 +11,7 @@ | |||
| 11 | 11 | ||
| 12 | static int forbid_dac __read_mostly; | 12 | static int forbid_dac __read_mostly; |
| 13 | 13 | ||
| 14 | const struct dma_mapping_ops *dma_ops; | 14 | struct dma_mapping_ops *dma_ops; |
| 15 | EXPORT_SYMBOL(dma_ops); | 15 | EXPORT_SYMBOL(dma_ops); |
| 16 | 16 | ||
| 17 | static int iommu_sac_force __read_mostly; | 17 | static int iommu_sac_force __read_mostly; |
| @@ -123,6 +123,14 @@ void __init pci_iommu_alloc(void) | |||
| 123 | 123 | ||
| 124 | pci_swiotlb_init(); | 124 | pci_swiotlb_init(); |
| 125 | } | 125 | } |
| 126 | |||
| 127 | unsigned long iommu_num_pages(unsigned long addr, unsigned long len) | ||
| 128 | { | ||
| 129 | unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE); | ||
| 130 | |||
| 131 | return size >> PAGE_SHIFT; | ||
| 132 | } | ||
| 133 | EXPORT_SYMBOL(iommu_num_pages); | ||
| 126 | #endif | 134 | #endif |
| 127 | 135 | ||
| 128 | /* | 136 | /* |
| @@ -192,126 +200,10 @@ static __init int iommu_setup(char *p) | |||
| 192 | } | 200 | } |
| 193 | early_param("iommu", iommu_setup); | 201 | early_param("iommu", iommu_setup); |
| 194 | 202 | ||
| 195 | #ifdef CONFIG_X86_32 | ||
| 196 | int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, | ||
| 197 | dma_addr_t device_addr, size_t size, int flags) | ||
| 198 | { | ||
| 199 | void __iomem *mem_base = NULL; | ||
| 200 | int pages = size >> PAGE_SHIFT; | ||
| 201 | int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); | ||
| 202 | |||
| 203 | if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) | ||
| 204 | goto out; | ||
| 205 | if (!size) | ||
| 206 | goto out; | ||
| 207 | if (dev->dma_mem) | ||
| 208 | goto out; | ||
| 209 | |||
| 210 | /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ | ||
| 211 | |||
| 212 | mem_base = ioremap(bus_addr, size); | ||
| 213 | if (!mem_base) | ||
| 214 | goto out; | ||
| 215 | |||
| 216 | dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); | ||
| 217 | if (!dev->dma_mem) | ||
| 218 | goto out; | ||
| 219 | dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); | ||
| 220 | if (!dev->dma_mem->bitmap) | ||
| 221 | goto free1_out; | ||
| 222 | |||
| 223 | dev->dma_mem->virt_base = mem_base; | ||
| 224 | dev->dma_mem->device_base = device_addr; | ||
| 225 | dev->dma_mem->size = pages; | ||
| 226 | dev->dma_mem->flags = flags; | ||
| 227 | |||
| 228 | if (flags & DMA_MEMORY_MAP) | ||
| 229 | return DMA_MEMORY_MAP; | ||
| 230 | |||
| 231 | return DMA_MEMORY_IO; | ||
| 232 | |||
| 233 | free1_out: | ||
| 234 | kfree(dev->dma_mem); | ||
| 235 | out: | ||
| 236 | if (mem_base) | ||
| 237 | iounmap(mem_base); | ||
| 238 | return 0; | ||
| 239 | } | ||
| 240 | EXPORT_SYMBOL(dma_declare_coherent_memory); | ||
| 241 | |||
| 242 | void dma_release_declared_memory(struct device *dev) | ||
| 243 | { | ||
| 244 | struct dma_coherent_mem *mem = dev->dma_mem; | ||
| 245 | |||
| 246 | if (!mem) | ||
| 247 | return; | ||
| 248 | dev->dma_mem = NULL; | ||
| 249 | iounmap(mem->virt_base); | ||
| 250 | kfree(mem->bitmap); | ||
| 251 | kfree(mem); | ||
| 252 | } | ||
| 253 | EXPORT_SYMBOL(dma_release_declared_memory); | ||
| 254 | |||
| 255 | void *dma_mark_declared_memory_occupied(struct device *dev, | ||
| 256 | dma_addr_t device_addr, size_t size) | ||
| 257 | { | ||
| 258 | struct dma_coherent_mem *mem = dev->dma_mem; | ||
| 259 | int pos, err; | ||
| 260 | int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1); | ||
| 261 | |||
| 262 | pages >>= PAGE_SHIFT; | ||
| 263 | |||
| 264 | if (!mem) | ||
| 265 | return ERR_PTR(-EINVAL); | ||
| 266 | |||
| 267 | pos = (device_addr - mem->device_base) >> PAGE_SHIFT; | ||
| 268 | err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); | ||
| 269 | if (err != 0) | ||
| 270 | return ERR_PTR(err); | ||
| 271 | return mem->virt_base + (pos << PAGE_SHIFT); | ||
| 272 | } | ||
| 273 | EXPORT_SYMBOL(dma_mark_declared_memory_occupied); | ||
| 274 | |||
| 275 | static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size, | ||
| 276 | dma_addr_t *dma_handle, void **ret) | ||
| 277 | { | ||
| 278 | struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; | ||
| 279 | int order = get_order(size); | ||
| 280 | |||
| 281 | if (mem) { | ||
| 282 | int page = bitmap_find_free_region(mem->bitmap, mem->size, | ||
| 283 | order); | ||
| 284 | if (page >= 0) { | ||
| 285 | *dma_handle = mem->device_base + (page << PAGE_SHIFT); | ||
| 286 | *ret = mem->virt_base + (page << PAGE_SHIFT); | ||
| 287 | memset(*ret, 0, size); | ||
| 288 | } | ||
| 289 | if (mem->flags & DMA_MEMORY_EXCLUSIVE) | ||
| 290 | *ret = NULL; | ||
| 291 | } | ||
| 292 | return (mem != NULL); | ||
| 293 | } | ||
| 294 | |||
| 295 | static int dma_release_coherent(struct device *dev, int order, void *vaddr) | ||
| 296 | { | ||
| 297 | struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; | ||
| 298 | |||
| 299 | if (mem && vaddr >= mem->virt_base && vaddr < | ||
| 300 | (mem->virt_base + (mem->size << PAGE_SHIFT))) { | ||
| 301 | int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; | ||
| 302 | |||
| 303 | bitmap_release_region(mem->bitmap, page, order); | ||
| 304 | return 1; | ||
| 305 | } | ||
| 306 | return 0; | ||
| 307 | } | ||
| 308 | #else | ||
| 309 | #define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0) | ||
| 310 | #define dma_release_coherent(dev, order, vaddr) (0) | ||
| 311 | #endif /* CONFIG_X86_32 */ | ||
| 312 | |||
| 313 | int dma_supported(struct device *dev, u64 mask) | 203 | int dma_supported(struct device *dev, u64 mask) |
| 314 | { | 204 | { |
| 205 | struct dma_mapping_ops *ops = get_dma_ops(dev); | ||
| 206 | |||
| 315 | #ifdef CONFIG_PCI | 207 | #ifdef CONFIG_PCI |
| 316 | if (mask > 0xffffffff && forbid_dac > 0) { | 208 | if (mask > 0xffffffff && forbid_dac > 0) { |
| 317 | dev_info(dev, "PCI: Disallowing DAC for device\n"); | 209 | dev_info(dev, "PCI: Disallowing DAC for device\n"); |
| @@ -319,8 +211,8 @@ int dma_supported(struct device *dev, u64 mask) | |||
| 319 | } | 211 | } |
| 320 | #endif | 212 | #endif |
| 321 | 213 | ||
| 322 | if (dma_ops->dma_supported) | 214 | if (ops->dma_supported) |
| 323 | return dma_ops->dma_supported(dev, mask); | 215 | return ops->dma_supported(dev, mask); |
| 324 | 216 | ||
| 325 | /* Copied from i386. Doesn't make much sense, because it will | 217 | /* Copied from i386. Doesn't make much sense, because it will |
| 326 | only work for pci_alloc_coherent. | 218 | only work for pci_alloc_coherent. |
| @@ -367,6 +259,7 @@ void * | |||
| 367 | dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | 259 | dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, |
| 368 | gfp_t gfp) | 260 | gfp_t gfp) |
| 369 | { | 261 | { |
| 262 | struct dma_mapping_ops *ops = get_dma_ops(dev); | ||
| 370 | void *memory = NULL; | 263 | void *memory = NULL; |
| 371 | struct page *page; | 264 | struct page *page; |
| 372 | unsigned long dma_mask = 0; | 265 | unsigned long dma_mask = 0; |
| @@ -376,7 +269,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | |||
| 376 | /* ignore region specifiers */ | 269 | /* ignore region specifiers */ |
| 377 | gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); | 270 | gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); |
| 378 | 271 | ||
| 379 | if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory)) | 272 | if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) |
| 380 | return memory; | 273 | return memory; |
| 381 | 274 | ||
| 382 | if (!dev) { | 275 | if (!dev) { |
| @@ -435,8 +328,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | |||
| 435 | /* Let low level make its own zone decisions */ | 328 | /* Let low level make its own zone decisions */ |
| 436 | gfp &= ~(GFP_DMA32|GFP_DMA); | 329 | gfp &= ~(GFP_DMA32|GFP_DMA); |
| 437 | 330 | ||
| 438 | if (dma_ops->alloc_coherent) | 331 | if (ops->alloc_coherent) |
| 439 | return dma_ops->alloc_coherent(dev, size, | 332 | return ops->alloc_coherent(dev, size, |
| 440 | dma_handle, gfp); | 333 | dma_handle, gfp); |
| 441 | return NULL; | 334 | return NULL; |
| 442 | } | 335 | } |
| @@ -448,14 +341,14 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | |||
| 448 | } | 341 | } |
| 449 | } | 342 | } |
| 450 | 343 | ||
| 451 | if (dma_ops->alloc_coherent) { | 344 | if (ops->alloc_coherent) { |
| 452 | free_pages((unsigned long)memory, get_order(size)); | 345 | free_pages((unsigned long)memory, get_order(size)); |
| 453 | gfp &= ~(GFP_DMA|GFP_DMA32); | 346 | gfp &= ~(GFP_DMA|GFP_DMA32); |
| 454 | return dma_ops->alloc_coherent(dev, size, dma_handle, gfp); | 347 | return ops->alloc_coherent(dev, size, dma_handle, gfp); |
| 455 | } | 348 | } |
| 456 | 349 | ||
| 457 | if (dma_ops->map_simple) { | 350 | if (ops->map_simple) { |
| 458 | *dma_handle = dma_ops->map_simple(dev, virt_to_phys(memory), | 351 | *dma_handle = ops->map_simple(dev, virt_to_phys(memory), |
| 459 | size, | 352 | size, |
| 460 | PCI_DMA_BIDIRECTIONAL); | 353 | PCI_DMA_BIDIRECTIONAL); |
| 461 | if (*dma_handle != bad_dma_address) | 354 | if (*dma_handle != bad_dma_address) |
| @@ -477,12 +370,14 @@ EXPORT_SYMBOL(dma_alloc_coherent); | |||
| 477 | void dma_free_coherent(struct device *dev, size_t size, | 370 | void dma_free_coherent(struct device *dev, size_t size, |
| 478 | void *vaddr, dma_addr_t bus) | 371 | void *vaddr, dma_addr_t bus) |
| 479 | { | 372 | { |
| 373 | struct dma_mapping_ops *ops = get_dma_ops(dev); | ||
| 374 | |||
| 480 | int order = get_order(size); | 375 | int order = get_order(size); |
| 481 | WARN_ON(irqs_disabled()); /* for portability */ | 376 | WARN_ON(irqs_disabled()); /* for portability */ |
| 482 | if (dma_release_coherent(dev, order, vaddr)) | 377 | if (dma_release_from_coherent(dev, order, vaddr)) |
| 483 | return; | 378 | return; |
| 484 | if (dma_ops->unmap_single) | 379 | if (ops->unmap_single) |
| 485 | dma_ops->unmap_single(dev, bus, size, 0); | 380 | ops->unmap_single(dev, bus, size, 0); |
| 486 | free_pages((unsigned long)vaddr, order); | 381 | free_pages((unsigned long)vaddr, order); |
| 487 | } | 382 | } |
| 488 | EXPORT_SYMBOL(dma_free_coherent); | 383 | EXPORT_SYMBOL(dma_free_coherent); |
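The iommu_num_pages() helper added above (and adopted by the GART code in the next diff in place of its private to_pages() macro) counts how many pages a DMA buffer touches: the offset within the first page plus the length, rounded up to whole pages. A small user-space rendering of the same arithmetic, assuming 4 KiB pages:

```c
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

/* Same rounding as the kernel helper: pages spanned by [addr, addr + len). */
static unsigned long iommu_num_pages(unsigned long addr, unsigned long len)
{
	unsigned long size = ((addr & ~PAGE_MASK) + len + PAGE_SIZE - 1)
			      & PAGE_MASK;

	return size >> PAGE_SHIFT;
}

int main(void)
{
	/* 8 KiB starting 16 bytes into a page straddles 3 pages, not 2. */
	printf("%lu\n", iommu_num_pages(0x1010, 0x2000));	/* prints 3 */
	/* A page-aligned 8 KiB buffer spans exactly 2 pages. */
	printf("%lu\n", iommu_num_pages(0x1000, 0x2000));	/* prints 2 */
	return 0;
}
```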
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 1062dc1e6396..cdab67849074 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
| @@ -67,9 +67,6 @@ static u32 gart_unmapped_entry; | |||
| 67 | (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) | 67 | (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) |
| 68 | #define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28)) | 68 | #define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28)) |
| 69 | 69 | ||
| 70 | #define to_pages(addr, size) \ | ||
| 71 | (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) | ||
| 72 | |||
| 73 | #define EMERGENCY_PAGES 32 /* = 128KB */ | 70 | #define EMERGENCY_PAGES 32 /* = 128KB */ |
| 74 | 71 | ||
| 75 | #ifdef CONFIG_AGP | 72 | #ifdef CONFIG_AGP |
| @@ -241,7 +238,7 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size) | |||
| 241 | static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, | 238 | static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, |
| 242 | size_t size, int dir) | 239 | size_t size, int dir) |
| 243 | { | 240 | { |
| 244 | unsigned long npages = to_pages(phys_mem, size); | 241 | unsigned long npages = iommu_num_pages(phys_mem, size); |
| 245 | unsigned long iommu_page = alloc_iommu(dev, npages); | 242 | unsigned long iommu_page = alloc_iommu(dev, npages); |
| 246 | int i; | 243 | int i; |
| 247 | 244 | ||
| @@ -304,7 +301,7 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, | |||
| 304 | return; | 301 | return; |
| 305 | 302 | ||
| 306 | iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; | 303 | iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; |
| 307 | npages = to_pages(dma_addr, size); | 304 | npages = iommu_num_pages(dma_addr, size); |
| 308 | for (i = 0; i < npages; i++) { | 305 | for (i = 0; i < npages; i++) { |
| 309 | iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; | 306 | iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; |
| 310 | CLEAR_LEAK(iommu_page + i); | 307 | CLEAR_LEAK(iommu_page + i); |
| @@ -387,7 +384,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, | |||
| 387 | } | 384 | } |
| 388 | 385 | ||
| 389 | addr = phys_addr; | 386 | addr = phys_addr; |
| 390 | pages = to_pages(s->offset, s->length); | 387 | pages = iommu_num_pages(s->offset, s->length); |
| 391 | while (pages--) { | 388 | while (pages--) { |
| 392 | iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); | 389 | iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); |
| 393 | SET_LEAK(iommu_page); | 390 | SET_LEAK(iommu_page); |
| @@ -470,7 +467,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) | |||
| 470 | 467 | ||
| 471 | seg_size += s->length; | 468 | seg_size += s->length; |
| 472 | need = nextneed; | 469 | need = nextneed; |
| 473 | pages += to_pages(s->offset, s->length); | 470 | pages += iommu_num_pages(s->offset, s->length); |
| 474 | ps = s; | 471 | ps = s; |
| 475 | } | 472 | } |
| 476 | if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0) | 473 | if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0) |
| @@ -692,8 +689,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
| 692 | 689 | ||
| 693 | extern int agp_amd64_init(void); | 690 | extern int agp_amd64_init(void); |
| 694 | 691 | ||
| 695 | static const struct dma_mapping_ops gart_dma_ops = { | 692 | static struct dma_mapping_ops gart_dma_ops = { |
| 696 | .mapping_error = NULL, | ||
| 697 | .map_single = gart_map_single, | 693 | .map_single = gart_map_single, |
| 698 | .map_simple = gart_map_simple, | 694 | .map_simple = gart_map_simple, |
| 699 | .unmap_single = gart_unmap_single, | 695 | .unmap_single = gart_unmap_single, |
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 792b9179eff3..3f91f71cdc3e 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c | |||
| @@ -72,21 +72,9 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, | |||
| 72 | return nents; | 72 | return nents; |
| 73 | } | 73 | } |
| 74 | 74 | ||
| 75 | /* Make sure we keep the same behaviour */ | 75 | struct dma_mapping_ops nommu_dma_ops = { |
| 76 | static int nommu_mapping_error(dma_addr_t dma_addr) | ||
| 77 | { | ||
| 78 | #ifdef CONFIG_X86_32 | ||
| 79 | return 0; | ||
| 80 | #else | ||
| 81 | return (dma_addr == bad_dma_address); | ||
| 82 | #endif | ||
| 83 | } | ||
| 84 | |||
| 85 | |||
| 86 | const struct dma_mapping_ops nommu_dma_ops = { | ||
| 87 | .map_single = nommu_map_single, | 76 | .map_single = nommu_map_single, |
| 88 | .map_sg = nommu_map_sg, | 77 | .map_sg = nommu_map_sg, |
| 89 | .mapping_error = nommu_mapping_error, | ||
| 90 | .is_phys = 1, | 78 | .is_phys = 1, |
| 91 | }; | 79 | }; |
| 92 | 80 | ||
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 20df839b9c20..c4ce0332759e 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c | |||
| @@ -18,7 +18,7 @@ swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, | |||
| 18 | return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction); | 18 | return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction); |
| 19 | } | 19 | } |
| 20 | 20 | ||
| 21 | const struct dma_mapping_ops swiotlb_dma_ops = { | 21 | struct dma_mapping_ops swiotlb_dma_ops = { |
| 22 | .mapping_error = swiotlb_dma_mapping_error, | 22 | .mapping_error = swiotlb_dma_mapping_error, |
| 23 | .alloc_coherent = swiotlb_alloc_coherent, | 23 | .alloc_coherent = swiotlb_alloc_coherent, |
| 24 | .free_coherent = swiotlb_free_coherent, | 24 | .free_coherent = swiotlb_free_coherent, |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 0c3927accb00..53bc653ed5ca 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
| @@ -128,7 +128,7 @@ void cpu_idle(void) | |||
| 128 | 128 | ||
| 129 | /* endless idle loop with no priority at all */ | 129 | /* endless idle loop with no priority at all */ |
| 130 | while (1) { | 130 | while (1) { |
| 131 | tick_nohz_stop_sched_tick(); | 131 | tick_nohz_stop_sched_tick(1); |
| 132 | while (!need_resched()) { | 132 | while (!need_resched()) { |
| 133 | 133 | ||
| 134 | check_pgt_cache(); | 134 | check_pgt_cache(); |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e8a8e1b99817..3fb62a7d9a16 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
| @@ -120,7 +120,7 @@ void cpu_idle(void) | |||
| 120 | current_thread_info()->status |= TS_POLLING; | 120 | current_thread_info()->status |= TS_POLLING; |
| 121 | /* endless idle loop with no priority at all */ | 121 | /* endless idle loop with no priority at all */ |
| 122 | while (1) { | 122 | while (1) { |
| 123 | tick_nohz_stop_sched_tick(); | 123 | tick_nohz_stop_sched_tick(1); |
| 124 | while (!need_resched()) { | 124 | while (!need_resched()) { |
| 125 | 125 | ||
| 126 | rmb(); | 126 | rmb(); |
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 9dcf39c02972..724adfc63cb9 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
| @@ -411,10 +411,9 @@ void native_machine_shutdown(void) | |||
| 411 | { | 411 | { |
| 412 | /* Stop the cpus and apics */ | 412 | /* Stop the cpus and apics */ |
| 413 | #ifdef CONFIG_SMP | 413 | #ifdef CONFIG_SMP |
| 414 | int reboot_cpu_id; | ||
| 415 | 414 | ||
| 416 | /* The boot cpu is always logical cpu 0 */ | 415 | /* The boot cpu is always logical cpu 0 */ |
| 417 | reboot_cpu_id = 0; | 416 | int reboot_cpu_id = 0; |
| 418 | 417 | ||
| 419 | #ifdef CONFIG_X86_32 | 418 | #ifdef CONFIG_X86_32 |
| 420 | /* See if there has been given a command line override */ | 419 | /* See if there has been given a command line override */ |
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index c30fe25d470d..703310a99023 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S | |||
| @@ -20,11 +20,44 @@ | |||
| 20 | #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) | 20 | #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) |
| 21 | #define PAE_PGD_ATTR (_PAGE_PRESENT) | 21 | #define PAE_PGD_ATTR (_PAGE_PRESENT) |
| 22 | 22 | ||
| 23 | /* control_page + PAGE_SIZE/2 ~ control_page + PAGE_SIZE * 3/4 are | ||
| 24 | * used to save some data for jumping back | ||
| 25 | */ | ||
| 26 | #define DATA(offset) (PAGE_SIZE/2+(offset)) | ||
| 27 | |||
| 28 | /* Minimal CPU state */ | ||
| 29 | #define ESP DATA(0x0) | ||
| 30 | #define CR0 DATA(0x4) | ||
| 31 | #define CR3 DATA(0x8) | ||
| 32 | #define CR4 DATA(0xc) | ||
| 33 | |||
| 34 | /* other data */ | ||
| 35 | #define CP_VA_CONTROL_PAGE DATA(0x10) | ||
| 36 | #define CP_PA_PGD DATA(0x14) | ||
| 37 | #define CP_PA_SWAP_PAGE DATA(0x18) | ||
| 38 | #define CP_PA_BACKUP_PAGES_MAP DATA(0x1c) | ||
| 39 | |||
| 23 | .text | 40 | .text |
| 24 | .align PAGE_SIZE | 41 | .align PAGE_SIZE |
| 25 | .globl relocate_kernel | 42 | .globl relocate_kernel |
| 26 | relocate_kernel: | 43 | relocate_kernel: |
| 27 | movl 8(%esp), %ebp /* list of pages */ | 44 | /* Save the CPU context, used for jumping back */ |
| 45 | |||
| 46 | pushl %ebx | ||
| 47 | pushl %esi | ||
| 48 | pushl %edi | ||
| 49 | pushl %ebp | ||
| 50 | pushf | ||
| 51 | |||
| 52 | movl 20+8(%esp), %ebp /* list of pages */ | ||
| 53 | movl PTR(VA_CONTROL_PAGE)(%ebp), %edi | ||
| 54 | movl %esp, ESP(%edi) | ||
| 55 | movl %cr0, %eax | ||
| 56 | movl %eax, CR0(%edi) | ||
| 57 | movl %cr3, %eax | ||
| 58 | movl %eax, CR3(%edi) | ||
| 59 | movl %cr4, %eax | ||
| 60 | movl %eax, CR4(%edi) | ||
| 28 | 61 | ||
| 29 | #ifdef CONFIG_X86_PAE | 62 | #ifdef CONFIG_X86_PAE |
| 30 | /* map the control page at its virtual address */ | 63 | /* map the control page at its virtual address */ |
| @@ -138,15 +171,25 @@ relocate_kernel: | |||
| 138 | 171 | ||
| 139 | relocate_new_kernel: | 172 | relocate_new_kernel: |
| 140 | /* read the arguments and say goodbye to the stack */ | 173 | /* read the arguments and say goodbye to the stack */ |
| 141 | movl 4(%esp), %ebx /* page_list */ | 174 | movl 20+4(%esp), %ebx /* page_list */ |
| 142 | movl 8(%esp), %ebp /* list of pages */ | 175 | movl 20+8(%esp), %ebp /* list of pages */ |
| 143 | movl 12(%esp), %edx /* start address */ | 176 | movl 20+12(%esp), %edx /* start address */ |
| 144 | movl 16(%esp), %ecx /* cpu_has_pae */ | 177 | movl 20+16(%esp), %ecx /* cpu_has_pae */ |
| 178 | movl 20+20(%esp), %esi /* preserve_context */ | ||
| 145 | 179 | ||
| 146 | /* zero out flags, and disable interrupts */ | 180 | /* zero out flags, and disable interrupts */ |
| 147 | pushl $0 | 181 | pushl $0 |
| 148 | popfl | 182 | popfl |
| 149 | 183 | ||
| 184 | /* save some information for jumping back */ | ||
| 185 | movl PTR(VA_CONTROL_PAGE)(%ebp), %edi | ||
| 186 | movl %edi, CP_VA_CONTROL_PAGE(%edi) | ||
| 187 | movl PTR(PA_PGD)(%ebp), %eax | ||
| 188 | movl %eax, CP_PA_PGD(%edi) | ||
| 189 | movl PTR(PA_SWAP_PAGE)(%ebp), %eax | ||
| 190 | movl %eax, CP_PA_SWAP_PAGE(%edi) | ||
| 191 | movl %ebx, CP_PA_BACKUP_PAGES_MAP(%edi) | ||
| 192 | |||
| 150 | /* get physical address of control page now */ | 193 | /* get physical address of control page now */ |
| 151 | /* this is impossible after page table switch */ | 194 | /* this is impossible after page table switch */ |
| 152 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edi | 195 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edi |
| @@ -197,8 +240,90 @@ identity_mapped: | |||
| 197 | xorl %eax, %eax | 240 | xorl %eax, %eax |
| 198 | movl %eax, %cr3 | 241 | movl %eax, %cr3 |
| 199 | 242 | ||
| 243 | movl CP_PA_SWAP_PAGE(%edi), %eax | ||
| 244 | pushl %eax | ||
| 245 | pushl %ebx | ||
| 246 | call swap_pages | ||
| 247 | addl $8, %esp | ||
| 248 | |||
| 249 | /* To be certain of avoiding problems with self-modifying code | ||
| 250 | * I need to execute a serializing instruction here. | ||
| 251 | * So I flush the TLB, it's handy, and not processor dependent. | ||
| 252 | */ | ||
| 253 | xorl %eax, %eax | ||
| 254 | movl %eax, %cr3 | ||
| 255 | |||
| 256 | /* set all of the registers to known values */ | ||
| 257 | /* leave %esp alone */ | ||
| 258 | |||
| 259 | testl %esi, %esi | ||
| 260 | jnz 1f | ||
| 261 | xorl %edi, %edi | ||
| 262 | xorl %eax, %eax | ||
| 263 | xorl %ebx, %ebx | ||
| 264 | xorl %ecx, %ecx | ||
| 265 | xorl %edx, %edx | ||
| 266 | xorl %esi, %esi | ||
| 267 | xorl %ebp, %ebp | ||
| 268 | ret | ||
| 269 | 1: | ||
| 270 | popl %edx | ||
| 271 | movl CP_PA_SWAP_PAGE(%edi), %esp | ||
| 272 | addl $PAGE_SIZE, %esp | ||
| 273 | 2: | ||
| 274 | call *%edx | ||
| 275 | |||
| 276 | /* get the re-entry point of the peer system */ | ||
| 277 | movl 0(%esp), %ebp | ||
| 278 | call 1f | ||
| 279 | 1: | ||
| 280 | popl %ebx | ||
| 281 | subl $(1b - relocate_kernel), %ebx | ||
| 282 | movl CP_VA_CONTROL_PAGE(%ebx), %edi | ||
| 283 | lea PAGE_SIZE(%ebx), %esp | ||
| 284 | movl CP_PA_SWAP_PAGE(%ebx), %eax | ||
| 285 | movl CP_PA_BACKUP_PAGES_MAP(%ebx), %edx | ||
| 286 | pushl %eax | ||
| 287 | pushl %edx | ||
| 288 | call swap_pages | ||
| 289 | addl $8, %esp | ||
| 290 | movl CP_PA_PGD(%ebx), %eax | ||
| 291 | movl %eax, %cr3 | ||
| 292 | movl %cr0, %eax | ||
| 293 | orl $(1<<31), %eax | ||
| 294 | movl %eax, %cr0 | ||
| 295 | lea PAGE_SIZE(%edi), %esp | ||
| 296 | movl %edi, %eax | ||
| 297 | addl $(virtual_mapped - relocate_kernel), %eax | ||
| 298 | pushl %eax | ||
| 299 | ret | ||
| 300 | |||
| 301 | virtual_mapped: | ||
| 302 | movl CR4(%edi), %eax | ||
| 303 | movl %eax, %cr4 | ||
| 304 | movl CR3(%edi), %eax | ||
| 305 | movl %eax, %cr3 | ||
| 306 | movl CR0(%edi), %eax | ||
| 307 | movl %eax, %cr0 | ||
| 308 | movl ESP(%edi), %esp | ||
| 309 | movl %ebp, %eax | ||
| 310 | |||
| 311 | popf | ||
| 312 | popl %ebp | ||
| 313 | popl %edi | ||
| 314 | popl %esi | ||
| 315 | popl %ebx | ||
| 316 | ret | ||
| 317 | |||
| 200 | /* Do the copies */ | 318 | /* Do the copies */ |
| 201 | movl %ebx, %ecx | 319 | swap_pages: |
| 320 | movl 8(%esp), %edx | ||
| 321 | movl 4(%esp), %ecx | ||
| 322 | pushl %ebp | ||
| 323 | pushl %ebx | ||
| 324 | pushl %edi | ||
| 325 | pushl %esi | ||
| 326 | movl %ecx, %ebx | ||
| 202 | jmp 1f | 327 | jmp 1f |
| 203 | 328 | ||
| 204 | 0: /* top, read another word from the indirection page */ | 329 | 0: /* top, read another word from the indirection page */ |
| @@ -226,27 +351,28 @@ identity_mapped: | |||
| 226 | movl %ecx, %esi /* For every source page do a copy */ | 351 | movl %ecx, %esi /* For every source page do a copy */ |
| 227 | andl $0xfffff000, %esi | 352 | andl $0xfffff000, %esi |
| 228 | 353 | ||
| 354 | movl %edi, %eax | ||
| 355 | movl %esi, %ebp | ||
| 356 | |||
| 357 | movl %edx, %edi | ||
| 229 | movl $1024, %ecx | 358 | movl $1024, %ecx |
| 230 | rep ; movsl | 359 | rep ; movsl |
| 231 | jmp 0b | ||
| 232 | 360 | ||
| 233 | 3: | 361 | movl %ebp, %edi |
| 234 | 362 | movl %eax, %esi | |
| 235 | /* To be certain of avoiding problems with self-modifying code | 363 | movl $1024, %ecx |
| 236 | * I need to execute a serializing instruction here. | 364 | rep ; movsl |
| 237 | * So I flush the TLB, it's handy, and not processor dependent. | ||
| 238 | */ | ||
| 239 | xorl %eax, %eax | ||
| 240 | movl %eax, %cr3 | ||
| 241 | 365 | ||
| 242 | /* set all of the registers to known values */ | 366 | movl %eax, %edi |
| 243 | /* leave %esp alone */ | 367 | movl %edx, %esi |
| 368 | movl $1024, %ecx | ||
| 369 | rep ; movsl | ||
| 244 | 370 | ||
| 245 | xorl %eax, %eax | 371 | lea PAGE_SIZE(%ebp), %esi |
| 246 | xorl %ebx, %ebx | 372 | jmp 0b |
| 247 | xorl %ecx, %ecx | 373 | 3: |
| 248 | xorl %edx, %edx | 374 | popl %esi |
| 249 | xorl %esi, %esi | 375 | popl %edi |
| 250 | xorl %edi, %edi | 376 | popl %ebx |
| 251 | xorl %ebp, %ebp | 377 | popl %ebp |
| 252 | ret | 378 | ret |
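The rewritten swap_pages above no longer simply copies the new kernel over the old one: each destination/source pair is exchanged through the swap page, so with preserve_context set the displaced pages survive and relocate_kernel can later jump back into the original kernel (the saved ESP/CR0/CR3/CR4 slots restore its minimal CPU state). A rough C rendering of one iteration of the triple copy; the real sequence runs in assembly with paging disabled:

```c
#include <string.h>

#define PAGE_SIZE 4096UL

/*
 * Exchange one destination/source page pair through a scratch page,
 * mirroring the three rep;movsl copies in swap_pages:
 * src -> scratch, dst -> src, scratch -> dst.
 */
static void swap_one_page(void *dst, void *src, void *scratch)
{
	memcpy(scratch, src, PAGE_SIZE); /* stash the incoming page */
	memcpy(src, dst, PAGE_SIZE);     /* preserve what dst held */
	memcpy(dst, scratch, PAGE_SIZE); /* install the incoming page */
}
```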
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b4aacb9f52e3..68b48e3fbcbd 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
| @@ -597,13 +597,21 @@ void __init setup_arch(char **cmdline_p) | |||
| 597 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); | 597 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); |
| 598 | visws_early_detect(); | 598 | visws_early_detect(); |
| 599 | pre_setup_arch_hook(); | 599 | pre_setup_arch_hook(); |
| 600 | early_cpu_init(); | ||
| 601 | #else | 600 | #else |
| 602 | printk(KERN_INFO "Command line: %s\n", boot_command_line); | 601 | printk(KERN_INFO "Command line: %s\n", boot_command_line); |
| 603 | #endif | 602 | #endif |
| 604 | 603 | ||
| 604 | early_cpu_init(); | ||
| 605 | early_ioremap_init(); | 605 | early_ioremap_init(); |
| 606 | 606 | ||
| 607 | #if defined(CONFIG_VMI) && defined(CONFIG_X86_32) | ||
| 608 | /* | ||
| 609 | * Must be before kernel pagetables are setup | ||
| 610 | * or fixmap area is touched. | ||
| 611 | */ | ||
| 612 | vmi_init(); | ||
| 613 | #endif | ||
| 614 | |||
| 607 | ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); | 615 | ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); |
| 608 | screen_info = boot_params.screen_info; | 616 | screen_info = boot_params.screen_info; |
| 609 | edid_info = boot_params.edid_info; | 617 | edid_info = boot_params.edid_info; |
| @@ -665,9 +673,6 @@ void __init setup_arch(char **cmdline_p) | |||
| 665 | bss_resource.start = virt_to_phys(&__bss_start); | 673 | bss_resource.start = virt_to_phys(&__bss_start); |
| 666 | bss_resource.end = virt_to_phys(&__bss_stop)-1; | 674 | bss_resource.end = virt_to_phys(&__bss_stop)-1; |
| 667 | 675 | ||
| 668 | #ifdef CONFIG_X86_64 | ||
| 669 | early_cpu_init(); | ||
| 670 | #endif | ||
| 671 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); | 676 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); |
| 672 | *cmdline_p = command_line; | 677 | *cmdline_p = command_line; |
| 673 | 678 | ||
| @@ -791,10 +796,6 @@ void __init setup_arch(char **cmdline_p) | |||
| 791 | 796 | ||
| 792 | initmem_init(0, max_pfn); | 797 | initmem_init(0, max_pfn); |
| 793 | 798 | ||
| 794 | #ifdef CONFIG_X86_64 | ||
| 795 | dma32_reserve_bootmem(); | ||
| 796 | #endif | ||
| 797 | |||
| 798 | #ifdef CONFIG_ACPI_SLEEP | 799 | #ifdef CONFIG_ACPI_SLEEP |
| 799 | /* | 800 | /* |
| 800 | * Reserve low memory region for sleep support. | 801 | * Reserve low memory region for sleep support. |
| @@ -809,20 +810,21 @@ void __init setup_arch(char **cmdline_p) | |||
| 809 | #endif | 810 | #endif |
| 810 | reserve_crashkernel(); | 811 | reserve_crashkernel(); |
| 811 | 812 | ||
| 813 | #ifdef CONFIG_X86_64 | ||
| 814 | /* | ||
| 815 | * dma32_reserve_bootmem() allocates bootmem which may conflict | ||
| 816 | * with the crashkernel command line, so do that after | ||
| 817 | * reserve_crashkernel() | ||
| 818 | */ | ||
| 819 | dma32_reserve_bootmem(); | ||
| 820 | #endif | ||
| 821 | |||
| 812 | reserve_ibft_region(); | 822 | reserve_ibft_region(); |
| 813 | 823 | ||
| 814 | #ifdef CONFIG_KVM_CLOCK | 824 | #ifdef CONFIG_KVM_CLOCK |
| 815 | kvmclock_init(); | 825 | kvmclock_init(); |
| 816 | #endif | 826 | #endif |
| 817 | 827 | ||
| 818 | #if defined(CONFIG_VMI) && defined(CONFIG_X86_32) | ||
| 819 | /* | ||
| 820 | * Must be after max_low_pfn is determined, and before kernel | ||
| 821 | * pagetables are setup. | ||
| 822 | */ | ||
| 823 | vmi_init(); | ||
| 824 | #endif | ||
| 825 | |||
| 826 | paravirt_pagetable_setup_start(swapper_pg_dir); | 828 | paravirt_pagetable_setup_start(swapper_pg_dir); |
| 827 | paging_init(); | 829 | paging_init(); |
| 828 | paravirt_pagetable_setup_done(swapper_pg_dir); | 830 | paravirt_pagetable_setup_done(swapper_pg_dir); |
| @@ -859,12 +861,6 @@ void __init setup_arch(char **cmdline_p) | |||
| 859 | init_apic_mappings(); | 861 | init_apic_mappings(); |
| 860 | ioapic_init_mappings(); | 862 | ioapic_init_mappings(); |
| 861 | 863 | ||
| 862 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) && defined(CONFIG_X86_32) | ||
| 863 | if (def_to_bigsmp) | ||
| 864 | printk(KERN_WARNING "More than 8 CPUs detected and " | ||
| 865 | "CONFIG_X86_PC cannot handle it.\nUse " | ||
| 866 | "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n"); | ||
| 867 | #endif | ||
| 868 | kvm_guest_init(); | 864 | kvm_guest_init(); |
| 869 | 865 | ||
| 870 | e820_reserve_resources(); | 866 | e820_reserve_resources(); |
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index f7745f94c006..76e305e064f9 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
| @@ -80,24 +80,6 @@ static void __init setup_per_cpu_maps(void) | |||
| 80 | #endif | 80 | #endif |
| 81 | } | 81 | } |
| 82 | 82 | ||
| 83 | #ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP | ||
| 84 | cpumask_t *cpumask_of_cpu_map __read_mostly; | ||
| 85 | EXPORT_SYMBOL(cpumask_of_cpu_map); | ||
| 86 | |||
| 87 | /* requires nr_cpu_ids to be initialized */ | ||
| 88 | static void __init setup_cpumask_of_cpu(void) | ||
| 89 | { | ||
| 90 | int i; | ||
| 91 | |||
| 92 | /* alloc_bootmem zeroes memory */ | ||
| 93 | cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids); | ||
| 94 | for (i = 0; i < nr_cpu_ids; i++) | ||
| 95 | cpu_set(i, cpumask_of_cpu_map[i]); | ||
| 96 | } | ||
| 97 | #else | ||
| 98 | static inline void setup_cpumask_of_cpu(void) { } | ||
| 99 | #endif | ||
| 100 | |||
| 101 | #ifdef CONFIG_X86_32 | 83 | #ifdef CONFIG_X86_32 |
| 102 | /* | 84 | /* |
| 103 | * Great future not-so-futuristic plan: make i386 and x86_64 do it | 85 | * Great future not-so-futuristic plan: make i386 and x86_64 do it |
| @@ -197,9 +179,6 @@ void __init setup_per_cpu_areas(void) | |||
| 197 | 179 | ||
| 198 | /* Setup node to cpumask map */ | 180 | /* Setup node to cpumask map */ |
| 199 | setup_node_to_cpumask_map(); | 181 | setup_node_to_cpumask_map(); |
| 200 | |||
| 201 | /* Setup cpumask_of_cpu map */ | ||
| 202 | setup_cpumask_of_cpu(); | ||
| 203 | } | 182 | } |
| 204 | 183 | ||
| 205 | #endif | 184 | #endif |
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 07faaa5109cb..6fb5bcdd8933 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c | |||
| @@ -661,8 +661,5 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | |||
| 661 | if (thread_info_flags & _TIF_SIGPENDING) | 661 | if (thread_info_flags & _TIF_SIGPENDING) |
| 662 | do_signal(regs); | 662 | do_signal(regs); |
| 663 | 663 | ||
| 664 | if (thread_info_flags & _TIF_HRTICK_RESCHED) | ||
| 665 | hrtick_resched(); | ||
| 666 | |||
| 667 | clear_thread_flag(TIF_IRET); | 664 | clear_thread_flag(TIF_IRET); |
| 668 | } | 665 | } |
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index bf87684474f1..b45ef8ddd651 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c | |||
| @@ -53,6 +53,59 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, | |||
| 53 | return do_sigaltstack(uss, uoss, regs->sp); | 53 | return do_sigaltstack(uss, uoss, regs->sp); |
| 54 | } | 54 | } |
| 55 | 55 | ||
| 56 | /* | ||
| 57 | * Signal frame handlers. | ||
| 58 | */ | ||
| 59 | |||
| 60 | static inline int save_i387(struct _fpstate __user *buf) | ||
| 61 | { | ||
| 62 | struct task_struct *tsk = current; | ||
| 63 | int err = 0; | ||
| 64 | |||
| 65 | BUILD_BUG_ON(sizeof(struct user_i387_struct) != | ||
| 66 | sizeof(tsk->thread.xstate->fxsave)); | ||
| 67 | |||
| 68 | if ((unsigned long)buf % 16) | ||
| 69 | printk("save_i387: bad fpstate %p\n", buf); | ||
| 70 | |||
| 71 | if (!used_math()) | ||
| 72 | return 0; | ||
| 73 | clear_used_math(); /* trigger finit */ | ||
| 74 | if (task_thread_info(tsk)->status & TS_USEDFPU) { | ||
| 75 | err = save_i387_checking((struct i387_fxsave_struct __user *) | ||
| 76 | buf); | ||
| 77 | if (err) | ||
| 78 | return err; | ||
| 79 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | ||
| 80 | stts(); | ||
| 81 | } else { | ||
| 82 | if (__copy_to_user(buf, &tsk->thread.xstate->fxsave, | ||
| 83 | sizeof(struct i387_fxsave_struct))) | ||
| 84 | return -1; | ||
| 85 | } | ||
| 86 | return 1; | ||
| 87 | } | ||
| 88 | |||
| 89 | /* | ||
| 90 | * This restores directly out of user space. Exceptions are handled. | ||
| 91 | */ | ||
| 92 | static inline int restore_i387(struct _fpstate __user *buf) | ||
| 93 | { | ||
| 94 | struct task_struct *tsk = current; | ||
| 95 | int err; | ||
| 96 | |||
| 97 | if (!used_math()) { | ||
| 98 | err = init_fpu(tsk); | ||
| 99 | if (err) | ||
| 100 | return err; | ||
| 101 | } | ||
| 102 | |||
| 103 | if (!(task_thread_info(current)->status & TS_USEDFPU)) { | ||
| 104 | clts(); | ||
| 105 | task_thread_info(current)->status |= TS_USEDFPU; | ||
| 106 | } | ||
| 107 | return restore_fpu_checking((__force struct i387_fxsave_struct *)buf); | ||
| 108 | } | ||
| 56 | 109 | ||
| 57 | /* | 110 | /* |
| 58 | * Do a signal return; undo the signal stack. | 111 | * Do a signal return; undo the signal stack. |
| @@ -496,9 +549,6 @@ void do_notify_resume(struct pt_regs *regs, void *unused, | |||
| 496 | /* deal with pending signal delivery */ | 549 | /* deal with pending signal delivery */ |
| 497 | if (thread_info_flags & _TIF_SIGPENDING) | 550 | if (thread_info_flags & _TIF_SIGPENDING) |
| 498 | do_signal(regs); | 551 | do_signal(regs); |
| 499 | |||
| 500 | if (thread_info_flags & _TIF_HRTICK_RESCHED) | ||
| 501 | hrtick_resched(); | ||
| 502 | } | 552 | } |
| 503 | 553 | ||
| 504 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) | 554 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 4b53a647bc0a..91055d7fc1b0 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
| @@ -326,12 +326,16 @@ static void __cpuinit start_secondary(void *unused) | |||
| 326 | * for which cpus receive the IPI. Holding this | 326 | * for which cpus receive the IPI. Holding this |
| 327 | * lock helps us to not include this cpu in a currently in progress | 327 | * lock helps us to not include this cpu in a currently in progress |
| 328 | * smp_call_function(). | 328 | * smp_call_function(). |
| 329 | * | ||
| 330 | * We need to hold vector_lock so that the set of online cpus | ||
| 331 | * does not change while we are assigning vectors to cpus. Holding | ||
| 332 | * this lock ensures we don't half assign or remove an irq from a cpu. | ||
| 329 | */ | 333 | */ |
| 330 | ipi_call_lock_irq(); | 334 | ipi_call_lock_irq(); |
| 331 | #ifdef CONFIG_X86_IO_APIC | 335 | lock_vector_lock(); |
| 332 | setup_vector_irq(smp_processor_id()); | 336 | __setup_vector_irq(smp_processor_id()); |
| 333 | #endif | ||
| 334 | cpu_set(smp_processor_id(), cpu_online_map); | 337 | cpu_set(smp_processor_id(), cpu_online_map); |
| 338 | unlock_vector_lock(); | ||
| 335 | ipi_call_unlock_irq(); | 339 | ipi_call_unlock_irq(); |
| 336 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; | 340 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; |
| 337 | 341 | ||
| @@ -438,7 +442,7 @@ void __cpuinit set_cpu_sibling_map(int cpu) | |||
| 438 | cpu_set(cpu, cpu_sibling_setup_map); | 442 | cpu_set(cpu, cpu_sibling_setup_map); |
| 439 | 443 | ||
| 440 | if (smp_num_siblings > 1) { | 444 | if (smp_num_siblings > 1) { |
| 441 | for_each_cpu_mask(i, cpu_sibling_setup_map) { | 445 | for_each_cpu_mask_nr(i, cpu_sibling_setup_map) { |
| 442 | if (c->phys_proc_id == cpu_data(i).phys_proc_id && | 446 | if (c->phys_proc_id == cpu_data(i).phys_proc_id && |
| 443 | c->cpu_core_id == cpu_data(i).cpu_core_id) { | 447 | c->cpu_core_id == cpu_data(i).cpu_core_id) { |
| 444 | cpu_set(i, per_cpu(cpu_sibling_map, cpu)); | 448 | cpu_set(i, per_cpu(cpu_sibling_map, cpu)); |
| @@ -461,7 +465,7 @@ void __cpuinit set_cpu_sibling_map(int cpu) | |||
| 461 | return; | 465 | return; |
| 462 | } | 466 | } |
| 463 | 467 | ||
| 464 | for_each_cpu_mask(i, cpu_sibling_setup_map) { | 468 | for_each_cpu_mask_nr(i, cpu_sibling_setup_map) { |
| 465 | if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && | 469 | if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && |
| 466 | per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { | 470 | per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { |
| 467 | cpu_set(i, c->llc_shared_map); | 471 | cpu_set(i, c->llc_shared_map); |
| @@ -990,7 +994,17 @@ int __cpuinit native_cpu_up(unsigned int cpu) | |||
| 990 | flush_tlb_all(); | 994 | flush_tlb_all(); |
| 991 | low_mappings = 1; | 995 | low_mappings = 1; |
| 992 | 996 | ||
| 997 | #ifdef CONFIG_X86_PC | ||
| 998 | if (def_to_bigsmp && apicid > 8) { | ||
| 999 | printk(KERN_WARNING | ||
| 1000 | "More than 8 CPUs detected - skipping them.\n" | ||
| 1001 | "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n"); | ||
| 1002 | err = -1; | ||
| 1003 | } else | ||
| 1004 | err = do_boot_cpu(apicid, cpu); | ||
| 1005 | #else | ||
| 993 | err = do_boot_cpu(apicid, cpu); | 1006 | err = do_boot_cpu(apicid, cpu); |
| 1007 | #endif | ||
| 994 | 1008 | ||
| 995 | zap_low_mappings(); | 1009 | zap_low_mappings(); |
| 996 | low_mappings = 0; | 1010 | low_mappings = 0; |
| @@ -1219,7 +1233,7 @@ static void remove_siblinginfo(int cpu) | |||
| 1219 | int sibling; | 1233 | int sibling; |
| 1220 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 1234 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
| 1221 | 1235 | ||
| 1222 | for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) { | 1236 | for_each_cpu_mask_nr(sibling, per_cpu(cpu_core_map, cpu)) { |
| 1223 | cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); | 1237 | cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); |
| 1224 | /* | 1238 | /* |
| 1225 | * last thread sibling in this cpu core going down | 1239 | * last thread sibling in this cpu core going down |
| @@ -1228,7 +1242,7 @@ static void remove_siblinginfo(int cpu) | |||
| 1228 | cpu_data(sibling).booted_cores--; | 1242 | cpu_data(sibling).booted_cores--; |
| 1229 | } | 1243 | } |
| 1230 | 1244 | ||
| 1231 | for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu)) | 1245 | for_each_cpu_mask_nr(sibling, per_cpu(cpu_sibling_map, cpu)) |
| 1232 | cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); | 1246 | cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); |
| 1233 | cpus_clear(per_cpu(cpu_sibling_map, cpu)); | 1247 | cpus_clear(per_cpu(cpu_sibling_map, cpu)); |
| 1234 | cpus_clear(per_cpu(cpu_core_map, cpu)); | 1248 | cpus_clear(per_cpu(cpu_core_map, cpu)); |
| @@ -1336,7 +1350,9 @@ int __cpu_disable(void) | |||
| 1336 | remove_siblinginfo(cpu); | 1350 | remove_siblinginfo(cpu); |
| 1337 | 1351 | ||
| 1338 | /* It's now safe to remove this processor from the online map */ | 1352 | /* It's now safe to remove this processor from the online map */ |
| 1353 | lock_vector_lock(); | ||
| 1339 | remove_cpu_from_maps(cpu); | 1354 | remove_cpu_from_maps(cpu); |
| 1355 | unlock_vector_lock(); | ||
| 1340 | fixup_irqs(cpu_online_map); | 1356 | fixup_irqs(cpu_online_map); |
| 1341 | return 0; | 1357 | return 0; |
| 1342 | } | 1358 | } |
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index adff5562f5fd..d44395ff34c3 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
| @@ -326,3 +326,9 @@ ENTRY(sys_call_table) | |||
| 326 | .long sys_fallocate | 326 | .long sys_fallocate |
| 327 | .long sys_timerfd_settime /* 325 */ | 327 | .long sys_timerfd_settime /* 325 */ |
| 328 | .long sys_timerfd_gettime | 328 | .long sys_timerfd_gettime |
| 329 | .long sys_signalfd4 | ||
| 330 | .long sys_eventfd2 | ||
| 331 | .long sys_epoll_create1 | ||
| 332 | .long sys_dup3 /* 330 */ | ||
| 333 | .long sys_pipe2 | ||
| 334 | .long sys_inotify_init1 | ||
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 0a1b1a9d922d..6ca515d6db54 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c | |||
| @@ -37,6 +37,7 @@ | |||
| 37 | #include <asm/timer.h> | 37 | #include <asm/timer.h> |
| 38 | #include <asm/vmi_time.h> | 38 | #include <asm/vmi_time.h> |
| 39 | #include <asm/kmap_types.h> | 39 | #include <asm/kmap_types.h> |
| 40 | #include <asm/setup.h> | ||
| 40 | 41 | ||
| 41 | /* Convenient for calling VMI functions indirectly in the ROM */ | 42 | /* Convenient for calling VMI functions indirectly in the ROM */ |
| 42 | typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void); | 43 | typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void); |
| @@ -683,7 +684,7 @@ void vmi_bringup(void) | |||
| 683 | { | 684 | { |
| 684 | /* We must establish the lowmem mapping for MMU ops to work */ | 685 | /* We must establish the lowmem mapping for MMU ops to work */ |
| 685 | if (vmi_ops.set_linear_mapping) | 686 | if (vmi_ops.set_linear_mapping) |
| 686 | vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, max_low_pfn, 0); | 687 | vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, MAXMEM_PFN, 0); |
| 687 | } | 688 | } |
| 688 | 689 | ||
| 689 | /* | 690 | /* |
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 8d45fabc5f3b..ce3251ce5504 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
| @@ -21,6 +21,7 @@ config KVM | |||
| 21 | tristate "Kernel-based Virtual Machine (KVM) support" | 21 | tristate "Kernel-based Virtual Machine (KVM) support" |
| 22 | depends on HAVE_KVM | 22 | depends on HAVE_KVM |
| 23 | select PREEMPT_NOTIFIERS | 23 | select PREEMPT_NOTIFIERS |
| 24 | select MMU_NOTIFIER | ||
| 24 | select ANON_INODES | 25 | select ANON_INODES |
| 25 | ---help--- | 26 | ---help--- |
| 26 | Support hosting fully virtualized guest machines using hardware | 27 | Support hosting fully virtualized guest machines using hardware |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index b0e4ddca6c18..0bfe2bd305eb 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
| @@ -653,6 +653,84 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
| 653 | account_shadowed(kvm, gfn); | 653 | account_shadowed(kvm, gfn); |
| 654 | } | 654 | } |
| 655 | 655 | ||
| 656 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) | ||
| 657 | { | ||
| 658 | u64 *spte; | ||
| 659 | int need_tlb_flush = 0; | ||
| 660 | |||
| 661 | while ((spte = rmap_next(kvm, rmapp, NULL))) { | ||
| 662 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | ||
| 663 | rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); | ||
| 664 | rmap_remove(kvm, spte); | ||
| 665 | set_shadow_pte(spte, shadow_trap_nonpresent_pte); | ||
| 666 | need_tlb_flush = 1; | ||
| 667 | } | ||
| 668 | return need_tlb_flush; | ||
| 669 | } | ||
| 670 | |||
| 671 | static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | ||
| 672 | int (*handler)(struct kvm *kvm, unsigned long *rmapp)) | ||
| 673 | { | ||
| 674 | int i; | ||
| 675 | int retval = 0; | ||
| 676 | |||
| 677 | /* | ||
| 678 | * If mmap_sem isn't taken, we can walk the memslots with only | ||
| 679 | * the mmu_lock by skipping over the slots with userspace_addr == 0. | ||
| 680 | */ | ||
| 681 | for (i = 0; i < kvm->nmemslots; i++) { | ||
| 682 | struct kvm_memory_slot *memslot = &kvm->memslots[i]; | ||
| 683 | unsigned long start = memslot->userspace_addr; | ||
| 684 | unsigned long end; | ||
| 685 | |||
| 686 | /* mmu_lock protects userspace_addr */ | ||
| 687 | if (!start) | ||
| 688 | continue; | ||
| 689 | |||
| 690 | end = start + (memslot->npages << PAGE_SHIFT); | ||
| 691 | if (hva >= start && hva < end) { | ||
| 692 | gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; | ||
| 693 | retval |= handler(kvm, &memslot->rmap[gfn_offset]); | ||
| 694 | retval |= handler(kvm, | ||
| 695 | &memslot->lpage_info[ | ||
| 696 | gfn_offset / | ||
| 697 | KVM_PAGES_PER_HPAGE].rmap_pde); | ||
| 698 | } | ||
| 699 | } | ||
| 700 | |||
| 701 | return retval; | ||
| 702 | } | ||
| 703 | |||
| 704 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) | ||
| 705 | { | ||
| 706 | return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); | ||
| 707 | } | ||
| 708 | |||
| 709 | static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp) | ||
| 710 | { | ||
| 711 | u64 *spte; | ||
| 712 | int young = 0; | ||
| 713 | |||
| 714 | spte = rmap_next(kvm, rmapp, NULL); | ||
| 715 | while (spte) { | ||
| 716 | int _young; | ||
| 717 | u64 _spte = *spte; | ||
| 718 | BUG_ON(!(_spte & PT_PRESENT_MASK)); | ||
| 719 | _young = _spte & PT_ACCESSED_MASK; | ||
| 720 | if (_young) { | ||
| 721 | young = 1; | ||
| 722 | clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte); | ||
| 723 | } | ||
| 724 | spte = rmap_next(kvm, rmapp, spte); | ||
| 725 | } | ||
| 726 | return young; | ||
| 727 | } | ||
| 728 | |||
| 729 | int kvm_age_hva(struct kvm *kvm, unsigned long hva) | ||
| 730 | { | ||
| 731 | return kvm_handle_hva(kvm, hva, kvm_age_rmapp); | ||
| 732 | } | ||
| 733 | |||
| 656 | #ifdef MMU_DEBUG | 734 | #ifdef MMU_DEBUG |
| 657 | static int is_empty_shadow_page(u64 *spt) | 735 | static int is_empty_shadow_page(u64 *spt) |
| 658 | { | 736 | { |
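kvm_unmap_hva() and kvm_age_hva() are the arch-side half of the new MMU-notifier support; the generic half in virt/kvm calls them when the host mm invalidates or ages a mapping backing guest memory. Roughly, and simplified rather than verbatim, the invalidate-page path looks like:

    static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
                                                 struct mm_struct *mm,
                                                 unsigned long address)
    {
            struct kvm *kvm = container_of(mn, struct kvm, mmu_notifier);
            int need_tlb_flush;

            spin_lock(&kvm->mmu_lock);
            kvm->mmu_notifier_seq++;        /* pairs with mmu_notifier_retry() */
            need_tlb_flush = kvm_unmap_hva(kvm, address);
            spin_unlock(&kvm->mmu_lock);

            if (need_tlb_flush)
                    kvm_flush_remote_tlbs(kvm);
    }

The sequence bump is what lets the page-fault paths further down notice that a pfn they looked up may have just been invalidated.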
| @@ -1203,6 +1281,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | |||
| 1203 | int r; | 1281 | int r; |
| 1204 | int largepage = 0; | 1282 | int largepage = 0; |
| 1205 | pfn_t pfn; | 1283 | pfn_t pfn; |
| 1284 | unsigned long mmu_seq; | ||
| 1206 | 1285 | ||
| 1207 | down_read(¤t->mm->mmap_sem); | 1286 | down_read(¤t->mm->mmap_sem); |
| 1208 | if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { | 1287 | if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { |
| @@ -1210,6 +1289,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | |||
| 1210 | largepage = 1; | 1289 | largepage = 1; |
| 1211 | } | 1290 | } |
| 1212 | 1291 | ||
| 1292 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | ||
| 1293 | /* implicit mb(), we'll read before PT lock is unlocked */ | ||
| 1213 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 1294 | pfn = gfn_to_pfn(vcpu->kvm, gfn); |
| 1214 | up_read(¤t->mm->mmap_sem); | 1295 | up_read(¤t->mm->mmap_sem); |
| 1215 | 1296 | ||
| @@ -1220,6 +1301,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | |||
| 1220 | } | 1301 | } |
| 1221 | 1302 | ||
| 1222 | spin_lock(&vcpu->kvm->mmu_lock); | 1303 | spin_lock(&vcpu->kvm->mmu_lock); |
| 1304 | if (mmu_notifier_retry(vcpu, mmu_seq)) | ||
| 1305 | goto out_unlock; | ||
| 1223 | kvm_mmu_free_some_pages(vcpu); | 1306 | kvm_mmu_free_some_pages(vcpu); |
| 1224 | r = __direct_map(vcpu, v, write, largepage, gfn, pfn, | 1307 | r = __direct_map(vcpu, v, write, largepage, gfn, pfn, |
| 1225 | PT32E_ROOT_LEVEL); | 1308 | PT32E_ROOT_LEVEL); |
| @@ -1227,6 +1310,11 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | |||
| 1227 | 1310 | ||
| 1228 | 1311 | ||
| 1229 | return r; | 1312 | return r; |
| 1313 | |||
| 1314 | out_unlock: | ||
| 1315 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
| 1316 | kvm_release_pfn_clean(pfn); | ||
| 1317 | return 0; | ||
| 1230 | } | 1318 | } |
| 1231 | 1319 | ||
| 1232 | 1320 | ||
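This hunk establishes the pattern repeated in tdp_page_fault() and paging_tmpl.h below: sample mmu_notifier_seq before the sleepable gfn_to_pfn() lookup, then re-check under mmu_lock before installing the spte. The generic helper is defined elsewhere in the series; a sketch of its logic, illustrative rather than verbatim:

    static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu,
                                         unsigned long mmu_seq)
    {
            /* Called under mmu_lock, after gfn_to_pfn() has returned. */
            if (unlikely(vcpu->kvm->mmu_notifier_count))
                    return 1;       /* an invalidation is running right now */
            if (vcpu->kvm->mmu_notifier_seq != mmu_seq)
                    return 1;       /* one completed since we sampled the seq */
            return 0;
    }

On retry the fault path drops the lock, releases the pfn and returns 0, so the guest re-faults and tries again with fresh state.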
| @@ -1345,6 +1433,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, | |||
| 1345 | int r; | 1433 | int r; |
| 1346 | int largepage = 0; | 1434 | int largepage = 0; |
| 1347 | gfn_t gfn = gpa >> PAGE_SHIFT; | 1435 | gfn_t gfn = gpa >> PAGE_SHIFT; |
| 1436 | unsigned long mmu_seq; | ||
| 1348 | 1437 | ||
| 1349 | ASSERT(vcpu); | 1438 | ASSERT(vcpu); |
| 1350 | ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); | 1439 | ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); |
| @@ -1358,6 +1447,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, | |||
| 1358 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); | 1447 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); |
| 1359 | largepage = 1; | 1448 | largepage = 1; |
| 1360 | } | 1449 | } |
| 1450 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | ||
| 1451 | /* implicit mb(), we'll read before PT lock is unlocked */ | ||
| 1361 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 1452 | pfn = gfn_to_pfn(vcpu->kvm, gfn); |
| 1362 | up_read(¤t->mm->mmap_sem); | 1453 | up_read(¤t->mm->mmap_sem); |
| 1363 | if (is_error_pfn(pfn)) { | 1454 | if (is_error_pfn(pfn)) { |
| @@ -1365,12 +1456,19 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, | |||
| 1365 | return 1; | 1456 | return 1; |
| 1366 | } | 1457 | } |
| 1367 | spin_lock(&vcpu->kvm->mmu_lock); | 1458 | spin_lock(&vcpu->kvm->mmu_lock); |
| 1459 | if (mmu_notifier_retry(vcpu, mmu_seq)) | ||
| 1460 | goto out_unlock; | ||
| 1368 | kvm_mmu_free_some_pages(vcpu); | 1461 | kvm_mmu_free_some_pages(vcpu); |
| 1369 | r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, | 1462 | r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, |
| 1370 | largepage, gfn, pfn, kvm_x86_ops->get_tdp_level()); | 1463 | largepage, gfn, pfn, kvm_x86_ops->get_tdp_level()); |
| 1371 | spin_unlock(&vcpu->kvm->mmu_lock); | 1464 | spin_unlock(&vcpu->kvm->mmu_lock); |
| 1372 | 1465 | ||
| 1373 | return r; | 1466 | return r; |
| 1467 | |||
| 1468 | out_unlock: | ||
| 1469 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
| 1470 | kvm_release_pfn_clean(pfn); | ||
| 1471 | return 0; | ||
| 1374 | } | 1472 | } |
| 1375 | 1473 | ||
| 1376 | static void nonpaging_free(struct kvm_vcpu *vcpu) | 1474 | static void nonpaging_free(struct kvm_vcpu *vcpu) |
| @@ -1670,6 +1768,8 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
| 1670 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); | 1768 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); |
| 1671 | vcpu->arch.update_pte.largepage = 1; | 1769 | vcpu->arch.update_pte.largepage = 1; |
| 1672 | } | 1770 | } |
| 1771 | vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq; | ||
| 1772 | /* implicit mb(), we'll read before PT lock is unlocked */ | ||
| 1673 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 1773 | pfn = gfn_to_pfn(vcpu->kvm, gfn); |
| 1674 | up_read(¤t->mm->mmap_sem); | 1774 | up_read(¤t->mm->mmap_sem); |
| 1675 | 1775 | ||
| @@ -1814,6 +1914,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) | |||
| 1814 | spin_unlock(&vcpu->kvm->mmu_lock); | 1914 | spin_unlock(&vcpu->kvm->mmu_lock); |
| 1815 | return r; | 1915 | return r; |
| 1816 | } | 1916 | } |
| 1917 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); | ||
| 1817 | 1918 | ||
| 1818 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | 1919 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) |
| 1819 | { | 1920 | { |
| @@ -1870,6 +1971,12 @@ void kvm_enable_tdp(void) | |||
| 1870 | } | 1971 | } |
| 1871 | EXPORT_SYMBOL_GPL(kvm_enable_tdp); | 1972 | EXPORT_SYMBOL_GPL(kvm_enable_tdp); |
| 1872 | 1973 | ||
| 1974 | void kvm_disable_tdp(void) | ||
| 1975 | { | ||
| 1976 | tdp_enabled = false; | ||
| 1977 | } | ||
| 1978 | EXPORT_SYMBOL_GPL(kvm_disable_tdp); | ||
| 1979 | |||
| 1873 | static void free_mmu_pages(struct kvm_vcpu *vcpu) | 1980 | static void free_mmu_pages(struct kvm_vcpu *vcpu) |
| 1874 | { | 1981 | { |
| 1875 | struct kvm_mmu_page *sp; | 1982 | struct kvm_mmu_page *sp; |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 4d918220baeb..f72ac1fa35f0 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
| @@ -263,6 +263,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
| 263 | pfn = vcpu->arch.update_pte.pfn; | 263 | pfn = vcpu->arch.update_pte.pfn; |
| 264 | if (is_error_pfn(pfn)) | 264 | if (is_error_pfn(pfn)) |
| 265 | return; | 265 | return; |
| 266 | if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq)) | ||
| 267 | return; | ||
| 266 | kvm_get_pfn(pfn); | 268 | kvm_get_pfn(pfn); |
| 267 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, | 269 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, |
| 268 | gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), | 270 | gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), |
| @@ -380,6 +382,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
| 380 | int r; | 382 | int r; |
| 381 | pfn_t pfn; | 383 | pfn_t pfn; |
| 382 | int largepage = 0; | 384 | int largepage = 0; |
| 385 | unsigned long mmu_seq; | ||
| 383 | 386 | ||
| 384 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); | 387 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); |
| 385 | kvm_mmu_audit(vcpu, "pre page fault"); | 388 | kvm_mmu_audit(vcpu, "pre page fault"); |
| @@ -413,6 +416,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
| 413 | largepage = 1; | 416 | largepage = 1; |
| 414 | } | 417 | } |
| 415 | } | 418 | } |
| 419 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | ||
| 420 | /* implicit mb(), we'll read before PT lock is unlocked */ | ||
| 416 | pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); | 421 | pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); |
| 417 | up_read(¤t->mm->mmap_sem); | 422 | up_read(¤t->mm->mmap_sem); |
| 418 | 423 | ||
| @@ -424,6 +429,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
| 424 | } | 429 | } |
| 425 | 430 | ||
| 426 | spin_lock(&vcpu->kvm->mmu_lock); | 431 | spin_lock(&vcpu->kvm->mmu_lock); |
| 432 | if (mmu_notifier_retry(vcpu, mmu_seq)) | ||
| 433 | goto out_unlock; | ||
| 427 | kvm_mmu_free_some_pages(vcpu); | 434 | kvm_mmu_free_some_pages(vcpu); |
| 428 | shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, | 435 | shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, |
| 429 | largepage, &write_pt, pfn); | 436 | largepage, &write_pt, pfn); |
| @@ -439,6 +446,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
| 439 | spin_unlock(&vcpu->kvm->mmu_lock); | 446 | spin_unlock(&vcpu->kvm->mmu_lock); |
| 440 | 447 | ||
| 441 | return write_pt; | 448 | return write_pt; |
| 449 | |||
| 450 | out_unlock: | ||
| 451 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
| 452 | kvm_release_pfn_clean(pfn); | ||
| 453 | return 0; | ||
| 442 | } | 454 | } |
| 443 | 455 | ||
| 444 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) | 456 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b756e876dce3..e2ee264740c7 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
| @@ -453,7 +453,8 @@ static __init int svm_hardware_setup(void) | |||
| 453 | if (npt_enabled) { | 453 | if (npt_enabled) { |
| 454 | printk(KERN_INFO "kvm: Nested Paging enabled\n"); | 454 | printk(KERN_INFO "kvm: Nested Paging enabled\n"); |
| 455 | kvm_enable_tdp(); | 455 | kvm_enable_tdp(); |
| 456 | } | 456 | } else |
| 457 | kvm_disable_tdp(); | ||
| 457 | 458 | ||
| 458 | return 0; | 459 | return 0; |
| 459 | 460 | ||
| @@ -1007,10 +1008,13 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
| 1007 | struct kvm *kvm = svm->vcpu.kvm; | 1008 | struct kvm *kvm = svm->vcpu.kvm; |
| 1008 | u64 fault_address; | 1009 | u64 fault_address; |
| 1009 | u32 error_code; | 1010 | u32 error_code; |
| 1011 | bool event_injection = false; | ||
| 1010 | 1012 | ||
| 1011 | if (!irqchip_in_kernel(kvm) && | 1013 | if (!irqchip_in_kernel(kvm) && |
| 1012 | is_external_interrupt(exit_int_info)) | 1014 | is_external_interrupt(exit_int_info)) { |
| 1015 | event_injection = true; | ||
| 1013 | push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK); | 1016 | push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK); |
| 1017 | } | ||
| 1014 | 1018 | ||
| 1015 | fault_address = svm->vmcb->control.exit_info_2; | 1019 | fault_address = svm->vmcb->control.exit_info_2; |
| 1016 | error_code = svm->vmcb->control.exit_info_1; | 1020 | error_code = svm->vmcb->control.exit_info_1; |
| @@ -1024,6 +1028,8 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
| 1024 | (u32)fault_address, (u32)(fault_address >> 32), | 1028 | (u32)fault_address, (u32)(fault_address >> 32), |
| 1025 | handler); | 1029 | handler); |
| 1026 | 1030 | ||
| 1031 | if (event_injection) | ||
| 1032 | kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); | ||
| 1027 | return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); | 1033 | return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); |
| 1028 | } | 1034 | } |
| 1029 | 1035 | ||
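The event_injection bookkeeping breaks a potential livelock: if the page fault was taken while the CPU was delivering an event, the faulting access may have hit a write-protected shadow of a guest page table, and delivery cannot complete until that protection is dropped. The helper exported from mmu.c above does the dropping; its shape is roughly the following sketch (not a verbatim copy):

    int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
    {
            gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
            int r;

            spin_lock(&vcpu->kvm->mmu_lock);
            /* Zap any shadow page covering the faulting gfn. */
            r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
            spin_unlock(&vcpu->kvm->mmu_lock);
            return r;
    }

vmx.c gets the same treatment below, keyed off VECTORING_INFO_VALID_MASK instead of SVM's exit_int_info.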
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0cac63701719..2a69773e3b26 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
| @@ -2298,6 +2298,8 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
| 2298 | cr2 = vmcs_readl(EXIT_QUALIFICATION); | 2298 | cr2 = vmcs_readl(EXIT_QUALIFICATION); |
| 2299 | KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, | 2299 | KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, |
| 2300 | (u32)((u64)cr2 >> 32), handler); | 2300 | (u32)((u64)cr2 >> 32), handler); |
| 2301 | if (vect_info & VECTORING_INFO_VALID_MASK) | ||
| 2302 | kvm_mmu_unprotect_page_virt(vcpu, cr2); | ||
| 2301 | return kvm_mmu_page_fault(vcpu, cr2, error_code); | 2303 | return kvm_mmu_page_fault(vcpu, cr2, error_code); |
| 2302 | } | 2304 | } |
| 2303 | 2305 | ||
| @@ -3116,15 +3118,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
| 3116 | return ERR_PTR(-ENOMEM); | 3118 | return ERR_PTR(-ENOMEM); |
| 3117 | 3119 | ||
| 3118 | allocate_vpid(vmx); | 3120 | allocate_vpid(vmx); |
| 3119 | if (id == 0 && vm_need_ept()) { | ||
| 3120 | kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | | ||
| 3121 | VMX_EPT_WRITABLE_MASK | | ||
| 3122 | VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); | ||
| 3123 | kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK, | ||
| 3124 | VMX_EPT_FAKE_DIRTY_MASK, 0ull, | ||
| 3125 | VMX_EPT_EXECUTABLE_MASK); | ||
| 3126 | kvm_enable_tdp(); | ||
| 3127 | } | ||
| 3128 | 3121 | ||
| 3129 | err = kvm_vcpu_init(&vmx->vcpu, kvm, id); | 3122 | err = kvm_vcpu_init(&vmx->vcpu, kvm, id); |
| 3130 | if (err) | 3123 | if (err) |
| @@ -3303,8 +3296,17 @@ static int __init vmx_init(void) | |||
| 3303 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP); | 3296 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP); |
| 3304 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP); | 3297 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP); |
| 3305 | 3298 | ||
| 3306 | if (cpu_has_vmx_ept()) | 3299 | if (vm_need_ept()) { |
| 3307 | bypass_guest_pf = 0; | 3300 | bypass_guest_pf = 0; |
| 3301 | kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | | ||
| 3302 | VMX_EPT_WRITABLE_MASK | | ||
| 3303 | VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); | ||
| 3304 | kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK, | ||
| 3305 | VMX_EPT_FAKE_DIRTY_MASK, 0ull, | ||
| 3306 | VMX_EPT_EXECUTABLE_MASK); | ||
| 3307 | kvm_enable_tdp(); | ||
| 3308 | } else | ||
| 3309 | kvm_disable_tdp(); | ||
| 3308 | 3310 | ||
| 3309 | if (bypass_guest_pf) | 3311 | if (bypass_guest_pf) |
| 3310 | kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); | 3312 | kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9f1cdb011cff..0d682fc6aeb3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
| @@ -883,6 +883,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
| 883 | case KVM_CAP_PIT: | 883 | case KVM_CAP_PIT: |
| 884 | case KVM_CAP_NOP_IO_DELAY: | 884 | case KVM_CAP_NOP_IO_DELAY: |
| 885 | case KVM_CAP_MP_STATE: | 885 | case KVM_CAP_MP_STATE: |
| 886 | case KVM_CAP_SYNC_MMU: | ||
| 886 | r = 1; | 887 | r = 1; |
| 887 | break; | 888 | break; |
| 888 | case KVM_CAP_COALESCED_MMIO: | 889 | case KVM_CAP_COALESCED_MMIO: |
| @@ -1495,6 +1496,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | |||
| 1495 | goto out; | 1496 | goto out; |
| 1496 | 1497 | ||
| 1497 | down_write(&kvm->slots_lock); | 1498 | down_write(&kvm->slots_lock); |
| 1499 | spin_lock(&kvm->mmu_lock); | ||
| 1498 | 1500 | ||
| 1499 | p = &kvm->arch.aliases[alias->slot]; | 1501 | p = &kvm->arch.aliases[alias->slot]; |
| 1500 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; | 1502 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; |
| @@ -1506,6 +1508,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | |||
| 1506 | break; | 1508 | break; |
| 1507 | kvm->arch.naliases = n; | 1509 | kvm->arch.naliases = n; |
| 1508 | 1510 | ||
| 1511 | spin_unlock(&kvm->mmu_lock); | ||
| 1509 | kvm_mmu_zap_all(kvm); | 1512 | kvm_mmu_zap_all(kvm); |
| 1510 | 1513 | ||
| 1511 | up_write(&kvm->slots_lock); | 1514 | up_write(&kvm->slots_lock); |
| @@ -3184,6 +3187,10 @@ static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector, | |||
| 3184 | kvm_desct->base |= seg_desc->base2 << 24; | 3187 | kvm_desct->base |= seg_desc->base2 << 24; |
| 3185 | kvm_desct->limit = seg_desc->limit0; | 3188 | kvm_desct->limit = seg_desc->limit0; |
| 3186 | kvm_desct->limit |= seg_desc->limit << 16; | 3189 | kvm_desct->limit |= seg_desc->limit << 16; |
| 3190 | if (seg_desc->g) { | ||
| 3191 | kvm_desct->limit <<= 12; | ||
| 3192 | kvm_desct->limit |= 0xfff; | ||
| 3193 | } | ||
| 3187 | kvm_desct->selector = selector; | 3194 | kvm_desct->selector = selector; |
| 3188 | kvm_desct->type = seg_desc->type; | 3195 | kvm_desct->type = seg_desc->type; |
| 3189 | kvm_desct->present = seg_desc->p; | 3196 | kvm_desct->present = seg_desc->p; |
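The new g-bit handling matters for any flat 32-bit segment: with G=1 the descriptor's 20-bit limit counts 4K pages, so without the shift a 4GB segment would be mistaken for a 1MB one. A standalone illustration of the same arithmetic (hypothetical helper, compiles as plain userspace C):

    #include <stdio.h>

    static unsigned int effective_limit(unsigned int raw_limit20, int g)
    {
            unsigned int limit = raw_limit20 & 0xfffff; /* limit0 | limit << 16 */

            if (g) {
                    limit <<= 12;   /* 4K granularity ... */
                    limit |= 0xfff; /* ... and the limit is inclusive */
            }
            return limit;
    }

    int main(void)
    {
            printf("flat segment: 0x%08x\n", effective_limit(0xfffff, 1));
            printf("64K segment:  0x%08x\n", effective_limit(0x0ffff, 0));
            return 0;
    }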
| @@ -3223,6 +3230,7 @@ static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu, | |||
| 3223 | static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | 3230 | static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, |
| 3224 | struct desc_struct *seg_desc) | 3231 | struct desc_struct *seg_desc) |
| 3225 | { | 3232 | { |
| 3233 | gpa_t gpa; | ||
| 3226 | struct descriptor_table dtable; | 3234 | struct descriptor_table dtable; |
| 3227 | u16 index = selector >> 3; | 3235 | u16 index = selector >> 3; |
| 3228 | 3236 | ||
| @@ -3232,13 +3240,16 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | |||
| 3232 | kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); | 3240 | kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); |
| 3233 | return 1; | 3241 | return 1; |
| 3234 | } | 3242 | } |
| 3235 | return kvm_read_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8); | 3243 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base); |
| 3244 | gpa += index * 8; | ||
| 3245 | return kvm_read_guest(vcpu->kvm, gpa, seg_desc, 8); | ||
| 3236 | } | 3246 | } |
| 3237 | 3247 | ||
| 3238 | /* allowed just for 8 bytes segments */ | 3248 | /* allowed just for 8 bytes segments */ |
| 3239 | static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | 3249 | static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, |
| 3240 | struct desc_struct *seg_desc) | 3250 | struct desc_struct *seg_desc) |
| 3241 | { | 3251 | { |
| 3252 | gpa_t gpa; | ||
| 3242 | struct descriptor_table dtable; | 3253 | struct descriptor_table dtable; |
| 3243 | u16 index = selector >> 3; | 3254 | u16 index = selector >> 3; |
| 3244 | 3255 | ||
| @@ -3246,7 +3257,9 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | |||
| 3246 | 3257 | ||
| 3247 | if (dtable.limit < index * 8 + 7) | 3258 | if (dtable.limit < index * 8 + 7) |
| 3248 | return 1; | 3259 | return 1; |
| 3249 | return kvm_write_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8); | 3260 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base); |
| 3261 | gpa += index * 8; | ||
| 3262 | return kvm_write_guest(vcpu->kvm, gpa, seg_desc, 8); | ||
| 3250 | } | 3263 | } |
| 3251 | 3264 | ||
| 3252 | static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, | 3265 | static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, |
| @@ -3258,55 +3271,7 @@ static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, | |||
| 3258 | base_addr |= (seg_desc->base1 << 16); | 3271 | base_addr |= (seg_desc->base1 << 16); |
| 3259 | base_addr |= (seg_desc->base2 << 24); | 3272 | base_addr |= (seg_desc->base2 << 24); |
| 3260 | 3273 | ||
| 3261 | return base_addr; | 3274 | return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr); |
| 3262 | } | ||
| 3263 | |||
| 3264 | static int load_tss_segment32(struct kvm_vcpu *vcpu, | ||
| 3265 | struct desc_struct *seg_desc, | ||
| 3266 | struct tss_segment_32 *tss) | ||
| 3267 | { | ||
| 3268 | u32 base_addr; | ||
| 3269 | |||
| 3270 | base_addr = get_tss_base_addr(vcpu, seg_desc); | ||
| 3271 | |||
| 3272 | return kvm_read_guest(vcpu->kvm, base_addr, tss, | ||
| 3273 | sizeof(struct tss_segment_32)); | ||
| 3274 | } | ||
| 3275 | |||
| 3276 | static int save_tss_segment32(struct kvm_vcpu *vcpu, | ||
| 3277 | struct desc_struct *seg_desc, | ||
| 3278 | struct tss_segment_32 *tss) | ||
| 3279 | { | ||
| 3280 | u32 base_addr; | ||
| 3281 | |||
| 3282 | base_addr = get_tss_base_addr(vcpu, seg_desc); | ||
| 3283 | |||
| 3284 | return kvm_write_guest(vcpu->kvm, base_addr, tss, | ||
| 3285 | sizeof(struct tss_segment_32)); | ||
| 3286 | } | ||
| 3287 | |||
| 3288 | static int load_tss_segment16(struct kvm_vcpu *vcpu, | ||
| 3289 | struct desc_struct *seg_desc, | ||
| 3290 | struct tss_segment_16 *tss) | ||
| 3291 | { | ||
| 3292 | u32 base_addr; | ||
| 3293 | |||
| 3294 | base_addr = get_tss_base_addr(vcpu, seg_desc); | ||
| 3295 | |||
| 3296 | return kvm_read_guest(vcpu->kvm, base_addr, tss, | ||
| 3297 | sizeof(struct tss_segment_16)); | ||
| 3298 | } | ||
| 3299 | |||
| 3300 | static int save_tss_segment16(struct kvm_vcpu *vcpu, | ||
| 3301 | struct desc_struct *seg_desc, | ||
| 3302 | struct tss_segment_16 *tss) | ||
| 3303 | { | ||
| 3304 | u32 base_addr; | ||
| 3305 | |||
| 3306 | base_addr = get_tss_base_addr(vcpu, seg_desc); | ||
| 3307 | |||
| 3308 | return kvm_write_guest(vcpu->kvm, base_addr, tss, | ||
| 3309 | sizeof(struct tss_segment_16)); | ||
| 3310 | } | 3275 | } |
| 3311 | 3276 | ||
| 3312 | static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) | 3277 | static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) |
| @@ -3466,20 +3431,26 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu, | |||
| 3466 | } | 3431 | } |
| 3467 | 3432 | ||
| 3468 | static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, | 3433 | static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, |
| 3469 | struct desc_struct *cseg_desc, | 3434 | u32 old_tss_base, |
| 3470 | struct desc_struct *nseg_desc) | 3435 | struct desc_struct *nseg_desc) |
| 3471 | { | 3436 | { |
| 3472 | struct tss_segment_16 tss_segment_16; | 3437 | struct tss_segment_16 tss_segment_16; |
| 3473 | int ret = 0; | 3438 | int ret = 0; |
| 3474 | 3439 | ||
| 3475 | if (load_tss_segment16(vcpu, cseg_desc, &tss_segment_16)) | 3440 | if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16, |
| 3441 | sizeof tss_segment_16)) | ||
| 3476 | goto out; | 3442 | goto out; |
| 3477 | 3443 | ||
| 3478 | save_state_to_tss16(vcpu, &tss_segment_16); | 3444 | save_state_to_tss16(vcpu, &tss_segment_16); |
| 3479 | save_tss_segment16(vcpu, cseg_desc, &tss_segment_16); | ||
| 3480 | 3445 | ||
| 3481 | if (load_tss_segment16(vcpu, nseg_desc, &tss_segment_16)) | 3446 | if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16, |
| 3447 | sizeof tss_segment_16)) | ||
| 3448 | goto out; | ||
| 3449 | |||
| 3450 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), | ||
| 3451 | &tss_segment_16, sizeof tss_segment_16)) | ||
| 3482 | goto out; | 3452 | goto out; |
| 3453 | |||
| 3483 | if (load_state_from_tss16(vcpu, &tss_segment_16)) | 3454 | if (load_state_from_tss16(vcpu, &tss_segment_16)) |
| 3484 | goto out; | 3455 | goto out; |
| 3485 | 3456 | ||
| @@ -3489,20 +3460,26 @@ out: | |||
| 3489 | } | 3460 | } |
| 3490 | 3461 | ||
| 3491 | static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, | 3462 | static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, |
| 3492 | struct desc_struct *cseg_desc, | 3463 | u32 old_tss_base, |
| 3493 | struct desc_struct *nseg_desc) | 3464 | struct desc_struct *nseg_desc) |
| 3494 | { | 3465 | { |
| 3495 | struct tss_segment_32 tss_segment_32; | 3466 | struct tss_segment_32 tss_segment_32; |
| 3496 | int ret = 0; | 3467 | int ret = 0; |
| 3497 | 3468 | ||
| 3498 | if (load_tss_segment32(vcpu, cseg_desc, &tss_segment_32)) | 3469 | if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32, |
| 3470 | sizeof tss_segment_32)) | ||
| 3499 | goto out; | 3471 | goto out; |
| 3500 | 3472 | ||
| 3501 | save_state_to_tss32(vcpu, &tss_segment_32); | 3473 | save_state_to_tss32(vcpu, &tss_segment_32); |
| 3502 | save_tss_segment32(vcpu, cseg_desc, &tss_segment_32); | ||
| 3503 | 3474 | ||
| 3504 | if (load_tss_segment32(vcpu, nseg_desc, &tss_segment_32)) | 3475 | if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32, |
| 3476 | sizeof tss_segment_32)) | ||
| 3505 | goto out; | 3477 | goto out; |
| 3478 | |||
| 3479 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), | ||
| 3480 | &tss_segment_32, sizeof tss_segment_32)) | ||
| 3481 | goto out; | ||
| 3482 | |||
| 3506 | if (load_state_from_tss32(vcpu, &tss_segment_32)) | 3483 | if (load_state_from_tss32(vcpu, &tss_segment_32)) |
| 3507 | goto out; | 3484 | goto out; |
| 3508 | 3485 | ||
| @@ -3517,16 +3494,20 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
| 3517 | struct desc_struct cseg_desc; | 3494 | struct desc_struct cseg_desc; |
| 3518 | struct desc_struct nseg_desc; | 3495 | struct desc_struct nseg_desc; |
| 3519 | int ret = 0; | 3496 | int ret = 0; |
| 3497 | u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); | ||
| 3498 | u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); | ||
| 3520 | 3499 | ||
| 3521 | kvm_get_segment(vcpu, &tr_seg, VCPU_SREG_TR); | 3500 | old_tss_base = vcpu->arch.mmu.gva_to_gpa(vcpu, old_tss_base); |
| 3522 | 3501 | ||
| 3502 | /* FIXME: Handle errors. Failure to read either TSS or their | ||
| 3503 | * descriptors should generate a pagefault. | ||
| 3504 | */ | ||
| 3523 | if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc)) | 3505 | if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc)) |
| 3524 | goto out; | 3506 | goto out; |
| 3525 | 3507 | ||
| 3526 | if (load_guest_segment_descriptor(vcpu, tr_seg.selector, &cseg_desc)) | 3508 | if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc)) |
| 3527 | goto out; | 3509 | goto out; |
| 3528 | 3510 | ||
| 3529 | |||
| 3530 | if (reason != TASK_SWITCH_IRET) { | 3511 | if (reason != TASK_SWITCH_IRET) { |
| 3531 | int cpl; | 3512 | int cpl; |
| 3532 | 3513 | ||
| @@ -3544,8 +3525,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
| 3544 | 3525 | ||
| 3545 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { | 3526 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { |
| 3546 | cseg_desc.type &= ~(1 << 1); //clear the B flag | 3527 | cseg_desc.type &= ~(1 << 1); //clear the B flag |
| 3547 | save_guest_segment_descriptor(vcpu, tr_seg.selector, | 3528 | save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc); |
| 3548 | &cseg_desc); | ||
| 3549 | } | 3529 | } |
| 3550 | 3530 | ||
| 3551 | if (reason == TASK_SWITCH_IRET) { | 3531 | if (reason == TASK_SWITCH_IRET) { |
| @@ -3557,10 +3537,10 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
| 3557 | kvm_x86_ops->cache_regs(vcpu); | 3537 | kvm_x86_ops->cache_regs(vcpu); |
| 3558 | 3538 | ||
| 3559 | if (nseg_desc.type & 8) | 3539 | if (nseg_desc.type & 8) |
| 3560 | ret = kvm_task_switch_32(vcpu, tss_selector, &cseg_desc, | 3540 | ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base, |
| 3561 | &nseg_desc); | 3541 | &nseg_desc); |
| 3562 | else | 3542 | else |
| 3563 | ret = kvm_task_switch_16(vcpu, tss_selector, &cseg_desc, | 3543 | ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_base, |
| 3564 | &nseg_desc); | 3544 | &nseg_desc); |
| 3565 | 3545 | ||
| 3566 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { | 3546 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { |
| @@ -3995,16 +3975,23 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
| 3995 | */ | 3975 | */ |
| 3996 | if (!user_alloc) { | 3976 | if (!user_alloc) { |
| 3997 | if (npages && !old.rmap) { | 3977 | if (npages && !old.rmap) { |
| 3978 | unsigned long userspace_addr; | ||
| 3979 | |||
| 3998 | down_write(¤t->mm->mmap_sem); | 3980 | down_write(¤t->mm->mmap_sem); |
| 3999 | memslot->userspace_addr = do_mmap(NULL, 0, | 3981 | userspace_addr = do_mmap(NULL, 0, |
| 4000 | npages * PAGE_SIZE, | 3982 | npages * PAGE_SIZE, |
| 4001 | PROT_READ | PROT_WRITE, | 3983 | PROT_READ | PROT_WRITE, |
| 4002 | MAP_SHARED | MAP_ANONYMOUS, | 3984 | MAP_SHARED | MAP_ANONYMOUS, |
| 4003 | 0); | 3985 | 0); |
| 4004 | up_write(¤t->mm->mmap_sem); | 3986 | up_write(¤t->mm->mmap_sem); |
| 4005 | 3987 | ||
| 4006 | if (IS_ERR((void *)memslot->userspace_addr)) | 3988 | if (IS_ERR((void *)userspace_addr)) |
| 4007 | return PTR_ERR((void *)memslot->userspace_addr); | 3989 | return PTR_ERR((void *)userspace_addr); |
| 3990 | |||
| 3991 | /* set userspace_addr atomically for kvm_hva_to_rmapp */ | ||
| 3992 | spin_lock(&kvm->mmu_lock); | ||
| 3993 | memslot->userspace_addr = userspace_addr; | ||
| 3994 | spin_unlock(&kvm->mmu_lock); | ||
| 4008 | } else { | 3995 | } else { |
| 4009 | if (!old.user_alloc && old.rmap) { | 3996 | if (!old.user_alloc && old.rmap) { |
| 4010 | int ret; | 3997 | int ret; |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 0313a5eec412..d9249a882aa5 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
| @@ -1014,6 +1014,9 @@ __init void lguest_init(void) | |||
| 1014 | init_pg_tables_start = __pa(pg0); | 1014 | init_pg_tables_start = __pa(pg0); |
| 1015 | init_pg_tables_end = __pa(pg0); | 1015 | init_pg_tables_end = __pa(pg0); |
| 1016 | 1016 | ||
| 1017 | /* As described in head_32.S, we map the first 128M of memory. */ | ||
| 1018 | max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT; | ||
| 1019 | |||
| 1017 | /* Load the %fs segment register (the per-cpu segment register) with | 1020 | /* Load the %fs segment register (the per-cpu segment register) with |
| 1018 | * the normal data segment to get through booting. */ | 1021 | * the normal data segment to get through booting. */ |
| 1019 | asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory"); | 1022 | asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory"); |
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index dfdf428975c0..f118c110af32 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S | |||
| @@ -52,7 +52,7 @@ | |||
| 52 | jnz 100b | 52 | jnz 100b |
| 53 | 102: | 53 | 102: |
| 54 | .section .fixup,"ax" | 54 | .section .fixup,"ax" |
| 55 | 103: addl %r8d,%edx /* ecx is zerorest also */ | 55 | 103: addl %ecx,%edx /* ecx is zerorest also */ |
| 56 | jmp copy_user_handle_tail | 56 | jmp copy_user_handle_tail |
| 57 | .previous | 57 | .previous |
| 58 | 58 | ||
diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S index 40e0e309d27e..cb0c112386fb 100644 --- a/arch/x86/lib/copy_user_nocache_64.S +++ b/arch/x86/lib/copy_user_nocache_64.S | |||
| @@ -32,7 +32,7 @@ | |||
| 32 | jnz 100b | 32 | jnz 100b |
| 33 | 102: | 33 | 102: |
| 34 | .section .fixup,"ax" | 34 | .section .fixup,"ax" |
| 35 | 103: addl %r8d,%edx /* ecx is zerorest also */ | 35 | 103: addl %ecx,%edx /* ecx is zerorest also */ |
| 36 | jmp copy_user_handle_tail | 36 | jmp copy_user_handle_tail |
| 37 | .previous | 37 | .previous |
| 38 | 38 | ||
| @@ -108,7 +108,6 @@ ENTRY(__copy_user_nocache) | |||
| 108 | jmp 60f | 108 | jmp 60f |
| 109 | 50: movl %ecx,%edx | 109 | 50: movl %ecx,%edx |
| 110 | 60: sfence | 110 | 60: sfence |
| 111 | movl %r8d,%ecx | ||
| 112 | jmp copy_user_handle_tail | 111 | jmp copy_user_handle_tail |
| 113 | .previous | 112 | .previous |
| 114 | 113 | ||
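Both fixup stanzas compute the byte count handed to copy_user_handle_tail(); by the time a fault lands in .fixup, %r8d has long been clobbered, and the remainder of the unfinished chunk lives in %ecx, hence the register swap (and the now-redundant movl dropped above). The tail handler itself does roughly the following (simplified sketch of the usercopy_64.c routine):

    unsigned long copy_user_handle_tail(char *to, char *from,
                                        unsigned len, unsigned zerorest)
    {
            char c;
            unsigned zero_len;

            /* Retry the remaining bytes one at a time. */
            for (; len; --len) {
                    if (__get_user(c, from++))
                            break;
                    if (__put_user(c, to++))
                            break;
            }
            /* copy_from_user semantics: zero whatever could not be read. */
            for (c = 0, zero_len = len; zerorest && zero_len; --zero_len)
                    if (__put_user(c, to++))
                            break;
            return len;     /* bytes not copied */
    }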
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 1fbb844c3d7a..dfb932dcf136 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ | 1 | obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ |
| 2 | pat.o pgtable.o | 2 | pat.o pgtable.o gup.o |
| 3 | 3 | ||
| 4 | obj-$(CONFIG_X86_32) += pgtable_32.o | 4 | obj-$(CONFIG_X86_32) += pgtable_32.o |
| 5 | 5 | ||
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index 5dfef9fa061a..62fa440678d8 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c | |||
| @@ -42,7 +42,6 @@ | |||
| 42 | 42 | ||
| 43 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; | 43 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; |
| 44 | EXPORT_SYMBOL(node_data); | 44 | EXPORT_SYMBOL(node_data); |
| 45 | static bootmem_data_t node0_bdata; | ||
| 46 | 45 | ||
| 47 | /* | 46 | /* |
| 48 | * numa interface - we expect the numa architecture specific code to have | 47 | * numa interface - we expect the numa architecture specific code to have |
| @@ -385,7 +384,7 @@ void __init initmem_init(unsigned long start_pfn, | |||
| 385 | for_each_online_node(nid) | 384 | for_each_online_node(nid) |
| 386 | memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); | 385 | memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); |
| 387 | 386 | ||
| 388 | NODE_DATA(0)->bdata = &node0_bdata; | 387 | NODE_DATA(0)->bdata = &bootmem_node_data[0]; |
| 389 | setup_bootmem_allocator(); | 388 | setup_bootmem_allocator(); |
| 390 | } | 389 | } |
| 391 | 390 | ||
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c new file mode 100644 index 000000000000..007bb06c7504 --- /dev/null +++ b/arch/x86/mm/gup.c | |||
| @@ -0,0 +1,298 @@ | |||
| 1 | /* | ||
| 2 | * Lockless get_user_pages_fast for x86 | ||
| 3 | * | ||
| 4 | * Copyright (C) 2008 Nick Piggin | ||
| 5 | * Copyright (C) 2008 Novell Inc. | ||
| 6 | */ | ||
| 7 | #include <linux/sched.h> | ||
| 8 | #include <linux/mm.h> | ||
| 9 | #include <linux/vmstat.h> | ||
| 10 | #include <linux/highmem.h> | ||
| 11 | |||
| 12 | #include <asm/pgtable.h> | ||
| 13 | |||
| 14 | static inline pte_t gup_get_pte(pte_t *ptep) | ||
| 15 | { | ||
| 16 | #ifndef CONFIG_X86_PAE | ||
| 17 | return *ptep; | ||
| 18 | #else | ||
| 19 | /* | ||
| 20 | * With get_user_pages_fast, we walk down the pagetables without taking | ||
| 21 | * any locks. For this we would like to load the pointers atomically, | ||
| 22 | * but that is not possible (without expensive cmpxchg8b) on PAE. What | ||
| 23 | * we do have is the guarantee that a pte will only either go from not | ||
| 24 | * present to present, or present to not present or both -- it will not | ||
| 25 | * switch to a completely different present page without a TLB flush in | ||
| 26 | * between; something that we are blocking by holding interrupts off. | ||
| 27 | * | ||
| 28 | * Setting ptes from not present to present goes: | ||
| 29 | * ptep->pte_high = h; | ||
| 30 | * smp_wmb(); | ||
| 31 | * ptep->pte_low = l; | ||
| 32 | * | ||
| 33 | * And present to not present goes: | ||
| 34 | * ptep->pte_low = 0; | ||
| 35 | * smp_wmb(); | ||
| 36 | * ptep->pte_high = 0; | ||
| 37 | * | ||
| 38 | * We must ensure here that the load of pte_low sees l iff pte_high | ||
| 39 | * sees h. We load pte_high *after* loading pte_low, which ensures we | ||
| 40 | * don't see an older value of pte_high. *Then* we recheck pte_low, | ||
| 41 | * which ensures that we haven't picked up a changed pte high. We might | ||
| 42 | * have got rubbish values from pte_low and pte_high, but we are | ||
| 43 | * guaranteed that pte_low will not have the present bit set *unless* | ||
| 44 | * it is 'l'. And get_user_pages_fast only operates on present ptes, so | ||
| 45 | * we're safe. | ||
| 46 | * | ||
| 47 | * gup_get_pte should not be used or copied outside gup.c without being | ||
| 48 | * very careful -- it does not atomically load the pte or anything that | ||
| 49 | * is likely to be useful for you. | ||
| 50 | */ | ||
| 51 | pte_t pte; | ||
| 52 | |||
| 53 | retry: | ||
| 54 | pte.pte_low = ptep->pte_low; | ||
| 55 | smp_rmb(); | ||
| 56 | pte.pte_high = ptep->pte_high; | ||
| 57 | smp_rmb(); | ||
| 58 | if (unlikely(pte.pte_low != ptep->pte_low)) | ||
| 59 | goto retry; | ||
| 60 | |||
| 61 | return pte; | ||
| 62 | #endif | ||
| 63 | } | ||
| 64 | |||
| 65 | /* | ||
| 66 | * The performance critical leaf functions are made noinline otherwise gcc | ||
| 67 | * inlines everything into a single function which results in too much | ||
| 68 | * register pressure. | ||
| 69 | */ | ||
| 70 | static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, | ||
| 71 | unsigned long end, int write, struct page **pages, int *nr) | ||
| 72 | { | ||
| 73 | unsigned long mask; | ||
| 74 | pte_t *ptep; | ||
| 75 | |||
| 76 | mask = _PAGE_PRESENT|_PAGE_USER; | ||
| 77 | if (write) | ||
| 78 | mask |= _PAGE_RW; | ||
| 79 | |||
| 80 | ptep = pte_offset_map(&pmd, addr); | ||
| 81 | do { | ||
| 82 | pte_t pte = gup_get_pte(ptep); | ||
| 83 | struct page *page; | ||
| 84 | |||
| 85 | if ((pte_val(pte) & (mask | _PAGE_SPECIAL)) != mask) { | ||
| 86 | pte_unmap(ptep); | ||
| 87 | return 0; | ||
| 88 | } | ||
| 89 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | ||
| 90 | page = pte_page(pte); | ||
| 91 | get_page(page); | ||
| 92 | pages[*nr] = page; | ||
| 93 | (*nr)++; | ||
| 94 | |||
| 95 | } while (ptep++, addr += PAGE_SIZE, addr != end); | ||
| 96 | pte_unmap(ptep - 1); | ||
| 97 | |||
| 98 | return 1; | ||
| 99 | } | ||
| 100 | |||
| 101 | static inline void get_head_page_multiple(struct page *page, int nr) | ||
| 102 | { | ||
| 103 | VM_BUG_ON(page != compound_head(page)); | ||
| 104 | VM_BUG_ON(page_count(page) == 0); | ||
| 105 | atomic_add(nr, &page->_count); | ||
| 106 | } | ||
| 107 | |||
| 108 | static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, | ||
| 109 | unsigned long end, int write, struct page **pages, int *nr) | ||
| 110 | { | ||
| 111 | unsigned long mask; | ||
| 112 | pte_t pte = *(pte_t *)&pmd; | ||
| 113 | struct page *head, *page; | ||
| 114 | int refs; | ||
| 115 | |||
| 116 | mask = _PAGE_PRESENT|_PAGE_USER; | ||
| 117 | if (write) | ||
| 118 | mask |= _PAGE_RW; | ||
| 119 | if ((pte_val(pte) & mask) != mask) | ||
| 120 | return 0; | ||
| 121 | /* hugepages are never "special" */ | ||
| 122 | VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL); | ||
| 123 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | ||
| 124 | |||
| 125 | refs = 0; | ||
| 126 | head = pte_page(pte); | ||
| 127 | page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); | ||
| 128 | do { | ||
| 129 | VM_BUG_ON(compound_head(page) != head); | ||
| 130 | pages[*nr] = page; | ||
| 131 | (*nr)++; | ||
| 132 | page++; | ||
| 133 | refs++; | ||
| 134 | } while (addr += PAGE_SIZE, addr != end); | ||
| 135 | get_head_page_multiple(head, refs); | ||
| 136 | |||
| 137 | return 1; | ||
| 138 | } | ||
| 139 | |||
| 140 | static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, | ||
| 141 | int write, struct page **pages, int *nr) | ||
| 142 | { | ||
| 143 | unsigned long next; | ||
| 144 | pmd_t *pmdp; | ||
| 145 | |||
| 146 | pmdp = pmd_offset(&pud, addr); | ||
| 147 | do { | ||
| 148 | pmd_t pmd = *pmdp; | ||
| 149 | |||
| 150 | next = pmd_addr_end(addr, end); | ||
| 151 | if (pmd_none(pmd)) | ||
| 152 | return 0; | ||
| 153 | if (unlikely(pmd_large(pmd))) { | ||
| 154 | if (!gup_huge_pmd(pmd, addr, next, write, pages, nr)) | ||
| 155 | return 0; | ||
| 156 | } else { | ||
| 157 | if (!gup_pte_range(pmd, addr, next, write, pages, nr)) | ||
| 158 | return 0; | ||
| 159 | } | ||
| 160 | } while (pmdp++, addr = next, addr != end); | ||
| 161 | |||
| 162 | return 1; | ||
| 163 | } | ||
| 164 | |||
| 165 | static noinline int gup_huge_pud(pud_t pud, unsigned long addr, | ||
| 166 | unsigned long end, int write, struct page **pages, int *nr) | ||
| 167 | { | ||
| 168 | unsigned long mask; | ||
| 169 | pte_t pte = *(pte_t *)&pud; | ||
| 170 | struct page *head, *page; | ||
| 171 | int refs; | ||
| 172 | |||
| 173 | mask = _PAGE_PRESENT|_PAGE_USER; | ||
| 174 | if (write) | ||
| 175 | mask |= _PAGE_RW; | ||
| 176 | if ((pte_val(pte) & mask) != mask) | ||
| 177 | return 0; | ||
| 178 | /* hugepages are never "special" */ | ||
| 179 | VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL); | ||
| 180 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | ||
| 181 | |||
| 182 | refs = 0; | ||
| 183 | head = pte_page(pte); | ||
| 184 | page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); | ||
| 185 | do { | ||
| 186 | VM_BUG_ON(compound_head(page) != head); | ||
| 187 | pages[*nr] = page; | ||
| 188 | (*nr)++; | ||
| 189 | page++; | ||
| 190 | refs++; | ||
| 191 | } while (addr += PAGE_SIZE, addr != end); | ||
| 192 | get_head_page_multiple(head, refs); | ||
| 193 | |||
| 194 | return 1; | ||
| 195 | } | ||
| 196 | |||
| 197 | static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, | ||
| 198 | int write, struct page **pages, int *nr) | ||
| 199 | { | ||
| 200 | unsigned long next; | ||
| 201 | pud_t *pudp; | ||
| 202 | |||
| 203 | pudp = pud_offset(&pgd, addr); | ||
| 204 | do { | ||
| 205 | pud_t pud = *pudp; | ||
| 206 | |||
| 207 | next = pud_addr_end(addr, end); | ||
| 208 | if (pud_none(pud)) | ||
| 209 | return 0; | ||
| 210 | if (unlikely(pud_large(pud))) { | ||
| 211 | if (!gup_huge_pud(pud, addr, next, write, pages, nr)) | ||
| 212 | return 0; | ||
| 213 | } else { | ||
| 214 | if (!gup_pmd_range(pud, addr, next, write, pages, nr)) | ||
| 215 | return 0; | ||
| 216 | } | ||
| 217 | } while (pudp++, addr = next, addr != end); | ||
| 218 | |||
| 219 | return 1; | ||
| 220 | } | ||
| 221 | |||
| 222 | int get_user_pages_fast(unsigned long start, int nr_pages, int write, | ||
| 223 | struct page **pages) | ||
| 224 | { | ||
| 225 | struct mm_struct *mm = current->mm; | ||
| 226 | unsigned long addr, len, end; | ||
| 227 | unsigned long next; | ||
| 228 | pgd_t *pgdp; | ||
| 229 | int nr = 0; | ||
| 230 | |||
| 231 | start &= PAGE_MASK; | ||
| 232 | addr = start; | ||
| 233 | len = (unsigned long) nr_pages << PAGE_SHIFT; | ||
| 234 | end = start + len; | ||
| 235 | if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, | ||
| 236 | start, len))) | ||
| 237 | goto slow_irqon; | ||
| 238 | |||
| 239 | /* | ||
| 240 | * XXX: batch / limit 'nr', to avoid large irq off latency | ||
| 241 | * needs some instrumenting to determine the common sizes used by | ||
| 242 | * important workloads (eg. DB2), and whether limiting the batch size | ||
| 243 | * will decrease performance. | ||
| 244 | * | ||
| 245 | * It seems like we're in the clear for the moment. Direct-IO is | ||
| 246 | * the main guy that batches up lots of get_user_pages, and even | ||
| 247 | * they are limited to 64-at-a-time which is not so many. | ||
| 248 | */ | ||
| 249 | /* | ||
| 250 | * This doesn't prevent pagetable teardown, but does prevent | ||
| 251 | * the pagetables and pages from being freed on x86. | ||
| 252 | * | ||
| 253 | * So long as we atomically load page table pointers versus teardown | ||
| 254 | * (which we do on x86, with the above PAE exception), we can follow the | ||
| 255 | * address down to the page and take a ref on it. | ||
| 256 | */ | ||
| 257 | local_irq_disable(); | ||
| 258 | pgdp = pgd_offset(mm, addr); | ||
| 259 | do { | ||
| 260 | pgd_t pgd = *pgdp; | ||
| 261 | |||
| 262 | next = pgd_addr_end(addr, end); | ||
| 263 | if (pgd_none(pgd)) | ||
| 264 | goto slow; | ||
| 265 | if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) | ||
| 266 | goto slow; | ||
| 267 | } while (pgdp++, addr = next, addr != end); | ||
| 268 | local_irq_enable(); | ||
| 269 | |||
| 270 | VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); | ||
| 271 | return nr; | ||
| 272 | |||
| 273 | { | ||
| 274 | int ret; | ||
| 275 | |||
| 276 | slow: | ||
| 277 | local_irq_enable(); | ||
| 278 | slow_irqon: | ||
| 279 | /* Try to get the remaining pages with get_user_pages */ | ||
| 280 | start += nr << PAGE_SHIFT; | ||
| 281 | pages += nr; | ||
| 282 | |||
| 283 | down_read(&mm->mmap_sem); | ||
| 284 | ret = get_user_pages(current, mm, start, | ||
| 285 | (end - start) >> PAGE_SHIFT, write, 0, pages, NULL); | ||
| 286 | up_read(&mm->mmap_sem); | ||
| 287 | |||
| 288 | /* Have to be a bit careful with return values */ | ||
| 289 | if (nr > 0) { | ||
| 290 | if (ret < 0) | ||
| 291 | ret = nr; | ||
| 292 | else | ||
| 293 | ret += nr; | ||
| 294 | } | ||
| 295 | |||
| 296 | return ret; | ||
| 297 | } | ||
| 298 | } | ||
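A typical caller of the new fast path follows the direct-IO shape the comment above mentions: pin, use, release. A kernel-style sketch (hypothetical function, error handling trimmed):

    #include <linux/mm.h>

    static int with_pinned_user_buffer(unsigned long uaddr, int nr_pages,
                                       struct page **pages)
    {
            int got, i;

            /* No mmap_sem needed here; the fast path falls back to
             * get_user_pages() internally when its walk fails. */
            got = get_user_pages_fast(uaddr, nr_pages, 1 /* write */, pages);
            if (got <= 0)
                    return got ? got : -EFAULT;

            /* ... hand pages[0..got) to the device, build a bio, ... */

            for (i = 0; i < got; i++)
                    put_page(pages[i]);     /* drop the refs gup took */
            return got;
    }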
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 0b3d567e686d..8f307d914c2e 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c | |||
| @@ -124,7 +124,8 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) | |||
| 124 | return 1; | 124 | return 1; |
| 125 | } | 125 | } |
| 126 | 126 | ||
| 127 | pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) | 127 | pte_t *huge_pte_alloc(struct mm_struct *mm, |
| 128 | unsigned long addr, unsigned long sz) | ||
| 128 | { | 129 | { |
| 129 | pgd_t *pgd; | 130 | pgd_t *pgd; |
| 130 | pud_t *pud; | 131 | pud_t *pud; |
| @@ -133,9 +134,14 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) | |||
| 133 | pgd = pgd_offset(mm, addr); | 134 | pgd = pgd_offset(mm, addr); |
| 134 | pud = pud_alloc(mm, pgd, addr); | 135 | pud = pud_alloc(mm, pgd, addr); |
| 135 | if (pud) { | 136 | if (pud) { |
| 136 | if (pud_none(*pud)) | 137 | if (sz == PUD_SIZE) { |
| 137 | huge_pmd_share(mm, addr, pud); | 138 | pte = (pte_t *)pud; |
| 138 | pte = (pte_t *) pmd_alloc(mm, pud, addr); | 139 | } else { |
| 140 | BUG_ON(sz != PMD_SIZE); | ||
| 141 | if (pud_none(*pud)) | ||
| 142 | huge_pmd_share(mm, addr, pud); | ||
| 143 | pte = (pte_t *) pmd_alloc(mm, pud, addr); | ||
| 144 | } | ||
| 139 | } | 145 | } |
| 140 | BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); | 146 | BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); |
| 141 | 147 | ||
| @@ -151,8 +157,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | |||
| 151 | pgd = pgd_offset(mm, addr); | 157 | pgd = pgd_offset(mm, addr); |
| 152 | if (pgd_present(*pgd)) { | 158 | if (pgd_present(*pgd)) { |
| 153 | pud = pud_offset(pgd, addr); | 159 | pud = pud_offset(pgd, addr); |
| 154 | if (pud_present(*pud)) | 160 | if (pud_present(*pud)) { |
| 161 | if (pud_large(*pud)) | ||
| 162 | return (pte_t *)pud; | ||
| 155 | pmd = pmd_offset(pud, addr); | 163 | pmd = pmd_offset(pud, addr); |
| 164 | } | ||
| 156 | } | 165 | } |
| 157 | return (pte_t *) pmd; | 166 | return (pte_t *) pmd; |
| 158 | } | 167 | } |
| @@ -188,6 +197,11 @@ int pmd_huge(pmd_t pmd) | |||
| 188 | return 0; | 197 | return 0; |
| 189 | } | 198 | } |
| 190 | 199 | ||
| 200 | int pud_huge(pud_t pud) | ||
| 201 | { | ||
| 202 | return 0; | ||
| 203 | } | ||
| 204 | |||
| 191 | struct page * | 205 | struct page * |
| 192 | follow_huge_pmd(struct mm_struct *mm, unsigned long address, | 206 | follow_huge_pmd(struct mm_struct *mm, unsigned long address, |
| 193 | pmd_t *pmd, int write) | 207 | pmd_t *pmd, int write) |
| @@ -208,6 +222,11 @@ int pmd_huge(pmd_t pmd) | |||
| 208 | return !!(pmd_val(pmd) & _PAGE_PSE); | 222 | return !!(pmd_val(pmd) & _PAGE_PSE); |
| 209 | } | 223 | } |
| 210 | 224 | ||
| 225 | int pud_huge(pud_t pud) | ||
| 226 | { | ||
| 227 | return !!(pud_val(pud) & _PAGE_PSE); | ||
| 228 | } | ||
| 229 | |||
| 211 | struct page * | 230 | struct page * |
| 212 | follow_huge_pmd(struct mm_struct *mm, unsigned long address, | 231 | follow_huge_pmd(struct mm_struct *mm, unsigned long address, |
| 213 | pmd_t *pmd, int write) | 232 | pmd_t *pmd, int write) |
| @@ -216,9 +235,22 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, | |||
| 216 | 235 | ||
| 217 | page = pte_page(*(pte_t *)pmd); | 236 | page = pte_page(*(pte_t *)pmd); |
| 218 | if (page) | 237 | if (page) |
| 219 | page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT); | 238 | page += ((address & ~PMD_MASK) >> PAGE_SHIFT); |
| 239 | return page; | ||
| 240 | } | ||
| 241 | |||
| 242 | struct page * | ||
| 243 | follow_huge_pud(struct mm_struct *mm, unsigned long address, | ||
| 244 | pud_t *pud, int write) | ||
| 245 | { | ||
| 246 | struct page *page; | ||
| 247 | |||
| 248 | page = pte_page(*(pte_t *)pud); | ||
| 249 | if (page) | ||
| 250 | page += ((address & ~PUD_MASK) >> PAGE_SHIFT); | ||
| 220 | return page; | 251 | return page; |
| 221 | } | 252 | } |
| 253 | |||
| 222 | #endif | 254 | #endif |
| 223 | 255 | ||
| 224 | /* x86_64 also uses this file */ | 256 | /* x86_64 also uses this file */ |
| @@ -228,6 +260,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, | |||
| 228 | unsigned long addr, unsigned long len, | 260 | unsigned long addr, unsigned long len, |
| 229 | unsigned long pgoff, unsigned long flags) | 261 | unsigned long pgoff, unsigned long flags) |
| 230 | { | 262 | { |
| 263 | struct hstate *h = hstate_file(file); | ||
| 231 | struct mm_struct *mm = current->mm; | 264 | struct mm_struct *mm = current->mm; |
| 232 | struct vm_area_struct *vma; | 265 | struct vm_area_struct *vma; |
| 233 | unsigned long start_addr; | 266 | unsigned long start_addr; |
| @@ -240,7 +273,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, | |||
| 240 | } | 273 | } |
| 241 | 274 | ||
| 242 | full_search: | 275 | full_search: |
| 243 | addr = ALIGN(start_addr, HPAGE_SIZE); | 276 | addr = ALIGN(start_addr, huge_page_size(h)); |
| 244 | 277 | ||
| 245 | for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { | 278 | for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { |
| 246 | /* At this point: (!vma || addr < vma->vm_end). */ | 279 | /* At this point: (!vma || addr < vma->vm_end). */ |
| @@ -262,7 +295,7 @@ full_search: | |||
| 262 | } | 295 | } |
| 263 | if (addr + mm->cached_hole_size < vma->vm_start) | 296 | if (addr + mm->cached_hole_size < vma->vm_start) |
| 264 | mm->cached_hole_size = vma->vm_start - addr; | 297 | mm->cached_hole_size = vma->vm_start - addr; |
| 265 | addr = ALIGN(vma->vm_end, HPAGE_SIZE); | 298 | addr = ALIGN(vma->vm_end, huge_page_size(h)); |
| 266 | } | 299 | } |
| 267 | } | 300 | } |
| 268 | 301 | ||
| @@ -270,6 +303,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, | |||
| 270 | unsigned long addr0, unsigned long len, | 303 | unsigned long addr0, unsigned long len, |
| 271 | unsigned long pgoff, unsigned long flags) | 304 | unsigned long pgoff, unsigned long flags) |
| 272 | { | 305 | { |
| 306 | struct hstate *h = hstate_file(file); | ||
| 273 | struct mm_struct *mm = current->mm; | 307 | struct mm_struct *mm = current->mm; |
| 274 | struct vm_area_struct *vma, *prev_vma; | 308 | struct vm_area_struct *vma, *prev_vma; |
| 275 | unsigned long base = mm->mmap_base, addr = addr0; | 309 | unsigned long base = mm->mmap_base, addr = addr0; |
| @@ -290,7 +324,7 @@ try_again: | |||
| 290 | goto fail; | 324 | goto fail; |
| 291 | 325 | ||
| 292 | /* either no address requested or can't fit in requested address hole */ | 326 | /* either no address requested or can't fit in requested address hole */ |
| 293 | addr = (mm->free_area_cache - len) & HPAGE_MASK; | 327 | addr = (mm->free_area_cache - len) & huge_page_mask(h); |
| 294 | do { | 328 | do { |
| 295 | /* | 329 | /* |
| 296 | * Lookup failure means no vma is above this address, | 330 | * Lookup failure means no vma is above this address, |
| @@ -321,7 +355,7 @@ try_again: | |||
| 321 | largest_hole = vma->vm_start - addr; | 355 | largest_hole = vma->vm_start - addr; |
| 322 | 356 | ||
| 323 | /* try just below the current vma->vm_start */ | 357 | /* try just below the current vma->vm_start */ |
| 324 | addr = (vma->vm_start - len) & HPAGE_MASK; | 358 | addr = (vma->vm_start - len) & huge_page_mask(h); |
| 325 | } while (len <= vma->vm_start); | 359 | } while (len <= vma->vm_start); |
| 326 | 360 | ||
| 327 | fail: | 361 | fail: |
| @@ -359,22 +393,23 @@ unsigned long | |||
| 359 | hugetlb_get_unmapped_area(struct file *file, unsigned long addr, | 393 | hugetlb_get_unmapped_area(struct file *file, unsigned long addr, |
| 360 | unsigned long len, unsigned long pgoff, unsigned long flags) | 394 | unsigned long len, unsigned long pgoff, unsigned long flags) |
| 361 | { | 395 | { |
| 396 | struct hstate *h = hstate_file(file); | ||
| 362 | struct mm_struct *mm = current->mm; | 397 | struct mm_struct *mm = current->mm; |
| 363 | struct vm_area_struct *vma; | 398 | struct vm_area_struct *vma; |
| 364 | 399 | ||
| 365 | if (len & ~HPAGE_MASK) | 400 | if (len & ~huge_page_mask(h)) |
| 366 | return -EINVAL; | 401 | return -EINVAL; |
| 367 | if (len > TASK_SIZE) | 402 | if (len > TASK_SIZE) |
| 368 | return -ENOMEM; | 403 | return -ENOMEM; |
| 369 | 404 | ||
| 370 | if (flags & MAP_FIXED) { | 405 | if (flags & MAP_FIXED) { |
| 371 | if (prepare_hugepage_range(addr, len)) | 406 | if (prepare_hugepage_range(file, addr, len)) |
| 372 | return -EINVAL; | 407 | return -EINVAL; |
| 373 | return addr; | 408 | return addr; |
| 374 | } | 409 | } |
| 375 | 410 | ||
| 376 | if (addr) { | 411 | if (addr) { |
| 377 | addr = ALIGN(addr, HPAGE_SIZE); | 412 | addr = ALIGN(addr, huge_page_size(h)); |
| 378 | vma = find_vma(mm, addr); | 413 | vma = find_vma(mm, addr); |
| 379 | if (TASK_SIZE - len >= addr && | 414 | if (TASK_SIZE - len >= addr && |
| 380 | (!vma || addr + len <= vma->vm_start)) | 415 | (!vma || addr + len <= vma->vm_start)) |
| @@ -390,3 +425,20 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, | |||
| 390 | 425 | ||
| 391 | #endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/ | 426 | #endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/ |
| 392 | 427 | ||
| 428 | #ifdef CONFIG_X86_64 | ||
| 429 | static __init int setup_hugepagesz(char *opt) | ||
| 430 | { | ||
| 431 | unsigned long ps = memparse(opt, &opt); | ||
| 432 | if (ps == PMD_SIZE) { | ||
| 433 | hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); | ||
| 434 | } else if (ps == PUD_SIZE && cpu_has_gbpages) { | ||
| 435 | hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); | ||
| 436 | } else { | ||
| 437 | printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n", | ||
| 438 | ps >> 20); | ||
| 439 | return 0; | ||
| 440 | } | ||
| 441 | return 1; | ||
| 442 | } | ||
| 443 | __setup("hugepagesz=", setup_hugepagesz); | ||
| 444 | #endif | ||
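The hugetlbpage.c hunks above are part of the multi-size huge page rework: the compile-time HPAGE_MASK/HPAGE_SIZE constants give way to lookups through the hstate attached to the mapped file, so one kernel image can serve several huge page sizes at once, and prepare_hugepage_range() grows a file argument so it can find the right hstate too. The new setup_hugepagesz() then registers one hstate per size named on the command line: hugepagesz=2M always works, hugepagesz=1G only when the CPU advertises gbpages. A minimal sketch of the accessors this code now leans on, simplified from what include/linux/hugetlb.h provides rather than copied from it:

/* Simplified sketch: the real struct hstate carries many more fields,
 * but size and mask both derive from the per-size order like this. */
struct hstate {
	unsigned int order;	/* log2 of pages per huge page */
};

static inline unsigned long huge_page_size(struct hstate *h)
{
	return (unsigned long)PAGE_SIZE << h->order;
}

static inline unsigned long huge_page_mask(struct hstate *h)
{
	return ~(huge_page_size(h) - 1);
}

On this kernel a boot line such as "hugepagesz=1G hugepages=4" would register the 1 GB hstate and reserve four pages of that size, since hugepages= applies to the most recently parsed hugepagesz= (my understanding of the option ordering).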
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index ec37121f6709..129618ca0ea2 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
| @@ -86,43 +86,6 @@ early_param("gbpages", parse_direct_gbpages_on); | |||
| 86 | * around without checking the pgd every time. | 86 | * around without checking the pgd every time. |
| 87 | */ | 87 | */ |
| 88 | 88 | ||
| 89 | void show_mem(void) | ||
| 90 | { | ||
| 91 | long i, total = 0, reserved = 0; | ||
| 92 | long shared = 0, cached = 0; | ||
| 93 | struct page *page; | ||
| 94 | pg_data_t *pgdat; | ||
| 95 | |||
| 96 | printk(KERN_INFO "Mem-info:\n"); | ||
| 97 | show_free_areas(); | ||
| 98 | for_each_online_pgdat(pgdat) { | ||
| 99 | for (i = 0; i < pgdat->node_spanned_pages; ++i) { | ||
| 100 | /* | ||
| 101 | * This loop can take a while with 256 GB and | ||
| 102 | * 4k pages so defer the NMI watchdog: | ||
| 103 | */ | ||
| 104 | if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) | ||
| 105 | touch_nmi_watchdog(); | ||
| 106 | |||
| 107 | if (!pfn_valid(pgdat->node_start_pfn + i)) | ||
| 108 | continue; | ||
| 109 | |||
| 110 | page = pfn_to_page(pgdat->node_start_pfn + i); | ||
| 111 | total++; | ||
| 112 | if (PageReserved(page)) | ||
| 113 | reserved++; | ||
| 114 | else if (PageSwapCache(page)) | ||
| 115 | cached++; | ||
| 116 | else if (page_count(page)) | ||
| 117 | shared += page_count(page) - 1; | ||
| 118 | } | ||
| 119 | } | ||
| 120 | printk(KERN_INFO "%lu pages of RAM\n", total); | ||
| 121 | printk(KERN_INFO "%lu reserved pages\n", reserved); | ||
| 122 | printk(KERN_INFO "%lu pages shared\n", shared); | ||
| 123 | printk(KERN_INFO "%lu pages swap cached\n", cached); | ||
| 124 | } | ||
| 125 | |||
| 126 | int after_bootmem; | 89 | int after_bootmem; |
| 127 | 90 | ||
| 128 | static __init void *spp_getpage(void) | 91 | static __init void *spp_getpage(void) |
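The show_mem() deletion here is consolidation, not lost functionality: this merge drops the x86-64 copy in favour of a single generic implementation shared by all architectures (lib/show_mem.c, to my reading of the series). The idiom the deleted copy guarded with touch_nmi_watchdog() is still worth remembering for any long pfn walk; condensed from the removed function:

#include <linux/mm.h>
#include <linux/nmi.h>

/* Condensed from the deleted show_mem() above: count valid pages per
 * node, petting the NMI watchdog so a 256 GB walk does not trip it. */
static unsigned long count_spanned_pages(void)
{
	pg_data_t *pgdat;
	unsigned long i, total = 0;

	for_each_online_pgdat(pgdat) {
		for (i = 0; i < pgdat->node_spanned_pages; i++) {
			if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
				touch_nmi_watchdog();
			if (pfn_valid(pgdat->node_start_pfn + i))
				total++;
		}
	}
	return total;
}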
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 24c1d3c30186..016f335bbeea 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
| @@ -330,6 +330,14 @@ static void __iomem *ioremap_default(resource_size_t phys_addr, | |||
| 330 | return (void __iomem *)ret; | 330 | return (void __iomem *)ret; |
| 331 | } | 331 | } |
| 332 | 332 | ||
| 333 | void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size, | ||
| 334 | unsigned long prot_val) | ||
| 335 | { | ||
| 336 | return __ioremap_caller(phys_addr, size, (prot_val & _PAGE_CACHE_MASK), | ||
| 337 | __builtin_return_address(0)); | ||
| 338 | } | ||
| 339 | EXPORT_SYMBOL(ioremap_prot); | ||
| 340 | |||
| 333 | /** | 341 | /** |
| 334 | * iounmap - Free a IO remapping | 342 | * iounmap - Free a IO remapping |
| 335 | * @addr: virtual address from ioremap_* | 343 | * @addr: virtual address from ioremap_* |
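ioremap_prot() is a new export that lets a caller name the page protection explicitly instead of taking the ioremap()/ioremap_nocache() defaults; as the wrapper shows, only the _PAGE_CACHE_MASK bits of prot_val are honoured. Its in-tree purpose is to back generic_access_phys(), which the pci/i386.c hunk further down wires up, but a driver could also call it directly. A hypothetical fragment, assuming the PAT-era _PAGE_CACHE_WC attribute:

#include <linux/io.h>
#include <asm/pgtable.h>

/* Hypothetical: map a frame-buffer aperture write-combined through the
 * new export rather than via ioremap() plus a later attribute change. */
static void __iomem *map_fb_wc(resource_size_t phys, unsigned long size)
{
	return ioremap_prot(phys, size, _PAGE_CACHE_WC);
}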
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 9782f42dd319..a4dd793d6003 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
| @@ -23,8 +23,6 @@ | |||
| 23 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; | 23 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; |
| 24 | EXPORT_SYMBOL(node_data); | 24 | EXPORT_SYMBOL(node_data); |
| 25 | 25 | ||
| 26 | static bootmem_data_t plat_node_bdata[MAX_NUMNODES]; | ||
| 27 | |||
| 28 | struct memnode memnode; | 26 | struct memnode memnode; |
| 29 | 27 | ||
| 30 | s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { | 28 | s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { |
| @@ -198,7 +196,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, | |||
| 198 | nodedata_phys + pgdat_size - 1); | 196 | nodedata_phys + pgdat_size - 1); |
| 199 | 197 | ||
| 200 | memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); | 198 | memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); |
| 201 | NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid]; | 199 | NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid]; |
| 202 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; | 200 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; |
| 203 | NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; | 201 | NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; |
| 204 | 202 | ||
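The numa_64.c change is a matching consolidation: each NUMA architecture used to carry its own static bootmem_data_t array, and this series moves that storage into a shared bootmem_node_data[] owned by the core bootmem allocator (my reading), so the per-arch plat_node_bdata copy can simply go.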
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 557b2abceef8..d50302774fe2 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
| @@ -207,6 +207,9 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) | |||
| 207 | unsigned long addr; | 207 | unsigned long addr; |
| 208 | int i; | 208 | int i; |
| 209 | 209 | ||
| 210 | if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */ | ||
| 211 | return; | ||
| 212 | |||
| 210 | pud = pud_offset(pgd, 0); | 213 | pud = pud_offset(pgd, 0); |
| 211 | 214 | ||
| 212 | for (addr = i = 0; i < PREALLOCATED_PMDS; | 215 | for (addr = i = 0; i < PREALLOCATED_PMDS; |
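The early return added to pgd_prepopulate_pmd() guards the non-PAE configuration, where PREALLOCATED_PMDS evaluates to 0 and the loop below is dead code; per the comment, gcc-3.4.x miscompiled the function without the explicit bail-out (the exact compiler failure is not spelled out in the patch).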
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index b4becbf8c570..cab0abbd1ebe 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c | |||
| @@ -20,53 +20,6 @@ | |||
| 20 | #include <asm/tlb.h> | 20 | #include <asm/tlb.h> |
| 21 | #include <asm/tlbflush.h> | 21 | #include <asm/tlbflush.h> |
| 22 | 22 | ||
| 23 | void show_mem(void) | ||
| 24 | { | ||
| 25 | int total = 0, reserved = 0; | ||
| 26 | int shared = 0, cached = 0; | ||
| 27 | int highmem = 0; | ||
| 28 | struct page *page; | ||
| 29 | pg_data_t *pgdat; | ||
| 30 | unsigned long i; | ||
| 31 | unsigned long flags; | ||
| 32 | |||
| 33 | printk(KERN_INFO "Mem-info:\n"); | ||
| 34 | show_free_areas(); | ||
| 35 | for_each_online_pgdat(pgdat) { | ||
| 36 | pgdat_resize_lock(pgdat, &flags); | ||
| 37 | for (i = 0; i < pgdat->node_spanned_pages; ++i) { | ||
| 38 | if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) | ||
| 39 | touch_nmi_watchdog(); | ||
| 40 | page = pgdat_page_nr(pgdat, i); | ||
| 41 | total++; | ||
| 42 | if (PageHighMem(page)) | ||
| 43 | highmem++; | ||
| 44 | if (PageReserved(page)) | ||
| 45 | reserved++; | ||
| 46 | else if (PageSwapCache(page)) | ||
| 47 | cached++; | ||
| 48 | else if (page_count(page)) | ||
| 49 | shared += page_count(page) - 1; | ||
| 50 | } | ||
| 51 | pgdat_resize_unlock(pgdat, &flags); | ||
| 52 | } | ||
| 53 | printk(KERN_INFO "%d pages of RAM\n", total); | ||
| 54 | printk(KERN_INFO "%d pages of HIGHMEM\n", highmem); | ||
| 55 | printk(KERN_INFO "%d reserved pages\n", reserved); | ||
| 56 | printk(KERN_INFO "%d pages shared\n", shared); | ||
| 57 | printk(KERN_INFO "%d pages swap cached\n", cached); | ||
| 58 | |||
| 59 | printk(KERN_INFO "%lu pages dirty\n", global_page_state(NR_FILE_DIRTY)); | ||
| 60 | printk(KERN_INFO "%lu pages writeback\n", | ||
| 61 | global_page_state(NR_WRITEBACK)); | ||
| 62 | printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED)); | ||
| 63 | printk(KERN_INFO "%lu pages slab\n", | ||
| 64 | global_page_state(NR_SLAB_RECLAIMABLE) + | ||
| 65 | global_page_state(NR_SLAB_UNRECLAIMABLE)); | ||
| 66 | printk(KERN_INFO "%lu pages pagetables\n", | ||
| 67 | global_page_state(NR_PAGETABLE)); | ||
| 68 | } | ||
| 69 | |||
| 70 | /* | 23 | /* |
| 71 | * Associate a virtual page frame with a given physical page frame | 24 | * Associate a virtual page frame with a given physical page frame |
| 72 | * and protection flags for that frame. | 25 | * and protection flags for that frame. |
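This is the 32-bit twin of the init_64.c deletion earlier: the same generic show_mem() replaces it, and to my understanding it reports the same classes of counters, including the highmem, dirty, writeback, slab, and pagetable figures this copy printed, so i386 loses no diagnostics.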
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 7f3329b55d2e..3f90289410e6 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
| @@ -369,20 +369,34 @@ static int __init ppro_init(char **cpu_type) | |||
| 369 | { | 369 | { |
| 370 | __u8 cpu_model = boot_cpu_data.x86_model; | 370 | __u8 cpu_model = boot_cpu_data.x86_model; |
| 371 | 371 | ||
| 372 | if (cpu_model == 14) | 372 | switch (cpu_model) { |
| 373 | case 0 ... 2: | ||
| 374 | *cpu_type = "i386/ppro"; | ||
| 375 | break; | ||
| 376 | case 3 ... 5: | ||
| 377 | *cpu_type = "i386/pii"; | ||
| 378 | break; | ||
| 379 | case 6 ... 8: | ||
| 380 | *cpu_type = "i386/piii"; | ||
| 381 | break; | ||
| 382 | case 9: | ||
| 383 | *cpu_type = "i386/p6_mobile"; | ||
| 384 | break; | ||
| 385 | case 10 ... 13: | ||
| 386 | *cpu_type = "i386/p6"; | ||
| 387 | break; | ||
| 388 | case 14: | ||
| 373 | *cpu_type = "i386/core"; | 389 | *cpu_type = "i386/core"; |
| 374 | else if (cpu_model == 15 || cpu_model == 23) | 390 | break; |
| 391 | case 15: case 23: | ||
| 392 | *cpu_type = "i386/core_2"; | ||
| 393 | break; | ||
| 394 | case 26: | ||
| 375 | *cpu_type = "i386/core_2"; | 395 | *cpu_type = "i386/core_2"; |
| 376 | else if (cpu_model > 0xd) | 396 | break; |
| 397 | default: | ||
| 398 | /* Unknown */ | ||
| 377 | return 0; | 399 | return 0; |
| 378 | else if (cpu_model == 9) { | ||
| 379 | *cpu_type = "i386/p6_mobile"; | ||
| 380 | } else if (cpu_model > 5) { | ||
| 381 | *cpu_type = "i386/piii"; | ||
| 382 | } else if (cpu_model > 2) { | ||
| 383 | *cpu_type = "i386/pii"; | ||
| 384 | } else { | ||
| 385 | *cpu_type = "i386/ppro"; | ||
| 386 | } | 400 | } |
| 387 | 401 | ||
| 388 | model = &op_ppro_spec; | 402 | model = &op_ppro_spec; |
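ppro_init() turns its if/else ladder into a switch that reads like a model table, using GCC's case-range extension ("case 0 ... 2:"); note that model 26 is reported as "i386/core_2" for now rather than getting its own event set. The mapping is easy to sanity-check in user space with a self-contained paraphrase of the new switch:

#include <stdio.h>

/* User-space paraphrase of the new ppro_init() model table; case
 * ranges are a GNU C extension, as in the kernel code itself. */
static const char *ppro_cpu_type(unsigned char model)
{
	switch (model) {
	case 0 ... 2:	return "i386/ppro";
	case 3 ... 5:	return "i386/pii";
	case 6 ... 8:	return "i386/piii";
	case 9:		return "i386/p6_mobile";
	case 10 ... 13:	return "i386/p6";
	case 14:	return "i386/core";
	case 15: case 23:
	case 26:	return "i386/core_2";
	default:	return NULL;	/* unknown model */
	}
}

int main(void)
{
	printf("%s\n", ppro_cpu_type(23));	/* prints i386/core_2 */
	return 0;
}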
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index ff3a6a336342..4bdaa590375d 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c | |||
| @@ -23,7 +23,8 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d) | |||
| 23 | pci_read_config_byte(d, reg++, &busno); | 23 | pci_read_config_byte(d, reg++, &busno); |
| 24 | pci_read_config_byte(d, reg++, &suba); | 24 | pci_read_config_byte(d, reg++, &suba); |
| 25 | pci_read_config_byte(d, reg++, &subb); | 25 | pci_read_config_byte(d, reg++, &subb); |
| 26 | DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb); | 26 | dev_dbg(&d->dev, "i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, |
| 27 | suba, subb); | ||
| 27 | if (busno) | 28 | if (busno) |
| 28 | pci_scan_bus_with_sysdata(busno); /* Bus A */ | 29 | pci_scan_bus_with_sysdata(busno); /* Bus A */ |
| 29 | if (suba < subb) | 30 | if (suba < subb) |
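The fixup.c hunk above opens a theme that runs through the rest of the PCI files: raw printk()/DBG() calls become the dev_printk() family (dev_dbg, dev_info, dev_warn, dev_err), which prefixes each line with the device name automatically, so the hand-rolled pci_name() plumbing and multi-line format strings disappear. The pattern in a hypothetical fragment:

#include <linux/pci.h>

/* Hypothetical fixup showing the conversion: the dev_* helpers prepend
 * the device name on their own, so the format string carries only the
 * message; 64-bit resource values are printed via unsigned long long. */
static void report_bar(struct pci_dev *dev, int idx, struct resource *r)
{
	dev_dbg(&dev->dev, "resource %#llx-%#llx (flags %#lx)\n",
		(unsigned long long)r->start,
		(unsigned long long)r->end, r->flags);
	if (!r->parent)
		dev_err(&dev->dev, "BAR %d: can't allocate resource\n", idx);
}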
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 2aafb67dc5f1..5807d1bc73f7 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c | |||
| @@ -128,10 +128,8 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) | |||
| 128 | pr = pci_find_parent_resource(dev, r); | 128 | pr = pci_find_parent_resource(dev, r); |
| 129 | if (!r->start || !pr || | 129 | if (!r->start || !pr || |
| 130 | request_resource(pr, r) < 0) { | 130 | request_resource(pr, r) < 0) { |
| 131 | printk(KERN_ERR "PCI: Cannot allocate " | 131 | dev_err(&dev->dev, "BAR %d: can't " |
| 132 | "resource region %d " | 132 | "allocate resource\n", idx); |
| 133 | "of bridge %s\n", | ||
| 134 | idx, pci_name(dev)); | ||
| 135 | /* | 133 | /* |
| 136 | * Something is wrong with the region. | 134 | * Something is wrong with the region. |
| 137 | * Invalidate the resource to prevent | 135 | * Invalidate the resource to prevent |
| @@ -166,15 +164,15 @@ static void __init pcibios_allocate_resources(int pass) | |||
| 166 | else | 164 | else |
| 167 | disabled = !(command & PCI_COMMAND_MEMORY); | 165 | disabled = !(command & PCI_COMMAND_MEMORY); |
| 168 | if (pass == disabled) { | 166 | if (pass == disabled) { |
| 169 | DBG("PCI: Resource %08lx-%08lx " | 167 | dev_dbg(&dev->dev, "resource %#08llx-%#08llx " |
| 170 | "(f=%lx, d=%d, p=%d)\n", | 168 | "(f=%lx, d=%d, p=%d)\n", |
| 171 | r->start, r->end, r->flags, disabled, pass); | 169 | (unsigned long long) r->start, |
| 170 | (unsigned long long) r->end, | ||
| 171 | r->flags, disabled, pass); | ||
| 172 | pr = pci_find_parent_resource(dev, r); | 172 | pr = pci_find_parent_resource(dev, r); |
| 173 | if (!pr || request_resource(pr, r) < 0) { | 173 | if (!pr || request_resource(pr, r) < 0) { |
| 174 | printk(KERN_ERR "PCI: Cannot allocate " | 174 | dev_err(&dev->dev, "BAR %d: can't " |
| 175 | "resource region %d " | 175 | "allocate resource\n", idx); |
| 176 | "of device %s\n", | ||
| 177 | idx, pci_name(dev)); | ||
| 178 | /* We'll assign a new address later */ | 176 | /* We'll assign a new address later */ |
| 179 | r->end -= r->start; | 177 | r->end -= r->start; |
| 180 | r->start = 0; | 178 | r->start = 0; |
| @@ -187,8 +185,7 @@ static void __init pcibios_allocate_resources(int pass) | |||
| 187 | /* Turn the ROM off, leave the resource region, | 185 | /* Turn the ROM off, leave the resource region, |
| 188 | * but keep it unregistered. */ | 186 | * but keep it unregistered. */ |
| 189 | u32 reg; | 187 | u32 reg; |
| 190 | DBG("PCI: Switching off ROM of %s\n", | 188 | dev_dbg(&dev->dev, "disabling ROM\n"); |
| 191 | pci_name(dev)); | ||
| 192 | r->flags &= ~IORESOURCE_ROM_ENABLE; | 189 | r->flags &= ~IORESOURCE_ROM_ENABLE; |
| 193 | pci_read_config_dword(dev, | 190 | pci_read_config_dword(dev, |
| 194 | dev->rom_base_reg, ®); | 191 | dev->rom_base_reg, ®); |
| @@ -257,8 +254,7 @@ void pcibios_set_master(struct pci_dev *dev) | |||
| 257 | lat = pcibios_max_latency; | 254 | lat = pcibios_max_latency; |
| 258 | else | 255 | else |
| 259 | return; | 256 | return; |
| 260 | printk(KERN_DEBUG "PCI: Setting latency timer of device %s to %d\n", | 257 | dev_printk(KERN_DEBUG, &dev->dev, "setting latency timer to %d\n", lat); |
| 261 | pci_name(dev), lat); | ||
| 262 | pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); | 258 | pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); |
| 263 | } | 259 | } |
| 264 | 260 | ||
| @@ -280,6 +276,7 @@ static void pci_track_mmap_page_range(struct vm_area_struct *vma) | |||
| 280 | static struct vm_operations_struct pci_mmap_ops = { | 276 | static struct vm_operations_struct pci_mmap_ops = { |
| 281 | .open = pci_track_mmap_page_range, | 277 | .open = pci_track_mmap_page_range, |
| 282 | .close = pci_unmap_page_range, | 278 | .close = pci_unmap_page_range, |
| 279 | .access = generic_access_phys, | ||
| 283 | }; | 280 | }; |
| 284 | 281 | ||
| 285 | int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, | 282 | int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, |
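The one functional change hiding in i386.c is the new ->access hook on pci_mmap_ops: generic_access_phys() lets access_process_vm(), and therefore ptrace and /proc/<pid>/mem, read and write an MMIO mapping, internally going through ioremap_prot() from the earlier hunk so the mapping's caching attributes are respected. Any driver that mmap()s device memory can opt in the same way; a minimal sketch:

#include <linux/mm.h>

/* Minimal sketch: wiring ->access is all a vm_operations_struct needs
 * for debugger access to an MMIO vma; this assumes the vma itself was
 * set up elsewhere, e.g. with io_remap_pfn_range(). */
static struct vm_operations_struct mmio_vm_ops = {
	.access = generic_access_phys,
};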
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 6a06a2eb0597..fec0123b33a9 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c | |||
| @@ -436,7 +436,7 @@ static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq) | |||
| 436 | { | 436 | { |
| 437 | WARN_ON_ONCE(pirq >= 9); | 437 | WARN_ON_ONCE(pirq >= 9); |
| 438 | if (pirq > 8) { | 438 | if (pirq > 8) { |
| 439 | printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq); | 439 | dev_info(&dev->dev, "VLSI router PIRQ escape (%d)\n", pirq); |
| 440 | return 0; | 440 | return 0; |
| 441 | } | 441 | } |
| 442 | return read_config_nybble(router, 0x74, pirq-1); | 442 | return read_config_nybble(router, 0x74, pirq-1); |
| @@ -446,7 +446,7 @@ static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq, | |||
| 446 | { | 446 | { |
| 447 | WARN_ON_ONCE(pirq >= 9); | 447 | WARN_ON_ONCE(pirq >= 9); |
| 448 | if (pirq > 8) { | 448 | if (pirq > 8) { |
| 449 | printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq); | 449 | dev_info(&dev->dev, "VLSI router PIRQ escape (%d)\n", pirq); |
| 450 | return 0; | 450 | return 0; |
| 451 | } | 451 | } |
| 452 | write_config_nybble(router, 0x74, pirq-1, irq); | 452 | write_config_nybble(router, 0x74, pirq-1, irq); |
| @@ -492,15 +492,17 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq | |||
| 492 | irq = 0; | 492 | irq = 0; |
| 493 | if (pirq <= 4) | 493 | if (pirq <= 4) |
| 494 | irq = read_config_nybble(router, 0x56, pirq - 1); | 494 | irq = read_config_nybble(router, 0x56, pirq - 1); |
| 495 | printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n", | 495 | dev_info(&dev->dev, |
| 496 | dev->vendor, dev->device, pirq, irq); | 496 | "AMD756: dev [%04x/%04x], router PIRQ %d get IRQ %d\n", |
| 497 | dev->vendor, dev->device, pirq, irq); | ||
| 497 | return irq; | 498 | return irq; |
| 498 | } | 499 | } |
| 499 | 500 | ||
| 500 | static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) | 501 | static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) |
| 501 | { | 502 | { |
| 502 | printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n", | 503 | dev_info(&dev->dev, |
| 503 | dev->vendor, dev->device, pirq, irq); | 504 | "AMD756: dev [%04x/%04x], router PIRQ %d set IRQ %d\n", |
| 505 | dev->vendor, dev->device, pirq, irq); | ||
| 504 | if (pirq <= 4) | 506 | if (pirq <= 4) |
| 505 | write_config_nybble(router, 0x56, pirq - 1, irq); | 507 | write_config_nybble(router, 0x56, pirq - 1, irq); |
| 506 | return 1; | 508 | return 1; |
| @@ -730,7 +732,6 @@ static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, | |||
| 730 | switch (device) { | 732 | switch (device) { |
| 731 | case PCI_DEVICE_ID_AL_M1533: | 733 | case PCI_DEVICE_ID_AL_M1533: |
| 732 | case PCI_DEVICE_ID_AL_M1563: | 734 | case PCI_DEVICE_ID_AL_M1563: |
| 733 | printk(KERN_DEBUG "PCI: Using ALI IRQ Router\n"); | ||
| 734 | r->name = "ALI"; | 735 | r->name = "ALI"; |
| 735 | r->get = pirq_ali_get; | 736 | r->get = pirq_ali_get; |
| 736 | r->set = pirq_ali_set; | 737 | r->set = pirq_ali_set; |
| @@ -840,11 +841,9 @@ static void __init pirq_find_router(struct irq_router *r) | |||
| 840 | h->probe(r, pirq_router_dev, pirq_router_dev->device)) | 841 | h->probe(r, pirq_router_dev, pirq_router_dev->device)) |
| 841 | break; | 842 | break; |
| 842 | } | 843 | } |
| 843 | printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n", | 844 | dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x/%04x]\n", |
| 844 | pirq_router.name, | 845 | pirq_router.name, |
| 845 | pirq_router_dev->vendor, | 846 | pirq_router_dev->vendor, pirq_router_dev->device); |
| 846 | pirq_router_dev->device, | ||
| 847 | pci_name(pirq_router_dev)); | ||
| 848 | 847 | ||
| 849 | /* The device remains referenced for the kernel lifetime */ | 848 | /* The device remains referenced for the kernel lifetime */ |
| 850 | } | 849 | } |
| @@ -877,7 +876,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
| 877 | /* Find IRQ pin */ | 876 | /* Find IRQ pin */ |
| 878 | pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); | 877 | pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); |
| 879 | if (!pin) { | 878 | if (!pin) { |
| 880 | DBG(KERN_DEBUG " -> no interrupt pin\n"); | 879 | dev_dbg(&dev->dev, "no interrupt pin\n"); |
| 881 | return 0; | 880 | return 0; |
| 882 | } | 881 | } |
| 883 | pin = pin - 1; | 882 | pin = pin - 1; |
| @@ -887,20 +886,20 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
| 887 | if (!pirq_table) | 886 | if (!pirq_table) |
| 888 | return 0; | 887 | return 0; |
| 889 | 888 | ||
| 890 | DBG(KERN_DEBUG "IRQ for %s[%c]", pci_name(dev), 'A' + pin); | ||
| 891 | info = pirq_get_info(dev); | 889 | info = pirq_get_info(dev); |
| 892 | if (!info) { | 890 | if (!info) { |
| 893 | DBG(" -> not found in routing table\n" KERN_DEBUG); | 891 | dev_dbg(&dev->dev, "PCI INT %c not found in routing table\n", |
| 892 | 'A' + pin); | ||
| 894 | return 0; | 893 | return 0; |
| 895 | } | 894 | } |
| 896 | pirq = info->irq[pin].link; | 895 | pirq = info->irq[pin].link; |
| 897 | mask = info->irq[pin].bitmap; | 896 | mask = info->irq[pin].bitmap; |
| 898 | if (!pirq) { | 897 | if (!pirq) { |
| 899 | DBG(" -> not routed\n" KERN_DEBUG); | 898 | dev_dbg(&dev->dev, "PCI INT %c not routed\n", 'A' + pin); |
| 900 | return 0; | 899 | return 0; |
| 901 | } | 900 | } |
| 902 | DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, | 901 | dev_dbg(&dev->dev, "PCI INT %c -> PIRQ %02x, mask %04x, excl %04x", |
| 903 | pirq_table->exclusive_irqs); | 902 | 'A' + pin, pirq, mask, pirq_table->exclusive_irqs); |
| 904 | mask &= pcibios_irq_mask; | 903 | mask &= pcibios_irq_mask; |
| 905 | 904 | ||
| 906 | /* Work around broken HP Pavilion Notebooks which assign USB to | 905 | /* Work around broken HP Pavilion Notebooks which assign USB to |
| @@ -930,10 +929,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
| 930 | if (pci_probe & PCI_USE_PIRQ_MASK) | 929 | if (pci_probe & PCI_USE_PIRQ_MASK) |
| 931 | newirq = 0; | 930 | newirq = 0; |
| 932 | else | 931 | else |
| 933 | printk("\n" KERN_WARNING | 932 | dev_warn(&dev->dev, "IRQ %d doesn't match PIRQ mask " |
| 934 | "PCI: IRQ %i for device %s doesn't match PIRQ mask - try pci=usepirqmask\n" | 933 | "%#x; try pci=usepirqmask\n", newirq, mask); |
| 935 | KERN_DEBUG, newirq, | ||
| 936 | pci_name(dev)); | ||
| 937 | } | 934 | } |
| 938 | if (!newirq && assign) { | 935 | if (!newirq && assign) { |
| 939 | for (i = 0; i < 16; i++) { | 936 | for (i = 0; i < 16; i++) { |
| @@ -944,39 +941,35 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
| 944 | newirq = i; | 941 | newirq = i; |
| 945 | } | 942 | } |
| 946 | } | 943 | } |
| 947 | DBG(" -> newirq=%d", newirq); | 944 | dev_dbg(&dev->dev, "PCI INT %c -> newirq %d", 'A' + pin, newirq); |
| 948 | 945 | ||
| 949 | /* Check if it is hardcoded */ | 946 | /* Check if it is hardcoded */ |
| 950 | if ((pirq & 0xf0) == 0xf0) { | 947 | if ((pirq & 0xf0) == 0xf0) { |
| 951 | irq = pirq & 0xf; | 948 | irq = pirq & 0xf; |
| 952 | DBG(" -> hardcoded IRQ %d\n", irq); | 949 | msg = "hardcoded"; |
| 953 | msg = "Hardcoded"; | ||
| 954 | } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \ | 950 | } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \ |
| 955 | ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask))) { | 951 | ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask))) { |
| 956 | DBG(" -> got IRQ %d\n", irq); | 952 | msg = "found"; |
| 957 | msg = "Found"; | ||
| 958 | eisa_set_level_irq(irq); | 953 | eisa_set_level_irq(irq); |
| 959 | } else if (newirq && r->set && | 954 | } else if (newirq && r->set && |
| 960 | (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) { | 955 | (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) { |
| 961 | DBG(" -> assigning IRQ %d", newirq); | ||
| 962 | if (r->set(pirq_router_dev, dev, pirq, newirq)) { | 956 | if (r->set(pirq_router_dev, dev, pirq, newirq)) { |
| 963 | eisa_set_level_irq(newirq); | 957 | eisa_set_level_irq(newirq); |
| 964 | DBG(" ... OK\n"); | 958 | msg = "assigned"; |
| 965 | msg = "Assigned"; | ||
| 966 | irq = newirq; | 959 | irq = newirq; |
| 967 | } | 960 | } |
| 968 | } | 961 | } |
| 969 | 962 | ||
| 970 | if (!irq) { | 963 | if (!irq) { |
| 971 | DBG(" ... failed\n"); | ||
| 972 | if (newirq && mask == (1 << newirq)) { | 964 | if (newirq && mask == (1 << newirq)) { |
| 973 | msg = "Guessed"; | 965 | msg = "guessed"; |
| 974 | irq = newirq; | 966 | irq = newirq; |
| 975 | } else | 967 | } else { |
| 968 | dev_dbg(&dev->dev, "can't route interrupt\n"); | ||
| 976 | return 0; | 969 | return 0; |
| 970 | } | ||
| 977 | } | 971 | } |
| 978 | printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, | 972 | dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin, irq); |
| 979 | pci_name(dev)); | ||
| 980 | 973 | ||
| 981 | /* Update IRQ for all devices with the same pirq value */ | 974 | /* Update IRQ for all devices with the same pirq value */ |
| 982 | while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) { | 975 | while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) { |
| @@ -996,17 +989,17 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
| 996 | (!(pci_probe & PCI_USE_PIRQ_MASK) || \ | 989 | (!(pci_probe & PCI_USE_PIRQ_MASK) || \ |
| 997 | ((1 << dev2->irq) & mask))) { | 990 | ((1 << dev2->irq) & mask))) { |
| 998 | #ifndef CONFIG_PCI_MSI | 991 | #ifndef CONFIG_PCI_MSI |
| 999 | printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n", | 992 | dev_info(&dev2->dev, "IRQ routing conflict: " |
| 1000 | pci_name(dev2), dev2->irq, irq); | 993 | "have IRQ %d, want IRQ %d\n", |
| 994 | dev2->irq, irq); | ||
| 1001 | #endif | 995 | #endif |
| 1002 | continue; | 996 | continue; |
| 1003 | } | 997 | } |
| 1004 | dev2->irq = irq; | 998 | dev2->irq = irq; |
| 1005 | pirq_penalty[irq]++; | 999 | pirq_penalty[irq]++; |
| 1006 | if (dev != dev2) | 1000 | if (dev != dev2) |
| 1007 | printk(KERN_INFO | 1001 | dev_info(&dev->dev, "sharing IRQ %d with %s\n", |
| 1008 | "PCI: Sharing IRQ %d with %s\n", | 1002 | irq, pci_name(dev2)); |
| 1009 | irq, pci_name(dev2)); | ||
| 1010 | } | 1003 | } |
| 1011 | } | 1004 | } |
| 1012 | return 1; | 1005 | return 1; |
| @@ -1025,8 +1018,7 @@ static void __init pcibios_fixup_irqs(void) | |||
| 1025 | * already in use. | 1018 | * already in use. |
| 1026 | */ | 1019 | */ |
| 1027 | if (dev->irq >= 16) { | 1020 | if (dev->irq >= 16) { |
| 1028 | DBG(KERN_DEBUG "%s: ignoring bogus IRQ %d\n", | 1021 | dev_dbg(&dev->dev, "ignoring bogus IRQ %d\n", dev->irq); |
| 1029 | pci_name(dev), dev->irq); | ||
| 1030 | dev->irq = 0; | 1022 | dev->irq = 0; |
| 1031 | } | 1023 | } |
| 1032 | /* | 1024 | /* |
| @@ -1070,12 +1062,12 @@ static void __init pcibios_fixup_irqs(void) | |||
| 1070 | irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, | 1062 | irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, |
| 1071 | PCI_SLOT(bridge->devfn), pin); | 1063 | PCI_SLOT(bridge->devfn), pin); |
| 1072 | if (irq >= 0) | 1064 | if (irq >= 0) |
| 1073 | printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n", | 1065 | dev_warn(&dev->dev, "using bridge %s INT %c to get IRQ %d\n", |
| 1074 | pci_name(bridge), 'A' + pin, irq); | 1066 | pci_name(bridge), |
| 1067 | 'A' + pin, irq); | ||
| 1075 | } | 1068 | } |
| 1076 | if (irq >= 0) { | 1069 | if (irq >= 0) { |
| 1077 | printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", | 1070 | dev_info(&dev->dev, "PCI->APIC IRQ transform: INT %c -> IRQ %d\n", 'A' + pin, irq); |
| 1078 | pci_name(dev), 'A' + pin, irq); | ||
| 1079 | dev->irq = irq; | 1071 | dev->irq = irq; |
| 1080 | } | 1072 | } |
| 1081 | } | 1073 | } |
| @@ -1231,25 +1223,24 @@ static int pirq_enable_irq(struct pci_dev *dev) | |||
| 1231 | irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, | 1223 | irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, |
| 1232 | PCI_SLOT(bridge->devfn), pin); | 1224 | PCI_SLOT(bridge->devfn), pin); |
| 1233 | if (irq >= 0) | 1225 | if (irq >= 0) |
| 1234 | printk(KERN_WARNING | 1226 | dev_warn(&dev->dev, "using bridge %s " |
| 1235 | "PCI: using PPB %s[%c] to get irq %d\n", | 1227 | "INT %c to get IRQ %d\n", |
| 1236 | pci_name(bridge), | 1228 | pci_name(bridge), 'A' + pin, |
| 1237 | 'A' + pin, irq); | 1229 | irq); |
| 1238 | dev = bridge; | 1230 | dev = bridge; |
| 1239 | } | 1231 | } |
| 1240 | dev = temp_dev; | 1232 | dev = temp_dev; |
| 1241 | if (irq >= 0) { | 1233 | if (irq >= 0) { |
| 1242 | printk(KERN_INFO | 1234 | dev_info(&dev->dev, "PCI->APIC IRQ transform: " |
| 1243 | "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", | 1235 | "INT %c -> IRQ %d\n", 'A' + pin, irq); |
| 1244 | pci_name(dev), 'A' + pin, irq); | ||
| 1245 | dev->irq = irq; | 1236 | dev->irq = irq; |
| 1246 | return 0; | 1237 | return 0; |
| 1247 | } else | 1238 | } else |
| 1248 | msg = " Probably buggy MP table."; | 1239 | msg = "; probably buggy MP table"; |
| 1249 | } else if (pci_probe & PCI_BIOS_IRQ_SCAN) | 1240 | } else if (pci_probe & PCI_BIOS_IRQ_SCAN) |
| 1250 | msg = ""; | 1241 | msg = ""; |
| 1251 | else | 1242 | else |
| 1252 | msg = " Please try using pci=biosirq."; | 1243 | msg = "; please try using pci=biosirq"; |
| 1253 | 1244 | ||
| 1254 | /* | 1245 | /* |
| 1255 | * With IDE legacy devices the IRQ lookup failure is not | 1246 | * With IDE legacy devices the IRQ lookup failure is not |
| @@ -1259,9 +1250,8 @@ static int pirq_enable_irq(struct pci_dev *dev) | |||
| 1259 | !(dev->class & 0x5)) | 1250 | !(dev->class & 0x5)) |
| 1260 | return 0; | 1251 | return 0; |
| 1261 | 1252 | ||
| 1262 | printk(KERN_WARNING | 1253 | dev_warn(&dev->dev, "can't find IRQ for PCI INT %c%s\n", |
| 1263 | "PCI: No IRQ known for interrupt pin %c of device %s.%s\n", | 1254 | 'A' + pin, msg); |
| 1264 | 'A' + pin, pci_name(dev), msg); | ||
| 1265 | } | 1255 | } |
| 1266 | return 0; | 1256 | return 0; |
| 1267 | } | 1257 | } |
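Beyond the mechanical dev_* conversion, irq.c also regularizes the wording: the interrupt pin is now always rendered as "PCI INT %c", the status words ("hardcoded", "found", "assigned", "guessed") are lowercased and folded into a single dev_info() at the end of the lookup, and the old trick of embedding KERN_DEBUG mid-string to continue a partially printed line goes away with them.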
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index f4b16dc11dad..1177845d3186 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c | |||
| @@ -131,13 +131,14 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d) | |||
| 131 | u8 busno, suba, subb; | 131 | u8 busno, suba, subb; |
| 132 | int quad = BUS2QUAD(d->bus->number); | 132 | int quad = BUS2QUAD(d->bus->number); |
| 133 | 133 | ||
| 134 | printk("PCI: Searching for i450NX host bridges on %s\n", pci_name(d)); | 134 | dev_info(&d->dev, "searching for i450NX host bridges\n"); |
| 135 | reg = 0xd0; | 135 | reg = 0xd0; |
| 136 | for(pxb=0; pxb<2; pxb++) { | 136 | for(pxb=0; pxb<2; pxb++) { |
| 137 | pci_read_config_byte(d, reg++, &busno); | 137 | pci_read_config_byte(d, reg++, &busno); |
| 138 | pci_read_config_byte(d, reg++, &suba); | 138 | pci_read_config_byte(d, reg++, &suba); |
| 139 | pci_read_config_byte(d, reg++, &subb); | 139 | pci_read_config_byte(d, reg++, &subb); |
| 140 | DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb); | 140 | dev_dbg(&d->dev, "i450NX PXB %d: %02x/%02x/%02x\n", |
| 141 | pxb, busno, suba, subb); | ||
| 141 | if (busno) { | 142 | if (busno) { |
| 142 | /* Bus A */ | 143 | /* Bus A */ |
| 143 | pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, busno)); | 144 | pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, busno)); |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index e693812ac59a..d8faf79a0a1d 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
| @@ -367,7 +367,7 @@ static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector) | |||
| 367 | 367 | ||
| 368 | cpus_and(mask, mask, cpu_online_map); | 368 | cpus_and(mask, mask, cpu_online_map); |
| 369 | 369 | ||
| 370 | for_each_cpu_mask(cpu, mask) | 370 | for_each_cpu_mask_nr(cpu, mask) |
| 371 | xen_send_IPI_one(cpu, vector); | 371 | xen_send_IPI_one(cpu, vector); |
| 372 | } | 372 | } |
| 373 | 373 | ||
| @@ -378,7 +378,7 @@ static void xen_smp_send_call_function_ipi(cpumask_t mask) | |||
| 378 | xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); | 378 | xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); |
| 379 | 379 | ||
| 380 | /* Make sure other vcpus get a chance to run if they need to. */ | 380 | /* Make sure other vcpus get a chance to run if they need to. */ |
| 381 | for_each_cpu_mask(cpu, mask) { | 381 | for_each_cpu_mask_nr(cpu, mask) { |
| 382 | if (xen_vcpu_stolen(cpu)) { | 382 | if (xen_vcpu_stolen(cpu)) { |
| 383 | HYPERVISOR_sched_op(SCHEDOP_yield, 0); | 383 | HYPERVISOR_sched_op(SCHEDOP_yield, 0); |
| 384 | break; | 384 | break; |
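The Xen SMP hunks adopt the _nr iterator variants, which stop scanning at nr_cpu_ids (the highest CPU id the running system can actually have) instead of the compile-time NR_CPUS; that matters when a NR_CPUS=4096 build boots on a four-CPU box. Roughly, as a paraphrase of the era's include/linux/cpumask.h rather than a verbatim copy:

/* Paraphrased shape of the two iterators; the real macros differ in
 * detail but share the bound being tightened here. */
#define for_each_cpu_mask_sketch(cpu, mask)		\
	for ((cpu) = first_cpu(mask);			\
	     (cpu) < NR_CPUS;				\
	     (cpu) = next_cpu((cpu), (mask)))

#define for_each_cpu_mask_nr_sketch(cpu, mask)		\
	for ((cpu) = first_cpu(mask);			\
	     (cpu) < nr_cpu_ids;			\
	     (cpu) = next_cpu_nr((cpu), (mask)))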
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 4038cbfe3331..7f58304fafb3 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S | |||
| @@ -173,7 +173,7 @@ ENTRY(xen_sysexit) | |||
| 173 | pushq $__USER32_CS | 173 | pushq $__USER32_CS |
| 174 | pushq %rdx | 174 | pushq %rdx |
| 175 | 175 | ||
| 176 | pushq $VGCF_in_syscall | 176 | pushq $0 |
| 177 | 1: jmp hypercall_iret | 177 | 1: jmp hypercall_iret |
| 178 | ENDPATCH(xen_sysexit) | 178 | ENDPATCH(xen_sysexit) |
| 179 | RELOC(xen_sysexit, 1b+1) | 179 | RELOC(xen_sysexit, 1b+1) |
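The final hunk is a one-word bug fix rather than cleanup: xen_sysexit returns to 32-bit user space, and pushing VGCF_in_syscall onto the hypercall_iret frame would tell the hypervisor to take its fast syscall-return path, which is only appropriate for 64-bit syscall exits; pushing 0 forces the ordinary iret return. That is my reading of the flag's meaning; the patch itself carries no commentary.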
