diff options
author | Len Brown <len.brown@intel.com> | 2011-03-23 02:34:54 -0400 |
---|---|---|
committer | Len Brown <len.brown@intel.com> | 2011-03-23 02:34:54 -0400 |
commit | 02e2407858fd62053bf60349c0e72cd1c7a4a60e (patch) | |
tree | 0ebdbddc97d3abbc675916010e7771065b70c137 /arch/x86 | |
parent | 96e1c408ea8a556c5b51e0e7d56bd2afbfbf5fe9 (diff) | |
parent | 6447f55da90b77faec1697d499ed7986bb4f6de6 (diff) |
Merge branch 'linus' into release
Conflicts:
arch/x86/kernel/acpi/sleep.c
Signed-off-by: Len Brown <len.brown@intel.com>
Diffstat (limited to 'arch/x86')
227 files changed, 6249 insertions, 3981 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d5ed94d30aad..d57ddd7573cc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -64,8 +64,12 @@ config X86 | |||
64 | select HAVE_TEXT_POKE_SMP | 64 | select HAVE_TEXT_POKE_SMP |
65 | select HAVE_GENERIC_HARDIRQS | 65 | select HAVE_GENERIC_HARDIRQS |
66 | select HAVE_SPARSE_IRQ | 66 | select HAVE_SPARSE_IRQ |
67 | select GENERIC_FIND_FIRST_BIT | ||
68 | select GENERIC_FIND_NEXT_BIT | ||
67 | select GENERIC_IRQ_PROBE | 69 | select GENERIC_IRQ_PROBE |
68 | select GENERIC_PENDING_IRQ if SMP | 70 | select GENERIC_PENDING_IRQ if SMP |
71 | select GENERIC_IRQ_SHOW | ||
72 | select IRQ_FORCED_THREADING | ||
69 | select USE_GENERIC_SMP_HELPERS if SMP | 73 | select USE_GENERIC_SMP_HELPERS if SMP |
70 | 74 | ||
71 | config INSTRUCTION_DECODER | 75 | config INSTRUCTION_DECODER |
@@ -119,7 +123,7 @@ config NEED_SG_DMA_LENGTH | |||
119 | def_bool y | 123 | def_bool y |
120 | 124 | ||
121 | config GENERIC_ISA_DMA | 125 | config GENERIC_ISA_DMA |
122 | def_bool y | 126 | def_bool ISA_DMA_API |
123 | 127 | ||
124 | config GENERIC_IOMAP | 128 | config GENERIC_IOMAP |
125 | def_bool y | 129 | def_bool y |
@@ -139,7 +143,7 @@ config GENERIC_GPIO | |||
139 | bool | 143 | bool |
140 | 144 | ||
141 | config ARCH_MAY_HAVE_PC_FDC | 145 | config ARCH_MAY_HAVE_PC_FDC |
142 | def_bool y | 146 | def_bool ISA_DMA_API |
143 | 147 | ||
144 | config RWSEM_GENERIC_SPINLOCK | 148 | config RWSEM_GENERIC_SPINLOCK |
145 | def_bool !X86_XADD | 149 | def_bool !X86_XADD |
@@ -217,10 +221,6 @@ config X86_HT | |||
217 | def_bool y | 221 | def_bool y |
218 | depends on SMP | 222 | depends on SMP |
219 | 223 | ||
220 | config X86_TRAMPOLINE | ||
221 | def_bool y | ||
222 | depends on SMP || (64BIT && ACPI_SLEEP) | ||
223 | |||
224 | config X86_32_LAZY_GS | 224 | config X86_32_LAZY_GS |
225 | def_bool y | 225 | def_bool y |
226 | depends on X86_32 && !CC_STACKPROTECTOR | 226 | depends on X86_32 && !CC_STACKPROTECTOR |
@@ -382,6 +382,8 @@ config X86_INTEL_CE | |||
382 | depends on X86_32 | 382 | depends on X86_32 |
383 | depends on X86_EXTENDED_PLATFORM | 383 | depends on X86_EXTENDED_PLATFORM |
384 | select X86_REBOOTFIXUPS | 384 | select X86_REBOOTFIXUPS |
385 | select OF | ||
386 | select OF_EARLY_FLATTREE | ||
385 | ---help--- | 387 | ---help--- |
386 | Select for the Intel CE media processor (CE4100) SOC. | 388 | Select for the Intel CE media processor (CE4100) SOC. |
387 | This option compiles in support for the CE4100 SOC for settop | 389 | This option compiles in support for the CE4100 SOC for settop |
@@ -811,7 +813,7 @@ config X86_LOCAL_APIC | |||
811 | 813 | ||
812 | config X86_IO_APIC | 814 | config X86_IO_APIC |
813 | def_bool y | 815 | def_bool y |
814 | depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC | 816 | depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC |
815 | 817 | ||
816 | config X86_VISWS_APIC | 818 | config X86_VISWS_APIC |
817 | def_bool y | 819 | def_bool y |
@@ -1705,7 +1707,7 @@ config HAVE_ARCH_EARLY_PFN_TO_NID | |||
1705 | depends on NUMA | 1707 | depends on NUMA |
1706 | 1708 | ||
1707 | config USE_PERCPU_NUMA_NODE_ID | 1709 | config USE_PERCPU_NUMA_NODE_ID |
1708 | def_bool X86_64 | 1710 | def_bool y |
1709 | depends on NUMA | 1711 | depends on NUMA |
1710 | 1712 | ||
1711 | menu "Power management and ACPI options" | 1713 | menu "Power management and ACPI options" |
@@ -2000,9 +2002,13 @@ source "drivers/pci/pcie/Kconfig" | |||
2000 | 2002 | ||
2001 | source "drivers/pci/Kconfig" | 2003 | source "drivers/pci/Kconfig" |
2002 | 2004 | ||
2003 | # x86_64 have no ISA slots, but do have ISA-style DMA. | 2005 | # x86_64 have no ISA slots, but can have ISA-style DMA. |
2004 | config ISA_DMA_API | 2006 | config ISA_DMA_API |
2005 | def_bool y | 2007 | bool "ISA-style DMA support" if (X86_64 && EXPERT) |
2008 | default y | ||
2009 | help | ||
2010 | Enables ISA-style DMA support for devices requiring such controllers. | ||
2011 | If unsure, say Y. | ||
2006 | 2012 | ||
2007 | if X86_32 | 2013 | if X86_32 |
2008 | 2014 | ||
@@ -2066,9 +2072,10 @@ config SCx200HR_TIMER | |||
2066 | 2072 | ||
2067 | config OLPC | 2073 | config OLPC |
2068 | bool "One Laptop Per Child support" | 2074 | bool "One Laptop Per Child support" |
2075 | depends on !X86_PAE | ||
2069 | select GPIOLIB | 2076 | select GPIOLIB |
2070 | select OLPC_OPENFIRMWARE | 2077 | select OF |
2071 | depends on !X86_64 && !X86_PAE | 2078 | select OF_PROMTREE if PROC_DEVICETREE |
2072 | ---help--- | 2079 | ---help--- |
2073 | Add support for detecting the unique features of the OLPC | 2080 | Add support for detecting the unique features of the OLPC |
2074 | XO hardware. | 2081 | XO hardware. |
@@ -2079,21 +2086,6 @@ config OLPC_XO1 | |||
2079 | ---help--- | 2086 | ---help--- |
2080 | Add support for non-essential features of the OLPC XO-1 laptop. | 2087 | Add support for non-essential features of the OLPC XO-1 laptop. |
2081 | 2088 | ||
2082 | config OLPC_OPENFIRMWARE | ||
2083 | bool "Support for OLPC's Open Firmware" | ||
2084 | depends on !X86_64 && !X86_PAE | ||
2085 | default n | ||
2086 | select OF | ||
2087 | help | ||
2088 | This option adds support for the implementation of Open Firmware | ||
2089 | that is used on the OLPC XO-1 Children's Machine. | ||
2090 | If unsure, say N here. | ||
2091 | |||
2092 | config OLPC_OPENFIRMWARE_DT | ||
2093 | bool | ||
2094 | default y if OLPC_OPENFIRMWARE && PROC_DEVICETREE | ||
2095 | select OF_PROMTREE | ||
2096 | |||
2097 | endif # X86_32 | 2089 | endif # X86_32 |
2098 | 2090 | ||
2099 | config AMD_NB | 2091 | config AMD_NB |
@@ -2138,6 +2130,11 @@ config SYSVIPC_COMPAT | |||
2138 | def_bool y | 2130 | def_bool y |
2139 | depends on COMPAT && SYSVIPC | 2131 | depends on COMPAT && SYSVIPC |
2140 | 2132 | ||
2133 | config KEYS_COMPAT | ||
2134 | bool | ||
2135 | depends on COMPAT && KEYS | ||
2136 | default y | ||
2137 | |||
2141 | endmenu | 2138 | endmenu |
2142 | 2139 | ||
2143 | 2140 | ||
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 283c5a6a03a6..d161e939df62 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu | |||
@@ -294,11 +294,6 @@ config X86_GENERIC | |||
294 | 294 | ||
295 | endif | 295 | endif |
296 | 296 | ||
297 | config X86_CPU | ||
298 | def_bool y | ||
299 | select GENERIC_FIND_FIRST_BIT | ||
300 | select GENERIC_FIND_NEXT_BIT | ||
301 | |||
302 | # | 297 | # |
303 | # Define implied options from the CPU selection here | 298 | # Define implied options from the CPU selection here |
304 | config X86_INTERNODE_CACHE_SHIFT | 299 | config X86_INTERNODE_CACHE_SHIFT |
@@ -331,7 +326,7 @@ config X86_PPRO_FENCE | |||
331 | Old PentiumPro multiprocessor systems had errata that could cause | 326 | Old PentiumPro multiprocessor systems had errata that could cause |
332 | memory operations to violate the x86 ordering standard in rare cases. | 327 | memory operations to violate the x86 ordering standard in rare cases. |
333 | Enabling this option will attempt to work around some (but not all) | 328 | Enabling this option will attempt to work around some (but not all) |
334 | occurances of this problem, at the cost of much heavier spinlock and | 329 | occurrences of this problem, at the cost of much heavier spinlock and |
335 | memory barrier operations. | 330 | memory barrier operations. |
336 | 331 | ||
337 | If unsure, say n here. Even distro kernels should think twice before | 332 | If unsure, say n here. Even distro kernels should think twice before |
@@ -371,7 +366,7 @@ config X86_INTEL_USERCOPY | |||
371 | 366 | ||
372 | config X86_USE_PPRO_CHECKSUM | 367 | config X86_USE_PPRO_CHECKSUM |
373 | def_bool y | 368 | def_bool y |
374 | depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM | 369 | depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM |
375 | 370 | ||
376 | config X86_USE_3DNOW | 371 | config X86_USE_3DNOW |
377 | def_bool y | 372 | def_bool y |
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 8fe2a4966b7a..adcf794b22e2 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S | |||
@@ -1346,7 +1346,7 @@ _zero_cipher_left_decrypt: | |||
1346 | and $15, %r13 # %r13 = arg4 (mod 16) | 1346 | and $15, %r13 # %r13 = arg4 (mod 16) |
1347 | je _multiple_of_16_bytes_decrypt | 1347 | je _multiple_of_16_bytes_decrypt |
1348 | 1348 | ||
1349 | # Handle the last <16 byte block seperately | 1349 | # Handle the last <16 byte block separately |
1350 | 1350 | ||
1351 | paddd ONE(%rip), %xmm0 # increment CNT to get Yn | 1351 | paddd ONE(%rip), %xmm0 # increment CNT to get Yn |
1352 | movdqa SHUF_MASK(%rip), %xmm10 | 1352 | movdqa SHUF_MASK(%rip), %xmm10 |
@@ -1355,7 +1355,7 @@ _zero_cipher_left_decrypt: | |||
1355 | ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Yn) | 1355 | ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Yn) |
1356 | sub $16, %r11 | 1356 | sub $16, %r11 |
1357 | add %r13, %r11 | 1357 | add %r13, %r11 |
1358 | movdqu (%arg3,%r11,1), %xmm1 # recieve the last <16 byte block | 1358 | movdqu (%arg3,%r11,1), %xmm1 # receive the last <16 byte block |
1359 | lea SHIFT_MASK+16(%rip), %r12 | 1359 | lea SHIFT_MASK+16(%rip), %r12 |
1360 | sub %r13, %r12 | 1360 | sub %r13, %r12 |
1361 | # adjust the shuffle mask pointer to be able to shift 16-%r13 bytes | 1361 | # adjust the shuffle mask pointer to be able to shift 16-%r13 bytes |
@@ -1607,7 +1607,7 @@ _zero_cipher_left_encrypt: | |||
1607 | and $15, %r13 # %r13 = arg4 (mod 16) | 1607 | and $15, %r13 # %r13 = arg4 (mod 16) |
1608 | je _multiple_of_16_bytes_encrypt | 1608 | je _multiple_of_16_bytes_encrypt |
1609 | 1609 | ||
1610 | # Handle the last <16 Byte block seperately | 1610 | # Handle the last <16 Byte block separately |
1611 | paddd ONE(%rip), %xmm0 # INCR CNT to get Yn | 1611 | paddd ONE(%rip), %xmm0 # INCR CNT to get Yn |
1612 | movdqa SHUF_MASK(%rip), %xmm10 | 1612 | movdqa SHUF_MASK(%rip), %xmm10 |
1613 | PSHUFB_XMM %xmm10, %xmm0 | 1613 | PSHUFB_XMM %xmm10, %xmm0 |
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index e1e60c7d5813..e0e6340c8dad 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c | |||
@@ -873,22 +873,18 @@ rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len) | |||
873 | crypto_ablkcipher_clear_flags(ctr_tfm, ~0); | 873 | crypto_ablkcipher_clear_flags(ctr_tfm, ~0); |
874 | 874 | ||
875 | ret = crypto_ablkcipher_setkey(ctr_tfm, key, key_len); | 875 | ret = crypto_ablkcipher_setkey(ctr_tfm, key, key_len); |
876 | if (ret) { | 876 | if (ret) |
877 | crypto_free_ablkcipher(ctr_tfm); | 877 | goto out_free_ablkcipher; |
878 | return ret; | ||
879 | } | ||
880 | 878 | ||
879 | ret = -ENOMEM; | ||
881 | req = ablkcipher_request_alloc(ctr_tfm, GFP_KERNEL); | 880 | req = ablkcipher_request_alloc(ctr_tfm, GFP_KERNEL); |
882 | if (!req) { | 881 | if (!req) |
883 | crypto_free_ablkcipher(ctr_tfm); | 882 | goto out_free_ablkcipher; |
884 | return -EINVAL; | ||
885 | } | ||
886 | 883 | ||
887 | req_data = kmalloc(sizeof(*req_data), GFP_KERNEL); | 884 | req_data = kmalloc(sizeof(*req_data), GFP_KERNEL); |
888 | if (!req_data) { | 885 | if (!req_data) |
889 | crypto_free_ablkcipher(ctr_tfm); | 886 | goto out_free_request; |
890 | return -ENOMEM; | 887 | |
891 | } | ||
892 | memset(req_data->iv, 0, sizeof(req_data->iv)); | 888 | memset(req_data->iv, 0, sizeof(req_data->iv)); |
893 | 889 | ||
894 | /* Clear the data in the hash sub key container to zero.*/ | 890 | /* Clear the data in the hash sub key container to zero.*/ |
@@ -913,8 +909,10 @@ rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len) | |||
913 | if (!ret) | 909 | if (!ret) |
914 | ret = req_data->result.err; | 910 | ret = req_data->result.err; |
915 | } | 911 | } |
916 | ablkcipher_request_free(req); | ||
917 | kfree(req_data); | 912 | kfree(req_data); |
913 | out_free_request: | ||
914 | ablkcipher_request_free(req); | ||
915 | out_free_ablkcipher: | ||
918 | crypto_free_ablkcipher(ctr_tfm); | 916 | crypto_free_ablkcipher(ctr_tfm); |
919 | return ret; | 917 | return ret; |
920 | } | 918 | } |
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 518bb99c3394..849a9d23c71d 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S | |||
@@ -25,6 +25,8 @@ | |||
25 | #define sysretl_audit ia32_ret_from_sys_call | 25 | #define sysretl_audit ia32_ret_from_sys_call |
26 | #endif | 26 | #endif |
27 | 27 | ||
28 | .section .entry.text, "ax" | ||
29 | |||
28 | #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) | 30 | #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) |
29 | 31 | ||
30 | .macro IA32_ARG_FIXUP noebp=0 | 32 | .macro IA32_ARG_FIXUP noebp=0 |
@@ -126,26 +128,20 @@ ENTRY(ia32_sysenter_target) | |||
126 | */ | 128 | */ |
127 | ENABLE_INTERRUPTS(CLBR_NONE) | 129 | ENABLE_INTERRUPTS(CLBR_NONE) |
128 | movl %ebp,%ebp /* zero extension */ | 130 | movl %ebp,%ebp /* zero extension */ |
129 | pushq $__USER32_DS | 131 | pushq_cfi $__USER32_DS |
130 | CFI_ADJUST_CFA_OFFSET 8 | ||
131 | /*CFI_REL_OFFSET ss,0*/ | 132 | /*CFI_REL_OFFSET ss,0*/ |
132 | pushq %rbp | 133 | pushq_cfi %rbp |
133 | CFI_ADJUST_CFA_OFFSET 8 | ||
134 | CFI_REL_OFFSET rsp,0 | 134 | CFI_REL_OFFSET rsp,0 |
135 | pushfq | 135 | pushfq_cfi |
136 | CFI_ADJUST_CFA_OFFSET 8 | ||
137 | /*CFI_REL_OFFSET rflags,0*/ | 136 | /*CFI_REL_OFFSET rflags,0*/ |
138 | movl 8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d | 137 | movl 8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d |
139 | CFI_REGISTER rip,r10 | 138 | CFI_REGISTER rip,r10 |
140 | pushq $__USER32_CS | 139 | pushq_cfi $__USER32_CS |
141 | CFI_ADJUST_CFA_OFFSET 8 | ||
142 | /*CFI_REL_OFFSET cs,0*/ | 140 | /*CFI_REL_OFFSET cs,0*/ |
143 | movl %eax, %eax | 141 | movl %eax, %eax |
144 | pushq %r10 | 142 | pushq_cfi %r10 |
145 | CFI_ADJUST_CFA_OFFSET 8 | ||
146 | CFI_REL_OFFSET rip,0 | 143 | CFI_REL_OFFSET rip,0 |
147 | pushq %rax | 144 | pushq_cfi %rax |
148 | CFI_ADJUST_CFA_OFFSET 8 | ||
149 | cld | 145 | cld |
150 | SAVE_ARGS 0,0,1 | 146 | SAVE_ARGS 0,0,1 |
151 | /* no need to do an access_ok check here because rbp has been | 147 | /* no need to do an access_ok check here because rbp has been |
@@ -182,11 +178,9 @@ sysexit_from_sys_call: | |||
182 | xorq %r9,%r9 | 178 | xorq %r9,%r9 |
183 | xorq %r10,%r10 | 179 | xorq %r10,%r10 |
184 | xorq %r11,%r11 | 180 | xorq %r11,%r11 |
185 | popfq | 181 | popfq_cfi |
186 | CFI_ADJUST_CFA_OFFSET -8 | ||
187 | /*CFI_RESTORE rflags*/ | 182 | /*CFI_RESTORE rflags*/ |
188 | popq %rcx /* User %esp */ | 183 | popq_cfi %rcx /* User %esp */ |
189 | CFI_ADJUST_CFA_OFFSET -8 | ||
190 | CFI_REGISTER rsp,rcx | 184 | CFI_REGISTER rsp,rcx |
191 | TRACE_IRQS_ON | 185 | TRACE_IRQS_ON |
192 | ENABLE_INTERRUPTS_SYSEXIT32 | 186 | ENABLE_INTERRUPTS_SYSEXIT32 |
@@ -421,8 +415,7 @@ ENTRY(ia32_syscall) | |||
421 | */ | 415 | */ |
422 | ENABLE_INTERRUPTS(CLBR_NONE) | 416 | ENABLE_INTERRUPTS(CLBR_NONE) |
423 | movl %eax,%eax | 417 | movl %eax,%eax |
424 | pushq %rax | 418 | pushq_cfi %rax |
425 | CFI_ADJUST_CFA_OFFSET 8 | ||
426 | cld | 419 | cld |
427 | /* note the registers are not zero extended to the sf. | 420 | /* note the registers are not zero extended to the sf. |
428 | this could be a problem. */ | 421 | this could be a problem. */ |
@@ -851,4 +844,8 @@ ia32_sys_call_table: | |||
851 | .quad sys_fanotify_init | 844 | .quad sys_fanotify_init |
852 | .quad sys32_fanotify_mark | 845 | .quad sys32_fanotify_mark |
853 | .quad sys_prlimit64 /* 340 */ | 846 | .quad sys_prlimit64 /* 340 */ |
847 | .quad sys_name_to_handle_at | ||
848 | .quad compat_sys_open_by_handle_at | ||
849 | .quad compat_sys_clock_adjtime | ||
850 | .quad sys_syncfs | ||
854 | ia32_syscall_end: | 851 | ia32_syscall_end: |
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index ef14da1f4ec5..12e0e7dd869c 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <asm/processor.h> | 29 | #include <asm/processor.h> |
30 | #include <asm/mmu.h> | 30 | #include <asm/mmu.h> |
31 | #include <asm/mpspec.h> | 31 | #include <asm/mpspec.h> |
32 | #include <asm/trampoline.h> | ||
32 | 33 | ||
33 | #define COMPILER_DEPENDENT_INT64 long long | 34 | #define COMPILER_DEPENDENT_INT64 long long |
34 | #define COMPILER_DEPENDENT_UINT64 unsigned long long | 35 | #define COMPILER_DEPENDENT_UINT64 unsigned long long |
@@ -116,7 +117,8 @@ static inline void acpi_disable_pci(void) | |||
116 | /* Low-level suspend routine. */ | 117 | /* Low-level suspend routine. */ |
117 | extern int acpi_suspend_lowlevel(void); | 118 | extern int acpi_suspend_lowlevel(void); |
118 | 119 | ||
119 | extern unsigned long acpi_wakeup_address; | 120 | extern const unsigned char acpi_wakeup_code[]; |
121 | #define acpi_wakeup_address (__pa(TRAMPOLINE_SYM(acpi_wakeup_code))) | ||
120 | 122 | ||
121 | /* early initialization routine */ | 123 | /* early initialization routine */ |
122 | extern void acpi_reserve_wakeup_memory(void); | 124 | extern void acpi_reserve_wakeup_memory(void); |
@@ -185,15 +187,7 @@ struct bootnode; | |||
185 | 187 | ||
186 | #ifdef CONFIG_ACPI_NUMA | 188 | #ifdef CONFIG_ACPI_NUMA |
187 | extern int acpi_numa; | 189 | extern int acpi_numa; |
188 | extern void acpi_get_nodes(struct bootnode *physnodes, unsigned long start, | 190 | extern int x86_acpi_numa_init(void); |
189 | unsigned long end); | ||
190 | extern int acpi_scan_nodes(unsigned long start, unsigned long end); | ||
191 | #define NR_NODE_MEMBLKS (MAX_NUMNODES*2) | ||
192 | |||
193 | #ifdef CONFIG_NUMA_EMU | ||
194 | extern void acpi_fake_nodes(const struct bootnode *fake_nodes, | ||
195 | int num_nodes); | ||
196 | #endif | ||
197 | #endif /* CONFIG_ACPI_NUMA */ | 191 | #endif /* CONFIG_ACPI_NUMA */ |
198 | 192 | ||
199 | #define acpi_unlazy_tlb(x) leave_mm(x) | 193 | #define acpi_unlazy_tlb(x) leave_mm(x) |
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 64dc82ee19f0..331682231bb4 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h | |||
@@ -9,23 +9,20 @@ struct amd_nb_bus_dev_range { | |||
9 | u8 dev_limit; | 9 | u8 dev_limit; |
10 | }; | 10 | }; |
11 | 11 | ||
12 | extern struct pci_device_id amd_nb_misc_ids[]; | 12 | extern const struct pci_device_id amd_nb_misc_ids[]; |
13 | extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[]; | 13 | extern const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[]; |
14 | struct bootnode; | 14 | struct bootnode; |
15 | 15 | ||
16 | extern int early_is_amd_nb(u32 value); | 16 | extern bool early_is_amd_nb(u32 value); |
17 | extern int amd_cache_northbridges(void); | 17 | extern int amd_cache_northbridges(void); |
18 | extern void amd_flush_garts(void); | 18 | extern void amd_flush_garts(void); |
19 | extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn); | 19 | extern int amd_numa_init(void); |
20 | extern int amd_scan_nodes(void); | 20 | extern int amd_get_subcaches(int); |
21 | 21 | extern int amd_set_subcaches(int, int); | |
22 | #ifdef CONFIG_NUMA_EMU | ||
23 | extern void amd_fake_nodes(const struct bootnode *nodes, int nr_nodes); | ||
24 | extern void amd_get_nodes(struct bootnode *nodes); | ||
25 | #endif | ||
26 | 22 | ||
27 | struct amd_northbridge { | 23 | struct amd_northbridge { |
28 | struct pci_dev *misc; | 24 | struct pci_dev *misc; |
25 | struct pci_dev *link; | ||
29 | }; | 26 | }; |
30 | 27 | ||
31 | struct amd_northbridge_info { | 28 | struct amd_northbridge_info { |
@@ -35,17 +32,18 @@ struct amd_northbridge_info { | |||
35 | }; | 32 | }; |
36 | extern struct amd_northbridge_info amd_northbridges; | 33 | extern struct amd_northbridge_info amd_northbridges; |
37 | 34 | ||
38 | #define AMD_NB_GART 0x1 | 35 | #define AMD_NB_GART BIT(0) |
39 | #define AMD_NB_L3_INDEX_DISABLE 0x2 | 36 | #define AMD_NB_L3_INDEX_DISABLE BIT(1) |
37 | #define AMD_NB_L3_PARTITIONING BIT(2) | ||
40 | 38 | ||
41 | #ifdef CONFIG_AMD_NB | 39 | #ifdef CONFIG_AMD_NB |
42 | 40 | ||
43 | static inline int amd_nb_num(void) | 41 | static inline u16 amd_nb_num(void) |
44 | { | 42 | { |
45 | return amd_northbridges.num; | 43 | return amd_northbridges.num; |
46 | } | 44 | } |
47 | 45 | ||
48 | static inline int amd_nb_has_feature(int feature) | 46 | static inline bool amd_nb_has_feature(unsigned feature) |
49 | { | 47 | { |
50 | return ((amd_northbridges.flags & feature) == feature); | 48 | return ((amd_northbridges.flags & feature) == feature); |
51 | } | 49 | } |
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 3c896946f4cc..a279d98ea95e 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h | |||
@@ -220,7 +220,6 @@ extern void enable_IR_x2apic(void); | |||
220 | 220 | ||
221 | extern int get_physical_broadcast(void); | 221 | extern int get_physical_broadcast(void); |
222 | 222 | ||
223 | extern void apic_disable(void); | ||
224 | extern int lapic_get_maxlvt(void); | 223 | extern int lapic_get_maxlvt(void); |
225 | extern void clear_local_APIC(void); | 224 | extern void clear_local_APIC(void); |
226 | extern void connect_bsp_APIC(void); | 225 | extern void connect_bsp_APIC(void); |
@@ -228,7 +227,6 @@ extern void disconnect_bsp_APIC(int virt_wire_setup); | |||
228 | extern void disable_local_APIC(void); | 227 | extern void disable_local_APIC(void); |
229 | extern void lapic_shutdown(void); | 228 | extern void lapic_shutdown(void); |
230 | extern int verify_local_APIC(void); | 229 | extern int verify_local_APIC(void); |
231 | extern void cache_APIC_registers(void); | ||
232 | extern void sync_Arb_IDs(void); | 230 | extern void sync_Arb_IDs(void); |
233 | extern void init_bsp_APIC(void); | 231 | extern void init_bsp_APIC(void); |
234 | extern void setup_local_APIC(void); | 232 | extern void setup_local_APIC(void); |
@@ -239,8 +237,7 @@ void register_lapic_address(unsigned long address); | |||
239 | extern void setup_boot_APIC_clock(void); | 237 | extern void setup_boot_APIC_clock(void); |
240 | extern void setup_secondary_APIC_clock(void); | 238 | extern void setup_secondary_APIC_clock(void); |
241 | extern int APIC_init_uniprocessor(void); | 239 | extern int APIC_init_uniprocessor(void); |
242 | extern void enable_NMI_through_LVT0(void); | 240 | extern int apic_force_enable(unsigned long addr); |
243 | extern int apic_force_enable(void); | ||
244 | 241 | ||
245 | /* | 242 | /* |
246 | * On 32bit this is mach-xxx local | 243 | * On 32bit this is mach-xxx local |
@@ -261,7 +258,6 @@ static inline void lapic_shutdown(void) { } | |||
261 | #define local_apic_timer_c2_ok 1 | 258 | #define local_apic_timer_c2_ok 1 |
262 | static inline void init_apic_mappings(void) { } | 259 | static inline void init_apic_mappings(void) { } |
263 | static inline void disable_local_APIC(void) { } | 260 | static inline void disable_local_APIC(void) { } |
264 | static inline void apic_disable(void) { } | ||
265 | # define setup_boot_APIC_clock x86_init_noop | 261 | # define setup_boot_APIC_clock x86_init_noop |
266 | # define setup_secondary_APIC_clock x86_init_noop | 262 | # define setup_secondary_APIC_clock x86_init_noop |
267 | #endif /* !CONFIG_X86_LOCAL_APIC */ | 263 | #endif /* !CONFIG_X86_LOCAL_APIC */ |
@@ -307,8 +303,6 @@ struct apic { | |||
307 | 303 | ||
308 | void (*setup_apic_routing)(void); | 304 | void (*setup_apic_routing)(void); |
309 | int (*multi_timer_check)(int apic, int irq); | 305 | int (*multi_timer_check)(int apic, int irq); |
310 | int (*apicid_to_node)(int logical_apicid); | ||
311 | int (*cpu_to_logical_apicid)(int cpu); | ||
312 | int (*cpu_present_to_apicid)(int mps_cpu); | 306 | int (*cpu_present_to_apicid)(int mps_cpu); |
313 | void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); | 307 | void (*apicid_to_cpu_present)(int phys_apicid, physid_mask_t *retmap); |
314 | void (*setup_portio_remap)(void); | 308 | void (*setup_portio_remap)(void); |
@@ -356,6 +350,23 @@ struct apic { | |||
356 | void (*icr_write)(u32 low, u32 high); | 350 | void (*icr_write)(u32 low, u32 high); |
357 | void (*wait_icr_idle)(void); | 351 | void (*wait_icr_idle)(void); |
358 | u32 (*safe_wait_icr_idle)(void); | 352 | u32 (*safe_wait_icr_idle)(void); |
353 | |||
354 | #ifdef CONFIG_X86_32 | ||
355 | /* | ||
356 | * Called very early during boot from get_smp_config(). It should | ||
357 | * return the logical apicid. x86_[bios]_cpu_to_apicid is | ||
358 | * initialized before this function is called. | ||
359 | * | ||
360 | * If logical apicid can't be determined that early, the function | ||
361 | * may return BAD_APICID. Logical apicid will be configured after | ||
362 | * init_apic_ldr() while bringing up CPUs. Note that NUMA affinity | ||
363 | * won't be applied properly during early boot in this case. | ||
364 | */ | ||
365 | int (*x86_32_early_logical_apicid)(int cpu); | ||
366 | |||
367 | /* determine CPU -> NUMA node mapping */ | ||
368 | int (*x86_32_numa_cpu_node)(int cpu); | ||
369 | #endif | ||
359 | }; | 370 | }; |
360 | 371 | ||
361 | /* | 372 | /* |
@@ -503,6 +514,11 @@ extern struct apic apic_noop; | |||
503 | 514 | ||
504 | extern struct apic apic_default; | 515 | extern struct apic apic_default; |
505 | 516 | ||
517 | static inline int noop_x86_32_early_logical_apicid(int cpu) | ||
518 | { | ||
519 | return BAD_APICID; | ||
520 | } | ||
521 | |||
506 | /* | 522 | /* |
507 | * Set up the logical destination ID. | 523 | * Set up the logical destination ID. |
508 | * | 524 | * |
@@ -522,7 +538,7 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) | |||
522 | return cpuid_apic >> index_msb; | 538 | return cpuid_apic >> index_msb; |
523 | } | 539 | } |
524 | 540 | ||
525 | extern int default_apicid_to_node(int logical_apicid); | 541 | extern int default_x86_32_numa_cpu_node(int cpu); |
526 | 542 | ||
527 | #endif | 543 | #endif |
528 | 544 | ||
@@ -558,12 +574,6 @@ static inline void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_ma | |||
558 | *retmap = *phys_map; | 574 | *retmap = *phys_map; |
559 | } | 575 | } |
560 | 576 | ||
561 | /* Mapping from cpu number to logical apicid */ | ||
562 | static inline int default_cpu_to_logical_apicid(int cpu) | ||
563 | { | ||
564 | return 1 << cpu; | ||
565 | } | ||
566 | |||
567 | static inline int __default_cpu_present_to_apicid(int mps_cpu) | 577 | static inline int __default_cpu_present_to_apicid(int mps_cpu) |
568 | { | 578 | { |
569 | if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) | 579 | if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) |
@@ -596,8 +606,4 @@ extern int default_check_phys_apicid_present(int phys_apicid); | |||
596 | 606 | ||
597 | #endif /* CONFIG_X86_LOCAL_APIC */ | 607 | #endif /* CONFIG_X86_LOCAL_APIC */ |
598 | 608 | ||
599 | #ifdef CONFIG_X86_32 | ||
600 | extern u8 cpu_2_logical_apicid[NR_CPUS]; | ||
601 | #endif | ||
602 | |||
603 | #endif /* _ASM_X86_APIC_H */ | 609 | #endif /* _ASM_X86_APIC_H */ |
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 47a30ff8e517..d87988bacf3e 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h | |||
@@ -426,4 +426,16 @@ struct local_apic { | |||
426 | #else | 426 | #else |
427 | #define BAD_APICID 0xFFFFu | 427 | #define BAD_APICID 0xFFFFu |
428 | #endif | 428 | #endif |
429 | |||
430 | enum ioapic_irq_destination_types { | ||
431 | dest_Fixed = 0, | ||
432 | dest_LowestPrio = 1, | ||
433 | dest_SMI = 2, | ||
434 | dest__reserved_1 = 3, | ||
435 | dest_NMI = 4, | ||
436 | dest_INIT = 5, | ||
437 | dest__reserved_2 = 6, | ||
438 | dest_ExtINT = 7 | ||
439 | }; | ||
440 | |||
429 | #endif /* _ASM_X86_APICDEF_H */ | 441 | #endif /* _ASM_X86_APICDEF_H */ |
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h index c8bfe63a06de..e020d88ec02d 100644 --- a/arch/x86/include/asm/bootparam.h +++ b/arch/x86/include/asm/bootparam.h | |||
@@ -12,6 +12,7 @@ | |||
12 | /* setup data types */ | 12 | /* setup data types */ |
13 | #define SETUP_NONE 0 | 13 | #define SETUP_NONE 0 |
14 | #define SETUP_E820_EXT 1 | 14 | #define SETUP_E820_EXT 1 |
15 | #define SETUP_DTB 2 | ||
15 | 16 | ||
16 | /* extensible setup data list node */ | 17 | /* extensible setup data list node */ |
17 | struct setup_data { | 18 | struct setup_data { |
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index 62f084478f7e..4e12668711e5 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h | |||
@@ -71,7 +71,7 @@ static inline void set_page_memtype(struct page *pg, unsigned long memtype) { } | |||
71 | * Read/Write : ReadOnly, ReadWrite | 71 | * Read/Write : ReadOnly, ReadWrite |
72 | * Presence : NotPresent | 72 | * Presence : NotPresent |
73 | * | 73 | * |
74 | * Within a catagory, the attributes are mutually exclusive. | 74 | * Within a category, the attributes are mutually exclusive. |
75 | * | 75 | * |
76 | * The implementation of this API will take care of various aspects that | 76 | * The implementation of this API will take care of various aspects that |
77 | * are associated with changing such attributes, such as: | 77 | * are associated with changing such attributes, such as: |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 220e2ea08e80..91f3e087cf21 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
@@ -160,6 +160,7 @@ | |||
160 | #define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */ | 160 | #define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */ |
161 | #define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */ | 161 | #define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */ |
162 | #define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */ | 162 | #define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */ |
163 | #define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */ | ||
163 | 164 | ||
164 | /* | 165 | /* |
165 | * Auxiliary flags: Linux defined - For features scattered in various | 166 | * Auxiliary flags: Linux defined - For features scattered in various |
@@ -279,6 +280,7 @@ extern const char * const x86_power_flags[32]; | |||
279 | #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) | 280 | #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) |
280 | #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) | 281 | #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) |
281 | #define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) | 282 | #define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) |
283 | #define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE) | ||
282 | 284 | ||
283 | #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) | 285 | #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) |
284 | # define cpu_has_invlpg 1 | 286 | # define cpu_has_invlpg 1 |
diff --git a/arch/x86/include/asm/dma.h b/arch/x86/include/asm/dma.h index ca1098a7e580..97b6d8114a43 100644 --- a/arch/x86/include/asm/dma.h +++ b/arch/x86/include/asm/dma.h | |||
@@ -151,6 +151,7 @@ | |||
151 | #define DMA_AUTOINIT 0x10 | 151 | #define DMA_AUTOINIT 0x10 |
152 | 152 | ||
153 | 153 | ||
154 | #ifdef CONFIG_ISA_DMA_API | ||
154 | extern spinlock_t dma_spin_lock; | 155 | extern spinlock_t dma_spin_lock; |
155 | 156 | ||
156 | static inline unsigned long claim_dma_lock(void) | 157 | static inline unsigned long claim_dma_lock(void) |
@@ -164,6 +165,7 @@ static inline void release_dma_lock(unsigned long flags) | |||
164 | { | 165 | { |
165 | spin_unlock_irqrestore(&dma_spin_lock, flags); | 166 | spin_unlock_irqrestore(&dma_spin_lock, flags); |
166 | } | 167 | } |
168 | #endif /* CONFIG_ISA_DMA_API */ | ||
167 | 169 | ||
168 | /* enable/disable a specific DMA channel */ | 170 | /* enable/disable a specific DMA channel */ |
169 | static inline void enable_dma(unsigned int dmanr) | 171 | static inline void enable_dma(unsigned int dmanr) |
@@ -303,9 +305,11 @@ static inline int get_dma_residue(unsigned int dmanr) | |||
303 | } | 305 | } |
304 | 306 | ||
305 | 307 | ||
306 | /* These are in kernel/dma.c: */ | 308 | /* These are in kernel/dma.c because x86 uses CONFIG_GENERIC_ISA_DMA */ |
309 | #ifdef CONFIG_ISA_DMA_API | ||
307 | extern int request_dma(unsigned int dmanr, const char *device_id); | 310 | extern int request_dma(unsigned int dmanr, const char *device_id); |
308 | extern void free_dma(unsigned int dmanr); | 311 | extern void free_dma(unsigned int dmanr); |
312 | #endif | ||
309 | 313 | ||
310 | /* From PCI */ | 314 | /* From PCI */ |
311 | 315 | ||
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index e99d55d74df5..908b96957d88 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h | |||
@@ -96,7 +96,7 @@ extern void e820_setup_gap(void); | |||
96 | extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, | 96 | extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, |
97 | unsigned long start_addr, unsigned long long end_addr); | 97 | unsigned long start_addr, unsigned long long end_addr); |
98 | struct setup_data; | 98 | struct setup_data; |
99 | extern void parse_e820_ext(struct setup_data *data, unsigned long pa_data); | 99 | extern void parse_e820_ext(struct setup_data *data); |
100 | 100 | ||
101 | #if defined(CONFIG_X86_64) || \ | 101 | #if defined(CONFIG_X86_64) || \ |
102 | (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION)) | 102 | (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION)) |
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index 57650ab4a5f5..1cd6d26a0a8d 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h | |||
@@ -16,10 +16,13 @@ BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) | |||
16 | BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) | 16 | BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) |
17 | BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) | 17 | BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) |
18 | 18 | ||
19 | .irpc idx, "01234567" | 19 | .irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \ |
20 | 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 | ||
21 | .if NUM_INVALIDATE_TLB_VECTORS > \idx | ||
20 | BUILD_INTERRUPT3(invalidate_interrupt\idx, | 22 | BUILD_INTERRUPT3(invalidate_interrupt\idx, |
21 | (INVALIDATE_TLB_VECTOR_START)+\idx, | 23 | (INVALIDATE_TLB_VECTOR_START)+\idx, |
22 | smp_invalidate_interrupt) | 24 | smp_invalidate_interrupt) |
25 | .endif | ||
23 | .endr | 26 | .endr |
24 | #endif | 27 | #endif |
25 | 28 | ||
diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h index 06850a7194e1..2c6fc9e62812 100644 --- a/arch/x86/include/asm/frame.h +++ b/arch/x86/include/asm/frame.h | |||
@@ -7,14 +7,12 @@ | |||
7 | frame pointer later */ | 7 | frame pointer later */ |
8 | #ifdef CONFIG_FRAME_POINTER | 8 | #ifdef CONFIG_FRAME_POINTER |
9 | .macro FRAME | 9 | .macro FRAME |
10 | pushl %ebp | 10 | pushl_cfi %ebp |
11 | CFI_ADJUST_CFA_OFFSET 4 | ||
12 | CFI_REL_OFFSET ebp,0 | 11 | CFI_REL_OFFSET ebp,0 |
13 | movl %esp,%ebp | 12 | movl %esp,%ebp |
14 | .endm | 13 | .endm |
15 | .macro ENDFRAME | 14 | .macro ENDFRAME |
16 | popl %ebp | 15 | popl_cfi %ebp |
17 | CFI_ADJUST_CFA_OFFSET -4 | ||
18 | CFI_RESTORE ebp | 16 | CFI_RESTORE ebp |
19 | .endm | 17 | .endm |
20 | #else | 18 | #else |
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h index 1f11ce44e956..d09bb03653f0 100644 --- a/arch/x86/include/asm/futex.h +++ b/arch/x86/include/asm/futex.h | |||
@@ -37,7 +37,7 @@ | |||
37 | "+m" (*uaddr), "=&r" (tem) \ | 37 | "+m" (*uaddr), "=&r" (tem) \ |
38 | : "r" (oparg), "i" (-EFAULT), "1" (0)) | 38 | : "r" (oparg), "i" (-EFAULT), "1" (0)) |
39 | 39 | ||
40 | static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) | 40 | static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) |
41 | { | 41 | { |
42 | int op = (encoded_op >> 28) & 7; | 42 | int op = (encoded_op >> 28) & 7; |
43 | int cmp = (encoded_op >> 24) & 15; | 43 | int cmp = (encoded_op >> 24) & 15; |
@@ -48,7 +48,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) | |||
48 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) | 48 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) |
49 | oparg = 1 << oparg; | 49 | oparg = 1 << oparg; |
50 | 50 | ||
51 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) | 51 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) |
52 | return -EFAULT; | 52 | return -EFAULT; |
53 | 53 | ||
54 | #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP) | 54 | #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP) |
@@ -109,9 +109,10 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) | |||
109 | return ret; | 109 | return ret; |
110 | } | 110 | } |
111 | 111 | ||
112 | static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, | 112 | static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, |
113 | int newval) | 113 | u32 oldval, u32 newval) |
114 | { | 114 | { |
115 | int ret = 0; | ||
115 | 116 | ||
116 | #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP) | 117 | #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP) |
117 | /* Real i386 machines have no cmpxchg instruction */ | 118 | /* Real i386 machines have no cmpxchg instruction */ |
@@ -119,21 +120,22 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, | |||
119 | return -ENOSYS; | 120 | return -ENOSYS; |
120 | #endif | 121 | #endif |
121 | 122 | ||
122 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) | 123 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) |
123 | return -EFAULT; | 124 | return -EFAULT; |
124 | 125 | ||
125 | asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %3, %1\n" | 126 | asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" |
126 | "2:\t.section .fixup, \"ax\"\n" | 127 | "2:\t.section .fixup, \"ax\"\n" |
127 | "3:\tmov %2, %0\n" | 128 | "3:\tmov %3, %0\n" |
128 | "\tjmp 2b\n" | 129 | "\tjmp 2b\n" |
129 | "\t.previous\n" | 130 | "\t.previous\n" |
130 | _ASM_EXTABLE(1b, 3b) | 131 | _ASM_EXTABLE(1b, 3b) |
131 | : "=a" (oldval), "+m" (*uaddr) | 132 | : "+r" (ret), "=a" (oldval), "+m" (*uaddr) |
132 | : "i" (-EFAULT), "r" (newval), "0" (oldval) | 133 | : "i" (-EFAULT), "r" (newval), "1" (oldval) |
133 | : "memory" | 134 | : "memory" |
134 | ); | 135 | ); |
135 | 136 | ||
136 | return oldval; | 137 | *uval = oldval; |
138 | return ret; | ||
137 | } | 139 | } |
138 | 140 | ||
139 | #endif | 141 | #endif |
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 0274ec5a7e62..bb9efe8706e2 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h | |||
@@ -45,6 +45,30 @@ extern void invalidate_interrupt4(void); | |||
45 | extern void invalidate_interrupt5(void); | 45 | extern void invalidate_interrupt5(void); |
46 | extern void invalidate_interrupt6(void); | 46 | extern void invalidate_interrupt6(void); |
47 | extern void invalidate_interrupt7(void); | 47 | extern void invalidate_interrupt7(void); |
48 | extern void invalidate_interrupt8(void); | ||
49 | extern void invalidate_interrupt9(void); | ||
50 | extern void invalidate_interrupt10(void); | ||
51 | extern void invalidate_interrupt11(void); | ||
52 | extern void invalidate_interrupt12(void); | ||
53 | extern void invalidate_interrupt13(void); | ||
54 | extern void invalidate_interrupt14(void); | ||
55 | extern void invalidate_interrupt15(void); | ||
56 | extern void invalidate_interrupt16(void); | ||
57 | extern void invalidate_interrupt17(void); | ||
58 | extern void invalidate_interrupt18(void); | ||
59 | extern void invalidate_interrupt19(void); | ||
60 | extern void invalidate_interrupt20(void); | ||
61 | extern void invalidate_interrupt21(void); | ||
62 | extern void invalidate_interrupt22(void); | ||
63 | extern void invalidate_interrupt23(void); | ||
64 | extern void invalidate_interrupt24(void); | ||
65 | extern void invalidate_interrupt25(void); | ||
66 | extern void invalidate_interrupt26(void); | ||
67 | extern void invalidate_interrupt27(void); | ||
68 | extern void invalidate_interrupt28(void); | ||
69 | extern void invalidate_interrupt29(void); | ||
70 | extern void invalidate_interrupt30(void); | ||
71 | extern void invalidate_interrupt31(void); | ||
48 | 72 | ||
49 | extern void irq_move_cleanup_interrupt(void); | 73 | extern void irq_move_cleanup_interrupt(void); |
50 | extern void reboot_interrupt(void); | 74 | extern void reboot_interrupt(void); |
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h index 36fb1a6a5109..8dbe353e41e1 100644 --- a/arch/x86/include/asm/init.h +++ b/arch/x86/include/asm/init.h | |||
@@ -11,8 +11,8 @@ kernel_physical_mapping_init(unsigned long start, | |||
11 | unsigned long page_size_mask); | 11 | unsigned long page_size_mask); |
12 | 12 | ||
13 | 13 | ||
14 | extern unsigned long __initdata e820_table_start; | 14 | extern unsigned long __initdata pgt_buf_start; |
15 | extern unsigned long __meminitdata e820_table_end; | 15 | extern unsigned long __meminitdata pgt_buf_end; |
16 | extern unsigned long __meminitdata e820_table_top; | 16 | extern unsigned long __meminitdata pgt_buf_top; |
17 | 17 | ||
18 | #endif /* _ASM_X86_INIT_32_H */ | 18 | #endif /* _ASM_X86_INIT_32_H */ |
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index f327d386d6cc..c4bd267dfc50 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h | |||
@@ -63,17 +63,6 @@ union IO_APIC_reg_03 { | |||
63 | } __attribute__ ((packed)) bits; | 63 | } __attribute__ ((packed)) bits; |
64 | }; | 64 | }; |
65 | 65 | ||
66 | enum ioapic_irq_destination_types { | ||
67 | dest_Fixed = 0, | ||
68 | dest_LowestPrio = 1, | ||
69 | dest_SMI = 2, | ||
70 | dest__reserved_1 = 3, | ||
71 | dest_NMI = 4, | ||
72 | dest_INIT = 5, | ||
73 | dest__reserved_2 = 6, | ||
74 | dest_ExtINT = 7 | ||
75 | }; | ||
76 | |||
77 | struct IO_APIC_route_entry { | 66 | struct IO_APIC_route_entry { |
78 | __u32 vector : 8, | 67 | __u32 vector : 8, |
79 | delivery_mode : 3, /* 000: FIXED | 68 | delivery_mode : 3, /* 000: FIXED |
@@ -106,6 +95,10 @@ struct IR_IO_APIC_route_entry { | |||
106 | index : 15; | 95 | index : 15; |
107 | } __attribute__ ((packed)); | 96 | } __attribute__ ((packed)); |
108 | 97 | ||
98 | #define IOAPIC_AUTO -1 | ||
99 | #define IOAPIC_EDGE 0 | ||
100 | #define IOAPIC_LEVEL 1 | ||
101 | |||
109 | #ifdef CONFIG_X86_IO_APIC | 102 | #ifdef CONFIG_X86_IO_APIC |
110 | 103 | ||
111 | /* | 104 | /* |
@@ -150,11 +143,6 @@ extern int timer_through_8259; | |||
150 | #define io_apic_assign_pci_irqs \ | 143 | #define io_apic_assign_pci_irqs \ |
151 | (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) | 144 | (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) |
152 | 145 | ||
153 | extern u8 io_apic_unique_id(u8 id); | ||
154 | extern int io_apic_get_unique_id(int ioapic, int apic_id); | ||
155 | extern int io_apic_get_version(int ioapic); | ||
156 | extern int io_apic_get_redir_entries(int ioapic); | ||
157 | |||
158 | struct io_apic_irq_attr; | 146 | struct io_apic_irq_attr; |
159 | extern int io_apic_set_pci_routing(struct device *dev, int irq, | 147 | extern int io_apic_set_pci_routing(struct device *dev, int irq, |
160 | struct io_apic_irq_attr *irq_attr); | 148 | struct io_apic_irq_attr *irq_attr); |
@@ -162,6 +150,8 @@ void setup_IO_APIC_irq_extra(u32 gsi); | |||
162 | extern void ioapic_and_gsi_init(void); | 150 | extern void ioapic_and_gsi_init(void); |
163 | extern void ioapic_insert_resources(void); | 151 | extern void ioapic_insert_resources(void); |
164 | 152 | ||
153 | int io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr); | ||
154 | |||
165 | extern struct IO_APIC_route_entry **alloc_ioapic_entries(void); | 155 | extern struct IO_APIC_route_entry **alloc_ioapic_entries(void); |
166 | extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries); | 156 | extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries); |
167 | extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); | 157 | extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); |
@@ -186,6 +176,8 @@ extern void __init pre_init_apic_IRQ0(void); | |||
186 | 176 | ||
187 | extern void mp_save_irq(struct mpc_intsrc *m); | 177 | extern void mp_save_irq(struct mpc_intsrc *m); |
188 | 178 | ||
179 | extern void disable_ioapic_support(void); | ||
180 | |||
189 | #else /* !CONFIG_X86_IO_APIC */ | 181 | #else /* !CONFIG_X86_IO_APIC */ |
190 | 182 | ||
191 | #define io_apic_assign_pci_irqs 0 | 183 | #define io_apic_assign_pci_irqs 0 |
@@ -199,6 +191,26 @@ static inline int mp_find_ioapic(u32 gsi) { return 0; } | |||
199 | struct io_apic_irq_attr; | 191 | struct io_apic_irq_attr; |
200 | static inline int io_apic_set_pci_routing(struct device *dev, int irq, | 192 | static inline int io_apic_set_pci_routing(struct device *dev, int irq, |
201 | struct io_apic_irq_attr *irq_attr) { return 0; } | 193 | struct io_apic_irq_attr *irq_attr) { return 0; } |
194 | |||
195 | static inline struct IO_APIC_route_entry **alloc_ioapic_entries(void) | ||
196 | { | ||
197 | return NULL; | ||
198 | } | ||
199 | |||
200 | static inline void free_ioapic_entries(struct IO_APIC_route_entry **ent) { } | ||
201 | static inline int save_IO_APIC_setup(struct IO_APIC_route_entry **ent) | ||
202 | { | ||
203 | return -ENOMEM; | ||
204 | } | ||
205 | |||
206 | static inline void mask_IO_APIC_setup(struct IO_APIC_route_entry **ent) { } | ||
207 | static inline int restore_IO_APIC_setup(struct IO_APIC_route_entry **ent) | ||
208 | { | ||
209 | return -ENOMEM; | ||
210 | } | ||
211 | |||
212 | static inline void mp_save_irq(struct mpc_intsrc *m) { }; | ||
213 | static inline void disable_ioapic_support(void) { } | ||
202 | #endif | 214 | #endif |
203 | 215 | ||
204 | #endif /* _ASM_X86_IO_APIC_H */ | 216 | #endif /* _ASM_X86_IO_APIC_H */ |
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h index 0b7228268a63..615fa9061b57 100644 --- a/arch/x86/include/asm/ipi.h +++ b/arch/x86/include/asm/ipi.h | |||
@@ -123,10 +123,6 @@ extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, | |||
123 | int vector); | 123 | int vector); |
124 | extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, | 124 | extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, |
125 | int vector); | 125 | int vector); |
126 | extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, | ||
127 | int vector); | ||
128 | extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, | ||
129 | int vector); | ||
130 | 126 | ||
131 | /* Avoid include hell */ | 127 | /* Avoid include hell */ |
132 | #define NMI_VECTOR 0x02 | 128 | #define NMI_VECTOR 0x02 |
@@ -150,6 +146,10 @@ static inline void __default_local_send_IPI_all(int vector) | |||
150 | } | 146 | } |
151 | 147 | ||
152 | #ifdef CONFIG_X86_32 | 148 | #ifdef CONFIG_X86_32 |
149 | extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, | ||
150 | int vector); | ||
151 | extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, | ||
152 | int vector); | ||
153 | extern void default_send_IPI_mask_logical(const struct cpumask *mask, | 153 | extern void default_send_IPI_mask_logical(const struct cpumask *mask, |
154 | int vector); | 154 | int vector); |
155 | extern void default_send_IPI_allbutself(int vector); | 155 | extern void default_send_IPI_allbutself(int vector); |
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index c704b38c57a2..ba870bb6dd8e 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h | |||
@@ -10,9 +10,6 @@ | |||
10 | #include <asm/apicdef.h> | 10 | #include <asm/apicdef.h> |
11 | #include <asm/irq_vectors.h> | 11 | #include <asm/irq_vectors.h> |
12 | 12 | ||
13 | /* Even though we don't support this, supply it to appease OF */ | ||
14 | static inline void irq_dispose_mapping(unsigned int virq) { } | ||
15 | |||
16 | static inline int irq_canonicalize(int irq) | 13 | static inline int irq_canonicalize(int irq) |
17 | { | 14 | { |
18 | return ((irq == 2) ? 9 : irq); | 15 | return ((irq == 2) ? 9 : irq); |
diff --git a/arch/x86/include/asm/irq_controller.h b/arch/x86/include/asm/irq_controller.h new file mode 100644 index 000000000000..423bbbddf36d --- /dev/null +++ b/arch/x86/include/asm/irq_controller.h | |||
@@ -0,0 +1,12 @@ | |||
1 | #ifndef __IRQ_CONTROLLER__ | ||
2 | #define __IRQ_CONTROLLER__ | ||
3 | |||
4 | struct irq_domain { | ||
5 | int (*xlate)(struct irq_domain *h, const u32 *intspec, u32 intsize, | ||
6 | u32 *out_hwirq, u32 *out_type); | ||
7 | void *priv; | ||
8 | struct device_node *controller; | ||
9 | struct list_head l; | ||
10 | }; | ||
11 | |||
12 | #endif | ||
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 6af0894dafb4..6e976ee3b3ef 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h | |||
@@ -1,6 +1,7 @@ | |||
1 | #ifndef _ASM_X86_IRQ_VECTORS_H | 1 | #ifndef _ASM_X86_IRQ_VECTORS_H |
2 | #define _ASM_X86_IRQ_VECTORS_H | 2 | #define _ASM_X86_IRQ_VECTORS_H |
3 | 3 | ||
4 | #include <linux/threads.h> | ||
4 | /* | 5 | /* |
5 | * Linux IRQ vector layout. | 6 | * Linux IRQ vector layout. |
6 | * | 7 | * |
@@ -16,8 +17,8 @@ | |||
16 | * Vectors 0 ... 31 : system traps and exceptions - hardcoded events | 17 | * Vectors 0 ... 31 : system traps and exceptions - hardcoded events |
17 | * Vectors 32 ... 127 : device interrupts | 18 | * Vectors 32 ... 127 : device interrupts |
18 | * Vector 128 : legacy int80 syscall interface | 19 | * Vector 128 : legacy int80 syscall interface |
19 | * Vectors 129 ... 237 : device interrupts | 20 | * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 : device interrupts |
20 | * Vectors 238 ... 255 : special interrupts | 21 | * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts |
21 | * | 22 | * |
22 | * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. | 23 | * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. |
23 | * | 24 | * |
@@ -96,37 +97,43 @@ | |||
96 | #define THRESHOLD_APIC_VECTOR 0xf9 | 97 | #define THRESHOLD_APIC_VECTOR 0xf9 |
97 | #define REBOOT_VECTOR 0xf8 | 98 | #define REBOOT_VECTOR 0xf8 |
98 | 99 | ||
99 | /* f0-f7 used for spreading out TLB flushes: */ | ||
100 | #define INVALIDATE_TLB_VECTOR_END 0xf7 | ||
101 | #define INVALIDATE_TLB_VECTOR_START 0xf0 | ||
102 | #define NUM_INVALIDATE_TLB_VECTORS 8 | ||
103 | |||
104 | /* | ||
105 | * Local APIC timer IRQ vector is on a different priority level, | ||
106 | * to work around the 'lost local interrupt if more than 2 IRQ | ||
107 | * sources per level' errata. | ||
108 | */ | ||
109 | #define LOCAL_TIMER_VECTOR 0xef | ||
110 | |||
111 | /* | 100 | /* |
112 | * Generic system vector for platform specific use | 101 | * Generic system vector for platform specific use |
113 | */ | 102 | */ |
114 | #define X86_PLATFORM_IPI_VECTOR 0xed | 103 | #define X86_PLATFORM_IPI_VECTOR 0xf7 |
115 | 104 | ||
116 | /* | 105 | /* |
117 | * IRQ work vector: | 106 | * IRQ work vector: |
118 | */ | 107 | */ |
119 | #define IRQ_WORK_VECTOR 0xec | 108 | #define IRQ_WORK_VECTOR 0xf6 |
120 | 109 | ||
121 | #define UV_BAU_MESSAGE 0xea | 110 | #define UV_BAU_MESSAGE 0xf5 |
122 | 111 | ||
123 | /* | 112 | /* |
124 | * Self IPI vector for machine checks | 113 | * Self IPI vector for machine checks |
125 | */ | 114 | */ |
126 | #define MCE_SELF_VECTOR 0xeb | 115 | #define MCE_SELF_VECTOR 0xf4 |
127 | 116 | ||
128 | /* Xen vector callback to receive events in a HVM domain */ | 117 | /* Xen vector callback to receive events in a HVM domain */ |
129 | #define XEN_HVM_EVTCHN_CALLBACK 0xe9 | 118 | #define XEN_HVM_EVTCHN_CALLBACK 0xf3 |
119 | |||
120 | /* | ||
121 | * Local APIC timer IRQ vector is on a different priority level, | ||
122 | * to work around the 'lost local interrupt if more than 2 IRQ | ||
123 | * sources per level' errata. | ||
124 | */ | ||
125 | #define LOCAL_TIMER_VECTOR 0xef | ||
126 | |||
127 | /* up to 32 vectors used for spreading out TLB flushes: */ | ||
128 | #if NR_CPUS <= 32 | ||
129 | # define NUM_INVALIDATE_TLB_VECTORS (NR_CPUS) | ||
130 | #else | ||
131 | # define NUM_INVALIDATE_TLB_VECTORS (32) | ||
132 | #endif | ||
133 | |||
134 | #define INVALIDATE_TLB_VECTOR_END (0xee) | ||
135 | #define INVALIDATE_TLB_VECTOR_START \ | ||
136 | (INVALIDATE_TLB_VECTOR_END-NUM_INVALIDATE_TLB_VECTORS+1) | ||
130 | 137 | ||
131 | #define NR_VECTORS 256 | 138 | #define NR_VECTORS 256 |
132 | 139 | ||
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index ca242d35e873..fe2cc6e105fa 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h | |||
@@ -13,7 +13,6 @@ enum die_val { | |||
13 | DIE_PANIC, | 13 | DIE_PANIC, |
14 | DIE_NMI, | 14 | DIE_NMI, |
15 | DIE_DIE, | 15 | DIE_DIE, |
16 | DIE_NMIWATCHDOG, | ||
17 | DIE_KERNELDEBUG, | 16 | DIE_KERNELDEBUG, |
18 | DIE_TRAP, | 17 | DIE_TRAP, |
19 | DIE_GPF, | 18 | DIE_GPF, |
@@ -27,7 +26,7 @@ extern void die(const char *, struct pt_regs *,long); | |||
27 | extern int __must_check __die(const char *, struct pt_regs *, long); | 26 | extern int __must_check __die(const char *, struct pt_regs *, long); |
28 | extern void show_registers(struct pt_regs *regs); | 27 | extern void show_registers(struct pt_regs *regs); |
29 | extern void show_trace(struct task_struct *t, struct pt_regs *regs, | 28 | extern void show_trace(struct task_struct *t, struct pt_regs *regs, |
30 | unsigned long *sp); | 29 | unsigned long *sp, unsigned long bp); |
31 | extern void __show_regs(struct pt_regs *regs, int all); | 30 | extern void __show_regs(struct pt_regs *regs, int all); |
32 | extern void show_regs(struct pt_regs *regs); | 31 | extern void show_regs(struct pt_regs *regs); |
33 | extern unsigned long oops_begin(void); | 32 | extern unsigned long oops_begin(void); |
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 8e37deb1eb38..0f5213564326 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h | |||
@@ -142,9 +142,9 @@ struct x86_emulate_ops { | |||
142 | int (*pio_out_emulated)(int size, unsigned short port, const void *val, | 142 | int (*pio_out_emulated)(int size, unsigned short port, const void *val, |
143 | unsigned int count, struct kvm_vcpu *vcpu); | 143 | unsigned int count, struct kvm_vcpu *vcpu); |
144 | 144 | ||
145 | bool (*get_cached_descriptor)(struct desc_struct *desc, | 145 | bool (*get_cached_descriptor)(struct desc_struct *desc, u32 *base3, |
146 | int seg, struct kvm_vcpu *vcpu); | 146 | int seg, struct kvm_vcpu *vcpu); |
147 | void (*set_cached_descriptor)(struct desc_struct *desc, | 147 | void (*set_cached_descriptor)(struct desc_struct *desc, u32 base3, |
148 | int seg, struct kvm_vcpu *vcpu); | 148 | int seg, struct kvm_vcpu *vcpu); |
149 | u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu); | 149 | u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu); |
150 | void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); | 150 | void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); |
@@ -239,6 +239,7 @@ struct x86_emulate_ctxt { | |||
239 | int interruptibility; | 239 | int interruptibility; |
240 | 240 | ||
241 | bool perm_ok; /* do not check permissions if true */ | 241 | bool perm_ok; /* do not check permissions if true */ |
242 | bool only_vendor_specific_insn; | ||
242 | 243 | ||
243 | bool have_exception; | 244 | bool have_exception; |
244 | struct x86_exception exception; | 245 | struct x86_exception exception; |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ffd7f8d29187..c8af0991fdf0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -85,7 +85,7 @@ | |||
85 | 85 | ||
86 | #define ASYNC_PF_PER_VCPU 64 | 86 | #define ASYNC_PF_PER_VCPU 64 |
87 | 87 | ||
88 | extern spinlock_t kvm_lock; | 88 | extern raw_spinlock_t kvm_lock; |
89 | extern struct list_head vm_list; | 89 | extern struct list_head vm_list; |
90 | 90 | ||
91 | struct kvm_vcpu; | 91 | struct kvm_vcpu; |
@@ -255,6 +255,8 @@ struct kvm_mmu { | |||
255 | int (*sync_page)(struct kvm_vcpu *vcpu, | 255 | int (*sync_page)(struct kvm_vcpu *vcpu, |
256 | struct kvm_mmu_page *sp); | 256 | struct kvm_mmu_page *sp); |
257 | void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); | 257 | void (*invlpg)(struct kvm_vcpu *vcpu, gva_t gva); |
258 | void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | ||
259 | u64 *spte, const void *pte, unsigned long mmu_seq); | ||
258 | hpa_t root_hpa; | 260 | hpa_t root_hpa; |
259 | int root_level; | 261 | int root_level; |
260 | int shadow_root_level; | 262 | int shadow_root_level; |
@@ -335,12 +337,6 @@ struct kvm_vcpu_arch { | |||
335 | u64 *last_pte_updated; | 337 | u64 *last_pte_updated; |
336 | gfn_t last_pte_gfn; | 338 | gfn_t last_pte_gfn; |
337 | 339 | ||
338 | struct { | ||
339 | gfn_t gfn; /* presumed gfn during guest pte update */ | ||
340 | pfn_t pfn; /* pfn corresponding to that gfn */ | ||
341 | unsigned long mmu_seq; | ||
342 | } update_pte; | ||
343 | |||
344 | struct fpu guest_fpu; | 340 | struct fpu guest_fpu; |
345 | u64 xcr0; | 341 | u64 xcr0; |
346 | 342 | ||
@@ -448,7 +444,7 @@ struct kvm_arch { | |||
448 | 444 | ||
449 | unsigned long irq_sources_bitmap; | 445 | unsigned long irq_sources_bitmap; |
450 | s64 kvmclock_offset; | 446 | s64 kvmclock_offset; |
451 | spinlock_t tsc_write_lock; | 447 | raw_spinlock_t tsc_write_lock; |
452 | u64 last_tsc_nsec; | 448 | u64 last_tsc_nsec; |
453 | u64 last_tsc_offset; | 449 | u64 last_tsc_offset; |
454 | u64 last_tsc_write; | 450 | u64 last_tsc_write; |
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 0c90dd9f0505..9c7d95f6174b 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h | |||
@@ -25,7 +25,6 @@ extern int pic_mode; | |||
25 | #define MAX_IRQ_SOURCES 256 | 25 | #define MAX_IRQ_SOURCES 256 |
26 | 26 | ||
27 | extern unsigned int def_to_bigsmp; | 27 | extern unsigned int def_to_bigsmp; |
28 | extern u8 apicid_2_node[]; | ||
29 | 28 | ||
30 | #ifdef CONFIG_X86_NUMAQ | 29 | #ifdef CONFIG_X86_NUMAQ |
31 | extern int mp_bus_id_to_node[MAX_MP_BUSSES]; | 30 | extern int mp_bus_id_to_node[MAX_MP_BUSSES]; |
@@ -33,8 +32,6 @@ extern int mp_bus_id_to_local[MAX_MP_BUSSES]; | |||
33 | extern int quad_local_to_mp_bus_id [NR_CPUS/4][4]; | 32 | extern int quad_local_to_mp_bus_id [NR_CPUS/4][4]; |
34 | #endif | 33 | #endif |
35 | 34 | ||
36 | #define MAX_APICID 256 | ||
37 | |||
38 | #else /* CONFIG_X86_64: */ | 35 | #else /* CONFIG_X86_64: */ |
39 | 36 | ||
40 | #define MAX_MP_BUSSES 256 | 37 | #define MAX_MP_BUSSES 256 |
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 43a18c77676d..fd5a1f365c95 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -43,6 +43,7 @@ | |||
43 | 43 | ||
44 | #define MSR_MTRRcap 0x000000fe | 44 | #define MSR_MTRRcap 0x000000fe |
45 | #define MSR_IA32_BBL_CR_CTL 0x00000119 | 45 | #define MSR_IA32_BBL_CR_CTL 0x00000119 |
46 | #define MSR_IA32_BBL_CR_CTL3 0x0000011e | ||
46 | 47 | ||
47 | #define MSR_IA32_SYSENTER_CS 0x00000174 | 48 | #define MSR_IA32_SYSENTER_CS 0x00000174 |
48 | #define MSR_IA32_SYSENTER_ESP 0x00000175 | 49 | #define MSR_IA32_SYSENTER_ESP 0x00000175 |
@@ -52,6 +53,9 @@ | |||
52 | #define MSR_IA32_MCG_STATUS 0x0000017a | 53 | #define MSR_IA32_MCG_STATUS 0x0000017a |
53 | #define MSR_IA32_MCG_CTL 0x0000017b | 54 | #define MSR_IA32_MCG_CTL 0x0000017b |
54 | 55 | ||
56 | #define MSR_OFFCORE_RSP_0 0x000001a6 | ||
57 | #define MSR_OFFCORE_RSP_1 0x000001a7 | ||
58 | |||
55 | #define MSR_IA32_PEBS_ENABLE 0x000003f1 | 59 | #define MSR_IA32_PEBS_ENABLE 0x000003f1 |
56 | #define MSR_IA32_DS_AREA 0x00000600 | 60 | #define MSR_IA32_DS_AREA 0x00000600 |
57 | #define MSR_IA32_PERF_CAPABILITIES 0x00000345 | 61 | #define MSR_IA32_PERF_CAPABILITIES 0x00000345 |
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index c76f5b92b840..4886a68f267e 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h | |||
@@ -7,7 +7,6 @@ | |||
7 | 7 | ||
8 | #ifdef CONFIG_X86_LOCAL_APIC | 8 | #ifdef CONFIG_X86_LOCAL_APIC |
9 | 9 | ||
10 | extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); | ||
11 | extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); | 10 | extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); |
12 | extern int reserve_perfctr_nmi(unsigned int); | 11 | extern int reserve_perfctr_nmi(unsigned int); |
13 | extern void release_perfctr_nmi(unsigned int); | 12 | extern void release_perfctr_nmi(unsigned int); |
@@ -30,8 +29,8 @@ void arch_trigger_all_cpu_backtrace(void); | |||
30 | * external nmis, because the local ones are more frequent. | 29 | * external nmis, because the local ones are more frequent. |
31 | * | 30 | * |
32 | * Also setup some default high/normal/low settings for | 31 | * Also setup some default high/normal/low settings for |
33 | * subsystems to registers with. Using 4 bits to seperate | 32 | * subsystems to registers with. Using 4 bits to separate |
34 | * the priorities. This can go alot higher if needed be. | 33 | * the priorities. This can go a lot higher if needed be. |
35 | */ | 34 | */ |
36 | 35 | ||
37 | #define NMI_LOCAL_SHIFT 16 /* randomly picked */ | 36 | #define NMI_LOCAL_SHIFT 16 /* randomly picked */ |
diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h index 6d8723a766cc..af788496020b 100644 --- a/arch/x86/include/asm/nops.h +++ b/arch/x86/include/asm/nops.h | |||
@@ -38,7 +38,7 @@ | |||
38 | #define K8_NOP8 K8_NOP4 K8_NOP4 | 38 | #define K8_NOP8 K8_NOP4 K8_NOP4 |
39 | 39 | ||
40 | /* K7 nops | 40 | /* K7 nops |
41 | uses eax dependencies (arbitary choice) | 41 | uses eax dependencies (arbitrary choice) |
42 | 1: nop | 42 | 1: nop |
43 | 2: movl %eax,%eax | 43 | 2: movl %eax,%eax |
44 | 3: leal (,%eax,1),%eax | 44 | 3: leal (,%eax,1),%eax |
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h index 27da400d3138..3d4dab43c994 100644 --- a/arch/x86/include/asm/numa.h +++ b/arch/x86/include/asm/numa.h | |||
@@ -1,5 +1,57 @@ | |||
1 | #ifndef _ASM_X86_NUMA_H | ||
2 | #define _ASM_X86_NUMA_H | ||
3 | |||
4 | #include <asm/topology.h> | ||
5 | #include <asm/apicdef.h> | ||
6 | |||
7 | #ifdef CONFIG_NUMA | ||
8 | |||
9 | #define NR_NODE_MEMBLKS (MAX_NUMNODES*2) | ||
10 | |||
11 | /* | ||
12 | * __apicid_to_node[] stores the raw mapping between physical apicid and | ||
13 | * node and is used to initialize cpu_to_node mapping. | ||
14 | * | ||
15 | * The mapping may be overridden by apic->numa_cpu_node() on 32bit and thus | ||
16 | * should be accessed by the accessors - set_apicid_to_node() and | ||
17 | * numa_cpu_node(). | ||
18 | */ | ||
19 | extern s16 __apicid_to_node[MAX_LOCAL_APIC]; | ||
20 | |||
21 | static inline void set_apicid_to_node(int apicid, s16 node) | ||
22 | { | ||
23 | __apicid_to_node[apicid] = node; | ||
24 | } | ||
25 | #else /* CONFIG_NUMA */ | ||
26 | static inline void set_apicid_to_node(int apicid, s16 node) | ||
27 | { | ||
28 | } | ||
29 | #endif /* CONFIG_NUMA */ | ||
30 | |||
1 | #ifdef CONFIG_X86_32 | 31 | #ifdef CONFIG_X86_32 |
2 | # include "numa_32.h" | 32 | # include "numa_32.h" |
3 | #else | 33 | #else |
4 | # include "numa_64.h" | 34 | # include "numa_64.h" |
5 | #endif | 35 | #endif |
36 | |||
37 | #ifdef CONFIG_NUMA | ||
38 | extern void __cpuinit numa_set_node(int cpu, int node); | ||
39 | extern void __cpuinit numa_clear_node(int cpu); | ||
40 | extern void __init numa_init_array(void); | ||
41 | extern void __init init_cpu_to_node(void); | ||
42 | extern void __cpuinit numa_add_cpu(int cpu); | ||
43 | extern void __cpuinit numa_remove_cpu(int cpu); | ||
44 | #else /* CONFIG_NUMA */ | ||
45 | static inline void numa_set_node(int cpu, int node) { } | ||
46 | static inline void numa_clear_node(int cpu) { } | ||
47 | static inline void numa_init_array(void) { } | ||
48 | static inline void init_cpu_to_node(void) { } | ||
49 | static inline void numa_add_cpu(int cpu) { } | ||
50 | static inline void numa_remove_cpu(int cpu) { } | ||
51 | #endif /* CONFIG_NUMA */ | ||
52 | |||
53 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS | ||
54 | struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable); | ||
55 | #endif | ||
56 | |||
57 | #endif /* _ASM_X86_NUMA_H */ | ||
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h index b0ef2b449a9d..c6beed1ef103 100644 --- a/arch/x86/include/asm/numa_32.h +++ b/arch/x86/include/asm/numa_32.h | |||
@@ -4,7 +4,12 @@ | |||
4 | extern int numa_off; | 4 | extern int numa_off; |
5 | 5 | ||
6 | extern int pxm_to_nid(int pxm); | 6 | extern int pxm_to_nid(int pxm); |
7 | extern void numa_remove_cpu(int cpu); | 7 | |
8 | #ifdef CONFIG_NUMA | ||
9 | extern int __cpuinit numa_cpu_node(int cpu); | ||
10 | #else /* CONFIG_NUMA */ | ||
11 | static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; } | ||
12 | #endif /* CONFIG_NUMA */ | ||
8 | 13 | ||
9 | #ifdef CONFIG_HIGHMEM | 14 | #ifdef CONFIG_HIGHMEM |
10 | extern void set_highmem_pages_init(void); | 15 | extern void set_highmem_pages_init(void); |
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index 0493be39607c..344eb1790b46 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h | |||
@@ -2,23 +2,16 @@ | |||
2 | #define _ASM_X86_NUMA_64_H | 2 | #define _ASM_X86_NUMA_64_H |
3 | 3 | ||
4 | #include <linux/nodemask.h> | 4 | #include <linux/nodemask.h> |
5 | #include <asm/apicdef.h> | ||
6 | 5 | ||
7 | struct bootnode { | 6 | struct bootnode { |
8 | u64 start; | 7 | u64 start; |
9 | u64 end; | 8 | u64 end; |
10 | }; | 9 | }; |
11 | 10 | ||
12 | extern int compute_hash_shift(struct bootnode *nodes, int numblks, | ||
13 | int *nodeids); | ||
14 | |||
15 | #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) | 11 | #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) |
16 | 12 | ||
17 | extern void numa_init_array(void); | ||
18 | extern int numa_off; | 13 | extern int numa_off; |
19 | 14 | ||
20 | extern s16 apicid_to_node[MAX_LOCAL_APIC]; | ||
21 | |||
22 | extern unsigned long numa_free_all_bootmem(void); | 15 | extern unsigned long numa_free_all_bootmem(void); |
23 | extern void setup_node_bootmem(int nodeid, unsigned long start, | 16 | extern void setup_node_bootmem(int nodeid, unsigned long start, |
24 | unsigned long end); | 17 | unsigned long end); |
@@ -31,11 +24,11 @@ extern void setup_node_bootmem(int nodeid, unsigned long start, | |||
31 | */ | 24 | */ |
32 | #define NODE_MIN_SIZE (4*1024*1024) | 25 | #define NODE_MIN_SIZE (4*1024*1024) |
33 | 26 | ||
34 | extern void __init init_cpu_to_node(void); | 27 | extern nodemask_t numa_nodes_parsed __initdata; |
35 | extern void __cpuinit numa_set_node(int cpu, int node); | 28 | |
36 | extern void __cpuinit numa_clear_node(int cpu); | 29 | extern int __cpuinit numa_cpu_node(int cpu); |
37 | extern void __cpuinit numa_add_cpu(int cpu); | 30 | extern int __init numa_add_memblk(int nodeid, u64 start, u64 end); |
38 | extern void __cpuinit numa_remove_cpu(int cpu); | 31 | extern void __init numa_set_distance(int from, int to, int distance); |
39 | 32 | ||
40 | #ifdef CONFIG_NUMA_EMU | 33 | #ifdef CONFIG_NUMA_EMU |
41 | #define FAKE_NODE_MIN_SIZE ((u64)32 << 20) | 34 | #define FAKE_NODE_MIN_SIZE ((u64)32 << 20) |
@@ -43,11 +36,7 @@ extern void __cpuinit numa_remove_cpu(int cpu); | |||
43 | void numa_emu_cmdline(char *); | 36 | void numa_emu_cmdline(char *); |
44 | #endif /* CONFIG_NUMA_EMU */ | 37 | #endif /* CONFIG_NUMA_EMU */ |
45 | #else | 38 | #else |
46 | static inline void init_cpu_to_node(void) { } | 39 | static inline int numa_cpu_node(int cpu) { return NUMA_NO_NODE; } |
47 | static inline void numa_set_node(int cpu, int node) { } | ||
48 | static inline void numa_clear_node(int cpu) { } | ||
49 | static inline void numa_add_cpu(int cpu, int node) { } | ||
50 | static inline void numa_remove_cpu(int cpu) { } | ||
51 | #endif | 40 | #endif |
52 | 41 | ||
53 | #endif /* _ASM_X86_NUMA_64_H */ | 42 | #endif /* _ASM_X86_NUMA_64_H */ |
diff --git a/arch/x86/include/asm/olpc.h b/arch/x86/include/asm/olpc.h index f482010350fb..5ca6801b75f3 100644 --- a/arch/x86/include/asm/olpc.h +++ b/arch/x86/include/asm/olpc.h | |||
@@ -20,7 +20,7 @@ extern struct olpc_platform_t olpc_platform_info; | |||
20 | 20 | ||
21 | /* | 21 | /* |
22 | * OLPC board IDs contain the major build number within the mask 0x0ff0, | 22 | * OLPC board IDs contain the major build number within the mask 0x0ff0, |
23 | * and the minor build number withing 0x000f. Pre-builds have a minor | 23 | * and the minor build number within 0x000f. Pre-builds have a minor |
24 | * number less than 8, and normal builds start at 8. For example, 0x0B10 | 24 | * number less than 8, and normal builds start at 8. For example, 0x0B10 |
25 | * is a PreB1, and 0x0C18 is a C1. | 25 | * is a PreB1, and 0x0C18 is a C1. |
26 | */ | 26 | */ |
diff --git a/arch/x86/include/asm/olpc_ofw.h b/arch/x86/include/asm/olpc_ofw.h index 641988efe063..c5d3a5abbb9f 100644 --- a/arch/x86/include/asm/olpc_ofw.h +++ b/arch/x86/include/asm/olpc_ofw.h | |||
@@ -6,7 +6,7 @@ | |||
6 | 6 | ||
7 | #define OLPC_OFW_SIG 0x2057464F /* aka "OFW " */ | 7 | #define OLPC_OFW_SIG 0x2057464F /* aka "OFW " */ |
8 | 8 | ||
9 | #ifdef CONFIG_OLPC_OPENFIRMWARE | 9 | #ifdef CONFIG_OLPC |
10 | 10 | ||
11 | extern bool olpc_ofw_is_installed(void); | 11 | extern bool olpc_ofw_is_installed(void); |
12 | 12 | ||
@@ -26,19 +26,15 @@ extern void setup_olpc_ofw_pgd(void); | |||
26 | /* check if OFW was detected during boot */ | 26 | /* check if OFW was detected during boot */ |
27 | extern bool olpc_ofw_present(void); | 27 | extern bool olpc_ofw_present(void); |
28 | 28 | ||
29 | #else /* !CONFIG_OLPC_OPENFIRMWARE */ | 29 | #else /* !CONFIG_OLPC */ |
30 | |||
31 | static inline bool olpc_ofw_is_installed(void) { return false; } | ||
32 | static inline void olpc_ofw_detect(void) { } | 30 | static inline void olpc_ofw_detect(void) { } |
33 | static inline void setup_olpc_ofw_pgd(void) { } | 31 | static inline void setup_olpc_ofw_pgd(void) { } |
34 | static inline bool olpc_ofw_present(void) { return false; } | 32 | #endif /* !CONFIG_OLPC */ |
35 | |||
36 | #endif /* !CONFIG_OLPC_OPENFIRMWARE */ | ||
37 | 33 | ||
38 | #ifdef CONFIG_OLPC_OPENFIRMWARE_DT | 34 | #ifdef CONFIG_OF_PROMTREE |
39 | extern void olpc_dt_build_devicetree(void); | 35 | extern void olpc_dt_build_devicetree(void); |
40 | #else | 36 | #else |
41 | static inline void olpc_dt_build_devicetree(void) { } | 37 | static inline void olpc_dt_build_devicetree(void) { } |
42 | #endif /* CONFIG_OLPC_OPENFIRMWARE_DT */ | 38 | #endif |
43 | 39 | ||
44 | #endif /* _ASM_X86_OLPC_OFW_H */ | 40 | #endif /* _ASM_X86_OLPC_OFW_H */ |
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 1df66211fd1b..bce688d54c12 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define _ASM_X86_PAGE_DEFS_H | 2 | #define _ASM_X86_PAGE_DEFS_H |
3 | 3 | ||
4 | #include <linux/const.h> | 4 | #include <linux/const.h> |
5 | #include <linux/types.h> | ||
5 | 6 | ||
6 | /* PAGE_SHIFT determines the page size */ | 7 | /* PAGE_SHIFT determines the page size */ |
7 | #define PAGE_SHIFT 12 | 8 | #define PAGE_SHIFT 12 |
@@ -45,11 +46,15 @@ extern int devmem_is_allowed(unsigned long pagenr); | |||
45 | extern unsigned long max_low_pfn_mapped; | 46 | extern unsigned long max_low_pfn_mapped; |
46 | extern unsigned long max_pfn_mapped; | 47 | extern unsigned long max_pfn_mapped; |
47 | 48 | ||
49 | static inline phys_addr_t get_max_mapped(void) | ||
50 | { | ||
51 | return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT; | ||
52 | } | ||
53 | |||
48 | extern unsigned long init_memory_mapping(unsigned long start, | 54 | extern unsigned long init_memory_mapping(unsigned long start, |
49 | unsigned long end); | 55 | unsigned long end); |
50 | 56 | ||
51 | extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn, | 57 | extern void initmem_init(void); |
52 | int acpi, int k8); | ||
53 | extern void free_initmem(void); | 58 | extern void free_initmem(void); |
54 | 59 | ||
55 | #endif /* !__ASSEMBLY__ */ | 60 | #endif /* !__ASSEMBLY__ */ |
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 7e172955ee57..a09e1f052d84 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h | |||
@@ -451,6 +451,26 @@ do { \ | |||
451 | #define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | 451 | #define irqsafe_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) |
452 | #endif /* !CONFIG_M386 */ | 452 | #endif /* !CONFIG_M386 */ |
453 | 453 | ||
454 | #ifdef CONFIG_X86_CMPXCHG64 | ||
455 | #define percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) \ | ||
456 | ({ \ | ||
457 | char __ret; \ | ||
458 | typeof(o1) __o1 = o1; \ | ||
459 | typeof(o1) __n1 = n1; \ | ||
460 | typeof(o2) __o2 = o2; \ | ||
461 | typeof(o2) __n2 = n2; \ | ||
462 | typeof(o2) __dummy = n2; \ | ||
463 | asm volatile("cmpxchg8b "__percpu_arg(1)"\n\tsetz %0\n\t" \ | ||
464 | : "=a"(__ret), "=m" (pcp1), "=d"(__dummy) \ | ||
465 | : "b"(__n1), "c"(__n2), "a"(__o1), "d"(__o2)); \ | ||
466 | __ret; \ | ||
467 | }) | ||
468 | |||
469 | #define __this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) | ||
470 | #define this_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) | ||
471 | #define irqsafe_cpu_cmpxchg_double_4(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg8b_double(pcp1, o1, o2, n1, n2) | ||
472 | #endif /* CONFIG_X86_CMPXCHG64 */ | ||
473 | |||
454 | /* | 474 | /* |
455 | * Per cpu atomic 64 bit operations are only available under 64 bit. | 475 | * Per cpu atomic 64 bit operations are only available under 64 bit. |
456 | * 32 bit must fall back to generic operations. | 476 | * 32 bit must fall back to generic operations. |
@@ -480,6 +500,34 @@ do { \ | |||
480 | #define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) | 500 | #define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) |
481 | #define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) | 501 | #define irqsafe_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) |
482 | #define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | 502 | #define irqsafe_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) |
503 | |||
504 | /* | ||
505 | * Pretty complex macro to generate cmpxchg16 instruction. The instruction | ||
506 | * is not supported on early AMD64 processors so we must be able to emulate | ||
507 | * it in software. The address used in the cmpxchg16 instruction must be | ||
508 | * aligned to a 16 byte boundary. | ||
509 | */ | ||
510 | #define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) \ | ||
511 | ({ \ | ||
512 | char __ret; \ | ||
513 | typeof(o1) __o1 = o1; \ | ||
514 | typeof(o1) __n1 = n1; \ | ||
515 | typeof(o2) __o2 = o2; \ | ||
516 | typeof(o2) __n2 = n2; \ | ||
517 | typeof(o2) __dummy; \ | ||
518 | alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4, \ | ||
519 | "cmpxchg16b %%gs:(%%rsi)\n\tsetz %0\n\t", \ | ||
520 | X86_FEATURE_CX16, \ | ||
521 | ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \ | ||
522 | "S" (&pcp1), "b"(__n1), "c"(__n2), \ | ||
523 | "a"(__o1), "d"(__o2)); \ | ||
524 | __ret; \ | ||
525 | }) | ||
526 | |||
527 | #define __this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) | ||
528 | #define this_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) | ||
529 | #define irqsafe_cpu_cmpxchg_double_8(pcp1, pcp2, o1, o2, n1, n2) percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) | ||
530 | |||
483 | #endif | 531 | #endif |
484 | 532 | ||
485 | /* This is not atomic against other CPUs -- CPU preemption needs to be off */ | 533 | /* This is not atomic against other CPUs -- CPU preemption needs to be off */ |
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index cc29086e30cd..56fd9e3abbda 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Netburst Perfomance Events (P4, old Xeon) | 2 | * Netburst Performance Events (P4, old Xeon) |
3 | */ | 3 | */ |
4 | 4 | ||
5 | #ifndef PERF_EVENT_P4_H | 5 | #ifndef PERF_EVENT_P4_H |
@@ -9,7 +9,7 @@ | |||
9 | #include <linux/bitops.h> | 9 | #include <linux/bitops.h> |
10 | 10 | ||
11 | /* | 11 | /* |
12 | * NetBurst has perfomance MSRs shared between | 12 | * NetBurst has performance MSRs shared between |
13 | * threads if HT is turned on, ie for both logical | 13 | * threads if HT is turned on, ie for both logical |
14 | * processors (mem: in turn in Atom with HT support | 14 | * processors (mem: in turn in Atom with HT support |
15 | * perf-MSRs are not shared and every thread has its | 15 | * perf-MSRs are not shared and every thread has its |
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 94b979d1b58d..effff47a3c82 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h | |||
@@ -69,8 +69,6 @@ static inline void native_pmd_clear(pmd_t *pmd) | |||
69 | 69 | ||
70 | static inline void pud_clear(pud_t *pudp) | 70 | static inline void pud_clear(pud_t *pudp) |
71 | { | 71 | { |
72 | unsigned long pgd; | ||
73 | |||
74 | set_pud(pudp, __pud(0)); | 72 | set_pud(pudp, __pud(0)); |
75 | 73 | ||
76 | /* | 74 | /* |
@@ -79,13 +77,10 @@ static inline void pud_clear(pud_t *pudp) | |||
79 | * section 8.1: in PAE mode we explicitly have to flush the | 77 | * section 8.1: in PAE mode we explicitly have to flush the |
80 | * TLB via cr3 if the top-level pgd is changed... | 78 | * TLB via cr3 if the top-level pgd is changed... |
81 | * | 79 | * |
82 | * Make sure the pud entry we're updating is within the | 80 | * Currently all places where pud_clear() is called either have |
83 | * current pgd to avoid unnecessary TLB flushes. | 81 | * flush_tlb_mm() followed or don't need TLB flush (x86_64 code or |
82 | * pud_clear_bad()), so we don't need TLB flush here. | ||
84 | */ | 83 | */ |
85 | pgd = read_cr3(); | ||
86 | if (__pa(pudp) >= pgd && __pa(pudp) < | ||
87 | (pgd + sizeof(pgd_t)*PTRS_PER_PGD)) | ||
88 | write_cr3(pgd); | ||
89 | } | 84 | } |
90 | 85 | ||
91 | #ifdef CONFIG_SMP | 86 | #ifdef CONFIG_SMP |
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h index 7a3e836eb2a9..a898a2b6e10c 100644 --- a/arch/x86/include/asm/processor-flags.h +++ b/arch/x86/include/asm/processor-flags.h | |||
@@ -7,7 +7,7 @@ | |||
7 | */ | 7 | */ |
8 | #define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ | 8 | #define X86_EFLAGS_CF 0x00000001 /* Carry Flag */ |
9 | #define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ | 9 | #define X86_EFLAGS_PF 0x00000004 /* Parity Flag */ |
10 | #define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */ | 10 | #define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */ |
11 | #define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ | 11 | #define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */ |
12 | #define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ | 12 | #define X86_EFLAGS_SF 0x00000080 /* Sign Flag */ |
13 | #define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ | 13 | #define X86_EFLAGS_TF 0x00000100 /* Trap Flag */ |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 45636cefa186..4c25ab48257b 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -94,10 +94,6 @@ struct cpuinfo_x86 { | |||
94 | int x86_cache_alignment; /* In bytes */ | 94 | int x86_cache_alignment; /* In bytes */ |
95 | int x86_power; | 95 | int x86_power; |
96 | unsigned long loops_per_jiffy; | 96 | unsigned long loops_per_jiffy; |
97 | #ifdef CONFIG_SMP | ||
98 | /* cpus sharing the last level cache: */ | ||
99 | cpumask_var_t llc_shared_map; | ||
100 | #endif | ||
101 | /* cpuid returned max cores value: */ | 97 | /* cpuid returned max cores value: */ |
102 | u16 x86_max_cores; | 98 | u16 x86_max_cores; |
103 | u16 apicid; | 99 | u16 apicid; |
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h index b4ec95f07518..971e0b46446e 100644 --- a/arch/x86/include/asm/prom.h +++ b/arch/x86/include/asm/prom.h | |||
@@ -1 +1,69 @@ | |||
1 | /* dummy prom.h; here to make linux/of.h's #includes happy */ | 1 | /* |
2 | * Definitions for Device tree / OpenFirmware handling on X86 | ||
3 | * | ||
4 | * based on arch/powerpc/include/asm/prom.h which is | ||
5 | * Copyright (C) 1996-2005 Paul Mackerras. | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; either version | ||
10 | * 2 of the License, or (at your option) any later version. | ||
11 | */ | ||
12 | |||
13 | #ifndef _ASM_X86_PROM_H | ||
14 | #define _ASM_X86_PROM_H | ||
15 | #ifndef __ASSEMBLY__ | ||
16 | |||
17 | #include <linux/of.h> | ||
18 | #include <linux/types.h> | ||
19 | #include <linux/pci.h> | ||
20 | |||
21 | #include <asm/irq.h> | ||
22 | #include <asm/atomic.h> | ||
23 | #include <asm/setup.h> | ||
24 | #include <asm/irq_controller.h> | ||
25 | |||
26 | #ifdef CONFIG_OF | ||
27 | extern int of_ioapic; | ||
28 | extern u64 initial_dtb; | ||
29 | extern void add_dtb(u64 data); | ||
30 | extern void x86_add_irq_domains(void); | ||
31 | void __cpuinit x86_of_pci_init(void); | ||
32 | void x86_dtb_init(void); | ||
33 | |||
34 | static inline struct device_node *pci_device_to_OF_node(struct pci_dev *pdev) | ||
35 | { | ||
36 | return pdev ? pdev->dev.of_node : NULL; | ||
37 | } | ||
38 | |||
39 | static inline struct device_node *pci_bus_to_OF_node(struct pci_bus *bus) | ||
40 | { | ||
41 | return pci_device_to_OF_node(bus->self); | ||
42 | } | ||
43 | |||
44 | #else | ||
45 | static inline void add_dtb(u64 data) { } | ||
46 | static inline void x86_add_irq_domains(void) { } | ||
47 | static inline void x86_of_pci_init(void) { } | ||
48 | static inline void x86_dtb_init(void) { } | ||
49 | #define of_ioapic 0 | ||
50 | #endif | ||
51 | |||
52 | extern char cmd_line[COMMAND_LINE_SIZE]; | ||
53 | |||
54 | #define pci_address_to_pio pci_address_to_pio | ||
55 | unsigned long pci_address_to_pio(phys_addr_t addr); | ||
56 | |||
57 | /** | ||
58 | * irq_dispose_mapping - Unmap an interrupt | ||
59 | * @virq: linux virq number of the interrupt to unmap | ||
60 | * | ||
61 | * FIXME: We really should implement proper virq handling like power, | ||
62 | * but that's going to be major surgery. | ||
63 | */ | ||
64 | static inline void irq_dispose_mapping(unsigned int virq) { } | ||
65 | |||
66 | #define HAVE_ARCH_DEVTREE_FIXUPS | ||
67 | |||
68 | #endif /* __ASSEMBLY__ */ | ||
69 | #endif | ||
diff --git a/arch/x86/include/asm/ptrace-abi.h b/arch/x86/include/asm/ptrace-abi.h index 52b098a6eebb..7b0a55a88851 100644 --- a/arch/x86/include/asm/ptrace-abi.h +++ b/arch/x86/include/asm/ptrace-abi.h | |||
@@ -31,7 +31,7 @@ | |||
31 | #define R12 24 | 31 | #define R12 24 |
32 | #define RBP 32 | 32 | #define RBP 32 |
33 | #define RBX 40 | 33 | #define RBX 40 |
34 | /* arguments: interrupts/non tracing syscalls only save upto here*/ | 34 | /* arguments: interrupts/non tracing syscalls only save up to here*/ |
35 | #define R11 48 | 35 | #define R11 48 |
36 | #define R10 56 | 36 | #define R10 56 |
37 | #define R9 64 | 37 | #define R9 64 |
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 78cd1ea94500..1babf8adecdf 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h | |||
@@ -73,7 +73,7 @@ struct pt_regs { | |||
73 | unsigned long r12; | 73 | unsigned long r12; |
74 | unsigned long rbp; | 74 | unsigned long rbp; |
75 | unsigned long rbx; | 75 | unsigned long rbx; |
76 | /* arguments: non interrupts/non tracing syscalls only save upto here*/ | 76 | /* arguments: non interrupts/non tracing syscalls only save up to here*/ |
77 | unsigned long r11; | 77 | unsigned long r11; |
78 | unsigned long r10; | 78 | unsigned long r10; |
79 | unsigned long r9; | 79 | unsigned long r9; |
@@ -103,7 +103,7 @@ struct pt_regs { | |||
103 | unsigned long r12; | 103 | unsigned long r12; |
104 | unsigned long bp; | 104 | unsigned long bp; |
105 | unsigned long bx; | 105 | unsigned long bx; |
106 | /* arguments: non interrupts/non tracing syscalls only save upto here*/ | 106 | /* arguments: non interrupts/non tracing syscalls only save up to here*/ |
107 | unsigned long r11; | 107 | unsigned long r11; |
108 | unsigned long r10; | 108 | unsigned long r10; |
109 | unsigned long r9; | 109 | unsigned long r9; |
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index 562d4fd31ba8..3250e3d605d9 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h | |||
@@ -18,7 +18,10 @@ extern struct machine_ops machine_ops; | |||
18 | 18 | ||
19 | void native_machine_crash_shutdown(struct pt_regs *regs); | 19 | void native_machine_crash_shutdown(struct pt_regs *regs); |
20 | void native_machine_shutdown(void); | 20 | void native_machine_shutdown(void); |
21 | void machine_real_restart(const unsigned char *code, int length); | 21 | void machine_real_restart(unsigned int type); |
22 | /* These must match dispatch_table in reboot_32.S */ | ||
23 | #define MRR_BIOS 0 | ||
24 | #define MRR_APM 1 | ||
22 | 25 | ||
23 | typedef void (*nmi_shootdown_cb)(int, struct die_args*); | 26 | typedef void (*nmi_shootdown_cb)(int, struct die_args*); |
24 | void nmi_shootdown_cpus(nmi_shootdown_cb callback); | 27 | void nmi_shootdown_cpus(nmi_shootdown_cb callback); |
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index d1e41b0f9b60..df4cd32b4cc6 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h | |||
@@ -37,26 +37,9 @@ | |||
37 | #endif | 37 | #endif |
38 | 38 | ||
39 | #ifdef __KERNEL__ | 39 | #ifdef __KERNEL__ |
40 | |||
41 | #include <linux/list.h> | ||
42 | #include <linux/spinlock.h> | ||
43 | #include <linux/lockdep.h> | ||
44 | #include <asm/asm.h> | 40 | #include <asm/asm.h> |
45 | 41 | ||
46 | struct rwsem_waiter; | ||
47 | |||
48 | extern asmregparm struct rw_semaphore * | ||
49 | rwsem_down_read_failed(struct rw_semaphore *sem); | ||
50 | extern asmregparm struct rw_semaphore * | ||
51 | rwsem_down_write_failed(struct rw_semaphore *sem); | ||
52 | extern asmregparm struct rw_semaphore * | ||
53 | rwsem_wake(struct rw_semaphore *); | ||
54 | extern asmregparm struct rw_semaphore * | ||
55 | rwsem_downgrade_wake(struct rw_semaphore *sem); | ||
56 | |||
57 | /* | 42 | /* |
58 | * the semaphore definition | ||
59 | * | ||
60 | * The bias values and the counter type limits the number of | 43 | * The bias values and the counter type limits the number of |
61 | * potential readers/writers to 32767 for 32 bits and 2147483647 | 44 | * potential readers/writers to 32767 for 32 bits and 2147483647 |
62 | * for 64 bits. | 45 | * for 64 bits. |
@@ -74,43 +57,6 @@ extern asmregparm struct rw_semaphore * | |||
74 | #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS | 57 | #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS |
75 | #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) | 58 | #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) |
76 | 59 | ||
77 | typedef signed long rwsem_count_t; | ||
78 | |||
79 | struct rw_semaphore { | ||
80 | rwsem_count_t count; | ||
81 | spinlock_t wait_lock; | ||
82 | struct list_head wait_list; | ||
83 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
84 | struct lockdep_map dep_map; | ||
85 | #endif | ||
86 | }; | ||
87 | |||
88 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
89 | # define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } | ||
90 | #else | ||
91 | # define __RWSEM_DEP_MAP_INIT(lockname) | ||
92 | #endif | ||
93 | |||
94 | |||
95 | #define __RWSEM_INITIALIZER(name) \ | ||
96 | { \ | ||
97 | RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \ | ||
98 | LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) \ | ||
99 | } | ||
100 | |||
101 | #define DECLARE_RWSEM(name) \ | ||
102 | struct rw_semaphore name = __RWSEM_INITIALIZER(name) | ||
103 | |||
104 | extern void __init_rwsem(struct rw_semaphore *sem, const char *name, | ||
105 | struct lock_class_key *key); | ||
106 | |||
107 | #define init_rwsem(sem) \ | ||
108 | do { \ | ||
109 | static struct lock_class_key __key; \ | ||
110 | \ | ||
111 | __init_rwsem((sem), #sem, &__key); \ | ||
112 | } while (0) | ||
113 | |||
114 | /* | 60 | /* |
115 | * lock for reading | 61 | * lock for reading |
116 | */ | 62 | */ |
@@ -133,7 +79,7 @@ static inline void __down_read(struct rw_semaphore *sem) | |||
133 | */ | 79 | */ |
134 | static inline int __down_read_trylock(struct rw_semaphore *sem) | 80 | static inline int __down_read_trylock(struct rw_semaphore *sem) |
135 | { | 81 | { |
136 | rwsem_count_t result, tmp; | 82 | long result, tmp; |
137 | asm volatile("# beginning __down_read_trylock\n\t" | 83 | asm volatile("# beginning __down_read_trylock\n\t" |
138 | " mov %0,%1\n\t" | 84 | " mov %0,%1\n\t" |
139 | "1:\n\t" | 85 | "1:\n\t" |
@@ -155,7 +101,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) | |||
155 | */ | 101 | */ |
156 | static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) | 102 | static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) |
157 | { | 103 | { |
158 | rwsem_count_t tmp; | 104 | long tmp; |
159 | asm volatile("# beginning down_write\n\t" | 105 | asm volatile("# beginning down_write\n\t" |
160 | LOCK_PREFIX " xadd %1,(%2)\n\t" | 106 | LOCK_PREFIX " xadd %1,(%2)\n\t" |
161 | /* adds 0xffff0001, returns the old value */ | 107 | /* adds 0xffff0001, returns the old value */ |
@@ -180,9 +126,8 @@ static inline void __down_write(struct rw_semaphore *sem) | |||
180 | */ | 126 | */ |
181 | static inline int __down_write_trylock(struct rw_semaphore *sem) | 127 | static inline int __down_write_trylock(struct rw_semaphore *sem) |
182 | { | 128 | { |
183 | rwsem_count_t ret = cmpxchg(&sem->count, | 129 | long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, |
184 | RWSEM_UNLOCKED_VALUE, | 130 | RWSEM_ACTIVE_WRITE_BIAS); |
185 | RWSEM_ACTIVE_WRITE_BIAS); | ||
186 | if (ret == RWSEM_UNLOCKED_VALUE) | 131 | if (ret == RWSEM_UNLOCKED_VALUE) |
187 | return 1; | 132 | return 1; |
188 | return 0; | 133 | return 0; |
@@ -193,7 +138,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) | |||
193 | */ | 138 | */ |
194 | static inline void __up_read(struct rw_semaphore *sem) | 139 | static inline void __up_read(struct rw_semaphore *sem) |
195 | { | 140 | { |
196 | rwsem_count_t tmp; | 141 | long tmp; |
197 | asm volatile("# beginning __up_read\n\t" | 142 | asm volatile("# beginning __up_read\n\t" |
198 | LOCK_PREFIX " xadd %1,(%2)\n\t" | 143 | LOCK_PREFIX " xadd %1,(%2)\n\t" |
199 | /* subtracts 1, returns the old value */ | 144 | /* subtracts 1, returns the old value */ |
@@ -211,7 +156,7 @@ static inline void __up_read(struct rw_semaphore *sem) | |||
211 | */ | 156 | */ |
212 | static inline void __up_write(struct rw_semaphore *sem) | 157 | static inline void __up_write(struct rw_semaphore *sem) |
213 | { | 158 | { |
214 | rwsem_count_t tmp; | 159 | long tmp; |
215 | asm volatile("# beginning __up_write\n\t" | 160 | asm volatile("# beginning __up_write\n\t" |
216 | LOCK_PREFIX " xadd %1,(%2)\n\t" | 161 | LOCK_PREFIX " xadd %1,(%2)\n\t" |
217 | /* subtracts 0xffff0001, returns the old value */ | 162 | /* subtracts 0xffff0001, returns the old value */ |
@@ -247,8 +192,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem) | |||
247 | /* | 192 | /* |
248 | * implement atomic add functionality | 193 | * implement atomic add functionality |
249 | */ | 194 | */ |
250 | static inline void rwsem_atomic_add(rwsem_count_t delta, | 195 | static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) |
251 | struct rw_semaphore *sem) | ||
252 | { | 196 | { |
253 | asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0" | 197 | asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0" |
254 | : "+m" (sem->count) | 198 | : "+m" (sem->count) |
@@ -258,10 +202,9 @@ static inline void rwsem_atomic_add(rwsem_count_t delta, | |||
258 | /* | 202 | /* |
259 | * implement exchange and add functionality | 203 | * implement exchange and add functionality |
260 | */ | 204 | */ |
261 | static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta, | 205 | static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) |
262 | struct rw_semaphore *sem) | ||
263 | { | 206 | { |
264 | rwsem_count_t tmp = delta; | 207 | long tmp = delta; |
265 | 208 | ||
266 | asm volatile(LOCK_PREFIX "xadd %0,%1" | 209 | asm volatile(LOCK_PREFIX "xadd %0,%1" |
267 | : "+r" (tmp), "+m" (sem->count) | 210 | : "+r" (tmp), "+m" (sem->count) |
@@ -270,10 +213,5 @@ static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta, | |||
270 | return tmp + delta; | 213 | return tmp + delta; |
271 | } | 214 | } |
272 | 215 | ||
273 | static inline int rwsem_is_locked(struct rw_semaphore *sem) | ||
274 | { | ||
275 | return (sem->count != 0); | ||
276 | } | ||
277 | |||
278 | #endif /* __KERNEL__ */ | 216 | #endif /* __KERNEL__ */ |
279 | #endif /* _ASM_X86_RWSEM_H */ | 217 | #endif /* _ASM_X86_RWSEM_H */ |
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 231f1c1d6607..cd84f7208f76 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h | |||
@@ -1,14 +1,16 @@ | |||
1 | #ifndef _ASM_X86_SEGMENT_H | 1 | #ifndef _ASM_X86_SEGMENT_H |
2 | #define _ASM_X86_SEGMENT_H | 2 | #define _ASM_X86_SEGMENT_H |
3 | 3 | ||
4 | #include <linux/const.h> | ||
5 | |||
4 | /* Constructor for a conventional segment GDT (or LDT) entry */ | 6 | /* Constructor for a conventional segment GDT (or LDT) entry */ |
5 | /* This is a macro so it can be used in initializers */ | 7 | /* This is a macro so it can be used in initializers */ |
6 | #define GDT_ENTRY(flags, base, limit) \ | 8 | #define GDT_ENTRY(flags, base, limit) \ |
7 | ((((base) & 0xff000000ULL) << (56-24)) | \ | 9 | ((((base) & _AC(0xff000000,ULL)) << (56-24)) | \ |
8 | (((flags) & 0x0000f0ffULL) << 40) | \ | 10 | (((flags) & _AC(0x0000f0ff,ULL)) << 40) | \ |
9 | (((limit) & 0x000f0000ULL) << (48-16)) | \ | 11 | (((limit) & _AC(0x000f0000,ULL)) << (48-16)) | \ |
10 | (((base) & 0x00ffffffULL) << 16) | \ | 12 | (((base) & _AC(0x00ffffff,ULL)) << 16) | \ |
11 | (((limit) & 0x0000ffffULL))) | 13 | (((limit) & _AC(0x0000ffff,ULL)))) |
12 | 14 | ||
13 | /* Simple and small GDT entries for booting only */ | 15 | /* Simple and small GDT entries for booting only */ |
14 | 16 | ||
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 1f4695136776..73b11bc0ae6f 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h | |||
@@ -17,12 +17,24 @@ | |||
17 | #endif | 17 | #endif |
18 | #include <asm/thread_info.h> | 18 | #include <asm/thread_info.h> |
19 | #include <asm/cpumask.h> | 19 | #include <asm/cpumask.h> |
20 | #include <asm/cpufeature.h> | ||
20 | 21 | ||
21 | extern int smp_num_siblings; | 22 | extern int smp_num_siblings; |
22 | extern unsigned int num_processors; | 23 | extern unsigned int num_processors; |
23 | 24 | ||
25 | static inline bool cpu_has_ht_siblings(void) | ||
26 | { | ||
27 | bool has_siblings = false; | ||
28 | #ifdef CONFIG_SMP | ||
29 | has_siblings = cpu_has_ht && smp_num_siblings > 1; | ||
30 | #endif | ||
31 | return has_siblings; | ||
32 | } | ||
33 | |||
24 | DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map); | 34 | DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map); |
25 | DECLARE_PER_CPU(cpumask_var_t, cpu_core_map); | 35 | DECLARE_PER_CPU(cpumask_var_t, cpu_core_map); |
36 | /* cpus sharing the last level cache: */ | ||
37 | DECLARE_PER_CPU(cpumask_var_t, cpu_llc_shared_map); | ||
26 | DECLARE_PER_CPU(u16, cpu_llc_id); | 38 | DECLARE_PER_CPU(u16, cpu_llc_id); |
27 | DECLARE_PER_CPU(int, cpu_number); | 39 | DECLARE_PER_CPU(int, cpu_number); |
28 | 40 | ||
@@ -36,8 +48,16 @@ static inline struct cpumask *cpu_core_mask(int cpu) | |||
36 | return per_cpu(cpu_core_map, cpu); | 48 | return per_cpu(cpu_core_map, cpu); |
37 | } | 49 | } |
38 | 50 | ||
51 | static inline struct cpumask *cpu_llc_shared_mask(int cpu) | ||
52 | { | ||
53 | return per_cpu(cpu_llc_shared_map, cpu); | ||
54 | } | ||
55 | |||
39 | DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid); | 56 | DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid); |
40 | DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); | 57 | DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); |
58 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) | ||
59 | DECLARE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid); | ||
60 | #endif | ||
41 | 61 | ||
42 | /* Static state in head.S used to set up a CPU */ | 62 | /* Static state in head.S used to set up a CPU */ |
43 | extern unsigned long stack_start; /* Initial stack pointer address */ | 63 | extern unsigned long stack_start; /* Initial stack pointer address */ |
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 52b5c7ed3608..d7e89c83645d 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h | |||
@@ -47,7 +47,7 @@ struct stacktrace_ops { | |||
47 | }; | 47 | }; |
48 | 48 | ||
49 | void dump_trace(struct task_struct *tsk, struct pt_regs *regs, | 49 | void dump_trace(struct task_struct *tsk, struct pt_regs *regs, |
50 | unsigned long *stack, | 50 | unsigned long *stack, unsigned long bp, |
51 | const struct stacktrace_ops *ops, void *data); | 51 | const struct stacktrace_ops *ops, void *data); |
52 | 52 | ||
53 | #ifdef CONFIG_X86_32 | 53 | #ifdef CONFIG_X86_32 |
@@ -86,11 +86,11 @@ stack_frame(struct task_struct *task, struct pt_regs *regs) | |||
86 | 86 | ||
87 | extern void | 87 | extern void |
88 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | 88 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
89 | unsigned long *stack, char *log_lvl); | 89 | unsigned long *stack, unsigned long bp, char *log_lvl); |
90 | 90 | ||
91 | extern void | 91 | extern void |
92 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 92 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
93 | unsigned long *sp, char *log_lvl); | 93 | unsigned long *sp, unsigned long bp, char *log_lvl); |
94 | 94 | ||
95 | extern unsigned int code_bytes; | 95 | extern unsigned int code_bytes; |
96 | 96 | ||
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 33ecc3ea8782..12569e691ce3 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h | |||
@@ -98,8 +98,6 @@ do { \ | |||
98 | */ | 98 | */ |
99 | #define HAVE_DISABLE_HLT | 99 | #define HAVE_DISABLE_HLT |
100 | #else | 100 | #else |
101 | #define __SAVE(reg, offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t" | ||
102 | #define __RESTORE(reg, offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t" | ||
103 | 101 | ||
104 | /* frame pointer must be last for get_wchan */ | 102 | /* frame pointer must be last for get_wchan */ |
105 | #define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t" | 103 | #define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t" |
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index f0b6e5dbc5a0..1f2e61e28981 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
@@ -161,8 +161,14 @@ struct thread_info { | |||
161 | 161 | ||
162 | #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR | 162 | #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR |
163 | 163 | ||
164 | #define alloc_thread_info(tsk) \ | 164 | #define alloc_thread_info_node(tsk, node) \ |
165 | ((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER)) | 165 | ({ \ |
166 | struct page *page = alloc_pages_node(node, THREAD_FLAGS, \ | ||
167 | THREAD_ORDER); \ | ||
168 | struct thread_info *ret = page ? page_address(page) : NULL; \ | ||
169 | \ | ||
170 | ret; \ | ||
171 | }) | ||
166 | 172 | ||
167 | #ifdef CONFIG_X86_32 | 173 | #ifdef CONFIG_X86_32 |
168 | 174 | ||
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 21899cc31e52..910a7084f7f2 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h | |||
@@ -47,21 +47,6 @@ | |||
47 | 47 | ||
48 | #include <asm/mpspec.h> | 48 | #include <asm/mpspec.h> |
49 | 49 | ||
50 | #ifdef CONFIG_X86_32 | ||
51 | |||
52 | /* Mappings between logical cpu number and node number */ | ||
53 | extern int cpu_to_node_map[]; | ||
54 | |||
55 | /* Returns the number of the node containing CPU 'cpu' */ | ||
56 | static inline int __cpu_to_node(int cpu) | ||
57 | { | ||
58 | return cpu_to_node_map[cpu]; | ||
59 | } | ||
60 | #define early_cpu_to_node __cpu_to_node | ||
61 | #define cpu_to_node __cpu_to_node | ||
62 | |||
63 | #else /* CONFIG_X86_64 */ | ||
64 | |||
65 | /* Mappings between logical cpu number and node number */ | 50 | /* Mappings between logical cpu number and node number */ |
66 | DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); | 51 | DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); |
67 | 52 | ||
@@ -84,8 +69,6 @@ static inline int early_cpu_to_node(int cpu) | |||
84 | 69 | ||
85 | #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ | 70 | #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ |
86 | 71 | ||
87 | #endif /* CONFIG_X86_64 */ | ||
88 | |||
89 | /* Mappings between node number and cpus on that node. */ | 72 | /* Mappings between node number and cpus on that node. */ |
90 | extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; | 73 | extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; |
91 | 74 | ||
@@ -155,7 +138,7 @@ extern unsigned long node_remap_size[]; | |||
155 | .balance_interval = 1, \ | 138 | .balance_interval = 1, \ |
156 | } | 139 | } |
157 | 140 | ||
158 | #ifdef CONFIG_X86_64_ACPI_NUMA | 141 | #ifdef CONFIG_X86_64 |
159 | extern int __node_distance(int, int); | 142 | extern int __node_distance(int, int); |
160 | #define node_distance(a, b) __node_distance(a, b) | 143 | #define node_distance(a, b) __node_distance(a, b) |
161 | #endif | 144 | #endif |
diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h index f4500fb3b485..feca3118a73b 100644 --- a/arch/x86/include/asm/trampoline.h +++ b/arch/x86/include/asm/trampoline.h | |||
@@ -3,25 +3,36 @@ | |||
3 | 3 | ||
4 | #ifndef __ASSEMBLY__ | 4 | #ifndef __ASSEMBLY__ |
5 | 5 | ||
6 | #ifdef CONFIG_X86_TRAMPOLINE | 6 | #include <linux/types.h> |
7 | #include <asm/io.h> | ||
8 | |||
7 | /* | 9 | /* |
8 | * Trampoline 80x86 program as an array. | 10 | * Trampoline 80x86 program as an array. These are in the init rodata |
11 | * segment, but that's okay, because we only care about the relative | ||
12 | * addresses of the symbols. | ||
9 | */ | 13 | */ |
10 | extern const unsigned char trampoline_data []; | 14 | extern const unsigned char x86_trampoline_start []; |
11 | extern const unsigned char trampoline_end []; | 15 | extern const unsigned char x86_trampoline_end []; |
12 | extern unsigned char *trampoline_base; | 16 | extern unsigned char *x86_trampoline_base; |
13 | 17 | ||
14 | extern unsigned long init_rsp; | 18 | extern unsigned long init_rsp; |
15 | extern unsigned long initial_code; | 19 | extern unsigned long initial_code; |
16 | extern unsigned long initial_gs; | 20 | extern unsigned long initial_gs; |
17 | 21 | ||
18 | #define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE) | 22 | extern void __init setup_trampolines(void); |
23 | |||
24 | extern const unsigned char trampoline_data[]; | ||
25 | extern const unsigned char trampoline_status[]; | ||
26 | |||
27 | #define TRAMPOLINE_SYM(x) \ | ||
28 | ((void *)(x86_trampoline_base + \ | ||
29 | ((const unsigned char *)(x) - x86_trampoline_start))) | ||
19 | 30 | ||
20 | extern unsigned long setup_trampoline(void); | 31 | /* Address of the SMP trampoline */ |
21 | extern void __init reserve_trampoline_memory(void); | 32 | static inline unsigned long trampoline_address(void) |
22 | #else | 33 | { |
23 | static inline void reserve_trampoline_memory(void) {} | 34 | return virt_to_phys(TRAMPOLINE_SYM(trampoline_data)); |
24 | #endif /* CONFIG_X86_TRAMPOLINE */ | 35 | } |
25 | 36 | ||
26 | #endif /* __ASSEMBLY__ */ | 37 | #endif /* __ASSEMBLY__ */ |
27 | 38 | ||
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 1ca132fc0d03..83e2efd181e2 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h | |||
@@ -35,7 +35,7 @@ static inline cycles_t get_cycles(void) | |||
35 | static __always_inline cycles_t vget_cycles(void) | 35 | static __always_inline cycles_t vget_cycles(void) |
36 | { | 36 | { |
37 | /* | 37 | /* |
38 | * We only do VDSOs on TSC capable CPUs, so this shouldnt | 38 | * We only do VDSOs on TSC capable CPUs, so this shouldn't |
39 | * access boot_cpu_data (which is not VDSO-safe): | 39 | * access boot_cpu_data (which is not VDSO-safe): |
40 | */ | 40 | */ |
41 | #ifndef CONFIG_X86_TSC | 41 | #ifndef CONFIG_X86_TSC |
diff --git a/arch/x86/include/asm/types.h b/arch/x86/include/asm/types.h index df1da20f4534..88102055a4b8 100644 --- a/arch/x86/include/asm/types.h +++ b/arch/x86/include/asm/types.h | |||
@@ -1,20 +1,12 @@ | |||
1 | #ifndef _ASM_X86_TYPES_H | 1 | #ifndef _ASM_X86_TYPES_H |
2 | #define _ASM_X86_TYPES_H | 2 | #define _ASM_X86_TYPES_H |
3 | 3 | ||
4 | #define dma_addr_t dma_addr_t | ||
5 | |||
6 | #include <asm-generic/types.h> | 4 | #include <asm-generic/types.h> |
7 | 5 | ||
8 | #ifdef __KERNEL__ | 6 | #ifdef __KERNEL__ |
9 | #ifndef __ASSEMBLY__ | 7 | #ifndef __ASSEMBLY__ |
10 | 8 | ||
11 | typedef u64 dma64_addr_t; | 9 | typedef u64 dma64_addr_t; |
12 | #if defined(CONFIG_X86_64) || defined(CONFIG_HIGHMEM64G) | ||
13 | /* DMA addresses come in 32-bit and 64-bit flavours. */ | ||
14 | typedef u64 dma_addr_t; | ||
15 | #else | ||
16 | typedef u32 dma_addr_t; | ||
17 | #endif | ||
18 | 10 | ||
19 | #endif /* __ASSEMBLY__ */ | 11 | #endif /* __ASSEMBLY__ */ |
20 | #endif /* __KERNEL__ */ | 12 | #endif /* __KERNEL__ */ |
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index b766a5e8ba0e..a755ef5e5977 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h | |||
@@ -346,10 +346,14 @@ | |||
346 | #define __NR_fanotify_init 338 | 346 | #define __NR_fanotify_init 338 |
347 | #define __NR_fanotify_mark 339 | 347 | #define __NR_fanotify_mark 339 |
348 | #define __NR_prlimit64 340 | 348 | #define __NR_prlimit64 340 |
349 | #define __NR_name_to_handle_at 341 | ||
350 | #define __NR_open_by_handle_at 342 | ||
351 | #define __NR_clock_adjtime 343 | ||
352 | #define __NR_syncfs 344 | ||
349 | 353 | ||
350 | #ifdef __KERNEL__ | 354 | #ifdef __KERNEL__ |
351 | 355 | ||
352 | #define NR_syscalls 341 | 356 | #define NR_syscalls 345 |
353 | 357 | ||
354 | #define __ARCH_WANT_IPC_PARSE_VERSION | 358 | #define __ARCH_WANT_IPC_PARSE_VERSION |
355 | #define __ARCH_WANT_OLD_READDIR | 359 | #define __ARCH_WANT_OLD_READDIR |
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 363e9b8a715b..160fa76bd578 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h | |||
@@ -669,6 +669,14 @@ __SYSCALL(__NR_fanotify_init, sys_fanotify_init) | |||
669 | __SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) | 669 | __SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) |
670 | #define __NR_prlimit64 302 | 670 | #define __NR_prlimit64 302 |
671 | __SYSCALL(__NR_prlimit64, sys_prlimit64) | 671 | __SYSCALL(__NR_prlimit64, sys_prlimit64) |
672 | #define __NR_name_to_handle_at 303 | ||
673 | __SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at) | ||
674 | #define __NR_open_by_handle_at 304 | ||
675 | __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at) | ||
676 | #define __NR_clock_adjtime 305 | ||
677 | __SYSCALL(__NR_clock_adjtime, sys_clock_adjtime) | ||
678 | #define __NR_syncfs 306 | ||
679 | __SYSCALL(__NR_syncfs, sys_syncfs) | ||
672 | 680 | ||
673 | #ifndef __NO_STUBS | 681 | #ifndef __NO_STUBS |
674 | #define __ARCH_WANT_OLD_READDIR | 682 | #define __ARCH_WANT_OLD_READDIR |
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 64642ad019fb..643ebf2e2ad8 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h | |||
@@ -83,11 +83,13 @@ struct x86_init_paging { | |||
83 | * boot cpu | 83 | * boot cpu |
84 | * @tsc_pre_init: platform function called before TSC init | 84 | * @tsc_pre_init: platform function called before TSC init |
85 | * @timer_init: initialize the platform timer (default PIT/HPET) | 85 | * @timer_init: initialize the platform timer (default PIT/HPET) |
86 | * @wallclock_init: init the wallclock device | ||
86 | */ | 87 | */ |
87 | struct x86_init_timers { | 88 | struct x86_init_timers { |
88 | void (*setup_percpu_clockev)(void); | 89 | void (*setup_percpu_clockev)(void); |
89 | void (*tsc_pre_init)(void); | 90 | void (*tsc_pre_init)(void); |
90 | void (*timer_init)(void); | 91 | void (*timer_init)(void); |
92 | void (*wallclock_init)(void); | ||
91 | }; | 93 | }; |
92 | 94 | ||
93 | /** | 95 | /** |
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index a3c28ae4025b..8508bfe52296 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h | |||
@@ -287,7 +287,7 @@ HYPERVISOR_fpu_taskswitch(int set) | |||
287 | static inline int | 287 | static inline int |
288 | HYPERVISOR_sched_op(int cmd, void *arg) | 288 | HYPERVISOR_sched_op(int cmd, void *arg) |
289 | { | 289 | { |
290 | return _hypercall2(int, sched_op_new, cmd, arg); | 290 | return _hypercall2(int, sched_op, cmd, arg); |
291 | } | 291 | } |
292 | 292 | ||
293 | static inline long | 293 | static inline long |
@@ -422,10 +422,17 @@ HYPERVISOR_set_segment_base(int reg, unsigned long value) | |||
422 | #endif | 422 | #endif |
423 | 423 | ||
424 | static inline int | 424 | static inline int |
425 | HYPERVISOR_suspend(unsigned long srec) | 425 | HYPERVISOR_suspend(unsigned long start_info_mfn) |
426 | { | 426 | { |
427 | return _hypercall3(int, sched_op, SCHEDOP_shutdown, | 427 | struct sched_shutdown r = { .reason = SHUTDOWN_suspend }; |
428 | SHUTDOWN_suspend, srec); | 428 | |
429 | /* | ||
430 | * For a PV guest the tools require that the start_info mfn be | ||
431 | * present in rdx/edx when the hypercall is made. Per the | ||
432 | * hypercall calling convention this is the third hypercall | ||
433 | * argument, which is start_info_mfn here. | ||
434 | */ | ||
435 | return _hypercall3(int, sched_op, SCHEDOP_shutdown, &r, start_info_mfn); | ||
429 | } | 436 | } |
430 | 437 | ||
431 | static inline int | 438 | static inline int |
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h index 1c10c88ee4e1..5d4922ad4b9b 100644 --- a/arch/x86/include/asm/xen/interface.h +++ b/arch/x86/include/asm/xen/interface.h | |||
@@ -86,7 +86,7 @@ DEFINE_GUEST_HANDLE(void); | |||
86 | * The privilege level specifies which modes may enter a trap via a software | 86 | * The privilege level specifies which modes may enter a trap via a software |
87 | * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate | 87 | * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate |
88 | * privilege levels as follows: | 88 | * privilege levels as follows: |
89 | * Level == 0: Noone may enter | 89 | * Level == 0: No one may enter |
90 | * Level == 1: Kernel may enter | 90 | * Level == 1: Kernel may enter |
91 | * Level == 2: Kernel may enter | 91 | * Level == 2: Kernel may enter |
92 | * Level == 3: Everyone may enter | 92 | * Level == 3: Everyone may enter |
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index f25bdf238a33..c61934fbf22a 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h | |||
@@ -29,8 +29,10 @@ typedef struct xpaddr { | |||
29 | 29 | ||
30 | /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ | 30 | /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ |
31 | #define INVALID_P2M_ENTRY (~0UL) | 31 | #define INVALID_P2M_ENTRY (~0UL) |
32 | #define FOREIGN_FRAME_BIT (1UL<<31) | 32 | #define FOREIGN_FRAME_BIT (1UL<<(BITS_PER_LONG-1)) |
33 | #define IDENTITY_FRAME_BIT (1UL<<(BITS_PER_LONG-2)) | ||
33 | #define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) | 34 | #define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) |
35 | #define IDENTITY_FRAME(m) ((m) | IDENTITY_FRAME_BIT) | ||
34 | 36 | ||
35 | /* Maximum amount of memory we can handle in a domain in pages */ | 37 | /* Maximum amount of memory we can handle in a domain in pages */ |
36 | #define MAX_DOMAIN_PAGES \ | 38 | #define MAX_DOMAIN_PAGES \ |
@@ -41,12 +43,18 @@ extern unsigned int machine_to_phys_order; | |||
41 | 43 | ||
42 | extern unsigned long get_phys_to_machine(unsigned long pfn); | 44 | extern unsigned long get_phys_to_machine(unsigned long pfn); |
43 | extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); | 45 | extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); |
46 | extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); | ||
47 | extern unsigned long set_phys_range_identity(unsigned long pfn_s, | ||
48 | unsigned long pfn_e); | ||
44 | 49 | ||
45 | extern int m2p_add_override(unsigned long mfn, struct page *page); | 50 | extern int m2p_add_override(unsigned long mfn, struct page *page); |
46 | extern int m2p_remove_override(struct page *page); | 51 | extern int m2p_remove_override(struct page *page); |
47 | extern struct page *m2p_find_override(unsigned long mfn); | 52 | extern struct page *m2p_find_override(unsigned long mfn); |
48 | extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); | 53 | extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); |
49 | 54 | ||
55 | #ifdef CONFIG_XEN_DEBUG_FS | ||
56 | extern int p2m_dump_show(struct seq_file *m, void *v); | ||
57 | #endif | ||
50 | static inline unsigned long pfn_to_mfn(unsigned long pfn) | 58 | static inline unsigned long pfn_to_mfn(unsigned long pfn) |
51 | { | 59 | { |
52 | unsigned long mfn; | 60 | unsigned long mfn; |
@@ -57,7 +65,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn) | |||
57 | mfn = get_phys_to_machine(pfn); | 65 | mfn = get_phys_to_machine(pfn); |
58 | 66 | ||
59 | if (mfn != INVALID_P2M_ENTRY) | 67 | if (mfn != INVALID_P2M_ENTRY) |
60 | mfn &= ~FOREIGN_FRAME_BIT; | 68 | mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT); |
61 | 69 | ||
62 | return mfn; | 70 | return mfn; |
63 | } | 71 | } |
@@ -73,25 +81,44 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn) | |||
73 | static inline unsigned long mfn_to_pfn(unsigned long mfn) | 81 | static inline unsigned long mfn_to_pfn(unsigned long mfn) |
74 | { | 82 | { |
75 | unsigned long pfn; | 83 | unsigned long pfn; |
84 | int ret = 0; | ||
76 | 85 | ||
77 | if (xen_feature(XENFEAT_auto_translated_physmap)) | 86 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
78 | return mfn; | 87 | return mfn; |
79 | 88 | ||
89 | if (unlikely((mfn >> machine_to_phys_order) != 0)) { | ||
90 | pfn = ~0; | ||
91 | goto try_override; | ||
92 | } | ||
80 | pfn = 0; | 93 | pfn = 0; |
81 | /* | 94 | /* |
82 | * The array access can fail (e.g., device space beyond end of RAM). | 95 | * The array access can fail (e.g., device space beyond end of RAM). |
83 | * In such cases it doesn't matter what we return (we return garbage), | 96 | * In such cases it doesn't matter what we return (we return garbage), |
84 | * but we must handle the fault without crashing! | 97 | * but we must handle the fault without crashing! |
85 | */ | 98 | */ |
86 | __get_user(pfn, &machine_to_phys_mapping[mfn]); | 99 | ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); |
87 | 100 | try_override: | |
88 | /* | 101 | /* ret might be < 0 if there are no entries in the m2p for mfn */ |
89 | * If this appears to be a foreign mfn (because the pfn | 102 | if (ret < 0) |
90 | * doesn't map back to the mfn), then check the local override | 103 | pfn = ~0; |
91 | * table to see if there's a better pfn to use. | 104 | else if (get_phys_to_machine(pfn) != mfn) |
105 | /* | ||
106 | * If this appears to be a foreign mfn (because the pfn | ||
107 | * doesn't map back to the mfn), then check the local override | ||
108 | * table to see if there's a better pfn to use. | ||
109 | * | ||
110 | * m2p_find_override_pfn returns ~0 if it doesn't find anything. | ||
111 | */ | ||
112 | pfn = m2p_find_override_pfn(mfn, ~0); | ||
113 | |||
114 | /* | ||
115 | * pfn is ~0 if there are no entries in the m2p for mfn or if the | ||
116 | * entry doesn't map back to the mfn and m2p_override doesn't have a | ||
117 | * valid entry for it. | ||
92 | */ | 118 | */ |
93 | if (get_phys_to_machine(pfn) != mfn) | 119 | if (pfn == ~0 && |
94 | pfn = m2p_find_override_pfn(mfn, pfn); | 120 | get_phys_to_machine(mfn) == IDENTITY_FRAME(mfn)) |
121 | pfn = mfn; | ||
95 | 122 | ||
96 | return pfn; | 123 | return pfn; |
97 | } | 124 | } |
diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h index 2329b3eaf8d3..aa8620989162 100644 --- a/arch/x86/include/asm/xen/pci.h +++ b/arch/x86/include/asm/xen/pci.h | |||
@@ -27,16 +27,16 @@ static inline void __init xen_setup_pirqs(void) | |||
27 | * its own functions. | 27 | * its own functions. |
28 | */ | 28 | */ |
29 | struct xen_pci_frontend_ops { | 29 | struct xen_pci_frontend_ops { |
30 | int (*enable_msi)(struct pci_dev *dev, int **vectors); | 30 | int (*enable_msi)(struct pci_dev *dev, int vectors[]); |
31 | void (*disable_msi)(struct pci_dev *dev); | 31 | void (*disable_msi)(struct pci_dev *dev); |
32 | int (*enable_msix)(struct pci_dev *dev, int **vectors, int nvec); | 32 | int (*enable_msix)(struct pci_dev *dev, int vectors[], int nvec); |
33 | void (*disable_msix)(struct pci_dev *dev); | 33 | void (*disable_msix)(struct pci_dev *dev); |
34 | }; | 34 | }; |
35 | 35 | ||
36 | extern struct xen_pci_frontend_ops *xen_pci_frontend; | 36 | extern struct xen_pci_frontend_ops *xen_pci_frontend; |
37 | 37 | ||
38 | static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev, | 38 | static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev, |
39 | int **vectors) | 39 | int vectors[]) |
40 | { | 40 | { |
41 | if (xen_pci_frontend && xen_pci_frontend->enable_msi) | 41 | if (xen_pci_frontend && xen_pci_frontend->enable_msi) |
42 | return xen_pci_frontend->enable_msi(dev, vectors); | 42 | return xen_pci_frontend->enable_msi(dev, vectors); |
@@ -48,7 +48,7 @@ static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev) | |||
48 | xen_pci_frontend->disable_msi(dev); | 48 | xen_pci_frontend->disable_msi(dev); |
49 | } | 49 | } |
50 | static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev, | 50 | static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev, |
51 | int **vectors, int nvec) | 51 | int vectors[], int nvec) |
52 | { | 52 | { |
53 | if (xen_pci_frontend && xen_pci_frontend->enable_msix) | 53 | if (xen_pci_frontend && xen_pci_frontend->enable_msix) |
54 | return xen_pci_frontend->enable_msix(dev, vectors, nvec); | 54 | return xen_pci_frontend->enable_msix(dev, vectors, nvec); |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 34244b2cd880..7338ef2218bc 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -41,13 +41,13 @@ obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o | |||
41 | obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o | 41 | obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o |
42 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o | 42 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o |
43 | obj-y += bootflag.o e820.o | 43 | obj-y += bootflag.o e820.o |
44 | obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o | 44 | obj-y += pci-dma.o quirks.o topology.o kdebugfs.o |
45 | obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o | 45 | obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o |
46 | obj-y += tsc.o io_delay.o rtc.o | 46 | obj-y += tsc.o io_delay.o rtc.o |
47 | obj-y += pci-iommu_table.o | 47 | obj-y += pci-iommu_table.o |
48 | obj-y += resource.o | 48 | obj-y += resource.o |
49 | 49 | ||
50 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o | 50 | obj-y += trampoline.o trampoline_$(BITS).o |
51 | obj-y += process.o | 51 | obj-y += process.o |
52 | obj-y += i387.o xsave.o | 52 | obj-y += i387.o xsave.o |
53 | obj-y += ptrace.o | 53 | obj-y += ptrace.o |
@@ -55,10 +55,12 @@ obj-$(CONFIG_X86_32) += tls.o | |||
55 | obj-$(CONFIG_IA32_EMULATION) += tls.o | 55 | obj-$(CONFIG_IA32_EMULATION) += tls.o |
56 | obj-y += step.o | 56 | obj-y += step.o |
57 | obj-$(CONFIG_INTEL_TXT) += tboot.o | 57 | obj-$(CONFIG_INTEL_TXT) += tboot.o |
58 | obj-$(CONFIG_ISA_DMA_API) += i8237.o | ||
58 | obj-$(CONFIG_STACKTRACE) += stacktrace.o | 59 | obj-$(CONFIG_STACKTRACE) += stacktrace.o |
59 | obj-y += cpu/ | 60 | obj-y += cpu/ |
60 | obj-y += acpi/ | 61 | obj-y += acpi/ |
61 | obj-y += reboot.o | 62 | obj-y += reboot.o |
63 | obj-$(CONFIG_X86_32) += reboot_32.o | ||
62 | obj-$(CONFIG_MCA) += mca_32.o | 64 | obj-$(CONFIG_MCA) += mca_32.o |
63 | obj-$(CONFIG_X86_MSR) += msr.o | 65 | obj-$(CONFIG_X86_MSR) += msr.o |
64 | obj-$(CONFIG_X86_CPUID) += cpuid.o | 66 | obj-$(CONFIG_X86_CPUID) += cpuid.o |
@@ -66,10 +68,9 @@ obj-$(CONFIG_PCI) += early-quirks.o | |||
66 | apm-y := apm_32.o | 68 | apm-y := apm_32.o |
67 | obj-$(CONFIG_APM) += apm.o | 69 | obj-$(CONFIG_APM) += apm.o |
68 | obj-$(CONFIG_SMP) += smp.o | 70 | obj-$(CONFIG_SMP) += smp.o |
69 | obj-$(CONFIG_SMP) += smpboot.o tsc_sync.o | 71 | obj-$(CONFIG_SMP) += smpboot.o |
72 | obj-$(CONFIG_SMP) += tsc_sync.o | ||
70 | obj-$(CONFIG_SMP) += setup_percpu.o | 73 | obj-$(CONFIG_SMP) += setup_percpu.o |
71 | obj-$(CONFIG_X86_64_SMP) += tsc_sync.o | ||
72 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o | ||
73 | obj-$(CONFIG_X86_MPPARSE) += mpparse.o | 74 | obj-$(CONFIG_X86_MPPARSE) += mpparse.o |
74 | obj-y += apic/ | 75 | obj-y += apic/ |
75 | obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o | 76 | obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o |
@@ -109,6 +110,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o | |||
109 | obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o | 110 | obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o |
110 | 111 | ||
111 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o | 112 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o |
113 | obj-$(CONFIG_OF) += devicetree.o | ||
112 | 114 | ||
113 | ### | 115 | ### |
114 | # 64 bit specific files | 116 | # 64 bit specific files |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 3e6e2d68f761..9a966c579af5 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -595,14 +595,8 @@ static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) | |||
595 | nid = acpi_get_node(handle); | 595 | nid = acpi_get_node(handle); |
596 | if (nid == -1 || !node_online(nid)) | 596 | if (nid == -1 || !node_online(nid)) |
597 | return; | 597 | return; |
598 | #ifdef CONFIG_X86_64 | 598 | set_apicid_to_node(physid, nid); |
599 | apicid_to_node[physid] = nid; | ||
600 | numa_set_node(cpu, nid); | 599 | numa_set_node(cpu, nid); |
601 | #else /* CONFIG_X86_32 */ | ||
602 | apicid_2_node[physid] = nid; | ||
603 | cpu_to_node_map[cpu] = nid; | ||
604 | #endif | ||
605 | |||
606 | #endif | 600 | #endif |
607 | } | 601 | } |
608 | 602 | ||
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S index 28595d6df47c..ead21b663117 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.S | |||
@@ -6,11 +6,17 @@ | |||
6 | #include <asm/page_types.h> | 6 | #include <asm/page_types.h> |
7 | #include <asm/pgtable_types.h> | 7 | #include <asm/pgtable_types.h> |
8 | #include <asm/processor-flags.h> | 8 | #include <asm/processor-flags.h> |
9 | #include "wakeup.h" | ||
9 | 10 | ||
10 | .code16 | 11 | .code16 |
11 | .section ".header", "a" | 12 | .section ".jump", "ax" |
13 | .globl _start | ||
14 | _start: | ||
15 | cli | ||
16 | jmp wakeup_code | ||
12 | 17 | ||
13 | /* This should match the structure in wakeup.h */ | 18 | /* This should match the structure in wakeup.h */ |
19 | .section ".header", "a" | ||
14 | .globl wakeup_header | 20 | .globl wakeup_header |
15 | wakeup_header: | 21 | wakeup_header: |
16 | video_mode: .short 0 /* Video mode number */ | 22 | video_mode: .short 0 /* Video mode number */ |
@@ -30,14 +36,11 @@ wakeup_jmp: .byte 0xea /* ljmpw */ | |||
30 | wakeup_jmp_off: .word 3f | 36 | wakeup_jmp_off: .word 3f |
31 | wakeup_jmp_seg: .word 0 | 37 | wakeup_jmp_seg: .word 0 |
32 | wakeup_gdt: .quad 0, 0, 0 | 38 | wakeup_gdt: .quad 0, 0, 0 |
33 | signature: .long 0x51ee1111 | 39 | signature: .long WAKEUP_HEADER_SIGNATURE |
34 | 40 | ||
35 | .text | 41 | .text |
36 | .globl _start | ||
37 | .code16 | 42 | .code16 |
38 | wakeup_code: | 43 | wakeup_code: |
39 | _start: | ||
40 | cli | ||
41 | cld | 44 | cld |
42 | 45 | ||
43 | /* Apparently some dimwit BIOS programmers don't know how to | 46 | /* Apparently some dimwit BIOS programmers don't know how to |
@@ -77,12 +80,12 @@ _start: | |||
77 | 80 | ||
78 | /* Check header signature... */ | 81 | /* Check header signature... */ |
79 | movl signature, %eax | 82 | movl signature, %eax |
80 | cmpl $0x51ee1111, %eax | 83 | cmpl $WAKEUP_HEADER_SIGNATURE, %eax |
81 | jne bogus_real_magic | 84 | jne bogus_real_magic |
82 | 85 | ||
83 | /* Check we really have everything... */ | 86 | /* Check we really have everything... */ |
84 | movl end_signature, %eax | 87 | movl end_signature, %eax |
85 | cmpl $0x65a22c82, %eax | 88 | cmpl $WAKEUP_END_SIGNATURE, %eax |
86 | jne bogus_real_magic | 89 | jne bogus_real_magic |
87 | 90 | ||
88 | /* Call the C code */ | 91 | /* Call the C code */ |
@@ -147,3 +150,7 @@ wakeup_heap: | |||
147 | wakeup_stack: | 150 | wakeup_stack: |
148 | .space 2048 | 151 | .space 2048 |
149 | wakeup_stack_end: | 152 | wakeup_stack_end: |
153 | |||
154 | .section ".signature","a" | ||
155 | end_signature: | ||
156 | .long WAKEUP_END_SIGNATURE | ||
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.h b/arch/x86/kernel/acpi/realmode/wakeup.h index 69d38d0b2b64..e1828c07e79c 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.h +++ b/arch/x86/kernel/acpi/realmode/wakeup.h | |||
@@ -35,7 +35,8 @@ struct wakeup_header { | |||
35 | extern struct wakeup_header wakeup_header; | 35 | extern struct wakeup_header wakeup_header; |
36 | #endif | 36 | #endif |
37 | 37 | ||
38 | #define HEADER_OFFSET 0x3f00 | 38 | #define WAKEUP_HEADER_OFFSET 8 |
39 | #define WAKEUP_SIZE 0x4000 | 39 | #define WAKEUP_HEADER_SIGNATURE 0x51ee1111 |
40 | #define WAKEUP_END_SIGNATURE 0x65a22c82 | ||
40 | 41 | ||
41 | #endif /* ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H */ | 42 | #endif /* ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H */ |
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S index 060fff8f5c5b..d4f8010a5b1b 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.lds.S | |||
@@ -13,9 +13,19 @@ ENTRY(_start) | |||
13 | SECTIONS | 13 | SECTIONS |
14 | { | 14 | { |
15 | . = 0; | 15 | . = 0; |
16 | .jump : { | ||
17 | *(.jump) | ||
18 | } = 0x90909090 | ||
19 | |||
20 | . = WAKEUP_HEADER_OFFSET; | ||
21 | .header : { | ||
22 | *(.header) | ||
23 | } | ||
24 | |||
25 | . = ALIGN(16); | ||
16 | .text : { | 26 | .text : { |
17 | *(.text*) | 27 | *(.text*) |
18 | } | 28 | } = 0x90909090 |
19 | 29 | ||
20 | . = ALIGN(16); | 30 | . = ALIGN(16); |
21 | .rodata : { | 31 | .rodata : { |
@@ -33,11 +43,6 @@ SECTIONS | |||
33 | *(.data*) | 43 | *(.data*) |
34 | } | 44 | } |
35 | 45 | ||
36 | .signature : { | ||
37 | end_signature = .; | ||
38 | LONG(0x65a22c82) | ||
39 | } | ||
40 | |||
41 | . = ALIGN(16); | 46 | . = ALIGN(16); |
42 | .bss : { | 47 | .bss : { |
43 | __bss_start = .; | 48 | __bss_start = .; |
@@ -45,20 +50,13 @@ SECTIONS | |||
45 | __bss_end = .; | 50 | __bss_end = .; |
46 | } | 51 | } |
47 | 52 | ||
48 | . = HEADER_OFFSET; | 53 | .signature : { |
49 | .header : { | 54 | *(.signature) |
50 | *(.header) | ||
51 | } | 55 | } |
52 | 56 | ||
53 | . = ALIGN(16); | ||
54 | _end = .; | 57 | _end = .; |
55 | 58 | ||
56 | /DISCARD/ : { | 59 | /DISCARD/ : { |
57 | *(.note*) | 60 | *(.note*) |
58 | } | 61 | } |
59 | |||
60 | /* | ||
61 | * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility: | ||
62 | */ | ||
63 | . = ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!"); | ||
64 | } | 62 | } |
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 5f1b747f6ef1..ff93bc1b09c3 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c | |||
@@ -18,12 +18,8 @@ | |||
18 | #include "realmode/wakeup.h" | 18 | #include "realmode/wakeup.h" |
19 | #include "sleep.h" | 19 | #include "sleep.h" |
20 | 20 | ||
21 | unsigned long acpi_wakeup_address; | ||
22 | unsigned long acpi_realmode_flags; | 21 | unsigned long acpi_realmode_flags; |
23 | 22 | ||
24 | /* address in low memory of the wakeup routine. */ | ||
25 | static unsigned long acpi_realmode; | ||
26 | |||
27 | #if defined(CONFIG_SMP) && defined(CONFIG_64BIT) | 23 | #if defined(CONFIG_SMP) && defined(CONFIG_64BIT) |
28 | static char temp_stack[4096]; | 24 | static char temp_stack[4096]; |
29 | #endif | 25 | #endif |
@@ -33,22 +29,17 @@ static char temp_stack[4096]; | |||
33 | * | 29 | * |
34 | * Create an identity mapped page table and copy the wakeup routine to | 30 | * Create an identity mapped page table and copy the wakeup routine to |
35 | * low memory. | 31 | * low memory. |
36 | * | ||
37 | * Note that this is too late to change acpi_wakeup_address. | ||
38 | */ | 32 | */ |
39 | int acpi_suspend_lowlevel(void) | 33 | int acpi_suspend_lowlevel(void) |
40 | { | 34 | { |
41 | struct wakeup_header *header; | 35 | struct wakeup_header *header; |
36 | /* address in low memory of the wakeup routine. */ | ||
37 | char *acpi_realmode; | ||
42 | 38 | ||
43 | if (!acpi_realmode) { | 39 | acpi_realmode = TRAMPOLINE_SYM(acpi_wakeup_code); |
44 | printk(KERN_ERR "Could not allocate memory during boot, " | ||
45 | "S3 disabled\n"); | ||
46 | return -ENOMEM; | ||
47 | } | ||
48 | memcpy((void *)acpi_realmode, &wakeup_code_start, WAKEUP_SIZE); | ||
49 | 40 | ||
50 | header = (struct wakeup_header *)(acpi_realmode + HEADER_OFFSET); | 41 | header = (struct wakeup_header *)(acpi_realmode + WAKEUP_HEADER_OFFSET); |
51 | if (header->signature != 0x51ee1111) { | 42 | if (header->signature != WAKEUP_HEADER_SIGNATURE) { |
52 | printk(KERN_ERR "wakeup header does not match\n"); | 43 | printk(KERN_ERR "wakeup header does not match\n"); |
53 | return -EINVAL; | 44 | return -EINVAL; |
54 | } | 45 | } |
@@ -68,9 +59,7 @@ int acpi_suspend_lowlevel(void) | |||
68 | /* GDT[0]: GDT self-pointer */ | 59 | /* GDT[0]: GDT self-pointer */ |
69 | header->wakeup_gdt[0] = | 60 | header->wakeup_gdt[0] = |
70 | (u64)(sizeof(header->wakeup_gdt) - 1) + | 61 | (u64)(sizeof(header->wakeup_gdt) - 1) + |
71 | ((u64)(acpi_wakeup_address + | 62 | ((u64)__pa(&header->wakeup_gdt) << 16); |
72 | ((char *)&header->wakeup_gdt - (char *)acpi_realmode)) | ||
73 | << 16); | ||
74 | /* GDT[1]: big real mode-like code segment */ | 63 | /* GDT[1]: big real mode-like code segment */ |
75 | header->wakeup_gdt[1] = | 64 | header->wakeup_gdt[1] = |
76 | GDT_ENTRY(0x809b, acpi_wakeup_address, 0xfffff); | 65 | GDT_ENTRY(0x809b, acpi_wakeup_address, 0xfffff); |
@@ -96,7 +85,7 @@ int acpi_suspend_lowlevel(void) | |||
96 | header->pmode_cr3 = (u32)__pa(&initial_page_table); | 85 | header->pmode_cr3 = (u32)__pa(&initial_page_table); |
97 | saved_magic = 0x12345678; | 86 | saved_magic = 0x12345678; |
98 | #else /* CONFIG_64BIT */ | 87 | #else /* CONFIG_64BIT */ |
99 | header->trampoline_segment = setup_trampoline() >> 4; | 88 | header->trampoline_segment = trampoline_address() >> 4; |
100 | #ifdef CONFIG_SMP | 89 | #ifdef CONFIG_SMP |
101 | stack_start = (unsigned long)temp_stack + sizeof(temp_stack); | 90 | stack_start = (unsigned long)temp_stack + sizeof(temp_stack); |
102 | early_gdt_descr.address = | 91 | early_gdt_descr.address = |
@@ -111,45 +100,6 @@ int acpi_suspend_lowlevel(void) | |||
111 | return 0; | 100 | return 0; |
112 | } | 101 | } |
113 | 102 | ||
114 | /** | ||
115 | * acpi_reserve_wakeup_memory - do _very_ early ACPI initialisation | ||
116 | * | ||
117 | * We allocate a page from the first 1MB of memory for the wakeup | ||
118 | * routine for when we come back from a sleep state. The | ||
119 | * runtime allocator allows specification of <16MB pages, but not | ||
120 | * <1MB pages. | ||
121 | */ | ||
122 | void __init acpi_reserve_wakeup_memory(void) | ||
123 | { | ||
124 | phys_addr_t mem; | ||
125 | |||
126 | if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) { | ||
127 | printk(KERN_ERR | ||
128 | "ACPI: Wakeup code way too big, S3 disabled.\n"); | ||
129 | return; | ||
130 | } | ||
131 | |||
132 | mem = memblock_find_in_range(0, 1<<20, WAKEUP_SIZE, PAGE_SIZE); | ||
133 | |||
134 | if (mem == MEMBLOCK_ERROR) { | ||
135 | printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); | ||
136 | return; | ||
137 | } | ||
138 | acpi_realmode = (unsigned long) phys_to_virt(mem); | ||
139 | acpi_wakeup_address = mem; | ||
140 | memblock_x86_reserve_range(mem, mem + WAKEUP_SIZE, "ACPI WAKEUP"); | ||
141 | } | ||
142 | |||
143 | int __init acpi_configure_wakeup_memory(void) | ||
144 | { | ||
145 | if (acpi_realmode) | ||
146 | set_memory_x(acpi_realmode, WAKEUP_SIZE >> PAGE_SHIFT); | ||
147 | |||
148 | return 0; | ||
149 | } | ||
150 | arch_initcall(acpi_configure_wakeup_memory); | ||
151 | |||
152 | |||
153 | static int __init acpi_sleep_setup(char *str) | 103 | static int __init acpi_sleep_setup(char *str) |
154 | { | 104 | { |
155 | while ((str != NULL) && (*str != '\0')) { | 105 | while ((str != NULL) && (*str != '\0')) { |
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h index 31ce13f20297..416d4be13fef 100644 --- a/arch/x86/kernel/acpi/sleep.h +++ b/arch/x86/kernel/acpi/sleep.h | |||
@@ -4,13 +4,10 @@ | |||
4 | 4 | ||
5 | #include <asm/trampoline.h> | 5 | #include <asm/trampoline.h> |
6 | 6 | ||
7 | extern char wakeup_code_start, wakeup_code_end; | ||
8 | |||
9 | extern unsigned long saved_video_mode; | 7 | extern unsigned long saved_video_mode; |
10 | extern long saved_magic; | 8 | extern long saved_magic; |
11 | 9 | ||
12 | extern int wakeup_pmode_return; | 10 | extern int wakeup_pmode_return; |
13 | extern char swsusp_pg_dir[PAGE_SIZE]; | ||
14 | 11 | ||
15 | extern unsigned long acpi_copy_wakeup_routine(unsigned long); | 12 | extern unsigned long acpi_copy_wakeup_routine(unsigned long); |
16 | extern void wakeup_long64(void); | 13 | extern void wakeup_long64(void); |
diff --git a/arch/x86/kernel/acpi/wakeup_rm.S b/arch/x86/kernel/acpi/wakeup_rm.S index 6ff3b5730575..63b8ab524f2c 100644 --- a/arch/x86/kernel/acpi/wakeup_rm.S +++ b/arch/x86/kernel/acpi/wakeup_rm.S | |||
@@ -2,9 +2,11 @@ | |||
2 | * Wrapper script for the realmode binary as a transport object | 2 | * Wrapper script for the realmode binary as a transport object |
3 | * before copying to low memory. | 3 | * before copying to low memory. |
4 | */ | 4 | */ |
5 | .section ".rodata","a" | 5 | #include <asm/page_types.h> |
6 | .globl wakeup_code_start, wakeup_code_end | 6 | |
7 | wakeup_code_start: | 7 | .section ".x86_trampoline","a" |
8 | .balign PAGE_SIZE | ||
9 | .globl acpi_wakeup_code | ||
10 | acpi_wakeup_code: | ||
8 | .incbin "arch/x86/kernel/acpi/realmode/wakeup.bin" | 11 | .incbin "arch/x86/kernel/acpi/realmode/wakeup.bin" |
9 | wakeup_code_end: | 12 | .size acpi_wakeup_code, .-acpi_wakeup_code |
10 | .size wakeup_code_start, .-wakeup_code_start | ||
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 7038b95d363f..4a234677e213 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -199,7 +199,7 @@ void *text_poke_early(void *addr, const void *opcode, size_t len); | |||
199 | 199 | ||
200 | /* Replace instructions with better alternatives for this CPU type. | 200 | /* Replace instructions with better alternatives for this CPU type. |
201 | This runs before SMP is initialized to avoid SMP problems with | 201 | This runs before SMP is initialized to avoid SMP problems with |
202 | self modifying code. This implies that assymetric systems where | 202 | self modifying code. This implies that asymmetric systems where |
203 | APs have less capabilities than the boot processor are not handled. | 203 | APs have less capabilities than the boot processor are not handled. |
204 | Tough. Make sure you disable such features by hand. */ | 204 | Tough. Make sure you disable such features by hand. */ |
205 | 205 | ||
@@ -620,7 +620,12 @@ static int __kprobes stop_machine_text_poke(void *data) | |||
620 | flush_icache_range((unsigned long)p->addr, | 620 | flush_icache_range((unsigned long)p->addr, |
621 | (unsigned long)p->addr + p->len); | 621 | (unsigned long)p->addr + p->len); |
622 | } | 622 | } |
623 | 623 | /* | |
624 | * Intel Archiecture Software Developer's Manual section 7.1.3 specifies | ||
625 | * that a core serializing instruction such as "cpuid" should be | ||
626 | * executed on _each_ core before the new instruction is made visible. | ||
627 | */ | ||
628 | sync_core(); | ||
624 | return 0; | 629 | return 0; |
625 | } | 630 | } |
626 | 631 | ||
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 0a99f7198bc3..6801959a8b2a 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c | |||
@@ -12,14 +12,19 @@ | |||
12 | 12 | ||
13 | static u32 *flush_words; | 13 | static u32 *flush_words; |
14 | 14 | ||
15 | struct pci_device_id amd_nb_misc_ids[] = { | 15 | const struct pci_device_id amd_nb_misc_ids[] = { |
16 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, | 16 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, |
17 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, | 17 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, |
18 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) }, | 18 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) }, |
19 | {} | 19 | {} |
20 | }; | 20 | }; |
21 | EXPORT_SYMBOL(amd_nb_misc_ids); | 21 | EXPORT_SYMBOL(amd_nb_misc_ids); |
22 | 22 | ||
23 | static struct pci_device_id amd_nb_link_ids[] = { | ||
24 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_LINK) }, | ||
25 | {} | ||
26 | }; | ||
27 | |||
23 | const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[] __initconst = { | 28 | const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[] __initconst = { |
24 | { 0x00, 0x18, 0x20 }, | 29 | { 0x00, 0x18, 0x20 }, |
25 | { 0xff, 0x00, 0x20 }, | 30 | { 0xff, 0x00, 0x20 }, |
@@ -31,7 +36,7 @@ struct amd_northbridge_info amd_northbridges; | |||
31 | EXPORT_SYMBOL(amd_northbridges); | 36 | EXPORT_SYMBOL(amd_northbridges); |
32 | 37 | ||
33 | static struct pci_dev *next_northbridge(struct pci_dev *dev, | 38 | static struct pci_dev *next_northbridge(struct pci_dev *dev, |
34 | struct pci_device_id *ids) | 39 | const struct pci_device_id *ids) |
35 | { | 40 | { |
36 | do { | 41 | do { |
37 | dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); | 42 | dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); |
@@ -43,9 +48,9 @@ static struct pci_dev *next_northbridge(struct pci_dev *dev, | |||
43 | 48 | ||
44 | int amd_cache_northbridges(void) | 49 | int amd_cache_northbridges(void) |
45 | { | 50 | { |
46 | int i = 0; | 51 | u16 i = 0; |
47 | struct amd_northbridge *nb; | 52 | struct amd_northbridge *nb; |
48 | struct pci_dev *misc; | 53 | struct pci_dev *misc, *link; |
49 | 54 | ||
50 | if (amd_nb_num()) | 55 | if (amd_nb_num()) |
51 | return 0; | 56 | return 0; |
@@ -64,10 +69,12 @@ int amd_cache_northbridges(void) | |||
64 | amd_northbridges.nb = nb; | 69 | amd_northbridges.nb = nb; |
65 | amd_northbridges.num = i; | 70 | amd_northbridges.num = i; |
66 | 71 | ||
67 | misc = NULL; | 72 | link = misc = NULL; |
68 | for (i = 0; i != amd_nb_num(); i++) { | 73 | for (i = 0; i != amd_nb_num(); i++) { |
69 | node_to_amd_nb(i)->misc = misc = | 74 | node_to_amd_nb(i)->misc = misc = |
70 | next_northbridge(misc, amd_nb_misc_ids); | 75 | next_northbridge(misc, amd_nb_misc_ids); |
76 | node_to_amd_nb(i)->link = link = | ||
77 | next_northbridge(link, amd_nb_link_ids); | ||
71 | } | 78 | } |
72 | 79 | ||
73 | /* some CPU families (e.g. family 0x11) do not support GART */ | 80 | /* some CPU families (e.g. family 0x11) do not support GART */ |
@@ -85,26 +92,95 @@ int amd_cache_northbridges(void) | |||
85 | boot_cpu_data.x86_mask >= 0x1)) | 92 | boot_cpu_data.x86_mask >= 0x1)) |
86 | amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE; | 93 | amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE; |
87 | 94 | ||
95 | if (boot_cpu_data.x86 == 0x15) | ||
96 | amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE; | ||
97 | |||
98 | /* L3 cache partitioning is supported on family 0x15 */ | ||
99 | if (boot_cpu_data.x86 == 0x15) | ||
100 | amd_northbridges.flags |= AMD_NB_L3_PARTITIONING; | ||
101 | |||
88 | return 0; | 102 | return 0; |
89 | } | 103 | } |
90 | EXPORT_SYMBOL_GPL(amd_cache_northbridges); | 104 | EXPORT_SYMBOL_GPL(amd_cache_northbridges); |
91 | 105 | ||
92 | /* Ignores subdevice/subvendor but as far as I can figure out | 106 | /* |
93 | they're useless anyways */ | 107 | * Ignores subdevice/subvendor but as far as I can figure out |
94 | int __init early_is_amd_nb(u32 device) | 108 | * they're useless anyways |
109 | */ | ||
110 | bool __init early_is_amd_nb(u32 device) | ||
95 | { | 111 | { |
96 | struct pci_device_id *id; | 112 | const struct pci_device_id *id; |
97 | u32 vendor = device & 0xffff; | 113 | u32 vendor = device & 0xffff; |
114 | |||
98 | device >>= 16; | 115 | device >>= 16; |
99 | for (id = amd_nb_misc_ids; id->vendor; id++) | 116 | for (id = amd_nb_misc_ids; id->vendor; id++) |
100 | if (vendor == id->vendor && device == id->device) | 117 | if (vendor == id->vendor && device == id->device) |
101 | return 1; | 118 | return true; |
119 | return false; | ||
120 | } | ||
121 | |||
122 | int amd_get_subcaches(int cpu) | ||
123 | { | ||
124 | struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link; | ||
125 | unsigned int mask; | ||
126 | int cuid = 0; | ||
127 | |||
128 | if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) | ||
129 | return 0; | ||
130 | |||
131 | pci_read_config_dword(link, 0x1d4, &mask); | ||
132 | |||
133 | #ifdef CONFIG_SMP | ||
134 | cuid = cpu_data(cpu).compute_unit_id; | ||
135 | #endif | ||
136 | return (mask >> (4 * cuid)) & 0xf; | ||
137 | } | ||
138 | |||
139 | int amd_set_subcaches(int cpu, int mask) | ||
140 | { | ||
141 | static unsigned int reset, ban; | ||
142 | struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu)); | ||
143 | unsigned int reg; | ||
144 | int cuid = 0; | ||
145 | |||
146 | if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING) || mask > 0xf) | ||
147 | return -EINVAL; | ||
148 | |||
149 | /* if necessary, collect reset state of L3 partitioning and BAN mode */ | ||
150 | if (reset == 0) { | ||
151 | pci_read_config_dword(nb->link, 0x1d4, &reset); | ||
152 | pci_read_config_dword(nb->misc, 0x1b8, &ban); | ||
153 | ban &= 0x180000; | ||
154 | } | ||
155 | |||
156 | /* deactivate BAN mode if any subcaches are to be disabled */ | ||
157 | if (mask != 0xf) { | ||
158 | pci_read_config_dword(nb->misc, 0x1b8, ®); | ||
159 | pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000); | ||
160 | } | ||
161 | |||
162 | #ifdef CONFIG_SMP | ||
163 | cuid = cpu_data(cpu).compute_unit_id; | ||
164 | #endif | ||
165 | mask <<= 4 * cuid; | ||
166 | mask |= (0xf ^ (1 << cuid)) << 26; | ||
167 | |||
168 | pci_write_config_dword(nb->link, 0x1d4, mask); | ||
169 | |||
170 | /* reset BAN mode if L3 partitioning returned to reset state */ | ||
171 | pci_read_config_dword(nb->link, 0x1d4, ®); | ||
172 | if (reg == reset) { | ||
173 | pci_read_config_dword(nb->misc, 0x1b8, ®); | ||
174 | reg &= ~0x180000; | ||
175 | pci_write_config_dword(nb->misc, 0x1b8, reg | ban); | ||
176 | } | ||
177 | |||
102 | return 0; | 178 | return 0; |
103 | } | 179 | } |
104 | 180 | ||
105 | int amd_cache_gart(void) | 181 | static int amd_cache_gart(void) |
106 | { | 182 | { |
107 | int i; | 183 | u16 i; |
108 | 184 | ||
109 | if (!amd_nb_has_feature(AMD_NB_GART)) | 185 | if (!amd_nb_has_feature(AMD_NB_GART)) |
110 | return 0; | 186 | return 0; |
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index 51d4e1663066..1293c709ee85 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c | |||
@@ -508,64 +508,12 @@ static int apbt_next_event(unsigned long delta, | |||
508 | return 0; | 508 | return 0; |
509 | } | 509 | } |
510 | 510 | ||
511 | /* | ||
512 | * APB timer clock is not in sync with pclk on Langwell, which translates to | ||
513 | * unreliable read value caused by sampling error. the error does not add up | ||
514 | * overtime and only happens when sampling a 0 as a 1 by mistake. so the time | ||
515 | * would go backwards. the following code is trying to prevent time traveling | ||
516 | * backwards. little bit paranoid. | ||
517 | */ | ||
518 | static cycle_t apbt_read_clocksource(struct clocksource *cs) | 511 | static cycle_t apbt_read_clocksource(struct clocksource *cs) |
519 | { | 512 | { |
520 | unsigned long t0, t1, t2; | 513 | unsigned long current_count; |
521 | static unsigned long last_read; | 514 | |
522 | 515 | current_count = apbt_readl(phy_cs_timer_id, APBTMR_N_CURRENT_VALUE); | |
523 | bad_count: | 516 | return (cycle_t)~current_count; |
524 | t1 = apbt_readl(phy_cs_timer_id, | ||
525 | APBTMR_N_CURRENT_VALUE); | ||
526 | t2 = apbt_readl(phy_cs_timer_id, | ||
527 | APBTMR_N_CURRENT_VALUE); | ||
528 | if (unlikely(t1 < t2)) { | ||
529 | pr_debug("APBT: read current count error %lx:%lx:%lx\n", | ||
530 | t1, t2, t2 - t1); | ||
531 | goto bad_count; | ||
532 | } | ||
533 | /* | ||
534 | * check against cached last read, makes sure time does not go back. | ||
535 | * it could be a normal rollover but we will do tripple check anyway | ||
536 | */ | ||
537 | if (unlikely(t2 > last_read)) { | ||
538 | /* check if we have a normal rollover */ | ||
539 | unsigned long raw_intr_status = | ||
540 | apbt_readl_reg(APBTMRS_RAW_INT_STATUS); | ||
541 | /* | ||
542 | * cs timer interrupt is masked but raw intr bit is set if | ||
543 | * rollover occurs. then we read EOI reg to clear it. | ||
544 | */ | ||
545 | if (raw_intr_status & (1 << phy_cs_timer_id)) { | ||
546 | apbt_readl(phy_cs_timer_id, APBTMR_N_EOI); | ||
547 | goto out; | ||
548 | } | ||
549 | pr_debug("APB CS going back %lx:%lx:%lx ", | ||
550 | t2, last_read, t2 - last_read); | ||
551 | bad_count_x3: | ||
552 | pr_debug("triple check enforced\n"); | ||
553 | t0 = apbt_readl(phy_cs_timer_id, | ||
554 | APBTMR_N_CURRENT_VALUE); | ||
555 | udelay(1); | ||
556 | t1 = apbt_readl(phy_cs_timer_id, | ||
557 | APBTMR_N_CURRENT_VALUE); | ||
558 | udelay(1); | ||
559 | t2 = apbt_readl(phy_cs_timer_id, | ||
560 | APBTMR_N_CURRENT_VALUE); | ||
561 | if ((t2 > t1) || (t1 > t0)) { | ||
562 | printk(KERN_ERR "Error: APB CS tripple check failed\n"); | ||
563 | goto bad_count_x3; | ||
564 | } | ||
565 | } | ||
566 | out: | ||
567 | last_read = t2; | ||
568 | return (cycle_t)~t2; | ||
569 | } | 517 | } |
570 | 518 | ||
571 | static int apbt_clocksource_register(void) | 519 | static int apbt_clocksource_register(void) |
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 5955a7800a96..86d1ad4962a7 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
@@ -13,7 +13,7 @@ | |||
13 | #include <linux/kernel.h> | 13 | #include <linux/kernel.h> |
14 | #include <linux/types.h> | 14 | #include <linux/types.h> |
15 | #include <linux/init.h> | 15 | #include <linux/init.h> |
16 | #include <linux/bootmem.h> | 16 | #include <linux/memblock.h> |
17 | #include <linux/mmzone.h> | 17 | #include <linux/mmzone.h> |
18 | #include <linux/pci_ids.h> | 18 | #include <linux/pci_ids.h> |
19 | #include <linux/pci.h> | 19 | #include <linux/pci.h> |
@@ -57,7 +57,7 @@ static void __init insert_aperture_resource(u32 aper_base, u32 aper_size) | |||
57 | static u32 __init allocate_aperture(void) | 57 | static u32 __init allocate_aperture(void) |
58 | { | 58 | { |
59 | u32 aper_size; | 59 | u32 aper_size; |
60 | void *p; | 60 | unsigned long addr; |
61 | 61 | ||
62 | /* aper_size should <= 1G */ | 62 | /* aper_size should <= 1G */ |
63 | if (fallback_aper_order > 5) | 63 | if (fallback_aper_order > 5) |
@@ -73,7 +73,7 @@ static u32 __init allocate_aperture(void) | |||
73 | /* | 73 | /* |
74 | * using 512M as goal, in case kexec will load kernel_big | 74 | * using 512M as goal, in case kexec will load kernel_big |
75 | * that will do the on position decompress, and could overlap with | 75 | * that will do the on position decompress, and could overlap with |
76 | * that positon with gart that is used. | 76 | * that position with gart that is used. |
77 | * sequende: | 77 | * sequende: |
78 | * kernel_small | 78 | * kernel_small |
79 | * ==> kexec (with kdump trigger path or previous doesn't shutdown gart) | 79 | * ==> kexec (with kdump trigger path or previous doesn't shutdown gart) |
@@ -83,27 +83,26 @@ static u32 __init allocate_aperture(void) | |||
83 | * so don't use 512M below as gart iommu, leave the space for kernel | 83 | * so don't use 512M below as gart iommu, leave the space for kernel |
84 | * code for safe | 84 | * code for safe |
85 | */ | 85 | */ |
86 | p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20); | 86 | addr = memblock_find_in_range(0, 1ULL<<32, aper_size, 512ULL<<20); |
87 | if (addr == MEMBLOCK_ERROR || addr + aper_size > 0xffffffff) { | ||
88 | printk(KERN_ERR | ||
89 | "Cannot allocate aperture memory hole (%lx,%uK)\n", | ||
90 | addr, aper_size>>10); | ||
91 | return 0; | ||
92 | } | ||
93 | memblock_x86_reserve_range(addr, addr + aper_size, "aperture64"); | ||
87 | /* | 94 | /* |
88 | * Kmemleak should not scan this block as it may not be mapped via the | 95 | * Kmemleak should not scan this block as it may not be mapped via the |
89 | * kernel direct mapping. | 96 | * kernel direct mapping. |
90 | */ | 97 | */ |
91 | kmemleak_ignore(p); | 98 | kmemleak_ignore(phys_to_virt(addr)); |
92 | if (!p || __pa(p)+aper_size > 0xffffffff) { | ||
93 | printk(KERN_ERR | ||
94 | "Cannot allocate aperture memory hole (%p,%uK)\n", | ||
95 | p, aper_size>>10); | ||
96 | if (p) | ||
97 | free_bootmem(__pa(p), aper_size); | ||
98 | return 0; | ||
99 | } | ||
100 | printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n", | 99 | printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n", |
101 | aper_size >> 10, __pa(p)); | 100 | aper_size >> 10, addr); |
102 | insert_aperture_resource((u32)__pa(p), aper_size); | 101 | insert_aperture_resource((u32)addr, aper_size); |
103 | register_nosave_region((u32)__pa(p) >> PAGE_SHIFT, | 102 | register_nosave_region(addr >> PAGE_SHIFT, |
104 | (u32)__pa(p+aper_size) >> PAGE_SHIFT); | 103 | (addr+aper_size) >> PAGE_SHIFT); |
105 | 104 | ||
106 | return (u32)__pa(p); | 105 | return (u32)addr; |
107 | } | 106 | } |
108 | 107 | ||
109 | 108 | ||
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 76b96d74978a..966673f44141 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include <asm/i8259.h> | 43 | #include <asm/i8259.h> |
44 | #include <asm/proto.h> | 44 | #include <asm/proto.h> |
45 | #include <asm/apic.h> | 45 | #include <asm/apic.h> |
46 | #include <asm/io_apic.h> | ||
46 | #include <asm/desc.h> | 47 | #include <asm/desc.h> |
47 | #include <asm/hpet.h> | 48 | #include <asm/hpet.h> |
48 | #include <asm/idle.h> | 49 | #include <asm/idle.h> |
@@ -78,12 +79,21 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); | |||
78 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); | 79 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); |
79 | 80 | ||
80 | #ifdef CONFIG_X86_32 | 81 | #ifdef CONFIG_X86_32 |
82 | |||
83 | /* | ||
84 | * On x86_32, the mapping between cpu and logical apicid may vary | ||
85 | * depending on apic in use. The following early percpu variable is | ||
86 | * used for the mapping. This is where the behaviors of x86_64 and 32 | ||
87 | * actually diverge. Let's keep it ugly for now. | ||
88 | */ | ||
89 | DEFINE_EARLY_PER_CPU(int, x86_cpu_to_logical_apicid, BAD_APICID); | ||
90 | |||
81 | /* | 91 | /* |
82 | * Knob to control our willingness to enable the local APIC. | 92 | * Knob to control our willingness to enable the local APIC. |
83 | * | 93 | * |
84 | * +1=force-enable | 94 | * +1=force-enable |
85 | */ | 95 | */ |
86 | static int force_enable_local_apic; | 96 | static int force_enable_local_apic __initdata; |
87 | /* | 97 | /* |
88 | * APIC command line parameters | 98 | * APIC command line parameters |
89 | */ | 99 | */ |
@@ -153,7 +163,7 @@ early_param("nox2apic", setup_nox2apic); | |||
153 | unsigned long mp_lapic_addr; | 163 | unsigned long mp_lapic_addr; |
154 | int disable_apic; | 164 | int disable_apic; |
155 | /* Disable local APIC timer from the kernel commandline or via dmi quirk */ | 165 | /* Disable local APIC timer from the kernel commandline or via dmi quirk */ |
156 | static int disable_apic_timer __cpuinitdata; | 166 | static int disable_apic_timer __initdata; |
157 | /* Local APIC timer works in C2 */ | 167 | /* Local APIC timer works in C2 */ |
158 | int local_apic_timer_c2_ok; | 168 | int local_apic_timer_c2_ok; |
159 | EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); | 169 | EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); |
@@ -177,29 +187,8 @@ static struct resource lapic_resource = { | |||
177 | 187 | ||
178 | static unsigned int calibration_result; | 188 | static unsigned int calibration_result; |
179 | 189 | ||
180 | static int lapic_next_event(unsigned long delta, | ||
181 | struct clock_event_device *evt); | ||
182 | static void lapic_timer_setup(enum clock_event_mode mode, | ||
183 | struct clock_event_device *evt); | ||
184 | static void lapic_timer_broadcast(const struct cpumask *mask); | ||
185 | static void apic_pm_activate(void); | 190 | static void apic_pm_activate(void); |
186 | 191 | ||
187 | /* | ||
188 | * The local apic timer can be used for any function which is CPU local. | ||
189 | */ | ||
190 | static struct clock_event_device lapic_clockevent = { | ||
191 | .name = "lapic", | ||
192 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT | ||
193 | | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY, | ||
194 | .shift = 32, | ||
195 | .set_mode = lapic_timer_setup, | ||
196 | .set_next_event = lapic_next_event, | ||
197 | .broadcast = lapic_timer_broadcast, | ||
198 | .rating = 100, | ||
199 | .irq = -1, | ||
200 | }; | ||
201 | static DEFINE_PER_CPU(struct clock_event_device, lapic_events); | ||
202 | |||
203 | static unsigned long apic_phys; | 192 | static unsigned long apic_phys; |
204 | 193 | ||
205 | /* | 194 | /* |
@@ -238,7 +227,7 @@ static int modern_apic(void) | |||
238 | * right after this call apic become NOOP driven | 227 | * right after this call apic become NOOP driven |
239 | * so apic->write/read doesn't do anything | 228 | * so apic->write/read doesn't do anything |
240 | */ | 229 | */ |
241 | void apic_disable(void) | 230 | static void __init apic_disable(void) |
242 | { | 231 | { |
243 | pr_info("APIC: switched to apic NOOP\n"); | 232 | pr_info("APIC: switched to apic NOOP\n"); |
244 | apic = &apic_noop; | 233 | apic = &apic_noop; |
@@ -282,23 +271,6 @@ u64 native_apic_icr_read(void) | |||
282 | return icr1 | ((u64)icr2 << 32); | 271 | return icr1 | ((u64)icr2 << 32); |
283 | } | 272 | } |
284 | 273 | ||
285 | /** | ||
286 | * enable_NMI_through_LVT0 - enable NMI through local vector table 0 | ||
287 | */ | ||
288 | void __cpuinit enable_NMI_through_LVT0(void) | ||
289 | { | ||
290 | unsigned int v; | ||
291 | |||
292 | /* unmask and set to NMI */ | ||
293 | v = APIC_DM_NMI; | ||
294 | |||
295 | /* Level triggered for 82489DX (32bit mode) */ | ||
296 | if (!lapic_is_integrated()) | ||
297 | v |= APIC_LVT_LEVEL_TRIGGER; | ||
298 | |||
299 | apic_write(APIC_LVT0, v); | ||
300 | } | ||
301 | |||
302 | #ifdef CONFIG_X86_32 | 274 | #ifdef CONFIG_X86_32 |
303 | /** | 275 | /** |
304 | * get_physical_broadcast - Get number of physical broadcast IDs | 276 | * get_physical_broadcast - Get number of physical broadcast IDs |
@@ -508,6 +480,23 @@ static void lapic_timer_broadcast(const struct cpumask *mask) | |||
508 | #endif | 480 | #endif |
509 | } | 481 | } |
510 | 482 | ||
483 | |||
484 | /* | ||
485 | * The local apic timer can be used for any function which is CPU local. | ||
486 | */ | ||
487 | static struct clock_event_device lapic_clockevent = { | ||
488 | .name = "lapic", | ||
489 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT | ||
490 | | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY, | ||
491 | .shift = 32, | ||
492 | .set_mode = lapic_timer_setup, | ||
493 | .set_next_event = lapic_next_event, | ||
494 | .broadcast = lapic_timer_broadcast, | ||
495 | .rating = 100, | ||
496 | .irq = -1, | ||
497 | }; | ||
498 | static DEFINE_PER_CPU(struct clock_event_device, lapic_events); | ||
499 | |||
511 | /* | 500 | /* |
512 | * Setup the local APIC timer for this CPU. Copy the initialized values | 501 | * Setup the local APIC timer for this CPU. Copy the initialized values |
513 | * of the boot CPU and register the clock event in the framework. | 502 | * of the boot CPU and register the clock event in the framework. |
@@ -1209,7 +1198,7 @@ void __cpuinit setup_local_APIC(void) | |||
1209 | rdtscll(tsc); | 1198 | rdtscll(tsc); |
1210 | 1199 | ||
1211 | if (disable_apic) { | 1200 | if (disable_apic) { |
1212 | arch_disable_smp_support(); | 1201 | disable_ioapic_support(); |
1213 | return; | 1202 | return; |
1214 | } | 1203 | } |
1215 | 1204 | ||
@@ -1237,6 +1226,19 @@ void __cpuinit setup_local_APIC(void) | |||
1237 | */ | 1226 | */ |
1238 | apic->init_apic_ldr(); | 1227 | apic->init_apic_ldr(); |
1239 | 1228 | ||
1229 | #ifdef CONFIG_X86_32 | ||
1230 | /* | ||
1231 | * APIC LDR is initialized. If logical_apicid mapping was | ||
1232 | * initialized during get_smp_config(), make sure it matches the | ||
1233 | * actual value. | ||
1234 | */ | ||
1235 | i = early_per_cpu(x86_cpu_to_logical_apicid, cpu); | ||
1236 | WARN_ON(i != BAD_APICID && i != logical_smp_processor_id()); | ||
1237 | /* always use the value from LDR */ | ||
1238 | early_per_cpu(x86_cpu_to_logical_apicid, cpu) = | ||
1239 | logical_smp_processor_id(); | ||
1240 | #endif | ||
1241 | |||
1240 | /* | 1242 | /* |
1241 | * Set Task Priority to 'accept all'. We never change this | 1243 | * Set Task Priority to 'accept all'. We never change this |
1242 | * later on. | 1244 | * later on. |
@@ -1448,7 +1450,7 @@ int __init enable_IR(void) | |||
1448 | void __init enable_IR_x2apic(void) | 1450 | void __init enable_IR_x2apic(void) |
1449 | { | 1451 | { |
1450 | unsigned long flags; | 1452 | unsigned long flags; |
1451 | struct IO_APIC_route_entry **ioapic_entries = NULL; | 1453 | struct IO_APIC_route_entry **ioapic_entries; |
1452 | int ret, x2apic_enabled = 0; | 1454 | int ret, x2apic_enabled = 0; |
1453 | int dmar_table_init_ret; | 1455 | int dmar_table_init_ret; |
1454 | 1456 | ||
@@ -1537,7 +1539,7 @@ static int __init detect_init_APIC(void) | |||
1537 | } | 1539 | } |
1538 | #else | 1540 | #else |
1539 | 1541 | ||
1540 | static int apic_verify(void) | 1542 | static int __init apic_verify(void) |
1541 | { | 1543 | { |
1542 | u32 features, h, l; | 1544 | u32 features, h, l; |
1543 | 1545 | ||
@@ -1562,7 +1564,7 @@ static int apic_verify(void) | |||
1562 | return 0; | 1564 | return 0; |
1563 | } | 1565 | } |
1564 | 1566 | ||
1565 | int apic_force_enable(void) | 1567 | int __init apic_force_enable(unsigned long addr) |
1566 | { | 1568 | { |
1567 | u32 h, l; | 1569 | u32 h, l; |
1568 | 1570 | ||
@@ -1578,7 +1580,7 @@ int apic_force_enable(void) | |||
1578 | if (!(l & MSR_IA32_APICBASE_ENABLE)) { | 1580 | if (!(l & MSR_IA32_APICBASE_ENABLE)) { |
1579 | pr_info("Local APIC disabled by BIOS -- reenabling.\n"); | 1581 | pr_info("Local APIC disabled by BIOS -- reenabling.\n"); |
1580 | l &= ~MSR_IA32_APICBASE_BASE; | 1582 | l &= ~MSR_IA32_APICBASE_BASE; |
1581 | l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; | 1583 | l |= MSR_IA32_APICBASE_ENABLE | addr; |
1582 | wrmsr(MSR_IA32_APICBASE, l, h); | 1584 | wrmsr(MSR_IA32_APICBASE, l, h); |
1583 | enabled_via_apicbase = 1; | 1585 | enabled_via_apicbase = 1; |
1584 | } | 1586 | } |
@@ -1619,7 +1621,7 @@ static int __init detect_init_APIC(void) | |||
1619 | "you can enable it with \"lapic\"\n"); | 1621 | "you can enable it with \"lapic\"\n"); |
1620 | return -1; | 1622 | return -1; |
1621 | } | 1623 | } |
1622 | if (apic_force_enable()) | 1624 | if (apic_force_enable(APIC_DEFAULT_PHYS_BASE)) |
1623 | return -1; | 1625 | return -1; |
1624 | } else { | 1626 | } else { |
1625 | if (apic_verify()) | 1627 | if (apic_verify()) |
@@ -1930,17 +1932,6 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
1930 | { | 1932 | { |
1931 | int cpu; | 1933 | int cpu; |
1932 | 1934 | ||
1933 | /* | ||
1934 | * Validate version | ||
1935 | */ | ||
1936 | if (version == 0x0) { | ||
1937 | pr_warning("BIOS bug, APIC version is 0 for CPU#%d! " | ||
1938 | "fixing up to 0x10. (tell your hw vendor)\n", | ||
1939 | version); | ||
1940 | version = 0x10; | ||
1941 | } | ||
1942 | apic_version[apicid] = version; | ||
1943 | |||
1944 | if (num_processors >= nr_cpu_ids) { | 1935 | if (num_processors >= nr_cpu_ids) { |
1945 | int max = nr_cpu_ids; | 1936 | int max = nr_cpu_ids; |
1946 | int thiscpu = max + disabled_cpus; | 1937 | int thiscpu = max + disabled_cpus; |
@@ -1954,22 +1945,34 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
1954 | } | 1945 | } |
1955 | 1946 | ||
1956 | num_processors++; | 1947 | num_processors++; |
1957 | cpu = cpumask_next_zero(-1, cpu_present_mask); | ||
1958 | |||
1959 | if (version != apic_version[boot_cpu_physical_apicid]) | ||
1960 | WARN_ONCE(1, | ||
1961 | "ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n", | ||
1962 | apic_version[boot_cpu_physical_apicid], cpu, version); | ||
1963 | |||
1964 | physid_set(apicid, phys_cpu_present_map); | ||
1965 | if (apicid == boot_cpu_physical_apicid) { | 1948 | if (apicid == boot_cpu_physical_apicid) { |
1966 | /* | 1949 | /* |
1967 | * x86_bios_cpu_apicid is required to have processors listed | 1950 | * x86_bios_cpu_apicid is required to have processors listed |
1968 | * in same order as logical cpu numbers. Hence the first | 1951 | * in same order as logical cpu numbers. Hence the first |
1969 | * entry is BSP, and so on. | 1952 | * entry is BSP, and so on. |
1953 | * boot_cpu_init() already hold bit 0 in cpu_present_mask | ||
1954 | * for BSP. | ||
1970 | */ | 1955 | */ |
1971 | cpu = 0; | 1956 | cpu = 0; |
1957 | } else | ||
1958 | cpu = cpumask_next_zero(-1, cpu_present_mask); | ||
1959 | |||
1960 | /* | ||
1961 | * Validate version | ||
1962 | */ | ||
1963 | if (version == 0x0) { | ||
1964 | pr_warning("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n", | ||
1965 | cpu, apicid); | ||
1966 | version = 0x10; | ||
1972 | } | 1967 | } |
1968 | apic_version[apicid] = version; | ||
1969 | |||
1970 | if (version != apic_version[boot_cpu_physical_apicid]) { | ||
1971 | pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n", | ||
1972 | apic_version[boot_cpu_physical_apicid], cpu, version); | ||
1973 | } | ||
1974 | |||
1975 | physid_set(apicid, phys_cpu_present_map); | ||
1973 | if (apicid > max_physical_apicid) | 1976 | if (apicid > max_physical_apicid) |
1974 | max_physical_apicid = apicid; | 1977 | max_physical_apicid = apicid; |
1975 | 1978 | ||
@@ -1977,7 +1980,10 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
1977 | early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; | 1980 | early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; |
1978 | early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; | 1981 | early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; |
1979 | #endif | 1982 | #endif |
1980 | 1983 | #ifdef CONFIG_X86_32 | |
1984 | early_per_cpu(x86_cpu_to_logical_apicid, cpu) = | ||
1985 | apic->x86_32_early_logical_apicid(cpu); | ||
1986 | #endif | ||
1981 | set_cpu_possible(cpu, true); | 1987 | set_cpu_possible(cpu, true); |
1982 | set_cpu_present(cpu, true); | 1988 | set_cpu_present(cpu, true); |
1983 | } | 1989 | } |
@@ -1998,10 +2004,14 @@ void default_init_apic_ldr(void) | |||
1998 | } | 2004 | } |
1999 | 2005 | ||
2000 | #ifdef CONFIG_X86_32 | 2006 | #ifdef CONFIG_X86_32 |
2001 | int default_apicid_to_node(int logical_apicid) | 2007 | int default_x86_32_numa_cpu_node(int cpu) |
2002 | { | 2008 | { |
2003 | #ifdef CONFIG_SMP | 2009 | #ifdef CONFIG_NUMA |
2004 | return apicid_2_node[hard_smp_processor_id()]; | 2010 | int apicid = early_per_cpu(x86_cpu_to_apicid, cpu); |
2011 | |||
2012 | if (apicid != BAD_APICID) | ||
2013 | return __apicid_to_node[apicid]; | ||
2014 | return NUMA_NO_NODE; | ||
2005 | #else | 2015 | #else |
2006 | return 0; | 2016 | return 0; |
2007 | #endif | 2017 | #endif |
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 09d3b17ce0c2..5652d31fe108 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c | |||
@@ -185,8 +185,6 @@ struct apic apic_flat = { | |||
185 | .ioapic_phys_id_map = NULL, | 185 | .ioapic_phys_id_map = NULL, |
186 | .setup_apic_routing = NULL, | 186 | .setup_apic_routing = NULL, |
187 | .multi_timer_check = NULL, | 187 | .multi_timer_check = NULL, |
188 | .apicid_to_node = NULL, | ||
189 | .cpu_to_logical_apicid = NULL, | ||
190 | .cpu_present_to_apicid = default_cpu_present_to_apicid, | 188 | .cpu_present_to_apicid = default_cpu_present_to_apicid, |
191 | .apicid_to_cpu_present = NULL, | 189 | .apicid_to_cpu_present = NULL, |
192 | .setup_portio_remap = NULL, | 190 | .setup_portio_remap = NULL, |
@@ -337,8 +335,6 @@ struct apic apic_physflat = { | |||
337 | .ioapic_phys_id_map = NULL, | 335 | .ioapic_phys_id_map = NULL, |
338 | .setup_apic_routing = NULL, | 336 | .setup_apic_routing = NULL, |
339 | .multi_timer_check = NULL, | 337 | .multi_timer_check = NULL, |
340 | .apicid_to_node = NULL, | ||
341 | .cpu_to_logical_apicid = NULL, | ||
342 | .cpu_present_to_apicid = default_cpu_present_to_apicid, | 338 | .cpu_present_to_apicid = default_cpu_present_to_apicid, |
343 | .apicid_to_cpu_present = NULL, | 339 | .apicid_to_cpu_present = NULL, |
344 | .setup_portio_remap = NULL, | 340 | .setup_portio_remap = NULL, |
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index e31b9ffe25f5..f1baa2dc087a 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c | |||
@@ -54,11 +54,6 @@ static u64 noop_apic_icr_read(void) | |||
54 | return 0; | 54 | return 0; |
55 | } | 55 | } |
56 | 56 | ||
57 | static int noop_cpu_to_logical_apicid(int cpu) | ||
58 | { | ||
59 | return 0; | ||
60 | } | ||
61 | |||
62 | static int noop_phys_pkg_id(int cpuid_apic, int index_msb) | 57 | static int noop_phys_pkg_id(int cpuid_apic, int index_msb) |
63 | { | 58 | { |
64 | return 0; | 59 | return 0; |
@@ -113,12 +108,6 @@ static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask) | |||
113 | cpumask_set_cpu(cpu, retmask); | 108 | cpumask_set_cpu(cpu, retmask); |
114 | } | 109 | } |
115 | 110 | ||
116 | int noop_apicid_to_node(int logical_apicid) | ||
117 | { | ||
118 | /* we're always on node 0 */ | ||
119 | return 0; | ||
120 | } | ||
121 | |||
122 | static u32 noop_apic_read(u32 reg) | 111 | static u32 noop_apic_read(u32 reg) |
123 | { | 112 | { |
124 | WARN_ON_ONCE((cpu_has_apic && !disable_apic)); | 113 | WARN_ON_ONCE((cpu_has_apic && !disable_apic)); |
@@ -130,6 +119,14 @@ static void noop_apic_write(u32 reg, u32 v) | |||
130 | WARN_ON_ONCE(cpu_has_apic && !disable_apic); | 119 | WARN_ON_ONCE(cpu_has_apic && !disable_apic); |
131 | } | 120 | } |
132 | 121 | ||
122 | #ifdef CONFIG_X86_32 | ||
123 | static int noop_x86_32_numa_cpu_node(int cpu) | ||
124 | { | ||
125 | /* we're always on node 0 */ | ||
126 | return 0; | ||
127 | } | ||
128 | #endif | ||
129 | |||
133 | struct apic apic_noop = { | 130 | struct apic apic_noop = { |
134 | .name = "noop", | 131 | .name = "noop", |
135 | .probe = noop_probe, | 132 | .probe = noop_probe, |
@@ -153,9 +150,7 @@ struct apic apic_noop = { | |||
153 | .ioapic_phys_id_map = default_ioapic_phys_id_map, | 150 | .ioapic_phys_id_map = default_ioapic_phys_id_map, |
154 | .setup_apic_routing = NULL, | 151 | .setup_apic_routing = NULL, |
155 | .multi_timer_check = NULL, | 152 | .multi_timer_check = NULL, |
156 | .apicid_to_node = noop_apicid_to_node, | ||
157 | 153 | ||
158 | .cpu_to_logical_apicid = noop_cpu_to_logical_apicid, | ||
159 | .cpu_present_to_apicid = default_cpu_present_to_apicid, | 154 | .cpu_present_to_apicid = default_cpu_present_to_apicid, |
160 | .apicid_to_cpu_present = physid_set_mask_of_physid, | 155 | .apicid_to_cpu_present = physid_set_mask_of_physid, |
161 | 156 | ||
@@ -197,4 +192,9 @@ struct apic apic_noop = { | |||
197 | .icr_write = noop_apic_icr_write, | 192 | .icr_write = noop_apic_icr_write, |
198 | .wait_icr_idle = noop_apic_wait_icr_idle, | 193 | .wait_icr_idle = noop_apic_wait_icr_idle, |
199 | .safe_wait_icr_idle = noop_safe_apic_wait_icr_idle, | 194 | .safe_wait_icr_idle = noop_safe_apic_wait_icr_idle, |
195 | |||
196 | #ifdef CONFIG_X86_32 | ||
197 | .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid, | ||
198 | .x86_32_numa_cpu_node = noop_x86_32_numa_cpu_node, | ||
199 | #endif | ||
200 | }; | 200 | }; |
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index cb804c5091b9..541a2e431659 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c | |||
@@ -45,6 +45,12 @@ static unsigned long bigsmp_check_apicid_present(int bit) | |||
45 | return 1; | 45 | return 1; |
46 | } | 46 | } |
47 | 47 | ||
48 | static int bigsmp_early_logical_apicid(int cpu) | ||
49 | { | ||
50 | /* on bigsmp, logical apicid is the same as physical */ | ||
51 | return early_per_cpu(x86_cpu_to_apicid, cpu); | ||
52 | } | ||
53 | |||
48 | static inline unsigned long calculate_ldr(int cpu) | 54 | static inline unsigned long calculate_ldr(int cpu) |
49 | { | 55 | { |
50 | unsigned long val, id; | 56 | unsigned long val, id; |
@@ -80,11 +86,6 @@ static void bigsmp_setup_apic_routing(void) | |||
80 | nr_ioapics); | 86 | nr_ioapics); |
81 | } | 87 | } |
82 | 88 | ||
83 | static int bigsmp_apicid_to_node(int logical_apicid) | ||
84 | { | ||
85 | return apicid_2_node[hard_smp_processor_id()]; | ||
86 | } | ||
87 | |||
88 | static int bigsmp_cpu_present_to_apicid(int mps_cpu) | 89 | static int bigsmp_cpu_present_to_apicid(int mps_cpu) |
89 | { | 90 | { |
90 | if (mps_cpu < nr_cpu_ids) | 91 | if (mps_cpu < nr_cpu_ids) |
@@ -93,14 +94,6 @@ static int bigsmp_cpu_present_to_apicid(int mps_cpu) | |||
93 | return BAD_APICID; | 94 | return BAD_APICID; |
94 | } | 95 | } |
95 | 96 | ||
96 | /* Mapping from cpu number to logical apicid */ | ||
97 | static inline int bigsmp_cpu_to_logical_apicid(int cpu) | ||
98 | { | ||
99 | if (cpu >= nr_cpu_ids) | ||
100 | return BAD_APICID; | ||
101 | return cpu_physical_id(cpu); | ||
102 | } | ||
103 | |||
104 | static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) | 97 | static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) |
105 | { | 98 | { |
106 | /* For clustered we don't have a good way to do this yet - hack */ | 99 | /* For clustered we don't have a good way to do this yet - hack */ |
@@ -115,7 +108,11 @@ static int bigsmp_check_phys_apicid_present(int phys_apicid) | |||
115 | /* As we are using single CPU as destination, pick only one CPU here */ | 108 | /* As we are using single CPU as destination, pick only one CPU here */ |
116 | static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask) | 109 | static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask) |
117 | { | 110 | { |
118 | return bigsmp_cpu_to_logical_apicid(cpumask_first(cpumask)); | 111 | int cpu = cpumask_first(cpumask); |
112 | |||
113 | if (cpu < nr_cpu_ids) | ||
114 | return cpu_physical_id(cpu); | ||
115 | return BAD_APICID; | ||
119 | } | 116 | } |
120 | 117 | ||
121 | static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | 118 | static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, |
@@ -129,9 +126,9 @@ static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | |||
129 | */ | 126 | */ |
130 | for_each_cpu_and(cpu, cpumask, andmask) { | 127 | for_each_cpu_and(cpu, cpumask, andmask) { |
131 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | 128 | if (cpumask_test_cpu(cpu, cpu_online_mask)) |
132 | break; | 129 | return cpu_physical_id(cpu); |
133 | } | 130 | } |
134 | return bigsmp_cpu_to_logical_apicid(cpu); | 131 | return BAD_APICID; |
135 | } | 132 | } |
136 | 133 | ||
137 | static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) | 134 | static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) |
@@ -219,8 +216,6 @@ struct apic apic_bigsmp = { | |||
219 | .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, | 216 | .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, |
220 | .setup_apic_routing = bigsmp_setup_apic_routing, | 217 | .setup_apic_routing = bigsmp_setup_apic_routing, |
221 | .multi_timer_check = NULL, | 218 | .multi_timer_check = NULL, |
222 | .apicid_to_node = bigsmp_apicid_to_node, | ||
223 | .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid, | ||
224 | .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, | 219 | .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, |
225 | .apicid_to_cpu_present = physid_set_mask_of_physid, | 220 | .apicid_to_cpu_present = physid_set_mask_of_physid, |
226 | .setup_portio_remap = NULL, | 221 | .setup_portio_remap = NULL, |
@@ -256,4 +251,7 @@ struct apic apic_bigsmp = { | |||
256 | .icr_write = native_apic_icr_write, | 251 | .icr_write = native_apic_icr_write, |
257 | .wait_icr_idle = native_apic_wait_icr_idle, | 252 | .wait_icr_idle = native_apic_wait_icr_idle, |
258 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | 253 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, |
254 | |||
255 | .x86_32_early_logical_apicid = bigsmp_early_logical_apicid, | ||
256 | .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node, | ||
259 | }; | 257 | }; |
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 8593582d8022..3e9de4854c5b 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c | |||
@@ -460,6 +460,12 @@ static unsigned long es7000_check_apicid_present(int bit) | |||
460 | return physid_isset(bit, phys_cpu_present_map); | 460 | return physid_isset(bit, phys_cpu_present_map); |
461 | } | 461 | } |
462 | 462 | ||
463 | static int es7000_early_logical_apicid(int cpu) | ||
464 | { | ||
465 | /* on es7000, logical apicid is the same as physical */ | ||
466 | return early_per_cpu(x86_bios_cpu_apicid, cpu); | ||
467 | } | ||
468 | |||
463 | static unsigned long calculate_ldr(int cpu) | 469 | static unsigned long calculate_ldr(int cpu) |
464 | { | 470 | { |
465 | unsigned long id = per_cpu(x86_bios_cpu_apicid, cpu); | 471 | unsigned long id = per_cpu(x86_bios_cpu_apicid, cpu); |
@@ -504,12 +510,11 @@ static void es7000_setup_apic_routing(void) | |||
504 | nr_ioapics, cpumask_bits(es7000_target_cpus())[0]); | 510 | nr_ioapics, cpumask_bits(es7000_target_cpus())[0]); |
505 | } | 511 | } |
506 | 512 | ||
507 | static int es7000_apicid_to_node(int logical_apicid) | 513 | static int es7000_numa_cpu_node(int cpu) |
508 | { | 514 | { |
509 | return 0; | 515 | return 0; |
510 | } | 516 | } |
511 | 517 | ||
512 | |||
513 | static int es7000_cpu_present_to_apicid(int mps_cpu) | 518 | static int es7000_cpu_present_to_apicid(int mps_cpu) |
514 | { | 519 | { |
515 | if (!mps_cpu) | 520 | if (!mps_cpu) |
@@ -528,18 +533,6 @@ static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap) | |||
528 | ++cpu_id; | 533 | ++cpu_id; |
529 | } | 534 | } |
530 | 535 | ||
531 | /* Mapping from cpu number to logical apicid */ | ||
532 | static int es7000_cpu_to_logical_apicid(int cpu) | ||
533 | { | ||
534 | #ifdef CONFIG_SMP | ||
535 | if (cpu >= nr_cpu_ids) | ||
536 | return BAD_APICID; | ||
537 | return cpu_2_logical_apicid[cpu]; | ||
538 | #else | ||
539 | return logical_smp_processor_id(); | ||
540 | #endif | ||
541 | } | ||
542 | |||
543 | static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) | 536 | static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) |
544 | { | 537 | { |
545 | /* For clustered we don't have a good way to do this yet - hack */ | 538 | /* For clustered we don't have a good way to do this yet - hack */ |
@@ -561,7 +554,7 @@ static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask) | |||
561 | * The cpus in the mask must all be on the apic cluster. | 554 | * The cpus in the mask must all be on the apic cluster. |
562 | */ | 555 | */ |
563 | for_each_cpu(cpu, cpumask) { | 556 | for_each_cpu(cpu, cpumask) { |
564 | int new_apicid = es7000_cpu_to_logical_apicid(cpu); | 557 | int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); |
565 | 558 | ||
566 | if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { | 559 | if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { |
567 | WARN(1, "Not a valid mask!"); | 560 | WARN(1, "Not a valid mask!"); |
@@ -578,7 +571,7 @@ static unsigned int | |||
578 | es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, | 571 | es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, |
579 | const struct cpumask *andmask) | 572 | const struct cpumask *andmask) |
580 | { | 573 | { |
581 | int apicid = es7000_cpu_to_logical_apicid(0); | 574 | int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0); |
582 | cpumask_var_t cpumask; | 575 | cpumask_var_t cpumask; |
583 | 576 | ||
584 | if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) | 577 | if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) |
@@ -655,8 +648,6 @@ struct apic __refdata apic_es7000_cluster = { | |||
655 | .ioapic_phys_id_map = es7000_ioapic_phys_id_map, | 648 | .ioapic_phys_id_map = es7000_ioapic_phys_id_map, |
656 | .setup_apic_routing = es7000_setup_apic_routing, | 649 | .setup_apic_routing = es7000_setup_apic_routing, |
657 | .multi_timer_check = NULL, | 650 | .multi_timer_check = NULL, |
658 | .apicid_to_node = es7000_apicid_to_node, | ||
659 | .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid, | ||
660 | .cpu_present_to_apicid = es7000_cpu_present_to_apicid, | 651 | .cpu_present_to_apicid = es7000_cpu_present_to_apicid, |
661 | .apicid_to_cpu_present = es7000_apicid_to_cpu_present, | 652 | .apicid_to_cpu_present = es7000_apicid_to_cpu_present, |
662 | .setup_portio_remap = NULL, | 653 | .setup_portio_remap = NULL, |
@@ -695,6 +686,9 @@ struct apic __refdata apic_es7000_cluster = { | |||
695 | .icr_write = native_apic_icr_write, | 686 | .icr_write = native_apic_icr_write, |
696 | .wait_icr_idle = native_apic_wait_icr_idle, | 687 | .wait_icr_idle = native_apic_wait_icr_idle, |
697 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | 688 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, |
689 | |||
690 | .x86_32_early_logical_apicid = es7000_early_logical_apicid, | ||
691 | .x86_32_numa_cpu_node = es7000_numa_cpu_node, | ||
698 | }; | 692 | }; |
699 | 693 | ||
700 | struct apic __refdata apic_es7000 = { | 694 | struct apic __refdata apic_es7000 = { |
@@ -720,8 +714,6 @@ struct apic __refdata apic_es7000 = { | |||
720 | .ioapic_phys_id_map = es7000_ioapic_phys_id_map, | 714 | .ioapic_phys_id_map = es7000_ioapic_phys_id_map, |
721 | .setup_apic_routing = es7000_setup_apic_routing, | 715 | .setup_apic_routing = es7000_setup_apic_routing, |
722 | .multi_timer_check = NULL, | 716 | .multi_timer_check = NULL, |
723 | .apicid_to_node = es7000_apicid_to_node, | ||
724 | .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid, | ||
725 | .cpu_present_to_apicid = es7000_cpu_present_to_apicid, | 717 | .cpu_present_to_apicid = es7000_cpu_present_to_apicid, |
726 | .apicid_to_cpu_present = es7000_apicid_to_cpu_present, | 718 | .apicid_to_cpu_present = es7000_apicid_to_cpu_present, |
727 | .setup_portio_remap = NULL, | 719 | .setup_portio_remap = NULL, |
@@ -758,4 +750,7 @@ struct apic __refdata apic_es7000 = { | |||
758 | .icr_write = native_apic_icr_write, | 750 | .icr_write = native_apic_icr_write, |
759 | .wait_icr_idle = native_apic_wait_icr_idle, | 751 | .wait_icr_idle = native_apic_wait_icr_idle, |
760 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | 752 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, |
753 | |||
754 | .x86_32_early_logical_apicid = es7000_early_logical_apicid, | ||
755 | .x86_32_numa_cpu_node = es7000_numa_cpu_node, | ||
761 | }; | 756 | }; |
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 79fd43ca6f96..c4e557a1ebb6 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c | |||
@@ -83,7 +83,6 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, | |||
83 | arch_spin_lock(&lock); | 83 | arch_spin_lock(&lock); |
84 | printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); | 84 | printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); |
85 | show_regs(regs); | 85 | show_regs(regs); |
86 | dump_stack(); | ||
87 | arch_spin_unlock(&lock); | 86 | arch_spin_unlock(&lock); |
88 | cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); | 87 | cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); |
89 | return NOTIFY_STOP; | 88 | return NOTIFY_STOP; |
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ca9e2a3545a9..180ca240e03c 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -108,7 +108,10 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); | |||
108 | 108 | ||
109 | int skip_ioapic_setup; | 109 | int skip_ioapic_setup; |
110 | 110 | ||
111 | void arch_disable_smp_support(void) | 111 | /** |
112 | * disable_ioapic_support() - disables ioapic support at runtime | ||
113 | */ | ||
114 | void disable_ioapic_support(void) | ||
112 | { | 115 | { |
113 | #ifdef CONFIG_PCI | 116 | #ifdef CONFIG_PCI |
114 | noioapicquirk = 1; | 117 | noioapicquirk = 1; |
@@ -120,11 +123,14 @@ void arch_disable_smp_support(void) | |||
120 | static int __init parse_noapic(char *str) | 123 | static int __init parse_noapic(char *str) |
121 | { | 124 | { |
122 | /* disable IO-APIC */ | 125 | /* disable IO-APIC */ |
123 | arch_disable_smp_support(); | 126 | disable_ioapic_support(); |
124 | return 0; | 127 | return 0; |
125 | } | 128 | } |
126 | early_param("noapic", parse_noapic); | 129 | early_param("noapic", parse_noapic); |
127 | 130 | ||
131 | static int io_apic_setup_irq_pin_once(unsigned int irq, int node, | ||
132 | struct io_apic_irq_attr *attr); | ||
133 | |||
128 | /* Will be called in mpparse/acpi/sfi codes for saving IRQ info */ | 134 | /* Will be called in mpparse/acpi/sfi codes for saving IRQ info */ |
129 | void mp_save_irq(struct mpc_intsrc *m) | 135 | void mp_save_irq(struct mpc_intsrc *m) |
130 | { | 136 | { |
@@ -181,7 +187,7 @@ int __init arch_early_irq_init(void) | |||
181 | irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs); | 187 | irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs); |
182 | 188 | ||
183 | for (i = 0; i < count; i++) { | 189 | for (i = 0; i < count; i++) { |
184 | set_irq_chip_data(i, &cfg[i]); | 190 | irq_set_chip_data(i, &cfg[i]); |
185 | zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node); | 191 | zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node); |
186 | zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node); | 192 | zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node); |
187 | /* | 193 | /* |
@@ -200,7 +206,7 @@ int __init arch_early_irq_init(void) | |||
200 | #ifdef CONFIG_SPARSE_IRQ | 206 | #ifdef CONFIG_SPARSE_IRQ |
201 | static struct irq_cfg *irq_cfg(unsigned int irq) | 207 | static struct irq_cfg *irq_cfg(unsigned int irq) |
202 | { | 208 | { |
203 | return get_irq_chip_data(irq); | 209 | return irq_get_chip_data(irq); |
204 | } | 210 | } |
205 | 211 | ||
206 | static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node) | 212 | static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node) |
@@ -226,7 +232,7 @@ static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) | |||
226 | { | 232 | { |
227 | if (!cfg) | 233 | if (!cfg) |
228 | return; | 234 | return; |
229 | set_irq_chip_data(at, NULL); | 235 | irq_set_chip_data(at, NULL); |
230 | free_cpumask_var(cfg->domain); | 236 | free_cpumask_var(cfg->domain); |
231 | free_cpumask_var(cfg->old_domain); | 237 | free_cpumask_var(cfg->old_domain); |
232 | kfree(cfg); | 238 | kfree(cfg); |
@@ -256,14 +262,14 @@ static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node) | |||
256 | if (res < 0) { | 262 | if (res < 0) { |
257 | if (res != -EEXIST) | 263 | if (res != -EEXIST) |
258 | return NULL; | 264 | return NULL; |
259 | cfg = get_irq_chip_data(at); | 265 | cfg = irq_get_chip_data(at); |
260 | if (cfg) | 266 | if (cfg) |
261 | return cfg; | 267 | return cfg; |
262 | } | 268 | } |
263 | 269 | ||
264 | cfg = alloc_irq_cfg(at, node); | 270 | cfg = alloc_irq_cfg(at, node); |
265 | if (cfg) | 271 | if (cfg) |
266 | set_irq_chip_data(at, cfg); | 272 | irq_set_chip_data(at, cfg); |
267 | else | 273 | else |
268 | irq_free_desc(at); | 274 | irq_free_desc(at); |
269 | return cfg; | 275 | return cfg; |
@@ -818,7 +824,7 @@ static int EISA_ELCR(unsigned int irq) | |||
818 | #define default_MCA_trigger(idx) (1) | 824 | #define default_MCA_trigger(idx) (1) |
819 | #define default_MCA_polarity(idx) default_ISA_polarity(idx) | 825 | #define default_MCA_polarity(idx) default_ISA_polarity(idx) |
820 | 826 | ||
821 | static int MPBIOS_polarity(int idx) | 827 | static int irq_polarity(int idx) |
822 | { | 828 | { |
823 | int bus = mp_irqs[idx].srcbus; | 829 | int bus = mp_irqs[idx].srcbus; |
824 | int polarity; | 830 | int polarity; |
@@ -860,7 +866,7 @@ static int MPBIOS_polarity(int idx) | |||
860 | return polarity; | 866 | return polarity; |
861 | } | 867 | } |
862 | 868 | ||
863 | static int MPBIOS_trigger(int idx) | 869 | static int irq_trigger(int idx) |
864 | { | 870 | { |
865 | int bus = mp_irqs[idx].srcbus; | 871 | int bus = mp_irqs[idx].srcbus; |
866 | int trigger; | 872 | int trigger; |
@@ -932,16 +938,6 @@ static int MPBIOS_trigger(int idx) | |||
932 | return trigger; | 938 | return trigger; |
933 | } | 939 | } |
934 | 940 | ||
935 | static inline int irq_polarity(int idx) | ||
936 | { | ||
937 | return MPBIOS_polarity(idx); | ||
938 | } | ||
939 | |||
940 | static inline int irq_trigger(int idx) | ||
941 | { | ||
942 | return MPBIOS_trigger(idx); | ||
943 | } | ||
944 | |||
945 | static int pin_2_irq(int idx, int apic, int pin) | 941 | static int pin_2_irq(int idx, int apic, int pin) |
946 | { | 942 | { |
947 | int irq; | 943 | int irq; |
@@ -1189,7 +1185,7 @@ void __setup_vector_irq(int cpu) | |||
1189 | raw_spin_lock(&vector_lock); | 1185 | raw_spin_lock(&vector_lock); |
1190 | /* Mark the inuse vectors */ | 1186 | /* Mark the inuse vectors */ |
1191 | for_each_active_irq(irq) { | 1187 | for_each_active_irq(irq) { |
1192 | cfg = get_irq_chip_data(irq); | 1188 | cfg = irq_get_chip_data(irq); |
1193 | if (!cfg) | 1189 | if (!cfg) |
1194 | continue; | 1190 | continue; |
1195 | /* | 1191 | /* |
@@ -1220,10 +1216,6 @@ void __setup_vector_irq(int cpu) | |||
1220 | static struct irq_chip ioapic_chip; | 1216 | static struct irq_chip ioapic_chip; |
1221 | static struct irq_chip ir_ioapic_chip; | 1217 | static struct irq_chip ir_ioapic_chip; |
1222 | 1218 | ||
1223 | #define IOAPIC_AUTO -1 | ||
1224 | #define IOAPIC_EDGE 0 | ||
1225 | #define IOAPIC_LEVEL 1 | ||
1226 | |||
1227 | #ifdef CONFIG_X86_32 | 1219 | #ifdef CONFIG_X86_32 |
1228 | static inline int IO_APIC_irq_trigger(int irq) | 1220 | static inline int IO_APIC_irq_trigger(int irq) |
1229 | { | 1221 | { |
@@ -1248,35 +1240,31 @@ static inline int IO_APIC_irq_trigger(int irq) | |||
1248 | } | 1240 | } |
1249 | #endif | 1241 | #endif |
1250 | 1242 | ||
1251 | static void ioapic_register_intr(unsigned int irq, unsigned long trigger) | 1243 | static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg, |
1244 | unsigned long trigger) | ||
1252 | { | 1245 | { |
1246 | struct irq_chip *chip = &ioapic_chip; | ||
1247 | irq_flow_handler_t hdl; | ||
1248 | bool fasteoi; | ||
1253 | 1249 | ||
1254 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || | 1250 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || |
1255 | trigger == IOAPIC_LEVEL) | 1251 | trigger == IOAPIC_LEVEL) { |
1256 | irq_set_status_flags(irq, IRQ_LEVEL); | 1252 | irq_set_status_flags(irq, IRQ_LEVEL); |
1257 | else | 1253 | fasteoi = true; |
1254 | } else { | ||
1258 | irq_clear_status_flags(irq, IRQ_LEVEL); | 1255 | irq_clear_status_flags(irq, IRQ_LEVEL); |
1256 | fasteoi = false; | ||
1257 | } | ||
1259 | 1258 | ||
1260 | if (irq_remapped(get_irq_chip_data(irq))) { | 1259 | if (irq_remapped(cfg)) { |
1261 | irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); | 1260 | irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); |
1262 | if (trigger) | 1261 | chip = &ir_ioapic_chip; |
1263 | set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, | 1262 | fasteoi = trigger != 0; |
1264 | handle_fasteoi_irq, | ||
1265 | "fasteoi"); | ||
1266 | else | ||
1267 | set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, | ||
1268 | handle_edge_irq, "edge"); | ||
1269 | return; | ||
1270 | } | 1263 | } |
1271 | 1264 | ||
1272 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || | 1265 | hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq; |
1273 | trigger == IOAPIC_LEVEL) | 1266 | irq_set_chip_and_handler_name(irq, chip, hdl, |
1274 | set_irq_chip_and_handler_name(irq, &ioapic_chip, | 1267 | fasteoi ? "fasteoi" : "edge"); |
1275 | handle_fasteoi_irq, | ||
1276 | "fasteoi"); | ||
1277 | else | ||
1278 | set_irq_chip_and_handler_name(irq, &ioapic_chip, | ||
1279 | handle_edge_irq, "edge"); | ||
1280 | } | 1268 | } |
1281 | 1269 | ||
1282 | static int setup_ioapic_entry(int apic_id, int irq, | 1270 | static int setup_ioapic_entry(int apic_id, int irq, |
@@ -1374,7 +1362,7 @@ static void setup_ioapic_irq(int apic_id, int pin, unsigned int irq, | |||
1374 | return; | 1362 | return; |
1375 | } | 1363 | } |
1376 | 1364 | ||
1377 | ioapic_register_intr(irq, trigger); | 1365 | ioapic_register_intr(irq, cfg, trigger); |
1378 | if (irq < legacy_pic->nr_legacy_irqs) | 1366 | if (irq < legacy_pic->nr_legacy_irqs) |
1379 | legacy_pic->mask(irq); | 1367 | legacy_pic->mask(irq); |
1380 | 1368 | ||
@@ -1385,33 +1373,26 @@ static struct { | |||
1385 | DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); | 1373 | DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); |
1386 | } mp_ioapic_routing[MAX_IO_APICS]; | 1374 | } mp_ioapic_routing[MAX_IO_APICS]; |
1387 | 1375 | ||
1388 | static void __init setup_IO_APIC_irqs(void) | 1376 | static bool __init io_apic_pin_not_connected(int idx, int apic_id, int pin) |
1389 | { | 1377 | { |
1390 | int apic_id, pin, idx, irq, notcon = 0; | 1378 | if (idx != -1) |
1391 | int node = cpu_to_node(0); | 1379 | return false; |
1392 | struct irq_cfg *cfg; | ||
1393 | 1380 | ||
1394 | apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); | 1381 | apic_printk(APIC_VERBOSE, KERN_DEBUG " apic %d pin %d not connected\n", |
1382 | mp_ioapics[apic_id].apicid, pin); | ||
1383 | return true; | ||
1384 | } | ||
1385 | |||
1386 | static void __init __io_apic_setup_irqs(unsigned int apic_id) | ||
1387 | { | ||
1388 | int idx, node = cpu_to_node(0); | ||
1389 | struct io_apic_irq_attr attr; | ||
1390 | unsigned int pin, irq; | ||
1395 | 1391 | ||
1396 | for (apic_id = 0; apic_id < nr_ioapics; apic_id++) | ||
1397 | for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { | 1392 | for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { |
1398 | idx = find_irq_entry(apic_id, pin, mp_INT); | 1393 | idx = find_irq_entry(apic_id, pin, mp_INT); |
1399 | if (idx == -1) { | 1394 | if (io_apic_pin_not_connected(idx, apic_id, pin)) |
1400 | if (!notcon) { | ||
1401 | notcon = 1; | ||
1402 | apic_printk(APIC_VERBOSE, | ||
1403 | KERN_DEBUG " %d-%d", | ||
1404 | mp_ioapics[apic_id].apicid, pin); | ||
1405 | } else | ||
1406 | apic_printk(APIC_VERBOSE, " %d-%d", | ||
1407 | mp_ioapics[apic_id].apicid, pin); | ||
1408 | continue; | 1395 | continue; |
1409 | } | ||
1410 | if (notcon) { | ||
1411 | apic_printk(APIC_VERBOSE, | ||
1412 | " (apicid-pin) not connected\n"); | ||
1413 | notcon = 0; | ||
1414 | } | ||
1415 | 1396 | ||
1416 | irq = pin_2_irq(idx, apic_id, pin); | 1397 | irq = pin_2_irq(idx, apic_id, pin); |
1417 | 1398 | ||
@@ -1423,25 +1404,24 @@ static void __init setup_IO_APIC_irqs(void) | |||
1423 | * installed and if it returns 1: | 1404 | * installed and if it returns 1: |
1424 | */ | 1405 | */ |
1425 | if (apic->multi_timer_check && | 1406 | if (apic->multi_timer_check && |
1426 | apic->multi_timer_check(apic_id, irq)) | 1407 | apic->multi_timer_check(apic_id, irq)) |
1427 | continue; | 1408 | continue; |
1428 | 1409 | ||
1429 | cfg = alloc_irq_and_cfg_at(irq, node); | 1410 | set_io_apic_irq_attr(&attr, apic_id, pin, irq_trigger(idx), |
1430 | if (!cfg) | 1411 | irq_polarity(idx)); |
1431 | continue; | ||
1432 | 1412 | ||
1433 | add_pin_to_irq_node(cfg, node, apic_id, pin); | 1413 | io_apic_setup_irq_pin(irq, node, &attr); |
1434 | /* | ||
1435 | * don't mark it in pin_programmed, so later acpi could | ||
1436 | * set it correctly when irq < 16 | ||
1437 | */ | ||
1438 | setup_ioapic_irq(apic_id, pin, irq, cfg, irq_trigger(idx), | ||
1439 | irq_polarity(idx)); | ||
1440 | } | 1414 | } |
1415 | } | ||
1441 | 1416 | ||
1442 | if (notcon) | 1417 | static void __init setup_IO_APIC_irqs(void) |
1443 | apic_printk(APIC_VERBOSE, | 1418 | { |
1444 | " (apicid-pin) not connected\n"); | 1419 | unsigned int apic_id; |
1420 | |||
1421 | apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); | ||
1422 | |||
1423 | for (apic_id = 0; apic_id < nr_ioapics; apic_id++) | ||
1424 | __io_apic_setup_irqs(apic_id); | ||
1445 | } | 1425 | } |
1446 | 1426 | ||
1447 | /* | 1427 | /* |
@@ -1452,7 +1432,7 @@ static void __init setup_IO_APIC_irqs(void) | |||
1452 | void setup_IO_APIC_irq_extra(u32 gsi) | 1432 | void setup_IO_APIC_irq_extra(u32 gsi) |
1453 | { | 1433 | { |
1454 | int apic_id = 0, pin, idx, irq, node = cpu_to_node(0); | 1434 | int apic_id = 0, pin, idx, irq, node = cpu_to_node(0); |
1455 | struct irq_cfg *cfg; | 1435 | struct io_apic_irq_attr attr; |
1456 | 1436 | ||
1457 | /* | 1437 | /* |
1458 | * Convert 'gsi' to 'ioapic.pin'. | 1438 | * Convert 'gsi' to 'ioapic.pin'. |
@@ -1472,21 +1452,10 @@ void setup_IO_APIC_irq_extra(u32 gsi) | |||
1472 | if (apic_id == 0 || irq < NR_IRQS_LEGACY) | 1452 | if (apic_id == 0 || irq < NR_IRQS_LEGACY) |
1473 | return; | 1453 | return; |
1474 | 1454 | ||
1475 | cfg = alloc_irq_and_cfg_at(irq, node); | 1455 | set_io_apic_irq_attr(&attr, apic_id, pin, irq_trigger(idx), |
1476 | if (!cfg) | 1456 | irq_polarity(idx)); |
1477 | return; | ||
1478 | |||
1479 | add_pin_to_irq_node(cfg, node, apic_id, pin); | ||
1480 | |||
1481 | if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) { | ||
1482 | pr_debug("Pin %d-%d already programmed\n", | ||
1483 | mp_ioapics[apic_id].apicid, pin); | ||
1484 | return; | ||
1485 | } | ||
1486 | set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed); | ||
1487 | 1457 | ||
1488 | setup_ioapic_irq(apic_id, pin, irq, cfg, | 1458 | io_apic_setup_irq_pin_once(irq, node, &attr); |
1489 | irq_trigger(idx), irq_polarity(idx)); | ||
1490 | } | 1459 | } |
1491 | 1460 | ||
1492 | /* | 1461 | /* |
@@ -1518,7 +1487,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, | |||
1518 | * The timer IRQ doesn't have to know that behind the | 1487 | * The timer IRQ doesn't have to know that behind the |
1519 | * scene we may have a 8259A-master in AEOI mode ... | 1488 | * scene we may have a 8259A-master in AEOI mode ... |
1520 | */ | 1489 | */ |
1521 | set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); | 1490 | irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, |
1491 | "edge"); | ||
1522 | 1492 | ||
1523 | /* | 1493 | /* |
1524 | * Add it to the IO-APIC irq-routing table: | 1494 | * Add it to the IO-APIC irq-routing table: |
@@ -1625,7 +1595,7 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
1625 | for_each_active_irq(irq) { | 1595 | for_each_active_irq(irq) { |
1626 | struct irq_pin_list *entry; | 1596 | struct irq_pin_list *entry; |
1627 | 1597 | ||
1628 | cfg = get_irq_chip_data(irq); | 1598 | cfg = irq_get_chip_data(irq); |
1629 | if (!cfg) | 1599 | if (!cfg) |
1630 | continue; | 1600 | continue; |
1631 | entry = cfg->irq_2_pin; | 1601 | entry = cfg->irq_2_pin; |
@@ -1916,7 +1886,7 @@ void disable_IO_APIC(void) | |||
1916 | * | 1886 | * |
1917 | * With interrupt-remapping, for now we will use virtual wire A mode, | 1887 | * With interrupt-remapping, for now we will use virtual wire A mode, |
1918 | * as virtual wire B is little complex (need to configure both | 1888 | * as virtual wire B is little complex (need to configure both |
1919 | * IOAPIC RTE aswell as interrupt-remapping table entry). | 1889 | * IOAPIC RTE as well as interrupt-remapping table entry). |
1920 | * As this gets called during crash dump, keep this simple for now. | 1890 | * As this gets called during crash dump, keep this simple for now. |
1921 | */ | 1891 | */ |
1922 | if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) { | 1892 | if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) { |
@@ -2391,7 +2361,7 @@ static void irq_complete_move(struct irq_cfg *cfg) | |||
2391 | 2361 | ||
2392 | void irq_force_complete_move(int irq) | 2362 | void irq_force_complete_move(int irq) |
2393 | { | 2363 | { |
2394 | struct irq_cfg *cfg = get_irq_chip_data(irq); | 2364 | struct irq_cfg *cfg = irq_get_chip_data(irq); |
2395 | 2365 | ||
2396 | if (!cfg) | 2366 | if (!cfg) |
2397 | return; | 2367 | return; |
@@ -2405,7 +2375,7 @@ static inline void irq_complete_move(struct irq_cfg *cfg) { } | |||
2405 | static void ack_apic_edge(struct irq_data *data) | 2375 | static void ack_apic_edge(struct irq_data *data) |
2406 | { | 2376 | { |
2407 | irq_complete_move(data->chip_data); | 2377 | irq_complete_move(data->chip_data); |
2408 | move_native_irq(data->irq); | 2378 | irq_move_irq(data); |
2409 | ack_APIC_irq(); | 2379 | ack_APIC_irq(); |
2410 | } | 2380 | } |
2411 | 2381 | ||
@@ -2462,7 +2432,7 @@ static void ack_apic_level(struct irq_data *data) | |||
2462 | irq_complete_move(cfg); | 2432 | irq_complete_move(cfg); |
2463 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 2433 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
2464 | /* If we are moving the irq we need to mask it */ | 2434 | /* If we are moving the irq we need to mask it */ |
2465 | if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { | 2435 | if (unlikely(irqd_is_setaffinity_pending(data))) { |
2466 | do_unmask_irq = 1; | 2436 | do_unmask_irq = 1; |
2467 | mask_ioapic(cfg); | 2437 | mask_ioapic(cfg); |
2468 | } | 2438 | } |
@@ -2551,7 +2521,7 @@ static void ack_apic_level(struct irq_data *data) | |||
2551 | * and you can go talk to the chipset vendor about it. | 2521 | * and you can go talk to the chipset vendor about it. |
2552 | */ | 2522 | */ |
2553 | if (!io_apic_level_ack_pending(cfg)) | 2523 | if (!io_apic_level_ack_pending(cfg)) |
2554 | move_masked_irq(irq); | 2524 | irq_move_masked_irq(data); |
2555 | unmask_ioapic(cfg); | 2525 | unmask_ioapic(cfg); |
2556 | } | 2526 | } |
2557 | } | 2527 | } |
@@ -2614,7 +2584,7 @@ static inline void init_IO_APIC_traps(void) | |||
2614 | * 0x80, because int 0x80 is hm, kind of importantish. ;) | 2584 | * 0x80, because int 0x80 is hm, kind of importantish. ;) |
2615 | */ | 2585 | */ |
2616 | for_each_active_irq(irq) { | 2586 | for_each_active_irq(irq) { |
2617 | cfg = get_irq_chip_data(irq); | 2587 | cfg = irq_get_chip_data(irq); |
2618 | if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { | 2588 | if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { |
2619 | /* | 2589 | /* |
2620 | * Hmm.. We don't have an entry for this, | 2590 | * Hmm.. We don't have an entry for this, |
@@ -2625,7 +2595,7 @@ static inline void init_IO_APIC_traps(void) | |||
2625 | legacy_pic->make_irq(irq); | 2595 | legacy_pic->make_irq(irq); |
2626 | else | 2596 | else |
2627 | /* Strange. Oh, well.. */ | 2597 | /* Strange. Oh, well.. */ |
2628 | set_irq_chip(irq, &no_irq_chip); | 2598 | irq_set_chip(irq, &no_irq_chip); |
2629 | } | 2599 | } |
2630 | } | 2600 | } |
2631 | } | 2601 | } |
@@ -2665,7 +2635,7 @@ static struct irq_chip lapic_chip __read_mostly = { | |||
2665 | static void lapic_register_intr(int irq) | 2635 | static void lapic_register_intr(int irq) |
2666 | { | 2636 | { |
2667 | irq_clear_status_flags(irq, IRQ_LEVEL); | 2637 | irq_clear_status_flags(irq, IRQ_LEVEL); |
2668 | set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, | 2638 | irq_set_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, |
2669 | "edge"); | 2639 | "edge"); |
2670 | } | 2640 | } |
2671 | 2641 | ||
@@ -2749,7 +2719,7 @@ int timer_through_8259 __initdata; | |||
2749 | */ | 2719 | */ |
2750 | static inline void __init check_timer(void) | 2720 | static inline void __init check_timer(void) |
2751 | { | 2721 | { |
2752 | struct irq_cfg *cfg = get_irq_chip_data(0); | 2722 | struct irq_cfg *cfg = irq_get_chip_data(0); |
2753 | int node = cpu_to_node(0); | 2723 | int node = cpu_to_node(0); |
2754 | int apic1, pin1, apic2, pin2; | 2724 | int apic1, pin1, apic2, pin2; |
2755 | unsigned long flags; | 2725 | unsigned long flags; |
@@ -2935,7 +2905,7 @@ void __init setup_IO_APIC(void) | |||
2935 | } | 2905 | } |
2936 | 2906 | ||
2937 | /* | 2907 | /* |
2938 | * Called after all the initialization is done. If we didnt find any | 2908 | * Called after all the initialization is done. If we didn't find any |
2939 | * APIC bugs then we can allow the modify fast path | 2909 | * APIC bugs then we can allow the modify fast path |
2940 | */ | 2910 | */ |
2941 | 2911 | ||
@@ -3060,7 +3030,7 @@ unsigned int create_irq_nr(unsigned int from, int node) | |||
3060 | raw_spin_unlock_irqrestore(&vector_lock, flags); | 3030 | raw_spin_unlock_irqrestore(&vector_lock, flags); |
3061 | 3031 | ||
3062 | if (ret) { | 3032 | if (ret) { |
3063 | set_irq_chip_data(irq, cfg); | 3033 | irq_set_chip_data(irq, cfg); |
3064 | irq_clear_status_flags(irq, IRQ_NOREQUEST); | 3034 | irq_clear_status_flags(irq, IRQ_NOREQUEST); |
3065 | } else { | 3035 | } else { |
3066 | free_irq_at(irq, cfg); | 3036 | free_irq_at(irq, cfg); |
@@ -3085,7 +3055,7 @@ int create_irq(void) | |||
3085 | 3055 | ||
3086 | void destroy_irq(unsigned int irq) | 3056 | void destroy_irq(unsigned int irq) |
3087 | { | 3057 | { |
3088 | struct irq_cfg *cfg = get_irq_chip_data(irq); | 3058 | struct irq_cfg *cfg = irq_get_chip_data(irq); |
3089 | unsigned long flags; | 3059 | unsigned long flags; |
3090 | 3060 | ||
3091 | irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE); | 3061 | irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE); |
@@ -3119,7 +3089,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, | |||
3119 | 3089 | ||
3120 | dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); | 3090 | dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); |
3121 | 3091 | ||
3122 | if (irq_remapped(get_irq_chip_data(irq))) { | 3092 | if (irq_remapped(cfg)) { |
3123 | struct irte irte; | 3093 | struct irte irte; |
3124 | int ir_index; | 3094 | int ir_index; |
3125 | u16 sub_handle; | 3095 | u16 sub_handle; |
@@ -3291,6 +3261,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) | |||
3291 | 3261 | ||
3292 | static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) | 3262 | static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) |
3293 | { | 3263 | { |
3264 | struct irq_chip *chip = &msi_chip; | ||
3294 | struct msi_msg msg; | 3265 | struct msi_msg msg; |
3295 | int ret; | 3266 | int ret; |
3296 | 3267 | ||
@@ -3298,14 +3269,15 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) | |||
3298 | if (ret < 0) | 3269 | if (ret < 0) |
3299 | return ret; | 3270 | return ret; |
3300 | 3271 | ||
3301 | set_irq_msi(irq, msidesc); | 3272 | irq_set_msi_desc(irq, msidesc); |
3302 | write_msi_msg(irq, &msg); | 3273 | write_msi_msg(irq, &msg); |
3303 | 3274 | ||
3304 | if (irq_remapped(get_irq_chip_data(irq))) { | 3275 | if (irq_remapped(irq_get_chip_data(irq))) { |
3305 | irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); | 3276 | irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); |
3306 | set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); | 3277 | chip = &msi_ir_chip; |
3307 | } else | 3278 | } |
3308 | set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); | 3279 | |
3280 | irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); | ||
3309 | 3281 | ||
3310 | dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq); | 3282 | dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq); |
3311 | 3283 | ||
@@ -3423,8 +3395,8 @@ int arch_setup_dmar_msi(unsigned int irq) | |||
3423 | if (ret < 0) | 3395 | if (ret < 0) |
3424 | return ret; | 3396 | return ret; |
3425 | dmar_msi_write(irq, &msg); | 3397 | dmar_msi_write(irq, &msg); |
3426 | set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, | 3398 | irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, |
3427 | "edge"); | 3399 | "edge"); |
3428 | return 0; | 3400 | return 0; |
3429 | } | 3401 | } |
3430 | #endif | 3402 | #endif |
@@ -3482,6 +3454,7 @@ static struct irq_chip hpet_msi_type = { | |||
3482 | 3454 | ||
3483 | int arch_setup_hpet_msi(unsigned int irq, unsigned int id) | 3455 | int arch_setup_hpet_msi(unsigned int irq, unsigned int id) |
3484 | { | 3456 | { |
3457 | struct irq_chip *chip = &hpet_msi_type; | ||
3485 | struct msi_msg msg; | 3458 | struct msi_msg msg; |
3486 | int ret; | 3459 | int ret; |
3487 | 3460 | ||
@@ -3501,15 +3474,12 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id) | |||
3501 | if (ret < 0) | 3474 | if (ret < 0) |
3502 | return ret; | 3475 | return ret; |
3503 | 3476 | ||
3504 | hpet_msi_write(get_irq_data(irq), &msg); | 3477 | hpet_msi_write(irq_get_handler_data(irq), &msg); |
3505 | irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); | 3478 | irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); |
3506 | if (irq_remapped(get_irq_chip_data(irq))) | 3479 | if (irq_remapped(irq_get_chip_data(irq))) |
3507 | set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type, | 3480 | chip = &ir_hpet_msi_type; |
3508 | handle_edge_irq, "edge"); | ||
3509 | else | ||
3510 | set_irq_chip_and_handler_name(irq, &hpet_msi_type, | ||
3511 | handle_edge_irq, "edge"); | ||
3512 | 3481 | ||
3482 | irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); | ||
3513 | return 0; | 3483 | return 0; |
3514 | } | 3484 | } |
3515 | #endif | 3485 | #endif |
@@ -3596,7 +3566,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | |||
3596 | 3566 | ||
3597 | write_ht_irq_msg(irq, &msg); | 3567 | write_ht_irq_msg(irq, &msg); |
3598 | 3568 | ||
3599 | set_irq_chip_and_handler_name(irq, &ht_irq_chip, | 3569 | irq_set_chip_and_handler_name(irq, &ht_irq_chip, |
3600 | handle_edge_irq, "edge"); | 3570 | handle_edge_irq, "edge"); |
3601 | 3571 | ||
3602 | dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); | 3572 | dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); |
@@ -3605,7 +3575,40 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | |||
3605 | } | 3575 | } |
3606 | #endif /* CONFIG_HT_IRQ */ | 3576 | #endif /* CONFIG_HT_IRQ */ |
3607 | 3577 | ||
3608 | int __init io_apic_get_redir_entries (int ioapic) | 3578 | int |
3579 | io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr) | ||
3580 | { | ||
3581 | struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node); | ||
3582 | int ret; | ||
3583 | |||
3584 | if (!cfg) | ||
3585 | return -EINVAL; | ||
3586 | ret = __add_pin_to_irq_node(cfg, node, attr->ioapic, attr->ioapic_pin); | ||
3587 | if (!ret) | ||
3588 | setup_ioapic_irq(attr->ioapic, attr->ioapic_pin, irq, cfg, | ||
3589 | attr->trigger, attr->polarity); | ||
3590 | return ret; | ||
3591 | } | ||
3592 | |||
3593 | static int io_apic_setup_irq_pin_once(unsigned int irq, int node, | ||
3594 | struct io_apic_irq_attr *attr) | ||
3595 | { | ||
3596 | unsigned int id = attr->ioapic, pin = attr->ioapic_pin; | ||
3597 | int ret; | ||
3598 | |||
3599 | /* Avoid redundant programming */ | ||
3600 | if (test_bit(pin, mp_ioapic_routing[id].pin_programmed)) { | ||
3601 | pr_debug("Pin %d-%d already programmed\n", | ||
3602 | mp_ioapics[id].apicid, pin); | ||
3603 | return 0; | ||
3604 | } | ||
3605 | ret = io_apic_setup_irq_pin(irq, node, attr); | ||
3606 | if (!ret) | ||
3607 | set_bit(pin, mp_ioapic_routing[id].pin_programmed); | ||
3608 | return ret; | ||
3609 | } | ||
3610 | |||
3611 | static int __init io_apic_get_redir_entries(int ioapic) | ||
3609 | { | 3612 | { |
3610 | union IO_APIC_reg_01 reg_01; | 3613 | union IO_APIC_reg_01 reg_01; |
3611 | unsigned long flags; | 3614 | unsigned long flags; |
@@ -3659,96 +3662,24 @@ int __init arch_probe_nr_irqs(void) | |||
3659 | } | 3662 | } |
3660 | #endif | 3663 | #endif |
3661 | 3664 | ||
3662 | static int __io_apic_set_pci_routing(struct device *dev, int irq, | 3665 | int io_apic_set_pci_routing(struct device *dev, int irq, |
3663 | struct io_apic_irq_attr *irq_attr) | 3666 | struct io_apic_irq_attr *irq_attr) |
3664 | { | 3667 | { |
3665 | struct irq_cfg *cfg; | ||
3666 | int node; | 3668 | int node; |
3667 | int ioapic, pin; | ||
3668 | int trigger, polarity; | ||
3669 | 3669 | ||
3670 | ioapic = irq_attr->ioapic; | ||
3671 | if (!IO_APIC_IRQ(irq)) { | 3670 | if (!IO_APIC_IRQ(irq)) { |
3672 | apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", | 3671 | apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", |
3673 | ioapic); | 3672 | irq_attr->ioapic); |
3674 | return -EINVAL; | 3673 | return -EINVAL; |
3675 | } | 3674 | } |
3676 | 3675 | ||
3677 | if (dev) | 3676 | node = dev ? dev_to_node(dev) : cpu_to_node(0); |
3678 | node = dev_to_node(dev); | ||
3679 | else | ||
3680 | node = cpu_to_node(0); | ||
3681 | |||
3682 | cfg = alloc_irq_and_cfg_at(irq, node); | ||
3683 | if (!cfg) | ||
3684 | return 0; | ||
3685 | |||
3686 | pin = irq_attr->ioapic_pin; | ||
3687 | trigger = irq_attr->trigger; | ||
3688 | polarity = irq_attr->polarity; | ||
3689 | 3677 | ||
3690 | /* | 3678 | return io_apic_setup_irq_pin_once(irq, node, irq_attr); |
3691 | * IRQs < 16 are already in the irq_2_pin[] map | ||
3692 | */ | ||
3693 | if (irq >= legacy_pic->nr_legacy_irqs) { | ||
3694 | if (__add_pin_to_irq_node(cfg, node, ioapic, pin)) { | ||
3695 | printk(KERN_INFO "can not add pin %d for irq %d\n", | ||
3696 | pin, irq); | ||
3697 | return 0; | ||
3698 | } | ||
3699 | } | ||
3700 | |||
3701 | setup_ioapic_irq(ioapic, pin, irq, cfg, trigger, polarity); | ||
3702 | |||
3703 | return 0; | ||
3704 | } | 3679 | } |
3705 | 3680 | ||
3706 | int io_apic_set_pci_routing(struct device *dev, int irq, | ||
3707 | struct io_apic_irq_attr *irq_attr) | ||
3708 | { | ||
3709 | int ioapic, pin; | ||
3710 | /* | ||
3711 | * Avoid pin reprogramming. PRTs typically include entries | ||
3712 | * with redundant pin->gsi mappings (but unique PCI devices); | ||
3713 | * we only program the IOAPIC on the first. | ||
3714 | */ | ||
3715 | ioapic = irq_attr->ioapic; | ||
3716 | pin = irq_attr->ioapic_pin; | ||
3717 | if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) { | ||
3718 | pr_debug("Pin %d-%d already programmed\n", | ||
3719 | mp_ioapics[ioapic].apicid, pin); | ||
3720 | return 0; | ||
3721 | } | ||
3722 | set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed); | ||
3723 | |||
3724 | return __io_apic_set_pci_routing(dev, irq, irq_attr); | ||
3725 | } | ||
3726 | |||
3727 | u8 __init io_apic_unique_id(u8 id) | ||
3728 | { | ||
3729 | #ifdef CONFIG_X86_32 | 3681 | #ifdef CONFIG_X86_32 |
3730 | if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && | 3682 | static int __init io_apic_get_unique_id(int ioapic, int apic_id) |
3731 | !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) | ||
3732 | return io_apic_get_unique_id(nr_ioapics, id); | ||
3733 | else | ||
3734 | return id; | ||
3735 | #else | ||
3736 | int i; | ||
3737 | DECLARE_BITMAP(used, 256); | ||
3738 | |||
3739 | bitmap_zero(used, 256); | ||
3740 | for (i = 0; i < nr_ioapics; i++) { | ||
3741 | struct mpc_ioapic *ia = &mp_ioapics[i]; | ||
3742 | __set_bit(ia->apicid, used); | ||
3743 | } | ||
3744 | if (!test_bit(id, used)) | ||
3745 | return id; | ||
3746 | return find_first_zero_bit(used, 256); | ||
3747 | #endif | ||
3748 | } | ||
3749 | |||
3750 | #ifdef CONFIG_X86_32 | ||
3751 | int __init io_apic_get_unique_id(int ioapic, int apic_id) | ||
3752 | { | 3683 | { |
3753 | union IO_APIC_reg_00 reg_00; | 3684 | union IO_APIC_reg_00 reg_00; |
3754 | static physid_mask_t apic_id_map = PHYSID_MASK_NONE; | 3685 | static physid_mask_t apic_id_map = PHYSID_MASK_NONE; |
@@ -3821,9 +3752,33 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) | |||
3821 | 3752 | ||
3822 | return apic_id; | 3753 | return apic_id; |
3823 | } | 3754 | } |
3755 | |||
3756 | static u8 __init io_apic_unique_id(u8 id) | ||
3757 | { | ||
3758 | if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && | ||
3759 | !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) | ||
3760 | return io_apic_get_unique_id(nr_ioapics, id); | ||
3761 | else | ||
3762 | return id; | ||
3763 | } | ||
3764 | #else | ||
3765 | static u8 __init io_apic_unique_id(u8 id) | ||
3766 | { | ||
3767 | int i; | ||
3768 | DECLARE_BITMAP(used, 256); | ||
3769 | |||
3770 | bitmap_zero(used, 256); | ||
3771 | for (i = 0; i < nr_ioapics; i++) { | ||
3772 | struct mpc_ioapic *ia = &mp_ioapics[i]; | ||
3773 | __set_bit(ia->apicid, used); | ||
3774 | } | ||
3775 | if (!test_bit(id, used)) | ||
3776 | return id; | ||
3777 | return find_first_zero_bit(used, 256); | ||
3778 | } | ||
3824 | #endif | 3779 | #endif |
3825 | 3780 | ||
3826 | int __init io_apic_get_version(int ioapic) | 3781 | static int __init io_apic_get_version(int ioapic) |
3827 | { | 3782 | { |
3828 | union IO_APIC_reg_01 reg_01; | 3783 | union IO_APIC_reg_01 reg_01; |
3829 | unsigned long flags; | 3784 | unsigned long flags; |
@@ -3868,8 +3823,8 @@ int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity) | |||
3868 | void __init setup_ioapic_dest(void) | 3823 | void __init setup_ioapic_dest(void) |
3869 | { | 3824 | { |
3870 | int pin, ioapic, irq, irq_entry; | 3825 | int pin, ioapic, irq, irq_entry; |
3871 | struct irq_desc *desc; | ||
3872 | const struct cpumask *mask; | 3826 | const struct cpumask *mask; |
3827 | struct irq_data *idata; | ||
3873 | 3828 | ||
3874 | if (skip_ioapic_setup == 1) | 3829 | if (skip_ioapic_setup == 1) |
3875 | return; | 3830 | return; |
@@ -3884,21 +3839,20 @@ void __init setup_ioapic_dest(void) | |||
3884 | if ((ioapic > 0) && (irq > 16)) | 3839 | if ((ioapic > 0) && (irq > 16)) |
3885 | continue; | 3840 | continue; |
3886 | 3841 | ||
3887 | desc = irq_to_desc(irq); | 3842 | idata = irq_get_irq_data(irq); |
3888 | 3843 | ||
3889 | /* | 3844 | /* |
3890 | * Honour affinities which have been set in early boot | 3845 | * Honour affinities which have been set in early boot |
3891 | */ | 3846 | */ |
3892 | if (desc->status & | 3847 | if (!irqd_can_balance(idata) || irqd_affinity_was_set(idata)) |
3893 | (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) | 3848 | mask = idata->affinity; |
3894 | mask = desc->irq_data.affinity; | ||
3895 | else | 3849 | else |
3896 | mask = apic->target_cpus(); | 3850 | mask = apic->target_cpus(); |
3897 | 3851 | ||
3898 | if (intr_remapping_enabled) | 3852 | if (intr_remapping_enabled) |
3899 | ir_ioapic_set_affinity(&desc->irq_data, mask, false); | 3853 | ir_ioapic_set_affinity(idata, mask, false); |
3900 | else | 3854 | else |
3901 | ioapic_set_affinity(&desc->irq_data, mask, false); | 3855 | ioapic_set_affinity(idata, mask, false); |
3902 | } | 3856 | } |
3903 | 3857 | ||
3904 | } | 3858 | } |
@@ -4026,10 +3980,10 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi) | |||
4026 | return gsi - mp_gsi_routing[ioapic].gsi_base; | 3980 | return gsi - mp_gsi_routing[ioapic].gsi_base; |
4027 | } | 3981 | } |
4028 | 3982 | ||
4029 | static int bad_ioapic(unsigned long address) | 3983 | static __init int bad_ioapic(unsigned long address) |
4030 | { | 3984 | { |
4031 | if (nr_ioapics >= MAX_IO_APICS) { | 3985 | if (nr_ioapics >= MAX_IO_APICS) { |
4032 | printk(KERN_WARNING "WARING: Max # of I/O APICs (%d) exceeded " | 3986 | printk(KERN_WARNING "WARNING: Max # of I/O APICs (%d) exceeded " |
4033 | "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics); | 3987 | "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics); |
4034 | return 1; | 3988 | return 1; |
4035 | } | 3989 | } |
@@ -4086,20 +4040,16 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) | |||
4086 | /* Enable IOAPIC early just for system timer */ | 4040 | /* Enable IOAPIC early just for system timer */ |
4087 | void __init pre_init_apic_IRQ0(void) | 4041 | void __init pre_init_apic_IRQ0(void) |
4088 | { | 4042 | { |
4089 | struct irq_cfg *cfg; | 4043 | struct io_apic_irq_attr attr = { 0, 0, 0, 0 }; |
4090 | 4044 | ||
4091 | printk(KERN_INFO "Early APIC setup for system timer0\n"); | 4045 | printk(KERN_INFO "Early APIC setup for system timer0\n"); |
4092 | #ifndef CONFIG_SMP | 4046 | #ifndef CONFIG_SMP |
4093 | physid_set_mask_of_physid(boot_cpu_physical_apicid, | 4047 | physid_set_mask_of_physid(boot_cpu_physical_apicid, |
4094 | &phys_cpu_present_map); | 4048 | &phys_cpu_present_map); |
4095 | #endif | 4049 | #endif |
4096 | /* Make sure the irq descriptor is set up */ | ||
4097 | cfg = alloc_irq_and_cfg_at(0, 0); | ||
4098 | |||
4099 | setup_local_APIC(); | 4050 | setup_local_APIC(); |
4100 | 4051 | ||
4101 | add_pin_to_irq_node(cfg, 0, 0, 0); | 4052 | io_apic_setup_irq_pin(0, 0, &attr); |
4102 | set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); | 4053 | irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, |
4103 | 4054 | "edge"); | |
4104 | setup_ioapic_irq(0, 0, 0, cfg, 0, 0); | ||
4105 | } | 4055 | } |
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index 08385e090a6f..cce91bf26676 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c | |||
@@ -56,6 +56,8 @@ void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, | |||
56 | local_irq_restore(flags); | 56 | local_irq_restore(flags); |
57 | } | 57 | } |
58 | 58 | ||
59 | #ifdef CONFIG_X86_32 | ||
60 | |||
59 | void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, | 61 | void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, |
60 | int vector) | 62 | int vector) |
61 | { | 63 | { |
@@ -71,8 +73,8 @@ void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, | |||
71 | local_irq_save(flags); | 73 | local_irq_save(flags); |
72 | for_each_cpu(query_cpu, mask) | 74 | for_each_cpu(query_cpu, mask) |
73 | __default_send_IPI_dest_field( | 75 | __default_send_IPI_dest_field( |
74 | apic->cpu_to_logical_apicid(query_cpu), vector, | 76 | early_per_cpu(x86_cpu_to_logical_apicid, query_cpu), |
75 | apic->dest_logical); | 77 | vector, apic->dest_logical); |
76 | local_irq_restore(flags); | 78 | local_irq_restore(flags); |
77 | } | 79 | } |
78 | 80 | ||
@@ -90,14 +92,12 @@ void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, | |||
90 | if (query_cpu == this_cpu) | 92 | if (query_cpu == this_cpu) |
91 | continue; | 93 | continue; |
92 | __default_send_IPI_dest_field( | 94 | __default_send_IPI_dest_field( |
93 | apic->cpu_to_logical_apicid(query_cpu), vector, | 95 | early_per_cpu(x86_cpu_to_logical_apicid, query_cpu), |
94 | apic->dest_logical); | 96 | vector, apic->dest_logical); |
95 | } | 97 | } |
96 | local_irq_restore(flags); | 98 | local_irq_restore(flags); |
97 | } | 99 | } |
98 | 100 | ||
99 | #ifdef CONFIG_X86_32 | ||
100 | |||
101 | /* | 101 | /* |
102 | * This is only used on smaller machines. | 102 | * This is only used on smaller machines. |
103 | */ | 103 | */ |
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index 960f26ab5c9f..6273eee5134b 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c | |||
@@ -373,13 +373,6 @@ static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask | |||
373 | return physids_promote(0xFUL, retmap); | 373 | return physids_promote(0xFUL, retmap); |
374 | } | 374 | } |
375 | 375 | ||
376 | static inline int numaq_cpu_to_logical_apicid(int cpu) | ||
377 | { | ||
378 | if (cpu >= nr_cpu_ids) | ||
379 | return BAD_APICID; | ||
380 | return cpu_2_logical_apicid[cpu]; | ||
381 | } | ||
382 | |||
383 | /* | 376 | /* |
384 | * Supporting over 60 cpus on NUMA-Q requires a locality-dependent | 377 | * Supporting over 60 cpus on NUMA-Q requires a locality-dependent |
385 | * cpu to APIC ID relation to properly interact with the intelligent | 378 | * cpu to APIC ID relation to properly interact with the intelligent |
@@ -398,6 +391,15 @@ static inline int numaq_apicid_to_node(int logical_apicid) | |||
398 | return logical_apicid >> 4; | 391 | return logical_apicid >> 4; |
399 | } | 392 | } |
400 | 393 | ||
394 | static int numaq_numa_cpu_node(int cpu) | ||
395 | { | ||
396 | int logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); | ||
397 | |||
398 | if (logical_apicid != BAD_APICID) | ||
399 | return numaq_apicid_to_node(logical_apicid); | ||
400 | return NUMA_NO_NODE; | ||
401 | } | ||
402 | |||
401 | static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap) | 403 | static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap) |
402 | { | 404 | { |
403 | int node = numaq_apicid_to_node(logical_apicid); | 405 | int node = numaq_apicid_to_node(logical_apicid); |
@@ -508,8 +510,6 @@ struct apic __refdata apic_numaq = { | |||
508 | .ioapic_phys_id_map = numaq_ioapic_phys_id_map, | 510 | .ioapic_phys_id_map = numaq_ioapic_phys_id_map, |
509 | .setup_apic_routing = numaq_setup_apic_routing, | 511 | .setup_apic_routing = numaq_setup_apic_routing, |
510 | .multi_timer_check = numaq_multi_timer_check, | 512 | .multi_timer_check = numaq_multi_timer_check, |
511 | .apicid_to_node = numaq_apicid_to_node, | ||
512 | .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid, | ||
513 | .cpu_present_to_apicid = numaq_cpu_present_to_apicid, | 513 | .cpu_present_to_apicid = numaq_cpu_present_to_apicid, |
514 | .apicid_to_cpu_present = numaq_apicid_to_cpu_present, | 514 | .apicid_to_cpu_present = numaq_apicid_to_cpu_present, |
515 | .setup_portio_remap = numaq_setup_portio_remap, | 515 | .setup_portio_remap = numaq_setup_portio_remap, |
@@ -547,4 +547,7 @@ struct apic __refdata apic_numaq = { | |||
547 | .icr_write = native_apic_icr_write, | 547 | .icr_write = native_apic_icr_write, |
548 | .wait_icr_idle = native_apic_wait_icr_idle, | 548 | .wait_icr_idle = native_apic_wait_icr_idle, |
549 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | 549 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, |
550 | |||
551 | .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid, | ||
552 | .x86_32_numa_cpu_node = numaq_numa_cpu_node, | ||
550 | }; | 553 | }; |
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 99d2fe016084..fc84c7b61108 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c | |||
@@ -77,6 +77,11 @@ void __init default_setup_apic_routing(void) | |||
77 | apic->setup_apic_routing(); | 77 | apic->setup_apic_routing(); |
78 | } | 78 | } |
79 | 79 | ||
80 | static int default_x86_32_early_logical_apicid(int cpu) | ||
81 | { | ||
82 | return 1 << cpu; | ||
83 | } | ||
84 | |||
80 | static void setup_apic_flat_routing(void) | 85 | static void setup_apic_flat_routing(void) |
81 | { | 86 | { |
82 | #ifdef CONFIG_X86_IO_APIC | 87 | #ifdef CONFIG_X86_IO_APIC |
@@ -130,8 +135,6 @@ struct apic apic_default = { | |||
130 | .ioapic_phys_id_map = default_ioapic_phys_id_map, | 135 | .ioapic_phys_id_map = default_ioapic_phys_id_map, |
131 | .setup_apic_routing = setup_apic_flat_routing, | 136 | .setup_apic_routing = setup_apic_flat_routing, |
132 | .multi_timer_check = NULL, | 137 | .multi_timer_check = NULL, |
133 | .apicid_to_node = default_apicid_to_node, | ||
134 | .cpu_to_logical_apicid = default_cpu_to_logical_apicid, | ||
135 | .cpu_present_to_apicid = default_cpu_present_to_apicid, | 138 | .cpu_present_to_apicid = default_cpu_present_to_apicid, |
136 | .apicid_to_cpu_present = physid_set_mask_of_physid, | 139 | .apicid_to_cpu_present = physid_set_mask_of_physid, |
137 | .setup_portio_remap = NULL, | 140 | .setup_portio_remap = NULL, |
@@ -167,6 +170,9 @@ struct apic apic_default = { | |||
167 | .icr_write = native_apic_icr_write, | 170 | .icr_write = native_apic_icr_write, |
168 | .wait_icr_idle = native_apic_wait_icr_idle, | 171 | .wait_icr_idle = native_apic_wait_icr_idle, |
169 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | 172 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, |
173 | |||
174 | .x86_32_early_logical_apicid = default_x86_32_early_logical_apicid, | ||
175 | .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node, | ||
170 | }; | 176 | }; |
171 | 177 | ||
172 | extern struct apic apic_numaq; | 178 | extern struct apic apic_numaq; |
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 9b419263d90d..e4b8059b414a 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c | |||
@@ -194,11 +194,10 @@ static unsigned long summit_check_apicid_present(int bit) | |||
194 | return 1; | 194 | return 1; |
195 | } | 195 | } |
196 | 196 | ||
197 | static void summit_init_apic_ldr(void) | 197 | static int summit_early_logical_apicid(int cpu) |
198 | { | 198 | { |
199 | unsigned long val, id; | ||
200 | int count = 0; | 199 | int count = 0; |
201 | u8 my_id = (u8)hard_smp_processor_id(); | 200 | u8 my_id = early_per_cpu(x86_cpu_to_apicid, cpu); |
202 | u8 my_cluster = APIC_CLUSTER(my_id); | 201 | u8 my_cluster = APIC_CLUSTER(my_id); |
203 | #ifdef CONFIG_SMP | 202 | #ifdef CONFIG_SMP |
204 | u8 lid; | 203 | u8 lid; |
@@ -206,7 +205,7 @@ static void summit_init_apic_ldr(void) | |||
206 | 205 | ||
207 | /* Create logical APIC IDs by counting CPUs already in cluster. */ | 206 | /* Create logical APIC IDs by counting CPUs already in cluster. */ |
208 | for (count = 0, i = nr_cpu_ids; --i >= 0; ) { | 207 | for (count = 0, i = nr_cpu_ids; --i >= 0; ) { |
209 | lid = cpu_2_logical_apicid[i]; | 208 | lid = early_per_cpu(x86_cpu_to_logical_apicid, i); |
210 | if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster) | 209 | if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster) |
211 | ++count; | 210 | ++count; |
212 | } | 211 | } |
@@ -214,7 +213,15 @@ static void summit_init_apic_ldr(void) | |||
214 | /* We only have a 4 wide bitmap in cluster mode. If a deranged | 213 | /* We only have a 4 wide bitmap in cluster mode. If a deranged |
215 | * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */ | 214 | * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */ |
216 | BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT); | 215 | BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT); |
217 | id = my_cluster | (1UL << count); | 216 | return my_cluster | (1UL << count); |
217 | } | ||
218 | |||
219 | static void summit_init_apic_ldr(void) | ||
220 | { | ||
221 | int cpu = smp_processor_id(); | ||
222 | unsigned long id = early_per_cpu(x86_cpu_to_logical_apicid, cpu); | ||
223 | unsigned long val; | ||
224 | |||
218 | apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE); | 225 | apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE); |
219 | val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; | 226 | val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; |
220 | val |= SET_APIC_LOGICAL_ID(id); | 227 | val |= SET_APIC_LOGICAL_ID(id); |
@@ -232,27 +239,6 @@ static void summit_setup_apic_routing(void) | |||
232 | nr_ioapics); | 239 | nr_ioapics); |
233 | } | 240 | } |
234 | 241 | ||
235 | static int summit_apicid_to_node(int logical_apicid) | ||
236 | { | ||
237 | #ifdef CONFIG_SMP | ||
238 | return apicid_2_node[hard_smp_processor_id()]; | ||
239 | #else | ||
240 | return 0; | ||
241 | #endif | ||
242 | } | ||
243 | |||
244 | /* Mapping from cpu number to logical apicid */ | ||
245 | static inline int summit_cpu_to_logical_apicid(int cpu) | ||
246 | { | ||
247 | #ifdef CONFIG_SMP | ||
248 | if (cpu >= nr_cpu_ids) | ||
249 | return BAD_APICID; | ||
250 | return cpu_2_logical_apicid[cpu]; | ||
251 | #else | ||
252 | return logical_smp_processor_id(); | ||
253 | #endif | ||
254 | } | ||
255 | |||
256 | static int summit_cpu_present_to_apicid(int mps_cpu) | 242 | static int summit_cpu_present_to_apicid(int mps_cpu) |
257 | { | 243 | { |
258 | if (mps_cpu < nr_cpu_ids) | 244 | if (mps_cpu < nr_cpu_ids) |
@@ -286,7 +272,7 @@ static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask) | |||
286 | * The cpus in the mask must all be on the apic cluster. | 272 | * The cpus in the mask must all be on the apic cluster. |
287 | */ | 273 | */ |
288 | for_each_cpu(cpu, cpumask) { | 274 | for_each_cpu(cpu, cpumask) { |
289 | int new_apicid = summit_cpu_to_logical_apicid(cpu); | 275 | int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); |
290 | 276 | ||
291 | if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { | 277 | if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { |
292 | printk("%s: Not a valid mask!\n", __func__); | 278 | printk("%s: Not a valid mask!\n", __func__); |
@@ -301,7 +287,7 @@ static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask) | |||
301 | static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, | 287 | static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, |
302 | const struct cpumask *andmask) | 288 | const struct cpumask *andmask) |
303 | { | 289 | { |
304 | int apicid = summit_cpu_to_logical_apicid(0); | 290 | int apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0); |
305 | cpumask_var_t cpumask; | 291 | cpumask_var_t cpumask; |
306 | 292 | ||
307 | if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) | 293 | if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) |
@@ -528,8 +514,6 @@ struct apic apic_summit = { | |||
528 | .ioapic_phys_id_map = summit_ioapic_phys_id_map, | 514 | .ioapic_phys_id_map = summit_ioapic_phys_id_map, |
529 | .setup_apic_routing = summit_setup_apic_routing, | 515 | .setup_apic_routing = summit_setup_apic_routing, |
530 | .multi_timer_check = NULL, | 516 | .multi_timer_check = NULL, |
531 | .apicid_to_node = summit_apicid_to_node, | ||
532 | .cpu_to_logical_apicid = summit_cpu_to_logical_apicid, | ||
533 | .cpu_present_to_apicid = summit_cpu_present_to_apicid, | 517 | .cpu_present_to_apicid = summit_cpu_present_to_apicid, |
534 | .apicid_to_cpu_present = summit_apicid_to_cpu_present, | 518 | .apicid_to_cpu_present = summit_apicid_to_cpu_present, |
535 | .setup_portio_remap = NULL, | 519 | .setup_portio_remap = NULL, |
@@ -565,4 +549,7 @@ struct apic apic_summit = { | |||
565 | .icr_write = native_apic_icr_write, | 549 | .icr_write = native_apic_icr_write, |
566 | .wait_icr_idle = native_apic_wait_icr_idle, | 550 | .wait_icr_idle = native_apic_wait_icr_idle, |
567 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | 551 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, |
552 | |||
553 | .x86_32_early_logical_apicid = summit_early_logical_apicid, | ||
554 | .x86_32_numa_cpu_node = default_x86_32_numa_cpu_node, | ||
568 | }; | 555 | }; |
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index cf69c59f4910..90949bbd566d 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c | |||
@@ -206,8 +206,6 @@ struct apic apic_x2apic_cluster = { | |||
206 | .ioapic_phys_id_map = NULL, | 206 | .ioapic_phys_id_map = NULL, |
207 | .setup_apic_routing = NULL, | 207 | .setup_apic_routing = NULL, |
208 | .multi_timer_check = NULL, | 208 | .multi_timer_check = NULL, |
209 | .apicid_to_node = NULL, | ||
210 | .cpu_to_logical_apicid = NULL, | ||
211 | .cpu_present_to_apicid = default_cpu_present_to_apicid, | 209 | .cpu_present_to_apicid = default_cpu_present_to_apicid, |
212 | .apicid_to_cpu_present = NULL, | 210 | .apicid_to_cpu_present = NULL, |
213 | .setup_portio_remap = NULL, | 211 | .setup_portio_remap = NULL, |
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 8972f38c5ced..c7e6d6645bf4 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c | |||
@@ -195,8 +195,6 @@ struct apic apic_x2apic_phys = { | |||
195 | .ioapic_phys_id_map = NULL, | 195 | .ioapic_phys_id_map = NULL, |
196 | .setup_apic_routing = NULL, | 196 | .setup_apic_routing = NULL, |
197 | .multi_timer_check = NULL, | 197 | .multi_timer_check = NULL, |
198 | .apicid_to_node = NULL, | ||
199 | .cpu_to_logical_apicid = NULL, | ||
200 | .cpu_present_to_apicid = default_cpu_present_to_apicid, | 198 | .cpu_present_to_apicid = default_cpu_present_to_apicid, |
201 | .apicid_to_cpu_present = NULL, | 199 | .apicid_to_cpu_present = NULL, |
202 | .setup_portio_remap = NULL, | 200 | .setup_portio_remap = NULL, |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index bd16b58b8850..3c289281394c 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
@@ -338,8 +338,6 @@ struct apic __refdata apic_x2apic_uv_x = { | |||
338 | .ioapic_phys_id_map = NULL, | 338 | .ioapic_phys_id_map = NULL, |
339 | .setup_apic_routing = NULL, | 339 | .setup_apic_routing = NULL, |
340 | .multi_timer_check = NULL, | 340 | .multi_timer_check = NULL, |
341 | .apicid_to_node = NULL, | ||
342 | .cpu_to_logical_apicid = NULL, | ||
343 | .cpu_present_to_apicid = default_cpu_present_to_apicid, | 341 | .cpu_present_to_apicid = default_cpu_present_to_apicid, |
344 | .apicid_to_cpu_present = NULL, | 342 | .apicid_to_cpu_present = NULL, |
345 | .setup_portio_remap = NULL, | 343 | .setup_portio_remap = NULL, |
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 0e4f24c2a746..0b4be431c620 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -66,7 +66,7 @@ | |||
66 | * 1.5: Fix segment register reloading (in case of bad segments saved | 66 | * 1.5: Fix segment register reloading (in case of bad segments saved |
67 | * across BIOS call). | 67 | * across BIOS call). |
68 | * Stephen Rothwell | 68 | * Stephen Rothwell |
69 | * 1.6: Cope with complier/assembler differences. | 69 | * 1.6: Cope with compiler/assembler differences. |
70 | * Only try to turn off the first display device. | 70 | * Only try to turn off the first display device. |
71 | * Fix OOPS at power off with no APM BIOS by Jan Echternach | 71 | * Fix OOPS at power off with no APM BIOS by Jan Echternach |
72 | * <echter@informatik.uni-rostock.de> | 72 | * <echter@informatik.uni-rostock.de> |
@@ -227,6 +227,7 @@ | |||
227 | #include <linux/suspend.h> | 227 | #include <linux/suspend.h> |
228 | #include <linux/kthread.h> | 228 | #include <linux/kthread.h> |
229 | #include <linux/jiffies.h> | 229 | #include <linux/jiffies.h> |
230 | #include <linux/acpi.h> | ||
230 | 231 | ||
231 | #include <asm/system.h> | 232 | #include <asm/system.h> |
232 | #include <asm/uaccess.h> | 233 | #include <asm/uaccess.h> |
@@ -975,20 +976,10 @@ recalc: | |||
975 | 976 | ||
976 | static void apm_power_off(void) | 977 | static void apm_power_off(void) |
977 | { | 978 | { |
978 | unsigned char po_bios_call[] = { | ||
979 | 0xb8, 0x00, 0x10, /* movw $0x1000,ax */ | ||
980 | 0x8e, 0xd0, /* movw ax,ss */ | ||
981 | 0xbc, 0x00, 0xf0, /* movw $0xf000,sp */ | ||
982 | 0xb8, 0x07, 0x53, /* movw $0x5307,ax */ | ||
983 | 0xbb, 0x01, 0x00, /* movw $0x0001,bx */ | ||
984 | 0xb9, 0x03, 0x00, /* movw $0x0003,cx */ | ||
985 | 0xcd, 0x15 /* int $0x15 */ | ||
986 | }; | ||
987 | |||
988 | /* Some bioses don't like being called from CPU != 0 */ | 979 | /* Some bioses don't like being called from CPU != 0 */ |
989 | if (apm_info.realmode_power_off) { | 980 | if (apm_info.realmode_power_off) { |
990 | set_cpus_allowed_ptr(current, cpumask_of(0)); | 981 | set_cpus_allowed_ptr(current, cpumask_of(0)); |
991 | machine_real_restart(po_bios_call, sizeof(po_bios_call)); | 982 | machine_real_restart(MRR_APM); |
992 | } else { | 983 | } else { |
993 | (void)set_system_power_state(APM_STATE_OFF); | 984 | (void)set_system_power_state(APM_STATE_OFF); |
994 | } | 985 | } |
@@ -2331,12 +2322,11 @@ static int __init apm_init(void) | |||
2331 | apm_info.disabled = 1; | 2322 | apm_info.disabled = 1; |
2332 | return -ENODEV; | 2323 | return -ENODEV; |
2333 | } | 2324 | } |
2334 | if (pm_flags & PM_ACPI) { | 2325 | if (!acpi_disabled) { |
2335 | printk(KERN_NOTICE "apm: overridden by ACPI.\n"); | 2326 | printk(KERN_NOTICE "apm: overridden by ACPI.\n"); |
2336 | apm_info.disabled = 1; | 2327 | apm_info.disabled = 1; |
2337 | return -ENODEV; | 2328 | return -ENODEV; |
2338 | } | 2329 | } |
2339 | pm_flags |= PM_APM; | ||
2340 | 2330 | ||
2341 | /* | 2331 | /* |
2342 | * Set up the long jump entry point to the APM BIOS, which is called | 2332 | * Set up the long jump entry point to the APM BIOS, which is called |
@@ -2428,7 +2418,6 @@ static void __exit apm_exit(void) | |||
2428 | kthread_stop(kapmd_task); | 2418 | kthread_stop(kapmd_task); |
2429 | kapmd_task = NULL; | 2419 | kapmd_task = NULL; |
2430 | } | 2420 | } |
2431 | pm_flags &= ~PM_APM; | ||
2432 | } | 2421 | } |
2433 | 2422 | ||
2434 | module_init(apm_init); | 2423 | module_init(apm_init); |
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index cfa82c899f47..4f13fafc5264 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c | |||
@@ -1,5 +1,70 @@ | |||
1 | /* | ||
2 | * Generate definitions needed by assembly language modules. | ||
3 | * This code generates raw asm output which is post-processed to extract | ||
4 | * and format the required data. | ||
5 | */ | ||
6 | #define COMPILE_OFFSETS | ||
7 | |||
8 | #include <linux/crypto.h> | ||
9 | #include <linux/sched.h> | ||
10 | #include <linux/stddef.h> | ||
11 | #include <linux/hardirq.h> | ||
12 | #include <linux/suspend.h> | ||
13 | #include <linux/kbuild.h> | ||
14 | #include <asm/processor.h> | ||
15 | #include <asm/thread_info.h> | ||
16 | #include <asm/sigframe.h> | ||
17 | #include <asm/bootparam.h> | ||
18 | #include <asm/suspend.h> | ||
19 | |||
20 | #ifdef CONFIG_XEN | ||
21 | #include <xen/interface/xen.h> | ||
22 | #endif | ||
23 | |||
1 | #ifdef CONFIG_X86_32 | 24 | #ifdef CONFIG_X86_32 |
2 | # include "asm-offsets_32.c" | 25 | # include "asm-offsets_32.c" |
3 | #else | 26 | #else |
4 | # include "asm-offsets_64.c" | 27 | # include "asm-offsets_64.c" |
5 | #endif | 28 | #endif |
29 | |||
30 | void common(void) { | ||
31 | BLANK(); | ||
32 | OFFSET(TI_flags, thread_info, flags); | ||
33 | OFFSET(TI_status, thread_info, status); | ||
34 | OFFSET(TI_addr_limit, thread_info, addr_limit); | ||
35 | OFFSET(TI_preempt_count, thread_info, preempt_count); | ||
36 | |||
37 | BLANK(); | ||
38 | OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); | ||
39 | |||
40 | BLANK(); | ||
41 | OFFSET(pbe_address, pbe, address); | ||
42 | OFFSET(pbe_orig_address, pbe, orig_address); | ||
43 | OFFSET(pbe_next, pbe, next); | ||
44 | |||
45 | #ifdef CONFIG_PARAVIRT | ||
46 | BLANK(); | ||
47 | OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); | ||
48 | OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops); | ||
49 | OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops); | ||
50 | OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); | ||
51 | OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); | ||
52 | OFFSET(PV_CPU_iret, pv_cpu_ops, iret); | ||
53 | OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); | ||
54 | OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); | ||
55 | OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2); | ||
56 | #endif | ||
57 | |||
58 | #ifdef CONFIG_XEN | ||
59 | BLANK(); | ||
60 | OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask); | ||
61 | OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending); | ||
62 | #endif | ||
63 | |||
64 | BLANK(); | ||
65 | OFFSET(BP_scratch, boot_params, scratch); | ||
66 | OFFSET(BP_loadflags, boot_params, hdr.loadflags); | ||
67 | OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); | ||
68 | OFFSET(BP_version, boot_params, hdr.version); | ||
69 | OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); | ||
70 | } | ||
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 1a4088dda37a..c29d631af6fc 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c | |||
@@ -1,26 +1,4 @@ | |||
1 | /* | ||
2 | * Generate definitions needed by assembly language modules. | ||
3 | * This code generates raw asm output which is post-processed | ||
4 | * to extract and format the required data. | ||
5 | */ | ||
6 | |||
7 | #include <linux/crypto.h> | ||
8 | #include <linux/sched.h> | ||
9 | #include <linux/signal.h> | ||
10 | #include <linux/personality.h> | ||
11 | #include <linux/suspend.h> | ||
12 | #include <linux/kbuild.h> | ||
13 | #include <asm/ucontext.h> | 1 | #include <asm/ucontext.h> |
14 | #include <asm/sigframe.h> | ||
15 | #include <asm/pgtable.h> | ||
16 | #include <asm/fixmap.h> | ||
17 | #include <asm/processor.h> | ||
18 | #include <asm/thread_info.h> | ||
19 | #include <asm/bootparam.h> | ||
20 | #include <asm/elf.h> | ||
21 | #include <asm/suspend.h> | ||
22 | |||
23 | #include <xen/interface/xen.h> | ||
24 | 2 | ||
25 | #include <linux/lguest.h> | 3 | #include <linux/lguest.h> |
26 | #include "../../../drivers/lguest/lg.h" | 4 | #include "../../../drivers/lguest/lg.h" |
@@ -51,21 +29,10 @@ void foo(void) | |||
51 | OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); | 29 | OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); |
52 | BLANK(); | 30 | BLANK(); |
53 | 31 | ||
54 | OFFSET(TI_task, thread_info, task); | ||
55 | OFFSET(TI_exec_domain, thread_info, exec_domain); | ||
56 | OFFSET(TI_flags, thread_info, flags); | ||
57 | OFFSET(TI_status, thread_info, status); | ||
58 | OFFSET(TI_preempt_count, thread_info, preempt_count); | ||
59 | OFFSET(TI_addr_limit, thread_info, addr_limit); | ||
60 | OFFSET(TI_restart_block, thread_info, restart_block); | ||
61 | OFFSET(TI_sysenter_return, thread_info, sysenter_return); | 32 | OFFSET(TI_sysenter_return, thread_info, sysenter_return); |
62 | OFFSET(TI_cpu, thread_info, cpu); | 33 | OFFSET(TI_cpu, thread_info, cpu); |
63 | BLANK(); | 34 | BLANK(); |
64 | 35 | ||
65 | OFFSET(GDS_size, desc_ptr, size); | ||
66 | OFFSET(GDS_address, desc_ptr, address); | ||
67 | BLANK(); | ||
68 | |||
69 | OFFSET(PT_EBX, pt_regs, bx); | 36 | OFFSET(PT_EBX, pt_regs, bx); |
70 | OFFSET(PT_ECX, pt_regs, cx); | 37 | OFFSET(PT_ECX, pt_regs, cx); |
71 | OFFSET(PT_EDX, pt_regs, dx); | 38 | OFFSET(PT_EDX, pt_regs, dx); |
@@ -85,42 +52,13 @@ void foo(void) | |||
85 | OFFSET(PT_OLDSS, pt_regs, ss); | 52 | OFFSET(PT_OLDSS, pt_regs, ss); |
86 | BLANK(); | 53 | BLANK(); |
87 | 54 | ||
88 | OFFSET(EXEC_DOMAIN_handler, exec_domain, handler); | ||
89 | OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext); | 55 | OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext); |
90 | BLANK(); | 56 | BLANK(); |
91 | 57 | ||
92 | OFFSET(pbe_address, pbe, address); | ||
93 | OFFSET(pbe_orig_address, pbe, orig_address); | ||
94 | OFFSET(pbe_next, pbe, next); | ||
95 | |||
96 | /* Offset from the sysenter stack to tss.sp0 */ | 58 | /* Offset from the sysenter stack to tss.sp0 */ |
97 | DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - | 59 | DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - |
98 | sizeof(struct tss_struct)); | 60 | sizeof(struct tss_struct)); |
99 | 61 | ||
100 | DEFINE(PAGE_SIZE_asm, PAGE_SIZE); | ||
101 | DEFINE(PAGE_SHIFT_asm, PAGE_SHIFT); | ||
102 | DEFINE(THREAD_SIZE_asm, THREAD_SIZE); | ||
103 | |||
104 | OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); | ||
105 | |||
106 | #ifdef CONFIG_PARAVIRT | ||
107 | BLANK(); | ||
108 | OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); | ||
109 | OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops); | ||
110 | OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops); | ||
111 | OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); | ||
112 | OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); | ||
113 | OFFSET(PV_CPU_iret, pv_cpu_ops, iret); | ||
114 | OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); | ||
115 | OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); | ||
116 | #endif | ||
117 | |||
118 | #ifdef CONFIG_XEN | ||
119 | BLANK(); | ||
120 | OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask); | ||
121 | OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending); | ||
122 | #endif | ||
123 | |||
124 | #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) | 62 | #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) |
125 | BLANK(); | 63 | BLANK(); |
126 | OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); | 64 | OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); |
@@ -139,11 +77,4 @@ void foo(void) | |||
139 | OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode); | 77 | OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode); |
140 | OFFSET(LGUEST_PAGES_regs, lguest_pages, regs); | 78 | OFFSET(LGUEST_PAGES_regs, lguest_pages, regs); |
141 | #endif | 79 | #endif |
142 | |||
143 | BLANK(); | ||
144 | OFFSET(BP_scratch, boot_params, scratch); | ||
145 | OFFSET(BP_loadflags, boot_params, hdr.loadflags); | ||
146 | OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); | ||
147 | OFFSET(BP_version, boot_params, hdr.version); | ||
148 | OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); | ||
149 | } | 80 | } |
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 4a6aeedcd965..e72a1194af22 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c | |||
@@ -1,27 +1,4 @@ | |||
1 | /* | ||
2 | * Generate definitions needed by assembly language modules. | ||
3 | * This code generates raw asm output which is post-processed to extract | ||
4 | * and format the required data. | ||
5 | */ | ||
6 | #define COMPILE_OFFSETS | ||
7 | |||
8 | #include <linux/crypto.h> | ||
9 | #include <linux/sched.h> | ||
10 | #include <linux/stddef.h> | ||
11 | #include <linux/errno.h> | ||
12 | #include <linux/hardirq.h> | ||
13 | #include <linux/suspend.h> | ||
14 | #include <linux/kbuild.h> | ||
15 | #include <asm/processor.h> | ||
16 | #include <asm/segment.h> | ||
17 | #include <asm/thread_info.h> | ||
18 | #include <asm/ia32.h> | 1 | #include <asm/ia32.h> |
19 | #include <asm/bootparam.h> | ||
20 | #include <asm/suspend.h> | ||
21 | |||
22 | #include <xen/interface/xen.h> | ||
23 | |||
24 | #include <asm/sigframe.h> | ||
25 | 2 | ||
26 | #define __NO_STUBS 1 | 3 | #define __NO_STUBS 1 |
27 | #undef __SYSCALL | 4 | #undef __SYSCALL |
@@ -33,41 +10,19 @@ static char syscalls[] = { | |||
33 | 10 | ||
34 | int main(void) | 11 | int main(void) |
35 | { | 12 | { |
36 | #define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry)) | ||
37 | ENTRY(state); | ||
38 | ENTRY(flags); | ||
39 | ENTRY(pid); | ||
40 | BLANK(); | ||
41 | #undef ENTRY | ||
42 | #define ENTRY(entry) DEFINE(TI_ ## entry, offsetof(struct thread_info, entry)) | ||
43 | ENTRY(flags); | ||
44 | ENTRY(addr_limit); | ||
45 | ENTRY(preempt_count); | ||
46 | ENTRY(status); | ||
47 | #ifdef CONFIG_IA32_EMULATION | ||
48 | ENTRY(sysenter_return); | ||
49 | #endif | ||
50 | BLANK(); | ||
51 | #undef ENTRY | ||
52 | #ifdef CONFIG_PARAVIRT | 13 | #ifdef CONFIG_PARAVIRT |
53 | BLANK(); | ||
54 | OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); | ||
55 | OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops); | ||
56 | OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops); | ||
57 | OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); | ||
58 | OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); | ||
59 | OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame); | 14 | OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame); |
60 | OFFSET(PV_CPU_iret, pv_cpu_ops, iret); | ||
61 | OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32); | 15 | OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32); |
62 | OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64); | 16 | OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64); |
63 | OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); | ||
64 | OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); | 17 | OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); |
65 | OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2); | 18 | BLANK(); |
66 | #endif | 19 | #endif |
67 | 20 | ||
68 | |||
69 | #ifdef CONFIG_IA32_EMULATION | 21 | #ifdef CONFIG_IA32_EMULATION |
70 | #define ENTRY(entry) DEFINE(IA32_SIGCONTEXT_ ## entry, offsetof(struct sigcontext_ia32, entry)) | 22 | OFFSET(TI_sysenter_return, thread_info, sysenter_return); |
23 | BLANK(); | ||
24 | |||
25 | #define ENTRY(entry) OFFSET(IA32_SIGCONTEXT_ ## entry, sigcontext_ia32, entry) | ||
71 | ENTRY(ax); | 26 | ENTRY(ax); |
72 | ENTRY(bx); | 27 | ENTRY(bx); |
73 | ENTRY(cx); | 28 | ENTRY(cx); |
@@ -79,15 +34,12 @@ int main(void) | |||
79 | ENTRY(ip); | 34 | ENTRY(ip); |
80 | BLANK(); | 35 | BLANK(); |
81 | #undef ENTRY | 36 | #undef ENTRY |
82 | DEFINE(IA32_RT_SIGFRAME_sigcontext, | 37 | |
83 | offsetof (struct rt_sigframe_ia32, uc.uc_mcontext)); | 38 | OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext); |
84 | BLANK(); | 39 | BLANK(); |
85 | #endif | 40 | #endif |
86 | DEFINE(pbe_address, offsetof(struct pbe, address)); | 41 | |
87 | DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address)); | 42 | #define ENTRY(entry) OFFSET(pt_regs_ ## entry, pt_regs, entry) |
88 | DEFINE(pbe_next, offsetof(struct pbe, next)); | ||
89 | BLANK(); | ||
90 | #define ENTRY(entry) DEFINE(pt_regs_ ## entry, offsetof(struct pt_regs, entry)) | ||
91 | ENTRY(bx); | 43 | ENTRY(bx); |
92 | ENTRY(bx); | 44 | ENTRY(bx); |
93 | ENTRY(cx); | 45 | ENTRY(cx); |
@@ -107,7 +59,8 @@ int main(void) | |||
107 | ENTRY(flags); | 59 | ENTRY(flags); |
108 | BLANK(); | 60 | BLANK(); |
109 | #undef ENTRY | 61 | #undef ENTRY |
110 | #define ENTRY(entry) DEFINE(saved_context_ ## entry, offsetof(struct saved_context, entry)) | 62 | |
63 | #define ENTRY(entry) OFFSET(saved_context_ ## entry, saved_context, entry) | ||
111 | ENTRY(cr0); | 64 | ENTRY(cr0); |
112 | ENTRY(cr2); | 65 | ENTRY(cr2); |
113 | ENTRY(cr3); | 66 | ENTRY(cr3); |
@@ -115,26 +68,11 @@ int main(void) | |||
115 | ENTRY(cr8); | 68 | ENTRY(cr8); |
116 | BLANK(); | 69 | BLANK(); |
117 | #undef ENTRY | 70 | #undef ENTRY |
118 | DEFINE(TSS_ist, offsetof(struct tss_struct, x86_tss.ist)); | ||
119 | BLANK(); | ||
120 | DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx)); | ||
121 | BLANK(); | ||
122 | DEFINE(__NR_syscall_max, sizeof(syscalls) - 1); | ||
123 | 71 | ||
72 | OFFSET(TSS_ist, tss_struct, x86_tss.ist); | ||
124 | BLANK(); | 73 | BLANK(); |
125 | OFFSET(BP_scratch, boot_params, scratch); | ||
126 | OFFSET(BP_loadflags, boot_params, hdr.loadflags); | ||
127 | OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); | ||
128 | OFFSET(BP_version, boot_params, hdr.version); | ||
129 | OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); | ||
130 | 74 | ||
131 | BLANK(); | 75 | DEFINE(__NR_syscall_max, sizeof(syscalls) - 1); |
132 | DEFINE(PAGE_SIZE_asm, PAGE_SIZE); | 76 | |
133 | #ifdef CONFIG_XEN | ||
134 | BLANK(); | ||
135 | OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask); | ||
136 | OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending); | ||
137 | #undef ENTRY | ||
138 | #endif | ||
139 | return 0; | 77 | return 0; |
140 | } | 78 | } |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 7c7bedb83c5a..3ecece0217ef 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -233,18 +233,22 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) | |||
233 | } | 233 | } |
234 | #endif | 234 | #endif |
235 | 235 | ||
236 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) | 236 | #ifdef CONFIG_NUMA |
237 | /* | ||
238 | * To workaround broken NUMA config. Read the comment in | ||
239 | * srat_detect_node(). | ||
240 | */ | ||
237 | static int __cpuinit nearby_node(int apicid) | 241 | static int __cpuinit nearby_node(int apicid) |
238 | { | 242 | { |
239 | int i, node; | 243 | int i, node; |
240 | 244 | ||
241 | for (i = apicid - 1; i >= 0; i--) { | 245 | for (i = apicid - 1; i >= 0; i--) { |
242 | node = apicid_to_node[i]; | 246 | node = __apicid_to_node[i]; |
243 | if (node != NUMA_NO_NODE && node_online(node)) | 247 | if (node != NUMA_NO_NODE && node_online(node)) |
244 | return node; | 248 | return node; |
245 | } | 249 | } |
246 | for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { | 250 | for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) { |
247 | node = apicid_to_node[i]; | 251 | node = __apicid_to_node[i]; |
248 | if (node != NUMA_NO_NODE && node_online(node)) | 252 | if (node != NUMA_NO_NODE && node_online(node)) |
249 | return node; | 253 | return node; |
250 | } | 254 | } |
@@ -261,7 +265,7 @@ static int __cpuinit nearby_node(int apicid) | |||
261 | #ifdef CONFIG_X86_HT | 265 | #ifdef CONFIG_X86_HT |
262 | static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c) | 266 | static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c) |
263 | { | 267 | { |
264 | u32 nodes; | 268 | u32 nodes, cores_per_cu = 1; |
265 | u8 node_id; | 269 | u8 node_id; |
266 | int cpu = smp_processor_id(); | 270 | int cpu = smp_processor_id(); |
267 | 271 | ||
@@ -276,6 +280,7 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c) | |||
276 | /* get compute unit information */ | 280 | /* get compute unit information */ |
277 | smp_num_siblings = ((ebx >> 8) & 3) + 1; | 281 | smp_num_siblings = ((ebx >> 8) & 3) + 1; |
278 | c->compute_unit_id = ebx & 0xff; | 282 | c->compute_unit_id = ebx & 0xff; |
283 | cores_per_cu += ((ebx >> 8) & 3); | ||
279 | } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { | 284 | } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { |
280 | u64 value; | 285 | u64 value; |
281 | 286 | ||
@@ -288,15 +293,18 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c) | |||
288 | /* fixup multi-node processor information */ | 293 | /* fixup multi-node processor information */ |
289 | if (nodes > 1) { | 294 | if (nodes > 1) { |
290 | u32 cores_per_node; | 295 | u32 cores_per_node; |
296 | u32 cus_per_node; | ||
291 | 297 | ||
292 | set_cpu_cap(c, X86_FEATURE_AMD_DCM); | 298 | set_cpu_cap(c, X86_FEATURE_AMD_DCM); |
293 | cores_per_node = c->x86_max_cores / nodes; | 299 | cores_per_node = c->x86_max_cores / nodes; |
300 | cus_per_node = cores_per_node / cores_per_cu; | ||
294 | 301 | ||
295 | /* store NodeID, use llc_shared_map to store sibling info */ | 302 | /* store NodeID, use llc_shared_map to store sibling info */ |
296 | per_cpu(cpu_llc_id, cpu) = node_id; | 303 | per_cpu(cpu_llc_id, cpu) = node_id; |
297 | 304 | ||
298 | /* core id to be in range from 0 to (cores_per_node - 1) */ | 305 | /* core id has to be in the [0 .. cores_per_node - 1] range */ |
299 | c->cpu_core_id = c->cpu_core_id % cores_per_node; | 306 | c->cpu_core_id %= cores_per_node; |
307 | c->compute_unit_id %= cus_per_node; | ||
300 | } | 308 | } |
301 | } | 309 | } |
302 | #endif | 310 | #endif |
@@ -334,31 +342,40 @@ EXPORT_SYMBOL_GPL(amd_get_nb_id); | |||
334 | 342 | ||
335 | static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) | 343 | static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) |
336 | { | 344 | { |
337 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) | 345 | #ifdef CONFIG_NUMA |
338 | int cpu = smp_processor_id(); | 346 | int cpu = smp_processor_id(); |
339 | int node; | 347 | int node; |
340 | unsigned apicid = c->apicid; | 348 | unsigned apicid = c->apicid; |
341 | 349 | ||
342 | node = per_cpu(cpu_llc_id, cpu); | 350 | node = numa_cpu_node(cpu); |
351 | if (node == NUMA_NO_NODE) | ||
352 | node = per_cpu(cpu_llc_id, cpu); | ||
343 | 353 | ||
344 | if (apicid_to_node[apicid] != NUMA_NO_NODE) | ||
345 | node = apicid_to_node[apicid]; | ||
346 | if (!node_online(node)) { | 354 | if (!node_online(node)) { |
347 | /* Two possibilities here: | 355 | /* |
348 | - The CPU is missing memory and no node was created. | 356 | * Two possibilities here: |
349 | In that case try picking one from a nearby CPU | 357 | * |
350 | - The APIC IDs differ from the HyperTransport node IDs | 358 | * - The CPU is missing memory and no node was created. In |
351 | which the K8 northbridge parsing fills in. | 359 | * that case try picking one from a nearby CPU. |
352 | Assume they are all increased by a constant offset, | 360 | * |
353 | but in the same order as the HT nodeids. | 361 | * - The APIC IDs differ from the HyperTransport node IDs |
354 | If that doesn't result in a usable node fall back to the | 362 | * which the K8 northbridge parsing fills in. Assume |
355 | path for the previous case. */ | 363 | * they are all increased by a constant offset, but in |
356 | 364 | * the same order as the HT nodeids. If that doesn't | |
365 | * result in a usable node fall back to the path for the | ||
366 | * previous case. | ||
367 | * | ||
368 | * This workaround operates directly on the mapping between | ||
369 | * APIC ID and NUMA node, assuming certain relationship | ||
370 | * between APIC ID, HT node ID and NUMA topology. As going | ||
371 | * through CPU mapping may alter the outcome, directly | ||
372 | * access __apicid_to_node[]. | ||
373 | */ | ||
357 | int ht_nodeid = c->initial_apicid; | 374 | int ht_nodeid = c->initial_apicid; |
358 | 375 | ||
359 | if (ht_nodeid >= 0 && | 376 | if (ht_nodeid >= 0 && |
360 | apicid_to_node[ht_nodeid] != NUMA_NO_NODE) | 377 | __apicid_to_node[ht_nodeid] != NUMA_NO_NODE) |
361 | node = apicid_to_node[ht_nodeid]; | 378 | node = __apicid_to_node[ht_nodeid]; |
362 | /* Pick a nearby node */ | 379 | /* Pick a nearby node */ |
363 | if (!node_online(node)) | 380 | if (!node_online(node)) |
364 | node = nearby_node(apicid); | 381 | node = nearby_node(apicid); |
@@ -594,6 +611,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
594 | } | 611 | } |
595 | } | 612 | } |
596 | #endif | 613 | #endif |
614 | |||
615 | /* As a rule processors have APIC timer running in deep C states */ | ||
616 | if (c->x86 >= 0xf && !cpu_has_amd_erratum(amd_erratum_400)) | ||
617 | set_cpu_cap(c, X86_FEATURE_ARAT); | ||
597 | } | 618 | } |
598 | 619 | ||
599 | #ifdef CONFIG_X86_32 | 620 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 1d59834396bd..e2ced0074a45 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -675,7 +675,7 @@ void __init early_cpu_init(void) | |||
675 | const struct cpu_dev *const *cdev; | 675 | const struct cpu_dev *const *cdev; |
676 | int count = 0; | 676 | int count = 0; |
677 | 677 | ||
678 | #ifdef PROCESSOR_SELECT | 678 | #ifdef CONFIG_PROCESSOR_SELECT |
679 | printk(KERN_INFO "KERNEL supported cpus:\n"); | 679 | printk(KERN_INFO "KERNEL supported cpus:\n"); |
680 | #endif | 680 | #endif |
681 | 681 | ||
@@ -687,7 +687,7 @@ void __init early_cpu_init(void) | |||
687 | cpu_devs[count] = cpudev; | 687 | cpu_devs[count] = cpudev; |
688 | count++; | 688 | count++; |
689 | 689 | ||
690 | #ifdef PROCESSOR_SELECT | 690 | #ifdef CONFIG_PROCESSOR_SELECT |
691 | { | 691 | { |
692 | unsigned int j; | 692 | unsigned int j; |
693 | 693 | ||
@@ -869,7 +869,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
869 | 869 | ||
870 | select_idle_routine(c); | 870 | select_idle_routine(c); |
871 | 871 | ||
872 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) | 872 | #ifdef CONFIG_NUMA |
873 | numa_add_cpu(smp_processor_id()); | 873 | numa_add_cpu(smp_processor_id()); |
874 | #endif | 874 | #endif |
875 | } | 875 | } |
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index 03162dac6271..cf48cdd6907d 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c | |||
@@ -444,7 +444,7 @@ static int __cpuinit longhaul_get_ranges(void) | |||
444 | return -EINVAL; | 444 | return -EINVAL; |
445 | } | 445 | } |
446 | /* Get max multiplier - as we always did. | 446 | /* Get max multiplier - as we always did. |
447 | * Longhaul MSR is usefull only when voltage scaling is enabled. | 447 | * Longhaul MSR is useful only when voltage scaling is enabled. |
448 | * C3 is booting at max anyway. */ | 448 | * C3 is booting at max anyway. */ |
449 | maxmult = mult; | 449 | maxmult = mult; |
450 | /* Get min multiplier */ | 450 | /* Get min multiplier */ |
@@ -1011,7 +1011,7 @@ static void __exit longhaul_exit(void) | |||
1011 | * trigger frequency transition in some cases. */ | 1011 | * trigger frequency transition in some cases. */ |
1012 | module_param(disable_acpi_c3, int, 0644); | 1012 | module_param(disable_acpi_c3, int, 0644); |
1013 | MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support"); | 1013 | MODULE_PARM_DESC(disable_acpi_c3, "Don't use ACPI C3 support"); |
1014 | /* Change CPU voltage with frequency. Very usefull to save | 1014 | /* Change CPU voltage with frequency. Very useful to save |
1015 | * power, but most VIA C3 processors aren't supporting it. */ | 1015 | * power, but most VIA C3 processors aren't supporting it. */ |
1016 | module_param(scale_voltage, int, 0644); | 1016 | module_param(scale_voltage, int, 0644); |
1017 | MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); | 1017 | MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); |
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c index 4a5a42b842ad..755a31e0f5b0 100644 --- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c | |||
@@ -315,8 +315,6 @@ static int __init pcc_cpufreq_do_osc(acpi_handle *handle) | |||
315 | 315 | ||
316 | input.count = 4; | 316 | input.count = 4; |
317 | input.pointer = in_params; | 317 | input.pointer = in_params; |
318 | input.count = 4; | ||
319 | input.pointer = in_params; | ||
320 | in_params[0].type = ACPI_TYPE_BUFFER; | 318 | in_params[0].type = ACPI_TYPE_BUFFER; |
321 | in_params[0].buffer.length = 16; | 319 | in_params[0].buffer.length = 16; |
322 | in_params[0].buffer.pointer = OSC_UUID; | 320 | in_params[0].buffer.pointer = OSC_UUID; |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index c567dec854f6..2368e38327b3 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
@@ -630,8 +630,7 @@ static void print_basics(struct powernow_k8_data *data) | |||
630 | data->powernow_table[j].frequency/1000); | 630 | data->powernow_table[j].frequency/1000); |
631 | } else { | 631 | } else { |
632 | printk(KERN_INFO PFX | 632 | printk(KERN_INFO PFX |
633 | " %d : fid 0x%x (%d MHz), vid 0x%x\n", | 633 | "fid 0x%x (%d MHz), vid 0x%x\n", |
634 | j, | ||
635 | data->powernow_table[j].index & 0xff, | 634 | data->powernow_table[j].index & 0xff, |
636 | data->powernow_table[j].frequency/1000, | 635 | data->powernow_table[j].frequency/1000, |
637 | data->powernow_table[j].index >> 8); | 636 | data->powernow_table[j].index >> 8); |
@@ -1276,7 +1275,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1276 | 1275 | ||
1277 | if (powernow_k8_cpu_init_acpi(data)) { | 1276 | if (powernow_k8_cpu_init_acpi(data)) { |
1278 | /* | 1277 | /* |
1279 | * Use the PSB BIOS structure. This is only availabe on | 1278 | * Use the PSB BIOS structure. This is only available on |
1280 | * an UP version, and is deprecated by AMD. | 1279 | * an UP version, and is deprecated by AMD. |
1281 | */ | 1280 | */ |
1282 | if (num_online_cpus() != 1) { | 1281 | if (num_online_cpus() != 1) { |
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c index 8abd869baabf..91bc25b67bc1 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c | |||
@@ -292,7 +292,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) | |||
292 | 292 | ||
293 | result = speedstep_smi_ownership(); | 293 | result = speedstep_smi_ownership(); |
294 | if (result) { | 294 | if (result) { |
295 | dprintk("fails in aquiring ownership of a SMI interface.\n"); | 295 | dprintk("fails in acquiring ownership of a SMI interface.\n"); |
296 | return -EINVAL; | 296 | return -EINVAL; |
297 | } | 297 | } |
298 | 298 | ||
@@ -360,7 +360,7 @@ static int speedstep_resume(struct cpufreq_policy *policy) | |||
360 | int result = speedstep_smi_ownership(); | 360 | int result = speedstep_smi_ownership(); |
361 | 361 | ||
362 | if (result) | 362 | if (result) |
363 | dprintk("fails in re-aquiring ownership of a SMI interface.\n"); | 363 | dprintk("fails in re-acquiring ownership of a SMI interface.\n"); |
364 | 364 | ||
365 | return result; | 365 | return result; |
366 | } | 366 | } |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index d16c2c53d6bf..df86bc8c859d 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -276,14 +276,13 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) | |||
276 | 276 | ||
277 | static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) | 277 | static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) |
278 | { | 278 | { |
279 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) | 279 | #ifdef CONFIG_NUMA |
280 | unsigned node; | 280 | unsigned node; |
281 | int cpu = smp_processor_id(); | 281 | int cpu = smp_processor_id(); |
282 | int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid; | ||
283 | 282 | ||
284 | /* Don't do the funky fallback heuristics the AMD version employs | 283 | /* Don't do the funky fallback heuristics the AMD version employs |
285 | for now. */ | 284 | for now. */ |
286 | node = apicid_to_node[apicid]; | 285 | node = numa_cpu_node(cpu); |
287 | if (node == NUMA_NO_NODE || !node_online(node)) { | 286 | if (node == NUMA_NO_NODE || !node_online(node)) { |
288 | /* reuse the value from init_cpu_to_node() */ | 287 | /* reuse the value from init_cpu_to_node() */ |
289 | node = cpu_to_node(cpu); | 288 | node = cpu_to_node(cpu); |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index ec2c19a7b8ef..1ce1af2899df 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -304,8 +304,9 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, | |||
304 | 304 | ||
305 | struct _cache_attr { | 305 | struct _cache_attr { |
306 | struct attribute attr; | 306 | struct attribute attr; |
307 | ssize_t (*show)(struct _cpuid4_info *, char *); | 307 | ssize_t (*show)(struct _cpuid4_info *, char *, unsigned int); |
308 | ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count); | 308 | ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count, |
309 | unsigned int); | ||
309 | }; | 310 | }; |
310 | 311 | ||
311 | #ifdef CONFIG_AMD_NB | 312 | #ifdef CONFIG_AMD_NB |
@@ -400,7 +401,8 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, | |||
400 | 401 | ||
401 | #define SHOW_CACHE_DISABLE(slot) \ | 402 | #define SHOW_CACHE_DISABLE(slot) \ |
402 | static ssize_t \ | 403 | static ssize_t \ |
403 | show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf) \ | 404 | show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf, \ |
405 | unsigned int cpu) \ | ||
404 | { \ | 406 | { \ |
405 | return show_cache_disable(this_leaf, buf, slot); \ | 407 | return show_cache_disable(this_leaf, buf, slot); \ |
406 | } | 408 | } |
@@ -512,7 +514,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, | |||
512 | #define STORE_CACHE_DISABLE(slot) \ | 514 | #define STORE_CACHE_DISABLE(slot) \ |
513 | static ssize_t \ | 515 | static ssize_t \ |
514 | store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \ | 516 | store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \ |
515 | const char *buf, size_t count) \ | 517 | const char *buf, size_t count, \ |
518 | unsigned int cpu) \ | ||
516 | { \ | 519 | { \ |
517 | return store_cache_disable(this_leaf, buf, count, slot); \ | 520 | return store_cache_disable(this_leaf, buf, count, slot); \ |
518 | } | 521 | } |
@@ -524,6 +527,39 @@ static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644, | |||
524 | static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, | 527 | static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, |
525 | show_cache_disable_1, store_cache_disable_1); | 528 | show_cache_disable_1, store_cache_disable_1); |
526 | 529 | ||
530 | static ssize_t | ||
531 | show_subcaches(struct _cpuid4_info *this_leaf, char *buf, unsigned int cpu) | ||
532 | { | ||
533 | if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) | ||
534 | return -EINVAL; | ||
535 | |||
536 | return sprintf(buf, "%x\n", amd_get_subcaches(cpu)); | ||
537 | } | ||
538 | |||
539 | static ssize_t | ||
540 | store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count, | ||
541 | unsigned int cpu) | ||
542 | { | ||
543 | unsigned long val; | ||
544 | |||
545 | if (!capable(CAP_SYS_ADMIN)) | ||
546 | return -EPERM; | ||
547 | |||
548 | if (!this_leaf->l3 || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) | ||
549 | return -EINVAL; | ||
550 | |||
551 | if (strict_strtoul(buf, 16, &val) < 0) | ||
552 | return -EINVAL; | ||
553 | |||
554 | if (amd_set_subcaches(cpu, val)) | ||
555 | return -EINVAL; | ||
556 | |||
557 | return count; | ||
558 | } | ||
559 | |||
560 | static struct _cache_attr subcaches = | ||
561 | __ATTR(subcaches, 0644, show_subcaches, store_subcaches); | ||
562 | |||
527 | #else /* CONFIG_AMD_NB */ | 563 | #else /* CONFIG_AMD_NB */ |
528 | #define amd_init_l3_cache(x, y) | 564 | #define amd_init_l3_cache(x, y) |
529 | #endif /* CONFIG_AMD_NB */ | 565 | #endif /* CONFIG_AMD_NB */ |
@@ -532,9 +568,9 @@ static int | |||
532 | __cpuinit cpuid4_cache_lookup_regs(int index, | 568 | __cpuinit cpuid4_cache_lookup_regs(int index, |
533 | struct _cpuid4_info_regs *this_leaf) | 569 | struct _cpuid4_info_regs *this_leaf) |
534 | { | 570 | { |
535 | union _cpuid4_leaf_eax eax; | 571 | union _cpuid4_leaf_eax eax; |
536 | union _cpuid4_leaf_ebx ebx; | 572 | union _cpuid4_leaf_ebx ebx; |
537 | union _cpuid4_leaf_ecx ecx; | 573 | union _cpuid4_leaf_ecx ecx; |
538 | unsigned edx; | 574 | unsigned edx; |
539 | 575 | ||
540 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { | 576 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { |
@@ -732,11 +768,11 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) | |||
732 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 768 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
733 | 769 | ||
734 | if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) { | 770 | if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) { |
735 | for_each_cpu(i, c->llc_shared_map) { | 771 | for_each_cpu(i, cpu_llc_shared_mask(cpu)) { |
736 | if (!per_cpu(ici_cpuid4_info, i)) | 772 | if (!per_cpu(ici_cpuid4_info, i)) |
737 | continue; | 773 | continue; |
738 | this_leaf = CPUID4_INFO_IDX(i, index); | 774 | this_leaf = CPUID4_INFO_IDX(i, index); |
739 | for_each_cpu(sibling, c->llc_shared_map) { | 775 | for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) { |
740 | if (!cpu_online(sibling)) | 776 | if (!cpu_online(sibling)) |
741 | continue; | 777 | continue; |
742 | set_bit(sibling, this_leaf->shared_cpu_map); | 778 | set_bit(sibling, this_leaf->shared_cpu_map); |
@@ -870,8 +906,8 @@ static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject); | |||
870 | #define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(ici_index_kobject, x))[y])) | 906 | #define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(ici_index_kobject, x))[y])) |
871 | 907 | ||
872 | #define show_one_plus(file_name, object, val) \ | 908 | #define show_one_plus(file_name, object, val) \ |
873 | static ssize_t show_##file_name \ | 909 | static ssize_t show_##file_name(struct _cpuid4_info *this_leaf, char *buf, \ |
874 | (struct _cpuid4_info *this_leaf, char *buf) \ | 910 | unsigned int cpu) \ |
875 | { \ | 911 | { \ |
876 | return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \ | 912 | return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \ |
877 | } | 913 | } |
@@ -882,7 +918,8 @@ show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1); | |||
882 | show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1); | 918 | show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1); |
883 | show_one_plus(number_of_sets, ecx.split.number_of_sets, 1); | 919 | show_one_plus(number_of_sets, ecx.split.number_of_sets, 1); |
884 | 920 | ||
885 | static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf) | 921 | static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf, |
922 | unsigned int cpu) | ||
886 | { | 923 | { |
887 | return sprintf(buf, "%luK\n", this_leaf->size / 1024); | 924 | return sprintf(buf, "%luK\n", this_leaf->size / 1024); |
888 | } | 925 | } |
@@ -906,17 +943,20 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, | |||
906 | return n; | 943 | return n; |
907 | } | 944 | } |
908 | 945 | ||
909 | static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf) | 946 | static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf, |
947 | unsigned int cpu) | ||
910 | { | 948 | { |
911 | return show_shared_cpu_map_func(leaf, 0, buf); | 949 | return show_shared_cpu_map_func(leaf, 0, buf); |
912 | } | 950 | } |
913 | 951 | ||
914 | static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf) | 952 | static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf, |
953 | unsigned int cpu) | ||
915 | { | 954 | { |
916 | return show_shared_cpu_map_func(leaf, 1, buf); | 955 | return show_shared_cpu_map_func(leaf, 1, buf); |
917 | } | 956 | } |
918 | 957 | ||
919 | static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) | 958 | static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf, |
959 | unsigned int cpu) | ||
920 | { | 960 | { |
921 | switch (this_leaf->eax.split.type) { | 961 | switch (this_leaf->eax.split.type) { |
922 | case CACHE_TYPE_DATA: | 962 | case CACHE_TYPE_DATA: |
@@ -974,6 +1014,9 @@ static struct attribute ** __cpuinit amd_l3_attrs(void) | |||
974 | if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) | 1014 | if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) |
975 | n += 2; | 1015 | n += 2; |
976 | 1016 | ||
1017 | if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) | ||
1018 | n += 1; | ||
1019 | |||
977 | attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL); | 1020 | attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL); |
978 | if (attrs == NULL) | 1021 | if (attrs == NULL) |
979 | return attrs = default_attrs; | 1022 | return attrs = default_attrs; |
@@ -986,6 +1029,9 @@ static struct attribute ** __cpuinit amd_l3_attrs(void) | |||
986 | attrs[n++] = &cache_disable_1.attr; | 1029 | attrs[n++] = &cache_disable_1.attr; |
987 | } | 1030 | } |
988 | 1031 | ||
1032 | if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) | ||
1033 | attrs[n++] = &subcaches.attr; | ||
1034 | |||
989 | return attrs; | 1035 | return attrs; |
990 | } | 1036 | } |
991 | #endif | 1037 | #endif |
@@ -998,7 +1044,7 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) | |||
998 | 1044 | ||
999 | ret = fattr->show ? | 1045 | ret = fattr->show ? |
1000 | fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), | 1046 | fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), |
1001 | buf) : | 1047 | buf, this_leaf->cpu) : |
1002 | 0; | 1048 | 0; |
1003 | return ret; | 1049 | return ret; |
1004 | } | 1050 | } |
@@ -1012,7 +1058,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, | |||
1012 | 1058 | ||
1013 | ret = fattr->store ? | 1059 | ret = fattr->store ? |
1014 | fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), | 1060 | fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), |
1015 | buf, count) : | 1061 | buf, count, this_leaf->cpu) : |
1016 | 0; | 1062 | 0; |
1017 | return ret; | 1063 | return ret; |
1018 | } | 1064 | } |
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index a77971979564..0ed633c5048b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c | |||
@@ -32,7 +32,7 @@ static void inject_mce(struct mce *m) | |||
32 | { | 32 | { |
33 | struct mce *i = &per_cpu(injectm, m->extcpu); | 33 | struct mce *i = &per_cpu(injectm, m->extcpu); |
34 | 34 | ||
35 | /* Make sure noone reads partially written injectm */ | 35 | /* Make sure no one reads partially written injectm */ |
36 | i->finished = 0; | 36 | i->finished = 0; |
37 | mb(); | 37 | mb(); |
38 | m->finished = 0; | 38 | m->finished = 0; |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index d916183b7f9c..ab1122998dba 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -881,7 +881,7 @@ reset: | |||
881 | * Check if the address reported by the CPU is in a format we can parse. | 881 | * Check if the address reported by the CPU is in a format we can parse. |
882 | * It would be possible to add code for most other cases, but all would | 882 | * It would be possible to add code for most other cases, but all would |
883 | * be somewhat complicated (e.g. segment offset would require an instruction | 883 | * be somewhat complicated (e.g. segment offset would require an instruction |
884 | * parser). So only support physical addresses upto page granuality for now. | 884 | * parser). So only support physical addresses up to page granuality for now. |
885 | */ | 885 | */ |
886 | static int mce_usable_address(struct mce *m) | 886 | static int mce_usable_address(struct mce *m) |
887 | { | 887 | { |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 5bf2fac52aca..167f97b5596e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -527,15 +527,12 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
527 | int i, err = 0; | 527 | int i, err = 0; |
528 | struct threshold_bank *b = NULL; | 528 | struct threshold_bank *b = NULL; |
529 | char name[32]; | 529 | char name[32]; |
530 | #ifdef CONFIG_SMP | ||
531 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
532 | #endif | ||
533 | 530 | ||
534 | sprintf(name, "threshold_bank%i", bank); | 531 | sprintf(name, "threshold_bank%i", bank); |
535 | 532 | ||
536 | #ifdef CONFIG_SMP | 533 | #ifdef CONFIG_SMP |
537 | if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ | 534 | if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ |
538 | i = cpumask_first(c->llc_shared_map); | 535 | i = cpumask_first(cpu_llc_shared_mask(cpu)); |
539 | 536 | ||
540 | /* first core not up yet */ | 537 | /* first core not up yet */ |
541 | if (cpu_data(i).cpu_core_id) | 538 | if (cpu_data(i).cpu_core_id) |
@@ -555,7 +552,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
555 | if (err) | 552 | if (err) |
556 | goto out; | 553 | goto out; |
557 | 554 | ||
558 | cpumask_copy(b->cpus, c->llc_shared_map); | 555 | cpumask_copy(b->cpus, cpu_llc_shared_mask(cpu)); |
559 | per_cpu(threshold_banks, cpu)[bank] = b; | 556 | per_cpu(threshold_banks, cpu)[bank] = b; |
560 | 557 | ||
561 | goto out; | 558 | goto out; |
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 9f27228ceffd..a71efcdbb092 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * This only handles 32bit MTRR on 32bit hosts. This is strictly wrong | 2 | * This only handles 32bit MTRR on 32bit hosts. This is strictly wrong |
3 | * because MTRRs can span upto 40 bits (36bits on most modern x86) | 3 | * because MTRRs can span up to 40 bits (36bits on most modern x86) |
4 | */ | 4 | */ |
5 | #define DEBUG | 5 | #define DEBUG |
6 | 6 | ||
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 9d977a2ea693..87eab4a27dfc 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <asm/stacktrace.h> | 30 | #include <asm/stacktrace.h> |
31 | #include <asm/nmi.h> | 31 | #include <asm/nmi.h> |
32 | #include <asm/compat.h> | 32 | #include <asm/compat.h> |
33 | #include <asm/smp.h> | ||
33 | 34 | ||
34 | #if 0 | 35 | #if 0 |
35 | #undef wrmsrl | 36 | #undef wrmsrl |
@@ -93,6 +94,8 @@ struct amd_nb { | |||
93 | struct event_constraint event_constraints[X86_PMC_IDX_MAX]; | 94 | struct event_constraint event_constraints[X86_PMC_IDX_MAX]; |
94 | }; | 95 | }; |
95 | 96 | ||
97 | struct intel_percore; | ||
98 | |||
96 | #define MAX_LBR_ENTRIES 16 | 99 | #define MAX_LBR_ENTRIES 16 |
97 | 100 | ||
98 | struct cpu_hw_events { | 101 | struct cpu_hw_events { |
@@ -128,6 +131,13 @@ struct cpu_hw_events { | |||
128 | struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; | 131 | struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; |
129 | 132 | ||
130 | /* | 133 | /* |
134 | * Intel percore register state. | ||
135 | * Coordinate shared resources between HT threads. | ||
136 | */ | ||
137 | int percore_used; /* Used by this CPU? */ | ||
138 | struct intel_percore *per_core; | ||
139 | |||
140 | /* | ||
131 | * AMD specific bits | 141 | * AMD specific bits |
132 | */ | 142 | */ |
133 | struct amd_nb *amd_nb; | 143 | struct amd_nb *amd_nb; |
@@ -166,7 +176,7 @@ struct cpu_hw_events { | |||
166 | /* | 176 | /* |
167 | * Constraint on the Event code + UMask | 177 | * Constraint on the Event code + UMask |
168 | */ | 178 | */ |
169 | #define PEBS_EVENT_CONSTRAINT(c, n) \ | 179 | #define INTEL_UEVENT_CONSTRAINT(c, n) \ |
170 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) | 180 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) |
171 | 181 | ||
172 | #define EVENT_CONSTRAINT_END \ | 182 | #define EVENT_CONSTRAINT_END \ |
@@ -175,6 +185,28 @@ struct cpu_hw_events { | |||
175 | #define for_each_event_constraint(e, c) \ | 185 | #define for_each_event_constraint(e, c) \ |
176 | for ((e) = (c); (e)->weight; (e)++) | 186 | for ((e) = (c); (e)->weight; (e)++) |
177 | 187 | ||
188 | /* | ||
189 | * Extra registers for specific events. | ||
190 | * Some events need large masks and require external MSRs. | ||
191 | * Define a mapping to these extra registers. | ||
192 | */ | ||
193 | struct extra_reg { | ||
194 | unsigned int event; | ||
195 | unsigned int msr; | ||
196 | u64 config_mask; | ||
197 | u64 valid_mask; | ||
198 | }; | ||
199 | |||
200 | #define EVENT_EXTRA_REG(e, ms, m, vm) { \ | ||
201 | .event = (e), \ | ||
202 | .msr = (ms), \ | ||
203 | .config_mask = (m), \ | ||
204 | .valid_mask = (vm), \ | ||
205 | } | ||
206 | #define INTEL_EVENT_EXTRA_REG(event, msr, vm) \ | ||
207 | EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm) | ||
208 | #define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0) | ||
209 | |||
178 | union perf_capabilities { | 210 | union perf_capabilities { |
179 | struct { | 211 | struct { |
180 | u64 lbr_format : 6; | 212 | u64 lbr_format : 6; |
@@ -219,6 +251,7 @@ struct x86_pmu { | |||
219 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, | 251 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, |
220 | struct perf_event *event); | 252 | struct perf_event *event); |
221 | struct event_constraint *event_constraints; | 253 | struct event_constraint *event_constraints; |
254 | struct event_constraint *percore_constraints; | ||
222 | void (*quirks)(void); | 255 | void (*quirks)(void); |
223 | int perfctr_second_write; | 256 | int perfctr_second_write; |
224 | 257 | ||
@@ -247,6 +280,11 @@ struct x86_pmu { | |||
247 | */ | 280 | */ |
248 | unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ | 281 | unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ |
249 | int lbr_nr; /* hardware stack size */ | 282 | int lbr_nr; /* hardware stack size */ |
283 | |||
284 | /* | ||
285 | * Extra registers for events | ||
286 | */ | ||
287 | struct extra_reg *extra_regs; | ||
250 | }; | 288 | }; |
251 | 289 | ||
252 | static struct x86_pmu x86_pmu __read_mostly; | 290 | static struct x86_pmu x86_pmu __read_mostly; |
@@ -271,6 +309,10 @@ static u64 __read_mostly hw_cache_event_ids | |||
271 | [PERF_COUNT_HW_CACHE_MAX] | 309 | [PERF_COUNT_HW_CACHE_MAX] |
272 | [PERF_COUNT_HW_CACHE_OP_MAX] | 310 | [PERF_COUNT_HW_CACHE_OP_MAX] |
273 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; | 311 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; |
312 | static u64 __read_mostly hw_cache_extra_regs | ||
313 | [PERF_COUNT_HW_CACHE_MAX] | ||
314 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
315 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; | ||
274 | 316 | ||
275 | /* | 317 | /* |
276 | * Propagate event elapsed time into the generic event. | 318 | * Propagate event elapsed time into the generic event. |
@@ -298,7 +340,7 @@ x86_perf_event_update(struct perf_event *event) | |||
298 | */ | 340 | */ |
299 | again: | 341 | again: |
300 | prev_raw_count = local64_read(&hwc->prev_count); | 342 | prev_raw_count = local64_read(&hwc->prev_count); |
301 | rdmsrl(hwc->event_base + idx, new_raw_count); | 343 | rdmsrl(hwc->event_base, new_raw_count); |
302 | 344 | ||
303 | if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, | 345 | if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, |
304 | new_raw_count) != prev_raw_count) | 346 | new_raw_count) != prev_raw_count) |
@@ -321,6 +363,49 @@ again: | |||
321 | return new_raw_count; | 363 | return new_raw_count; |
322 | } | 364 | } |
323 | 365 | ||
366 | /* using X86_FEATURE_PERFCTR_CORE to later implement ALTERNATIVE() here */ | ||
367 | static inline int x86_pmu_addr_offset(int index) | ||
368 | { | ||
369 | if (boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) | ||
370 | return index << 1; | ||
371 | return index; | ||
372 | } | ||
373 | |||
374 | static inline unsigned int x86_pmu_config_addr(int index) | ||
375 | { | ||
376 | return x86_pmu.eventsel + x86_pmu_addr_offset(index); | ||
377 | } | ||
378 | |||
379 | static inline unsigned int x86_pmu_event_addr(int index) | ||
380 | { | ||
381 | return x86_pmu.perfctr + x86_pmu_addr_offset(index); | ||
382 | } | ||
383 | |||
384 | /* | ||
385 | * Find and validate any extra registers to set up. | ||
386 | */ | ||
387 | static int x86_pmu_extra_regs(u64 config, struct perf_event *event) | ||
388 | { | ||
389 | struct extra_reg *er; | ||
390 | |||
391 | event->hw.extra_reg = 0; | ||
392 | event->hw.extra_config = 0; | ||
393 | |||
394 | if (!x86_pmu.extra_regs) | ||
395 | return 0; | ||
396 | |||
397 | for (er = x86_pmu.extra_regs; er->msr; er++) { | ||
398 | if (er->event != (config & er->config_mask)) | ||
399 | continue; | ||
400 | if (event->attr.config1 & ~er->valid_mask) | ||
401 | return -EINVAL; | ||
402 | event->hw.extra_reg = er->msr; | ||
403 | event->hw.extra_config = event->attr.config1; | ||
404 | break; | ||
405 | } | ||
406 | return 0; | ||
407 | } | ||
408 | |||
324 | static atomic_t active_events; | 409 | static atomic_t active_events; |
325 | static DEFINE_MUTEX(pmc_reserve_mutex); | 410 | static DEFINE_MUTEX(pmc_reserve_mutex); |
326 | 411 | ||
@@ -331,12 +416,12 @@ static bool reserve_pmc_hardware(void) | |||
331 | int i; | 416 | int i; |
332 | 417 | ||
333 | for (i = 0; i < x86_pmu.num_counters; i++) { | 418 | for (i = 0; i < x86_pmu.num_counters; i++) { |
334 | if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) | 419 | if (!reserve_perfctr_nmi(x86_pmu_event_addr(i))) |
335 | goto perfctr_fail; | 420 | goto perfctr_fail; |
336 | } | 421 | } |
337 | 422 | ||
338 | for (i = 0; i < x86_pmu.num_counters; i++) { | 423 | for (i = 0; i < x86_pmu.num_counters; i++) { |
339 | if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) | 424 | if (!reserve_evntsel_nmi(x86_pmu_config_addr(i))) |
340 | goto eventsel_fail; | 425 | goto eventsel_fail; |
341 | } | 426 | } |
342 | 427 | ||
@@ -344,13 +429,13 @@ static bool reserve_pmc_hardware(void) | |||
344 | 429 | ||
345 | eventsel_fail: | 430 | eventsel_fail: |
346 | for (i--; i >= 0; i--) | 431 | for (i--; i >= 0; i--) |
347 | release_evntsel_nmi(x86_pmu.eventsel + i); | 432 | release_evntsel_nmi(x86_pmu_config_addr(i)); |
348 | 433 | ||
349 | i = x86_pmu.num_counters; | 434 | i = x86_pmu.num_counters; |
350 | 435 | ||
351 | perfctr_fail: | 436 | perfctr_fail: |
352 | for (i--; i >= 0; i--) | 437 | for (i--; i >= 0; i--) |
353 | release_perfctr_nmi(x86_pmu.perfctr + i); | 438 | release_perfctr_nmi(x86_pmu_event_addr(i)); |
354 | 439 | ||
355 | return false; | 440 | return false; |
356 | } | 441 | } |
@@ -360,8 +445,8 @@ static void release_pmc_hardware(void) | |||
360 | int i; | 445 | int i; |
361 | 446 | ||
362 | for (i = 0; i < x86_pmu.num_counters; i++) { | 447 | for (i = 0; i < x86_pmu.num_counters; i++) { |
363 | release_perfctr_nmi(x86_pmu.perfctr + i); | 448 | release_perfctr_nmi(x86_pmu_event_addr(i)); |
364 | release_evntsel_nmi(x86_pmu.eventsel + i); | 449 | release_evntsel_nmi(x86_pmu_config_addr(i)); |
365 | } | 450 | } |
366 | } | 451 | } |
367 | 452 | ||
@@ -382,7 +467,7 @@ static bool check_hw_exists(void) | |||
382 | * complain and bail. | 467 | * complain and bail. |
383 | */ | 468 | */ |
384 | for (i = 0; i < x86_pmu.num_counters; i++) { | 469 | for (i = 0; i < x86_pmu.num_counters; i++) { |
385 | reg = x86_pmu.eventsel + i; | 470 | reg = x86_pmu_config_addr(i); |
386 | ret = rdmsrl_safe(reg, &val); | 471 | ret = rdmsrl_safe(reg, &val); |
387 | if (ret) | 472 | if (ret) |
388 | goto msr_fail; | 473 | goto msr_fail; |
@@ -407,8 +492,8 @@ static bool check_hw_exists(void) | |||
407 | * that don't trap on the MSR access and always return 0s. | 492 | * that don't trap on the MSR access and always return 0s. |
408 | */ | 493 | */ |
409 | val = 0xabcdUL; | 494 | val = 0xabcdUL; |
410 | ret = checking_wrmsrl(x86_pmu.perfctr, val); | 495 | ret = checking_wrmsrl(x86_pmu_event_addr(0), val); |
411 | ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new); | 496 | ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new); |
412 | if (ret || val != val_new) | 497 | if (ret || val != val_new) |
413 | goto msr_fail; | 498 | goto msr_fail; |
414 | 499 | ||
@@ -442,8 +527,9 @@ static inline int x86_pmu_initialized(void) | |||
442 | } | 527 | } |
443 | 528 | ||
444 | static inline int | 529 | static inline int |
445 | set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) | 530 | set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event) |
446 | { | 531 | { |
532 | struct perf_event_attr *attr = &event->attr; | ||
447 | unsigned int cache_type, cache_op, cache_result; | 533 | unsigned int cache_type, cache_op, cache_result; |
448 | u64 config, val; | 534 | u64 config, val; |
449 | 535 | ||
@@ -470,8 +556,8 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) | |||
470 | return -EINVAL; | 556 | return -EINVAL; |
471 | 557 | ||
472 | hwc->config |= val; | 558 | hwc->config |= val; |
473 | 559 | attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result]; | |
474 | return 0; | 560 | return x86_pmu_extra_regs(val, event); |
475 | } | 561 | } |
476 | 562 | ||
477 | static int x86_setup_perfctr(struct perf_event *event) | 563 | static int x86_setup_perfctr(struct perf_event *event) |
@@ -496,10 +582,10 @@ static int x86_setup_perfctr(struct perf_event *event) | |||
496 | } | 582 | } |
497 | 583 | ||
498 | if (attr->type == PERF_TYPE_RAW) | 584 | if (attr->type == PERF_TYPE_RAW) |
499 | return 0; | 585 | return x86_pmu_extra_regs(event->attr.config, event); |
500 | 586 | ||
501 | if (attr->type == PERF_TYPE_HW_CACHE) | 587 | if (attr->type == PERF_TYPE_HW_CACHE) |
502 | return set_ext_hw_attr(hwc, attr); | 588 | return set_ext_hw_attr(hwc, event); |
503 | 589 | ||
504 | if (attr->config >= x86_pmu.max_events) | 590 | if (attr->config >= x86_pmu.max_events) |
505 | return -EINVAL; | 591 | return -EINVAL; |
@@ -617,11 +703,11 @@ static void x86_pmu_disable_all(void) | |||
617 | 703 | ||
618 | if (!test_bit(idx, cpuc->active_mask)) | 704 | if (!test_bit(idx, cpuc->active_mask)) |
619 | continue; | 705 | continue; |
620 | rdmsrl(x86_pmu.eventsel + idx, val); | 706 | rdmsrl(x86_pmu_config_addr(idx), val); |
621 | if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE)) | 707 | if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE)) |
622 | continue; | 708 | continue; |
623 | val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; | 709 | val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; |
624 | wrmsrl(x86_pmu.eventsel + idx, val); | 710 | wrmsrl(x86_pmu_config_addr(idx), val); |
625 | } | 711 | } |
626 | } | 712 | } |
627 | 713 | ||
@@ -642,21 +728,26 @@ static void x86_pmu_disable(struct pmu *pmu) | |||
642 | x86_pmu.disable_all(); | 728 | x86_pmu.disable_all(); |
643 | } | 729 | } |
644 | 730 | ||
731 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, | ||
732 | u64 enable_mask) | ||
733 | { | ||
734 | if (hwc->extra_reg) | ||
735 | wrmsrl(hwc->extra_reg, hwc->extra_config); | ||
736 | wrmsrl(hwc->config_base, hwc->config | enable_mask); | ||
737 | } | ||
738 | |||
645 | static void x86_pmu_enable_all(int added) | 739 | static void x86_pmu_enable_all(int added) |
646 | { | 740 | { |
647 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 741 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
648 | int idx; | 742 | int idx; |
649 | 743 | ||
650 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | 744 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
651 | struct perf_event *event = cpuc->events[idx]; | 745 | struct hw_perf_event *hwc = &cpuc->events[idx]->hw; |
652 | u64 val; | ||
653 | 746 | ||
654 | if (!test_bit(idx, cpuc->active_mask)) | 747 | if (!test_bit(idx, cpuc->active_mask)) |
655 | continue; | 748 | continue; |
656 | 749 | ||
657 | val = event->hw.config; | 750 | __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); |
658 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; | ||
659 | wrmsrl(x86_pmu.eventsel + idx, val); | ||
660 | } | 751 | } |
661 | } | 752 | } |
662 | 753 | ||
@@ -821,15 +912,10 @@ static inline void x86_assign_hw_event(struct perf_event *event, | |||
821 | hwc->event_base = 0; | 912 | hwc->event_base = 0; |
822 | } else if (hwc->idx >= X86_PMC_IDX_FIXED) { | 913 | } else if (hwc->idx >= X86_PMC_IDX_FIXED) { |
823 | hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; | 914 | hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; |
824 | /* | 915 | hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0; |
825 | * We set it so that event_base + idx in wrmsr/rdmsr maps to | ||
826 | * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2: | ||
827 | */ | ||
828 | hwc->event_base = | ||
829 | MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; | ||
830 | } else { | 916 | } else { |
831 | hwc->config_base = x86_pmu.eventsel; | 917 | hwc->config_base = x86_pmu_config_addr(hwc->idx); |
832 | hwc->event_base = x86_pmu.perfctr; | 918 | hwc->event_base = x86_pmu_event_addr(hwc->idx); |
833 | } | 919 | } |
834 | } | 920 | } |
835 | 921 | ||
@@ -915,17 +1001,11 @@ static void x86_pmu_enable(struct pmu *pmu) | |||
915 | x86_pmu.enable_all(added); | 1001 | x86_pmu.enable_all(added); |
916 | } | 1002 | } |
917 | 1003 | ||
918 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, | ||
919 | u64 enable_mask) | ||
920 | { | ||
921 | wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask); | ||
922 | } | ||
923 | |||
924 | static inline void x86_pmu_disable_event(struct perf_event *event) | 1004 | static inline void x86_pmu_disable_event(struct perf_event *event) |
925 | { | 1005 | { |
926 | struct hw_perf_event *hwc = &event->hw; | 1006 | struct hw_perf_event *hwc = &event->hw; |
927 | 1007 | ||
928 | wrmsrl(hwc->config_base + hwc->idx, hwc->config); | 1008 | wrmsrl(hwc->config_base, hwc->config); |
929 | } | 1009 | } |
930 | 1010 | ||
931 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); | 1011 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); |
@@ -978,7 +1058,7 @@ x86_perf_event_set_period(struct perf_event *event) | |||
978 | */ | 1058 | */ |
979 | local64_set(&hwc->prev_count, (u64)-left); | 1059 | local64_set(&hwc->prev_count, (u64)-left); |
980 | 1060 | ||
981 | wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask); | 1061 | wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); |
982 | 1062 | ||
983 | /* | 1063 | /* |
984 | * Due to erratum on certan cpu we need | 1064 | * Due to erratum on certan cpu we need |
@@ -986,7 +1066,7 @@ x86_perf_event_set_period(struct perf_event *event) | |||
986 | * is updated properly | 1066 | * is updated properly |
987 | */ | 1067 | */ |
988 | if (x86_pmu.perfctr_second_write) { | 1068 | if (x86_pmu.perfctr_second_write) { |
989 | wrmsrl(hwc->event_base + idx, | 1069 | wrmsrl(hwc->event_base, |
990 | (u64)(-left) & x86_pmu.cntval_mask); | 1070 | (u64)(-left) & x86_pmu.cntval_mask); |
991 | } | 1071 | } |
992 | 1072 | ||
@@ -1029,7 +1109,7 @@ static int x86_pmu_add(struct perf_event *event, int flags) | |||
1029 | 1109 | ||
1030 | /* | 1110 | /* |
1031 | * If group events scheduling transaction was started, | 1111 | * If group events scheduling transaction was started, |
1032 | * skip the schedulability test here, it will be peformed | 1112 | * skip the schedulability test here, it will be performed |
1033 | * at commit time (->commit_txn) as a whole | 1113 | * at commit time (->commit_txn) as a whole |
1034 | */ | 1114 | */ |
1035 | if (cpuc->group_flag & PERF_EVENT_TXN) | 1115 | if (cpuc->group_flag & PERF_EVENT_TXN) |
@@ -1113,8 +1193,8 @@ void perf_event_print_debug(void) | |||
1113 | pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); | 1193 | pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); |
1114 | 1194 | ||
1115 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | 1195 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
1116 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); | 1196 | rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl); |
1117 | rdmsrl(x86_pmu.perfctr + idx, pmc_count); | 1197 | rdmsrl(x86_pmu_event_addr(idx), pmc_count); |
1118 | 1198 | ||
1119 | prev_left = per_cpu(pmc_prev_left[idx], cpu); | 1199 | prev_left = per_cpu(pmc_prev_left[idx], cpu); |
1120 | 1200 | ||
@@ -1389,7 +1469,7 @@ static void __init pmu_check_apic(void) | |||
1389 | pr_info("no hardware sampling interrupt available.\n"); | 1469 | pr_info("no hardware sampling interrupt available.\n"); |
1390 | } | 1470 | } |
1391 | 1471 | ||
1392 | int __init init_hw_perf_events(void) | 1472 | static int __init init_hw_perf_events(void) |
1393 | { | 1473 | { |
1394 | struct event_constraint *c; | 1474 | struct event_constraint *c; |
1395 | int err; | 1475 | int err; |
@@ -1608,7 +1688,7 @@ out: | |||
1608 | return ret; | 1688 | return ret; |
1609 | } | 1689 | } |
1610 | 1690 | ||
1611 | int x86_pmu_event_init(struct perf_event *event) | 1691 | static int x86_pmu_event_init(struct perf_event *event) |
1612 | { | 1692 | { |
1613 | struct pmu *tmp; | 1693 | struct pmu *tmp; |
1614 | int err; | 1694 | int err; |
@@ -1710,7 +1790,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) | |||
1710 | 1790 | ||
1711 | perf_callchain_store(entry, regs->ip); | 1791 | perf_callchain_store(entry, regs->ip); |
1712 | 1792 | ||
1713 | dump_trace(NULL, regs, NULL, &backtrace_ops, entry); | 1793 | dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); |
1714 | } | 1794 | } |
1715 | 1795 | ||
1716 | #ifdef CONFIG_COMPAT | 1796 | #ifdef CONFIG_COMPAT |
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 67e2202a6039..461f62bbd774 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -127,6 +127,11 @@ static int amd_pmu_hw_config(struct perf_event *event) | |||
127 | /* | 127 | /* |
128 | * AMD64 events are detected based on their event codes. | 128 | * AMD64 events are detected based on their event codes. |
129 | */ | 129 | */ |
130 | static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc) | ||
131 | { | ||
132 | return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff); | ||
133 | } | ||
134 | |||
130 | static inline int amd_is_nb_event(struct hw_perf_event *hwc) | 135 | static inline int amd_is_nb_event(struct hw_perf_event *hwc) |
131 | { | 136 | { |
132 | return (hwc->config & 0xe0) == 0xe0; | 137 | return (hwc->config & 0xe0) == 0xe0; |
@@ -385,13 +390,181 @@ static __initconst const struct x86_pmu amd_pmu = { | |||
385 | .cpu_dead = amd_pmu_cpu_dead, | 390 | .cpu_dead = amd_pmu_cpu_dead, |
386 | }; | 391 | }; |
387 | 392 | ||
393 | /* AMD Family 15h */ | ||
394 | |||
395 | #define AMD_EVENT_TYPE_MASK 0x000000F0ULL | ||
396 | |||
397 | #define AMD_EVENT_FP 0x00000000ULL ... 0x00000010ULL | ||
398 | #define AMD_EVENT_LS 0x00000020ULL ... 0x00000030ULL | ||
399 | #define AMD_EVENT_DC 0x00000040ULL ... 0x00000050ULL | ||
400 | #define AMD_EVENT_CU 0x00000060ULL ... 0x00000070ULL | ||
401 | #define AMD_EVENT_IC_DE 0x00000080ULL ... 0x00000090ULL | ||
402 | #define AMD_EVENT_EX_LS 0x000000C0ULL | ||
403 | #define AMD_EVENT_DE 0x000000D0ULL | ||
404 | #define AMD_EVENT_NB 0x000000E0ULL ... 0x000000F0ULL | ||
405 | |||
406 | /* | ||
407 | * AMD family 15h event code/PMC mappings: | ||
408 | * | ||
409 | * type = event_code & 0x0F0: | ||
410 | * | ||
411 | * 0x000 FP PERF_CTL[5:3] | ||
412 | * 0x010 FP PERF_CTL[5:3] | ||
413 | * 0x020 LS PERF_CTL[5:0] | ||
414 | * 0x030 LS PERF_CTL[5:0] | ||
415 | * 0x040 DC PERF_CTL[5:0] | ||
416 | * 0x050 DC PERF_CTL[5:0] | ||
417 | * 0x060 CU PERF_CTL[2:0] | ||
418 | * 0x070 CU PERF_CTL[2:0] | ||
419 | * 0x080 IC/DE PERF_CTL[2:0] | ||
420 | * 0x090 IC/DE PERF_CTL[2:0] | ||
421 | * 0x0A0 --- | ||
422 | * 0x0B0 --- | ||
423 | * 0x0C0 EX/LS PERF_CTL[5:0] | ||
424 | * 0x0D0 DE PERF_CTL[2:0] | ||
425 | * 0x0E0 NB NB_PERF_CTL[3:0] | ||
426 | * 0x0F0 NB NB_PERF_CTL[3:0] | ||
427 | * | ||
428 | * Exceptions: | ||
429 | * | ||
430 | * 0x003 FP PERF_CTL[3] | ||
431 | * 0x00B FP PERF_CTL[3] | ||
432 | * 0x00D FP PERF_CTL[3] | ||
433 | * 0x023 DE PERF_CTL[2:0] | ||
434 | * 0x02D LS PERF_CTL[3] | ||
435 | * 0x02E LS PERF_CTL[3,0] | ||
436 | * 0x043 CU PERF_CTL[2:0] | ||
437 | * 0x045 CU PERF_CTL[2:0] | ||
438 | * 0x046 CU PERF_CTL[2:0] | ||
439 | * 0x054 CU PERF_CTL[2:0] | ||
440 | * 0x055 CU PERF_CTL[2:0] | ||
441 | * 0x08F IC PERF_CTL[0] | ||
442 | * 0x187 DE PERF_CTL[0] | ||
443 | * 0x188 DE PERF_CTL[0] | ||
444 | * 0x0DB EX PERF_CTL[5:0] | ||
445 | * 0x0DC LS PERF_CTL[5:0] | ||
446 | * 0x0DD LS PERF_CTL[5:0] | ||
447 | * 0x0DE LS PERF_CTL[5:0] | ||
448 | * 0x0DF LS PERF_CTL[5:0] | ||
449 | * 0x1D6 EX PERF_CTL[5:0] | ||
450 | * 0x1D8 EX PERF_CTL[5:0] | ||
451 | */ | ||
452 | |||
453 | static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); | ||
454 | static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0); | ||
455 | static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0); | ||
456 | static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0); | ||
457 | static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); | ||
458 | static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); | ||
459 | |||
460 | static struct event_constraint * | ||
461 | amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event) | ||
462 | { | ||
463 | unsigned int event_code = amd_get_event_code(&event->hw); | ||
464 | |||
465 | switch (event_code & AMD_EVENT_TYPE_MASK) { | ||
466 | case AMD_EVENT_FP: | ||
467 | switch (event_code) { | ||
468 | case 0x003: | ||
469 | case 0x00B: | ||
470 | case 0x00D: | ||
471 | return &amd_f15_PMC3; | ||
472 | default: | ||
473 | return &amd_f15_PMC53; | ||
474 | } | ||
475 | case AMD_EVENT_LS: | ||
476 | case AMD_EVENT_DC: | ||
477 | case AMD_EVENT_EX_LS: | ||
478 | switch (event_code) { | ||
479 | case 0x023: | ||
480 | case 0x043: | ||
481 | case 0x045: | ||
482 | case 0x046: | ||
483 | case 0x054: | ||
484 | case 0x055: | ||
485 | return &amd_f15_PMC20; | ||
486 | case 0x02D: | ||
487 | return &amd_f15_PMC3; | ||
488 | case 0x02E: | ||
489 | return &amd_f15_PMC30; | ||
490 | default: | ||
491 | return &amd_f15_PMC50; | ||
492 | } | ||
493 | case AMD_EVENT_CU: | ||
494 | case AMD_EVENT_IC_DE: | ||
495 | case AMD_EVENT_DE: | ||
496 | switch (event_code) { | ||
497 | case 0x08F: | ||
498 | case 0x187: | ||
499 | case 0x188: | ||
500 | return &amd_f15_PMC0; | ||
501 | case 0x0DB ... 0x0DF: | ||
502 | case 0x1D6: | ||
503 | case 0x1D8: | ||
504 | return &amd_f15_PMC50; | ||
505 | default: | ||
506 | return &amd_f15_PMC20; | ||
507 | } | ||
508 | case AMD_EVENT_NB: | ||
509 | /* not yet implemented */ | ||
510 | return &emptyconstraint; | ||
511 | default: | ||
512 | return &emptyconstraint; | ||
513 | } | ||
514 | } | ||
515 | |||
516 | static __initconst const struct x86_pmu amd_pmu_f15h = { | ||
517 | .name = "AMD Family 15h", | ||
518 | .handle_irq = x86_pmu_handle_irq, | ||
519 | .disable_all = x86_pmu_disable_all, | ||
520 | .enable_all = x86_pmu_enable_all, | ||
521 | .enable = x86_pmu_enable_event, | ||
522 | .disable = x86_pmu_disable_event, | ||
523 | .hw_config = amd_pmu_hw_config, | ||
524 | .schedule_events = x86_schedule_events, | ||
525 | .eventsel = MSR_F15H_PERF_CTL, | ||
526 | .perfctr = MSR_F15H_PERF_CTR, | ||
527 | .event_map = amd_pmu_event_map, | ||
528 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | ||
529 | .num_counters = 6, | ||
530 | .cntval_bits = 48, | ||
531 | .cntval_mask = (1ULL << 48) - 1, | ||
532 | .apic = 1, | ||
533 | /* use highest bit to detect overflow */ | ||
534 | .max_period = (1ULL << 47) - 1, | ||
535 | .get_event_constraints = amd_get_event_constraints_f15h, | ||
536 | /* nortbridge counters not yet implemented: */ | ||
537 | #if 0 | ||
538 | .put_event_constraints = amd_put_event_constraints, | ||
539 | |||
540 | .cpu_prepare = amd_pmu_cpu_prepare, | ||
541 | .cpu_starting = amd_pmu_cpu_starting, | ||
542 | .cpu_dead = amd_pmu_cpu_dead, | ||
543 | #endif | ||
544 | }; | ||
545 | |||
388 | static __init int amd_pmu_init(void) | 546 | static __init int amd_pmu_init(void) |
389 | { | 547 | { |
390 | /* Performance-monitoring supported from K7 and later: */ | 548 | /* Performance-monitoring supported from K7 and later: */ |
391 | if (boot_cpu_data.x86 < 6) | 549 | if (boot_cpu_data.x86 < 6) |
392 | return -ENODEV; | 550 | return -ENODEV; |
393 | 551 | ||
394 | x86_pmu = amd_pmu; | 552 | /* |
553 | * If core performance counter extensions exists, it must be | ||
554 | * family 15h, otherwise fail. See x86_pmu_addr_offset(). | ||
555 | */ | ||
556 | switch (boot_cpu_data.x86) { | ||
557 | case 0x15: | ||
558 | if (!cpu_has_perfctr_core) | ||
559 | return -ENODEV; | ||
560 | x86_pmu = amd_pmu_f15h; | ||
561 | break; | ||
562 | default: | ||
563 | if (cpu_has_perfctr_core) | ||
564 | return -ENODEV; | ||
565 | x86_pmu = amd_pmu; | ||
566 | break; | ||
567 | } | ||
395 | 568 | ||
396 | /* Events are common for all AMDs */ | 569 | /* Events are common for all AMDs */ |
397 | memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, | 570 | memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 008835c1d79c..8fc2b2cee1da 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -1,5 +1,27 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_INTEL | 1 | #ifdef CONFIG_CPU_SUP_INTEL |
2 | 2 | ||
3 | #define MAX_EXTRA_REGS 2 | ||
4 | |||
5 | /* | ||
6 | * Per register state. | ||
7 | */ | ||
8 | struct er_account { | ||
9 | int ref; /* reference count */ | ||
10 | unsigned int extra_reg; /* extra MSR number */ | ||
11 | u64 extra_config; /* extra MSR config */ | ||
12 | }; | ||
13 | |||
14 | /* | ||
15 | * Per core state | ||
16 | * This used to coordinate shared registers for HT threads. | ||
17 | */ | ||
18 | struct intel_percore { | ||
19 | raw_spinlock_t lock; /* protect structure */ | ||
20 | struct er_account regs[MAX_EXTRA_REGS]; | ||
21 | int refcnt; /* number of threads */ | ||
22 | unsigned core_id; | ||
23 | }; | ||
24 | |||
3 | /* | 25 | /* |
4 | * Intel PerfMon, used on Core and later. | 26 | * Intel PerfMon, used on Core and later. |
5 | */ | 27 | */ |
@@ -64,6 +86,18 @@ static struct event_constraint intel_nehalem_event_constraints[] = | |||
64 | EVENT_CONSTRAINT_END | 86 | EVENT_CONSTRAINT_END |
65 | }; | 87 | }; |
66 | 88 | ||
89 | static struct extra_reg intel_nehalem_extra_regs[] = | ||
90 | { | ||
91 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), | ||
92 | EVENT_EXTRA_END | ||
93 | }; | ||
94 | |||
95 | static struct event_constraint intel_nehalem_percore_constraints[] = | ||
96 | { | ||
97 | INTEL_EVENT_CONSTRAINT(0xb7, 0), | ||
98 | EVENT_CONSTRAINT_END | ||
99 | }; | ||
100 | |||
67 | static struct event_constraint intel_westmere_event_constraints[] = | 101 | static struct event_constraint intel_westmere_event_constraints[] = |
68 | { | 102 | { |
69 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 103 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
@@ -76,6 +110,33 @@ static struct event_constraint intel_westmere_event_constraints[] = | |||
76 | EVENT_CONSTRAINT_END | 110 | EVENT_CONSTRAINT_END |
77 | }; | 111 | }; |
78 | 112 | ||
113 | static struct event_constraint intel_snb_event_constraints[] = | ||
114 | { | ||
115 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | ||
116 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | ||
117 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | ||
118 | INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ | ||
119 | INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */ | ||
120 | INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */ | ||
121 | INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ | ||
122 | INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ | ||
123 | EVENT_CONSTRAINT_END | ||
124 | }; | ||
125 | |||
126 | static struct extra_reg intel_westmere_extra_regs[] = | ||
127 | { | ||
128 | INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), | ||
129 | INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff), | ||
130 | EVENT_EXTRA_END | ||
131 | }; | ||
132 | |||
133 | static struct event_constraint intel_westmere_percore_constraints[] = | ||
134 | { | ||
135 | INTEL_EVENT_CONSTRAINT(0xb7, 0), | ||
136 | INTEL_EVENT_CONSTRAINT(0xbb, 0), | ||
137 | EVENT_CONSTRAINT_END | ||
138 | }; | ||
139 | |||
79 | static struct event_constraint intel_gen_event_constraints[] = | 140 | static struct event_constraint intel_gen_event_constraints[] = |
80 | { | 141 | { |
81 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | 142 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ |
@@ -89,6 +150,106 @@ static u64 intel_pmu_event_map(int hw_event) | |||
89 | return intel_perfmon_event_map[hw_event]; | 150 | return intel_perfmon_event_map[hw_event]; |
90 | } | 151 | } |
91 | 152 | ||
153 | static __initconst const u64 snb_hw_cache_event_ids | ||
154 | [PERF_COUNT_HW_CACHE_MAX] | ||
155 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
156 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
157 | { | ||
158 | [ C(L1D) ] = { | ||
159 | [ C(OP_READ) ] = { | ||
160 | [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */ | ||
161 | [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */ | ||
162 | }, | ||
163 | [ C(OP_WRITE) ] = { | ||
164 | [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */ | ||
165 | [ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */ | ||
166 | }, | ||
167 | [ C(OP_PREFETCH) ] = { | ||
168 | [ C(RESULT_ACCESS) ] = 0x0, | ||
169 | [ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */ | ||
170 | }, | ||
171 | }, | ||
172 | [ C(L1I ) ] = { | ||
173 | [ C(OP_READ) ] = { | ||
174 | [ C(RESULT_ACCESS) ] = 0x0, | ||
175 | [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */ | ||
176 | }, | ||
177 | [ C(OP_WRITE) ] = { | ||
178 | [ C(RESULT_ACCESS) ] = -1, | ||
179 | [ C(RESULT_MISS) ] = -1, | ||
180 | }, | ||
181 | [ C(OP_PREFETCH) ] = { | ||
182 | [ C(RESULT_ACCESS) ] = 0x0, | ||
183 | [ C(RESULT_MISS) ] = 0x0, | ||
184 | }, | ||
185 | }, | ||
186 | [ C(LL ) ] = { | ||
187 | /* | ||
188 | * TBD: Need Off-core Response Performance Monitoring support | ||
189 | */ | ||
190 | [ C(OP_READ) ] = { | ||
191 | /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */ | ||
192 | [ C(RESULT_ACCESS) ] = 0x01b7, | ||
193 | /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */ | ||
194 | [ C(RESULT_MISS) ] = 0x01bb, | ||
195 | }, | ||
196 | [ C(OP_WRITE) ] = { | ||
197 | /* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */ | ||
198 | [ C(RESULT_ACCESS) ] = 0x01b7, | ||
199 | /* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */ | ||
200 | [ C(RESULT_MISS) ] = 0x01bb, | ||
201 | }, | ||
202 | [ C(OP_PREFETCH) ] = { | ||
203 | /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */ | ||
204 | [ C(RESULT_ACCESS) ] = 0x01b7, | ||
205 | /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */ | ||
206 | [ C(RESULT_MISS) ] = 0x01bb, | ||
207 | }, | ||
208 | }, | ||
209 | [ C(DTLB) ] = { | ||
210 | [ C(OP_READ) ] = { | ||
211 | [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */ | ||
212 | [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */ | ||
213 | }, | ||
214 | [ C(OP_WRITE) ] = { | ||
215 | [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */ | ||
216 | [ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */ | ||
217 | }, | ||
218 | [ C(OP_PREFETCH) ] = { | ||
219 | [ C(RESULT_ACCESS) ] = 0x0, | ||
220 | [ C(RESULT_MISS) ] = 0x0, | ||
221 | }, | ||
222 | }, | ||
223 | [ C(ITLB) ] = { | ||
224 | [ C(OP_READ) ] = { | ||
225 | [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */ | ||
226 | [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */ | ||
227 | }, | ||
228 | [ C(OP_WRITE) ] = { | ||
229 | [ C(RESULT_ACCESS) ] = -1, | ||
230 | [ C(RESULT_MISS) ] = -1, | ||
231 | }, | ||
232 | [ C(OP_PREFETCH) ] = { | ||
233 | [ C(RESULT_ACCESS) ] = -1, | ||
234 | [ C(RESULT_MISS) ] = -1, | ||
235 | }, | ||
236 | }, | ||
237 | [ C(BPU ) ] = { | ||
238 | [ C(OP_READ) ] = { | ||
239 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ | ||
240 | [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */ | ||
241 | }, | ||
242 | [ C(OP_WRITE) ] = { | ||
243 | [ C(RESULT_ACCESS) ] = -1, | ||
244 | [ C(RESULT_MISS) ] = -1, | ||
245 | }, | ||
246 | [ C(OP_PREFETCH) ] = { | ||
247 | [ C(RESULT_ACCESS) ] = -1, | ||
248 | [ C(RESULT_MISS) ] = -1, | ||
249 | }, | ||
250 | }, | ||
251 | }; | ||
252 | |||
92 | static __initconst const u64 westmere_hw_cache_event_ids | 253 | static __initconst const u64 westmere_hw_cache_event_ids |
93 | [PERF_COUNT_HW_CACHE_MAX] | 254 | [PERF_COUNT_HW_CACHE_MAX] |
94 | [PERF_COUNT_HW_CACHE_OP_MAX] | 255 | [PERF_COUNT_HW_CACHE_OP_MAX] |
@@ -124,16 +285,26 @@ static __initconst const u64 westmere_hw_cache_event_ids | |||
124 | }, | 285 | }, |
125 | [ C(LL ) ] = { | 286 | [ C(LL ) ] = { |
126 | [ C(OP_READ) ] = { | 287 | [ C(OP_READ) ] = { |
127 | [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ | 288 | /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */ |
128 | [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ | 289 | [ C(RESULT_ACCESS) ] = 0x01b7, |
290 | /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */ | ||
291 | [ C(RESULT_MISS) ] = 0x01bb, | ||
129 | }, | 292 | }, |
293 | /* | ||
294 | * Use RFO, not WRITEBACK, because a write miss would typically occur | ||
295 | * on RFO. | ||
296 | */ | ||
130 | [ C(OP_WRITE) ] = { | 297 | [ C(OP_WRITE) ] = { |
131 | [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ | 298 | /* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */ |
132 | [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ | 299 | [ C(RESULT_ACCESS) ] = 0x01bb, |
300 | /* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */ | ||
301 | [ C(RESULT_MISS) ] = 0x01b7, | ||
133 | }, | 302 | }, |
134 | [ C(OP_PREFETCH) ] = { | 303 | [ C(OP_PREFETCH) ] = { |
135 | [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ | 304 | /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */ |
136 | [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ | 305 | [ C(RESULT_ACCESS) ] = 0x01b7, |
306 | /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */ | ||
307 | [ C(RESULT_MISS) ] = 0x01bb, | ||
137 | }, | 308 | }, |
138 | }, | 309 | }, |
139 | [ C(DTLB) ] = { | 310 | [ C(DTLB) ] = { |
@@ -180,6 +351,39 @@ static __initconst const u64 westmere_hw_cache_event_ids | |||
180 | }, | 351 | }, |
181 | }; | 352 | }; |
182 | 353 | ||
354 | /* | ||
355 | * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3 | ||
356 | */ | ||
357 | |||
358 | #define DMND_DATA_RD (1 << 0) | ||
359 | #define DMND_RFO (1 << 1) | ||
360 | #define DMND_WB (1 << 3) | ||
361 | #define PF_DATA_RD (1 << 4) | ||
362 | #define PF_DATA_RFO (1 << 5) | ||
363 | #define RESP_UNCORE_HIT (1 << 8) | ||
364 | #define RESP_MISS (0xf600) /* non uncore hit */ | ||
365 | |||
366 | static __initconst const u64 nehalem_hw_cache_extra_regs | ||
367 | [PERF_COUNT_HW_CACHE_MAX] | ||
368 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
369 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
370 | { | ||
371 | [ C(LL ) ] = { | ||
372 | [ C(OP_READ) ] = { | ||
373 | [ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT, | ||
374 | [ C(RESULT_MISS) ] = DMND_DATA_RD|RESP_MISS, | ||
375 | }, | ||
376 | [ C(OP_WRITE) ] = { | ||
377 | [ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT, | ||
378 | [ C(RESULT_MISS) ] = DMND_RFO|DMND_WB|RESP_MISS, | ||
379 | }, | ||
380 | [ C(OP_PREFETCH) ] = { | ||
381 | [ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT, | ||
382 | [ C(RESULT_MISS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS, | ||
383 | }, | ||
384 | } | ||
385 | }; | ||
386 | |||
183 | static __initconst const u64 nehalem_hw_cache_event_ids | 387 | static __initconst const u64 nehalem_hw_cache_event_ids |
184 | [PERF_COUNT_HW_CACHE_MAX] | 388 | [PERF_COUNT_HW_CACHE_MAX] |
185 | [PERF_COUNT_HW_CACHE_OP_MAX] | 389 | [PERF_COUNT_HW_CACHE_OP_MAX] |
@@ -215,16 +419,26 @@ static __initconst const u64 nehalem_hw_cache_event_ids | |||
215 | }, | 419 | }, |
216 | [ C(LL ) ] = { | 420 | [ C(LL ) ] = { |
217 | [ C(OP_READ) ] = { | 421 | [ C(OP_READ) ] = { |
218 | [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ | 422 | /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ |
219 | [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ | 423 | [ C(RESULT_ACCESS) ] = 0x01b7, |
424 | /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ | ||
425 | [ C(RESULT_MISS) ] = 0x01b7, | ||
220 | }, | 426 | }, |
427 | /* | ||
428 | * Use RFO, not WRITEBACK, because a write miss would typically occur | ||
429 | * on RFO. | ||
430 | */ | ||
221 | [ C(OP_WRITE) ] = { | 431 | [ C(OP_WRITE) ] = { |
222 | [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ | 432 | /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ |
223 | [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ | 433 | [ C(RESULT_ACCESS) ] = 0x01b7, |
434 | /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ | ||
435 | [ C(RESULT_MISS) ] = 0x01b7, | ||
224 | }, | 436 | }, |
225 | [ C(OP_PREFETCH) ] = { | 437 | [ C(OP_PREFETCH) ] = { |
226 | [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ | 438 | /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ |
227 | [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ | 439 | [ C(RESULT_ACCESS) ] = 0x01b7, |
440 | /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ | ||
441 | [ C(RESULT_MISS) ] = 0x01b7, | ||
228 | }, | 442 | }, |
229 | }, | 443 | }, |
230 | [ C(DTLB) ] = { | 444 | [ C(DTLB) ] = { |
@@ -691,8 +905,8 @@ static void intel_pmu_reset(void) | |||
691 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); | 905 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); |
692 | 906 | ||
693 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | 907 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
694 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); | 908 | checking_wrmsrl(x86_pmu_config_addr(idx), 0ull); |
695 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); | 909 | checking_wrmsrl(x86_pmu_event_addr(idx), 0ull); |
696 | } | 910 | } |
697 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) | 911 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) |
698 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); | 912 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); |
@@ -794,6 +1008,67 @@ intel_bts_constraints(struct perf_event *event) | |||
794 | } | 1008 | } |
795 | 1009 | ||
796 | static struct event_constraint * | 1010 | static struct event_constraint * |
1011 | intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | ||
1012 | { | ||
1013 | struct hw_perf_event *hwc = &event->hw; | ||
1014 | unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT; | ||
1015 | struct event_constraint *c; | ||
1016 | struct intel_percore *pc; | ||
1017 | struct er_account *era; | ||
1018 | int i; | ||
1019 | int free_slot; | ||
1020 | int found; | ||
1021 | |||
1022 | if (!x86_pmu.percore_constraints || hwc->extra_alloc) | ||
1023 | return NULL; | ||
1024 | |||
1025 | for (c = x86_pmu.percore_constraints; c->cmask; c++) { | ||
1026 | if (e != c->code) | ||
1027 | continue; | ||
1028 | |||
1029 | /* | ||
1030 | * Allocate resource per core. | ||
1031 | */ | ||
1032 | pc = cpuc->per_core; | ||
1033 | if (!pc) | ||
1034 | break; | ||
1035 | c = &emptyconstraint; | ||
1036 | raw_spin_lock(&pc->lock); | ||
1037 | free_slot = -1; | ||
1038 | found = 0; | ||
1039 | for (i = 0; i < MAX_EXTRA_REGS; i++) { | ||
1040 | era = &pc->regs[i]; | ||
1041 | if (era->ref > 0 && hwc->extra_reg == era->extra_reg) { | ||
1042 | /* Allow sharing same config */ | ||
1043 | if (hwc->extra_config == era->extra_config) { | ||
1044 | era->ref++; | ||
1045 | cpuc->percore_used = 1; | ||
1046 | hwc->extra_alloc = 1; | ||
1047 | c = NULL; | ||
1048 | } | ||
1049 | /* else conflict */ | ||
1050 | found = 1; | ||
1051 | break; | ||
1052 | } else if (era->ref == 0 && free_slot == -1) | ||
1053 | free_slot = i; | ||
1054 | } | ||
1055 | if (!found && free_slot != -1) { | ||
1056 | era = &pc->regs[free_slot]; | ||
1057 | era->ref = 1; | ||
1058 | era->extra_reg = hwc->extra_reg; | ||
1059 | era->extra_config = hwc->extra_config; | ||
1060 | cpuc->percore_used = 1; | ||
1061 | hwc->extra_alloc = 1; | ||
1062 | c = NULL; | ||
1063 | } | ||
1064 | raw_spin_unlock(&pc->lock); | ||
1065 | return c; | ||
1066 | } | ||
1067 | |||
1068 | return NULL; | ||
1069 | } | ||
1070 | |||
1071 | static struct event_constraint * | ||
797 | intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | 1072 | intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) |
798 | { | 1073 | { |
799 | struct event_constraint *c; | 1074 | struct event_constraint *c; |
@@ -806,9 +1081,51 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event | |||
806 | if (c) | 1081 | if (c) |
807 | return c; | 1082 | return c; |
808 | 1083 | ||
1084 | c = intel_percore_constraints(cpuc, event); | ||
1085 | if (c) | ||
1086 | return c; | ||
1087 | |||
809 | return x86_get_event_constraints(cpuc, event); | 1088 | return x86_get_event_constraints(cpuc, event); |
810 | } | 1089 | } |
811 | 1090 | ||
1091 | static void intel_put_event_constraints(struct cpu_hw_events *cpuc, | ||
1092 | struct perf_event *event) | ||
1093 | { | ||
1094 | struct extra_reg *er; | ||
1095 | struct intel_percore *pc; | ||
1096 | struct er_account *era; | ||
1097 | struct hw_perf_event *hwc = &event->hw; | ||
1098 | int i, allref; | ||
1099 | |||
1100 | if (!cpuc->percore_used) | ||
1101 | return; | ||
1102 | |||
1103 | for (er = x86_pmu.extra_regs; er->msr; er++) { | ||
1104 | if (er->event != (hwc->config & er->config_mask)) | ||
1105 | continue; | ||
1106 | |||
1107 | pc = cpuc->per_core; | ||
1108 | raw_spin_lock(&pc->lock); | ||
1109 | for (i = 0; i < MAX_EXTRA_REGS; i++) { | ||
1110 | era = &pc->regs[i]; | ||
1111 | if (era->ref > 0 && | ||
1112 | era->extra_config == hwc->extra_config && | ||
1113 | era->extra_reg == er->msr) { | ||
1114 | era->ref--; | ||
1115 | hwc->extra_alloc = 0; | ||
1116 | break; | ||
1117 | } | ||
1118 | } | ||
1119 | allref = 0; | ||
1120 | for (i = 0; i < MAX_EXTRA_REGS; i++) | ||
1121 | allref += pc->regs[i].ref; | ||
1122 | if (allref == 0) | ||
1123 | cpuc->percore_used = 0; | ||
1124 | raw_spin_unlock(&pc->lock); | ||
1125 | break; | ||
1126 | } | ||
1127 | } | ||
1128 | |||
812 | static int intel_pmu_hw_config(struct perf_event *event) | 1129 | static int intel_pmu_hw_config(struct perf_event *event) |
813 | { | 1130 | { |
814 | int ret = x86_pmu_hw_config(event); | 1131 | int ret = x86_pmu_hw_config(event); |
@@ -880,20 +1197,67 @@ static __initconst const struct x86_pmu core_pmu = { | |||
880 | */ | 1197 | */ |
881 | .max_period = (1ULL << 31) - 1, | 1198 | .max_period = (1ULL << 31) - 1, |
882 | .get_event_constraints = intel_get_event_constraints, | 1199 | .get_event_constraints = intel_get_event_constraints, |
1200 | .put_event_constraints = intel_put_event_constraints, | ||
883 | .event_constraints = intel_core_event_constraints, | 1201 | .event_constraints = intel_core_event_constraints, |
884 | }; | 1202 | }; |
885 | 1203 | ||
1204 | static int intel_pmu_cpu_prepare(int cpu) | ||
1205 | { | ||
1206 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | ||
1207 | |||
1208 | if (!cpu_has_ht_siblings()) | ||
1209 | return NOTIFY_OK; | ||
1210 | |||
1211 | cpuc->per_core = kzalloc_node(sizeof(struct intel_percore), | ||
1212 | GFP_KERNEL, cpu_to_node(cpu)); | ||
1213 | if (!cpuc->per_core) | ||
1214 | return NOTIFY_BAD; | ||
1215 | |||
1216 | raw_spin_lock_init(&cpuc->per_core->lock); | ||
1217 | cpuc->per_core->core_id = -1; | ||
1218 | return NOTIFY_OK; | ||
1219 | } | ||
1220 | |||
886 | static void intel_pmu_cpu_starting(int cpu) | 1221 | static void intel_pmu_cpu_starting(int cpu) |
887 | { | 1222 | { |
1223 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | ||
1224 | int core_id = topology_core_id(cpu); | ||
1225 | int i; | ||
1226 | |||
888 | init_debug_store_on_cpu(cpu); | 1227 | init_debug_store_on_cpu(cpu); |
889 | /* | 1228 | /* |
890 | * Deal with CPUs that don't clear their LBRs on power-up. | 1229 | * Deal with CPUs that don't clear their LBRs on power-up. |
891 | */ | 1230 | */ |
892 | intel_pmu_lbr_reset(); | 1231 | intel_pmu_lbr_reset(); |
1232 | |||
1233 | if (!cpu_has_ht_siblings()) | ||
1234 | return; | ||
1235 | |||
1236 | for_each_cpu(i, topology_thread_cpumask(cpu)) { | ||
1237 | struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core; | ||
1238 | |||
1239 | if (pc && pc->core_id == core_id) { | ||
1240 | kfree(cpuc->per_core); | ||
1241 | cpuc->per_core = pc; | ||
1242 | break; | ||
1243 | } | ||
1244 | } | ||
1245 | |||
1246 | cpuc->per_core->core_id = core_id; | ||
1247 | cpuc->per_core->refcnt++; | ||
893 | } | 1248 | } |
894 | 1249 | ||
895 | static void intel_pmu_cpu_dying(int cpu) | 1250 | static void intel_pmu_cpu_dying(int cpu) |
896 | { | 1251 | { |
1252 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | ||
1253 | struct intel_percore *pc = cpuc->per_core; | ||
1254 | |||
1255 | if (pc) { | ||
1256 | if (pc->core_id == -1 || --pc->refcnt == 0) | ||
1257 | kfree(pc); | ||
1258 | cpuc->per_core = NULL; | ||
1259 | } | ||
1260 | |||
897 | fini_debug_store_on_cpu(cpu); | 1261 | fini_debug_store_on_cpu(cpu); |
898 | } | 1262 | } |
899 | 1263 | ||
@@ -918,7 +1282,9 @@ static __initconst const struct x86_pmu intel_pmu = { | |||
918 | */ | 1282 | */ |
919 | .max_period = (1ULL << 31) - 1, | 1283 | .max_period = (1ULL << 31) - 1, |
920 | .get_event_constraints = intel_get_event_constraints, | 1284 | .get_event_constraints = intel_get_event_constraints, |
1285 | .put_event_constraints = intel_put_event_constraints, | ||
921 | 1286 | ||
1287 | .cpu_prepare = intel_pmu_cpu_prepare, | ||
922 | .cpu_starting = intel_pmu_cpu_starting, | 1288 | .cpu_starting = intel_pmu_cpu_starting, |
923 | .cpu_dying = intel_pmu_cpu_dying, | 1289 | .cpu_dying = intel_pmu_cpu_dying, |
924 | }; | 1290 | }; |
@@ -1024,6 +1390,7 @@ static __init int intel_pmu_init(void) | |||
1024 | intel_pmu_lbr_init_core(); | 1390 | intel_pmu_lbr_init_core(); |
1025 | 1391 | ||
1026 | x86_pmu.event_constraints = intel_core2_event_constraints; | 1392 | x86_pmu.event_constraints = intel_core2_event_constraints; |
1393 | x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints; | ||
1027 | pr_cont("Core2 events, "); | 1394 | pr_cont("Core2 events, "); |
1028 | break; | 1395 | break; |
1029 | 1396 | ||
@@ -1032,11 +1399,16 @@ static __init int intel_pmu_init(void) | |||
1032 | case 46: /* 45 nm nehalem-ex, "Beckton" */ | 1399 | case 46: /* 45 nm nehalem-ex, "Beckton" */ |
1033 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, | 1400 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, |
1034 | sizeof(hw_cache_event_ids)); | 1401 | sizeof(hw_cache_event_ids)); |
1402 | memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, | ||
1403 | sizeof(hw_cache_extra_regs)); | ||
1035 | 1404 | ||
1036 | intel_pmu_lbr_init_nhm(); | 1405 | intel_pmu_lbr_init_nhm(); |
1037 | 1406 | ||
1038 | x86_pmu.event_constraints = intel_nehalem_event_constraints; | 1407 | x86_pmu.event_constraints = intel_nehalem_event_constraints; |
1408 | x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints; | ||
1409 | x86_pmu.percore_constraints = intel_nehalem_percore_constraints; | ||
1039 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; | 1410 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; |
1411 | x86_pmu.extra_regs = intel_nehalem_extra_regs; | ||
1040 | pr_cont("Nehalem events, "); | 1412 | pr_cont("Nehalem events, "); |
1041 | break; | 1413 | break; |
1042 | 1414 | ||
@@ -1047,6 +1419,7 @@ static __init int intel_pmu_init(void) | |||
1047 | intel_pmu_lbr_init_atom(); | 1419 | intel_pmu_lbr_init_atom(); |
1048 | 1420 | ||
1049 | x86_pmu.event_constraints = intel_gen_event_constraints; | 1421 | x86_pmu.event_constraints = intel_gen_event_constraints; |
1422 | x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints; | ||
1050 | pr_cont("Atom events, "); | 1423 | pr_cont("Atom events, "); |
1051 | break; | 1424 | break; |
1052 | 1425 | ||
@@ -1054,14 +1427,30 @@ static __init int intel_pmu_init(void) | |||
1054 | case 44: /* 32 nm nehalem, "Gulftown" */ | 1427 | case 44: /* 32 nm nehalem, "Gulftown" */ |
1055 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, | 1428 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, |
1056 | sizeof(hw_cache_event_ids)); | 1429 | sizeof(hw_cache_event_ids)); |
1430 | memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, | ||
1431 | sizeof(hw_cache_extra_regs)); | ||
1057 | 1432 | ||
1058 | intel_pmu_lbr_init_nhm(); | 1433 | intel_pmu_lbr_init_nhm(); |
1059 | 1434 | ||
1060 | x86_pmu.event_constraints = intel_westmere_event_constraints; | 1435 | x86_pmu.event_constraints = intel_westmere_event_constraints; |
1436 | x86_pmu.percore_constraints = intel_westmere_percore_constraints; | ||
1061 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; | 1437 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; |
1438 | x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; | ||
1439 | x86_pmu.extra_regs = intel_westmere_extra_regs; | ||
1062 | pr_cont("Westmere events, "); | 1440 | pr_cont("Westmere events, "); |
1063 | break; | 1441 | break; |
1064 | 1442 | ||
1443 | case 42: /* SandyBridge */ | ||
1444 | memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, | ||
1445 | sizeof(hw_cache_event_ids)); | ||
1446 | |||
1447 | intel_pmu_lbr_init_nhm(); | ||
1448 | |||
1449 | x86_pmu.event_constraints = intel_snb_event_constraints; | ||
1450 | x86_pmu.pebs_constraints = intel_snb_pebs_events; | ||
1451 | pr_cont("SandyBridge events, "); | ||
1452 | break; | ||
1453 | |||
1065 | default: | 1454 | default: |
1066 | /* | 1455 | /* |
1067 | * default constraints for v2 and up | 1456 | * default constraints for v2 and up |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index b7dcd9f2b8a0..bab491b8ee25 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
@@ -361,30 +361,70 @@ static int intel_pmu_drain_bts_buffer(void) | |||
361 | /* | 361 | /* |
362 | * PEBS | 362 | * PEBS |
363 | */ | 363 | */ |
364 | static struct event_constraint intel_core2_pebs_event_constraints[] = { | ||
365 | INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ | ||
366 | INTEL_UEVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ | ||
367 | INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ | ||
368 | INTEL_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */ | ||
369 | INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ | ||
370 | EVENT_CONSTRAINT_END | ||
371 | }; | ||
372 | |||
373 | static struct event_constraint intel_atom_pebs_event_constraints[] = { | ||
374 | INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ | ||
375 | INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */ | ||
376 | INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ | ||
377 | EVENT_CONSTRAINT_END | ||
378 | }; | ||
364 | 379 | ||
365 | static struct event_constraint intel_core_pebs_events[] = { | 380 | static struct event_constraint intel_nehalem_pebs_event_constraints[] = { |
366 | PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */ | 381 | INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ |
367 | PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ | 382 | INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ |
368 | PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ | 383 | INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ |
369 | PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */ | 384 | INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */ |
370 | PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */ | 385 | INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */ |
371 | PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ | 386 | INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ |
372 | PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */ | 387 | INTEL_UEVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */ |
373 | PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ | 388 | INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */ |
374 | PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */ | 389 | INTEL_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ |
390 | INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ | ||
391 | INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ | ||
375 | EVENT_CONSTRAINT_END | 392 | EVENT_CONSTRAINT_END |
376 | }; | 393 | }; |
377 | 394 | ||
378 | static struct event_constraint intel_nehalem_pebs_events[] = { | 395 | static struct event_constraint intel_westmere_pebs_event_constraints[] = { |
379 | PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */ | 396 | INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ |
380 | PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */ | 397 | INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ |
381 | PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */ | 398 | INTEL_UEVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ |
382 | PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETURED.ANY */ | 399 | INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */ |
383 | PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */ | 400 | INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */ |
384 | PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ | 401 | INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ |
385 | PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */ | 402 | INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ |
386 | PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ | 403 | INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */ |
387 | PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */ | 404 | INTEL_UEVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ |
405 | INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ | ||
406 | INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ | ||
407 | EVENT_CONSTRAINT_END | ||
408 | }; | ||
409 | |||
410 | static struct event_constraint intel_snb_pebs_events[] = { | ||
411 | INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ | ||
412 | INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ | ||
413 | INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ | ||
414 | INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ | ||
415 | INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ | ||
416 | INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */ | ||
417 | INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */ | ||
418 | INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */ | ||
419 | INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */ | ||
420 | INTEL_UEVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */ | ||
421 | INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */ | ||
422 | INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */ | ||
423 | INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */ | ||
424 | INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */ | ||
425 | INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ | ||
426 | INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ | ||
427 | INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ | ||
388 | EVENT_CONSTRAINT_END | 428 | EVENT_CONSTRAINT_END |
389 | }; | 429 | }; |
390 | 430 | ||
@@ -695,20 +735,17 @@ static void intel_ds_init(void) | |||
695 | printk(KERN_CONT "PEBS fmt0%c, ", pebs_type); | 735 | printk(KERN_CONT "PEBS fmt0%c, ", pebs_type); |
696 | x86_pmu.pebs_record_size = sizeof(struct pebs_record_core); | 736 | x86_pmu.pebs_record_size = sizeof(struct pebs_record_core); |
697 | x86_pmu.drain_pebs = intel_pmu_drain_pebs_core; | 737 | x86_pmu.drain_pebs = intel_pmu_drain_pebs_core; |
698 | x86_pmu.pebs_constraints = intel_core_pebs_events; | ||
699 | break; | 738 | break; |
700 | 739 | ||
701 | case 1: | 740 | case 1: |
702 | printk(KERN_CONT "PEBS fmt1%c, ", pebs_type); | 741 | printk(KERN_CONT "PEBS fmt1%c, ", pebs_type); |
703 | x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm); | 742 | x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm); |
704 | x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; | 743 | x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; |
705 | x86_pmu.pebs_constraints = intel_nehalem_pebs_events; | ||
706 | break; | 744 | break; |
707 | 745 | ||
708 | default: | 746 | default: |
709 | printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type); | 747 | printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type); |
710 | x86_pmu.pebs = 0; | 748 | x86_pmu.pebs = 0; |
711 | break; | ||
712 | } | 749 | } |
713 | } | 750 | } |
714 | } | 751 | } |
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index ff751a9f182b..0811f5ebfba6 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Netburst Perfomance Events (P4, old Xeon) | 2 | * Netburst Performance Events (P4, old Xeon) |
3 | * | 3 | * |
4 | * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org> | 4 | * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org> |
5 | * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com> | 5 | * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com> |
@@ -679,7 +679,7 @@ static int p4_validate_raw_event(struct perf_event *event) | |||
679 | */ | 679 | */ |
680 | 680 | ||
681 | /* | 681 | /* |
682 | * if an event is shared accross the logical threads | 682 | * if an event is shared across the logical threads |
683 | * the user needs special permissions to be able to use it | 683 | * the user needs special permissions to be able to use it |
684 | */ | 684 | */ |
685 | if (p4_ht_active() && p4_event_bind_map[v].shared) { | 685 | if (p4_ht_active() && p4_event_bind_map[v].shared) { |
@@ -764,9 +764,9 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) | |||
764 | u64 v; | 764 | u64 v; |
765 | 765 | ||
766 | /* an official way for overflow indication */ | 766 | /* an official way for overflow indication */ |
767 | rdmsrl(hwc->config_base + hwc->idx, v); | 767 | rdmsrl(hwc->config_base, v); |
768 | if (v & P4_CCCR_OVF) { | 768 | if (v & P4_CCCR_OVF) { |
769 | wrmsrl(hwc->config_base + hwc->idx, v & ~P4_CCCR_OVF); | 769 | wrmsrl(hwc->config_base, v & ~P4_CCCR_OVF); |
770 | return 1; | 770 | return 1; |
771 | } | 771 | } |
772 | 772 | ||
@@ -790,13 +790,13 @@ static void p4_pmu_disable_pebs(void) | |||
790 | * | 790 | * |
791 | * It's still allowed that two threads setup same cache | 791 | * It's still allowed that two threads setup same cache |
792 | * events so we can't simply clear metrics until we knew | 792 | * events so we can't simply clear metrics until we knew |
793 | * noone is depending on us, so we need kind of counter | 793 | * no one is depending on us, so we need kind of counter |
794 | * for "ReplayEvent" users. | 794 | * for "ReplayEvent" users. |
795 | * | 795 | * |
796 | * What is more complex -- RAW events, if user (for some | 796 | * What is more complex -- RAW events, if user (for some |
797 | * reason) will pass some cache event metric with improper | 797 | * reason) will pass some cache event metric with improper |
798 | * event opcode -- it's fine from hardware point of view | 798 | * event opcode -- it's fine from hardware point of view |
799 | * but completely nonsence from "meaning" of such action. | 799 | * but completely nonsense from "meaning" of such action. |
800 | * | 800 | * |
801 | * So at moment let leave metrics turned on forever -- it's | 801 | * So at moment let leave metrics turned on forever -- it's |
802 | * ok for now but need to be revisited! | 802 | * ok for now but need to be revisited! |
@@ -815,7 +815,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event) | |||
815 | * state we need to clear P4_CCCR_OVF, otherwise interrupt get | 815 | * state we need to clear P4_CCCR_OVF, otherwise interrupt get |
816 | * asserted again and again | 816 | * asserted again and again |
817 | */ | 817 | */ |
818 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | 818 | (void)checking_wrmsrl(hwc->config_base, |
819 | (u64)(p4_config_unpack_cccr(hwc->config)) & | 819 | (u64)(p4_config_unpack_cccr(hwc->config)) & |
820 | ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); | 820 | ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); |
821 | } | 821 | } |
@@ -885,7 +885,7 @@ static void p4_pmu_enable_event(struct perf_event *event) | |||
885 | p4_pmu_enable_pebs(hwc->config); | 885 | p4_pmu_enable_pebs(hwc->config); |
886 | 886 | ||
887 | (void)checking_wrmsrl(escr_addr, escr_conf); | 887 | (void)checking_wrmsrl(escr_addr, escr_conf); |
888 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | 888 | (void)checking_wrmsrl(hwc->config_base, |
889 | (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); | 889 | (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); |
890 | } | 890 | } |
891 | 891 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index 34ba07be2cda..20c097e33860 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c | |||
@@ -68,7 +68,7 @@ p6_pmu_disable_event(struct perf_event *event) | |||
68 | if (cpuc->enabled) | 68 | if (cpuc->enabled) |
69 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; | 69 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; |
70 | 70 | ||
71 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); | 71 | (void)checking_wrmsrl(hwc->config_base, val); |
72 | } | 72 | } |
73 | 73 | ||
74 | static void p6_pmu_enable_event(struct perf_event *event) | 74 | static void p6_pmu_enable_event(struct perf_event *event) |
@@ -81,7 +81,7 @@ static void p6_pmu_enable_event(struct perf_event *event) | |||
81 | if (cpuc->enabled) | 81 | if (cpuc->enabled) |
82 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; | 82 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; |
83 | 83 | ||
84 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); | 84 | (void)checking_wrmsrl(hwc->config_base, val); |
85 | } | 85 | } |
86 | 86 | ||
87 | static __initconst const struct x86_pmu p6_pmu = { | 87 | static __initconst const struct x86_pmu p6_pmu = { |
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index d5a236615501..966512b2cacf 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
@@ -46,6 +46,8 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) | |||
46 | /* returns the bit offset of the performance counter register */ | 46 | /* returns the bit offset of the performance counter register */ |
47 | switch (boot_cpu_data.x86_vendor) { | 47 | switch (boot_cpu_data.x86_vendor) { |
48 | case X86_VENDOR_AMD: | 48 | case X86_VENDOR_AMD: |
49 | if (msr >= MSR_F15H_PERF_CTR) | ||
50 | return (msr - MSR_F15H_PERF_CTR) >> 1; | ||
49 | return msr - MSR_K7_PERFCTR0; | 51 | return msr - MSR_K7_PERFCTR0; |
50 | case X86_VENDOR_INTEL: | 52 | case X86_VENDOR_INTEL: |
51 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | 53 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) |
@@ -70,6 +72,8 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) | |||
70 | /* returns the bit offset of the event selection register */ | 72 | /* returns the bit offset of the event selection register */ |
71 | switch (boot_cpu_data.x86_vendor) { | 73 | switch (boot_cpu_data.x86_vendor) { |
72 | case X86_VENDOR_AMD: | 74 | case X86_VENDOR_AMD: |
75 | if (msr >= MSR_F15H_PERF_CTL) | ||
76 | return (msr - MSR_F15H_PERF_CTL) >> 1; | ||
73 | return msr - MSR_K7_EVNTSEL0; | 77 | return msr - MSR_K7_EVNTSEL0; |
74 | case X86_VENDOR_INTEL: | 78 | case X86_VENDOR_INTEL: |
75 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | 79 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) |
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 227b0448960d..d22d0c4edcfd 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c | |||
@@ -86,7 +86,7 @@ static void __init vmware_platform_setup(void) | |||
86 | } | 86 | } |
87 | 87 | ||
88 | /* | 88 | /* |
89 | * While checking the dmi string infomation, just checking the product | 89 | * While checking the dmi string information, just checking the product |
90 | * serial key should be enough, as this will always have a VMware | 90 | * serial key should be enough, as this will always have a VMware |
91 | * specific string when running under VMware hypervisor. | 91 | * specific string when running under VMware hypervisor. |
92 | */ | 92 | */ |
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c new file mode 100644 index 000000000000..7a8cebc9ff29 --- /dev/null +++ b/arch/x86/kernel/devicetree.c | |||
@@ -0,0 +1,441 @@ | |||
1 | /* | ||
2 | * Architecture specific OF callbacks. | ||
3 | */ | ||
4 | #include <linux/bootmem.h> | ||
5 | #include <linux/io.h> | ||
6 | #include <linux/interrupt.h> | ||
7 | #include <linux/list.h> | ||
8 | #include <linux/of.h> | ||
9 | #include <linux/of_fdt.h> | ||
10 | #include <linux/of_address.h> | ||
11 | #include <linux/of_platform.h> | ||
12 | #include <linux/of_irq.h> | ||
13 | #include <linux/slab.h> | ||
14 | #include <linux/pci.h> | ||
15 | #include <linux/of_pci.h> | ||
16 | |||
17 | #include <asm/hpet.h> | ||
18 | #include <asm/irq_controller.h> | ||
19 | #include <asm/apic.h> | ||
20 | #include <asm/pci_x86.h> | ||
21 | |||
22 | __initdata u64 initial_dtb; | ||
23 | char __initdata cmd_line[COMMAND_LINE_SIZE]; | ||
24 | static LIST_HEAD(irq_domains); | ||
25 | static DEFINE_RAW_SPINLOCK(big_irq_lock); | ||
26 | |||
27 | int __initdata of_ioapic; | ||
28 | |||
29 | #ifdef CONFIG_X86_IO_APIC | ||
30 | static void add_interrupt_host(struct irq_domain *ih) | ||
31 | { | ||
32 | unsigned long flags; | ||
33 | |||
34 | raw_spin_lock_irqsave(&big_irq_lock, flags); | ||
35 | list_add(&ih->l, &irq_domains); | ||
36 | raw_spin_unlock_irqrestore(&big_irq_lock, flags); | ||
37 | } | ||
38 | #endif | ||
39 | |||
40 | static struct irq_domain *get_ih_from_node(struct device_node *controller) | ||
41 | { | ||
42 | struct irq_domain *ih, *found = NULL; | ||
43 | unsigned long flags; | ||
44 | |||
45 | raw_spin_lock_irqsave(&big_irq_lock, flags); | ||
46 | list_for_each_entry(ih, &irq_domains, l) { | ||
47 | if (ih->controller == controller) { | ||
48 | found = ih; | ||
49 | break; | ||
50 | } | ||
51 | } | ||
52 | raw_spin_unlock_irqrestore(&big_irq_lock, flags); | ||
53 | return found; | ||
54 | } | ||
55 | |||
56 | unsigned int irq_create_of_mapping(struct device_node *controller, | ||
57 | const u32 *intspec, unsigned int intsize) | ||
58 | { | ||
59 | struct irq_domain *ih; | ||
60 | u32 virq, type; | ||
61 | int ret; | ||
62 | |||
63 | ih = get_ih_from_node(controller); | ||
64 | if (!ih) | ||
65 | return 0; | ||
66 | ret = ih->xlate(ih, intspec, intsize, &virq, &type); | ||
67 | if (ret) | ||
68 | return ret; | ||
69 | if (type == IRQ_TYPE_NONE) | ||
70 | return virq; | ||
71 | /* set the mask if it is different from current */ | ||
72 | if (type == (irq_to_desc(virq)->status & IRQF_TRIGGER_MASK)) | ||
73 | set_irq_type(virq, type); | ||
74 | return virq; | ||
75 | } | ||
76 | EXPORT_SYMBOL_GPL(irq_create_of_mapping); | ||
77 | |||
78 | unsigned long pci_address_to_pio(phys_addr_t address) | ||
79 | { | ||
80 | /* | ||
81 | * The ioport address can be directly used by inX / outX | ||
82 | */ | ||
83 | BUG_ON(address >= (1 << 16)); | ||
84 | return (unsigned long)address; | ||
85 | } | ||
86 | EXPORT_SYMBOL_GPL(pci_address_to_pio); | ||
87 | |||
88 | void __init early_init_dt_scan_chosen_arch(unsigned long node) | ||
89 | { | ||
90 | BUG(); | ||
91 | } | ||
92 | |||
93 | void __init early_init_dt_add_memory_arch(u64 base, u64 size) | ||
94 | { | ||
95 | BUG(); | ||
96 | } | ||
97 | |||
98 | void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) | ||
99 | { | ||
100 | return __alloc_bootmem(size, align, __pa(MAX_DMA_ADDRESS)); | ||
101 | } | ||
102 | |||
103 | void __init add_dtb(u64 data) | ||
104 | { | ||
105 | initial_dtb = data + offsetof(struct setup_data, data); | ||
106 | } | ||
107 | |||
108 | /* | ||
109 | * CE4100 ids. Will be moved to machine_device_initcall() once we have it. | ||
110 | */ | ||
111 | static struct of_device_id __initdata ce4100_ids[] = { | ||
112 | { .compatible = "intel,ce4100-cp", }, | ||
113 | { .compatible = "isa", }, | ||
114 | { .compatible = "pci", }, | ||
115 | {}, | ||
116 | }; | ||
117 | |||
118 | static int __init add_bus_probe(void) | ||
119 | { | ||
120 | if (!of_have_populated_dt()) | ||
121 | return 0; | ||
122 | |||
123 | return of_platform_bus_probe(NULL, ce4100_ids, NULL); | ||
124 | } | ||
125 | module_init(add_bus_probe); | ||
126 | |||
127 | #ifdef CONFIG_PCI | ||
128 | static int x86_of_pci_irq_enable(struct pci_dev *dev) | ||
129 | { | ||
130 | struct of_irq oirq; | ||
131 | u32 virq; | ||
132 | int ret; | ||
133 | u8 pin; | ||
134 | |||
135 | ret = pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); | ||
136 | if (ret) | ||
137 | return ret; | ||
138 | if (!pin) | ||
139 | return 0; | ||
140 | |||
141 | ret = of_irq_map_pci(dev, &oirq); | ||
142 | if (ret) | ||
143 | return ret; | ||
144 | |||
145 | virq = irq_create_of_mapping(oirq.controller, oirq.specifier, | ||
146 | oirq.size); | ||
147 | if (virq == 0) | ||
148 | return -EINVAL; | ||
149 | dev->irq = virq; | ||
150 | return 0; | ||
151 | } | ||
152 | |||
153 | static void x86_of_pci_irq_disable(struct pci_dev *dev) | ||
154 | { | ||
155 | } | ||
156 | |||
157 | void __cpuinit x86_of_pci_init(void) | ||
158 | { | ||
159 | struct device_node *np; | ||
160 | |||
161 | pcibios_enable_irq = x86_of_pci_irq_enable; | ||
162 | pcibios_disable_irq = x86_of_pci_irq_disable; | ||
163 | |||
164 | for_each_node_by_type(np, "pci") { | ||
165 | const void *prop; | ||
166 | struct pci_bus *bus; | ||
167 | unsigned int bus_min; | ||
168 | struct device_node *child; | ||
169 | |||
170 | prop = of_get_property(np, "bus-range", NULL); | ||
171 | if (!prop) | ||
172 | continue; | ||
173 | bus_min = be32_to_cpup(prop); | ||
174 | |||
175 | bus = pci_find_bus(0, bus_min); | ||
176 | if (!bus) { | ||
177 | printk(KERN_ERR "Can't find a node for bus %s.\n", | ||
178 | np->full_name); | ||
179 | continue; | ||
180 | } | ||
181 | |||
182 | if (bus->self) | ||
183 | bus->self->dev.of_node = np; | ||
184 | else | ||
185 | bus->dev.of_node = np; | ||
186 | |||
187 | for_each_child_of_node(np, child) { | ||
188 | struct pci_dev *dev; | ||
189 | u32 devfn; | ||
190 | |||
191 | prop = of_get_property(child, "reg", NULL); | ||
192 | if (!prop) | ||
193 | continue; | ||
194 | |||
195 | devfn = (be32_to_cpup(prop) >> 8) & 0xff; | ||
196 | dev = pci_get_slot(bus, devfn); | ||
197 | if (!dev) | ||
198 | continue; | ||
199 | dev->dev.of_node = child; | ||
200 | pci_dev_put(dev); | ||
201 | } | ||
202 | } | ||
203 | } | ||
204 | #endif | ||
205 | |||
206 | static void __init dtb_setup_hpet(void) | ||
207 | { | ||
208 | #ifdef CONFIG_HPET_TIMER | ||
209 | struct device_node *dn; | ||
210 | struct resource r; | ||
211 | int ret; | ||
212 | |||
213 | dn = of_find_compatible_node(NULL, NULL, "intel,ce4100-hpet"); | ||
214 | if (!dn) | ||
215 | return; | ||
216 | ret = of_address_to_resource(dn, 0, &r); | ||
217 | if (ret) { | ||
218 | WARN_ON(1); | ||
219 | return; | ||
220 | } | ||
221 | hpet_address = r.start; | ||
222 | #endif | ||
223 | } | ||
224 | |||
225 | static void __init dtb_lapic_setup(void) | ||
226 | { | ||
227 | #ifdef CONFIG_X86_LOCAL_APIC | ||
228 | struct device_node *dn; | ||
229 | struct resource r; | ||
230 | int ret; | ||
231 | |||
232 | dn = of_find_compatible_node(NULL, NULL, "intel,ce4100-lapic"); | ||
233 | if (!dn) | ||
234 | return; | ||
235 | |||
236 | ret = of_address_to_resource(dn, 0, &r); | ||
237 | if (WARN_ON(ret)) | ||
238 | return; | ||
239 | |||
240 | /* Did the boot loader setup the local APIC ? */ | ||
241 | if (!cpu_has_apic) { | ||
242 | if (apic_force_enable(r.start)) | ||
243 | return; | ||
244 | } | ||
245 | smp_found_config = 1; | ||
246 | pic_mode = 1; | ||
247 | register_lapic_address(r.start); | ||
248 | generic_processor_info(boot_cpu_physical_apicid, | ||
249 | GET_APIC_VERSION(apic_read(APIC_LVR))); | ||
250 | #endif | ||
251 | } | ||
252 | |||
253 | #ifdef CONFIG_X86_IO_APIC | ||
254 | static unsigned int ioapic_id; | ||
255 | |||
256 | static void __init dtb_add_ioapic(struct device_node *dn) | ||
257 | { | ||
258 | struct resource r; | ||
259 | int ret; | ||
260 | |||
261 | ret = of_address_to_resource(dn, 0, &r); | ||
262 | if (ret) { | ||
263 | printk(KERN_ERR "Can't obtain address from node %s.\n", | ||
264 | dn->full_name); | ||
265 | return; | ||
266 | } | ||
267 | mp_register_ioapic(++ioapic_id, r.start, gsi_top); | ||
268 | } | ||
269 | |||
270 | static void __init dtb_ioapic_setup(void) | ||
271 | { | ||
272 | struct device_node *dn; | ||
273 | |||
274 | for_each_compatible_node(dn, NULL, "intel,ce4100-ioapic") | ||
275 | dtb_add_ioapic(dn); | ||
276 | |||
277 | if (nr_ioapics) { | ||
278 | of_ioapic = 1; | ||
279 | return; | ||
280 | } | ||
281 | printk(KERN_ERR "Error: No information about IO-APIC in OF.\n"); | ||
282 | } | ||
283 | #else | ||
284 | static void __init dtb_ioapic_setup(void) {} | ||
285 | #endif | ||
286 | |||
287 | static void __init dtb_apic_setup(void) | ||
288 | { | ||
289 | dtb_lapic_setup(); | ||
290 | dtb_ioapic_setup(); | ||
291 | } | ||
292 | |||
293 | #ifdef CONFIG_OF_FLATTREE | ||
294 | static void __init x86_flattree_get_config(void) | ||
295 | { | ||
296 | u32 size, map_len; | ||
297 | void *new_dtb; | ||
298 | |||
299 | if (!initial_dtb) | ||
300 | return; | ||
301 | |||
302 | map_len = max(PAGE_SIZE - (initial_dtb & ~PAGE_MASK), | ||
303 | (u64)sizeof(struct boot_param_header)); | ||
304 | |||
305 | initial_boot_params = early_memremap(initial_dtb, map_len); | ||
306 | size = be32_to_cpu(initial_boot_params->totalsize); | ||
307 | if (map_len < size) { | ||
308 | early_iounmap(initial_boot_params, map_len); | ||
309 | initial_boot_params = early_memremap(initial_dtb, size); | ||
310 | map_len = size; | ||
311 | } | ||
312 | |||
313 | new_dtb = alloc_bootmem(size); | ||
314 | memcpy(new_dtb, initial_boot_params, size); | ||
315 | early_iounmap(initial_boot_params, map_len); | ||
316 | |||
317 | initial_boot_params = new_dtb; | ||
318 | |||
319 | /* root level address cells */ | ||
320 | of_scan_flat_dt(early_init_dt_scan_root, NULL); | ||
321 | |||
322 | unflatten_device_tree(); | ||
323 | } | ||
324 | #else | ||
325 | static inline void x86_flattree_get_config(void) { } | ||
326 | #endif | ||
327 | |||
328 | void __init x86_dtb_init(void) | ||
329 | { | ||
330 | x86_flattree_get_config(); | ||
331 | |||
332 | if (!of_have_populated_dt()) | ||
333 | return; | ||
334 | |||
335 | dtb_setup_hpet(); | ||
336 | dtb_apic_setup(); | ||
337 | } | ||
338 | |||
339 | #ifdef CONFIG_X86_IO_APIC | ||
340 | |||
341 | struct of_ioapic_type { | ||
342 | u32 out_type; | ||
343 | u32 trigger; | ||
344 | u32 polarity; | ||
345 | }; | ||
346 | |||
347 | static struct of_ioapic_type of_ioapic_type[] = | ||
348 | { | ||
349 | { | ||
350 | .out_type = IRQ_TYPE_EDGE_RISING, | ||
351 | .trigger = IOAPIC_EDGE, | ||
352 | .polarity = 1, | ||
353 | }, | ||
354 | { | ||
355 | .out_type = IRQ_TYPE_LEVEL_LOW, | ||
356 | .trigger = IOAPIC_LEVEL, | ||
357 | .polarity = 0, | ||
358 | }, | ||
359 | { | ||
360 | .out_type = IRQ_TYPE_LEVEL_HIGH, | ||
361 | .trigger = IOAPIC_LEVEL, | ||
362 | .polarity = 1, | ||
363 | }, | ||
364 | { | ||
365 | .out_type = IRQ_TYPE_EDGE_FALLING, | ||
366 | .trigger = IOAPIC_EDGE, | ||
367 | .polarity = 0, | ||
368 | }, | ||
369 | }; | ||
370 | |||
371 | static int ioapic_xlate(struct irq_domain *id, const u32 *intspec, u32 intsize, | ||
372 | u32 *out_hwirq, u32 *out_type) | ||
373 | { | ||
374 | struct io_apic_irq_attr attr; | ||
375 | struct of_ioapic_type *it; | ||
376 | u32 line, idx, type; | ||
377 | |||
378 | if (intsize < 2) | ||
379 | return -EINVAL; | ||
380 | |||
381 | line = *intspec; | ||
382 | idx = (u32) id->priv; | ||
383 | *out_hwirq = line + mp_gsi_routing[idx].gsi_base; | ||
384 | |||
385 | intspec++; | ||
386 | type = *intspec; | ||
387 | |||
388 | if (type >= ARRAY_SIZE(of_ioapic_type)) | ||
389 | return -EINVAL; | ||
390 | |||
391 | it = of_ioapic_type + type; | ||
392 | *out_type = it->out_type; | ||
393 | |||
394 | set_io_apic_irq_attr(&attr, idx, line, it->trigger, it->polarity); | ||
395 | |||
396 | return io_apic_setup_irq_pin(*out_hwirq, cpu_to_node(0), &attr); | ||
397 | } | ||
398 | |||
399 | static void __init ioapic_add_ofnode(struct device_node *np) | ||
400 | { | ||
401 | struct resource r; | ||
402 | int i, ret; | ||
403 | |||
404 | ret = of_address_to_resource(np, 0, &r); | ||
405 | if (ret) { | ||
406 | printk(KERN_ERR "Failed to obtain address for %s\n", | ||
407 | np->full_name); | ||
408 | return; | ||
409 | } | ||
410 | |||
411 | for (i = 0; i < nr_ioapics; i++) { | ||
412 | if (r.start == mp_ioapics[i].apicaddr) { | ||
413 | struct irq_domain *id; | ||
414 | |||
415 | id = kzalloc(sizeof(*id), GFP_KERNEL); | ||
416 | BUG_ON(!id); | ||
417 | id->controller = np; | ||
418 | id->xlate = ioapic_xlate; | ||
419 | id->priv = (void *)i; | ||
420 | add_interrupt_host(id); | ||
421 | return; | ||
422 | } | ||
423 | } | ||
424 | printk(KERN_ERR "IOxAPIC at %s is not registered.\n", np->full_name); | ||
425 | } | ||
426 | |||
427 | void __init x86_add_irq_domains(void) | ||
428 | { | ||
429 | struct device_node *dp; | ||
430 | |||
431 | if (!of_have_populated_dt()) | ||
432 | return; | ||
433 | |||
434 | for_each_node_with_property(dp, "interrupt-controller") { | ||
435 | if (of_device_is_compatible(dp, "intel,ce4100-ioapic")) | ||
436 | ioapic_add_ofnode(dp); | ||
437 | } | ||
438 | } | ||
439 | #else | ||
440 | void __init x86_add_irq_domains(void) { } | ||
441 | #endif | ||
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index df20723a6a1b..81ac6c78c01c 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -175,21 +175,21 @@ static const struct stacktrace_ops print_trace_ops = { | |||
175 | 175 | ||
176 | void | 176 | void |
177 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | 177 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
178 | unsigned long *stack, char *log_lvl) | 178 | unsigned long *stack, unsigned long bp, char *log_lvl) |
179 | { | 179 | { |
180 | printk("%sCall Trace:\n", log_lvl); | 180 | printk("%sCall Trace:\n", log_lvl); |
181 | dump_trace(task, regs, stack, &print_trace_ops, log_lvl); | 181 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); |
182 | } | 182 | } |
183 | 183 | ||
184 | void show_trace(struct task_struct *task, struct pt_regs *regs, | 184 | void show_trace(struct task_struct *task, struct pt_regs *regs, |
185 | unsigned long *stack) | 185 | unsigned long *stack, unsigned long bp) |
186 | { | 186 | { |
187 | show_trace_log_lvl(task, regs, stack, ""); | 187 | show_trace_log_lvl(task, regs, stack, bp, ""); |
188 | } | 188 | } |
189 | 189 | ||
190 | void show_stack(struct task_struct *task, unsigned long *sp) | 190 | void show_stack(struct task_struct *task, unsigned long *sp) |
191 | { | 191 | { |
192 | show_stack_log_lvl(task, NULL, sp, ""); | 192 | show_stack_log_lvl(task, NULL, sp, 0, ""); |
193 | } | 193 | } |
194 | 194 | ||
195 | /* | 195 | /* |
@@ -197,14 +197,16 @@ void show_stack(struct task_struct *task, unsigned long *sp) | |||
197 | */ | 197 | */ |
198 | void dump_stack(void) | 198 | void dump_stack(void) |
199 | { | 199 | { |
200 | unsigned long bp; | ||
200 | unsigned long stack; | 201 | unsigned long stack; |
201 | 202 | ||
203 | bp = stack_frame(current, NULL); | ||
202 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | 204 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", |
203 | current->pid, current->comm, print_tainted(), | 205 | current->pid, current->comm, print_tainted(), |
204 | init_utsname()->release, | 206 | init_utsname()->release, |
205 | (int)strcspn(init_utsname()->version, " "), | 207 | (int)strcspn(init_utsname()->version, " "), |
206 | init_utsname()->version); | 208 | init_utsname()->version); |
207 | show_trace(NULL, NULL, &stack); | 209 | show_trace(NULL, NULL, &stack, bp); |
208 | } | 210 | } |
209 | EXPORT_SYMBOL(dump_stack); | 211 | EXPORT_SYMBOL(dump_stack); |
210 | 212 | ||
@@ -320,41 +322,6 @@ void die(const char *str, struct pt_regs *regs, long err) | |||
320 | oops_end(flags, regs, sig); | 322 | oops_end(flags, regs, sig); |
321 | } | 323 | } |
322 | 324 | ||
323 | void notrace __kprobes | ||
324 | die_nmi(char *str, struct pt_regs *regs, int do_panic) | ||
325 | { | ||
326 | unsigned long flags; | ||
327 | |||
328 | if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) | ||
329 | return; | ||
330 | |||
331 | /* | ||
332 | * We are in trouble anyway, lets at least try | ||
333 | * to get a message out. | ||
334 | */ | ||
335 | flags = oops_begin(); | ||
336 | printk(KERN_EMERG "%s", str); | ||
337 | printk(" on CPU%d, ip %08lx, registers:\n", | ||
338 | smp_processor_id(), regs->ip); | ||
339 | show_registers(regs); | ||
340 | oops_end(flags, regs, 0); | ||
341 | if (do_panic || panic_on_oops) | ||
342 | panic("Non maskable interrupt"); | ||
343 | nmi_exit(); | ||
344 | local_irq_enable(); | ||
345 | do_exit(SIGBUS); | ||
346 | } | ||
347 | |||
348 | static int __init oops_setup(char *s) | ||
349 | { | ||
350 | if (!s) | ||
351 | return -EINVAL; | ||
352 | if (!strcmp(s, "panic")) | ||
353 | panic_on_oops = 1; | ||
354 | return 0; | ||
355 | } | ||
356 | early_param("oops", oops_setup); | ||
357 | |||
358 | static int __init kstack_setup(char *s) | 325 | static int __init kstack_setup(char *s) |
359 | { | 326 | { |
360 | if (!s) | 327 | if (!s) |
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 74cc1eda384b..3b97a80ce329 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c | |||
@@ -17,12 +17,11 @@ | |||
17 | #include <asm/stacktrace.h> | 17 | #include <asm/stacktrace.h> |
18 | 18 | ||
19 | 19 | ||
20 | void dump_trace(struct task_struct *task, | 20 | void dump_trace(struct task_struct *task, struct pt_regs *regs, |
21 | struct pt_regs *regs, unsigned long *stack, | 21 | unsigned long *stack, unsigned long bp, |
22 | const struct stacktrace_ops *ops, void *data) | 22 | const struct stacktrace_ops *ops, void *data) |
23 | { | 23 | { |
24 | int graph = 0; | 24 | int graph = 0; |
25 | unsigned long bp; | ||
26 | 25 | ||
27 | if (!task) | 26 | if (!task) |
28 | task = current; | 27 | task = current; |
@@ -35,7 +34,9 @@ void dump_trace(struct task_struct *task, | |||
35 | stack = (unsigned long *)task->thread.sp; | 34 | stack = (unsigned long *)task->thread.sp; |
36 | } | 35 | } |
37 | 36 | ||
38 | bp = stack_frame(task, regs); | 37 | if (!bp) |
38 | bp = stack_frame(task, regs); | ||
39 | |||
39 | for (;;) { | 40 | for (;;) { |
40 | struct thread_info *context; | 41 | struct thread_info *context; |
41 | 42 | ||
@@ -55,7 +56,7 @@ EXPORT_SYMBOL(dump_trace); | |||
55 | 56 | ||
56 | void | 57 | void |
57 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 58 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
58 | unsigned long *sp, char *log_lvl) | 59 | unsigned long *sp, unsigned long bp, char *log_lvl) |
59 | { | 60 | { |
60 | unsigned long *stack; | 61 | unsigned long *stack; |
61 | int i; | 62 | int i; |
@@ -77,7 +78,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
77 | touch_nmi_watchdog(); | 78 | touch_nmi_watchdog(); |
78 | } | 79 | } |
79 | printk(KERN_CONT "\n"); | 80 | printk(KERN_CONT "\n"); |
80 | show_trace_log_lvl(task, regs, sp, log_lvl); | 81 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); |
81 | } | 82 | } |
82 | 83 | ||
83 | 84 | ||
@@ -102,7 +103,7 @@ void show_registers(struct pt_regs *regs) | |||
102 | u8 *ip; | 103 | u8 *ip; |
103 | 104 | ||
104 | printk(KERN_EMERG "Stack:\n"); | 105 | printk(KERN_EMERG "Stack:\n"); |
105 | show_stack_log_lvl(NULL, regs, ®s->sp, KERN_EMERG); | 106 | show_stack_log_lvl(NULL, regs, ®s->sp, 0, KERN_EMERG); |
106 | 107 | ||
107 | printk(KERN_EMERG "Code: "); | 108 | printk(KERN_EMERG "Code: "); |
108 | 109 | ||
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index a6b6fcf7f0ae..e71c98d3c0d2 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
@@ -139,8 +139,8 @@ fixup_bp_irq_link(unsigned long bp, unsigned long *stack, | |||
139 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack | 139 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack |
140 | */ | 140 | */ |
141 | 141 | ||
142 | void dump_trace(struct task_struct *task, | 142 | void dump_trace(struct task_struct *task, struct pt_regs *regs, |
143 | struct pt_regs *regs, unsigned long *stack, | 143 | unsigned long *stack, unsigned long bp, |
144 | const struct stacktrace_ops *ops, void *data) | 144 | const struct stacktrace_ops *ops, void *data) |
145 | { | 145 | { |
146 | const unsigned cpu = get_cpu(); | 146 | const unsigned cpu = get_cpu(); |
@@ -150,7 +150,6 @@ void dump_trace(struct task_struct *task, | |||
150 | struct thread_info *tinfo; | 150 | struct thread_info *tinfo; |
151 | int graph = 0; | 151 | int graph = 0; |
152 | unsigned long dummy; | 152 | unsigned long dummy; |
153 | unsigned long bp; | ||
154 | 153 | ||
155 | if (!task) | 154 | if (!task) |
156 | task = current; | 155 | task = current; |
@@ -161,7 +160,8 @@ void dump_trace(struct task_struct *task, | |||
161 | stack = (unsigned long *)task->thread.sp; | 160 | stack = (unsigned long *)task->thread.sp; |
162 | } | 161 | } |
163 | 162 | ||
164 | bp = stack_frame(task, regs); | 163 | if (!bp) |
164 | bp = stack_frame(task, regs); | ||
165 | /* | 165 | /* |
166 | * Print function call entries in all stacks, starting at the | 166 | * Print function call entries in all stacks, starting at the |
167 | * current stack address. If the stacks consist of nested | 167 | * current stack address. If the stacks consist of nested |
@@ -225,7 +225,7 @@ EXPORT_SYMBOL(dump_trace); | |||
225 | 225 | ||
226 | void | 226 | void |
227 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 227 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
228 | unsigned long *sp, char *log_lvl) | 228 | unsigned long *sp, unsigned long bp, char *log_lvl) |
229 | { | 229 | { |
230 | unsigned long *irq_stack_end; | 230 | unsigned long *irq_stack_end; |
231 | unsigned long *irq_stack; | 231 | unsigned long *irq_stack; |
@@ -269,7 +269,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
269 | preempt_enable(); | 269 | preempt_enable(); |
270 | 270 | ||
271 | printk(KERN_CONT "\n"); | 271 | printk(KERN_CONT "\n"); |
272 | show_trace_log_lvl(task, regs, sp, log_lvl); | 272 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); |
273 | } | 273 | } |
274 | 274 | ||
275 | void show_registers(struct pt_regs *regs) | 275 | void show_registers(struct pt_regs *regs) |
@@ -298,7 +298,7 @@ void show_registers(struct pt_regs *regs) | |||
298 | 298 | ||
299 | printk(KERN_EMERG "Stack:\n"); | 299 | printk(KERN_EMERG "Stack:\n"); |
300 | show_stack_log_lvl(NULL, regs, (unsigned long *)sp, | 300 | show_stack_log_lvl(NULL, regs, (unsigned long *)sp, |
301 | KERN_EMERG); | 301 | 0, KERN_EMERG); |
302 | 302 | ||
303 | printk(KERN_EMERG "Code: "); | 303 | printk(KERN_EMERG "Code: "); |
304 | 304 | ||
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 294f26da0c0c..cdf5bfd9d4d5 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -667,21 +667,15 @@ __init void e820_setup_gap(void) | |||
667 | * boot_params.e820_map, others are passed via SETUP_E820_EXT node of | 667 | * boot_params.e820_map, others are passed via SETUP_E820_EXT node of |
668 | * linked list of struct setup_data, which is parsed here. | 668 | * linked list of struct setup_data, which is parsed here. |
669 | */ | 669 | */ |
670 | void __init parse_e820_ext(struct setup_data *sdata, unsigned long pa_data) | 670 | void __init parse_e820_ext(struct setup_data *sdata) |
671 | { | 671 | { |
672 | u32 map_len; | ||
673 | int entries; | 672 | int entries; |
674 | struct e820entry *extmap; | 673 | struct e820entry *extmap; |
675 | 674 | ||
676 | entries = sdata->len / sizeof(struct e820entry); | 675 | entries = sdata->len / sizeof(struct e820entry); |
677 | map_len = sdata->len + sizeof(struct setup_data); | ||
678 | if (map_len > PAGE_SIZE) | ||
679 | sdata = early_ioremap(pa_data, map_len); | ||
680 | extmap = (struct e820entry *)(sdata->data); | 676 | extmap = (struct e820entry *)(sdata->data); |
681 | __append_e820_map(extmap, entries); | 677 | __append_e820_map(extmap, entries); |
682 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | 678 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); |
683 | if (map_len > PAGE_SIZE) | ||
684 | early_iounmap(sdata, map_len); | ||
685 | printk(KERN_INFO "extended physical RAM map:\n"); | 679 | printk(KERN_INFO "extended physical RAM map:\n"); |
686 | e820_print_map("extended"); | 680 | e820_print_map("extended"); |
687 | } | 681 | } |
@@ -847,15 +841,21 @@ static int __init parse_memopt(char *p) | |||
847 | if (!p) | 841 | if (!p) |
848 | return -EINVAL; | 842 | return -EINVAL; |
849 | 843 | ||
850 | #ifdef CONFIG_X86_32 | ||
851 | if (!strcmp(p, "nopentium")) { | 844 | if (!strcmp(p, "nopentium")) { |
845 | #ifdef CONFIG_X86_32 | ||
852 | setup_clear_cpu_cap(X86_FEATURE_PSE); | 846 | setup_clear_cpu_cap(X86_FEATURE_PSE); |
853 | return 0; | 847 | return 0; |
854 | } | 848 | #else |
849 | printk(KERN_WARNING "mem=nopentium ignored! (only supported on x86_32)\n"); | ||
850 | return -EINVAL; | ||
855 | #endif | 851 | #endif |
852 | } | ||
856 | 853 | ||
857 | userdef = 1; | 854 | userdef = 1; |
858 | mem_size = memparse(p, &p); | 855 | mem_size = memparse(p, &p); |
856 | /* don't remove all of memory when handling "mem={invalid}" param */ | ||
857 | if (mem_size == 0) | ||
858 | return -EINVAL; | ||
859 | e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); | 859 | e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); |
860 | 860 | ||
861 | return 0; | 861 | return 0; |
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 9efbdcc56425..3755ef494390 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c | |||
@@ -159,7 +159,12 @@ static void __init ati_bugs_contd(int num, int slot, int func) | |||
159 | if (rev >= 0x40) | 159 | if (rev >= 0x40) |
160 | acpi_fix_pin2_polarity = 1; | 160 | acpi_fix_pin2_polarity = 1; |
161 | 161 | ||
162 | if (rev > 0x13) | 162 | /* |
163 | * SB600: revisions 0x11, 0x12, 0x13, 0x14, ... | ||
164 | * SB700: revisions 0x39, 0x3a, ... | ||
165 | * SB800: revisions 0x40, 0x41, ... | ||
166 | */ | ||
167 | if (rev >= 0x39) | ||
163 | return; | 168 | return; |
164 | 169 | ||
165 | if (acpi_use_timer_override) | 170 | if (acpi_use_timer_override) |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c8b4efad7ebb..5c1a91974918 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -65,6 +65,8 @@ | |||
65 | #define sysexit_audit syscall_exit_work | 65 | #define sysexit_audit syscall_exit_work |
66 | #endif | 66 | #endif |
67 | 67 | ||
68 | .section .entry.text, "ax" | ||
69 | |||
68 | /* | 70 | /* |
69 | * We use macros for low-level operations which need to be overridden | 71 | * We use macros for low-level operations which need to be overridden |
70 | * for paravirtualization. The following will never clobber any registers: | 72 | * for paravirtualization. The following will never clobber any registers: |
@@ -395,7 +397,7 @@ sysenter_past_esp: | |||
395 | * A tiny bit of offset fixup is necessary - 4*4 means the 4 words | 397 | * A tiny bit of offset fixup is necessary - 4*4 means the 4 words |
396 | * pushed above; +8 corresponds to copy_thread's esp0 setting. | 398 | * pushed above; +8 corresponds to copy_thread's esp0 setting. |
397 | */ | 399 | */ |
398 | pushl_cfi ((TI_sysenter_return)-THREAD_SIZE_asm+8+4*4)(%esp) | 400 | pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp) |
399 | CFI_REL_OFFSET eip, 0 | 401 | CFI_REL_OFFSET eip, 0 |
400 | 402 | ||
401 | pushl_cfi %eax | 403 | pushl_cfi %eax |
@@ -788,7 +790,7 @@ ENDPROC(ptregs_clone) | |||
788 | */ | 790 | */ |
789 | .section .init.rodata,"a" | 791 | .section .init.rodata,"a" |
790 | ENTRY(interrupt) | 792 | ENTRY(interrupt) |
791 | .text | 793 | .section .entry.text, "ax" |
792 | .p2align 5 | 794 | .p2align 5 |
793 | .p2align CONFIG_X86_L1_CACHE_SHIFT | 795 | .p2align CONFIG_X86_L1_CACHE_SHIFT |
794 | ENTRY(irq_entries_start) | 796 | ENTRY(irq_entries_start) |
@@ -807,7 +809,7 @@ vector=FIRST_EXTERNAL_VECTOR | |||
807 | .endif | 809 | .endif |
808 | .previous | 810 | .previous |
809 | .long 1b | 811 | .long 1b |
810 | .text | 812 | .section .entry.text, "ax" |
811 | vector=vector+1 | 813 | vector=vector+1 |
812 | .endif | 814 | .endif |
813 | .endr | 815 | .endr |
@@ -1409,11 +1411,10 @@ END(general_protection) | |||
1409 | #ifdef CONFIG_KVM_GUEST | 1411 | #ifdef CONFIG_KVM_GUEST |
1410 | ENTRY(async_page_fault) | 1412 | ENTRY(async_page_fault) |
1411 | RING0_EC_FRAME | 1413 | RING0_EC_FRAME |
1412 | pushl $do_async_page_fault | 1414 | pushl_cfi $do_async_page_fault |
1413 | CFI_ADJUST_CFA_OFFSET 4 | ||
1414 | jmp error_code | 1415 | jmp error_code |
1415 | CFI_ENDPROC | 1416 | CFI_ENDPROC |
1416 | END(apf_page_fault) | 1417 | END(async_page_fault) |
1417 | #endif | 1418 | #endif |
1418 | 1419 | ||
1419 | /* | 1420 | /* |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index aed1ffbeb0c9..8a445a0c989e 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -18,7 +18,7 @@ | |||
18 | * A note on terminology: | 18 | * A note on terminology: |
19 | * - top of stack: Architecture defined interrupt frame from SS to RIP | 19 | * - top of stack: Architecture defined interrupt frame from SS to RIP |
20 | * at the top of the kernel process stack. | 20 | * at the top of the kernel process stack. |
21 | * - partial stack frame: partially saved registers upto R11. | 21 | * - partial stack frame: partially saved registers up to R11. |
22 | * - full stack frame: Like partial stack frame, but all register saved. | 22 | * - full stack frame: Like partial stack frame, but all register saved. |
23 | * | 23 | * |
24 | * Some macro usage: | 24 | * Some macro usage: |
@@ -61,6 +61,8 @@ | |||
61 | #define __AUDIT_ARCH_LE 0x40000000 | 61 | #define __AUDIT_ARCH_LE 0x40000000 |
62 | 62 | ||
63 | .code64 | 63 | .code64 |
64 | .section .entry.text, "ax" | ||
65 | |||
64 | #ifdef CONFIG_FUNCTION_TRACER | 66 | #ifdef CONFIG_FUNCTION_TRACER |
65 | #ifdef CONFIG_DYNAMIC_FTRACE | 67 | #ifdef CONFIG_DYNAMIC_FTRACE |
66 | ENTRY(mcount) | 68 | ENTRY(mcount) |
@@ -420,7 +422,7 @@ ENTRY(ret_from_fork) | |||
420 | END(ret_from_fork) | 422 | END(ret_from_fork) |
421 | 423 | ||
422 | /* | 424 | /* |
423 | * System call entry. Upto 6 arguments in registers are supported. | 425 | * System call entry. Up to 6 arguments in registers are supported. |
424 | * | 426 | * |
425 | * SYSCALL does not save anything on the stack and does not change the | 427 | * SYSCALL does not save anything on the stack and does not change the |
426 | * stack pointer. | 428 | * stack pointer. |
@@ -744,7 +746,7 @@ END(stub_rt_sigreturn) | |||
744 | */ | 746 | */ |
745 | .section .init.rodata,"a" | 747 | .section .init.rodata,"a" |
746 | ENTRY(interrupt) | 748 | ENTRY(interrupt) |
747 | .text | 749 | .section .entry.text |
748 | .p2align 5 | 750 | .p2align 5 |
749 | .p2align CONFIG_X86_L1_CACHE_SHIFT | 751 | .p2align CONFIG_X86_L1_CACHE_SHIFT |
750 | ENTRY(irq_entries_start) | 752 | ENTRY(irq_entries_start) |
@@ -763,7 +765,7 @@ vector=FIRST_EXTERNAL_VECTOR | |||
763 | .endif | 765 | .endif |
764 | .previous | 766 | .previous |
765 | .quad 1b | 767 | .quad 1b |
766 | .text | 768 | .section .entry.text |
767 | vector=vector+1 | 769 | vector=vector+1 |
768 | .endif | 770 | .endif |
769 | .endr | 771 | .endr |
@@ -975,9 +977,12 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \ | |||
975 | x86_platform_ipi smp_x86_platform_ipi | 977 | x86_platform_ipi smp_x86_platform_ipi |
976 | 978 | ||
977 | #ifdef CONFIG_SMP | 979 | #ifdef CONFIG_SMP |
978 | .irpc idx, "01234567" | 980 | .irp idx,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, \ |
981 | 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 | ||
982 | .if NUM_INVALIDATE_TLB_VECTORS > \idx | ||
979 | apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \ | 983 | apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \ |
980 | invalidate_interrupt\idx smp_invalidate_interrupt | 984 | invalidate_interrupt\idx smp_invalidate_interrupt |
985 | .endif | ||
981 | .endr | 986 | .endr |
982 | #endif | 987 | #endif |
983 | 988 | ||
@@ -1248,7 +1253,7 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) | |||
1248 | decl PER_CPU_VAR(irq_count) | 1253 | decl PER_CPU_VAR(irq_count) |
1249 | jmp error_exit | 1254 | jmp error_exit |
1250 | CFI_ENDPROC | 1255 | CFI_ENDPROC |
1251 | END(do_hypervisor_callback) | 1256 | END(xen_do_hypervisor_callback) |
1252 | 1257 | ||
1253 | /* | 1258 | /* |
1254 | * Hypervisor uses this for application faults while it executes. | 1259 | * Hypervisor uses this for application faults while it executes. |
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 382eb2936d4d..a93742a57468 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -437,18 +437,19 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, | |||
437 | return; | 437 | return; |
438 | } | 438 | } |
439 | 439 | ||
440 | if (ftrace_push_return_trace(old, self_addr, &trace.depth, | ||
441 | frame_pointer) == -EBUSY) { | ||
442 | *parent = old; | ||
443 | return; | ||
444 | } | ||
445 | |||
446 | trace.func = self_addr; | 440 | trace.func = self_addr; |
441 | trace.depth = current->curr_ret_stack + 1; | ||
447 | 442 | ||
448 | /* Only trace if the calling function expects to */ | 443 | /* Only trace if the calling function expects to */ |
449 | if (!ftrace_graph_entry(&trace)) { | 444 | if (!ftrace_graph_entry(&trace)) { |
450 | current->curr_ret_stack--; | ||
451 | *parent = old; | 445 | *parent = old; |
446 | return; | ||
447 | } | ||
448 | |||
449 | if (ftrace_push_return_trace(old, self_addr, &trace.depth, | ||
450 | frame_pointer) == -EBUSY) { | ||
451 | *parent = old; | ||
452 | return; | ||
452 | } | 453 | } |
453 | } | 454 | } |
454 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ | 455 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ |
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 7f138b3c3c52..d6d6bb361931 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -34,15 +34,6 @@ void __init i386_start_kernel(void) | |||
34 | { | 34 | { |
35 | memblock_init(); | 35 | memblock_init(); |
36 | 36 | ||
37 | #ifdef CONFIG_X86_TRAMPOLINE | ||
38 | /* | ||
39 | * But first pinch a few for the stack/trampoline stuff | ||
40 | * FIXME: Don't need the extra page at 4K, but need to fix | ||
41 | * trampoline before removing it. (see the GDT stuff) | ||
42 | */ | ||
43 | memblock_x86_reserve_range(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE"); | ||
44 | #endif | ||
45 | |||
46 | memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); | 37 | memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); |
47 | 38 | ||
48 | #ifdef CONFIG_BLK_DEV_INITRD | 39 | #ifdef CONFIG_BLK_DEV_INITRD |
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 2d2673c28aff..5655c2272adb 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -77,9 +77,6 @@ void __init x86_64_start_kernel(char * real_mode_data) | |||
77 | /* Make NULL pointers segfault */ | 77 | /* Make NULL pointers segfault */ |
78 | zap_identity_mappings(); | 78 | zap_identity_mappings(); |
79 | 79 | ||
80 | /* Cleanup the over mapped high alias */ | ||
81 | cleanup_highmap(); | ||
82 | |||
83 | max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT; | 80 | max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT; |
84 | 81 | ||
85 | for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) { | 82 | for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) { |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 767d6c43de37..ce0be7cd085e 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -73,7 +73,7 @@ MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT | |||
73 | */ | 73 | */ |
74 | KERNEL_PAGES = LOWMEM_PAGES | 74 | KERNEL_PAGES = LOWMEM_PAGES |
75 | 75 | ||
76 | INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm | 76 | INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE |
77 | RESERVE_BRK(pagetables, INIT_MAP_SIZE) | 77 | RESERVE_BRK(pagetables, INIT_MAP_SIZE) |
78 | 78 | ||
79 | /* | 79 | /* |
@@ -137,7 +137,7 @@ ENTRY(startup_32) | |||
137 | movsl | 137 | movsl |
138 | 1: | 138 | 1: |
139 | 139 | ||
140 | #ifdef CONFIG_OLPC_OPENFIRMWARE | 140 | #ifdef CONFIG_OLPC |
141 | /* save OFW's pgdir table for later use when calling into OFW */ | 141 | /* save OFW's pgdir table for later use when calling into OFW */ |
142 | movl %cr3, %eax | 142 | movl %cr3, %eax |
143 | movl %eax, pa(olpc_ofw_pgd) | 143 | movl %eax, pa(olpc_ofw_pgd) |
@@ -623,7 +623,7 @@ ENTRY(initial_code) | |||
623 | * BSS section | 623 | * BSS section |
624 | */ | 624 | */ |
625 | __PAGE_ALIGNED_BSS | 625 | __PAGE_ALIGNED_BSS |
626 | .align PAGE_SIZE_asm | 626 | .align PAGE_SIZE |
627 | #ifdef CONFIG_X86_PAE | 627 | #ifdef CONFIG_X86_PAE |
628 | initial_pg_pmd: | 628 | initial_pg_pmd: |
629 | .fill 1024*KPMDS,4,0 | 629 | .fill 1024*KPMDS,4,0 |
@@ -644,7 +644,7 @@ ENTRY(swapper_pg_dir) | |||
644 | #ifdef CONFIG_X86_PAE | 644 | #ifdef CONFIG_X86_PAE |
645 | __PAGE_ALIGNED_DATA | 645 | __PAGE_ALIGNED_DATA |
646 | /* Page-aligned for the benefit of paravirt? */ | 646 | /* Page-aligned for the benefit of paravirt? */ |
647 | .align PAGE_SIZE_asm | 647 | .align PAGE_SIZE |
648 | ENTRY(initial_page_table) | 648 | ENTRY(initial_page_table) |
649 | .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */ | 649 | .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */ |
650 | # if KPMDS == 3 | 650 | # if KPMDS == 3 |
@@ -662,7 +662,7 @@ ENTRY(initial_page_table) | |||
662 | # else | 662 | # else |
663 | # error "Kernel PMDs should be 1, 2 or 3" | 663 | # error "Kernel PMDs should be 1, 2 or 3" |
664 | # endif | 664 | # endif |
665 | .align PAGE_SIZE_asm /* needs to be page-sized too */ | 665 | .align PAGE_SIZE /* needs to be page-sized too */ |
666 | #endif | 666 | #endif |
667 | 667 | ||
668 | .data | 668 | .data |
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 239046bd447f..e11e39478a49 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -136,10 +136,9 @@ ident_complete: | |||
136 | /* Fixup phys_base */ | 136 | /* Fixup phys_base */ |
137 | addq %rbp, phys_base(%rip) | 137 | addq %rbp, phys_base(%rip) |
138 | 138 | ||
139 | #ifdef CONFIG_X86_TRAMPOLINE | 139 | /* Fixup trampoline */ |
140 | addq %rbp, trampoline_level4_pgt + 0(%rip) | 140 | addq %rbp, trampoline_level4_pgt + 0(%rip) |
141 | addq %rbp, trampoline_level4_pgt + (511*8)(%rip) | 141 | addq %rbp, trampoline_level4_pgt + (511*8)(%rip) |
142 | #endif | ||
143 | 142 | ||
144 | /* Due to ENTRY(), sometimes the empty space gets filled with | 143 | /* Due to ENTRY(), sometimes the empty space gets filled with |
145 | * zeros. Better take a jmp than relying on empty space being | 144 | * zeros. Better take a jmp than relying on empty space being |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 4ff5968f12d2..bfe8f729e086 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -503,7 +503,7 @@ static int hpet_assign_irq(struct hpet_dev *dev) | |||
503 | if (!irq) | 503 | if (!irq) |
504 | return -EINVAL; | 504 | return -EINVAL; |
505 | 505 | ||
506 | set_irq_data(irq, dev); | 506 | irq_set_handler_data(irq, dev); |
507 | 507 | ||
508 | if (hpet_setup_msi_irq(irq)) | 508 | if (hpet_setup_msi_irq(irq)) |
509 | return -EINVAL; | 509 | return -EINVAL; |
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index e60c38cc0eed..12aff2537682 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -145,7 +145,7 @@ EXPORT_SYMBOL_GPL(fpu_finit); | |||
145 | * The _current_ task is using the FPU for the first time | 145 | * The _current_ task is using the FPU for the first time |
146 | * so initialize it and set the mxcsr to its default | 146 | * so initialize it and set the mxcsr to its default |
147 | * value at reset if we support XMM instructions and then | 147 | * value at reset if we support XMM instructions and then |
148 | * remeber the current task has used the FPU. | 148 | * remember the current task has used the FPU. |
149 | */ | 149 | */ |
150 | int init_fpu(struct task_struct *tsk) | 150 | int init_fpu(struct task_struct *tsk) |
151 | { | 151 | { |
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 20757cb2efa3..d9ca749c123b 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c | |||
@@ -112,7 +112,7 @@ static void make_8259A_irq(unsigned int irq) | |||
112 | { | 112 | { |
113 | disable_irq_nosync(irq); | 113 | disable_irq_nosync(irq); |
114 | io_apic_irqs &= ~(1<<irq); | 114 | io_apic_irqs &= ~(1<<irq); |
115 | set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq, | 115 | irq_set_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq, |
116 | i8259A_chip.name); | 116 | i8259A_chip.name); |
117 | enable_irq(irq); | 117 | enable_irq(irq); |
118 | } | 118 | } |
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 8eec0ec59af2..8c968974253d 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c | |||
@@ -14,22 +14,9 @@ | |||
14 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
15 | #include <linux/thread_info.h> | 15 | #include <linux/thread_info.h> |
16 | #include <linux/syscalls.h> | 16 | #include <linux/syscalls.h> |
17 | #include <linux/bitmap.h> | ||
17 | #include <asm/syscalls.h> | 18 | #include <asm/syscalls.h> |
18 | 19 | ||
19 | /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ | ||
20 | static void set_bitmap(unsigned long *bitmap, unsigned int base, | ||
21 | unsigned int extent, int new_value) | ||
22 | { | ||
23 | unsigned int i; | ||
24 | |||
25 | for (i = base; i < base + extent; i++) { | ||
26 | if (new_value) | ||
27 | __set_bit(i, bitmap); | ||
28 | else | ||
29 | __clear_bit(i, bitmap); | ||
30 | } | ||
31 | } | ||
32 | |||
33 | /* | 20 | /* |
34 | * this changes the io permissions bitmap in the current task. | 21 | * this changes the io permissions bitmap in the current task. |
35 | */ | 22 | */ |
@@ -69,7 +56,10 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) | |||
69 | */ | 56 | */ |
70 | tss = &per_cpu(init_tss, get_cpu()); | 57 | tss = &per_cpu(init_tss, get_cpu()); |
71 | 58 | ||
72 | set_bitmap(t->io_bitmap_ptr, from, num, !turn_on); | 59 | if (turn_on) |
60 | bitmap_clear(t->io_bitmap_ptr, from, num); | ||
61 | else | ||
62 | bitmap_set(t->io_bitmap_ptr, from, num); | ||
73 | 63 | ||
74 | /* | 64 | /* |
75 | * Search for a (possibly new) maximum. This is simple and stupid, | 65 | * Search for a (possibly new) maximum. This is simple and stupid, |
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 387b6a0c9e81..948a31eae75f 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -44,9 +44,9 @@ void ack_bad_irq(unsigned int irq) | |||
44 | 44 | ||
45 | #define irq_stats(x) (&per_cpu(irq_stat, x)) | 45 | #define irq_stats(x) (&per_cpu(irq_stat, x)) |
46 | /* | 46 | /* |
47 | * /proc/interrupts printing: | 47 | * /proc/interrupts printing for arch specific interrupts |
48 | */ | 48 | */ |
49 | static int show_other_interrupts(struct seq_file *p, int prec) | 49 | int arch_show_interrupts(struct seq_file *p, int prec) |
50 | { | 50 | { |
51 | int j; | 51 | int j; |
52 | 52 | ||
@@ -122,59 +122,6 @@ static int show_other_interrupts(struct seq_file *p, int prec) | |||
122 | return 0; | 122 | return 0; |
123 | } | 123 | } |
124 | 124 | ||
125 | int show_interrupts(struct seq_file *p, void *v) | ||
126 | { | ||
127 | unsigned long flags, any_count = 0; | ||
128 | int i = *(loff_t *) v, j, prec; | ||
129 | struct irqaction *action; | ||
130 | struct irq_desc *desc; | ||
131 | |||
132 | if (i > nr_irqs) | ||
133 | return 0; | ||
134 | |||
135 | for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec) | ||
136 | j *= 10; | ||
137 | |||
138 | if (i == nr_irqs) | ||
139 | return show_other_interrupts(p, prec); | ||
140 | |||
141 | /* print header */ | ||
142 | if (i == 0) { | ||
143 | seq_printf(p, "%*s", prec + 8, ""); | ||
144 | for_each_online_cpu(j) | ||
145 | seq_printf(p, "CPU%-8d", j); | ||
146 | seq_putc(p, '\n'); | ||
147 | } | ||
148 | |||
149 | desc = irq_to_desc(i); | ||
150 | if (!desc) | ||
151 | return 0; | ||
152 | |||
153 | raw_spin_lock_irqsave(&desc->lock, flags); | ||
154 | for_each_online_cpu(j) | ||
155 | any_count |= kstat_irqs_cpu(i, j); | ||
156 | action = desc->action; | ||
157 | if (!action && !any_count) | ||
158 | goto out; | ||
159 | |||
160 | seq_printf(p, "%*d: ", prec, i); | ||
161 | for_each_online_cpu(j) | ||
162 | seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); | ||
163 | seq_printf(p, " %8s", desc->irq_data.chip->name); | ||
164 | seq_printf(p, "-%-8s", desc->name); | ||
165 | |||
166 | if (action) { | ||
167 | seq_printf(p, " %s", action->name); | ||
168 | while ((action = action->next) != NULL) | ||
169 | seq_printf(p, ", %s", action->name); | ||
170 | } | ||
171 | |||
172 | seq_putc(p, '\n'); | ||
173 | out: | ||
174 | raw_spin_unlock_irqrestore(&desc->lock, flags); | ||
175 | return 0; | ||
176 | } | ||
177 | |||
178 | /* | 125 | /* |
179 | * /proc/stat helpers | 126 | * /proc/stat helpers |
180 | */ | 127 | */ |
@@ -276,15 +223,6 @@ void smp_x86_platform_ipi(struct pt_regs *regs) | |||
276 | 223 | ||
277 | EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); | 224 | EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); |
278 | 225 | ||
279 | #ifdef CONFIG_OF | ||
280 | unsigned int irq_create_of_mapping(struct device_node *controller, | ||
281 | const u32 *intspec, unsigned int intsize) | ||
282 | { | ||
283 | return intspec[0]; | ||
284 | } | ||
285 | EXPORT_SYMBOL_GPL(irq_create_of_mapping); | ||
286 | #endif | ||
287 | |||
288 | #ifdef CONFIG_HOTPLUG_CPU | 226 | #ifdef CONFIG_HOTPLUG_CPU |
289 | /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ | 227 | /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ |
290 | void fixup_irqs(void) | 228 | void fixup_irqs(void) |
@@ -293,6 +231,7 @@ void fixup_irqs(void) | |||
293 | static int warned; | 231 | static int warned; |
294 | struct irq_desc *desc; | 232 | struct irq_desc *desc; |
295 | struct irq_data *data; | 233 | struct irq_data *data; |
234 | struct irq_chip *chip; | ||
296 | 235 | ||
297 | for_each_irq_desc(irq, desc) { | 236 | for_each_irq_desc(irq, desc) { |
298 | int break_affinity = 0; | 237 | int break_affinity = 0; |
@@ -307,10 +246,10 @@ void fixup_irqs(void) | |||
307 | /* interrupt's are disabled at this point */ | 246 | /* interrupt's are disabled at this point */ |
308 | raw_spin_lock(&desc->lock); | 247 | raw_spin_lock(&desc->lock); |
309 | 248 | ||
310 | data = &desc->irq_data; | 249 | data = irq_desc_get_irq_data(desc); |
311 | affinity = data->affinity; | 250 | affinity = data->affinity; |
312 | if (!irq_has_action(irq) || | 251 | if (!irq_has_action(irq) || |
313 | cpumask_equal(affinity, cpu_online_mask)) { | 252 | cpumask_subset(affinity, cpu_online_mask)) { |
314 | raw_spin_unlock(&desc->lock); | 253 | raw_spin_unlock(&desc->lock); |
315 | continue; | 254 | continue; |
316 | } | 255 | } |
@@ -327,16 +266,17 @@ void fixup_irqs(void) | |||
327 | affinity = cpu_all_mask; | 266 | affinity = cpu_all_mask; |
328 | } | 267 | } |
329 | 268 | ||
330 | if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_mask) | 269 | chip = irq_data_get_irq_chip(data); |
331 | data->chip->irq_mask(data); | 270 | if (!irqd_can_move_in_process_context(data) && chip->irq_mask) |
271 | chip->irq_mask(data); | ||
332 | 272 | ||
333 | if (data->chip->irq_set_affinity) | 273 | if (chip->irq_set_affinity) |
334 | data->chip->irq_set_affinity(data, affinity, true); | 274 | chip->irq_set_affinity(data, affinity, true); |
335 | else if (!(warned++)) | 275 | else if (!(warned++)) |
336 | set_affinity = 0; | 276 | set_affinity = 0; |
337 | 277 | ||
338 | if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_unmask) | 278 | if (!irqd_can_move_in_process_context(data) && chip->irq_unmask) |
339 | data->chip->irq_unmask(data); | 279 | chip->irq_unmask(data); |
340 | 280 | ||
341 | raw_spin_unlock(&desc->lock); | 281 | raw_spin_unlock(&desc->lock); |
342 | 282 | ||
@@ -368,10 +308,11 @@ void fixup_irqs(void) | |||
368 | irq = __this_cpu_read(vector_irq[vector]); | 308 | irq = __this_cpu_read(vector_irq[vector]); |
369 | 309 | ||
370 | desc = irq_to_desc(irq); | 310 | desc = irq_to_desc(irq); |
371 | data = &desc->irq_data; | 311 | data = irq_desc_get_irq_data(desc); |
312 | chip = irq_data_get_irq_chip(data); | ||
372 | raw_spin_lock(&desc->lock); | 313 | raw_spin_lock(&desc->lock); |
373 | if (data->chip->irq_retrigger) | 314 | if (chip->irq_retrigger) |
374 | data->chip->irq_retrigger(data); | 315 | chip->irq_retrigger(data); |
375 | raw_spin_unlock(&desc->lock); | 316 | raw_spin_unlock(&desc->lock); |
376 | } | 317 | } |
377 | } | 318 | } |
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 9974d21048fd..72090705a656 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
@@ -172,7 +172,7 @@ asmlinkage void do_softirq(void) | |||
172 | 172 | ||
173 | call_on_stack(__do_softirq, isp); | 173 | call_on_stack(__do_softirq, isp); |
174 | /* | 174 | /* |
175 | * Shouldnt happen, we returned above if in_interrupt(): | 175 | * Shouldn't happen, we returned above if in_interrupt(): |
176 | */ | 176 | */ |
177 | WARN_ON_ONCE(softirq_count()); | 177 | WARN_ON_ONCE(softirq_count()); |
178 | } | 178 | } |
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index c752e973958d..f470e4ef993e 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <asm/setup.h> | 25 | #include <asm/setup.h> |
26 | #include <asm/i8259.h> | 26 | #include <asm/i8259.h> |
27 | #include <asm/traps.h> | 27 | #include <asm/traps.h> |
28 | #include <asm/prom.h> | ||
28 | 29 | ||
29 | /* | 30 | /* |
30 | * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: | 31 | * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: |
@@ -71,6 +72,7 @@ static irqreturn_t math_error_irq(int cpl, void *dev_id) | |||
71 | static struct irqaction fpu_irq = { | 72 | static struct irqaction fpu_irq = { |
72 | .handler = math_error_irq, | 73 | .handler = math_error_irq, |
73 | .name = "fpu", | 74 | .name = "fpu", |
75 | .flags = IRQF_NO_THREAD, | ||
74 | }; | 76 | }; |
75 | #endif | 77 | #endif |
76 | 78 | ||
@@ -80,6 +82,7 @@ static struct irqaction fpu_irq = { | |||
80 | static struct irqaction irq2 = { | 82 | static struct irqaction irq2 = { |
81 | .handler = no_action, | 83 | .handler = no_action, |
82 | .name = "cascade", | 84 | .name = "cascade", |
85 | .flags = IRQF_NO_THREAD, | ||
83 | }; | 86 | }; |
84 | 87 | ||
85 | DEFINE_PER_CPU(vector_irq_t, vector_irq) = { | 88 | DEFINE_PER_CPU(vector_irq_t, vector_irq) = { |
@@ -110,7 +113,7 @@ void __init init_ISA_irqs(void) | |||
110 | legacy_pic->init(0); | 113 | legacy_pic->init(0); |
111 | 114 | ||
112 | for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) | 115 | for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) |
113 | set_irq_chip_and_handler_name(i, chip, handle_level_irq, name); | 116 | irq_set_chip_and_handler_name(i, chip, handle_level_irq, name); |
114 | } | 117 | } |
115 | 118 | ||
116 | void __init init_IRQ(void) | 119 | void __init init_IRQ(void) |
@@ -118,6 +121,12 @@ void __init init_IRQ(void) | |||
118 | int i; | 121 | int i; |
119 | 122 | ||
120 | /* | 123 | /* |
124 | * We probably need a better place for this, but it works for | ||
125 | * now ... | ||
126 | */ | ||
127 | x86_add_irq_domains(); | ||
128 | |||
129 | /* | ||
121 | * On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15. | 130 | * On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15. |
122 | * If these IRQ's are handled by legacy interrupt-controllers like PIC, | 131 | * If these IRQ's are handled by legacy interrupt-controllers like PIC, |
123 | * then this configuration will likely be static after the boot. If | 132 | * then this configuration will likely be static after the boot. If |
@@ -164,14 +173,77 @@ static void __init smp_intr_init(void) | |||
164 | alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); | 173 | alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); |
165 | 174 | ||
166 | /* IPIs for invalidation */ | 175 | /* IPIs for invalidation */ |
167 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); | 176 | #define ALLOC_INVTLB_VEC(NR) \ |
168 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); | 177 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+NR, \ |
169 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); | 178 | invalidate_interrupt##NR) |
170 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); | 179 | |
171 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); | 180 | switch (NUM_INVALIDATE_TLB_VECTORS) { |
172 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); | 181 | default: |
173 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); | 182 | ALLOC_INVTLB_VEC(31); |
174 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); | 183 | case 31: |
184 | ALLOC_INVTLB_VEC(30); | ||
185 | case 30: | ||
186 | ALLOC_INVTLB_VEC(29); | ||
187 | case 29: | ||
188 | ALLOC_INVTLB_VEC(28); | ||
189 | case 28: | ||
190 | ALLOC_INVTLB_VEC(27); | ||
191 | case 27: | ||
192 | ALLOC_INVTLB_VEC(26); | ||
193 | case 26: | ||
194 | ALLOC_INVTLB_VEC(25); | ||
195 | case 25: | ||
196 | ALLOC_INVTLB_VEC(24); | ||
197 | case 24: | ||
198 | ALLOC_INVTLB_VEC(23); | ||
199 | case 23: | ||
200 | ALLOC_INVTLB_VEC(22); | ||
201 | case 22: | ||
202 | ALLOC_INVTLB_VEC(21); | ||
203 | case 21: | ||
204 | ALLOC_INVTLB_VEC(20); | ||
205 | case 20: | ||
206 | ALLOC_INVTLB_VEC(19); | ||
207 | case 19: | ||
208 | ALLOC_INVTLB_VEC(18); | ||
209 | case 18: | ||
210 | ALLOC_INVTLB_VEC(17); | ||
211 | case 17: | ||
212 | ALLOC_INVTLB_VEC(16); | ||
213 | case 16: | ||
214 | ALLOC_INVTLB_VEC(15); | ||
215 | case 15: | ||
216 | ALLOC_INVTLB_VEC(14); | ||
217 | case 14: | ||
218 | ALLOC_INVTLB_VEC(13); | ||
219 | case 13: | ||
220 | ALLOC_INVTLB_VEC(12); | ||
221 | case 12: | ||
222 | ALLOC_INVTLB_VEC(11); | ||
223 | case 11: | ||
224 | ALLOC_INVTLB_VEC(10); | ||
225 | case 10: | ||
226 | ALLOC_INVTLB_VEC(9); | ||
227 | case 9: | ||
228 | ALLOC_INVTLB_VEC(8); | ||
229 | case 8: | ||
230 | ALLOC_INVTLB_VEC(7); | ||
231 | case 7: | ||
232 | ALLOC_INVTLB_VEC(6); | ||
233 | case 6: | ||
234 | ALLOC_INVTLB_VEC(5); | ||
235 | case 5: | ||
236 | ALLOC_INVTLB_VEC(4); | ||
237 | case 4: | ||
238 | ALLOC_INVTLB_VEC(3); | ||
239 | case 3: | ||
240 | ALLOC_INVTLB_VEC(2); | ||
241 | case 2: | ||
242 | ALLOC_INVTLB_VEC(1); | ||
243 | case 1: | ||
244 | ALLOC_INVTLB_VEC(0); | ||
245 | break; | ||
246 | } | ||
175 | 247 | ||
176 | /* IPI for generic function call */ | 248 | /* IPI for generic function call */ |
177 | alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); | 249 | alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); |
@@ -243,7 +315,7 @@ void __init native_init_IRQ(void) | |||
243 | set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); | 315 | set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); |
244 | } | 316 | } |
245 | 317 | ||
246 | if (!acpi_ioapic) | 318 | if (!acpi_ioapic && !of_ioapic) |
247 | setup_irq(2, &irq2); | 319 | setup_irq(2, &irq2); |
248 | 320 | ||
249 | #ifdef CONFIG_X86_32 | 321 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index a4130005028a..dba0b36941a5 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
@@ -278,7 +278,7 @@ static int hw_break_release_slot(int breakno) | |||
278 | pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); | 278 | pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); |
279 | if (dbg_release_bp_slot(*pevent)) | 279 | if (dbg_release_bp_slot(*pevent)) |
280 | /* | 280 | /* |
281 | * The debugger is responisble for handing the retry on | 281 | * The debugger is responsible for handing the retry on |
282 | * remove failure. | 282 | * remove failure. |
283 | */ | 283 | */ |
284 | return -1; | 284 | return -1; |
@@ -533,15 +533,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) | |||
533 | } | 533 | } |
534 | return NOTIFY_DONE; | 534 | return NOTIFY_DONE; |
535 | 535 | ||
536 | case DIE_NMIWATCHDOG: | ||
537 | if (atomic_read(&kgdb_active) != -1) { | ||
538 | /* KGDB CPU roundup: */ | ||
539 | kgdb_nmicallback(raw_smp_processor_id(), regs); | ||
540 | return NOTIFY_STOP; | ||
541 | } | ||
542 | /* Enter debugger: */ | ||
543 | break; | ||
544 | |||
545 | case DIE_DEBUG: | 536 | case DIE_DEBUG: |
546 | if (atomic_read(&kgdb_cpu_doing_single_step) != -1) { | 537 | if (atomic_read(&kgdb_cpu_doing_single_step) != -1) { |
547 | if (user_mode(regs)) | 538 | if (user_mode(regs)) |
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index d91c477b3f62..c969fd9d1566 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -1276,6 +1276,14 @@ static int __kprobes can_optimize(unsigned long paddr) | |||
1276 | if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) | 1276 | if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) |
1277 | return 0; | 1277 | return 0; |
1278 | 1278 | ||
1279 | /* | ||
1280 | * Do not optimize in the entry code due to the unstable | ||
1281 | * stack handling. | ||
1282 | */ | ||
1283 | if ((paddr >= (unsigned long )__entry_text_start) && | ||
1284 | (paddr < (unsigned long )__entry_text_end)) | ||
1285 | return 0; | ||
1286 | |||
1279 | /* Check there is enough space for a relative jump. */ | 1287 | /* Check there is enough space for a relative jump. */ |
1280 | if (size - offset < RELATIVEJUMP_SIZE) | 1288 | if (size - offset < RELATIVEJUMP_SIZE) |
1281 | return 0; | 1289 | return 0; |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 8dc44662394b..33c07b0b122e 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -493,7 +493,7 @@ static void __init kvm_smp_prepare_boot_cpu(void) | |||
493 | native_smp_prepare_boot_cpu(); | 493 | native_smp_prepare_boot_cpu(); |
494 | } | 494 | } |
495 | 495 | ||
496 | static void kvm_guest_cpu_online(void *dummy) | 496 | static void __cpuinit kvm_guest_cpu_online(void *dummy) |
497 | { | 497 | { |
498 | kvm_guest_cpu_init(); | 498 | kvm_guest_cpu_init(); |
499 | } | 499 | } |
diff --git a/arch/x86/kernel/mca_32.c b/arch/x86/kernel/mca_32.c index 63eaf6596233..177183cbb6ae 100644 --- a/arch/x86/kernel/mca_32.c +++ b/arch/x86/kernel/mca_32.c | |||
@@ -259,7 +259,7 @@ static int __init mca_init(void) | |||
259 | /* | 259 | /* |
260 | * WARNING: Be careful when making changes here. Putting an adapter | 260 | * WARNING: Be careful when making changes here. Putting an adapter |
261 | * and the motherboard simultaneously into setup mode may result in | 261 | * and the motherboard simultaneously into setup mode may result in |
262 | * damage to chips (according to The Indispensible PC Hardware Book | 262 | * damage to chips (according to The Indispensable PC Hardware Book |
263 | * by Hans-Peter Messmer). Also, we disable system interrupts (so | 263 | * by Hans-Peter Messmer). Also, we disable system interrupts (so |
264 | * that we are not disturbed in the middle of this). | 264 | * that we are not disturbed in the middle of this). |
265 | */ | 265 | */ |
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 0fe6d1a66c38..c5610384ab16 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c | |||
@@ -66,7 +66,6 @@ struct microcode_amd { | |||
66 | unsigned int mpb[0]; | 66 | unsigned int mpb[0]; |
67 | }; | 67 | }; |
68 | 68 | ||
69 | #define UCODE_MAX_SIZE 2048 | ||
70 | #define UCODE_CONTAINER_SECTION_HDR 8 | 69 | #define UCODE_CONTAINER_SECTION_HDR 8 |
71 | #define UCODE_CONTAINER_HEADER_SIZE 12 | 70 | #define UCODE_CONTAINER_HEADER_SIZE 12 |
72 | 71 | ||
@@ -77,20 +76,20 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) | |||
77 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 76 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
78 | u32 dummy; | 77 | u32 dummy; |
79 | 78 | ||
80 | memset(csig, 0, sizeof(*csig)); | ||
81 | if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { | 79 | if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { |
82 | pr_warning("microcode: CPU%d: AMD CPU family 0x%x not " | 80 | pr_warning("CPU%d: family %d not supported\n", cpu, c->x86); |
83 | "supported\n", cpu, c->x86); | ||
84 | return -1; | 81 | return -1; |
85 | } | 82 | } |
83 | |||
86 | rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); | 84 | rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); |
87 | pr_info("CPU%d: patch_level=0x%x\n", cpu, csig->rev); | 85 | pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev); |
86 | |||
88 | return 0; | 87 | return 0; |
89 | } | 88 | } |
90 | 89 | ||
91 | static int get_matching_microcode(int cpu, void *mc, int rev) | 90 | static int get_matching_microcode(int cpu, struct microcode_header_amd *mc_hdr, |
91 | int rev) | ||
92 | { | 92 | { |
93 | struct microcode_header_amd *mc_header = mc; | ||
94 | unsigned int current_cpu_id; | 93 | unsigned int current_cpu_id; |
95 | u16 equiv_cpu_id = 0; | 94 | u16 equiv_cpu_id = 0; |
96 | unsigned int i = 0; | 95 | unsigned int i = 0; |
@@ -109,17 +108,17 @@ static int get_matching_microcode(int cpu, void *mc, int rev) | |||
109 | if (!equiv_cpu_id) | 108 | if (!equiv_cpu_id) |
110 | return 0; | 109 | return 0; |
111 | 110 | ||
112 | if (mc_header->processor_rev_id != equiv_cpu_id) | 111 | if (mc_hdr->processor_rev_id != equiv_cpu_id) |
113 | return 0; | 112 | return 0; |
114 | 113 | ||
115 | /* ucode might be chipset specific -- currently we don't support this */ | 114 | /* ucode might be chipset specific -- currently we don't support this */ |
116 | if (mc_header->nb_dev_id || mc_header->sb_dev_id) { | 115 | if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) { |
117 | pr_err("CPU%d: loading of chipset specific code not yet supported\n", | 116 | pr_err("CPU%d: chipset specific code not yet supported\n", |
118 | cpu); | 117 | cpu); |
119 | return 0; | 118 | return 0; |
120 | } | 119 | } |
121 | 120 | ||
122 | if (mc_header->patch_id <= rev) | 121 | if (mc_hdr->patch_id <= rev) |
123 | return 0; | 122 | return 0; |
124 | 123 | ||
125 | return 1; | 124 | return 1; |
@@ -144,71 +143,93 @@ static int apply_microcode_amd(int cpu) | |||
144 | 143 | ||
145 | /* check current patch id and patch's id for match */ | 144 | /* check current patch id and patch's id for match */ |
146 | if (rev != mc_amd->hdr.patch_id) { | 145 | if (rev != mc_amd->hdr.patch_id) { |
147 | pr_err("CPU%d: update failed (for patch_level=0x%x)\n", | 146 | pr_err("CPU%d: update failed for patch_level=0x%08x\n", |
148 | cpu, mc_amd->hdr.patch_id); | 147 | cpu, mc_amd->hdr.patch_id); |
149 | return -1; | 148 | return -1; |
150 | } | 149 | } |
151 | 150 | ||
152 | pr_info("CPU%d: updated (new patch_level=0x%x)\n", cpu, rev); | 151 | pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev); |
153 | uci->cpu_sig.rev = rev; | 152 | uci->cpu_sig.rev = rev; |
154 | 153 | ||
155 | return 0; | 154 | return 0; |
156 | } | 155 | } |
157 | 156 | ||
158 | static void * | 157 | static unsigned int verify_ucode_size(int cpu, const u8 *buf, unsigned int size) |
159 | get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) | ||
160 | { | 158 | { |
161 | unsigned int total_size; | 159 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
162 | u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; | 160 | unsigned int max_size, actual_size; |
163 | void *mc; | 161 | |
162 | #define F1XH_MPB_MAX_SIZE 2048 | ||
163 | #define F14H_MPB_MAX_SIZE 1824 | ||
164 | #define F15H_MPB_MAX_SIZE 4096 | ||
165 | |||
166 | switch (c->x86) { | ||
167 | case 0x14: | ||
168 | max_size = F14H_MPB_MAX_SIZE; | ||
169 | break; | ||
170 | case 0x15: | ||
171 | max_size = F15H_MPB_MAX_SIZE; | ||
172 | break; | ||
173 | default: | ||
174 | max_size = F1XH_MPB_MAX_SIZE; | ||
175 | break; | ||
176 | } | ||
164 | 177 | ||
165 | get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR); | 178 | actual_size = buf[4] + (buf[5] << 8); |
166 | 179 | ||
167 | if (section_hdr[0] != UCODE_UCODE_TYPE) { | 180 | if (actual_size > size || actual_size > max_size) { |
168 | pr_err("error: invalid type field in container file section header\n"); | 181 | pr_err("section size mismatch\n"); |
169 | return NULL; | 182 | return 0; |
170 | } | 183 | } |
171 | 184 | ||
172 | total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); | 185 | return actual_size; |
186 | } | ||
173 | 187 | ||
174 | if (total_size > size || total_size > UCODE_MAX_SIZE) { | 188 | static struct microcode_header_amd * |
175 | pr_err("error: size mismatch\n"); | 189 | get_next_ucode(int cpu, const u8 *buf, unsigned int size, unsigned int *mc_size) |
176 | return NULL; | 190 | { |
191 | struct microcode_header_amd *mc = NULL; | ||
192 | unsigned int actual_size = 0; | ||
193 | |||
194 | if (buf[0] != UCODE_UCODE_TYPE) { | ||
195 | pr_err("invalid type field in container file section header\n"); | ||
196 | goto out; | ||
177 | } | 197 | } |
178 | 198 | ||
179 | mc = vzalloc(UCODE_MAX_SIZE); | 199 | actual_size = verify_ucode_size(cpu, buf, size); |
200 | if (!actual_size) | ||
201 | goto out; | ||
202 | |||
203 | mc = vzalloc(actual_size); | ||
180 | if (!mc) | 204 | if (!mc) |
181 | return NULL; | 205 | goto out; |
182 | 206 | ||
183 | get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size); | 207 | get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, actual_size); |
184 | *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; | 208 | *mc_size = actual_size + UCODE_CONTAINER_SECTION_HDR; |
185 | 209 | ||
210 | out: | ||
186 | return mc; | 211 | return mc; |
187 | } | 212 | } |
188 | 213 | ||
189 | static int install_equiv_cpu_table(const u8 *buf) | 214 | static int install_equiv_cpu_table(const u8 *buf) |
190 | { | 215 | { |
191 | u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE]; | 216 | unsigned int *ibuf = (unsigned int *)buf; |
192 | unsigned int *buf_pos = (unsigned int *)container_hdr; | 217 | unsigned int type = ibuf[1]; |
193 | unsigned long size; | 218 | unsigned int size = ibuf[2]; |
194 | 219 | ||
195 | get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE); | 220 | if (type != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { |
196 | 221 | pr_err("empty section/" | |
197 | size = buf_pos[2]; | 222 | "invalid type field in container file section header\n"); |
198 | 223 | return -EINVAL; | |
199 | if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { | ||
200 | pr_err("error: invalid type field in container file section header\n"); | ||
201 | return 0; | ||
202 | } | 224 | } |
203 | 225 | ||
204 | equiv_cpu_table = vmalloc(size); | 226 | equiv_cpu_table = vmalloc(size); |
205 | if (!equiv_cpu_table) { | 227 | if (!equiv_cpu_table) { |
206 | pr_err("failed to allocate equivalent CPU table\n"); | 228 | pr_err("failed to allocate equivalent CPU table\n"); |
207 | return 0; | 229 | return -ENOMEM; |
208 | } | 230 | } |
209 | 231 | ||
210 | buf += UCODE_CONTAINER_HEADER_SIZE; | 232 | get_ucode_data(equiv_cpu_table, buf + UCODE_CONTAINER_HEADER_SIZE, size); |
211 | get_ucode_data(equiv_cpu_table, buf, size); | ||
212 | 233 | ||
213 | return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ | 234 | return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ |
214 | } | 235 | } |
@@ -223,16 +244,16 @@ static enum ucode_state | |||
223 | generic_load_microcode(int cpu, const u8 *data, size_t size) | 244 | generic_load_microcode(int cpu, const u8 *data, size_t size) |
224 | { | 245 | { |
225 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 246 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
247 | struct microcode_header_amd *mc_hdr = NULL; | ||
248 | unsigned int mc_size, leftover; | ||
249 | int offset; | ||
226 | const u8 *ucode_ptr = data; | 250 | const u8 *ucode_ptr = data; |
227 | void *new_mc = NULL; | 251 | void *new_mc = NULL; |
228 | void *mc; | 252 | unsigned int new_rev = uci->cpu_sig.rev; |
229 | int new_rev = uci->cpu_sig.rev; | ||
230 | unsigned int leftover; | ||
231 | unsigned long offset; | ||
232 | enum ucode_state state = UCODE_OK; | 253 | enum ucode_state state = UCODE_OK; |
233 | 254 | ||
234 | offset = install_equiv_cpu_table(ucode_ptr); | 255 | offset = install_equiv_cpu_table(ucode_ptr); |
235 | if (!offset) { | 256 | if (offset < 0) { |
236 | pr_err("failed to create equivalent cpu table\n"); | 257 | pr_err("failed to create equivalent cpu table\n"); |
237 | return UCODE_ERROR; | 258 | return UCODE_ERROR; |
238 | } | 259 | } |
@@ -241,64 +262,65 @@ generic_load_microcode(int cpu, const u8 *data, size_t size) | |||
241 | leftover = size - offset; | 262 | leftover = size - offset; |
242 | 263 | ||
243 | while (leftover) { | 264 | while (leftover) { |
244 | unsigned int uninitialized_var(mc_size); | 265 | mc_hdr = get_next_ucode(cpu, ucode_ptr, leftover, &mc_size); |
245 | struct microcode_header_amd *mc_header; | 266 | if (!mc_hdr) |
246 | |||
247 | mc = get_next_ucode(ucode_ptr, leftover, &mc_size); | ||
248 | if (!mc) | ||
249 | break; | 267 | break; |
250 | 268 | ||
251 | mc_header = (struct microcode_header_amd *)mc; | 269 | if (get_matching_microcode(cpu, mc_hdr, new_rev)) { |
252 | if (get_matching_microcode(cpu, mc, new_rev)) { | ||
253 | vfree(new_mc); | 270 | vfree(new_mc); |
254 | new_rev = mc_header->patch_id; | 271 | new_rev = mc_hdr->patch_id; |
255 | new_mc = mc; | 272 | new_mc = mc_hdr; |
256 | } else | 273 | } else |
257 | vfree(mc); | 274 | vfree(mc_hdr); |
258 | 275 | ||
259 | ucode_ptr += mc_size; | 276 | ucode_ptr += mc_size; |
260 | leftover -= mc_size; | 277 | leftover -= mc_size; |
261 | } | 278 | } |
262 | 279 | ||
263 | if (new_mc) { | 280 | if (!new_mc) { |
264 | if (!leftover) { | ||
265 | vfree(uci->mc); | ||
266 | uci->mc = new_mc; | ||
267 | pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", | ||
268 | cpu, new_rev, uci->cpu_sig.rev); | ||
269 | } else { | ||
270 | vfree(new_mc); | ||
271 | state = UCODE_ERROR; | ||
272 | } | ||
273 | } else | ||
274 | state = UCODE_NFOUND; | 281 | state = UCODE_NFOUND; |
282 | goto free_table; | ||
283 | } | ||
275 | 284 | ||
285 | if (!leftover) { | ||
286 | vfree(uci->mc); | ||
287 | uci->mc = new_mc; | ||
288 | pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n", | ||
289 | cpu, uci->cpu_sig.rev, new_rev); | ||
290 | } else { | ||
291 | vfree(new_mc); | ||
292 | state = UCODE_ERROR; | ||
293 | } | ||
294 | |||
295 | free_table: | ||
276 | free_equiv_cpu_table(); | 296 | free_equiv_cpu_table(); |
277 | 297 | ||
278 | return state; | 298 | return state; |
279 | } | 299 | } |
280 | 300 | ||
281 | static enum ucode_state request_microcode_fw(int cpu, struct device *device) | 301 | static enum ucode_state request_microcode_amd(int cpu, struct device *device) |
282 | { | 302 | { |
283 | const char *fw_name = "amd-ucode/microcode_amd.bin"; | 303 | const char *fw_name = "amd-ucode/microcode_amd.bin"; |
284 | const struct firmware *firmware; | 304 | const struct firmware *fw; |
285 | enum ucode_state ret; | 305 | enum ucode_state ret = UCODE_NFOUND; |
286 | 306 | ||
287 | if (request_firmware(&firmware, fw_name, device)) { | 307 | if (request_firmware(&fw, fw_name, device)) { |
288 | printk(KERN_ERR "microcode: failed to load file %s\n", fw_name); | 308 | pr_err("failed to load file %s\n", fw_name); |
289 | return UCODE_NFOUND; | 309 | goto out; |
290 | } | 310 | } |
291 | 311 | ||
292 | if (*(u32 *)firmware->data != UCODE_MAGIC) { | 312 | ret = UCODE_ERROR; |
293 | pr_err("invalid UCODE_MAGIC (0x%08x)\n", | 313 | if (*(u32 *)fw->data != UCODE_MAGIC) { |
294 | *(u32 *)firmware->data); | 314 | pr_err("invalid magic value (0x%08x)\n", *(u32 *)fw->data); |
295 | return UCODE_ERROR; | 315 | goto fw_release; |
296 | } | 316 | } |
297 | 317 | ||
298 | ret = generic_load_microcode(cpu, firmware->data, firmware->size); | 318 | ret = generic_load_microcode(cpu, fw->data, fw->size); |
299 | 319 | ||
300 | release_firmware(firmware); | 320 | fw_release: |
321 | release_firmware(fw); | ||
301 | 322 | ||
323 | out: | ||
302 | return ret; | 324 | return ret; |
303 | } | 325 | } |
304 | 326 | ||
@@ -319,7 +341,7 @@ static void microcode_fini_cpu_amd(int cpu) | |||
319 | 341 | ||
320 | static struct microcode_ops microcode_amd_ops = { | 342 | static struct microcode_ops microcode_amd_ops = { |
321 | .request_microcode_user = request_microcode_user, | 343 | .request_microcode_user = request_microcode_user, |
322 | .request_microcode_fw = request_microcode_fw, | 344 | .request_microcode_fw = request_microcode_amd, |
323 | .collect_cpu_info = collect_cpu_info_amd, | 345 | .collect_cpu_info = collect_cpu_info_amd, |
324 | .apply_microcode = apply_microcode_amd, | 346 | .apply_microcode = apply_microcode_amd, |
325 | .microcode_fini_cpu = microcode_fini_cpu_amd, | 347 | .microcode_fini_cpu = microcode_fini_cpu_amd, |
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 1cca374a2bac..87af68e0e1e1 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
@@ -417,8 +417,10 @@ static int mc_sysdev_add(struct sys_device *sys_dev) | |||
417 | if (err) | 417 | if (err) |
418 | return err; | 418 | return err; |
419 | 419 | ||
420 | if (microcode_init_cpu(cpu) == UCODE_ERROR) | 420 | if (microcode_init_cpu(cpu) == UCODE_ERROR) { |
421 | err = -EINVAL; | 421 | sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); |
422 | return -EINVAL; | ||
423 | } | ||
422 | 424 | ||
423 | return err; | 425 | return err; |
424 | } | 426 | } |
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 01b0f6d06451..6f789a887c06 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -883,7 +883,7 @@ static int __init update_mp_table(void) | |||
883 | 883 | ||
884 | if (!mpc_new_phys) { | 884 | if (!mpc_new_phys) { |
885 | unsigned char old, new; | 885 | unsigned char old, new; |
886 | /* check if we can change the postion */ | 886 | /* check if we can change the position */ |
887 | mpc->checksum = 0; | 887 | mpc->checksum = 0; |
888 | old = mpf_checksum((unsigned char *)mpc, mpc->length); | 888 | old = mpf_checksum((unsigned char *)mpc, mpc->length); |
889 | mpc->checksum = 0xff; | 889 | mpc->checksum = 0xff; |
@@ -892,7 +892,7 @@ static int __init update_mp_table(void) | |||
892 | printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n"); | 892 | printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n"); |
893 | return 0; | 893 | return 0; |
894 | } | 894 | } |
895 | printk(KERN_INFO "use in-positon replacing\n"); | 895 | printk(KERN_INFO "use in-position replacing\n"); |
896 | } else { | 896 | } else { |
897 | mpf->physptr = mpc_new_phys; | 897 | mpf->physptr = mpc_new_phys; |
898 | mpc_new = phys_to_virt(mpc_new_phys); | 898 | mpc_new = phys_to_virt(mpc_new_phys); |
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index f56a117cef68..e8c33a302006 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c | |||
@@ -1279,7 +1279,7 @@ static int __init calgary_bus_has_devices(int bus, unsigned short pci_dev) | |||
1279 | 1279 | ||
1280 | if (pci_dev == PCI_DEVICE_ID_IBM_CALIOC2) { | 1280 | if (pci_dev == PCI_DEVICE_ID_IBM_CALIOC2) { |
1281 | /* | 1281 | /* |
1282 | * FIXME: properly scan for devices accross the | 1282 | * FIXME: properly scan for devices across the |
1283 | * PCI-to-PCI bridge on every CalIOC2 port. | 1283 | * PCI-to-PCI bridge on every CalIOC2 port. |
1284 | */ | 1284 | */ |
1285 | return 1; | 1285 | return 1; |
@@ -1295,7 +1295,7 @@ static int __init calgary_bus_has_devices(int bus, unsigned short pci_dev) | |||
1295 | 1295 | ||
1296 | /* | 1296 | /* |
1297 | * calgary_init_bitmap_from_tce_table(): | 1297 | * calgary_init_bitmap_from_tce_table(): |
1298 | * Funtion for kdump case. In the second/kdump kernel initialize | 1298 | * Function for kdump case. In the second/kdump kernel initialize |
1299 | * the bitmap based on the tce table entries obtained from first kernel | 1299 | * the bitmap based on the tce table entries obtained from first kernel |
1300 | */ | 1300 | */ |
1301 | static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl) | 1301 | static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl) |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ff4554198981..d46cbe46b7ab 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -87,7 +87,7 @@ void exit_thread(void) | |||
87 | void show_regs(struct pt_regs *regs) | 87 | void show_regs(struct pt_regs *regs) |
88 | { | 88 | { |
89 | show_registers(regs); | 89 | show_registers(regs); |
90 | show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs)); | 90 | show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), 0); |
91 | } | 91 | } |
92 | 92 | ||
93 | void show_regs_common(void) | 93 | void show_regs_common(void) |
@@ -110,12 +110,9 @@ void show_regs_common(void) | |||
110 | init_utsname()->release, | 110 | init_utsname()->release, |
111 | (int)strcspn(init_utsname()->version, " "), | 111 | (int)strcspn(init_utsname()->version, " "), |
112 | init_utsname()->version); | 112 | init_utsname()->version); |
113 | printk(KERN_CONT " "); | 113 | printk(KERN_CONT " %s %s", vendor, product); |
114 | printk(KERN_CONT "%s %s", vendor, product); | 114 | if (board) |
115 | if (board) { | 115 | printk(KERN_CONT "/%s", board); |
116 | printk(KERN_CONT "/"); | ||
117 | printk(KERN_CONT "%s", board); | ||
118 | } | ||
119 | printk(KERN_CONT "\n"); | 116 | printk(KERN_CONT "\n"); |
120 | } | 117 | } |
121 | 118 | ||
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 715037caeb43..d3ce37edb54d 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -303,68 +303,16 @@ static int __init reboot_init(void) | |||
303 | } | 303 | } |
304 | core_initcall(reboot_init); | 304 | core_initcall(reboot_init); |
305 | 305 | ||
306 | /* The following code and data reboots the machine by switching to real | 306 | extern const unsigned char machine_real_restart_asm[]; |
307 | mode and jumping to the BIOS reset entry point, as if the CPU has | 307 | extern const u64 machine_real_restart_gdt[3]; |
308 | really been reset. The previous version asked the keyboard | ||
309 | controller to pulse the CPU reset line, which is more thorough, but | ||
310 | doesn't work with at least one type of 486 motherboard. It is easy | ||
311 | to stop this code working; hence the copious comments. */ | ||
312 | static const unsigned long long | ||
313 | real_mode_gdt_entries [3] = | ||
314 | { | ||
315 | 0x0000000000000000ULL, /* Null descriptor */ | ||
316 | 0x00009b000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ | ||
317 | 0x000093000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ | ||
318 | }; | ||
319 | 308 | ||
320 | static const struct desc_ptr | 309 | void machine_real_restart(unsigned int type) |
321 | real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries }, | ||
322 | real_mode_idt = { 0x3ff, 0 }; | ||
323 | |||
324 | /* This is 16-bit protected mode code to disable paging and the cache, | ||
325 | switch to real mode and jump to the BIOS reset code. | ||
326 | |||
327 | The instruction that switches to real mode by writing to CR0 must be | ||
328 | followed immediately by a far jump instruction, which set CS to a | ||
329 | valid value for real mode, and flushes the prefetch queue to avoid | ||
330 | running instructions that have already been decoded in protected | ||
331 | mode. | ||
332 | |||
333 | Clears all the flags except ET, especially PG (paging), PE | ||
334 | (protected-mode enable) and TS (task switch for coprocessor state | ||
335 | save). Flushes the TLB after paging has been disabled. Sets CD and | ||
336 | NW, to disable the cache on a 486, and invalidates the cache. This | ||
337 | is more like the state of a 486 after reset. I don't know if | ||
338 | something else should be done for other chips. | ||
339 | |||
340 | More could be done here to set up the registers as if a CPU reset had | ||
341 | occurred; hopefully real BIOSs don't assume much. */ | ||
342 | static const unsigned char real_mode_switch [] = | ||
343 | { | ||
344 | 0x66, 0x0f, 0x20, 0xc0, /* movl %cr0,%eax */ | ||
345 | 0x66, 0x83, 0xe0, 0x11, /* andl $0x00000011,%eax */ | ||
346 | 0x66, 0x0d, 0x00, 0x00, 0x00, 0x60, /* orl $0x60000000,%eax */ | ||
347 | 0x66, 0x0f, 0x22, 0xc0, /* movl %eax,%cr0 */ | ||
348 | 0x66, 0x0f, 0x22, 0xd8, /* movl %eax,%cr3 */ | ||
349 | 0x66, 0x0f, 0x20, 0xc3, /* movl %cr0,%ebx */ | ||
350 | 0x66, 0x81, 0xe3, 0x00, 0x00, 0x00, 0x60, /* andl $0x60000000,%ebx */ | ||
351 | 0x74, 0x02, /* jz f */ | ||
352 | 0x0f, 0x09, /* wbinvd */ | ||
353 | 0x24, 0x10, /* f: andb $0x10,al */ | ||
354 | 0x66, 0x0f, 0x22, 0xc0 /* movl %eax,%cr0 */ | ||
355 | }; | ||
356 | static const unsigned char jump_to_bios [] = | ||
357 | { | 310 | { |
358 | 0xea, 0x00, 0x00, 0xff, 0xff /* ljmp $0xffff,$0x0000 */ | 311 | void *restart_va; |
359 | }; | 312 | unsigned long restart_pa; |
313 | void (*restart_lowmem)(unsigned int); | ||
314 | u64 *lowmem_gdt; | ||
360 | 315 | ||
361 | /* | ||
362 | * Switch to real mode and then execute the code | ||
363 | * specified by the code and length parameters. | ||
364 | * We assume that length will aways be less that 100! | ||
365 | */ | ||
366 | void machine_real_restart(const unsigned char *code, int length) | ||
367 | { | ||
368 | local_irq_disable(); | 316 | local_irq_disable(); |
369 | 317 | ||
370 | /* Write zero to CMOS register number 0x0f, which the BIOS POST | 318 | /* Write zero to CMOS register number 0x0f, which the BIOS POST |
@@ -392,41 +340,23 @@ void machine_real_restart(const unsigned char *code, int length) | |||
392 | too. */ | 340 | too. */ |
393 | *((unsigned short *)0x472) = reboot_mode; | 341 | *((unsigned short *)0x472) = reboot_mode; |
394 | 342 | ||
395 | /* For the switch to real mode, copy some code to low memory. It has | 343 | /* Patch the GDT in the low memory trampoline */ |
396 | to be in the first 64k because it is running in 16-bit mode, and it | 344 | lowmem_gdt = TRAMPOLINE_SYM(machine_real_restart_gdt); |
397 | has to have the same physical and virtual address, because it turns | 345 | |
398 | off paging. Copy it near the end of the first page, out of the way | 346 | restart_va = TRAMPOLINE_SYM(machine_real_restart_asm); |
399 | of BIOS variables. */ | 347 | restart_pa = virt_to_phys(restart_va); |
400 | memcpy((void *)(0x1000 - sizeof(real_mode_switch) - 100), | 348 | restart_lowmem = (void (*)(unsigned int))restart_pa; |
401 | real_mode_switch, sizeof (real_mode_switch)); | 349 | |
402 | memcpy((void *)(0x1000 - 100), code, length); | 350 | /* GDT[0]: GDT self-pointer */ |
403 | 351 | lowmem_gdt[0] = | |
404 | /* Set up the IDT for real mode. */ | 352 | (u64)(sizeof(machine_real_restart_gdt) - 1) + |
405 | load_idt(&real_mode_idt); | 353 | ((u64)virt_to_phys(lowmem_gdt) << 16); |
406 | 354 | /* GDT[1]: 64K real mode code segment */ | |
407 | /* Set up a GDT from which we can load segment descriptors for real | 355 | lowmem_gdt[1] = |
408 | mode. The GDT is not used in real mode; it is just needed here to | 356 | GDT_ENTRY(0x009b, restart_pa, 0xffff); |
409 | prepare the descriptors. */ | 357 | |
410 | load_gdt(&real_mode_gdt); | 358 | /* Jump to the identity-mapped low memory code */ |
411 | 359 | restart_lowmem(type); | |
412 | /* Load the data segment registers, and thus the descriptors ready for | ||
413 | real mode. The base address of each segment is 0x100, 16 times the | ||
414 | selector value being loaded here. This is so that the segment | ||
415 | registers don't have to be reloaded after switching to real mode: | ||
416 | the values are consistent for real mode operation already. */ | ||
417 | __asm__ __volatile__ ("movl $0x0010,%%eax\n" | ||
418 | "\tmovl %%eax,%%ds\n" | ||
419 | "\tmovl %%eax,%%es\n" | ||
420 | "\tmovl %%eax,%%fs\n" | ||
421 | "\tmovl %%eax,%%gs\n" | ||
422 | "\tmovl %%eax,%%ss" : : : "eax"); | ||
423 | |||
424 | /* Jump to the 16-bit code that we copied earlier. It disables paging | ||
425 | and the cache, switches to real mode, and jumps to the BIOS reset | ||
426 | entry point. */ | ||
427 | __asm__ __volatile__ ("ljmp $0x0008,%0" | ||
428 | : | ||
429 | : "i" ((void *)(0x1000 - sizeof (real_mode_switch) - 100))); | ||
430 | } | 360 | } |
431 | #ifdef CONFIG_APM_MODULE | 361 | #ifdef CONFIG_APM_MODULE |
432 | EXPORT_SYMBOL(machine_real_restart); | 362 | EXPORT_SYMBOL(machine_real_restart); |
@@ -581,7 +511,7 @@ static void native_machine_emergency_restart(void) | |||
581 | 511 | ||
582 | #ifdef CONFIG_X86_32 | 512 | #ifdef CONFIG_X86_32 |
583 | case BOOT_BIOS: | 513 | case BOOT_BIOS: |
584 | machine_real_restart(jump_to_bios, sizeof(jump_to_bios)); | 514 | machine_real_restart(MRR_BIOS); |
585 | 515 | ||
586 | reboot_type = BOOT_KBD; | 516 | reboot_type = BOOT_KBD; |
587 | break; | 517 | break; |
diff --git a/arch/x86/kernel/reboot_32.S b/arch/x86/kernel/reboot_32.S new file mode 100644 index 000000000000..29092b38d816 --- /dev/null +++ b/arch/x86/kernel/reboot_32.S | |||
@@ -0,0 +1,135 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | #include <linux/init.h> | ||
3 | #include <asm/segment.h> | ||
4 | #include <asm/page_types.h> | ||
5 | |||
6 | /* | ||
7 | * The following code and data reboots the machine by switching to real | ||
8 | * mode and jumping to the BIOS reset entry point, as if the CPU has | ||
9 | * really been reset. The previous version asked the keyboard | ||
10 | * controller to pulse the CPU reset line, which is more thorough, but | ||
11 | * doesn't work with at least one type of 486 motherboard. It is easy | ||
12 | * to stop this code working; hence the copious comments. | ||
13 | * | ||
14 | * This code is called with the restart type (0 = BIOS, 1 = APM) in %eax. | ||
15 | */ | ||
16 | .section ".x86_trampoline","a" | ||
17 | .balign 16 | ||
18 | .code32 | ||
19 | ENTRY(machine_real_restart_asm) | ||
20 | r_base = . | ||
21 | /* Get our own relocated address */ | ||
22 | call 1f | ||
23 | 1: popl %ebx | ||
24 | subl $1b, %ebx | ||
25 | |||
26 | /* Compute the equivalent real-mode segment */ | ||
27 | movl %ebx, %ecx | ||
28 | shrl $4, %ecx | ||
29 | |||
30 | /* Patch post-real-mode segment jump */ | ||
31 | movw dispatch_table(%ebx,%eax,2),%ax | ||
32 | movw %ax, 101f(%ebx) | ||
33 | movw %cx, 102f(%ebx) | ||
34 | |||
35 | /* Set up the IDT for real mode. */ | ||
36 | lidtl machine_real_restart_idt(%ebx) | ||
37 | |||
38 | /* | ||
39 | * Set up a GDT from which we can load segment descriptors for real | ||
40 | * mode. The GDT is not used in real mode; it is just needed here to | ||
41 | * prepare the descriptors. | ||
42 | */ | ||
43 | lgdtl machine_real_restart_gdt(%ebx) | ||
44 | |||
45 | /* | ||
46 | * Load the data segment registers with 16-bit compatible values | ||
47 | */ | ||
48 | movl $16, %ecx | ||
49 | movl %ecx, %ds | ||
50 | movl %ecx, %es | ||
51 | movl %ecx, %fs | ||
52 | movl %ecx, %gs | ||
53 | movl %ecx, %ss | ||
54 | ljmpl $8, $1f - r_base | ||
55 | |||
56 | /* | ||
57 | * This is 16-bit protected mode code to disable paging and the cache, | ||
58 | * switch to real mode and jump to the BIOS reset code. | ||
59 | * | ||
60 | * The instruction that switches to real mode by writing to CR0 must be | ||
61 | * followed immediately by a far jump instruction, which set CS to a | ||
62 | * valid value for real mode, and flushes the prefetch queue to avoid | ||
63 | * running instructions that have already been decoded in protected | ||
64 | * mode. | ||
65 | * | ||
66 | * Clears all the flags except ET, especially PG (paging), PE | ||
67 | * (protected-mode enable) and TS (task switch for coprocessor state | ||
68 | * save). Flushes the TLB after paging has been disabled. Sets CD and | ||
69 | * NW, to disable the cache on a 486, and invalidates the cache. This | ||
70 | * is more like the state of a 486 after reset. I don't know if | ||
71 | * something else should be done for other chips. | ||
72 | * | ||
73 | * More could be done here to set up the registers as if a CPU reset had | ||
74 | * occurred; hopefully real BIOSs don't assume much. This is not the | ||
75 | * actual BIOS entry point, anyway (that is at 0xfffffff0). | ||
76 | * | ||
77 | * Most of this work is probably excessive, but it is what is tested. | ||
78 | */ | ||
79 | .code16 | ||
80 | 1: | ||
81 | xorl %ecx, %ecx | ||
82 | movl %cr0, %eax | ||
83 | andl $0x00000011, %eax | ||
84 | orl $0x60000000, %eax | ||
85 | movl %eax, %cr0 | ||
86 | movl %ecx, %cr3 | ||
87 | movl %cr0, %edx | ||
88 | andl $0x60000000, %edx /* If no cache bits -> no wbinvd */ | ||
89 | jz 2f | ||
90 | wbinvd | ||
91 | 2: | ||
92 | andb $0x10, %al | ||
93 | movl %eax, %cr0 | ||
94 | .byte 0xea /* ljmpw */ | ||
95 | 101: .word 0 /* Offset */ | ||
96 | 102: .word 0 /* Segment */ | ||
97 | |||
98 | bios: | ||
99 | ljmpw $0xf000, $0xfff0 | ||
100 | |||
101 | apm: | ||
102 | movw $0x1000, %ax | ||
103 | movw %ax, %ss | ||
104 | movw $0xf000, %sp | ||
105 | movw $0x5307, %ax | ||
106 | movw $0x0001, %bx | ||
107 | movw $0x0003, %cx | ||
108 | int $0x15 | ||
109 | |||
110 | END(machine_real_restart_asm) | ||
111 | |||
112 | .balign 16 | ||
113 | /* These must match <asm/reboot.h */ | ||
114 | dispatch_table: | ||
115 | .word bios - r_base | ||
116 | .word apm - r_base | ||
117 | END(dispatch_table) | ||
118 | |||
119 | .balign 16 | ||
120 | machine_real_restart_idt: | ||
121 | .word 0xffff /* Length - real mode default value */ | ||
122 | .long 0 /* Base - real mode default value */ | ||
123 | END(machine_real_restart_idt) | ||
124 | |||
125 | .balign 16 | ||
126 | ENTRY(machine_real_restart_gdt) | ||
127 | .quad 0 /* Self-pointer, filled in by PM code */ | ||
128 | .quad 0 /* 16-bit code segment, filled in by PM code */ | ||
129 | /* | ||
130 | * 16-bit data segment with the selector value 16 = 0x10 and | ||
131 | * base value 0x100; since this is consistent with real mode | ||
132 | * semantics we don't have to reload the segments once CR0.PE = 0. | ||
133 | */ | ||
134 | .quad GDT_ENTRY(0x0093, 0x100, 0xffff) | ||
135 | END(machine_real_restart_gdt) | ||
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 6f39cab052d5..3f2ad2640d85 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <linux/acpi.h> | 6 | #include <linux/acpi.h> |
7 | #include <linux/bcd.h> | 7 | #include <linux/bcd.h> |
8 | #include <linux/pnp.h> | 8 | #include <linux/pnp.h> |
9 | #include <linux/of.h> | ||
9 | 10 | ||
10 | #include <asm/vsyscall.h> | 11 | #include <asm/vsyscall.h> |
11 | #include <asm/x86_init.h> | 12 | #include <asm/x86_init.h> |
@@ -236,6 +237,8 @@ static __init int add_rtc_cmos(void) | |||
236 | } | 237 | } |
237 | } | 238 | } |
238 | #endif | 239 | #endif |
240 | if (of_have_populated_dt()) | ||
241 | return 0; | ||
239 | 242 | ||
240 | platform_device_register(&rtc_device); | 243 | platform_device_register(&rtc_device); |
241 | dev_info(&rtc_device.dev, | 244 | dev_info(&rtc_device.dev, |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d3cfe26c0252..32bd87cbf982 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -113,6 +113,7 @@ | |||
113 | #endif | 113 | #endif |
114 | #include <asm/mce.h> | 114 | #include <asm/mce.h> |
115 | #include <asm/alternative.h> | 115 | #include <asm/alternative.h> |
116 | #include <asm/prom.h> | ||
116 | 117 | ||
117 | /* | 118 | /* |
118 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. | 119 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. |
@@ -297,6 +298,9 @@ static void __init init_gbpages(void) | |||
297 | static inline void init_gbpages(void) | 298 | static inline void init_gbpages(void) |
298 | { | 299 | { |
299 | } | 300 | } |
301 | static void __init cleanup_highmap(void) | ||
302 | { | ||
303 | } | ||
300 | #endif | 304 | #endif |
301 | 305 | ||
302 | static void __init reserve_brk(void) | 306 | static void __init reserve_brk(void) |
@@ -429,16 +433,30 @@ static void __init parse_setup_data(void) | |||
429 | return; | 433 | return; |
430 | pa_data = boot_params.hdr.setup_data; | 434 | pa_data = boot_params.hdr.setup_data; |
431 | while (pa_data) { | 435 | while (pa_data) { |
432 | data = early_memremap(pa_data, PAGE_SIZE); | 436 | u32 data_len, map_len; |
437 | |||
438 | map_len = max(PAGE_SIZE - (pa_data & ~PAGE_MASK), | ||
439 | (u64)sizeof(struct setup_data)); | ||
440 | data = early_memremap(pa_data, map_len); | ||
441 | data_len = data->len + sizeof(struct setup_data); | ||
442 | if (data_len > map_len) { | ||
443 | early_iounmap(data, map_len); | ||
444 | data = early_memremap(pa_data, data_len); | ||
445 | map_len = data_len; | ||
446 | } | ||
447 | |||
433 | switch (data->type) { | 448 | switch (data->type) { |
434 | case SETUP_E820_EXT: | 449 | case SETUP_E820_EXT: |
435 | parse_e820_ext(data, pa_data); | 450 | parse_e820_ext(data); |
451 | break; | ||
452 | case SETUP_DTB: | ||
453 | add_dtb(pa_data); | ||
436 | break; | 454 | break; |
437 | default: | 455 | default: |
438 | break; | 456 | break; |
439 | } | 457 | } |
440 | pa_data = data->next; | 458 | pa_data = data->next; |
441 | early_iounmap(data, PAGE_SIZE); | 459 | early_iounmap(data, map_len); |
442 | } | 460 | } |
443 | } | 461 | } |
444 | 462 | ||
@@ -680,15 +698,6 @@ static int __init parse_reservelow(char *p) | |||
680 | 698 | ||
681 | early_param("reservelow", parse_reservelow); | 699 | early_param("reservelow", parse_reservelow); |
682 | 700 | ||
683 | static u64 __init get_max_mapped(void) | ||
684 | { | ||
685 | u64 end = max_pfn_mapped; | ||
686 | |||
687 | end <<= PAGE_SHIFT; | ||
688 | |||
689 | return end; | ||
690 | } | ||
691 | |||
692 | /* | 701 | /* |
693 | * Determine if we were loaded by an EFI loader. If so, then we have also been | 702 | * Determine if we were loaded by an EFI loader. If so, then we have also been |
694 | * passed the efi memmap, systab, etc., so we should use these data structures | 703 | * passed the efi memmap, systab, etc., so we should use these data structures |
@@ -704,8 +713,6 @@ static u64 __init get_max_mapped(void) | |||
704 | 713 | ||
705 | void __init setup_arch(char **cmdline_p) | 714 | void __init setup_arch(char **cmdline_p) |
706 | { | 715 | { |
707 | int acpi = 0; | ||
708 | int amd = 0; | ||
709 | unsigned long flags; | 716 | unsigned long flags; |
710 | 717 | ||
711 | #ifdef CONFIG_X86_32 | 718 | #ifdef CONFIG_X86_32 |
@@ -922,6 +929,8 @@ void __init setup_arch(char **cmdline_p) | |||
922 | */ | 929 | */ |
923 | reserve_brk(); | 930 | reserve_brk(); |
924 | 931 | ||
932 | cleanup_highmap(); | ||
933 | |||
925 | memblock.current_limit = get_max_mapped(); | 934 | memblock.current_limit = get_max_mapped(); |
926 | memblock_x86_fill(); | 935 | memblock_x86_fill(); |
927 | 936 | ||
@@ -935,15 +944,8 @@ void __init setup_arch(char **cmdline_p) | |||
935 | printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n", | 944 | printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n", |
936 | max_pfn_mapped<<PAGE_SHIFT); | 945 | max_pfn_mapped<<PAGE_SHIFT); |
937 | 946 | ||
938 | reserve_trampoline_memory(); | 947 | setup_trampolines(); |
939 | 948 | ||
940 | #ifdef CONFIG_ACPI_SLEEP | ||
941 | /* | ||
942 | * Reserve low memory region for sleep support. | ||
943 | * even before init_memory_mapping | ||
944 | */ | ||
945 | acpi_reserve_wakeup_memory(); | ||
946 | #endif | ||
947 | init_gbpages(); | 949 | init_gbpages(); |
948 | 950 | ||
949 | /* max_pfn_mapped is updated here */ | 951 | /* max_pfn_mapped is updated here */ |
@@ -984,19 +986,7 @@ void __init setup_arch(char **cmdline_p) | |||
984 | 986 | ||
985 | early_acpi_boot_init(); | 987 | early_acpi_boot_init(); |
986 | 988 | ||
987 | #ifdef CONFIG_ACPI_NUMA | 989 | initmem_init(); |
988 | /* | ||
989 | * Parse SRAT to discover nodes. | ||
990 | */ | ||
991 | acpi = acpi_numa_init(); | ||
992 | #endif | ||
993 | |||
994 | #ifdef CONFIG_AMD_NUMA | ||
995 | if (!acpi) | ||
996 | amd = !amd_numa_init(0, max_pfn); | ||
997 | #endif | ||
998 | |||
999 | initmem_init(0, max_pfn, acpi, amd); | ||
1000 | memblock_find_dma_reserve(); | 990 | memblock_find_dma_reserve(); |
1001 | dma32_reserve_bootmem(); | 991 | dma32_reserve_bootmem(); |
1002 | 992 | ||
@@ -1029,8 +1019,8 @@ void __init setup_arch(char **cmdline_p) | |||
1029 | * Read APIC and some other early information from ACPI tables. | 1019 | * Read APIC and some other early information from ACPI tables. |
1030 | */ | 1020 | */ |
1031 | acpi_boot_init(); | 1021 | acpi_boot_init(); |
1032 | |||
1033 | sfi_init(); | 1022 | sfi_init(); |
1023 | x86_dtb_init(); | ||
1034 | 1024 | ||
1035 | /* | 1025 | /* |
1036 | * get boot-time SMP configuration: | 1026 | * get boot-time SMP configuration: |
@@ -1040,9 +1030,7 @@ void __init setup_arch(char **cmdline_p) | |||
1040 | 1030 | ||
1041 | prefill_possible_map(); | 1031 | prefill_possible_map(); |
1042 | 1032 | ||
1043 | #ifdef CONFIG_X86_64 | ||
1044 | init_cpu_to_node(); | 1033 | init_cpu_to_node(); |
1045 | #endif | ||
1046 | 1034 | ||
1047 | init_apic_mappings(); | 1035 | init_apic_mappings(); |
1048 | ioapic_and_gsi_init(); | 1036 | ioapic_and_gsi_init(); |
@@ -1066,6 +1054,8 @@ void __init setup_arch(char **cmdline_p) | |||
1066 | #endif | 1054 | #endif |
1067 | x86_init.oem.banner(); | 1055 | x86_init.oem.banner(); |
1068 | 1056 | ||
1057 | x86_init.timers.wallclock_init(); | ||
1058 | |||
1069 | mcheck_init(); | 1059 | mcheck_init(); |
1070 | 1060 | ||
1071 | local_irq_save(flags); | 1061 | local_irq_save(flags); |
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 002b79685f73..71f4727da373 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -225,10 +225,15 @@ void __init setup_per_cpu_areas(void) | |||
225 | per_cpu(x86_bios_cpu_apicid, cpu) = | 225 | per_cpu(x86_bios_cpu_apicid, cpu) = |
226 | early_per_cpu_map(x86_bios_cpu_apicid, cpu); | 226 | early_per_cpu_map(x86_bios_cpu_apicid, cpu); |
227 | #endif | 227 | #endif |
228 | #ifdef CONFIG_X86_32 | ||
229 | per_cpu(x86_cpu_to_logical_apicid, cpu) = | ||
230 | early_per_cpu_map(x86_cpu_to_logical_apicid, cpu); | ||
231 | #endif | ||
228 | #ifdef CONFIG_X86_64 | 232 | #ifdef CONFIG_X86_64 |
229 | per_cpu(irq_stack_ptr, cpu) = | 233 | per_cpu(irq_stack_ptr, cpu) = |
230 | per_cpu(irq_stack_union.irq_stack, cpu) + | 234 | per_cpu(irq_stack_union.irq_stack, cpu) + |
231 | IRQ_STACK_SIZE - 64; | 235 | IRQ_STACK_SIZE - 64; |
236 | #endif | ||
232 | #ifdef CONFIG_NUMA | 237 | #ifdef CONFIG_NUMA |
233 | per_cpu(x86_cpu_to_node_map, cpu) = | 238 | per_cpu(x86_cpu_to_node_map, cpu) = |
234 | early_per_cpu_map(x86_cpu_to_node_map, cpu); | 239 | early_per_cpu_map(x86_cpu_to_node_map, cpu); |
@@ -242,7 +247,6 @@ void __init setup_per_cpu_areas(void) | |||
242 | */ | 247 | */ |
243 | set_cpu_numa_node(cpu, early_cpu_to_node(cpu)); | 248 | set_cpu_numa_node(cpu, early_cpu_to_node(cpu)); |
244 | #endif | 249 | #endif |
245 | #endif | ||
246 | /* | 250 | /* |
247 | * Up to this point, the boot CPU has been using .init.data | 251 | * Up to this point, the boot CPU has been using .init.data |
248 | * area. Reload any changed state for the boot CPU. | 252 | * area. Reload any changed state for the boot CPU. |
@@ -256,7 +260,10 @@ void __init setup_per_cpu_areas(void) | |||
256 | early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; | 260 | early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; |
257 | early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; | 261 | early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; |
258 | #endif | 262 | #endif |
259 | #if defined(CONFIG_X86_64) && defined(CONFIG_NUMA) | 263 | #ifdef CONFIG_X86_32 |
264 | early_per_cpu_ptr(x86_cpu_to_logical_apicid) = NULL; | ||
265 | #endif | ||
266 | #ifdef CONFIG_NUMA | ||
260 | early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; | 267 | early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; |
261 | #endif | 268 | #endif |
262 | 269 | ||
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 08776a953487..c2871d3c71b6 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -64,6 +64,7 @@ | |||
64 | #include <asm/mtrr.h> | 64 | #include <asm/mtrr.h> |
65 | #include <asm/mwait.h> | 65 | #include <asm/mwait.h> |
66 | #include <asm/apic.h> | 66 | #include <asm/apic.h> |
67 | #include <asm/io_apic.h> | ||
67 | #include <asm/setup.h> | 68 | #include <asm/setup.h> |
68 | #include <asm/uv/uv.h> | 69 | #include <asm/uv/uv.h> |
69 | #include <linux/mc146818rtc.h> | 70 | #include <linux/mc146818rtc.h> |
@@ -71,10 +72,6 @@ | |||
71 | #include <asm/smpboot_hooks.h> | 72 | #include <asm/smpboot_hooks.h> |
72 | #include <asm/i8259.h> | 73 | #include <asm/i8259.h> |
73 | 74 | ||
74 | #ifdef CONFIG_X86_32 | ||
75 | u8 apicid_2_node[MAX_APICID]; | ||
76 | #endif | ||
77 | |||
78 | /* State of each CPU */ | 75 | /* State of each CPU */ |
79 | DEFINE_PER_CPU(int, cpu_state) = { 0 }; | 76 | DEFINE_PER_CPU(int, cpu_state) = { 0 }; |
80 | 77 | ||
@@ -130,68 +127,14 @@ EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); | |||
130 | DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); | 127 | DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); |
131 | EXPORT_PER_CPU_SYMBOL(cpu_core_map); | 128 | EXPORT_PER_CPU_SYMBOL(cpu_core_map); |
132 | 129 | ||
130 | DEFINE_PER_CPU(cpumask_var_t, cpu_llc_shared_map); | ||
131 | |||
133 | /* Per CPU bogomips and other parameters */ | 132 | /* Per CPU bogomips and other parameters */ |
134 | DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); | 133 | DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); |
135 | EXPORT_PER_CPU_SYMBOL(cpu_info); | 134 | EXPORT_PER_CPU_SYMBOL(cpu_info); |
136 | 135 | ||
137 | atomic_t init_deasserted; | 136 | atomic_t init_deasserted; |
138 | 137 | ||
139 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_32) | ||
140 | /* which node each logical CPU is on */ | ||
141 | int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 }; | ||
142 | EXPORT_SYMBOL(cpu_to_node_map); | ||
143 | |||
144 | /* set up a mapping between cpu and node. */ | ||
145 | static void map_cpu_to_node(int cpu, int node) | ||
146 | { | ||
147 | printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node); | ||
148 | cpumask_set_cpu(cpu, node_to_cpumask_map[node]); | ||
149 | cpu_to_node_map[cpu] = node; | ||
150 | } | ||
151 | |||
152 | /* undo a mapping between cpu and node. */ | ||
153 | static void unmap_cpu_to_node(int cpu) | ||
154 | { | ||
155 | int node; | ||
156 | |||
157 | printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu); | ||
158 | for (node = 0; node < MAX_NUMNODES; node++) | ||
159 | cpumask_clear_cpu(cpu, node_to_cpumask_map[node]); | ||
160 | cpu_to_node_map[cpu] = 0; | ||
161 | } | ||
162 | #else /* !(CONFIG_NUMA && CONFIG_X86_32) */ | ||
163 | #define map_cpu_to_node(cpu, node) ({}) | ||
164 | #define unmap_cpu_to_node(cpu) ({}) | ||
165 | #endif | ||
166 | |||
167 | #ifdef CONFIG_X86_32 | ||
168 | static int boot_cpu_logical_apicid; | ||
169 | |||
170 | u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = | ||
171 | { [0 ... NR_CPUS-1] = BAD_APICID }; | ||
172 | |||
173 | static void map_cpu_to_logical_apicid(void) | ||
174 | { | ||
175 | int cpu = smp_processor_id(); | ||
176 | int apicid = logical_smp_processor_id(); | ||
177 | int node = apic->apicid_to_node(apicid); | ||
178 | |||
179 | if (!node_online(node)) | ||
180 | node = first_online_node; | ||
181 | |||
182 | cpu_2_logical_apicid[cpu] = apicid; | ||
183 | map_cpu_to_node(cpu, node); | ||
184 | } | ||
185 | |||
186 | void numa_remove_cpu(int cpu) | ||
187 | { | ||
188 | cpu_2_logical_apicid[cpu] = BAD_APICID; | ||
189 | unmap_cpu_to_node(cpu); | ||
190 | } | ||
191 | #else | ||
192 | #define map_cpu_to_logical_apicid() do {} while (0) | ||
193 | #endif | ||
194 | |||
195 | /* | 138 | /* |
196 | * Report back to the Boot Processor. | 139 | * Report back to the Boot Processor. |
197 | * Running on AP. | 140 | * Running on AP. |
@@ -259,7 +202,6 @@ static void __cpuinit smp_callin(void) | |||
259 | apic->smp_callin_clear_local_apic(); | 202 | apic->smp_callin_clear_local_apic(); |
260 | setup_local_APIC(); | 203 | setup_local_APIC(); |
261 | end_local_APIC_setup(); | 204 | end_local_APIC_setup(); |
262 | map_cpu_to_logical_apicid(); | ||
263 | 205 | ||
264 | /* | 206 | /* |
265 | * Need to setup vector mappings before we enable interrupts. | 207 | * Need to setup vector mappings before we enable interrupts. |
@@ -355,23 +297,6 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
355 | cpu_idle(); | 297 | cpu_idle(); |
356 | } | 298 | } |
357 | 299 | ||
358 | #ifdef CONFIG_CPUMASK_OFFSTACK | ||
359 | /* In this case, llc_shared_map is a pointer to a cpumask. */ | ||
360 | static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst, | ||
361 | const struct cpuinfo_x86 *src) | ||
362 | { | ||
363 | struct cpumask *llc = dst->llc_shared_map; | ||
364 | *dst = *src; | ||
365 | dst->llc_shared_map = llc; | ||
366 | } | ||
367 | #else | ||
368 | static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst, | ||
369 | const struct cpuinfo_x86 *src) | ||
370 | { | ||
371 | *dst = *src; | ||
372 | } | ||
373 | #endif /* CONFIG_CPUMASK_OFFSTACK */ | ||
374 | |||
375 | /* | 300 | /* |
376 | * The bootstrap kernel entry code has set these up. Save them for | 301 | * The bootstrap kernel entry code has set these up. Save them for |
377 | * a given CPU | 302 | * a given CPU |
@@ -381,7 +306,7 @@ void __cpuinit smp_store_cpu_info(int id) | |||
381 | { | 306 | { |
382 | struct cpuinfo_x86 *c = &cpu_data(id); | 307 | struct cpuinfo_x86 *c = &cpu_data(id); |
383 | 308 | ||
384 | copy_cpuinfo_x86(c, &boot_cpu_data); | 309 | *c = boot_cpu_data; |
385 | c->cpu_index = id; | 310 | c->cpu_index = id; |
386 | if (id != 0) | 311 | if (id != 0) |
387 | identify_secondary_cpu(c); | 312 | identify_secondary_cpu(c); |
@@ -389,15 +314,12 @@ void __cpuinit smp_store_cpu_info(int id) | |||
389 | 314 | ||
390 | static void __cpuinit link_thread_siblings(int cpu1, int cpu2) | 315 | static void __cpuinit link_thread_siblings(int cpu1, int cpu2) |
391 | { | 316 | { |
392 | struct cpuinfo_x86 *c1 = &cpu_data(cpu1); | ||
393 | struct cpuinfo_x86 *c2 = &cpu_data(cpu2); | ||
394 | |||
395 | cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2)); | 317 | cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2)); |
396 | cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1)); | 318 | cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1)); |
397 | cpumask_set_cpu(cpu1, cpu_core_mask(cpu2)); | 319 | cpumask_set_cpu(cpu1, cpu_core_mask(cpu2)); |
398 | cpumask_set_cpu(cpu2, cpu_core_mask(cpu1)); | 320 | cpumask_set_cpu(cpu2, cpu_core_mask(cpu1)); |
399 | cpumask_set_cpu(cpu1, c2->llc_shared_map); | 321 | cpumask_set_cpu(cpu1, cpu_llc_shared_mask(cpu2)); |
400 | cpumask_set_cpu(cpu2, c1->llc_shared_map); | 322 | cpumask_set_cpu(cpu2, cpu_llc_shared_mask(cpu1)); |
401 | } | 323 | } |
402 | 324 | ||
403 | 325 | ||
@@ -414,6 +336,7 @@ void __cpuinit set_cpu_sibling_map(int cpu) | |||
414 | 336 | ||
415 | if (cpu_has(c, X86_FEATURE_TOPOEXT)) { | 337 | if (cpu_has(c, X86_FEATURE_TOPOEXT)) { |
416 | if (c->phys_proc_id == o->phys_proc_id && | 338 | if (c->phys_proc_id == o->phys_proc_id && |
339 | per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i) && | ||
417 | c->compute_unit_id == o->compute_unit_id) | 340 | c->compute_unit_id == o->compute_unit_id) |
418 | link_thread_siblings(cpu, i); | 341 | link_thread_siblings(cpu, i); |
419 | } else if (c->phys_proc_id == o->phys_proc_id && | 342 | } else if (c->phys_proc_id == o->phys_proc_id && |
@@ -425,7 +348,7 @@ void __cpuinit set_cpu_sibling_map(int cpu) | |||
425 | cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); | 348 | cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); |
426 | } | 349 | } |
427 | 350 | ||
428 | cpumask_set_cpu(cpu, c->llc_shared_map); | 351 | cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu)); |
429 | 352 | ||
430 | if (__this_cpu_read(cpu_info.x86_max_cores) == 1) { | 353 | if (__this_cpu_read(cpu_info.x86_max_cores) == 1) { |
431 | cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu)); | 354 | cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu)); |
@@ -436,8 +359,8 @@ void __cpuinit set_cpu_sibling_map(int cpu) | |||
436 | for_each_cpu(i, cpu_sibling_setup_mask) { | 359 | for_each_cpu(i, cpu_sibling_setup_mask) { |
437 | if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && | 360 | if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && |
438 | per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { | 361 | per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { |
439 | cpumask_set_cpu(i, c->llc_shared_map); | 362 | cpumask_set_cpu(i, cpu_llc_shared_mask(cpu)); |
440 | cpumask_set_cpu(cpu, cpu_data(i).llc_shared_map); | 363 | cpumask_set_cpu(cpu, cpu_llc_shared_mask(i)); |
441 | } | 364 | } |
442 | if (c->phys_proc_id == cpu_data(i).phys_proc_id) { | 365 | if (c->phys_proc_id == cpu_data(i).phys_proc_id) { |
443 | cpumask_set_cpu(i, cpu_core_mask(cpu)); | 366 | cpumask_set_cpu(i, cpu_core_mask(cpu)); |
@@ -476,7 +399,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu) | |||
476 | !(cpu_has(c, X86_FEATURE_AMD_DCM))) | 399 | !(cpu_has(c, X86_FEATURE_AMD_DCM))) |
477 | return cpu_core_mask(cpu); | 400 | return cpu_core_mask(cpu); |
478 | else | 401 | else |
479 | return c->llc_shared_map; | 402 | return cpu_llc_shared_mask(cpu); |
480 | } | 403 | } |
481 | 404 | ||
482 | static void impress_friends(void) | 405 | static void impress_friends(void) |
@@ -788,7 +711,7 @@ do_rest: | |||
788 | stack_start = c_idle.idle->thread.sp; | 711 | stack_start = c_idle.idle->thread.sp; |
789 | 712 | ||
790 | /* start_ip had better be page-aligned! */ | 713 | /* start_ip had better be page-aligned! */ |
791 | start_ip = setup_trampoline(); | 714 | start_ip = trampoline_address(); |
792 | 715 | ||
793 | /* So we see what's up */ | 716 | /* So we see what's up */ |
794 | announce_cpu(cpu, apicid); | 717 | announce_cpu(cpu, apicid); |
@@ -798,6 +721,8 @@ do_rest: | |||
798 | * the targeted processor. | 721 | * the targeted processor. |
799 | */ | 722 | */ |
800 | 723 | ||
724 | printk(KERN_DEBUG "smpboot cpu %d: start_ip = %lx\n", cpu, start_ip); | ||
725 | |||
801 | atomic_set(&init_deasserted, 0); | 726 | atomic_set(&init_deasserted, 0); |
802 | 727 | ||
803 | if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { | 728 | if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { |
@@ -851,8 +776,8 @@ do_rest: | |||
851 | pr_debug("CPU%d: has booted.\n", cpu); | 776 | pr_debug("CPU%d: has booted.\n", cpu); |
852 | else { | 777 | else { |
853 | boot_error = 1; | 778 | boot_error = 1; |
854 | if (*((volatile unsigned char *)trampoline_base) | 779 | if (*(volatile u32 *)TRAMPOLINE_SYM(trampoline_status) |
855 | == 0xA5) | 780 | == 0xA5A5A5A5) |
856 | /* trampoline started but...? */ | 781 | /* trampoline started but...? */ |
857 | pr_err("CPU%d: Stuck ??\n", cpu); | 782 | pr_err("CPU%d: Stuck ??\n", cpu); |
858 | else | 783 | else |
@@ -878,7 +803,7 @@ do_rest: | |||
878 | } | 803 | } |
879 | 804 | ||
880 | /* mark "stuck" area as not stuck */ | 805 | /* mark "stuck" area as not stuck */ |
881 | *((volatile unsigned long *)trampoline_base) = 0; | 806 | *(volatile u32 *)TRAMPOLINE_SYM(trampoline_status) = 0; |
882 | 807 | ||
883 | if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { | 808 | if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { |
884 | /* | 809 | /* |
@@ -945,6 +870,14 @@ int __cpuinit native_cpu_up(unsigned int cpu) | |||
945 | return 0; | 870 | return 0; |
946 | } | 871 | } |
947 | 872 | ||
873 | /** | ||
874 | * arch_disable_smp_support() - disables SMP support for x86 at runtime | ||
875 | */ | ||
876 | void arch_disable_smp_support(void) | ||
877 | { | ||
878 | disable_ioapic_support(); | ||
879 | } | ||
880 | |||
948 | /* | 881 | /* |
949 | * Fall back to non SMP mode after errors. | 882 | * Fall back to non SMP mode after errors. |
950 | * | 883 | * |
@@ -960,7 +893,6 @@ static __init void disable_smp(void) | |||
960 | physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); | 893 | physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); |
961 | else | 894 | else |
962 | physid_set_mask_of_physid(0, &phys_cpu_present_map); | 895 | physid_set_mask_of_physid(0, &phys_cpu_present_map); |
963 | map_cpu_to_logical_apicid(); | ||
964 | cpumask_set_cpu(0, cpu_sibling_mask(0)); | 896 | cpumask_set_cpu(0, cpu_sibling_mask(0)); |
965 | cpumask_set_cpu(0, cpu_core_mask(0)); | 897 | cpumask_set_cpu(0, cpu_core_mask(0)); |
966 | } | 898 | } |
@@ -1045,7 +977,7 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
1045 | "(tell your hw vendor)\n"); | 977 | "(tell your hw vendor)\n"); |
1046 | } | 978 | } |
1047 | smpboot_clear_io_apic(); | 979 | smpboot_clear_io_apic(); |
1048 | arch_disable_smp_support(); | 980 | disable_ioapic_support(); |
1049 | return -1; | 981 | return -1; |
1050 | } | 982 | } |
1051 | 983 | ||
@@ -1089,21 +1021,19 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1089 | 1021 | ||
1090 | preempt_disable(); | 1022 | preempt_disable(); |
1091 | smp_cpu_index_default(); | 1023 | smp_cpu_index_default(); |
1092 | memcpy(__this_cpu_ptr(&cpu_info), &boot_cpu_data, sizeof(cpu_info)); | 1024 | |
1093 | cpumask_copy(cpu_callin_mask, cpumask_of(0)); | ||
1094 | mb(); | ||
1095 | /* | 1025 | /* |
1096 | * Setup boot CPU information | 1026 | * Setup boot CPU information |
1097 | */ | 1027 | */ |
1098 | smp_store_cpu_info(0); /* Final full version of the data */ | 1028 | smp_store_cpu_info(0); /* Final full version of the data */ |
1099 | #ifdef CONFIG_X86_32 | 1029 | cpumask_copy(cpu_callin_mask, cpumask_of(0)); |
1100 | boot_cpu_logical_apicid = logical_smp_processor_id(); | 1030 | mb(); |
1101 | #endif | 1031 | |
1102 | current_thread_info()->cpu = 0; /* needed? */ | 1032 | current_thread_info()->cpu = 0; /* needed? */ |
1103 | for_each_possible_cpu(i) { | 1033 | for_each_possible_cpu(i) { |
1104 | zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); | 1034 | zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); |
1105 | zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); | 1035 | zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); |
1106 | zalloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL); | 1036 | zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL); |
1107 | } | 1037 | } |
1108 | set_cpu_sibling_map(0); | 1038 | set_cpu_sibling_map(0); |
1109 | 1039 | ||
@@ -1139,8 +1069,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1139 | 1069 | ||
1140 | bsp_end_local_APIC_setup(); | 1070 | bsp_end_local_APIC_setup(); |
1141 | 1071 | ||
1142 | map_cpu_to_logical_apicid(); | ||
1143 | |||
1144 | if (apic->setup_portio_remap) | 1072 | if (apic->setup_portio_remap) |
1145 | apic->setup_portio_remap(); | 1073 | apic->setup_portio_remap(); |
1146 | 1074 | ||
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 938c8e10a19a..6515733a289d 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c | |||
@@ -73,7 +73,7 @@ static const struct stacktrace_ops save_stack_ops_nosched = { | |||
73 | */ | 73 | */ |
74 | void save_stack_trace(struct stack_trace *trace) | 74 | void save_stack_trace(struct stack_trace *trace) |
75 | { | 75 | { |
76 | dump_trace(current, NULL, NULL, &save_stack_ops, trace); | 76 | dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace); |
77 | if (trace->nr_entries < trace->max_entries) | 77 | if (trace->nr_entries < trace->max_entries) |
78 | trace->entries[trace->nr_entries++] = ULONG_MAX; | 78 | trace->entries[trace->nr_entries++] = ULONG_MAX; |
79 | } | 79 | } |
@@ -81,14 +81,14 @@ EXPORT_SYMBOL_GPL(save_stack_trace); | |||
81 | 81 | ||
82 | void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs) | 82 | void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs) |
83 | { | 83 | { |
84 | dump_trace(current, regs, NULL, &save_stack_ops, trace); | 84 | dump_trace(current, regs, NULL, 0, &save_stack_ops, trace); |
85 | if (trace->nr_entries < trace->max_entries) | 85 | if (trace->nr_entries < trace->max_entries) |
86 | trace->entries[trace->nr_entries++] = ULONG_MAX; | 86 | trace->entries[trace->nr_entries++] = ULONG_MAX; |
87 | } | 87 | } |
88 | 88 | ||
89 | void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) | 89 | void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) |
90 | { | 90 | { |
91 | dump_trace(tsk, NULL, NULL, &save_stack_ops_nosched, trace); | 91 | dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); |
92 | if (trace->nr_entries < trace->max_entries) | 92 | if (trace->nr_entries < trace->max_entries) |
93 | trace->entries[trace->nr_entries++] = ULONG_MAX; | 93 | trace->entries[trace->nr_entries++] = ULONG_MAX; |
94 | } | 94 | } |
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 58de45ee08b6..7977f0cfe339 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c | |||
@@ -166,7 +166,7 @@ static void enable_step(struct task_struct *child, bool block) | |||
166 | * Make sure block stepping (BTF) is not enabled unless it should be. | 166 | * Make sure block stepping (BTF) is not enabled unless it should be. |
167 | * Note that we don't try to worry about any is_setting_trap_flag() | 167 | * Note that we don't try to worry about any is_setting_trap_flag() |
168 | * instructions after the first when using block stepping. | 168 | * instructions after the first when using block stepping. |
169 | * So noone should try to use debugger block stepping in a program | 169 | * So no one should try to use debugger block stepping in a program |
170 | * that uses user-mode single stepping itself. | 170 | * that uses user-mode single stepping itself. |
171 | */ | 171 | */ |
172 | if (enable_single_step(child) && block) { | 172 | if (enable_single_step(child) && block) { |
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index b35786dc9b8f..abce34d5c79d 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
@@ -340,3 +340,7 @@ ENTRY(sys_call_table) | |||
340 | .long sys_fanotify_init | 340 | .long sys_fanotify_init |
341 | .long sys_fanotify_mark | 341 | .long sys_fanotify_mark |
342 | .long sys_prlimit64 /* 340 */ | 342 | .long sys_prlimit64 /* 340 */ |
343 | .long sys_name_to_handle_at | ||
344 | .long sys_open_by_handle_at | ||
345 | .long sys_clock_adjtime | ||
346 | .long sys_syncfs | ||
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index 7e4515957a1c..8927486a4649 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c | |||
@@ -39,7 +39,7 @@ int __ref arch_register_cpu(int num) | |||
39 | /* | 39 | /* |
40 | * CPU0 cannot be offlined due to several | 40 | * CPU0 cannot be offlined due to several |
41 | * restrictions and assumptions in kernel. This basically | 41 | * restrictions and assumptions in kernel. This basically |
42 | * doesnt add a control file, one cannot attempt to offline | 42 | * doesn't add a control file, one cannot attempt to offline |
43 | * BSP. | 43 | * BSP. |
44 | * | 44 | * |
45 | * Also certain PCI quirks require not to enable hotplug control | 45 | * Also certain PCI quirks require not to enable hotplug control |
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index a375616d77f7..a91ae7709b49 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c | |||
@@ -2,39 +2,41 @@ | |||
2 | #include <linux/memblock.h> | 2 | #include <linux/memblock.h> |
3 | 3 | ||
4 | #include <asm/trampoline.h> | 4 | #include <asm/trampoline.h> |
5 | #include <asm/cacheflush.h> | ||
5 | #include <asm/pgtable.h> | 6 | #include <asm/pgtable.h> |
6 | 7 | ||
7 | #if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP) | 8 | unsigned char *x86_trampoline_base; |
8 | #define __trampinit | ||
9 | #define __trampinitdata | ||
10 | #else | ||
11 | #define __trampinit __cpuinit | ||
12 | #define __trampinitdata __cpuinitdata | ||
13 | #endif | ||
14 | 9 | ||
15 | /* ready for x86_64 and x86 */ | 10 | void __init setup_trampolines(void) |
16 | unsigned char *__trampinitdata trampoline_base; | ||
17 | |||
18 | void __init reserve_trampoline_memory(void) | ||
19 | { | 11 | { |
20 | phys_addr_t mem; | 12 | phys_addr_t mem; |
13 | size_t size = PAGE_ALIGN(x86_trampoline_end - x86_trampoline_start); | ||
21 | 14 | ||
22 | /* Has to be in very low memory so we can execute real-mode AP code. */ | 15 | /* Has to be in very low memory so we can execute real-mode AP code. */ |
23 | mem = memblock_find_in_range(0, 1<<20, TRAMPOLINE_SIZE, PAGE_SIZE); | 16 | mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); |
24 | if (mem == MEMBLOCK_ERROR) | 17 | if (mem == MEMBLOCK_ERROR) |
25 | panic("Cannot allocate trampoline\n"); | 18 | panic("Cannot allocate trampoline\n"); |
26 | 19 | ||
27 | trampoline_base = __va(mem); | 20 | x86_trampoline_base = __va(mem); |
28 | memblock_x86_reserve_range(mem, mem + TRAMPOLINE_SIZE, "TRAMPOLINE"); | 21 | memblock_x86_reserve_range(mem, mem + size, "TRAMPOLINE"); |
22 | |||
23 | printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", | ||
24 | x86_trampoline_base, (unsigned long long)mem, size); | ||
25 | |||
26 | memcpy(x86_trampoline_base, x86_trampoline_start, size); | ||
29 | } | 27 | } |
30 | 28 | ||
31 | /* | 29 | /* |
32 | * Currently trivial. Write the real->protected mode | 30 | * setup_trampolines() gets called very early, to guarantee the |
33 | * bootstrap into the page concerned. The caller | 31 | * availability of low memory. This is before the proper kernel page |
34 | * has made sure it's suitably aligned. | 32 | * tables are set up, so we cannot set page permissions in that |
33 | * function. Thus, we use an arch_initcall instead. | ||
35 | */ | 34 | */ |
36 | unsigned long __trampinit setup_trampoline(void) | 35 | static int __init configure_trampolines(void) |
37 | { | 36 | { |
38 | memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE); | 37 | size_t size = PAGE_ALIGN(x86_trampoline_end - x86_trampoline_start); |
39 | return virt_to_phys(trampoline_base); | 38 | |
39 | set_memory_x((unsigned long)x86_trampoline_base, size >> PAGE_SHIFT); | ||
40 | return 0; | ||
40 | } | 41 | } |
42 | arch_initcall(configure_trampolines); | ||
diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S index 8508237e8e43..451c0a7ef7fd 100644 --- a/arch/x86/kernel/trampoline_32.S +++ b/arch/x86/kernel/trampoline_32.S | |||
@@ -32,9 +32,11 @@ | |||
32 | #include <asm/segment.h> | 32 | #include <asm/segment.h> |
33 | #include <asm/page_types.h> | 33 | #include <asm/page_types.h> |
34 | 34 | ||
35 | /* We can free up trampoline after bootup if cpu hotplug is not supported. */ | 35 | #ifdef CONFIG_SMP |
36 | __CPUINITRODATA | 36 | |
37 | .code16 | 37 | .section ".x86_trampoline","a" |
38 | .balign PAGE_SIZE | ||
39 | .code16 | ||
38 | 40 | ||
39 | ENTRY(trampoline_data) | 41 | ENTRY(trampoline_data) |
40 | r_base = . | 42 | r_base = . |
@@ -44,7 +46,7 @@ r_base = . | |||
44 | 46 | ||
45 | cli # We should be safe anyway | 47 | cli # We should be safe anyway |
46 | 48 | ||
47 | movl $0xA5A5A5A5, trampoline_data - r_base | 49 | movl $0xA5A5A5A5, trampoline_status - r_base |
48 | # write marker for master knows we're running | 50 | # write marker for master knows we're running |
49 | 51 | ||
50 | /* GDT tables in non default location kernel can be beyond 16MB and | 52 | /* GDT tables in non default location kernel can be beyond 16MB and |
@@ -72,5 +74,10 @@ boot_idt_descr: | |||
72 | .word 0 # idt limit = 0 | 74 | .word 0 # idt limit = 0 |
73 | .long 0 # idt base = 0L | 75 | .long 0 # idt base = 0L |
74 | 76 | ||
77 | ENTRY(trampoline_status) | ||
78 | .long 0 | ||
79 | |||
75 | .globl trampoline_end | 80 | .globl trampoline_end |
76 | trampoline_end: | 81 | trampoline_end: |
82 | |||
83 | #endif /* CONFIG_SMP */ | ||
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S index 075d130efcf9..09ff51799e96 100644 --- a/arch/x86/kernel/trampoline_64.S +++ b/arch/x86/kernel/trampoline_64.S | |||
@@ -32,13 +32,9 @@ | |||
32 | #include <asm/segment.h> | 32 | #include <asm/segment.h> |
33 | #include <asm/processor-flags.h> | 33 | #include <asm/processor-flags.h> |
34 | 34 | ||
35 | #ifdef CONFIG_ACPI_SLEEP | 35 | .section ".x86_trampoline","a" |
36 | .section .rodata, "a", @progbits | 36 | .balign PAGE_SIZE |
37 | #else | 37 | .code16 |
38 | /* We can free up the trampoline after bootup if cpu hotplug is not supported. */ | ||
39 | __CPUINITRODATA | ||
40 | #endif | ||
41 | .code16 | ||
42 | 38 | ||
43 | ENTRY(trampoline_data) | 39 | ENTRY(trampoline_data) |
44 | r_base = . | 40 | r_base = . |
@@ -50,7 +46,7 @@ r_base = . | |||
50 | mov %ax, %ss | 46 | mov %ax, %ss |
51 | 47 | ||
52 | 48 | ||
53 | movl $0xA5A5A5A5, trampoline_data - r_base | 49 | movl $0xA5A5A5A5, trampoline_status - r_base |
54 | # write marker for master knows we're running | 50 | # write marker for master knows we're running |
55 | 51 | ||
56 | # Setup stack | 52 | # Setup stack |
@@ -64,10 +60,13 @@ r_base = . | |||
64 | movzx %ax, %esi # Find the 32bit trampoline location | 60 | movzx %ax, %esi # Find the 32bit trampoline location |
65 | shll $4, %esi | 61 | shll $4, %esi |
66 | 62 | ||
67 | # Fixup the vectors | 63 | # Fixup the absolute vectors |
68 | addl %esi, startup_32_vector - r_base | 64 | leal (startup_32 - r_base)(%esi), %eax |
69 | addl %esi, startup_64_vector - r_base | 65 | movl %eax, startup_32_vector - r_base |
70 | addl %esi, tgdt + 2 - r_base # Fixup the gdt pointer | 66 | leal (startup_64 - r_base)(%esi), %eax |
67 | movl %eax, startup_64_vector - r_base | ||
68 | leal (tgdt - r_base)(%esi), %eax | ||
69 | movl %eax, (tgdt + 2 - r_base) | ||
71 | 70 | ||
72 | /* | 71 | /* |
73 | * GDT tables in non default location kernel can be beyond 16MB and | 72 | * GDT tables in non default location kernel can be beyond 16MB and |
@@ -129,6 +128,7 @@ no_longmode: | |||
129 | jmp no_longmode | 128 | jmp no_longmode |
130 | #include "verify_cpu.S" | 129 | #include "verify_cpu.S" |
131 | 130 | ||
131 | .balign 4 | ||
132 | # Careful these need to be in the same 64K segment as the above; | 132 | # Careful these need to be in the same 64K segment as the above; |
133 | tidt: | 133 | tidt: |
134 | .word 0 # idt limit = 0 | 134 | .word 0 # idt limit = 0 |
@@ -156,6 +156,10 @@ startup_64_vector: | |||
156 | .long startup_64 - r_base | 156 | .long startup_64 - r_base |
157 | .word __KERNEL_CS, 0 | 157 | .word __KERNEL_CS, 0 |
158 | 158 | ||
159 | .balign 4 | ||
160 | ENTRY(trampoline_status) | ||
161 | .long 0 | ||
162 | |||
159 | trampoline_stack: | 163 | trampoline_stack: |
160 | .org 0x1000 | 164 | .org 0x1000 |
161 | trampoline_stack_end: | 165 | trampoline_stack_end: |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index ffe5755caa8b..9335bf7dd2e7 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -427,7 +427,7 @@ unsigned long native_calibrate_tsc(void) | |||
427 | * the delta to the previous read. We keep track of the min | 427 | * the delta to the previous read. We keep track of the min |
428 | * and max values of that delta. The delta is mostly defined | 428 | * and max values of that delta. The delta is mostly defined |
429 | * by the IO time of the PIT access, so we can detect when a | 429 | * by the IO time of the PIT access, so we can detect when a |
430 | * SMI/SMM disturbance happend between the two reads. If the | 430 | * SMI/SMM disturbance happened between the two reads. If the |
431 | * maximum time is significantly larger than the minimum time, | 431 | * maximum time is significantly larger than the minimum time, |
432 | * then we discard the result and have another try. | 432 | * then we discard the result and have another try. |
433 | * | 433 | * |
@@ -900,7 +900,7 @@ static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work); | |||
900 | * timer based, instead of loop based, we don't block the boot | 900 | * timer based, instead of loop based, we don't block the boot |
901 | * process while this longer calibration is done. | 901 | * process while this longer calibration is done. |
902 | * | 902 | * |
903 | * If there are any calibration anomolies (too many SMIs, etc), | 903 | * If there are any calibration anomalies (too many SMIs, etc), |
904 | * or the refined calibration is off by 1% of the fast early | 904 | * or the refined calibration is off by 1% of the fast early |
905 | * calibration, we throw out the new calibration and use the | 905 | * calibration, we throw out the new calibration and use the |
906 | * early calibration. | 906 | * early calibration. |
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S index 0edefc19a113..b9242bacbe59 100644 --- a/arch/x86/kernel/verify_cpu.S +++ b/arch/x86/kernel/verify_cpu.S | |||
@@ -18,7 +18,7 @@ | |||
18 | * This file is expected to run in 32bit code. Currently: | 18 | * This file is expected to run in 32bit code. Currently: |
19 | * | 19 | * |
20 | * arch/x86/boot/compressed/head_64.S: Boot cpu verification | 20 | * arch/x86/boot/compressed/head_64.S: Boot cpu verification |
21 | * arch/x86/kernel/trampoline_64.S: secondary processor verfication | 21 | * arch/x86/kernel/trampoline_64.S: secondary processor verification |
22 | * arch/x86/kernel/head_32.S: processor startup | 22 | * arch/x86/kernel/head_32.S: processor startup |
23 | * | 23 | * |
24 | * verify_cpu, returns the status of longmode and SSE in register %eax. | 24 | * verify_cpu, returns the status of longmode and SSE in register %eax. |
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index bf4700755184..624a2016198e 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -105,6 +105,7 @@ SECTIONS | |||
105 | SCHED_TEXT | 105 | SCHED_TEXT |
106 | LOCK_TEXT | 106 | LOCK_TEXT |
107 | KPROBES_TEXT | 107 | KPROBES_TEXT |
108 | ENTRY_TEXT | ||
108 | IRQENTRY_TEXT | 109 | IRQENTRY_TEXT |
109 | *(.fixup) | 110 | *(.fixup) |
110 | *(.gnu.warning) | 111 | *(.gnu.warning) |
@@ -230,7 +231,7 @@ SECTIONS | |||
230 | * output PHDR, so the next output section - .init.text - should | 231 | * output PHDR, so the next output section - .init.text - should |
231 | * start another segment - init. | 232 | * start another segment - init. |
232 | */ | 233 | */ |
233 | PERCPU_VADDR(0, :percpu) | 234 | PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu) |
234 | #endif | 235 | #endif |
235 | 236 | ||
236 | INIT_TEXT_SECTION(PAGE_SIZE) | 237 | INIT_TEXT_SECTION(PAGE_SIZE) |
@@ -240,6 +241,18 @@ SECTIONS | |||
240 | 241 | ||
241 | INIT_DATA_SECTION(16) | 242 | INIT_DATA_SECTION(16) |
242 | 243 | ||
244 | /* | ||
245 | * Code and data for a variety of lowlevel trampolines, to be | ||
246 | * copied into base memory (< 1 MiB) during initialization. | ||
247 | * Since it is copied early, the main copy can be discarded | ||
248 | * afterwards. | ||
249 | */ | ||
250 | .x86_trampoline : AT(ADDR(.x86_trampoline) - LOAD_OFFSET) { | ||
251 | x86_trampoline_start = .; | ||
252 | *(.x86_trampoline) | ||
253 | x86_trampoline_end = .; | ||
254 | } | ||
255 | |||
243 | .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { | 256 | .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { |
244 | __x86_cpu_dev_start = .; | 257 | __x86_cpu_dev_start = .; |
245 | *(.x86_cpu_dev.init) | 258 | *(.x86_cpu_dev.init) |
@@ -291,6 +304,7 @@ SECTIONS | |||
291 | *(.iommu_table) | 304 | *(.iommu_table) |
292 | __iommu_table_end = .; | 305 | __iommu_table_end = .; |
293 | } | 306 | } |
307 | |||
294 | . = ALIGN(8); | 308 | . = ALIGN(8); |
295 | /* | 309 | /* |
296 | * .exit.text is discard at runtime, not link time, to deal with | 310 | * .exit.text is discard at runtime, not link time, to deal with |
@@ -305,7 +319,7 @@ SECTIONS | |||
305 | } | 319 | } |
306 | 320 | ||
307 | #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) | 321 | #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) |
308 | PERCPU(THREAD_SIZE) | 322 | PERCPU(INTERNODE_CACHE_BYTES, PAGE_SIZE) |
309 | #endif | 323 | #endif |
310 | 324 | ||
311 | . = ALIGN(PAGE_SIZE); | 325 | . = ALIGN(PAGE_SIZE); |
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 1b950d151e58..9796c2f3d074 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c | |||
@@ -52,6 +52,7 @@ extern void *__memcpy(void *, const void *, __kernel_size_t); | |||
52 | EXPORT_SYMBOL(memset); | 52 | EXPORT_SYMBOL(memset); |
53 | EXPORT_SYMBOL(memcpy); | 53 | EXPORT_SYMBOL(memcpy); |
54 | EXPORT_SYMBOL(__memcpy); | 54 | EXPORT_SYMBOL(__memcpy); |
55 | EXPORT_SYMBOL(memmove); | ||
55 | 56 | ||
56 | EXPORT_SYMBOL(empty_zero_page); | 57 | EXPORT_SYMBOL(empty_zero_page); |
57 | #ifndef CONFIG_PARAVIRT | 58 | #ifndef CONFIG_PARAVIRT |
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index ceb2911aa439..c11514e9128b 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
@@ -70,6 +70,7 @@ struct x86_init_ops x86_init __initdata = { | |||
70 | .setup_percpu_clockev = setup_boot_APIC_clock, | 70 | .setup_percpu_clockev = setup_boot_APIC_clock, |
71 | .tsc_pre_init = x86_init_noop, | 71 | .tsc_pre_init = x86_init_noop, |
72 | .timer_init = hpet_time_init, | 72 | .timer_init = hpet_time_init, |
73 | .wallclock_init = x86_init_noop, | ||
73 | }, | 74 | }, |
74 | 75 | ||
75 | .iommu = { | 76 | .iommu = { |
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 547128546cc3..a3911343976b 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c | |||
@@ -53,7 +53,7 @@ void __sanitize_i387_state(struct task_struct *tsk) | |||
53 | 53 | ||
54 | /* | 54 | /* |
55 | * None of the feature bits are in init state. So nothing else | 55 | * None of the feature bits are in init state. So nothing else |
56 | * to do for us, as the memory layout is upto date. | 56 | * to do for us, as the memory layout is up to date. |
57 | */ | 57 | */ |
58 | if ((xstate_bv & pcntxt_mask) == pcntxt_mask) | 58 | if ((xstate_bv & pcntxt_mask) == pcntxt_mask) |
59 | return; | 59 | return; |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index caf966781d25..0ad47b819a8b 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -76,6 +76,7 @@ | |||
76 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ | 76 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ |
77 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ | 77 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ |
78 | /* Misc flags */ | 78 | /* Misc flags */ |
79 | #define VendorSpecific (1<<22) /* Vendor specific instruction */ | ||
79 | #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */ | 80 | #define NoAccess (1<<23) /* Don't access memory (lea/invlpg/verr etc) */ |
80 | #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */ | 81 | #define Op3264 (1<<24) /* Operand is 64b in long mode, 32b otherwise */ |
81 | #define Undefined (1<<25) /* No Such Instruction */ | 82 | #define Undefined (1<<25) /* No Such Instruction */ |
@@ -877,7 +878,8 @@ static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, | |||
877 | if (selector & 1 << 2) { | 878 | if (selector & 1 << 2) { |
878 | struct desc_struct desc; | 879 | struct desc_struct desc; |
879 | memset (dt, 0, sizeof *dt); | 880 | memset (dt, 0, sizeof *dt); |
880 | if (!ops->get_cached_descriptor(&desc, VCPU_SREG_LDTR, ctxt->vcpu)) | 881 | if (!ops->get_cached_descriptor(&desc, NULL, VCPU_SREG_LDTR, |
882 | ctxt->vcpu)) | ||
881 | return; | 883 | return; |
882 | 884 | ||
883 | dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */ | 885 | dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */ |
@@ -929,6 +931,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
929 | return ret; | 931 | return ret; |
930 | } | 932 | } |
931 | 933 | ||
934 | /* Does not support long mode */ | ||
932 | static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | 935 | static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, |
933 | struct x86_emulate_ops *ops, | 936 | struct x86_emulate_ops *ops, |
934 | u16 selector, int seg) | 937 | u16 selector, int seg) |
@@ -1040,7 +1043,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, | |||
1040 | } | 1043 | } |
1041 | load: | 1044 | load: |
1042 | ops->set_segment_selector(selector, seg, ctxt->vcpu); | 1045 | ops->set_segment_selector(selector, seg, ctxt->vcpu); |
1043 | ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu); | 1046 | ops->set_cached_descriptor(&seg_desc, 0, seg, ctxt->vcpu); |
1044 | return X86EMUL_CONTINUE; | 1047 | return X86EMUL_CONTINUE; |
1045 | exception: | 1048 | exception: |
1046 | emulate_exception(ctxt, err_vec, err_code, true); | 1049 | emulate_exception(ctxt, err_vec, err_code, true); |
@@ -1560,7 +1563,7 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, | |||
1560 | struct desc_struct *ss) | 1563 | struct desc_struct *ss) |
1561 | { | 1564 | { |
1562 | memset(cs, 0, sizeof(struct desc_struct)); | 1565 | memset(cs, 0, sizeof(struct desc_struct)); |
1563 | ops->get_cached_descriptor(cs, VCPU_SREG_CS, ctxt->vcpu); | 1566 | ops->get_cached_descriptor(cs, NULL, VCPU_SREG_CS, ctxt->vcpu); |
1564 | memset(ss, 0, sizeof(struct desc_struct)); | 1567 | memset(ss, 0, sizeof(struct desc_struct)); |
1565 | 1568 | ||
1566 | cs->l = 0; /* will be adjusted later */ | 1569 | cs->l = 0; /* will be adjusted later */ |
@@ -1607,9 +1610,9 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1607 | cs.d = 0; | 1610 | cs.d = 0; |
1608 | cs.l = 1; | 1611 | cs.l = 1; |
1609 | } | 1612 | } |
1610 | ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu); | 1613 | ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); |
1611 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); | 1614 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); |
1612 | ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu); | 1615 | ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); |
1613 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); | 1616 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); |
1614 | 1617 | ||
1615 | c->regs[VCPU_REGS_RCX] = c->eip; | 1618 | c->regs[VCPU_REGS_RCX] = c->eip; |
@@ -1679,9 +1682,9 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1679 | cs.l = 1; | 1682 | cs.l = 1; |
1680 | } | 1683 | } |
1681 | 1684 | ||
1682 | ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu); | 1685 | ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); |
1683 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); | 1686 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); |
1684 | ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu); | 1687 | ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); |
1685 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); | 1688 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); |
1686 | 1689 | ||
1687 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data); | 1690 | ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data); |
@@ -1736,9 +1739,9 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1736 | cs_sel |= SELECTOR_RPL_MASK; | 1739 | cs_sel |= SELECTOR_RPL_MASK; |
1737 | ss_sel |= SELECTOR_RPL_MASK; | 1740 | ss_sel |= SELECTOR_RPL_MASK; |
1738 | 1741 | ||
1739 | ops->set_cached_descriptor(&cs, VCPU_SREG_CS, ctxt->vcpu); | 1742 | ops->set_cached_descriptor(&cs, 0, VCPU_SREG_CS, ctxt->vcpu); |
1740 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); | 1743 | ops->set_segment_selector(cs_sel, VCPU_SREG_CS, ctxt->vcpu); |
1741 | ops->set_cached_descriptor(&ss, VCPU_SREG_SS, ctxt->vcpu); | 1744 | ops->set_cached_descriptor(&ss, 0, VCPU_SREG_SS, ctxt->vcpu); |
1742 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); | 1745 | ops->set_segment_selector(ss_sel, VCPU_SREG_SS, ctxt->vcpu); |
1743 | 1746 | ||
1744 | c->eip = c->regs[VCPU_REGS_RDX]; | 1747 | c->eip = c->regs[VCPU_REGS_RDX]; |
@@ -1764,24 +1767,28 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, | |||
1764 | u16 port, u16 len) | 1767 | u16 port, u16 len) |
1765 | { | 1768 | { |
1766 | struct desc_struct tr_seg; | 1769 | struct desc_struct tr_seg; |
1770 | u32 base3; | ||
1767 | int r; | 1771 | int r; |
1768 | u16 io_bitmap_ptr; | 1772 | u16 io_bitmap_ptr, perm, bit_idx = port & 0x7; |
1769 | u8 perm, bit_idx = port & 0x7; | ||
1770 | unsigned mask = (1 << len) - 1; | 1773 | unsigned mask = (1 << len) - 1; |
1774 | unsigned long base; | ||
1771 | 1775 | ||
1772 | ops->get_cached_descriptor(&tr_seg, VCPU_SREG_TR, ctxt->vcpu); | 1776 | ops->get_cached_descriptor(&tr_seg, &base3, VCPU_SREG_TR, ctxt->vcpu); |
1773 | if (!tr_seg.p) | 1777 | if (!tr_seg.p) |
1774 | return false; | 1778 | return false; |
1775 | if (desc_limit_scaled(&tr_seg) < 103) | 1779 | if (desc_limit_scaled(&tr_seg) < 103) |
1776 | return false; | 1780 | return false; |
1777 | r = ops->read_std(get_desc_base(&tr_seg) + 102, &io_bitmap_ptr, 2, | 1781 | base = get_desc_base(&tr_seg); |
1778 | ctxt->vcpu, NULL); | 1782 | #ifdef CONFIG_X86_64 |
1783 | base |= ((u64)base3) << 32; | ||
1784 | #endif | ||
1785 | r = ops->read_std(base + 102, &io_bitmap_ptr, 2, ctxt->vcpu, NULL); | ||
1779 | if (r != X86EMUL_CONTINUE) | 1786 | if (r != X86EMUL_CONTINUE) |
1780 | return false; | 1787 | return false; |
1781 | if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg)) | 1788 | if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg)) |
1782 | return false; | 1789 | return false; |
1783 | r = ops->read_std(get_desc_base(&tr_seg) + io_bitmap_ptr + port/8, | 1790 | r = ops->read_std(base + io_bitmap_ptr + port/8, &perm, 2, ctxt->vcpu, |
1784 | &perm, 1, ctxt->vcpu, NULL); | 1791 | NULL); |
1785 | if (r != X86EMUL_CONTINUE) | 1792 | if (r != X86EMUL_CONTINUE) |
1786 | return false; | 1793 | return false; |
1787 | if ((perm >> bit_idx) & mask) | 1794 | if ((perm >> bit_idx) & mask) |
@@ -2126,7 +2133,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, | |||
2126 | } | 2133 | } |
2127 | 2134 | ||
2128 | ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu); | 2135 | ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu); |
2129 | ops->set_cached_descriptor(&next_tss_desc, VCPU_SREG_TR, ctxt->vcpu); | 2136 | ops->set_cached_descriptor(&next_tss_desc, 0, VCPU_SREG_TR, ctxt->vcpu); |
2130 | ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu); | 2137 | ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu); |
2131 | 2138 | ||
2132 | if (has_error_code) { | 2139 | if (has_error_code) { |
@@ -2365,7 +2372,8 @@ static struct group_dual group7 = { { | |||
2365 | D(SrcMem16 | ModRM | Mov | Priv), | 2372 | D(SrcMem16 | ModRM | Mov | Priv), |
2366 | D(SrcMem | ModRM | ByteOp | Priv | NoAccess), | 2373 | D(SrcMem | ModRM | ByteOp | Priv | NoAccess), |
2367 | }, { | 2374 | }, { |
2368 | D(SrcNone | ModRM | Priv), N, N, D(SrcNone | ModRM | Priv), | 2375 | D(SrcNone | ModRM | Priv | VendorSpecific), N, |
2376 | N, D(SrcNone | ModRM | Priv | VendorSpecific), | ||
2369 | D(SrcNone | ModRM | DstMem | Mov), N, | 2377 | D(SrcNone | ModRM | DstMem | Mov), N, |
2370 | D(SrcMem16 | ModRM | Mov | Priv), N, | 2378 | D(SrcMem16 | ModRM | Mov | Priv), N, |
2371 | } }; | 2379 | } }; |
@@ -2489,7 +2497,7 @@ static struct opcode opcode_table[256] = { | |||
2489 | static struct opcode twobyte_table[256] = { | 2497 | static struct opcode twobyte_table[256] = { |
2490 | /* 0x00 - 0x0F */ | 2498 | /* 0x00 - 0x0F */ |
2491 | N, GD(0, &group7), N, N, | 2499 | N, GD(0, &group7), N, N, |
2492 | N, D(ImplicitOps), D(ImplicitOps | Priv), N, | 2500 | N, D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv), N, |
2493 | D(ImplicitOps | Priv), D(ImplicitOps | Priv), N, N, | 2501 | D(ImplicitOps | Priv), D(ImplicitOps | Priv), N, N, |
2494 | N, D(ImplicitOps | ModRM), N, N, | 2502 | N, D(ImplicitOps | ModRM), N, N, |
2495 | /* 0x10 - 0x1F */ | 2503 | /* 0x10 - 0x1F */ |
@@ -2502,7 +2510,8 @@ static struct opcode twobyte_table[256] = { | |||
2502 | /* 0x30 - 0x3F */ | 2510 | /* 0x30 - 0x3F */ |
2503 | D(ImplicitOps | Priv), I(ImplicitOps, em_rdtsc), | 2511 | D(ImplicitOps | Priv), I(ImplicitOps, em_rdtsc), |
2504 | D(ImplicitOps | Priv), N, | 2512 | D(ImplicitOps | Priv), N, |
2505 | D(ImplicitOps), D(ImplicitOps | Priv), N, N, | 2513 | D(ImplicitOps | VendorSpecific), D(ImplicitOps | Priv | VendorSpecific), |
2514 | N, N, | ||
2506 | N, N, N, N, N, N, N, N, | 2515 | N, N, N, N, N, N, N, N, |
2507 | /* 0x40 - 0x4F */ | 2516 | /* 0x40 - 0x4F */ |
2508 | X16(D(DstReg | SrcMem | ModRM | Mov)), | 2517 | X16(D(DstReg | SrcMem | ModRM | Mov)), |
@@ -2741,6 +2750,9 @@ done_prefixes: | |||
2741 | if (c->d == 0 || (c->d & Undefined)) | 2750 | if (c->d == 0 || (c->d & Undefined)) |
2742 | return -1; | 2751 | return -1; |
2743 | 2752 | ||
2753 | if (!(c->d & VendorSpecific) && ctxt->only_vendor_specific_insn) | ||
2754 | return -1; | ||
2755 | |||
2744 | if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack)) | 2756 | if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack)) |
2745 | c->op_bytes = 8; | 2757 | c->op_bytes = 8; |
2746 | 2758 | ||
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 3cece05e4ac4..19fe855e7953 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -62,9 +62,6 @@ static void pic_unlock(struct kvm_pic *s) | |||
62 | } | 62 | } |
63 | 63 | ||
64 | if (!found) | 64 | if (!found) |
65 | found = s->kvm->bsp_vcpu; | ||
66 | |||
67 | if (!found) | ||
68 | return; | 65 | return; |
69 | 66 | ||
70 | kvm_make_request(KVM_REQ_EVENT, found); | 67 | kvm_make_request(KVM_REQ_EVENT, found); |
@@ -75,7 +72,6 @@ static void pic_unlock(struct kvm_pic *s) | |||
75 | static void pic_clear_isr(struct kvm_kpic_state *s, int irq) | 72 | static void pic_clear_isr(struct kvm_kpic_state *s, int irq) |
76 | { | 73 | { |
77 | s->isr &= ~(1 << irq); | 74 | s->isr &= ~(1 << irq); |
78 | s->isr_ack |= (1 << irq); | ||
79 | if (s != &s->pics_state->pics[0]) | 75 | if (s != &s->pics_state->pics[0]) |
80 | irq += 8; | 76 | irq += 8; |
81 | /* | 77 | /* |
@@ -89,16 +85,6 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq) | |||
89 | pic_lock(s->pics_state); | 85 | pic_lock(s->pics_state); |
90 | } | 86 | } |
91 | 87 | ||
92 | void kvm_pic_clear_isr_ack(struct kvm *kvm) | ||
93 | { | ||
94 | struct kvm_pic *s = pic_irqchip(kvm); | ||
95 | |||
96 | pic_lock(s); | ||
97 | s->pics[0].isr_ack = 0xff; | ||
98 | s->pics[1].isr_ack = 0xff; | ||
99 | pic_unlock(s); | ||
100 | } | ||
101 | |||
102 | /* | 88 | /* |
103 | * set irq level. If an edge is detected, then the IRR is set to 1 | 89 | * set irq level. If an edge is detected, then the IRR is set to 1 |
104 | */ | 90 | */ |
@@ -281,7 +267,6 @@ void kvm_pic_reset(struct kvm_kpic_state *s) | |||
281 | s->irr = 0; | 267 | s->irr = 0; |
282 | s->imr = 0; | 268 | s->imr = 0; |
283 | s->isr = 0; | 269 | s->isr = 0; |
284 | s->isr_ack = 0xff; | ||
285 | s->priority_add = 0; | 270 | s->priority_add = 0; |
286 | s->irq_base = 0; | 271 | s->irq_base = 0; |
287 | s->read_reg_select = 0; | 272 | s->read_reg_select = 0; |
@@ -545,15 +530,11 @@ static int picdev_read(struct kvm_io_device *this, | |||
545 | */ | 530 | */ |
546 | static void pic_irq_request(struct kvm *kvm, int level) | 531 | static void pic_irq_request(struct kvm *kvm, int level) |
547 | { | 532 | { |
548 | struct kvm_vcpu *vcpu = kvm->bsp_vcpu; | ||
549 | struct kvm_pic *s = pic_irqchip(kvm); | 533 | struct kvm_pic *s = pic_irqchip(kvm); |
550 | int irq = pic_get_irq(&s->pics[0]); | ||
551 | 534 | ||
552 | s->output = level; | 535 | if (!s->output) |
553 | if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { | ||
554 | s->pics[0].isr_ack &= ~(1 << irq); | ||
555 | s->wakeup_needed = true; | 536 | s->wakeup_needed = true; |
556 | } | 537 | s->output = level; |
557 | } | 538 | } |
558 | 539 | ||
559 | static const struct kvm_io_device_ops picdev_ops = { | 540 | static const struct kvm_io_device_ops picdev_ops = { |
@@ -575,8 +556,6 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) | |||
575 | s->pics[1].elcr_mask = 0xde; | 556 | s->pics[1].elcr_mask = 0xde; |
576 | s->pics[0].pics_state = s; | 557 | s->pics[0].pics_state = s; |
577 | s->pics[1].pics_state = s; | 558 | s->pics[1].pics_state = s; |
578 | s->pics[0].isr_ack = 0xff; | ||
579 | s->pics[1].isr_ack = 0xff; | ||
580 | 559 | ||
581 | /* | 560 | /* |
582 | * Initialize PIO device | 561 | * Initialize PIO device |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 93cf9d0d3653..2b2255b1f04b 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -417,10 +417,6 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
417 | case APIC_DM_INIT: | 417 | case APIC_DM_INIT: |
418 | if (level) { | 418 | if (level) { |
419 | result = 1; | 419 | result = 1; |
420 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) | ||
421 | printk(KERN_DEBUG | ||
422 | "INIT on a runnable vcpu %d\n", | ||
423 | vcpu->vcpu_id); | ||
424 | vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; | 420 | vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED; |
425 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 421 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
426 | kvm_vcpu_kick(vcpu); | 422 | kvm_vcpu_kick(vcpu); |
@@ -875,8 +871,8 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu) | |||
875 | 871 | ||
876 | hrtimer_cancel(&vcpu->arch.apic->lapic_timer.timer); | 872 | hrtimer_cancel(&vcpu->arch.apic->lapic_timer.timer); |
877 | 873 | ||
878 | if (vcpu->arch.apic->regs_page) | 874 | if (vcpu->arch.apic->regs) |
879 | __free_page(vcpu->arch.apic->regs_page); | 875 | free_page((unsigned long)vcpu->arch.apic->regs); |
880 | 876 | ||
881 | kfree(vcpu->arch.apic); | 877 | kfree(vcpu->arch.apic); |
882 | } | 878 | } |
@@ -1065,13 +1061,12 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) | |||
1065 | 1061 | ||
1066 | vcpu->arch.apic = apic; | 1062 | vcpu->arch.apic = apic; |
1067 | 1063 | ||
1068 | apic->regs_page = alloc_page(GFP_KERNEL|__GFP_ZERO); | 1064 | apic->regs = (void *)get_zeroed_page(GFP_KERNEL); |
1069 | if (apic->regs_page == NULL) { | 1065 | if (!apic->regs) { |
1070 | printk(KERN_ERR "malloc apic regs error for vcpu %x\n", | 1066 | printk(KERN_ERR "malloc apic regs error for vcpu %x\n", |
1071 | vcpu->vcpu_id); | 1067 | vcpu->vcpu_id); |
1072 | goto nomem_free_apic; | 1068 | goto nomem_free_apic; |
1073 | } | 1069 | } |
1074 | apic->regs = page_address(apic->regs_page); | ||
1075 | apic->vcpu = vcpu; | 1070 | apic->vcpu = vcpu; |
1076 | 1071 | ||
1077 | hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, | 1072 | hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, |
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index f5fe32c5edad..52c9e6b9e725 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -13,7 +13,6 @@ struct kvm_lapic { | |||
13 | u32 divide_count; | 13 | u32 divide_count; |
14 | struct kvm_vcpu *vcpu; | 14 | struct kvm_vcpu *vcpu; |
15 | bool irr_pending; | 15 | bool irr_pending; |
16 | struct page *regs_page; | ||
17 | void *regs; | 16 | void *regs; |
18 | gpa_t vapic_addr; | 17 | gpa_t vapic_addr; |
19 | struct page *vapic_page; | 18 | struct page *vapic_page; |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f02b8edc3d44..22fae7593ee7 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -111,9 +111,6 @@ module_param(oos_shadow, bool, 0644); | |||
111 | #define PT64_LEVEL_SHIFT(level) \ | 111 | #define PT64_LEVEL_SHIFT(level) \ |
112 | (PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS) | 112 | (PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS) |
113 | 113 | ||
114 | #define PT64_LEVEL_MASK(level) \ | ||
115 | (((1ULL << PT64_LEVEL_BITS) - 1) << PT64_LEVEL_SHIFT(level)) | ||
116 | |||
117 | #define PT64_INDEX(address, level)\ | 114 | #define PT64_INDEX(address, level)\ |
118 | (((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1)) | 115 | (((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1)) |
119 | 116 | ||
@@ -123,8 +120,6 @@ module_param(oos_shadow, bool, 0644); | |||
123 | #define PT32_LEVEL_SHIFT(level) \ | 120 | #define PT32_LEVEL_SHIFT(level) \ |
124 | (PAGE_SHIFT + (level - 1) * PT32_LEVEL_BITS) | 121 | (PAGE_SHIFT + (level - 1) * PT32_LEVEL_BITS) |
125 | 122 | ||
126 | #define PT32_LEVEL_MASK(level) \ | ||
127 | (((1ULL << PT32_LEVEL_BITS) - 1) << PT32_LEVEL_SHIFT(level)) | ||
128 | #define PT32_LVL_OFFSET_MASK(level) \ | 123 | #define PT32_LVL_OFFSET_MASK(level) \ |
129 | (PT32_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \ | 124 | (PT32_BASE_ADDR_MASK & ((1ULL << (PAGE_SHIFT + (((level) - 1) \ |
130 | * PT32_LEVEL_BITS))) - 1)) | 125 | * PT32_LEVEL_BITS))) - 1)) |
@@ -379,15 +374,15 @@ static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc, | |||
379 | static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, | 374 | static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, |
380 | int min) | 375 | int min) |
381 | { | 376 | { |
382 | struct page *page; | 377 | void *page; |
383 | 378 | ||
384 | if (cache->nobjs >= min) | 379 | if (cache->nobjs >= min) |
385 | return 0; | 380 | return 0; |
386 | while (cache->nobjs < ARRAY_SIZE(cache->objects)) { | 381 | while (cache->nobjs < ARRAY_SIZE(cache->objects)) { |
387 | page = alloc_page(GFP_KERNEL); | 382 | page = (void *)__get_free_page(GFP_KERNEL); |
388 | if (!page) | 383 | if (!page) |
389 | return -ENOMEM; | 384 | return -ENOMEM; |
390 | cache->objects[cache->nobjs++] = page_address(page); | 385 | cache->objects[cache->nobjs++] = page; |
391 | } | 386 | } |
392 | return 0; | 387 | return 0; |
393 | } | 388 | } |
@@ -554,13 +549,23 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn) | |||
554 | return ret; | 549 | return ret; |
555 | } | 550 | } |
556 | 551 | ||
557 | static bool mapping_level_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t large_gfn) | 552 | static struct kvm_memory_slot * |
553 | gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn, | ||
554 | bool no_dirty_log) | ||
558 | { | 555 | { |
559 | struct kvm_memory_slot *slot; | 556 | struct kvm_memory_slot *slot; |
560 | slot = gfn_to_memslot(vcpu->kvm, large_gfn); | 557 | |
561 | if (slot && slot->dirty_bitmap) | 558 | slot = gfn_to_memslot(vcpu->kvm, gfn); |
562 | return true; | 559 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID || |
563 | return false; | 560 | (no_dirty_log && slot->dirty_bitmap)) |
561 | slot = NULL; | ||
562 | |||
563 | return slot; | ||
564 | } | ||
565 | |||
566 | static bool mapping_level_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t large_gfn) | ||
567 | { | ||
568 | return gfn_to_memslot_dirty_bitmap(vcpu, large_gfn, true); | ||
564 | } | 569 | } |
565 | 570 | ||
566 | static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) | 571 | static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) |
@@ -1032,9 +1037,9 @@ static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
1032 | ASSERT(is_empty_shadow_page(sp->spt)); | 1037 | ASSERT(is_empty_shadow_page(sp->spt)); |
1033 | hlist_del(&sp->hash_link); | 1038 | hlist_del(&sp->hash_link); |
1034 | list_del(&sp->link); | 1039 | list_del(&sp->link); |
1035 | __free_page(virt_to_page(sp->spt)); | 1040 | free_page((unsigned long)sp->spt); |
1036 | if (!sp->role.direct) | 1041 | if (!sp->role.direct) |
1037 | __free_page(virt_to_page(sp->gfns)); | 1042 | free_page((unsigned long)sp->gfns); |
1038 | kmem_cache_free(mmu_page_header_cache, sp); | 1043 | kmem_cache_free(mmu_page_header_cache, sp); |
1039 | kvm_mod_used_mmu_pages(kvm, -1); | 1044 | kvm_mod_used_mmu_pages(kvm, -1); |
1040 | } | 1045 | } |
@@ -1199,6 +1204,13 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva) | |||
1199 | { | 1204 | { |
1200 | } | 1205 | } |
1201 | 1206 | ||
1207 | static void nonpaging_update_pte(struct kvm_vcpu *vcpu, | ||
1208 | struct kvm_mmu_page *sp, u64 *spte, | ||
1209 | const void *pte, unsigned long mmu_seq) | ||
1210 | { | ||
1211 | WARN_ON(1); | ||
1212 | } | ||
1213 | |||
1202 | #define KVM_PAGE_ARRAY_NR 16 | 1214 | #define KVM_PAGE_ARRAY_NR 16 |
1203 | 1215 | ||
1204 | struct kvm_mmu_pages { | 1216 | struct kvm_mmu_pages { |
@@ -2150,26 +2162,13 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) | |||
2150 | { | 2162 | { |
2151 | } | 2163 | } |
2152 | 2164 | ||
2153 | static struct kvm_memory_slot * | ||
2154 | pte_prefetch_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn, bool no_dirty_log) | ||
2155 | { | ||
2156 | struct kvm_memory_slot *slot; | ||
2157 | |||
2158 | slot = gfn_to_memslot(vcpu->kvm, gfn); | ||
2159 | if (!slot || slot->flags & KVM_MEMSLOT_INVALID || | ||
2160 | (no_dirty_log && slot->dirty_bitmap)) | ||
2161 | slot = NULL; | ||
2162 | |||
2163 | return slot; | ||
2164 | } | ||
2165 | |||
2166 | static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, | 2165 | static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, |
2167 | bool no_dirty_log) | 2166 | bool no_dirty_log) |
2168 | { | 2167 | { |
2169 | struct kvm_memory_slot *slot; | 2168 | struct kvm_memory_slot *slot; |
2170 | unsigned long hva; | 2169 | unsigned long hva; |
2171 | 2170 | ||
2172 | slot = pte_prefetch_gfn_to_memslot(vcpu, gfn, no_dirty_log); | 2171 | slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log); |
2173 | if (!slot) { | 2172 | if (!slot) { |
2174 | get_page(bad_page); | 2173 | get_page(bad_page); |
2175 | return page_to_pfn(bad_page); | 2174 | return page_to_pfn(bad_page); |
@@ -2190,7 +2189,7 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, | |||
2190 | gfn_t gfn; | 2189 | gfn_t gfn; |
2191 | 2190 | ||
2192 | gfn = kvm_mmu_page_get_gfn(sp, start - sp->spt); | 2191 | gfn = kvm_mmu_page_get_gfn(sp, start - sp->spt); |
2193 | if (!pte_prefetch_gfn_to_memslot(vcpu, gfn, access & ACC_WRITE_MASK)) | 2192 | if (!gfn_to_memslot_dirty_bitmap(vcpu, gfn, access & ACC_WRITE_MASK)) |
2194 | return -1; | 2193 | return -1; |
2195 | 2194 | ||
2196 | ret = gfn_to_page_many_atomic(vcpu->kvm, gfn, pages, end - start); | 2195 | ret = gfn_to_page_many_atomic(vcpu->kvm, gfn, pages, end - start); |
@@ -2804,6 +2803,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu, | |||
2804 | context->prefetch_page = nonpaging_prefetch_page; | 2803 | context->prefetch_page = nonpaging_prefetch_page; |
2805 | context->sync_page = nonpaging_sync_page; | 2804 | context->sync_page = nonpaging_sync_page; |
2806 | context->invlpg = nonpaging_invlpg; | 2805 | context->invlpg = nonpaging_invlpg; |
2806 | context->update_pte = nonpaging_update_pte; | ||
2807 | context->root_level = 0; | 2807 | context->root_level = 0; |
2808 | context->shadow_root_level = PT32E_ROOT_LEVEL; | 2808 | context->shadow_root_level = PT32E_ROOT_LEVEL; |
2809 | context->root_hpa = INVALID_PAGE; | 2809 | context->root_hpa = INVALID_PAGE; |
@@ -2933,6 +2933,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, | |||
2933 | context->prefetch_page = paging64_prefetch_page; | 2933 | context->prefetch_page = paging64_prefetch_page; |
2934 | context->sync_page = paging64_sync_page; | 2934 | context->sync_page = paging64_sync_page; |
2935 | context->invlpg = paging64_invlpg; | 2935 | context->invlpg = paging64_invlpg; |
2936 | context->update_pte = paging64_update_pte; | ||
2936 | context->free = paging_free; | 2937 | context->free = paging_free; |
2937 | context->root_level = level; | 2938 | context->root_level = level; |
2938 | context->shadow_root_level = level; | 2939 | context->shadow_root_level = level; |
@@ -2961,6 +2962,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu, | |||
2961 | context->prefetch_page = paging32_prefetch_page; | 2962 | context->prefetch_page = paging32_prefetch_page; |
2962 | context->sync_page = paging32_sync_page; | 2963 | context->sync_page = paging32_sync_page; |
2963 | context->invlpg = paging32_invlpg; | 2964 | context->invlpg = paging32_invlpg; |
2965 | context->update_pte = paging32_update_pte; | ||
2964 | context->root_level = PT32_ROOT_LEVEL; | 2966 | context->root_level = PT32_ROOT_LEVEL; |
2965 | context->shadow_root_level = PT32E_ROOT_LEVEL; | 2967 | context->shadow_root_level = PT32E_ROOT_LEVEL; |
2966 | context->root_hpa = INVALID_PAGE; | 2968 | context->root_hpa = INVALID_PAGE; |
@@ -2985,6 +2987,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
2985 | context->prefetch_page = nonpaging_prefetch_page; | 2987 | context->prefetch_page = nonpaging_prefetch_page; |
2986 | context->sync_page = nonpaging_sync_page; | 2988 | context->sync_page = nonpaging_sync_page; |
2987 | context->invlpg = nonpaging_invlpg; | 2989 | context->invlpg = nonpaging_invlpg; |
2990 | context->update_pte = nonpaging_update_pte; | ||
2988 | context->shadow_root_level = kvm_x86_ops->get_tdp_level(); | 2991 | context->shadow_root_level = kvm_x86_ops->get_tdp_level(); |
2989 | context->root_hpa = INVALID_PAGE; | 2992 | context->root_hpa = INVALID_PAGE; |
2990 | context->direct_map = true; | 2993 | context->direct_map = true; |
@@ -3089,8 +3092,6 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu) | |||
3089 | 3092 | ||
3090 | static int init_kvm_mmu(struct kvm_vcpu *vcpu) | 3093 | static int init_kvm_mmu(struct kvm_vcpu *vcpu) |
3091 | { | 3094 | { |
3092 | vcpu->arch.update_pte.pfn = bad_pfn; | ||
3093 | |||
3094 | if (mmu_is_nested(vcpu)) | 3095 | if (mmu_is_nested(vcpu)) |
3095 | return init_kvm_nested_mmu(vcpu); | 3096 | return init_kvm_nested_mmu(vcpu); |
3096 | else if (tdp_enabled) | 3097 | else if (tdp_enabled) |
@@ -3164,7 +3165,7 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, | |||
3164 | static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | 3165 | static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, |
3165 | struct kvm_mmu_page *sp, | 3166 | struct kvm_mmu_page *sp, |
3166 | u64 *spte, | 3167 | u64 *spte, |
3167 | const void *new) | 3168 | const void *new, unsigned long mmu_seq) |
3168 | { | 3169 | { |
3169 | if (sp->role.level != PT_PAGE_TABLE_LEVEL) { | 3170 | if (sp->role.level != PT_PAGE_TABLE_LEVEL) { |
3170 | ++vcpu->kvm->stat.mmu_pde_zapped; | 3171 | ++vcpu->kvm->stat.mmu_pde_zapped; |
@@ -3172,10 +3173,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, | |||
3172 | } | 3173 | } |
3173 | 3174 | ||
3174 | ++vcpu->kvm->stat.mmu_pte_updated; | 3175 | ++vcpu->kvm->stat.mmu_pte_updated; |
3175 | if (!sp->role.cr4_pae) | 3176 | vcpu->arch.mmu.update_pte(vcpu, sp, spte, new, mmu_seq); |
3176 | paging32_update_pte(vcpu, sp, spte, new); | ||
3177 | else | ||
3178 | paging64_update_pte(vcpu, sp, spte, new); | ||
3179 | } | 3177 | } |
3180 | 3178 | ||
3181 | static bool need_remote_flush(u64 old, u64 new) | 3179 | static bool need_remote_flush(u64 old, u64 new) |
@@ -3210,28 +3208,6 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu) | |||
3210 | return !!(spte && (*spte & shadow_accessed_mask)); | 3208 | return !!(spte && (*spte & shadow_accessed_mask)); |
3211 | } | 3209 | } |
3212 | 3210 | ||
3213 | static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | ||
3214 | u64 gpte) | ||
3215 | { | ||
3216 | gfn_t gfn; | ||
3217 | pfn_t pfn; | ||
3218 | |||
3219 | if (!is_present_gpte(gpte)) | ||
3220 | return; | ||
3221 | gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; | ||
3222 | |||
3223 | vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq; | ||
3224 | smp_rmb(); | ||
3225 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | ||
3226 | |||
3227 | if (is_error_pfn(pfn)) { | ||
3228 | kvm_release_pfn_clean(pfn); | ||
3229 | return; | ||
3230 | } | ||
3231 | vcpu->arch.update_pte.gfn = gfn; | ||
3232 | vcpu->arch.update_pte.pfn = pfn; | ||
3233 | } | ||
3234 | |||
3235 | static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn) | 3211 | static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn) |
3236 | { | 3212 | { |
3237 | u64 *spte = vcpu->arch.last_pte_updated; | 3213 | u64 *spte = vcpu->arch.last_pte_updated; |
@@ -3253,21 +3229,14 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3253 | struct kvm_mmu_page *sp; | 3229 | struct kvm_mmu_page *sp; |
3254 | struct hlist_node *node; | 3230 | struct hlist_node *node; |
3255 | LIST_HEAD(invalid_list); | 3231 | LIST_HEAD(invalid_list); |
3256 | u64 entry, gentry; | 3232 | unsigned long mmu_seq; |
3257 | u64 *spte; | 3233 | u64 entry, gentry, *spte; |
3258 | unsigned offset = offset_in_page(gpa); | 3234 | unsigned pte_size, page_offset, misaligned, quadrant, offset; |
3259 | unsigned pte_size; | 3235 | int level, npte, invlpg_counter, r, flooded = 0; |
3260 | unsigned page_offset; | ||
3261 | unsigned misaligned; | ||
3262 | unsigned quadrant; | ||
3263 | int level; | ||
3264 | int flooded = 0; | ||
3265 | int npte; | ||
3266 | int r; | ||
3267 | int invlpg_counter; | ||
3268 | bool remote_flush, local_flush, zap_page; | 3236 | bool remote_flush, local_flush, zap_page; |
3269 | 3237 | ||
3270 | zap_page = remote_flush = local_flush = false; | 3238 | zap_page = remote_flush = local_flush = false; |
3239 | offset = offset_in_page(gpa); | ||
3271 | 3240 | ||
3272 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); | 3241 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); |
3273 | 3242 | ||
@@ -3275,9 +3244,8 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3275 | 3244 | ||
3276 | /* | 3245 | /* |
3277 | * Assume that the pte write on a page table of the same type | 3246 | * Assume that the pte write on a page table of the same type |
3278 | * as the current vcpu paging mode. This is nearly always true | 3247 | * as the current vcpu paging mode since we update the sptes only |
3279 | * (might be false while changing modes). Note it is verified later | 3248 | * when they have the same mode. |
3280 | * by update_pte(). | ||
3281 | */ | 3249 | */ |
3282 | if ((is_pae(vcpu) && bytes == 4) || !new) { | 3250 | if ((is_pae(vcpu) && bytes == 4) || !new) { |
3283 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ | 3251 | /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ |
@@ -3303,15 +3271,17 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3303 | break; | 3271 | break; |
3304 | } | 3272 | } |
3305 | 3273 | ||
3306 | mmu_guess_page_from_pte_write(vcpu, gpa, gentry); | 3274 | mmu_seq = vcpu->kvm->mmu_notifier_seq; |
3275 | smp_rmb(); | ||
3276 | |||
3307 | spin_lock(&vcpu->kvm->mmu_lock); | 3277 | spin_lock(&vcpu->kvm->mmu_lock); |
3308 | if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) | 3278 | if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) |
3309 | gentry = 0; | 3279 | gentry = 0; |
3310 | kvm_mmu_access_page(vcpu, gfn); | ||
3311 | kvm_mmu_free_some_pages(vcpu); | 3280 | kvm_mmu_free_some_pages(vcpu); |
3312 | ++vcpu->kvm->stat.mmu_pte_write; | 3281 | ++vcpu->kvm->stat.mmu_pte_write; |
3313 | trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); | 3282 | trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); |
3314 | if (guest_initiated) { | 3283 | if (guest_initiated) { |
3284 | kvm_mmu_access_page(vcpu, gfn); | ||
3315 | if (gfn == vcpu->arch.last_pt_write_gfn | 3285 | if (gfn == vcpu->arch.last_pt_write_gfn |
3316 | && !last_updated_pte_accessed(vcpu)) { | 3286 | && !last_updated_pte_accessed(vcpu)) { |
3317 | ++vcpu->arch.last_pt_write_count; | 3287 | ++vcpu->arch.last_pt_write_count; |
@@ -3375,7 +3345,8 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3375 | if (gentry && | 3345 | if (gentry && |
3376 | !((sp->role.word ^ vcpu->arch.mmu.base_role.word) | 3346 | !((sp->role.word ^ vcpu->arch.mmu.base_role.word) |
3377 | & mask.word)) | 3347 | & mask.word)) |
3378 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); | 3348 | mmu_pte_write_new_pte(vcpu, sp, spte, &gentry, |
3349 | mmu_seq); | ||
3379 | if (!remote_flush && need_remote_flush(entry, *spte)) | 3350 | if (!remote_flush && need_remote_flush(entry, *spte)) |
3380 | remote_flush = true; | 3351 | remote_flush = true; |
3381 | ++spte; | 3352 | ++spte; |
@@ -3385,10 +3356,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
3385 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); | 3356 | kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); |
3386 | trace_kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE); | 3357 | trace_kvm_mmu_audit(vcpu, AUDIT_POST_PTE_WRITE); |
3387 | spin_unlock(&vcpu->kvm->mmu_lock); | 3358 | spin_unlock(&vcpu->kvm->mmu_lock); |
3388 | if (!is_error_pfn(vcpu->arch.update_pte.pfn)) { | ||
3389 | kvm_release_pfn_clean(vcpu->arch.update_pte.pfn); | ||
3390 | vcpu->arch.update_pte.pfn = bad_pfn; | ||
3391 | } | ||
3392 | } | 3359 | } |
3393 | 3360 | ||
3394 | int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) | 3361 | int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) |
@@ -3538,14 +3505,23 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
3538 | if (!test_bit(slot, sp->slot_bitmap)) | 3505 | if (!test_bit(slot, sp->slot_bitmap)) |
3539 | continue; | 3506 | continue; |
3540 | 3507 | ||
3541 | if (sp->role.level != PT_PAGE_TABLE_LEVEL) | ||
3542 | continue; | ||
3543 | |||
3544 | pt = sp->spt; | 3508 | pt = sp->spt; |
3545 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) | 3509 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { |
3510 | if (!is_shadow_present_pte(pt[i]) || | ||
3511 | !is_last_spte(pt[i], sp->role.level)) | ||
3512 | continue; | ||
3513 | |||
3514 | if (is_large_pte(pt[i])) { | ||
3515 | drop_spte(kvm, &pt[i], | ||
3516 | shadow_trap_nonpresent_pte); | ||
3517 | --kvm->stat.lpages; | ||
3518 | continue; | ||
3519 | } | ||
3520 | |||
3546 | /* avoid RMW */ | 3521 | /* avoid RMW */ |
3547 | if (is_writable_pte(pt[i])) | 3522 | if (is_writable_pte(pt[i])) |
3548 | update_spte(&pt[i], pt[i] & ~PT_WRITABLE_MASK); | 3523 | update_spte(&pt[i], pt[i] & ~PT_WRITABLE_MASK); |
3524 | } | ||
3549 | } | 3525 | } |
3550 | kvm_flush_remote_tlbs(kvm); | 3526 | kvm_flush_remote_tlbs(kvm); |
3551 | } | 3527 | } |
@@ -3583,7 +3559,7 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) | |||
3583 | if (nr_to_scan == 0) | 3559 | if (nr_to_scan == 0) |
3584 | goto out; | 3560 | goto out; |
3585 | 3561 | ||
3586 | spin_lock(&kvm_lock); | 3562 | raw_spin_lock(&kvm_lock); |
3587 | 3563 | ||
3588 | list_for_each_entry(kvm, &vm_list, vm_list) { | 3564 | list_for_each_entry(kvm, &vm_list, vm_list) { |
3589 | int idx, freed_pages; | 3565 | int idx, freed_pages; |
@@ -3606,7 +3582,7 @@ static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) | |||
3606 | if (kvm_freed) | 3582 | if (kvm_freed) |
3607 | list_move_tail(&kvm_freed->vm_list, &vm_list); | 3583 | list_move_tail(&kvm_freed->vm_list, &vm_list); |
3608 | 3584 | ||
3609 | spin_unlock(&kvm_lock); | 3585 | raw_spin_unlock(&kvm_lock); |
3610 | 3586 | ||
3611 | out: | 3587 | out: |
3612 | return percpu_counter_read_positive(&kvm_total_used_mmu_pages); | 3588 | return percpu_counter_read_positive(&kvm_total_used_mmu_pages); |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 6bccc24c4181..c6397795d865 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -31,7 +31,6 @@ | |||
31 | #define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl) | 31 | #define PT_LVL_ADDR_MASK(lvl) PT64_LVL_ADDR_MASK(lvl) |
32 | #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) | 32 | #define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl) |
33 | #define PT_INDEX(addr, level) PT64_INDEX(addr, level) | 33 | #define PT_INDEX(addr, level) PT64_INDEX(addr, level) |
34 | #define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level) | ||
35 | #define PT_LEVEL_BITS PT64_LEVEL_BITS | 34 | #define PT_LEVEL_BITS PT64_LEVEL_BITS |
36 | #ifdef CONFIG_X86_64 | 35 | #ifdef CONFIG_X86_64 |
37 | #define PT_MAX_FULL_LEVELS 4 | 36 | #define PT_MAX_FULL_LEVELS 4 |
@@ -48,7 +47,6 @@ | |||
48 | #define PT_LVL_ADDR_MASK(lvl) PT32_LVL_ADDR_MASK(lvl) | 47 | #define PT_LVL_ADDR_MASK(lvl) PT32_LVL_ADDR_MASK(lvl) |
49 | #define PT_LVL_OFFSET_MASK(lvl) PT32_LVL_OFFSET_MASK(lvl) | 48 | #define PT_LVL_OFFSET_MASK(lvl) PT32_LVL_OFFSET_MASK(lvl) |
50 | #define PT_INDEX(addr, level) PT32_INDEX(addr, level) | 49 | #define PT_INDEX(addr, level) PT32_INDEX(addr, level) |
51 | #define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level) | ||
52 | #define PT_LEVEL_BITS PT32_LEVEL_BITS | 50 | #define PT_LEVEL_BITS PT32_LEVEL_BITS |
53 | #define PT_MAX_FULL_LEVELS 2 | 51 | #define PT_MAX_FULL_LEVELS 2 |
54 | #define CMPXCHG cmpxchg | 52 | #define CMPXCHG cmpxchg |
@@ -327,7 +325,7 @@ no_present: | |||
327 | } | 325 | } |
328 | 326 | ||
329 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | 327 | static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, |
330 | u64 *spte, const void *pte) | 328 | u64 *spte, const void *pte, unsigned long mmu_seq) |
331 | { | 329 | { |
332 | pt_element_t gpte; | 330 | pt_element_t gpte; |
333 | unsigned pte_access; | 331 | unsigned pte_access; |
@@ -339,16 +337,16 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, | |||
339 | 337 | ||
340 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); | 338 | pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); |
341 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); | 339 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); |
342 | if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn) | 340 | pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte)); |
341 | if (is_error_pfn(pfn)) { | ||
342 | kvm_release_pfn_clean(pfn); | ||
343 | return; | 343 | return; |
344 | pfn = vcpu->arch.update_pte.pfn; | 344 | } |
345 | if (is_error_pfn(pfn)) | 345 | if (mmu_notifier_retry(vcpu, mmu_seq)) |
346 | return; | ||
347 | if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq)) | ||
348 | return; | 346 | return; |
349 | kvm_get_pfn(pfn); | 347 | |
350 | /* | 348 | /* |
351 | * we call mmu_set_spte() with host_writable = true beacuse that | 349 | * we call mmu_set_spte() with host_writable = true because that |
352 | * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). | 350 | * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). |
353 | */ | 351 | */ |
354 | mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, | 352 | mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, |
@@ -829,7 +827,6 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
829 | #undef FNAME | 827 | #undef FNAME |
830 | #undef PT_BASE_ADDR_MASK | 828 | #undef PT_BASE_ADDR_MASK |
831 | #undef PT_INDEX | 829 | #undef PT_INDEX |
832 | #undef PT_LEVEL_MASK | ||
833 | #undef PT_LVL_ADDR_MASK | 830 | #undef PT_LVL_ADDR_MASK |
834 | #undef PT_LVL_OFFSET_MASK | 831 | #undef PT_LVL_OFFSET_MASK |
835 | #undef PT_LEVEL_BITS | 832 | #undef PT_LEVEL_BITS |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 63fec1531e89..6bb15d583e47 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -135,6 +135,8 @@ struct vcpu_svm { | |||
135 | 135 | ||
136 | u32 *msrpm; | 136 | u32 *msrpm; |
137 | 137 | ||
138 | ulong nmi_iret_rip; | ||
139 | |||
138 | struct nested_state nested; | 140 | struct nested_state nested; |
139 | 141 | ||
140 | bool nmi_singlestep; | 142 | bool nmi_singlestep; |
@@ -1153,8 +1155,10 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) | |||
1153 | wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs); | 1155 | wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs); |
1154 | load_gs_index(svm->host.gs); | 1156 | load_gs_index(svm->host.gs); |
1155 | #else | 1157 | #else |
1158 | #ifdef CONFIG_X86_32_LAZY_GS | ||
1156 | loadsegment(gs, svm->host.gs); | 1159 | loadsegment(gs, svm->host.gs); |
1157 | #endif | 1160 | #endif |
1161 | #endif | ||
1158 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) | 1162 | for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) |
1159 | wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); | 1163 | wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]); |
1160 | } | 1164 | } |
@@ -2653,6 +2657,7 @@ static int iret_interception(struct vcpu_svm *svm) | |||
2653 | ++svm->vcpu.stat.nmi_window_exits; | 2657 | ++svm->vcpu.stat.nmi_window_exits; |
2654 | clr_intercept(svm, INTERCEPT_IRET); | 2658 | clr_intercept(svm, INTERCEPT_IRET); |
2655 | svm->vcpu.arch.hflags |= HF_IRET_MASK; | 2659 | svm->vcpu.arch.hflags |= HF_IRET_MASK; |
2660 | svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu); | ||
2656 | return 1; | 2661 | return 1; |
2657 | } | 2662 | } |
2658 | 2663 | ||
@@ -3474,7 +3479,12 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) | |||
3474 | 3479 | ||
3475 | svm->int3_injected = 0; | 3480 | svm->int3_injected = 0; |
3476 | 3481 | ||
3477 | if (svm->vcpu.arch.hflags & HF_IRET_MASK) { | 3482 | /* |
3483 | * If we've made progress since setting HF_IRET_MASK, we've | ||
3484 | * executed an IRET and can allow NMI injection. | ||
3485 | */ | ||
3486 | if ((svm->vcpu.arch.hflags & HF_IRET_MASK) | ||
3487 | && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) { | ||
3478 | svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); | 3488 | svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); |
3479 | kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); | 3489 | kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); |
3480 | } | 3490 | } |
@@ -3641,19 +3651,30 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) | |||
3641 | wrmsrl(MSR_GS_BASE, svm->host.gs_base); | 3651 | wrmsrl(MSR_GS_BASE, svm->host.gs_base); |
3642 | #else | 3652 | #else |
3643 | loadsegment(fs, svm->host.fs); | 3653 | loadsegment(fs, svm->host.fs); |
3654 | #ifndef CONFIG_X86_32_LAZY_GS | ||
3655 | loadsegment(gs, svm->host.gs); | ||
3656 | #endif | ||
3644 | #endif | 3657 | #endif |
3645 | 3658 | ||
3646 | reload_tss(vcpu); | 3659 | reload_tss(vcpu); |
3647 | 3660 | ||
3648 | local_irq_disable(); | 3661 | local_irq_disable(); |
3649 | 3662 | ||
3650 | stgi(); | ||
3651 | |||
3652 | vcpu->arch.cr2 = svm->vmcb->save.cr2; | 3663 | vcpu->arch.cr2 = svm->vmcb->save.cr2; |
3653 | vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; | 3664 | vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax; |
3654 | vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; | 3665 | vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp; |
3655 | vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; | 3666 | vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip; |
3656 | 3667 | ||
3668 | if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) | ||
3669 | kvm_before_handle_nmi(&svm->vcpu); | ||
3670 | |||
3671 | stgi(); | ||
3672 | |||
3673 | /* Any pending NMI will happen here */ | ||
3674 | |||
3675 | if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) | ||
3676 | kvm_after_handle_nmi(&svm->vcpu); | ||
3677 | |||
3657 | sync_cr8_to_lapic(vcpu); | 3678 | sync_cr8_to_lapic(vcpu); |
3658 | 3679 | ||
3659 | svm->next_rip = 0; | 3680 | svm->next_rip = 0; |
diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c index fc7a101c4a35..abd86e865be3 100644 --- a/arch/x86/kvm/timer.c +++ b/arch/x86/kvm/timer.c | |||
@@ -25,7 +25,7 @@ static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer) | |||
25 | 25 | ||
26 | /* | 26 | /* |
27 | * There is a race window between reading and incrementing, but we do | 27 | * There is a race window between reading and incrementing, but we do |
28 | * not care about potentially loosing timer events in the !reinject | 28 | * not care about potentially losing timer events in the !reinject |
29 | * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked | 29 | * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked |
30 | * in vcpu_enter_guest. | 30 | * in vcpu_enter_guest. |
31 | */ | 31 | */ |
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 1357d7cf4ec8..db932760ea82 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h | |||
@@ -62,21 +62,21 @@ TRACE_EVENT(kvm_hv_hypercall, | |||
62 | TP_ARGS(code, fast, rep_cnt, rep_idx, ingpa, outgpa), | 62 | TP_ARGS(code, fast, rep_cnt, rep_idx, ingpa, outgpa), |
63 | 63 | ||
64 | TP_STRUCT__entry( | 64 | TP_STRUCT__entry( |
65 | __field( __u16, code ) | ||
66 | __field( bool, fast ) | ||
67 | __field( __u16, rep_cnt ) | 65 | __field( __u16, rep_cnt ) |
68 | __field( __u16, rep_idx ) | 66 | __field( __u16, rep_idx ) |
69 | __field( __u64, ingpa ) | 67 | __field( __u64, ingpa ) |
70 | __field( __u64, outgpa ) | 68 | __field( __u64, outgpa ) |
69 | __field( __u16, code ) | ||
70 | __field( bool, fast ) | ||
71 | ), | 71 | ), |
72 | 72 | ||
73 | TP_fast_assign( | 73 | TP_fast_assign( |
74 | __entry->code = code; | ||
75 | __entry->fast = fast; | ||
76 | __entry->rep_cnt = rep_cnt; | 74 | __entry->rep_cnt = rep_cnt; |
77 | __entry->rep_idx = rep_idx; | 75 | __entry->rep_idx = rep_idx; |
78 | __entry->ingpa = ingpa; | 76 | __entry->ingpa = ingpa; |
79 | __entry->outgpa = outgpa; | 77 | __entry->outgpa = outgpa; |
78 | __entry->code = code; | ||
79 | __entry->fast = fast; | ||
80 | ), | 80 | ), |
81 | 81 | ||
82 | TP_printk("code 0x%x %s cnt 0x%x idx 0x%x in 0x%llx out 0x%llx", | 82 | TP_printk("code 0x%x %s cnt 0x%x idx 0x%x in 0x%llx out 0x%llx", |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bf89ec2cfb82..5b4cdcbd154c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -93,14 +93,14 @@ module_param(yield_on_hlt, bool, S_IRUGO); | |||
93 | * These 2 parameters are used to config the controls for Pause-Loop Exiting: | 93 | * These 2 parameters are used to config the controls for Pause-Loop Exiting: |
94 | * ple_gap: upper bound on the amount of time between two successive | 94 | * ple_gap: upper bound on the amount of time between two successive |
95 | * executions of PAUSE in a loop. Also indicate if ple enabled. | 95 | * executions of PAUSE in a loop. Also indicate if ple enabled. |
96 | * According to test, this time is usually small than 41 cycles. | 96 | * According to test, this time is usually smaller than 128 cycles. |
97 | * ple_window: upper bound on the amount of time a guest is allowed to execute | 97 | * ple_window: upper bound on the amount of time a guest is allowed to execute |
98 | * in a PAUSE loop. Tests indicate that most spinlocks are held for | 98 | * in a PAUSE loop. Tests indicate that most spinlocks are held for |
99 | * less than 2^12 cycles | 99 | * less than 2^12 cycles |
100 | * Time is measured based on a counter that runs at the same rate as the TSC, | 100 | * Time is measured based on a counter that runs at the same rate as the TSC, |
101 | * refer SDM volume 3b section 21.6.13 & 22.1.3. | 101 | * refer SDM volume 3b section 21.6.13 & 22.1.3. |
102 | */ | 102 | */ |
103 | #define KVM_VMX_DEFAULT_PLE_GAP 41 | 103 | #define KVM_VMX_DEFAULT_PLE_GAP 128 |
104 | #define KVM_VMX_DEFAULT_PLE_WINDOW 4096 | 104 | #define KVM_VMX_DEFAULT_PLE_WINDOW 4096 |
105 | static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP; | 105 | static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP; |
106 | module_param(ple_gap, int, S_IRUGO); | 106 | module_param(ple_gap, int, S_IRUGO); |
@@ -176,11 +176,11 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | |||
176 | return container_of(vcpu, struct vcpu_vmx, vcpu); | 176 | return container_of(vcpu, struct vcpu_vmx, vcpu); |
177 | } | 177 | } |
178 | 178 | ||
179 | static int init_rmode(struct kvm *kvm); | ||
180 | static u64 construct_eptp(unsigned long root_hpa); | 179 | static u64 construct_eptp(unsigned long root_hpa); |
181 | static void kvm_cpu_vmxon(u64 addr); | 180 | static void kvm_cpu_vmxon(u64 addr); |
182 | static void kvm_cpu_vmxoff(void); | 181 | static void kvm_cpu_vmxoff(void); |
183 | static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); | 182 | static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); |
183 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); | ||
184 | 184 | ||
185 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); | 185 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); |
186 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | 186 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); |
@@ -1333,19 +1333,25 @@ static __init int vmx_disabled_by_bios(void) | |||
1333 | 1333 | ||
1334 | rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); | 1334 | rdmsrl(MSR_IA32_FEATURE_CONTROL, msr); |
1335 | if (msr & FEATURE_CONTROL_LOCKED) { | 1335 | if (msr & FEATURE_CONTROL_LOCKED) { |
1336 | /* launched w/ TXT and VMX disabled */ | ||
1336 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) | 1337 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) |
1337 | && tboot_enabled()) | 1338 | && tboot_enabled()) |
1338 | return 1; | 1339 | return 1; |
1340 | /* launched w/o TXT and VMX only enabled w/ TXT */ | ||
1339 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) | 1341 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) |
1342 | && (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX) | ||
1340 | && !tboot_enabled()) { | 1343 | && !tboot_enabled()) { |
1341 | printk(KERN_WARNING "kvm: disable TXT in the BIOS or " | 1344 | printk(KERN_WARNING "kvm: disable TXT in the BIOS or " |
1342 | " activate TXT before enabling KVM\n"); | 1345 | "activate TXT before enabling KVM\n"); |
1343 | return 1; | 1346 | return 1; |
1344 | } | 1347 | } |
1348 | /* launched w/o TXT and VMX disabled */ | ||
1349 | if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX) | ||
1350 | && !tboot_enabled()) | ||
1351 | return 1; | ||
1345 | } | 1352 | } |
1346 | 1353 | ||
1347 | return 0; | 1354 | return 0; |
1348 | /* locked but not enabled */ | ||
1349 | } | 1355 | } |
1350 | 1356 | ||
1351 | static void kvm_cpu_vmxon(u64 addr) | 1357 | static void kvm_cpu_vmxon(u64 addr) |
@@ -1683,6 +1689,7 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
1683 | vmx->emulation_required = 1; | 1689 | vmx->emulation_required = 1; |
1684 | vmx->rmode.vm86_active = 0; | 1690 | vmx->rmode.vm86_active = 0; |
1685 | 1691 | ||
1692 | vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector); | ||
1686 | vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base); | 1693 | vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base); |
1687 | vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit); | 1694 | vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit); |
1688 | vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar); | 1695 | vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar); |
@@ -1756,6 +1763,19 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
1756 | vmx->emulation_required = 1; | 1763 | vmx->emulation_required = 1; |
1757 | vmx->rmode.vm86_active = 1; | 1764 | vmx->rmode.vm86_active = 1; |
1758 | 1765 | ||
1766 | /* | ||
1767 | * Very old userspace does not call KVM_SET_TSS_ADDR before entering | ||
1768 | * vcpu. Call it here with phys address pointing 16M below 4G. | ||
1769 | */ | ||
1770 | if (!vcpu->kvm->arch.tss_addr) { | ||
1771 | printk_once(KERN_WARNING "kvm: KVM_SET_TSS_ADDR need to be " | ||
1772 | "called before entering vcpu\n"); | ||
1773 | srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); | ||
1774 | vmx_set_tss_addr(vcpu->kvm, 0xfeffd000); | ||
1775 | vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
1776 | } | ||
1777 | |||
1778 | vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR); | ||
1759 | vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE); | 1779 | vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE); |
1760 | vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); | 1780 | vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); |
1761 | 1781 | ||
@@ -1794,7 +1814,6 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
1794 | 1814 | ||
1795 | continue_rmode: | 1815 | continue_rmode: |
1796 | kvm_mmu_reset_context(vcpu); | 1816 | kvm_mmu_reset_context(vcpu); |
1797 | init_rmode(vcpu->kvm); | ||
1798 | } | 1817 | } |
1799 | 1818 | ||
1800 | static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) | 1819 | static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) |
@@ -2030,23 +2049,40 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
2030 | vmcs_writel(GUEST_CR4, hw_cr4); | 2049 | vmcs_writel(GUEST_CR4, hw_cr4); |
2031 | } | 2050 | } |
2032 | 2051 | ||
2033 | static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) | ||
2034 | { | ||
2035 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | ||
2036 | |||
2037 | return vmcs_readl(sf->base); | ||
2038 | } | ||
2039 | |||
2040 | static void vmx_get_segment(struct kvm_vcpu *vcpu, | 2052 | static void vmx_get_segment(struct kvm_vcpu *vcpu, |
2041 | struct kvm_segment *var, int seg) | 2053 | struct kvm_segment *var, int seg) |
2042 | { | 2054 | { |
2055 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
2043 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | 2056 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; |
2057 | struct kvm_save_segment *save; | ||
2044 | u32 ar; | 2058 | u32 ar; |
2045 | 2059 | ||
2060 | if (vmx->rmode.vm86_active | ||
2061 | && (seg == VCPU_SREG_TR || seg == VCPU_SREG_ES | ||
2062 | || seg == VCPU_SREG_DS || seg == VCPU_SREG_FS | ||
2063 | || seg == VCPU_SREG_GS) | ||
2064 | && !emulate_invalid_guest_state) { | ||
2065 | switch (seg) { | ||
2066 | case VCPU_SREG_TR: save = &vmx->rmode.tr; break; | ||
2067 | case VCPU_SREG_ES: save = &vmx->rmode.es; break; | ||
2068 | case VCPU_SREG_DS: save = &vmx->rmode.ds; break; | ||
2069 | case VCPU_SREG_FS: save = &vmx->rmode.fs; break; | ||
2070 | case VCPU_SREG_GS: save = &vmx->rmode.gs; break; | ||
2071 | default: BUG(); | ||
2072 | } | ||
2073 | var->selector = save->selector; | ||
2074 | var->base = save->base; | ||
2075 | var->limit = save->limit; | ||
2076 | ar = save->ar; | ||
2077 | if (seg == VCPU_SREG_TR | ||
2078 | || var->selector == vmcs_read16(sf->selector)) | ||
2079 | goto use_saved_rmode_seg; | ||
2080 | } | ||
2046 | var->base = vmcs_readl(sf->base); | 2081 | var->base = vmcs_readl(sf->base); |
2047 | var->limit = vmcs_read32(sf->limit); | 2082 | var->limit = vmcs_read32(sf->limit); |
2048 | var->selector = vmcs_read16(sf->selector); | 2083 | var->selector = vmcs_read16(sf->selector); |
2049 | ar = vmcs_read32(sf->ar_bytes); | 2084 | ar = vmcs_read32(sf->ar_bytes); |
2085 | use_saved_rmode_seg: | ||
2050 | if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state) | 2086 | if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state) |
2051 | ar = 0; | 2087 | ar = 0; |
2052 | var->type = ar & 15; | 2088 | var->type = ar & 15; |
@@ -2060,6 +2096,18 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, | |||
2060 | var->unusable = (ar >> 16) & 1; | 2096 | var->unusable = (ar >> 16) & 1; |
2061 | } | 2097 | } |
2062 | 2098 | ||
2099 | static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) | ||
2100 | { | ||
2101 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | ||
2102 | struct kvm_segment s; | ||
2103 | |||
2104 | if (to_vmx(vcpu)->rmode.vm86_active) { | ||
2105 | vmx_get_segment(vcpu, &s, seg); | ||
2106 | return s.base; | ||
2107 | } | ||
2108 | return vmcs_readl(sf->base); | ||
2109 | } | ||
2110 | |||
2063 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) | 2111 | static int vmx_get_cpl(struct kvm_vcpu *vcpu) |
2064 | { | 2112 | { |
2065 | if (!is_protmode(vcpu)) | 2113 | if (!is_protmode(vcpu)) |
@@ -2101,6 +2149,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, | |||
2101 | u32 ar; | 2149 | u32 ar; |
2102 | 2150 | ||
2103 | if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) { | 2151 | if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) { |
2152 | vmcs_write16(sf->selector, var->selector); | ||
2104 | vmx->rmode.tr.selector = var->selector; | 2153 | vmx->rmode.tr.selector = var->selector; |
2105 | vmx->rmode.tr.base = var->base; | 2154 | vmx->rmode.tr.base = var->base; |
2106 | vmx->rmode.tr.limit = var->limit; | 2155 | vmx->rmode.tr.limit = var->limit; |
@@ -2361,11 +2410,12 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu) | |||
2361 | 2410 | ||
2362 | static int init_rmode_tss(struct kvm *kvm) | 2411 | static int init_rmode_tss(struct kvm *kvm) |
2363 | { | 2412 | { |
2364 | gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT; | 2413 | gfn_t fn; |
2365 | u16 data = 0; | 2414 | u16 data = 0; |
2366 | int ret = 0; | 2415 | int r, idx, ret = 0; |
2367 | int r; | ||
2368 | 2416 | ||
2417 | idx = srcu_read_lock(&kvm->srcu); | ||
2418 | fn = rmode_tss_base(kvm) >> PAGE_SHIFT; | ||
2369 | r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); | 2419 | r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE); |
2370 | if (r < 0) | 2420 | if (r < 0) |
2371 | goto out; | 2421 | goto out; |
@@ -2389,12 +2439,13 @@ static int init_rmode_tss(struct kvm *kvm) | |||
2389 | 2439 | ||
2390 | ret = 1; | 2440 | ret = 1; |
2391 | out: | 2441 | out: |
2442 | srcu_read_unlock(&kvm->srcu, idx); | ||
2392 | return ret; | 2443 | return ret; |
2393 | } | 2444 | } |
2394 | 2445 | ||
2395 | static int init_rmode_identity_map(struct kvm *kvm) | 2446 | static int init_rmode_identity_map(struct kvm *kvm) |
2396 | { | 2447 | { |
2397 | int i, r, ret; | 2448 | int i, idx, r, ret; |
2398 | pfn_t identity_map_pfn; | 2449 | pfn_t identity_map_pfn; |
2399 | u32 tmp; | 2450 | u32 tmp; |
2400 | 2451 | ||
@@ -2409,6 +2460,7 @@ static int init_rmode_identity_map(struct kvm *kvm) | |||
2409 | return 1; | 2460 | return 1; |
2410 | ret = 0; | 2461 | ret = 0; |
2411 | identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT; | 2462 | identity_map_pfn = kvm->arch.ept_identity_map_addr >> PAGE_SHIFT; |
2463 | idx = srcu_read_lock(&kvm->srcu); | ||
2412 | r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); | 2464 | r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); |
2413 | if (r < 0) | 2465 | if (r < 0) |
2414 | goto out; | 2466 | goto out; |
@@ -2424,6 +2476,7 @@ static int init_rmode_identity_map(struct kvm *kvm) | |||
2424 | kvm->arch.ept_identity_pagetable_done = true; | 2476 | kvm->arch.ept_identity_pagetable_done = true; |
2425 | ret = 1; | 2477 | ret = 1; |
2426 | out: | 2478 | out: |
2479 | srcu_read_unlock(&kvm->srcu, idx); | ||
2427 | return ret; | 2480 | return ret; |
2428 | } | 2481 | } |
2429 | 2482 | ||
@@ -2699,22 +2752,6 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2699 | return 0; | 2752 | return 0; |
2700 | } | 2753 | } |
2701 | 2754 | ||
2702 | static int init_rmode(struct kvm *kvm) | ||
2703 | { | ||
2704 | int idx, ret = 0; | ||
2705 | |||
2706 | idx = srcu_read_lock(&kvm->srcu); | ||
2707 | if (!init_rmode_tss(kvm)) | ||
2708 | goto exit; | ||
2709 | if (!init_rmode_identity_map(kvm)) | ||
2710 | goto exit; | ||
2711 | |||
2712 | ret = 1; | ||
2713 | exit: | ||
2714 | srcu_read_unlock(&kvm->srcu, idx); | ||
2715 | return ret; | ||
2716 | } | ||
2717 | |||
2718 | static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | 2755 | static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) |
2719 | { | 2756 | { |
2720 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2757 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
@@ -2722,10 +2759,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2722 | int ret; | 2759 | int ret; |
2723 | 2760 | ||
2724 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); | 2761 | vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)); |
2725 | if (!init_rmode(vmx->vcpu.kvm)) { | ||
2726 | ret = -ENOMEM; | ||
2727 | goto out; | ||
2728 | } | ||
2729 | 2762 | ||
2730 | vmx->rmode.vm86_active = 0; | 2763 | vmx->rmode.vm86_active = 0; |
2731 | 2764 | ||
@@ -2805,7 +2838,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2805 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); | 2838 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0); |
2806 | if (vm_need_tpr_shadow(vmx->vcpu.kvm)) | 2839 | if (vm_need_tpr_shadow(vmx->vcpu.kvm)) |
2807 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, | 2840 | vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, |
2808 | page_to_phys(vmx->vcpu.arch.apic->regs_page)); | 2841 | __pa(vmx->vcpu.arch.apic->regs)); |
2809 | vmcs_write32(TPR_THRESHOLD, 0); | 2842 | vmcs_write32(TPR_THRESHOLD, 0); |
2810 | } | 2843 | } |
2811 | 2844 | ||
@@ -2971,6 +3004,9 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) | |||
2971 | if (ret) | 3004 | if (ret) |
2972 | return ret; | 3005 | return ret; |
2973 | kvm->arch.tss_addr = addr; | 3006 | kvm->arch.tss_addr = addr; |
3007 | if (!init_rmode_tss(kvm)) | ||
3008 | return -ENOMEM; | ||
3009 | |||
2974 | return 0; | 3010 | return 0; |
2975 | } | 3011 | } |
2976 | 3012 | ||
@@ -3962,7 +3998,7 @@ static void vmx_cancel_injection(struct kvm_vcpu *vcpu) | |||
3962 | #define Q "l" | 3998 | #define Q "l" |
3963 | #endif | 3999 | #endif |
3964 | 4000 | ||
3965 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | 4001 | static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) |
3966 | { | 4002 | { |
3967 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 4003 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3968 | 4004 | ||
@@ -3991,6 +4027,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
3991 | asm( | 4027 | asm( |
3992 | /* Store host registers */ | 4028 | /* Store host registers */ |
3993 | "push %%"R"dx; push %%"R"bp;" | 4029 | "push %%"R"dx; push %%"R"bp;" |
4030 | "push %%"R"cx \n\t" /* placeholder for guest rcx */ | ||
3994 | "push %%"R"cx \n\t" | 4031 | "push %%"R"cx \n\t" |
3995 | "cmp %%"R"sp, %c[host_rsp](%0) \n\t" | 4032 | "cmp %%"R"sp, %c[host_rsp](%0) \n\t" |
3996 | "je 1f \n\t" | 4033 | "je 1f \n\t" |
@@ -4032,10 +4069,11 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
4032 | ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t" | 4069 | ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t" |
4033 | ".Lkvm_vmx_return: " | 4070 | ".Lkvm_vmx_return: " |
4034 | /* Save guest registers, load host registers, keep flags */ | 4071 | /* Save guest registers, load host registers, keep flags */ |
4035 | "xchg %0, (%%"R"sp) \n\t" | 4072 | "mov %0, %c[wordsize](%%"R"sp) \n\t" |
4073 | "pop %0 \n\t" | ||
4036 | "mov %%"R"ax, %c[rax](%0) \n\t" | 4074 | "mov %%"R"ax, %c[rax](%0) \n\t" |
4037 | "mov %%"R"bx, %c[rbx](%0) \n\t" | 4075 | "mov %%"R"bx, %c[rbx](%0) \n\t" |
4038 | "push"Q" (%%"R"sp); pop"Q" %c[rcx](%0) \n\t" | 4076 | "pop"Q" %c[rcx](%0) \n\t" |
4039 | "mov %%"R"dx, %c[rdx](%0) \n\t" | 4077 | "mov %%"R"dx, %c[rdx](%0) \n\t" |
4040 | "mov %%"R"si, %c[rsi](%0) \n\t" | 4078 | "mov %%"R"si, %c[rsi](%0) \n\t" |
4041 | "mov %%"R"di, %c[rdi](%0) \n\t" | 4079 | "mov %%"R"di, %c[rdi](%0) \n\t" |
@@ -4053,7 +4091,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
4053 | "mov %%cr2, %%"R"ax \n\t" | 4091 | "mov %%cr2, %%"R"ax \n\t" |
4054 | "mov %%"R"ax, %c[cr2](%0) \n\t" | 4092 | "mov %%"R"ax, %c[cr2](%0) \n\t" |
4055 | 4093 | ||
4056 | "pop %%"R"bp; pop %%"R"bp; pop %%"R"dx \n\t" | 4094 | "pop %%"R"bp; pop %%"R"dx \n\t" |
4057 | "setbe %c[fail](%0) \n\t" | 4095 | "setbe %c[fail](%0) \n\t" |
4058 | : : "c"(vmx), "d"((unsigned long)HOST_RSP), | 4096 | : : "c"(vmx), "d"((unsigned long)HOST_RSP), |
4059 | [launched]"i"(offsetof(struct vcpu_vmx, launched)), | 4097 | [launched]"i"(offsetof(struct vcpu_vmx, launched)), |
@@ -4076,7 +4114,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
4076 | [r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])), | 4114 | [r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])), |
4077 | [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])), | 4115 | [r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])), |
4078 | #endif | 4116 | #endif |
4079 | [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)) | 4117 | [cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)), |
4118 | [wordsize]"i"(sizeof(ulong)) | ||
4080 | : "cc", "memory" | 4119 | : "cc", "memory" |
4081 | , R"ax", R"bx", R"di", R"si" | 4120 | , R"ax", R"bx", R"di", R"si" |
4082 | #ifdef CONFIG_X86_64 | 4121 | #ifdef CONFIG_X86_64 |
@@ -4183,8 +4222,11 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
4183 | if (!kvm->arch.ept_identity_map_addr) | 4222 | if (!kvm->arch.ept_identity_map_addr) |
4184 | kvm->arch.ept_identity_map_addr = | 4223 | kvm->arch.ept_identity_map_addr = |
4185 | VMX_EPT_IDENTITY_PAGETABLE_ADDR; | 4224 | VMX_EPT_IDENTITY_PAGETABLE_ADDR; |
4225 | err = -ENOMEM; | ||
4186 | if (alloc_identity_pagetable(kvm) != 0) | 4226 | if (alloc_identity_pagetable(kvm) != 0) |
4187 | goto free_vmcs; | 4227 | goto free_vmcs; |
4228 | if (!init_rmode_identity_map(kvm)) | ||
4229 | goto free_vmcs; | ||
4188 | } | 4230 | } |
4189 | 4231 | ||
4190 | return &vmx->vcpu; | 4232 | return &vmx->vcpu; |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bcc0efce85bf..58f517b59645 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -81,9 +81,10 @@ | |||
81 | * - enable LME and LMA per default on 64 bit KVM | 81 | * - enable LME and LMA per default on 64 bit KVM |
82 | */ | 82 | */ |
83 | #ifdef CONFIG_X86_64 | 83 | #ifdef CONFIG_X86_64 |
84 | static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffafeULL; | 84 | static |
85 | u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA)); | ||
85 | #else | 86 | #else |
86 | static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL; | 87 | static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); |
87 | #endif | 88 | #endif |
88 | 89 | ||
89 | #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM | 90 | #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM |
@@ -360,8 +361,8 @@ void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) | |||
360 | 361 | ||
361 | void kvm_inject_nmi(struct kvm_vcpu *vcpu) | 362 | void kvm_inject_nmi(struct kvm_vcpu *vcpu) |
362 | { | 363 | { |
364 | kvm_make_request(KVM_REQ_NMI, vcpu); | ||
363 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 365 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
364 | vcpu->arch.nmi_pending = 1; | ||
365 | } | 366 | } |
366 | EXPORT_SYMBOL_GPL(kvm_inject_nmi); | 367 | EXPORT_SYMBOL_GPL(kvm_inject_nmi); |
367 | 368 | ||
@@ -525,8 +526,10 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
525 | 526 | ||
526 | kvm_x86_ops->set_cr0(vcpu, cr0); | 527 | kvm_x86_ops->set_cr0(vcpu, cr0); |
527 | 528 | ||
528 | if ((cr0 ^ old_cr0) & X86_CR0_PG) | 529 | if ((cr0 ^ old_cr0) & X86_CR0_PG) { |
529 | kvm_clear_async_pf_completion_queue(vcpu); | 530 | kvm_clear_async_pf_completion_queue(vcpu); |
531 | kvm_async_pf_hash_reset(vcpu); | ||
532 | } | ||
530 | 533 | ||
531 | if ((cr0 ^ old_cr0) & update_bits) | 534 | if ((cr0 ^ old_cr0) & update_bits) |
532 | kvm_mmu_reset_context(vcpu); | 535 | kvm_mmu_reset_context(vcpu); |
@@ -1017,7 +1020,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) | |||
1017 | unsigned long flags; | 1020 | unsigned long flags; |
1018 | s64 sdiff; | 1021 | s64 sdiff; |
1019 | 1022 | ||
1020 | spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); | 1023 | raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags); |
1021 | offset = data - native_read_tsc(); | 1024 | offset = data - native_read_tsc(); |
1022 | ns = get_kernel_ns(); | 1025 | ns = get_kernel_ns(); |
1023 | elapsed = ns - kvm->arch.last_tsc_nsec; | 1026 | elapsed = ns - kvm->arch.last_tsc_nsec; |
@@ -1028,7 +1031,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) | |||
1028 | /* | 1031 | /* |
1029 | * Special case: close write to TSC within 5 seconds of | 1032 | * Special case: close write to TSC within 5 seconds of |
1030 | * another CPU is interpreted as an attempt to synchronize | 1033 | * another CPU is interpreted as an attempt to synchronize |
1031 | * The 5 seconds is to accomodate host load / swapping as | 1034 | * The 5 seconds is to accommodate host load / swapping as |
1032 | * well as any reset of TSC during the boot process. | 1035 | * well as any reset of TSC during the boot process. |
1033 | * | 1036 | * |
1034 | * In that case, for a reliable TSC, we can match TSC offsets, | 1037 | * In that case, for a reliable TSC, we can match TSC offsets, |
@@ -1050,7 +1053,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data) | |||
1050 | kvm->arch.last_tsc_write = data; | 1053 | kvm->arch.last_tsc_write = data; |
1051 | kvm->arch.last_tsc_offset = offset; | 1054 | kvm->arch.last_tsc_offset = offset; |
1052 | kvm_x86_ops->write_tsc_offset(vcpu, offset); | 1055 | kvm_x86_ops->write_tsc_offset(vcpu, offset); |
1053 | spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); | 1056 | raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); |
1054 | 1057 | ||
1055 | /* Reset of TSC must disable overshoot protection below */ | 1058 | /* Reset of TSC must disable overshoot protection below */ |
1056 | vcpu->arch.hv_clock.tsc_timestamp = 0; | 1059 | vcpu->arch.hv_clock.tsc_timestamp = 0; |
@@ -1453,6 +1456,14 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) | |||
1453 | return 0; | 1456 | return 0; |
1454 | } | 1457 | } |
1455 | 1458 | ||
1459 | static void kvmclock_reset(struct kvm_vcpu *vcpu) | ||
1460 | { | ||
1461 | if (vcpu->arch.time_page) { | ||
1462 | kvm_release_page_dirty(vcpu->arch.time_page); | ||
1463 | vcpu->arch.time_page = NULL; | ||
1464 | } | ||
1465 | } | ||
1466 | |||
1456 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | 1467 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) |
1457 | { | 1468 | { |
1458 | switch (msr) { | 1469 | switch (msr) { |
@@ -1510,10 +1521,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1510 | break; | 1521 | break; |
1511 | case MSR_KVM_SYSTEM_TIME_NEW: | 1522 | case MSR_KVM_SYSTEM_TIME_NEW: |
1512 | case MSR_KVM_SYSTEM_TIME: { | 1523 | case MSR_KVM_SYSTEM_TIME: { |
1513 | if (vcpu->arch.time_page) { | 1524 | kvmclock_reset(vcpu); |
1514 | kvm_release_page_dirty(vcpu->arch.time_page); | ||
1515 | vcpu->arch.time_page = NULL; | ||
1516 | } | ||
1517 | 1525 | ||
1518 | vcpu->arch.time = data; | 1526 | vcpu->arch.time = data; |
1519 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); | 1527 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); |
@@ -1592,6 +1600,12 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1592 | } else | 1600 | } else |
1593 | return set_msr_hyperv(vcpu, msr, data); | 1601 | return set_msr_hyperv(vcpu, msr, data); |
1594 | break; | 1602 | break; |
1603 | case MSR_IA32_BBL_CR_CTL3: | ||
1604 | /* Drop writes to this legacy MSR -- see rdmsr | ||
1605 | * counterpart for further detail. | ||
1606 | */ | ||
1607 | pr_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data); | ||
1608 | break; | ||
1595 | default: | 1609 | default: |
1596 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) | 1610 | if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr)) |
1597 | return xen_hvm_config(vcpu, data); | 1611 | return xen_hvm_config(vcpu, data); |
@@ -1846,6 +1860,19 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
1846 | } else | 1860 | } else |
1847 | return get_msr_hyperv(vcpu, msr, pdata); | 1861 | return get_msr_hyperv(vcpu, msr, pdata); |
1848 | break; | 1862 | break; |
1863 | case MSR_IA32_BBL_CR_CTL3: | ||
1864 | /* This legacy MSR exists but isn't fully documented in current | ||
1865 | * silicon. It is however accessed by winxp in very narrow | ||
1866 | * scenarios where it sets bit #19, itself documented as | ||
1867 | * a "reserved" bit. Best effort attempt to source coherent | ||
1868 | * read data here should the balance of the register be | ||
1869 | * interpreted by the guest: | ||
1870 | * | ||
1871 | * L2 cache control register 3: 64GB range, 256KB size, | ||
1872 | * enabled, latency 0x1, configured | ||
1873 | */ | ||
1874 | data = 0xbe702111; | ||
1875 | break; | ||
1849 | default: | 1876 | default: |
1850 | if (!ignore_msrs) { | 1877 | if (!ignore_msrs) { |
1851 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); | 1878 | pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr); |
@@ -2100,8 +2127,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
2100 | if (check_tsc_unstable()) { | 2127 | if (check_tsc_unstable()) { |
2101 | kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta); | 2128 | kvm_x86_ops->adjust_tsc_offset(vcpu, -tsc_delta); |
2102 | vcpu->arch.tsc_catchup = 1; | 2129 | vcpu->arch.tsc_catchup = 1; |
2103 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); | ||
2104 | } | 2130 | } |
2131 | kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); | ||
2105 | if (vcpu->cpu != cpu) | 2132 | if (vcpu->cpu != cpu) |
2106 | kvm_migrate_timers(vcpu); | 2133 | kvm_migrate_timers(vcpu); |
2107 | vcpu->cpu = cpu; | 2134 | vcpu->cpu = cpu; |
@@ -2575,9 +2602,6 @@ static int kvm_vcpu_ioctl_x86_set_mce(struct kvm_vcpu *vcpu, | |||
2575 | if (mce->status & MCI_STATUS_UC) { | 2602 | if (mce->status & MCI_STATUS_UC) { |
2576 | if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) || | 2603 | if ((vcpu->arch.mcg_status & MCG_STATUS_MCIP) || |
2577 | !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) { | 2604 | !kvm_read_cr4_bits(vcpu, X86_CR4_MCE)) { |
2578 | printk(KERN_DEBUG "kvm: set_mce: " | ||
2579 | "injects mce exception while " | ||
2580 | "previous one is in progress!\n"); | ||
2581 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); | 2605 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); |
2582 | return 0; | 2606 | return 0; |
2583 | } | 2607 | } |
@@ -2648,8 +2672,6 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2648 | vcpu->arch.interrupt.pending = events->interrupt.injected; | 2672 | vcpu->arch.interrupt.pending = events->interrupt.injected; |
2649 | vcpu->arch.interrupt.nr = events->interrupt.nr; | 2673 | vcpu->arch.interrupt.nr = events->interrupt.nr; |
2650 | vcpu->arch.interrupt.soft = events->interrupt.soft; | 2674 | vcpu->arch.interrupt.soft = events->interrupt.soft; |
2651 | if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) | ||
2652 | kvm_pic_clear_isr_ack(vcpu->kvm); | ||
2653 | if (events->flags & KVM_VCPUEVENT_VALID_SHADOW) | 2675 | if (events->flags & KVM_VCPUEVENT_VALID_SHADOW) |
2654 | kvm_x86_ops->set_interrupt_shadow(vcpu, | 2676 | kvm_x86_ops->set_interrupt_shadow(vcpu, |
2655 | events->interrupt.shadow); | 2677 | events->interrupt.shadow); |
@@ -4140,8 +4162,8 @@ static unsigned long emulator_get_cached_segment_base(int seg, | |||
4140 | return get_segment_base(vcpu, seg); | 4162 | return get_segment_base(vcpu, seg); |
4141 | } | 4163 | } |
4142 | 4164 | ||
4143 | static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, | 4165 | static bool emulator_get_cached_descriptor(struct desc_struct *desc, u32 *base3, |
4144 | struct kvm_vcpu *vcpu) | 4166 | int seg, struct kvm_vcpu *vcpu) |
4145 | { | 4167 | { |
4146 | struct kvm_segment var; | 4168 | struct kvm_segment var; |
4147 | 4169 | ||
@@ -4154,6 +4176,10 @@ static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, | |||
4154 | var.limit >>= 12; | 4176 | var.limit >>= 12; |
4155 | set_desc_limit(desc, var.limit); | 4177 | set_desc_limit(desc, var.limit); |
4156 | set_desc_base(desc, (unsigned long)var.base); | 4178 | set_desc_base(desc, (unsigned long)var.base); |
4179 | #ifdef CONFIG_X86_64 | ||
4180 | if (base3) | ||
4181 | *base3 = var.base >> 32; | ||
4182 | #endif | ||
4157 | desc->type = var.type; | 4183 | desc->type = var.type; |
4158 | desc->s = var.s; | 4184 | desc->s = var.s; |
4159 | desc->dpl = var.dpl; | 4185 | desc->dpl = var.dpl; |
@@ -4166,8 +4192,8 @@ static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, | |||
4166 | return true; | 4192 | return true; |
4167 | } | 4193 | } |
4168 | 4194 | ||
4169 | static void emulator_set_cached_descriptor(struct desc_struct *desc, int seg, | 4195 | static void emulator_set_cached_descriptor(struct desc_struct *desc, u32 base3, |
4170 | struct kvm_vcpu *vcpu) | 4196 | int seg, struct kvm_vcpu *vcpu) |
4171 | { | 4197 | { |
4172 | struct kvm_segment var; | 4198 | struct kvm_segment var; |
4173 | 4199 | ||
@@ -4175,6 +4201,9 @@ static void emulator_set_cached_descriptor(struct desc_struct *desc, int seg, | |||
4175 | kvm_get_segment(vcpu, &var, seg); | 4201 | kvm_get_segment(vcpu, &var, seg); |
4176 | 4202 | ||
4177 | var.base = get_desc_base(desc); | 4203 | var.base = get_desc_base(desc); |
4204 | #ifdef CONFIG_X86_64 | ||
4205 | var.base |= ((u64)base3) << 32; | ||
4206 | #endif | ||
4178 | var.limit = get_desc_limit(desc); | 4207 | var.limit = get_desc_limit(desc); |
4179 | if (desc->g) | 4208 | if (desc->g) |
4180 | var.limit = (var.limit << 12) | 0xfff; | 4209 | var.limit = (var.limit << 12) | 0xfff; |
@@ -4390,41 +4419,16 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, | |||
4390 | vcpu->arch.emulate_ctxt.have_exception = false; | 4419 | vcpu->arch.emulate_ctxt.have_exception = false; |
4391 | vcpu->arch.emulate_ctxt.perm_ok = false; | 4420 | vcpu->arch.emulate_ctxt.perm_ok = false; |
4392 | 4421 | ||
4422 | vcpu->arch.emulate_ctxt.only_vendor_specific_insn | ||
4423 | = emulation_type & EMULTYPE_TRAP_UD; | ||
4424 | |||
4393 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, insn, insn_len); | 4425 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, insn, insn_len); |
4394 | if (r == X86EMUL_PROPAGATE_FAULT) | ||
4395 | goto done; | ||
4396 | 4426 | ||
4397 | trace_kvm_emulate_insn_start(vcpu); | 4427 | trace_kvm_emulate_insn_start(vcpu); |
4398 | |||
4399 | /* Only allow emulation of specific instructions on #UD | ||
4400 | * (namely VMMCALL, sysenter, sysexit, syscall)*/ | ||
4401 | if (emulation_type & EMULTYPE_TRAP_UD) { | ||
4402 | if (!c->twobyte) | ||
4403 | return EMULATE_FAIL; | ||
4404 | switch (c->b) { | ||
4405 | case 0x01: /* VMMCALL */ | ||
4406 | if (c->modrm_mod != 3 || c->modrm_rm != 1) | ||
4407 | return EMULATE_FAIL; | ||
4408 | break; | ||
4409 | case 0x34: /* sysenter */ | ||
4410 | case 0x35: /* sysexit */ | ||
4411 | if (c->modrm_mod != 0 || c->modrm_rm != 0) | ||
4412 | return EMULATE_FAIL; | ||
4413 | break; | ||
4414 | case 0x05: /* syscall */ | ||
4415 | if (c->modrm_mod != 0 || c->modrm_rm != 0) | ||
4416 | return EMULATE_FAIL; | ||
4417 | break; | ||
4418 | default: | ||
4419 | return EMULATE_FAIL; | ||
4420 | } | ||
4421 | |||
4422 | if (!(c->modrm_reg == 0 || c->modrm_reg == 3)) | ||
4423 | return EMULATE_FAIL; | ||
4424 | } | ||
4425 | |||
4426 | ++vcpu->stat.insn_emulation; | 4428 | ++vcpu->stat.insn_emulation; |
4427 | if (r) { | 4429 | if (r) { |
4430 | if (emulation_type & EMULTYPE_TRAP_UD) | ||
4431 | return EMULATE_FAIL; | ||
4428 | if (reexecute_instruction(vcpu, cr2)) | 4432 | if (reexecute_instruction(vcpu, cr2)) |
4429 | return EMULATE_DONE; | 4433 | return EMULATE_DONE; |
4430 | if (emulation_type & EMULTYPE_SKIP) | 4434 | if (emulation_type & EMULTYPE_SKIP) |
@@ -4452,7 +4456,6 @@ restart: | |||
4452 | return handle_emulation_failure(vcpu); | 4456 | return handle_emulation_failure(vcpu); |
4453 | } | 4457 | } |
4454 | 4458 | ||
4455 | done: | ||
4456 | if (vcpu->arch.emulate_ctxt.have_exception) { | 4459 | if (vcpu->arch.emulate_ctxt.have_exception) { |
4457 | inject_emulated_exception(vcpu); | 4460 | inject_emulated_exception(vcpu); |
4458 | r = EMULATE_DONE; | 4461 | r = EMULATE_DONE; |
@@ -4562,7 +4565,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va | |||
4562 | 4565 | ||
4563 | smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); | 4566 | smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); |
4564 | 4567 | ||
4565 | spin_lock(&kvm_lock); | 4568 | raw_spin_lock(&kvm_lock); |
4566 | list_for_each_entry(kvm, &vm_list, vm_list) { | 4569 | list_for_each_entry(kvm, &vm_list, vm_list) { |
4567 | kvm_for_each_vcpu(i, vcpu, kvm) { | 4570 | kvm_for_each_vcpu(i, vcpu, kvm) { |
4568 | if (vcpu->cpu != freq->cpu) | 4571 | if (vcpu->cpu != freq->cpu) |
@@ -4572,7 +4575,7 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va | |||
4572 | send_ipi = 1; | 4575 | send_ipi = 1; |
4573 | } | 4576 | } |
4574 | } | 4577 | } |
4575 | spin_unlock(&kvm_lock); | 4578 | raw_spin_unlock(&kvm_lock); |
4576 | 4579 | ||
4577 | if (freq->old < freq->new && send_ipi) { | 4580 | if (freq->old < freq->new && send_ipi) { |
4578 | /* | 4581 | /* |
@@ -5185,6 +5188,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5185 | r = 1; | 5188 | r = 1; |
5186 | goto out; | 5189 | goto out; |
5187 | } | 5190 | } |
5191 | if (kvm_check_request(KVM_REQ_NMI, vcpu)) | ||
5192 | vcpu->arch.nmi_pending = true; | ||
5188 | } | 5193 | } |
5189 | 5194 | ||
5190 | r = kvm_mmu_reload(vcpu); | 5195 | r = kvm_mmu_reload(vcpu); |
@@ -5213,14 +5218,18 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5213 | kvm_load_guest_fpu(vcpu); | 5218 | kvm_load_guest_fpu(vcpu); |
5214 | kvm_load_guest_xcr0(vcpu); | 5219 | kvm_load_guest_xcr0(vcpu); |
5215 | 5220 | ||
5216 | atomic_set(&vcpu->guest_mode, 1); | 5221 | vcpu->mode = IN_GUEST_MODE; |
5217 | smp_wmb(); | 5222 | |
5223 | /* We should set ->mode before check ->requests, | ||
5224 | * see the comment in make_all_cpus_request. | ||
5225 | */ | ||
5226 | smp_mb(); | ||
5218 | 5227 | ||
5219 | local_irq_disable(); | 5228 | local_irq_disable(); |
5220 | 5229 | ||
5221 | if (!atomic_read(&vcpu->guest_mode) || vcpu->requests | 5230 | if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests |
5222 | || need_resched() || signal_pending(current)) { | 5231 | || need_resched() || signal_pending(current)) { |
5223 | atomic_set(&vcpu->guest_mode, 0); | 5232 | vcpu->mode = OUTSIDE_GUEST_MODE; |
5224 | smp_wmb(); | 5233 | smp_wmb(); |
5225 | local_irq_enable(); | 5234 | local_irq_enable(); |
5226 | preempt_enable(); | 5235 | preempt_enable(); |
@@ -5256,7 +5265,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5256 | 5265 | ||
5257 | kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc); | 5266 | kvm_get_msr(vcpu, MSR_IA32_TSC, &vcpu->arch.last_guest_tsc); |
5258 | 5267 | ||
5259 | atomic_set(&vcpu->guest_mode, 0); | 5268 | vcpu->mode = OUTSIDE_GUEST_MODE; |
5260 | smp_wmb(); | 5269 | smp_wmb(); |
5261 | local_irq_enable(); | 5270 | local_irq_enable(); |
5262 | 5271 | ||
@@ -5574,7 +5583,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
5574 | struct kvm_sregs *sregs) | 5583 | struct kvm_sregs *sregs) |
5575 | { | 5584 | { |
5576 | int mmu_reset_needed = 0; | 5585 | int mmu_reset_needed = 0; |
5577 | int pending_vec, max_bits; | 5586 | int pending_vec, max_bits, idx; |
5578 | struct desc_ptr dt; | 5587 | struct desc_ptr dt; |
5579 | 5588 | ||
5580 | dt.size = sregs->idt.limit; | 5589 | dt.size = sregs->idt.limit; |
@@ -5603,10 +5612,13 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
5603 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); | 5612 | kvm_x86_ops->set_cr4(vcpu, sregs->cr4); |
5604 | if (sregs->cr4 & X86_CR4_OSXSAVE) | 5613 | if (sregs->cr4 & X86_CR4_OSXSAVE) |
5605 | update_cpuid(vcpu); | 5614 | update_cpuid(vcpu); |
5615 | |||
5616 | idx = srcu_read_lock(&vcpu->kvm->srcu); | ||
5606 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { | 5617 | if (!is_long_mode(vcpu) && is_pae(vcpu)) { |
5607 | load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); | 5618 | load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); |
5608 | mmu_reset_needed = 1; | 5619 | mmu_reset_needed = 1; |
5609 | } | 5620 | } |
5621 | srcu_read_unlock(&vcpu->kvm->srcu, idx); | ||
5610 | 5622 | ||
5611 | if (mmu_reset_needed) | 5623 | if (mmu_reset_needed) |
5612 | kvm_mmu_reset_context(vcpu); | 5624 | kvm_mmu_reset_context(vcpu); |
@@ -5617,8 +5629,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
5617 | if (pending_vec < max_bits) { | 5629 | if (pending_vec < max_bits) { |
5618 | kvm_queue_interrupt(vcpu, pending_vec, false); | 5630 | kvm_queue_interrupt(vcpu, pending_vec, false); |
5619 | pr_debug("Set back pending irq %d\n", pending_vec); | 5631 | pr_debug("Set back pending irq %d\n", pending_vec); |
5620 | if (irqchip_in_kernel(vcpu->kvm)) | ||
5621 | kvm_pic_clear_isr_ack(vcpu->kvm); | ||
5622 | } | 5632 | } |
5623 | 5633 | ||
5624 | kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); | 5634 | kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); |
@@ -5814,10 +5824,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) | |||
5814 | 5824 | ||
5815 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) | 5825 | void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) |
5816 | { | 5826 | { |
5817 | if (vcpu->arch.time_page) { | 5827 | kvmclock_reset(vcpu); |
5818 | kvm_release_page_dirty(vcpu->arch.time_page); | ||
5819 | vcpu->arch.time_page = NULL; | ||
5820 | } | ||
5821 | 5828 | ||
5822 | free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); | 5829 | free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); |
5823 | fx_free(vcpu); | 5830 | fx_free(vcpu); |
@@ -5878,6 +5885,8 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) | |||
5878 | kvm_make_request(KVM_REQ_EVENT, vcpu); | 5885 | kvm_make_request(KVM_REQ_EVENT, vcpu); |
5879 | vcpu->arch.apf.msr_val = 0; | 5886 | vcpu->arch.apf.msr_val = 0; |
5880 | 5887 | ||
5888 | kvmclock_reset(vcpu); | ||
5889 | |||
5881 | kvm_clear_async_pf_completion_queue(vcpu); | 5890 | kvm_clear_async_pf_completion_queue(vcpu); |
5882 | kvm_async_pf_hash_reset(vcpu); | 5891 | kvm_async_pf_hash_reset(vcpu); |
5883 | vcpu->arch.apf.halted = false; | 5892 | vcpu->arch.apf.halted = false; |
@@ -6005,7 +6014,7 @@ int kvm_arch_init_vm(struct kvm *kvm) | |||
6005 | /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ | 6014 | /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ |
6006 | set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); | 6015 | set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); |
6007 | 6016 | ||
6008 | spin_lock_init(&kvm->arch.tsc_write_lock); | 6017 | raw_spin_lock_init(&kvm->arch.tsc_write_lock); |
6009 | 6018 | ||
6010 | return 0; | 6019 | return 0; |
6011 | } | 6020 | } |
@@ -6103,7 +6112,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
6103 | int user_alloc) | 6112 | int user_alloc) |
6104 | { | 6113 | { |
6105 | 6114 | ||
6106 | int npages = mem->memory_size >> PAGE_SHIFT; | 6115 | int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; |
6107 | 6116 | ||
6108 | if (!user_alloc && !old.user_alloc && old.rmap && !npages) { | 6117 | if (!user_alloc && !old.user_alloc && old.rmap && !npages) { |
6109 | int ret; | 6118 | int ret; |
@@ -6118,12 +6127,12 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, | |||
6118 | "failed to munmap memory\n"); | 6127 | "failed to munmap memory\n"); |
6119 | } | 6128 | } |
6120 | 6129 | ||
6130 | if (!kvm->arch.n_requested_mmu_pages) | ||
6131 | nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); | ||
6132 | |||
6121 | spin_lock(&kvm->mmu_lock); | 6133 | spin_lock(&kvm->mmu_lock); |
6122 | if (!kvm->arch.n_requested_mmu_pages) { | 6134 | if (nr_mmu_pages) |
6123 | unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); | ||
6124 | kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); | 6135 | kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); |
6125 | } | ||
6126 | |||
6127 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); | 6136 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); |
6128 | spin_unlock(&kvm->mmu_lock); | 6137 | spin_unlock(&kvm->mmu_lock); |
6129 | } | 6138 | } |
@@ -6157,7 +6166,7 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) | |||
6157 | 6166 | ||
6158 | me = get_cpu(); | 6167 | me = get_cpu(); |
6159 | if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) | 6168 | if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) |
6160 | if (atomic_xchg(&vcpu->guest_mode, 0)) | 6169 | if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE) |
6161 | smp_send_reschedule(cpu); | 6170 | smp_send_reschedule(cpu); |
6162 | put_cpu(); | 6171 | put_cpu(); |
6163 | } | 6172 | } |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index eba687f0cc0c..1cd608973ce5 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -397,7 +397,7 @@ static void lguest_load_tr_desc(void) | |||
397 | * instead we just use the real "cpuid" instruction. Then I pretty much turned | 397 | * instead we just use the real "cpuid" instruction. Then I pretty much turned |
398 | * off feature bits until the Guest booted. (Don't say that: you'll damage | 398 | * off feature bits until the Guest booted. (Don't say that: you'll damage |
399 | * lguest sales!) Shut up, inner voice! (Hey, just pointing out that this is | 399 | * lguest sales!) Shut up, inner voice! (Hey, just pointing out that this is |
400 | * hardly future proof.) Noone's listening! They don't like you anyway, | 400 | * hardly future proof.) No one's listening! They don't like you anyway, |
401 | * parenthetic weirdo! | 401 | * parenthetic weirdo! |
402 | * | 402 | * |
403 | * Replacing the cpuid so we can turn features off is great for the kernel, but | 403 | * Replacing the cpuid so we can turn features off is great for the kernel, but |
@@ -847,7 +847,7 @@ static void __init lguest_init_IRQ(void) | |||
847 | void lguest_setup_irq(unsigned int irq) | 847 | void lguest_setup_irq(unsigned int irq) |
848 | { | 848 | { |
849 | irq_alloc_desc_at(irq, 0); | 849 | irq_alloc_desc_at(irq, 0); |
850 | set_irq_chip_and_handler_name(irq, &lguest_irq_controller, | 850 | irq_set_chip_and_handler_name(irq, &lguest_irq_controller, |
851 | handle_level_irq, "level"); | 851 | handle_level_irq, "level"); |
852 | } | 852 | } |
853 | 853 | ||
@@ -995,7 +995,7 @@ static void lguest_time_irq(unsigned int irq, struct irq_desc *desc) | |||
995 | static void lguest_time_init(void) | 995 | static void lguest_time_init(void) |
996 | { | 996 | { |
997 | /* Set up the timer interrupt (0) to go to our simple timer routine */ | 997 | /* Set up the timer interrupt (0) to go to our simple timer routine */ |
998 | set_irq_handler(0, lguest_time_irq); | 998 | irq_set_handler(0, lguest_time_irq); |
999 | 999 | ||
1000 | clocksource_register(&lguest_clock); | 1000 | clocksource_register(&lguest_clock); |
1001 | 1001 | ||
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index e10cf070ede0..f2479f19ddde 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
@@ -42,4 +42,5 @@ else | |||
42 | lib-y += memmove_64.o memset_64.o | 42 | lib-y += memmove_64.o memset_64.o |
43 | lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o | 43 | lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o |
44 | lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o | 44 | lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o |
45 | lib-y += cmpxchg16b_emu.o | ||
45 | endif | 46 | endif |
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S index 2cda60a06e65..e8e7e0d06f42 100644 --- a/arch/x86/lib/atomic64_386_32.S +++ b/arch/x86/lib/atomic64_386_32.S | |||
@@ -15,14 +15,12 @@ | |||
15 | 15 | ||
16 | /* if you want SMP support, implement these with real spinlocks */ | 16 | /* if you want SMP support, implement these with real spinlocks */ |
17 | .macro LOCK reg | 17 | .macro LOCK reg |
18 | pushfl | 18 | pushfl_cfi |
19 | CFI_ADJUST_CFA_OFFSET 4 | ||
20 | cli | 19 | cli |
21 | .endm | 20 | .endm |
22 | 21 | ||
23 | .macro UNLOCK reg | 22 | .macro UNLOCK reg |
24 | popfl | 23 | popfl_cfi |
25 | CFI_ADJUST_CFA_OFFSET -4 | ||
26 | .endm | 24 | .endm |
27 | 25 | ||
28 | #define BEGIN(op) \ | 26 | #define BEGIN(op) \ |
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S index 71e080de3352..391a083674b4 100644 --- a/arch/x86/lib/atomic64_cx8_32.S +++ b/arch/x86/lib/atomic64_cx8_32.S | |||
@@ -14,14 +14,12 @@ | |||
14 | #include <asm/dwarf2.h> | 14 | #include <asm/dwarf2.h> |
15 | 15 | ||
16 | .macro SAVE reg | 16 | .macro SAVE reg |
17 | pushl %\reg | 17 | pushl_cfi %\reg |
18 | CFI_ADJUST_CFA_OFFSET 4 | ||
19 | CFI_REL_OFFSET \reg, 0 | 18 | CFI_REL_OFFSET \reg, 0 |
20 | .endm | 19 | .endm |
21 | 20 | ||
22 | .macro RESTORE reg | 21 | .macro RESTORE reg |
23 | popl %\reg | 22 | popl_cfi %\reg |
24 | CFI_ADJUST_CFA_OFFSET -4 | ||
25 | CFI_RESTORE \reg | 23 | CFI_RESTORE \reg |
26 | .endm | 24 | .endm |
27 | 25 | ||
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S index adbccd0bbb78..78d16a554db0 100644 --- a/arch/x86/lib/checksum_32.S +++ b/arch/x86/lib/checksum_32.S | |||
@@ -50,11 +50,9 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) | |||
50 | */ | 50 | */ |
51 | ENTRY(csum_partial) | 51 | ENTRY(csum_partial) |
52 | CFI_STARTPROC | 52 | CFI_STARTPROC |
53 | pushl %esi | 53 | pushl_cfi %esi |
54 | CFI_ADJUST_CFA_OFFSET 4 | ||
55 | CFI_REL_OFFSET esi, 0 | 54 | CFI_REL_OFFSET esi, 0 |
56 | pushl %ebx | 55 | pushl_cfi %ebx |
57 | CFI_ADJUST_CFA_OFFSET 4 | ||
58 | CFI_REL_OFFSET ebx, 0 | 56 | CFI_REL_OFFSET ebx, 0 |
59 | movl 20(%esp),%eax # Function arg: unsigned int sum | 57 | movl 20(%esp),%eax # Function arg: unsigned int sum |
60 | movl 16(%esp),%ecx # Function arg: int len | 58 | movl 16(%esp),%ecx # Function arg: int len |
@@ -132,11 +130,9 @@ ENTRY(csum_partial) | |||
132 | jz 8f | 130 | jz 8f |
133 | roll $8, %eax | 131 | roll $8, %eax |
134 | 8: | 132 | 8: |
135 | popl %ebx | 133 | popl_cfi %ebx |
136 | CFI_ADJUST_CFA_OFFSET -4 | ||
137 | CFI_RESTORE ebx | 134 | CFI_RESTORE ebx |
138 | popl %esi | 135 | popl_cfi %esi |
139 | CFI_ADJUST_CFA_OFFSET -4 | ||
140 | CFI_RESTORE esi | 136 | CFI_RESTORE esi |
141 | ret | 137 | ret |
142 | CFI_ENDPROC | 138 | CFI_ENDPROC |
@@ -148,11 +144,9 @@ ENDPROC(csum_partial) | |||
148 | 144 | ||
149 | ENTRY(csum_partial) | 145 | ENTRY(csum_partial) |
150 | CFI_STARTPROC | 146 | CFI_STARTPROC |
151 | pushl %esi | 147 | pushl_cfi %esi |
152 | CFI_ADJUST_CFA_OFFSET 4 | ||
153 | CFI_REL_OFFSET esi, 0 | 148 | CFI_REL_OFFSET esi, 0 |
154 | pushl %ebx | 149 | pushl_cfi %ebx |
155 | CFI_ADJUST_CFA_OFFSET 4 | ||
156 | CFI_REL_OFFSET ebx, 0 | 150 | CFI_REL_OFFSET ebx, 0 |
157 | movl 20(%esp),%eax # Function arg: unsigned int sum | 151 | movl 20(%esp),%eax # Function arg: unsigned int sum |
158 | movl 16(%esp),%ecx # Function arg: int len | 152 | movl 16(%esp),%ecx # Function arg: int len |
@@ -260,11 +254,9 @@ ENTRY(csum_partial) | |||
260 | jz 90f | 254 | jz 90f |
261 | roll $8, %eax | 255 | roll $8, %eax |
262 | 90: | 256 | 90: |
263 | popl %ebx | 257 | popl_cfi %ebx |
264 | CFI_ADJUST_CFA_OFFSET -4 | ||
265 | CFI_RESTORE ebx | 258 | CFI_RESTORE ebx |
266 | popl %esi | 259 | popl_cfi %esi |
267 | CFI_ADJUST_CFA_OFFSET -4 | ||
268 | CFI_RESTORE esi | 260 | CFI_RESTORE esi |
269 | ret | 261 | ret |
270 | CFI_ENDPROC | 262 | CFI_ENDPROC |
@@ -309,14 +301,11 @@ ENTRY(csum_partial_copy_generic) | |||
309 | CFI_STARTPROC | 301 | CFI_STARTPROC |
310 | subl $4,%esp | 302 | subl $4,%esp |
311 | CFI_ADJUST_CFA_OFFSET 4 | 303 | CFI_ADJUST_CFA_OFFSET 4 |
312 | pushl %edi | 304 | pushl_cfi %edi |
313 | CFI_ADJUST_CFA_OFFSET 4 | ||
314 | CFI_REL_OFFSET edi, 0 | 305 | CFI_REL_OFFSET edi, 0 |
315 | pushl %esi | 306 | pushl_cfi %esi |
316 | CFI_ADJUST_CFA_OFFSET 4 | ||
317 | CFI_REL_OFFSET esi, 0 | 307 | CFI_REL_OFFSET esi, 0 |
318 | pushl %ebx | 308 | pushl_cfi %ebx |
319 | CFI_ADJUST_CFA_OFFSET 4 | ||
320 | CFI_REL_OFFSET ebx, 0 | 309 | CFI_REL_OFFSET ebx, 0 |
321 | movl ARGBASE+16(%esp),%eax # sum | 310 | movl ARGBASE+16(%esp),%eax # sum |
322 | movl ARGBASE+12(%esp),%ecx # len | 311 | movl ARGBASE+12(%esp),%ecx # len |
@@ -426,17 +415,13 @@ DST( movb %cl, (%edi) ) | |||
426 | 415 | ||
427 | .previous | 416 | .previous |
428 | 417 | ||
429 | popl %ebx | 418 | popl_cfi %ebx |
430 | CFI_ADJUST_CFA_OFFSET -4 | ||
431 | CFI_RESTORE ebx | 419 | CFI_RESTORE ebx |
432 | popl %esi | 420 | popl_cfi %esi |
433 | CFI_ADJUST_CFA_OFFSET -4 | ||
434 | CFI_RESTORE esi | 421 | CFI_RESTORE esi |
435 | popl %edi | 422 | popl_cfi %edi |
436 | CFI_ADJUST_CFA_OFFSET -4 | ||
437 | CFI_RESTORE edi | 423 | CFI_RESTORE edi |
438 | popl %ecx # equivalent to addl $4,%esp | 424 | popl_cfi %ecx # equivalent to addl $4,%esp |
439 | CFI_ADJUST_CFA_OFFSET -4 | ||
440 | ret | 425 | ret |
441 | CFI_ENDPROC | 426 | CFI_ENDPROC |
442 | ENDPROC(csum_partial_copy_generic) | 427 | ENDPROC(csum_partial_copy_generic) |
@@ -459,14 +444,11 @@ ENDPROC(csum_partial_copy_generic) | |||
459 | 444 | ||
460 | ENTRY(csum_partial_copy_generic) | 445 | ENTRY(csum_partial_copy_generic) |
461 | CFI_STARTPROC | 446 | CFI_STARTPROC |
462 | pushl %ebx | 447 | pushl_cfi %ebx |
463 | CFI_ADJUST_CFA_OFFSET 4 | ||
464 | CFI_REL_OFFSET ebx, 0 | 448 | CFI_REL_OFFSET ebx, 0 |
465 | pushl %edi | 449 | pushl_cfi %edi |
466 | CFI_ADJUST_CFA_OFFSET 4 | ||
467 | CFI_REL_OFFSET edi, 0 | 450 | CFI_REL_OFFSET edi, 0 |
468 | pushl %esi | 451 | pushl_cfi %esi |
469 | CFI_ADJUST_CFA_OFFSET 4 | ||
470 | CFI_REL_OFFSET esi, 0 | 452 | CFI_REL_OFFSET esi, 0 |
471 | movl ARGBASE+4(%esp),%esi #src | 453 | movl ARGBASE+4(%esp),%esi #src |
472 | movl ARGBASE+8(%esp),%edi #dst | 454 | movl ARGBASE+8(%esp),%edi #dst |
@@ -527,14 +509,11 @@ DST( movb %dl, (%edi) ) | |||
527 | jmp 7b | 509 | jmp 7b |
528 | .previous | 510 | .previous |
529 | 511 | ||
530 | popl %esi | 512 | popl_cfi %esi |
531 | CFI_ADJUST_CFA_OFFSET -4 | ||
532 | CFI_RESTORE esi | 513 | CFI_RESTORE esi |
533 | popl %edi | 514 | popl_cfi %edi |
534 | CFI_ADJUST_CFA_OFFSET -4 | ||
535 | CFI_RESTORE edi | 515 | CFI_RESTORE edi |
536 | popl %ebx | 516 | popl_cfi %ebx |
537 | CFI_ADJUST_CFA_OFFSET -4 | ||
538 | CFI_RESTORE ebx | 517 | CFI_RESTORE ebx |
539 | ret | 518 | ret |
540 | CFI_ENDPROC | 519 | CFI_ENDPROC |
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S new file mode 100644 index 000000000000..3e8b08a6de2b --- /dev/null +++ b/arch/x86/lib/cmpxchg16b_emu.S | |||
@@ -0,0 +1,59 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or | ||
3 | * modify it under the terms of the GNU General Public License | ||
4 | * as published by the Free Software Foundation; version 2 | ||
5 | * of the License. | ||
6 | * | ||
7 | */ | ||
8 | #include <linux/linkage.h> | ||
9 | #include <asm/alternative-asm.h> | ||
10 | #include <asm/frame.h> | ||
11 | #include <asm/dwarf2.h> | ||
12 | |||
13 | .text | ||
14 | |||
15 | /* | ||
16 | * Inputs: | ||
17 | * %rsi : memory location to compare | ||
18 | * %rax : low 64 bits of old value | ||
19 | * %rdx : high 64 bits of old value | ||
20 | * %rbx : low 64 bits of new value | ||
21 | * %rcx : high 64 bits of new value | ||
22 | * %al : Operation successful | ||
23 | */ | ||
24 | ENTRY(this_cpu_cmpxchg16b_emu) | ||
25 | CFI_STARTPROC | ||
26 | |||
27 | # | ||
28 | # Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not | ||
29 | # via the ZF. Caller will access %al to get result. | ||
30 | # | ||
31 | # Note that this is only useful for a cpuops operation. Meaning that we | ||
32 | # do *not* have a fully atomic operation but just an operation that is | ||
33 | # *atomic* on a single cpu (as provided by the this_cpu_xx class of | ||
34 | # macros). | ||
35 | # | ||
36 | this_cpu_cmpxchg16b_emu: | ||
37 | pushf | ||
38 | cli | ||
39 | |||
40 | cmpq %gs:(%rsi), %rax | ||
41 | jne not_same | ||
42 | cmpq %gs:8(%rsi), %rdx | ||
43 | jne not_same | ||
44 | |||
45 | movq %rbx, %gs:(%rsi) | ||
46 | movq %rcx, %gs:8(%rsi) | ||
47 | |||
48 | popf | ||
49 | mov $1, %al | ||
50 | ret | ||
51 | |||
52 | not_same: | ||
53 | popf | ||
54 | xor %al,%al | ||
55 | ret | ||
56 | |||
57 | CFI_ENDPROC | ||
58 | |||
59 | ENDPROC(this_cpu_cmpxchg16b_emu) | ||
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index a460158b5ac5..99e482615195 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S | |||
@@ -117,7 +117,7 @@ ENDPROC(bad_from_user) | |||
117 | * rdx count | 117 | * rdx count |
118 | * | 118 | * |
119 | * Output: | 119 | * Output: |
120 | * eax uncopied bytes or 0 if successfull. | 120 | * eax uncopied bytes or 0 if successful. |
121 | */ | 121 | */ |
122 | ENTRY(copy_user_generic_unrolled) | 122 | ENTRY(copy_user_generic_unrolled) |
123 | CFI_STARTPROC | 123 | CFI_STARTPROC |
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S index f0dba36578ea..fb903b758da8 100644 --- a/arch/x86/lib/csum-copy_64.S +++ b/arch/x86/lib/csum-copy_64.S | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright 2002,2003 Andi Kleen, SuSE Labs. | 2 | * Copyright 2002, 2003 Andi Kleen, SuSE Labs. |
3 | * | 3 | * |
4 | * This file is subject to the terms and conditions of the GNU General Public | 4 | * This file is subject to the terms and conditions of the GNU General Public |
5 | * License. See the file COPYING in the main directory of this archive | 5 | * License. See the file COPYING in the main directory of this archive |
6 | * for more details. No warranty for anything given at all. | 6 | * for more details. No warranty for anything given at all. |
@@ -11,82 +11,82 @@ | |||
11 | 11 | ||
12 | /* | 12 | /* |
13 | * Checksum copy with exception handling. | 13 | * Checksum copy with exception handling. |
14 | * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the | 14 | * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the |
15 | * destination is zeroed. | 15 | * destination is zeroed. |
16 | * | 16 | * |
17 | * Input | 17 | * Input |
18 | * rdi source | 18 | * rdi source |
19 | * rsi destination | 19 | * rsi destination |
20 | * edx len (32bit) | 20 | * edx len (32bit) |
21 | * ecx sum (32bit) | 21 | * ecx sum (32bit) |
22 | * r8 src_err_ptr (int) | 22 | * r8 src_err_ptr (int) |
23 | * r9 dst_err_ptr (int) | 23 | * r9 dst_err_ptr (int) |
24 | * | 24 | * |
25 | * Output | 25 | * Output |
26 | * eax 64bit sum. undefined in case of exception. | 26 | * eax 64bit sum. undefined in case of exception. |
27 | * | 27 | * |
28 | * Wrappers need to take care of valid exception sum and zeroing. | 28 | * Wrappers need to take care of valid exception sum and zeroing. |
29 | * They also should align source or destination to 8 bytes. | 29 | * They also should align source or destination to 8 bytes. |
30 | */ | 30 | */ |
31 | 31 | ||
32 | .macro source | 32 | .macro source |
33 | 10: | 33 | 10: |
34 | .section __ex_table,"a" | 34 | .section __ex_table, "a" |
35 | .align 8 | 35 | .align 8 |
36 | .quad 10b,.Lbad_source | 36 | .quad 10b, .Lbad_source |
37 | .previous | 37 | .previous |
38 | .endm | 38 | .endm |
39 | 39 | ||
40 | .macro dest | 40 | .macro dest |
41 | 20: | 41 | 20: |
42 | .section __ex_table,"a" | 42 | .section __ex_table, "a" |
43 | .align 8 | 43 | .align 8 |
44 | .quad 20b,.Lbad_dest | 44 | .quad 20b, .Lbad_dest |
45 | .previous | 45 | .previous |
46 | .endm | 46 | .endm |
47 | 47 | ||
48 | .macro ignore L=.Lignore | 48 | .macro ignore L=.Lignore |
49 | 30: | 49 | 30: |
50 | .section __ex_table,"a" | 50 | .section __ex_table, "a" |
51 | .align 8 | 51 | .align 8 |
52 | .quad 30b,\L | 52 | .quad 30b, \L |
53 | .previous | 53 | .previous |
54 | .endm | 54 | .endm |
55 | 55 | ||
56 | 56 | ||
57 | ENTRY(csum_partial_copy_generic) | 57 | ENTRY(csum_partial_copy_generic) |
58 | CFI_STARTPROC | 58 | CFI_STARTPROC |
59 | cmpl $3*64,%edx | 59 | cmpl $3*64, %edx |
60 | jle .Lignore | 60 | jle .Lignore |
61 | 61 | ||
62 | .Lignore: | 62 | .Lignore: |
63 | subq $7*8,%rsp | 63 | subq $7*8, %rsp |
64 | CFI_ADJUST_CFA_OFFSET 7*8 | 64 | CFI_ADJUST_CFA_OFFSET 7*8 |
65 | movq %rbx,2*8(%rsp) | 65 | movq %rbx, 2*8(%rsp) |
66 | CFI_REL_OFFSET rbx, 2*8 | 66 | CFI_REL_OFFSET rbx, 2*8 |
67 | movq %r12,3*8(%rsp) | 67 | movq %r12, 3*8(%rsp) |
68 | CFI_REL_OFFSET r12, 3*8 | 68 | CFI_REL_OFFSET r12, 3*8 |
69 | movq %r14,4*8(%rsp) | 69 | movq %r14, 4*8(%rsp) |
70 | CFI_REL_OFFSET r14, 4*8 | 70 | CFI_REL_OFFSET r14, 4*8 |
71 | movq %r13,5*8(%rsp) | 71 | movq %r13, 5*8(%rsp) |
72 | CFI_REL_OFFSET r13, 5*8 | 72 | CFI_REL_OFFSET r13, 5*8 |
73 | movq %rbp,6*8(%rsp) | 73 | movq %rbp, 6*8(%rsp) |
74 | CFI_REL_OFFSET rbp, 6*8 | 74 | CFI_REL_OFFSET rbp, 6*8 |
75 | 75 | ||
76 | movq %r8,(%rsp) | 76 | movq %r8, (%rsp) |
77 | movq %r9,1*8(%rsp) | 77 | movq %r9, 1*8(%rsp) |
78 | |||
79 | movl %ecx,%eax | ||
80 | movl %edx,%ecx | ||
81 | 78 | ||
82 | xorl %r9d,%r9d | 79 | movl %ecx, %eax |
83 | movq %rcx,%r12 | 80 | movl %edx, %ecx |
84 | 81 | ||
85 | shrq $6,%r12 | 82 | xorl %r9d, %r9d |
86 | jz .Lhandle_tail /* < 64 */ | 83 | movq %rcx, %r12 |
84 | |||
85 | shrq $6, %r12 | ||
86 | jz .Lhandle_tail /* < 64 */ | ||
87 | 87 | ||
88 | clc | 88 | clc |
89 | 89 | ||
90 | /* main loop. clear in 64 byte blocks */ | 90 | /* main loop. clear in 64 byte blocks */ |
91 | /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */ | 91 | /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */ |
92 | /* r11: temp3, rdx: temp4, r12 loopcnt */ | 92 | /* r11: temp3, rdx: temp4, r12 loopcnt */ |
@@ -94,156 +94,156 @@ ENTRY(csum_partial_copy_generic) | |||
94 | .p2align 4 | 94 | .p2align 4 |
95 | .Lloop: | 95 | .Lloop: |
96 | source | 96 | source |
97 | movq (%rdi),%rbx | 97 | movq (%rdi), %rbx |
98 | source | 98 | source |
99 | movq 8(%rdi),%r8 | 99 | movq 8(%rdi), %r8 |
100 | source | 100 | source |
101 | movq 16(%rdi),%r11 | 101 | movq 16(%rdi), %r11 |
102 | source | 102 | source |
103 | movq 24(%rdi),%rdx | 103 | movq 24(%rdi), %rdx |
104 | 104 | ||
105 | source | 105 | source |
106 | movq 32(%rdi),%r10 | 106 | movq 32(%rdi), %r10 |
107 | source | 107 | source |
108 | movq 40(%rdi),%rbp | 108 | movq 40(%rdi), %rbp |
109 | source | 109 | source |
110 | movq 48(%rdi),%r14 | 110 | movq 48(%rdi), %r14 |
111 | source | 111 | source |
112 | movq 56(%rdi),%r13 | 112 | movq 56(%rdi), %r13 |
113 | 113 | ||
114 | ignore 2f | 114 | ignore 2f |
115 | prefetcht0 5*64(%rdi) | 115 | prefetcht0 5*64(%rdi) |
116 | 2: | 116 | 2: |
117 | adcq %rbx,%rax | 117 | adcq %rbx, %rax |
118 | adcq %r8,%rax | 118 | adcq %r8, %rax |
119 | adcq %r11,%rax | 119 | adcq %r11, %rax |
120 | adcq %rdx,%rax | 120 | adcq %rdx, %rax |
121 | adcq %r10,%rax | 121 | adcq %r10, %rax |
122 | adcq %rbp,%rax | 122 | adcq %rbp, %rax |
123 | adcq %r14,%rax | 123 | adcq %r14, %rax |
124 | adcq %r13,%rax | 124 | adcq %r13, %rax |
125 | 125 | ||
126 | decl %r12d | 126 | decl %r12d |
127 | 127 | ||
128 | dest | 128 | dest |
129 | movq %rbx,(%rsi) | 129 | movq %rbx, (%rsi) |
130 | dest | 130 | dest |
131 | movq %r8,8(%rsi) | 131 | movq %r8, 8(%rsi) |
132 | dest | 132 | dest |
133 | movq %r11,16(%rsi) | 133 | movq %r11, 16(%rsi) |
134 | dest | 134 | dest |
135 | movq %rdx,24(%rsi) | 135 | movq %rdx, 24(%rsi) |
136 | 136 | ||
137 | dest | 137 | dest |
138 | movq %r10,32(%rsi) | 138 | movq %r10, 32(%rsi) |
139 | dest | 139 | dest |
140 | movq %rbp,40(%rsi) | 140 | movq %rbp, 40(%rsi) |
141 | dest | 141 | dest |
142 | movq %r14,48(%rsi) | 142 | movq %r14, 48(%rsi) |
143 | dest | 143 | dest |
144 | movq %r13,56(%rsi) | 144 | movq %r13, 56(%rsi) |
145 | 145 | ||
146 | 3: | 146 | 3: |
147 | |||
148 | leaq 64(%rdi),%rdi | ||
149 | leaq 64(%rsi),%rsi | ||
150 | 147 | ||
151 | jnz .Lloop | 148 | leaq 64(%rdi), %rdi |
149 | leaq 64(%rsi), %rsi | ||
152 | 150 | ||
153 | adcq %r9,%rax | 151 | jnz .Lloop |
154 | 152 | ||
155 | /* do last upto 56 bytes */ | 153 | adcq %r9, %rax |
154 | |||
155 | /* do last up to 56 bytes */ | ||
156 | .Lhandle_tail: | 156 | .Lhandle_tail: |
157 | /* ecx: count */ | 157 | /* ecx: count */ |
158 | movl %ecx,%r10d | 158 | movl %ecx, %r10d |
159 | andl $63,%ecx | 159 | andl $63, %ecx |
160 | shrl $3,%ecx | 160 | shrl $3, %ecx |
161 | jz .Lfold | 161 | jz .Lfold |
162 | clc | 162 | clc |
163 | .p2align 4 | 163 | .p2align 4 |
164 | .Lloop_8: | 164 | .Lloop_8: |
165 | source | 165 | source |
166 | movq (%rdi),%rbx | 166 | movq (%rdi), %rbx |
167 | adcq %rbx,%rax | 167 | adcq %rbx, %rax |
168 | decl %ecx | 168 | decl %ecx |
169 | dest | 169 | dest |
170 | movq %rbx,(%rsi) | 170 | movq %rbx, (%rsi) |
171 | leaq 8(%rsi),%rsi /* preserve carry */ | 171 | leaq 8(%rsi), %rsi /* preserve carry */ |
172 | leaq 8(%rdi),%rdi | 172 | leaq 8(%rdi), %rdi |
173 | jnz .Lloop_8 | 173 | jnz .Lloop_8 |
174 | adcq %r9,%rax /* add in carry */ | 174 | adcq %r9, %rax /* add in carry */ |
175 | 175 | ||
176 | .Lfold: | 176 | .Lfold: |
177 | /* reduce checksum to 32bits */ | 177 | /* reduce checksum to 32bits */ |
178 | movl %eax,%ebx | 178 | movl %eax, %ebx |
179 | shrq $32,%rax | 179 | shrq $32, %rax |
180 | addl %ebx,%eax | 180 | addl %ebx, %eax |
181 | adcl %r9d,%eax | 181 | adcl %r9d, %eax |
182 | 182 | ||
183 | /* do last upto 6 bytes */ | 183 | /* do last up to 6 bytes */ |
184 | .Lhandle_7: | 184 | .Lhandle_7: |
185 | movl %r10d,%ecx | 185 | movl %r10d, %ecx |
186 | andl $7,%ecx | 186 | andl $7, %ecx |
187 | shrl $1,%ecx | 187 | shrl $1, %ecx |
188 | jz .Lhandle_1 | 188 | jz .Lhandle_1 |
189 | movl $2,%edx | 189 | movl $2, %edx |
190 | xorl %ebx,%ebx | 190 | xorl %ebx, %ebx |
191 | clc | 191 | clc |
192 | .p2align 4 | 192 | .p2align 4 |
193 | .Lloop_1: | 193 | .Lloop_1: |
194 | source | 194 | source |
195 | movw (%rdi),%bx | 195 | movw (%rdi), %bx |
196 | adcl %ebx,%eax | 196 | adcl %ebx, %eax |
197 | decl %ecx | 197 | decl %ecx |
198 | dest | 198 | dest |
199 | movw %bx,(%rsi) | 199 | movw %bx, (%rsi) |
200 | leaq 2(%rdi),%rdi | 200 | leaq 2(%rdi), %rdi |
201 | leaq 2(%rsi),%rsi | 201 | leaq 2(%rsi), %rsi |
202 | jnz .Lloop_1 | 202 | jnz .Lloop_1 |
203 | adcl %r9d,%eax /* add in carry */ | 203 | adcl %r9d, %eax /* add in carry */ |
204 | 204 | ||
205 | /* handle last odd byte */ | 205 | /* handle last odd byte */ |
206 | .Lhandle_1: | 206 | .Lhandle_1: |
207 | testl $1,%r10d | 207 | testl $1, %r10d |
208 | jz .Lende | 208 | jz .Lende |
209 | xorl %ebx,%ebx | 209 | xorl %ebx, %ebx |
210 | source | 210 | source |
211 | movb (%rdi),%bl | 211 | movb (%rdi), %bl |
212 | dest | 212 | dest |
213 | movb %bl,(%rsi) | 213 | movb %bl, (%rsi) |
214 | addl %ebx,%eax | 214 | addl %ebx, %eax |
215 | adcl %r9d,%eax /* carry */ | 215 | adcl %r9d, %eax /* carry */ |
216 | 216 | ||
217 | CFI_REMEMBER_STATE | 217 | CFI_REMEMBER_STATE |
218 | .Lende: | 218 | .Lende: |
219 | movq 2*8(%rsp),%rbx | 219 | movq 2*8(%rsp), %rbx |
220 | CFI_RESTORE rbx | 220 | CFI_RESTORE rbx |
221 | movq 3*8(%rsp),%r12 | 221 | movq 3*8(%rsp), %r12 |
222 | CFI_RESTORE r12 | 222 | CFI_RESTORE r12 |
223 | movq 4*8(%rsp),%r14 | 223 | movq 4*8(%rsp), %r14 |
224 | CFI_RESTORE r14 | 224 | CFI_RESTORE r14 |
225 | movq 5*8(%rsp),%r13 | 225 | movq 5*8(%rsp), %r13 |
226 | CFI_RESTORE r13 | 226 | CFI_RESTORE r13 |
227 | movq 6*8(%rsp),%rbp | 227 | movq 6*8(%rsp), %rbp |
228 | CFI_RESTORE rbp | 228 | CFI_RESTORE rbp |
229 | addq $7*8,%rsp | 229 | addq $7*8, %rsp |
230 | CFI_ADJUST_CFA_OFFSET -7*8 | 230 | CFI_ADJUST_CFA_OFFSET -7*8 |
231 | ret | 231 | ret |
232 | CFI_RESTORE_STATE | 232 | CFI_RESTORE_STATE |
233 | 233 | ||
234 | /* Exception handlers. Very simple, zeroing is done in the wrappers */ | 234 | /* Exception handlers. Very simple, zeroing is done in the wrappers */ |
235 | .Lbad_source: | 235 | .Lbad_source: |
236 | movq (%rsp),%rax | 236 | movq (%rsp), %rax |
237 | testq %rax,%rax | 237 | testq %rax, %rax |
238 | jz .Lende | 238 | jz .Lende |
239 | movl $-EFAULT,(%rax) | 239 | movl $-EFAULT, (%rax) |
240 | jmp .Lende | 240 | jmp .Lende |
241 | 241 | ||
242 | .Lbad_dest: | 242 | .Lbad_dest: |
243 | movq 8(%rsp),%rax | 243 | movq 8(%rsp), %rax |
244 | testq %rax,%rax | 244 | testq %rax, %rax |
245 | jz .Lende | 245 | jz .Lende |
246 | movl $-EFAULT,(%rax) | 246 | movl $-EFAULT, (%rax) |
247 | jmp .Lende | 247 | jmp .Lende |
248 | CFI_ENDPROC | 248 | CFI_ENDPROC |
249 | ENDPROC(csum_partial_copy_generic) | 249 | ENDPROC(csum_partial_copy_generic) |
diff --git a/arch/x86/lib/csum-partial_64.c b/arch/x86/lib/csum-partial_64.c index bf51144d97e1..9845371c5c36 100644 --- a/arch/x86/lib/csum-partial_64.c +++ b/arch/x86/lib/csum-partial_64.c | |||
@@ -84,7 +84,7 @@ static unsigned do_csum(const unsigned char *buff, unsigned len) | |||
84 | count64--; | 84 | count64--; |
85 | } | 85 | } |
86 | 86 | ||
87 | /* last upto 7 8byte blocks */ | 87 | /* last up to 7 8byte blocks */ |
88 | count %= 8; | 88 | count %= 8; |
89 | while (count) { | 89 | while (count) { |
90 | asm("addq %1,%0\n\t" | 90 | asm("addq %1,%0\n\t" |
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S new file mode 100644 index 000000000000..0ecb8433e5a8 --- /dev/null +++ b/arch/x86/lib/memmove_64.S | |||
@@ -0,0 +1,197 @@ | |||
1 | /* | ||
2 | * Normally compiler builtins are used, but sometimes the compiler calls out | ||
3 | * of line code. Based on asm-i386/string.h. | ||
4 | * | ||
5 | * This assembly file is re-written from memmove_64.c file. | ||
6 | * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com> | ||
7 | */ | ||
8 | #define _STRING_C | ||
9 | #include <linux/linkage.h> | ||
10 | #include <asm/dwarf2.h> | ||
11 | |||
12 | #undef memmove | ||
13 | |||
14 | /* | ||
15 | * Implement memmove(). This can handle overlap between src and dst. | ||
16 | * | ||
17 | * Input: | ||
18 | * rdi: dest | ||
19 | * rsi: src | ||
20 | * rdx: count | ||
21 | * | ||
22 | * Output: | ||
23 | * rax: dest | ||
24 | */ | ||
25 | ENTRY(memmove) | ||
26 | CFI_STARTPROC | ||
27 | /* Handle more 32bytes in loop */ | ||
28 | mov %rdi, %rax | ||
29 | cmp $0x20, %rdx | ||
30 | jb 1f | ||
31 | |||
32 | /* Decide forward/backward copy mode */ | ||
33 | cmp %rdi, %rsi | ||
34 | jb 2f | ||
35 | |||
36 | /* | ||
37 | * movsq instruction have many startup latency | ||
38 | * so we handle small size by general register. | ||
39 | */ | ||
40 | cmp $680, %rdx | ||
41 | jb 3f | ||
42 | /* | ||
43 | * movsq instruction is only good for aligned case. | ||
44 | */ | ||
45 | |||
46 | cmpb %dil, %sil | ||
47 | je 4f | ||
48 | 3: | ||
49 | sub $0x20, %rdx | ||
50 | /* | ||
51 | * We gobble 32byts forward in each loop. | ||
52 | */ | ||
53 | 5: | ||
54 | sub $0x20, %rdx | ||
55 | movq 0*8(%rsi), %r11 | ||
56 | movq 1*8(%rsi), %r10 | ||
57 | movq 2*8(%rsi), %r9 | ||
58 | movq 3*8(%rsi), %r8 | ||
59 | leaq 4*8(%rsi), %rsi | ||
60 | |||
61 | movq %r11, 0*8(%rdi) | ||
62 | movq %r10, 1*8(%rdi) | ||
63 | movq %r9, 2*8(%rdi) | ||
64 | movq %r8, 3*8(%rdi) | ||
65 | leaq 4*8(%rdi), %rdi | ||
66 | jae 5b | ||
67 | addq $0x20, %rdx | ||
68 | jmp 1f | ||
69 | /* | ||
70 | * Handle data forward by movsq. | ||
71 | */ | ||
72 | .p2align 4 | ||
73 | 4: | ||
74 | movq %rdx, %rcx | ||
75 | movq -8(%rsi, %rdx), %r11 | ||
76 | lea -8(%rdi, %rdx), %r10 | ||
77 | shrq $3, %rcx | ||
78 | rep movsq | ||
79 | movq %r11, (%r10) | ||
80 | jmp 13f | ||
81 | /* | ||
82 | * Handle data backward by movsq. | ||
83 | */ | ||
84 | .p2align 4 | ||
85 | 7: | ||
86 | movq %rdx, %rcx | ||
87 | movq (%rsi), %r11 | ||
88 | movq %rdi, %r10 | ||
89 | leaq -8(%rsi, %rdx), %rsi | ||
90 | leaq -8(%rdi, %rdx), %rdi | ||
91 | shrq $3, %rcx | ||
92 | std | ||
93 | rep movsq | ||
94 | cld | ||
95 | movq %r11, (%r10) | ||
96 | jmp 13f | ||
97 | |||
98 | /* | ||
99 | * Start to prepare for backward copy. | ||
100 | */ | ||
101 | .p2align 4 | ||
102 | 2: | ||
103 | cmp $680, %rdx | ||
104 | jb 6f | ||
105 | cmp %dil, %sil | ||
106 | je 7b | ||
107 | 6: | ||
108 | /* | ||
109 | * Calculate copy position to tail. | ||
110 | */ | ||
111 | addq %rdx, %rsi | ||
112 | addq %rdx, %rdi | ||
113 | subq $0x20, %rdx | ||
114 | /* | ||
115 | * We gobble 32byts backward in each loop. | ||
116 | */ | ||
117 | 8: | ||
118 | subq $0x20, %rdx | ||
119 | movq -1*8(%rsi), %r11 | ||
120 | movq -2*8(%rsi), %r10 | ||
121 | movq -3*8(%rsi), %r9 | ||
122 | movq -4*8(%rsi), %r8 | ||
123 | leaq -4*8(%rsi), %rsi | ||
124 | |||
125 | movq %r11, -1*8(%rdi) | ||
126 | movq %r10, -2*8(%rdi) | ||
127 | movq %r9, -3*8(%rdi) | ||
128 | movq %r8, -4*8(%rdi) | ||
129 | leaq -4*8(%rdi), %rdi | ||
130 | jae 8b | ||
131 | /* | ||
132 | * Calculate copy position to head. | ||
133 | */ | ||
134 | addq $0x20, %rdx | ||
135 | subq %rdx, %rsi | ||
136 | subq %rdx, %rdi | ||
137 | 1: | ||
138 | cmpq $16, %rdx | ||
139 | jb 9f | ||
140 | /* | ||
141 | * Move data from 16 bytes to 31 bytes. | ||
142 | */ | ||
143 | movq 0*8(%rsi), %r11 | ||
144 | movq 1*8(%rsi), %r10 | ||
145 | movq -2*8(%rsi, %rdx), %r9 | ||
146 | movq -1*8(%rsi, %rdx), %r8 | ||
147 | movq %r11, 0*8(%rdi) | ||
148 | movq %r10, 1*8(%rdi) | ||
149 | movq %r9, -2*8(%rdi, %rdx) | ||
150 | movq %r8, -1*8(%rdi, %rdx) | ||
151 | jmp 13f | ||
152 | .p2align 4 | ||
153 | 9: | ||
154 | cmpq $8, %rdx | ||
155 | jb 10f | ||
156 | /* | ||
157 | * Move data from 8 bytes to 15 bytes. | ||
158 | */ | ||
159 | movq 0*8(%rsi), %r11 | ||
160 | movq -1*8(%rsi, %rdx), %r10 | ||
161 | movq %r11, 0*8(%rdi) | ||
162 | movq %r10, -1*8(%rdi, %rdx) | ||
163 | jmp 13f | ||
164 | 10: | ||
165 | cmpq $4, %rdx | ||
166 | jb 11f | ||
167 | /* | ||
168 | * Move data from 4 bytes to 7 bytes. | ||
169 | */ | ||
170 | movl (%rsi), %r11d | ||
171 | movl -4(%rsi, %rdx), %r10d | ||
172 | movl %r11d, (%rdi) | ||
173 | movl %r10d, -4(%rdi, %rdx) | ||
174 | jmp 13f | ||
175 | 11: | ||
176 | cmp $2, %rdx | ||
177 | jb 12f | ||
178 | /* | ||
179 | * Move data from 2 bytes to 3 bytes. | ||
180 | */ | ||
181 | movw (%rsi), %r11w | ||
182 | movw -2(%rsi, %rdx), %r10w | ||
183 | movw %r11w, (%rdi) | ||
184 | movw %r10w, -2(%rdi, %rdx) | ||
185 | jmp 13f | ||
186 | 12: | ||
187 | cmp $1, %rdx | ||
188 | jb 13f | ||
189 | /* | ||
190 | * Move data for 1 byte. | ||
191 | */ | ||
192 | movb (%rsi), %r11b | ||
193 | movb %r11b, (%rdi) | ||
194 | 13: | ||
195 | retq | ||
196 | CFI_ENDPROC | ||
197 | ENDPROC(memmove) | ||
diff --git a/arch/x86/lib/memmove_64.c b/arch/x86/lib/memmove_64.c deleted file mode 100644 index 6d0f0ec41b34..000000000000 --- a/arch/x86/lib/memmove_64.c +++ /dev/null | |||
@@ -1,192 +0,0 @@ | |||
1 | /* Normally compiler builtins are used, but sometimes the compiler calls out | ||
2 | of line code. Based on asm-i386/string.h. | ||
3 | */ | ||
4 | #define _STRING_C | ||
5 | #include <linux/string.h> | ||
6 | #include <linux/module.h> | ||
7 | |||
8 | #undef memmove | ||
9 | void *memmove(void *dest, const void *src, size_t count) | ||
10 | { | ||
11 | unsigned long d0,d1,d2,d3,d4,d5,d6,d7; | ||
12 | char *ret; | ||
13 | |||
14 | __asm__ __volatile__( | ||
15 | /* Handle more 32bytes in loop */ | ||
16 | "mov %2, %3\n\t" | ||
17 | "cmp $0x20, %0\n\t" | ||
18 | "jb 1f\n\t" | ||
19 | |||
20 | /* Decide forward/backward copy mode */ | ||
21 | "cmp %2, %1\n\t" | ||
22 | "jb 2f\n\t" | ||
23 | |||
24 | /* | ||
25 | * movsq instruction have many startup latency | ||
26 | * so we handle small size by general register. | ||
27 | */ | ||
28 | "cmp $680, %0\n\t" | ||
29 | "jb 3f\n\t" | ||
30 | /* | ||
31 | * movsq instruction is only good for aligned case. | ||
32 | */ | ||
33 | "cmpb %%dil, %%sil\n\t" | ||
34 | "je 4f\n\t" | ||
35 | "3:\n\t" | ||
36 | "sub $0x20, %0\n\t" | ||
37 | /* | ||
38 | * We gobble 32byts forward in each loop. | ||
39 | */ | ||
40 | "5:\n\t" | ||
41 | "sub $0x20, %0\n\t" | ||
42 | "movq 0*8(%1), %4\n\t" | ||
43 | "movq 1*8(%1), %5\n\t" | ||
44 | "movq 2*8(%1), %6\n\t" | ||
45 | "movq 3*8(%1), %7\n\t" | ||
46 | "leaq 4*8(%1), %1\n\t" | ||
47 | |||
48 | "movq %4, 0*8(%2)\n\t" | ||
49 | "movq %5, 1*8(%2)\n\t" | ||
50 | "movq %6, 2*8(%2)\n\t" | ||
51 | "movq %7, 3*8(%2)\n\t" | ||
52 | "leaq 4*8(%2), %2\n\t" | ||
53 | "jae 5b\n\t" | ||
54 | "addq $0x20, %0\n\t" | ||
55 | "jmp 1f\n\t" | ||
56 | /* | ||
57 | * Handle data forward by movsq. | ||
58 | */ | ||
59 | ".p2align 4\n\t" | ||
60 | "4:\n\t" | ||
61 | "movq %0, %8\n\t" | ||
62 | "movq -8(%1, %0), %4\n\t" | ||
63 | "lea -8(%2, %0), %5\n\t" | ||
64 | "shrq $3, %8\n\t" | ||
65 | "rep movsq\n\t" | ||
66 | "movq %4, (%5)\n\t" | ||
67 | "jmp 13f\n\t" | ||
68 | /* | ||
69 | * Handle data backward by movsq. | ||
70 | */ | ||
71 | ".p2align 4\n\t" | ||
72 | "7:\n\t" | ||
73 | "movq %0, %8\n\t" | ||
74 | "movq (%1), %4\n\t" | ||
75 | "movq %2, %5\n\t" | ||
76 | "leaq -8(%1, %0), %1\n\t" | ||
77 | "leaq -8(%2, %0), %2\n\t" | ||
78 | "shrq $3, %8\n\t" | ||
79 | "std\n\t" | ||
80 | "rep movsq\n\t" | ||
81 | "cld\n\t" | ||
82 | "movq %4, (%5)\n\t" | ||
83 | "jmp 13f\n\t" | ||
84 | |||
85 | /* | ||
86 | * Start to prepare for backward copy. | ||
87 | */ | ||
88 | ".p2align 4\n\t" | ||
89 | "2:\n\t" | ||
90 | "cmp $680, %0\n\t" | ||
91 | "jb 6f \n\t" | ||
92 | "cmp %%dil, %%sil\n\t" | ||
93 | "je 7b \n\t" | ||
94 | "6:\n\t" | ||
95 | /* | ||
96 | * Calculate copy position to tail. | ||
97 | */ | ||
98 | "addq %0, %1\n\t" | ||
99 | "addq %0, %2\n\t" | ||
100 | "subq $0x20, %0\n\t" | ||
101 | /* | ||
102 | * We gobble 32byts backward in each loop. | ||
103 | */ | ||
104 | "8:\n\t" | ||
105 | "subq $0x20, %0\n\t" | ||
106 | "movq -1*8(%1), %4\n\t" | ||
107 | "movq -2*8(%1), %5\n\t" | ||
108 | "movq -3*8(%1), %6\n\t" | ||
109 | "movq -4*8(%1), %7\n\t" | ||
110 | "leaq -4*8(%1), %1\n\t" | ||
111 | |||
112 | "movq %4, -1*8(%2)\n\t" | ||
113 | "movq %5, -2*8(%2)\n\t" | ||
114 | "movq %6, -3*8(%2)\n\t" | ||
115 | "movq %7, -4*8(%2)\n\t" | ||
116 | "leaq -4*8(%2), %2\n\t" | ||
117 | "jae 8b\n\t" | ||
118 | /* | ||
119 | * Calculate copy position to head. | ||
120 | */ | ||
121 | "addq $0x20, %0\n\t" | ||
122 | "subq %0, %1\n\t" | ||
123 | "subq %0, %2\n\t" | ||
124 | "1:\n\t" | ||
125 | "cmpq $16, %0\n\t" | ||
126 | "jb 9f\n\t" | ||
127 | /* | ||
128 | * Move data from 16 bytes to 31 bytes. | ||
129 | */ | ||
130 | "movq 0*8(%1), %4\n\t" | ||
131 | "movq 1*8(%1), %5\n\t" | ||
132 | "movq -2*8(%1, %0), %6\n\t" | ||
133 | "movq -1*8(%1, %0), %7\n\t" | ||
134 | "movq %4, 0*8(%2)\n\t" | ||
135 | "movq %5, 1*8(%2)\n\t" | ||
136 | "movq %6, -2*8(%2, %0)\n\t" | ||
137 | "movq %7, -1*8(%2, %0)\n\t" | ||
138 | "jmp 13f\n\t" | ||
139 | ".p2align 4\n\t" | ||
140 | "9:\n\t" | ||
141 | "cmpq $8, %0\n\t" | ||
142 | "jb 10f\n\t" | ||
143 | /* | ||
144 | * Move data from 8 bytes to 15 bytes. | ||
145 | */ | ||
146 | "movq 0*8(%1), %4\n\t" | ||
147 | "movq -1*8(%1, %0), %5\n\t" | ||
148 | "movq %4, 0*8(%2)\n\t" | ||
149 | "movq %5, -1*8(%2, %0)\n\t" | ||
150 | "jmp 13f\n\t" | ||
151 | "10:\n\t" | ||
152 | "cmpq $4, %0\n\t" | ||
153 | "jb 11f\n\t" | ||
154 | /* | ||
155 | * Move data from 4 bytes to 7 bytes. | ||
156 | */ | ||
157 | "movl (%1), %4d\n\t" | ||
158 | "movl -4(%1, %0), %5d\n\t" | ||
159 | "movl %4d, (%2)\n\t" | ||
160 | "movl %5d, -4(%2, %0)\n\t" | ||
161 | "jmp 13f\n\t" | ||
162 | "11:\n\t" | ||
163 | "cmp $2, %0\n\t" | ||
164 | "jb 12f\n\t" | ||
165 | /* | ||
166 | * Move data from 2 bytes to 3 bytes. | ||
167 | */ | ||
168 | "movw (%1), %4w\n\t" | ||
169 | "movw -2(%1, %0), %5w\n\t" | ||
170 | "movw %4w, (%2)\n\t" | ||
171 | "movw %5w, -2(%2, %0)\n\t" | ||
172 | "jmp 13f\n\t" | ||
173 | "12:\n\t" | ||
174 | "cmp $1, %0\n\t" | ||
175 | "jb 13f\n\t" | ||
176 | /* | ||
177 | * Move data for 1 byte. | ||
178 | */ | ||
179 | "movb (%1), %4b\n\t" | ||
180 | "movb %4b, (%2)\n\t" | ||
181 | "13:\n\t" | ||
182 | : "=&d" (d0), "=&S" (d1), "=&D" (d2), "=&a" (ret) , | ||
183 | "=r"(d3), "=r"(d4), "=r"(d5), "=r"(d6), "=&c" (d7) | ||
184 | :"0" (count), | ||
185 | "1" (src), | ||
186 | "2" (dest) | ||
187 | :"memory"); | ||
188 | |||
189 | return ret; | ||
190 | |||
191 | } | ||
192 | EXPORT_SYMBOL(memmove); | ||
diff --git a/arch/x86/lib/rwsem_64.S b/arch/x86/lib/rwsem_64.S index 41fcf00e49df..67743977398b 100644 --- a/arch/x86/lib/rwsem_64.S +++ b/arch/x86/lib/rwsem_64.S | |||
@@ -23,43 +23,50 @@ | |||
23 | #include <asm/dwarf2.h> | 23 | #include <asm/dwarf2.h> |
24 | 24 | ||
25 | #define save_common_regs \ | 25 | #define save_common_regs \ |
26 | pushq %rdi; \ | 26 | pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \ |
27 | pushq %rsi; \ | 27 | pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \ |
28 | pushq %rcx; \ | 28 | pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \ |
29 | pushq %r8; \ | 29 | pushq_cfi %r8; CFI_REL_OFFSET r8, 0; \ |
30 | pushq %r9; \ | 30 | pushq_cfi %r9; CFI_REL_OFFSET r9, 0; \ |
31 | pushq %r10; \ | 31 | pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \ |
32 | pushq %r11 | 32 | pushq_cfi %r11; CFI_REL_OFFSET r11, 0 |
33 | 33 | ||
34 | #define restore_common_regs \ | 34 | #define restore_common_regs \ |
35 | popq %r11; \ | 35 | popq_cfi %r11; CFI_RESTORE r11; \ |
36 | popq %r10; \ | 36 | popq_cfi %r10; CFI_RESTORE r10; \ |
37 | popq %r9; \ | 37 | popq_cfi %r9; CFI_RESTORE r9; \ |
38 | popq %r8; \ | 38 | popq_cfi %r8; CFI_RESTORE r8; \ |
39 | popq %rcx; \ | 39 | popq_cfi %rcx; CFI_RESTORE rcx; \ |
40 | popq %rsi; \ | 40 | popq_cfi %rsi; CFI_RESTORE rsi; \ |
41 | popq %rdi | 41 | popq_cfi %rdi; CFI_RESTORE rdi |
42 | 42 | ||
43 | /* Fix up special calling conventions */ | 43 | /* Fix up special calling conventions */ |
44 | ENTRY(call_rwsem_down_read_failed) | 44 | ENTRY(call_rwsem_down_read_failed) |
45 | CFI_STARTPROC | ||
45 | save_common_regs | 46 | save_common_regs |
46 | pushq %rdx | 47 | pushq_cfi %rdx |
48 | CFI_REL_OFFSET rdx, 0 | ||
47 | movq %rax,%rdi | 49 | movq %rax,%rdi |
48 | call rwsem_down_read_failed | 50 | call rwsem_down_read_failed |
49 | popq %rdx | 51 | popq_cfi %rdx |
52 | CFI_RESTORE rdx | ||
50 | restore_common_regs | 53 | restore_common_regs |
51 | ret | 54 | ret |
52 | ENDPROC(call_rwsem_down_read_failed) | 55 | CFI_ENDPROC |
56 | ENDPROC(call_rwsem_down_read_failed) | ||
53 | 57 | ||
54 | ENTRY(call_rwsem_down_write_failed) | 58 | ENTRY(call_rwsem_down_write_failed) |
59 | CFI_STARTPROC | ||
55 | save_common_regs | 60 | save_common_regs |
56 | movq %rax,%rdi | 61 | movq %rax,%rdi |
57 | call rwsem_down_write_failed | 62 | call rwsem_down_write_failed |
58 | restore_common_regs | 63 | restore_common_regs |
59 | ret | 64 | ret |
60 | ENDPROC(call_rwsem_down_write_failed) | 65 | CFI_ENDPROC |
66 | ENDPROC(call_rwsem_down_write_failed) | ||
61 | 67 | ||
62 | ENTRY(call_rwsem_wake) | 68 | ENTRY(call_rwsem_wake) |
69 | CFI_STARTPROC | ||
63 | decl %edx /* do nothing if still outstanding active readers */ | 70 | decl %edx /* do nothing if still outstanding active readers */ |
64 | jnz 1f | 71 | jnz 1f |
65 | save_common_regs | 72 | save_common_regs |
@@ -67,15 +74,20 @@ ENTRY(call_rwsem_wake) | |||
67 | call rwsem_wake | 74 | call rwsem_wake |
68 | restore_common_regs | 75 | restore_common_regs |
69 | 1: ret | 76 | 1: ret |
70 | ENDPROC(call_rwsem_wake) | 77 | CFI_ENDPROC |
78 | ENDPROC(call_rwsem_wake) | ||
71 | 79 | ||
72 | /* Fix up special calling conventions */ | 80 | /* Fix up special calling conventions */ |
73 | ENTRY(call_rwsem_downgrade_wake) | 81 | ENTRY(call_rwsem_downgrade_wake) |
82 | CFI_STARTPROC | ||
74 | save_common_regs | 83 | save_common_regs |
75 | pushq %rdx | 84 | pushq_cfi %rdx |
85 | CFI_REL_OFFSET rdx, 0 | ||
76 | movq %rax,%rdi | 86 | movq %rax,%rdi |
77 | call rwsem_downgrade_wake | 87 | call rwsem_downgrade_wake |
78 | popq %rdx | 88 | popq_cfi %rdx |
89 | CFI_RESTORE rdx | ||
79 | restore_common_regs | 90 | restore_common_regs |
80 | ret | 91 | ret |
81 | ENDPROC(call_rwsem_downgrade_wake) | 92 | CFI_ENDPROC |
93 | ENDPROC(call_rwsem_downgrade_wake) | ||
diff --git a/arch/x86/lib/semaphore_32.S b/arch/x86/lib/semaphore_32.S index 648fe4741782..06691daa4108 100644 --- a/arch/x86/lib/semaphore_32.S +++ b/arch/x86/lib/semaphore_32.S | |||
@@ -36,7 +36,7 @@ | |||
36 | */ | 36 | */ |
37 | #ifdef CONFIG_SMP | 37 | #ifdef CONFIG_SMP |
38 | ENTRY(__write_lock_failed) | 38 | ENTRY(__write_lock_failed) |
39 | CFI_STARTPROC simple | 39 | CFI_STARTPROC |
40 | FRAME | 40 | FRAME |
41 | 2: LOCK_PREFIX | 41 | 2: LOCK_PREFIX |
42 | addl $ RW_LOCK_BIAS,(%eax) | 42 | addl $ RW_LOCK_BIAS,(%eax) |
@@ -74,29 +74,23 @@ ENTRY(__read_lock_failed) | |||
74 | /* Fix up special calling conventions */ | 74 | /* Fix up special calling conventions */ |
75 | ENTRY(call_rwsem_down_read_failed) | 75 | ENTRY(call_rwsem_down_read_failed) |
76 | CFI_STARTPROC | 76 | CFI_STARTPROC |
77 | push %ecx | 77 | pushl_cfi %ecx |
78 | CFI_ADJUST_CFA_OFFSET 4 | ||
79 | CFI_REL_OFFSET ecx,0 | 78 | CFI_REL_OFFSET ecx,0 |
80 | push %edx | 79 | pushl_cfi %edx |
81 | CFI_ADJUST_CFA_OFFSET 4 | ||
82 | CFI_REL_OFFSET edx,0 | 80 | CFI_REL_OFFSET edx,0 |
83 | call rwsem_down_read_failed | 81 | call rwsem_down_read_failed |
84 | pop %edx | 82 | popl_cfi %edx |
85 | CFI_ADJUST_CFA_OFFSET -4 | 83 | popl_cfi %ecx |
86 | pop %ecx | ||
87 | CFI_ADJUST_CFA_OFFSET -4 | ||
88 | ret | 84 | ret |
89 | CFI_ENDPROC | 85 | CFI_ENDPROC |
90 | ENDPROC(call_rwsem_down_read_failed) | 86 | ENDPROC(call_rwsem_down_read_failed) |
91 | 87 | ||
92 | ENTRY(call_rwsem_down_write_failed) | 88 | ENTRY(call_rwsem_down_write_failed) |
93 | CFI_STARTPROC | 89 | CFI_STARTPROC |
94 | push %ecx | 90 | pushl_cfi %ecx |
95 | CFI_ADJUST_CFA_OFFSET 4 | ||
96 | CFI_REL_OFFSET ecx,0 | 91 | CFI_REL_OFFSET ecx,0 |
97 | calll rwsem_down_write_failed | 92 | calll rwsem_down_write_failed |
98 | pop %ecx | 93 | popl_cfi %ecx |
99 | CFI_ADJUST_CFA_OFFSET -4 | ||
100 | ret | 94 | ret |
101 | CFI_ENDPROC | 95 | CFI_ENDPROC |
102 | ENDPROC(call_rwsem_down_write_failed) | 96 | ENDPROC(call_rwsem_down_write_failed) |
@@ -105,12 +99,10 @@ ENTRY(call_rwsem_wake) | |||
105 | CFI_STARTPROC | 99 | CFI_STARTPROC |
106 | decw %dx /* do nothing if still outstanding active readers */ | 100 | decw %dx /* do nothing if still outstanding active readers */ |
107 | jnz 1f | 101 | jnz 1f |
108 | push %ecx | 102 | pushl_cfi %ecx |
109 | CFI_ADJUST_CFA_OFFSET 4 | ||
110 | CFI_REL_OFFSET ecx,0 | 103 | CFI_REL_OFFSET ecx,0 |
111 | call rwsem_wake | 104 | call rwsem_wake |
112 | pop %ecx | 105 | popl_cfi %ecx |
113 | CFI_ADJUST_CFA_OFFSET -4 | ||
114 | 1: ret | 106 | 1: ret |
115 | CFI_ENDPROC | 107 | CFI_ENDPROC |
116 | ENDPROC(call_rwsem_wake) | 108 | ENDPROC(call_rwsem_wake) |
@@ -118,17 +110,13 @@ ENTRY(call_rwsem_wake) | |||
118 | /* Fix up special calling conventions */ | 110 | /* Fix up special calling conventions */ |
119 | ENTRY(call_rwsem_downgrade_wake) | 111 | ENTRY(call_rwsem_downgrade_wake) |
120 | CFI_STARTPROC | 112 | CFI_STARTPROC |
121 | push %ecx | 113 | pushl_cfi %ecx |
122 | CFI_ADJUST_CFA_OFFSET 4 | ||
123 | CFI_REL_OFFSET ecx,0 | 114 | CFI_REL_OFFSET ecx,0 |
124 | push %edx | 115 | pushl_cfi %edx |
125 | CFI_ADJUST_CFA_OFFSET 4 | ||
126 | CFI_REL_OFFSET edx,0 | 116 | CFI_REL_OFFSET edx,0 |
127 | call rwsem_downgrade_wake | 117 | call rwsem_downgrade_wake |
128 | pop %edx | 118 | popl_cfi %edx |
129 | CFI_ADJUST_CFA_OFFSET -4 | 119 | popl_cfi %ecx |
130 | pop %ecx | ||
131 | CFI_ADJUST_CFA_OFFSET -4 | ||
132 | ret | 120 | ret |
133 | CFI_ENDPROC | 121 | CFI_ENDPROC |
134 | ENDPROC(call_rwsem_downgrade_wake) | 122 | ENDPROC(call_rwsem_downgrade_wake) |
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S index 650b11e00ecc..2930ae05d773 100644 --- a/arch/x86/lib/thunk_32.S +++ b/arch/x86/lib/thunk_32.S | |||
@@ -7,24 +7,6 @@ | |||
7 | 7 | ||
8 | #include <linux/linkage.h> | 8 | #include <linux/linkage.h> |
9 | 9 | ||
10 | #define ARCH_TRACE_IRQS_ON \ | ||
11 | pushl %eax; \ | ||
12 | pushl %ecx; \ | ||
13 | pushl %edx; \ | ||
14 | call trace_hardirqs_on; \ | ||
15 | popl %edx; \ | ||
16 | popl %ecx; \ | ||
17 | popl %eax; | ||
18 | |||
19 | #define ARCH_TRACE_IRQS_OFF \ | ||
20 | pushl %eax; \ | ||
21 | pushl %ecx; \ | ||
22 | pushl %edx; \ | ||
23 | call trace_hardirqs_off; \ | ||
24 | popl %edx; \ | ||
25 | popl %ecx; \ | ||
26 | popl %eax; | ||
27 | |||
28 | #ifdef CONFIG_TRACE_IRQFLAGS | 10 | #ifdef CONFIG_TRACE_IRQFLAGS |
29 | /* put return address in eax (arg1) */ | 11 | /* put return address in eax (arg1) */ |
30 | .macro thunk_ra name,func | 12 | .macro thunk_ra name,func |
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S index bf9a7d5a5428..782b082c9ff7 100644 --- a/arch/x86/lib/thunk_64.S +++ b/arch/x86/lib/thunk_64.S | |||
@@ -22,26 +22,6 @@ | |||
22 | CFI_ENDPROC | 22 | CFI_ENDPROC |
23 | .endm | 23 | .endm |
24 | 24 | ||
25 | /* rdi: arg1 ... normal C conventions. rax is passed from C. */ | ||
26 | .macro thunk_retrax name,func | ||
27 | .globl \name | ||
28 | \name: | ||
29 | CFI_STARTPROC | ||
30 | SAVE_ARGS | ||
31 | call \func | ||
32 | jmp restore_norax | ||
33 | CFI_ENDPROC | ||
34 | .endm | ||
35 | |||
36 | |||
37 | .section .sched.text, "ax" | ||
38 | #ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM | ||
39 | thunk rwsem_down_read_failed_thunk,rwsem_down_read_failed | ||
40 | thunk rwsem_down_write_failed_thunk,rwsem_down_write_failed | ||
41 | thunk rwsem_wake_thunk,rwsem_wake | ||
42 | thunk rwsem_downgrade_thunk,rwsem_downgrade_wake | ||
43 | #endif | ||
44 | |||
45 | #ifdef CONFIG_TRACE_IRQFLAGS | 25 | #ifdef CONFIG_TRACE_IRQFLAGS |
46 | /* put return address in rdi (arg1) */ | 26 | /* put return address in rdi (arg1) */ |
47 | .macro thunk_ra name,func | 27 | .macro thunk_ra name,func |
@@ -72,10 +52,3 @@ restore: | |||
72 | RESTORE_ARGS | 52 | RESTORE_ARGS |
73 | ret | 53 | ret |
74 | CFI_ENDPROC | 54 | CFI_ENDPROC |
75 | |||
76 | CFI_STARTPROC | ||
77 | SAVE_ARGS | ||
78 | restore_norax: | ||
79 | RESTORE_ARGS 1 | ||
80 | ret | ||
81 | CFI_ENDPROC | ||
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 09df2f9a3d69..3e608edf9958 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
@@ -25,6 +25,7 @@ obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o | |||
25 | obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o | 25 | obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o |
26 | obj-$(CONFIG_AMD_NUMA) += amdtopology_64.o | 26 | obj-$(CONFIG_AMD_NUMA) += amdtopology_64.o |
27 | obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o | 27 | obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o |
28 | obj-$(CONFIG_NUMA_EMU) += numa_emulation.o | ||
28 | 29 | ||
29 | obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o | 30 | obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o |
30 | 31 | ||
diff --git a/arch/x86/mm/amdtopology_64.c b/arch/x86/mm/amdtopology_64.c index f21962c435ed..0919c26820d4 100644 --- a/arch/x86/mm/amdtopology_64.c +++ b/arch/x86/mm/amdtopology_64.c | |||
@@ -26,9 +26,7 @@ | |||
26 | #include <asm/apic.h> | 26 | #include <asm/apic.h> |
27 | #include <asm/amd_nb.h> | 27 | #include <asm/amd_nb.h> |
28 | 28 | ||
29 | static struct bootnode __initdata nodes[8]; | ||
30 | static unsigned char __initdata nodeids[8]; | 29 | static unsigned char __initdata nodeids[8]; |
31 | static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE; | ||
32 | 30 | ||
33 | static __init int find_northbridge(void) | 31 | static __init int find_northbridge(void) |
34 | { | 32 | { |
@@ -51,7 +49,7 @@ static __init int find_northbridge(void) | |||
51 | return num; | 49 | return num; |
52 | } | 50 | } |
53 | 51 | ||
54 | return -1; | 52 | return -ENOENT; |
55 | } | 53 | } |
56 | 54 | ||
57 | static __init void early_get_boot_cpu_id(void) | 55 | static __init void early_get_boot_cpu_id(void) |
@@ -69,17 +67,18 @@ static __init void early_get_boot_cpu_id(void) | |||
69 | #endif | 67 | #endif |
70 | } | 68 | } |
71 | 69 | ||
72 | int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn) | 70 | int __init amd_numa_init(void) |
73 | { | 71 | { |
74 | unsigned long start = PFN_PHYS(start_pfn); | 72 | unsigned long start = PFN_PHYS(0); |
75 | unsigned long end = PFN_PHYS(end_pfn); | 73 | unsigned long end = PFN_PHYS(max_pfn); |
76 | unsigned numnodes; | 74 | unsigned numnodes; |
77 | unsigned long prevbase; | 75 | unsigned long prevbase; |
78 | int i, nb, found = 0; | 76 | int i, j, nb; |
79 | u32 nodeid, reg; | 77 | u32 nodeid, reg; |
78 | unsigned int bits, cores, apicid_base; | ||
80 | 79 | ||
81 | if (!early_pci_allowed()) | 80 | if (!early_pci_allowed()) |
82 | return -1; | 81 | return -EINVAL; |
83 | 82 | ||
84 | nb = find_northbridge(); | 83 | nb = find_northbridge(); |
85 | if (nb < 0) | 84 | if (nb < 0) |
@@ -90,7 +89,7 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn) | |||
90 | reg = read_pci_config(0, nb, 0, 0x60); | 89 | reg = read_pci_config(0, nb, 0, 0x60); |
91 | numnodes = ((reg >> 4) & 0xF) + 1; | 90 | numnodes = ((reg >> 4) & 0xF) + 1; |
92 | if (numnodes <= 1) | 91 | if (numnodes <= 1) |
93 | return -1; | 92 | return -ENOENT; |
94 | 93 | ||
95 | pr_info("Number of physical nodes %d\n", numnodes); | 94 | pr_info("Number of physical nodes %d\n", numnodes); |
96 | 95 | ||
@@ -121,9 +120,9 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn) | |||
121 | if ((base >> 8) & 3 || (limit >> 8) & 3) { | 120 | if ((base >> 8) & 3 || (limit >> 8) & 3) { |
122 | pr_err("Node %d using interleaving mode %lx/%lx\n", | 121 | pr_err("Node %d using interleaving mode %lx/%lx\n", |
123 | nodeid, (base >> 8) & 3, (limit >> 8) & 3); | 122 | nodeid, (base >> 8) & 3, (limit >> 8) & 3); |
124 | return -1; | 123 | return -EINVAL; |
125 | } | 124 | } |
126 | if (node_isset(nodeid, nodes_parsed)) { | 125 | if (node_isset(nodeid, numa_nodes_parsed)) { |
127 | pr_info("Node %d already present, skipping\n", | 126 | pr_info("Node %d already present, skipping\n", |
128 | nodeid); | 127 | nodeid); |
129 | continue; | 128 | continue; |
@@ -160,117 +159,28 @@ int __init amd_numa_init(unsigned long start_pfn, unsigned long end_pfn) | |||
160 | if (prevbase > base) { | 159 | if (prevbase > base) { |
161 | pr_err("Node map not sorted %lx,%lx\n", | 160 | pr_err("Node map not sorted %lx,%lx\n", |
162 | prevbase, base); | 161 | prevbase, base); |
163 | return -1; | 162 | return -EINVAL; |
164 | } | 163 | } |
165 | 164 | ||
166 | pr_info("Node %d MemBase %016lx Limit %016lx\n", | 165 | pr_info("Node %d MemBase %016lx Limit %016lx\n", |
167 | nodeid, base, limit); | 166 | nodeid, base, limit); |
168 | 167 | ||
169 | found++; | ||
170 | |||
171 | nodes[nodeid].start = base; | ||
172 | nodes[nodeid].end = limit; | ||
173 | |||
174 | prevbase = base; | 168 | prevbase = base; |
175 | 169 | numa_add_memblk(nodeid, base, limit); | |
176 | node_set(nodeid, nodes_parsed); | 170 | node_set(nodeid, numa_nodes_parsed); |
177 | } | ||
178 | |||
179 | if (!found) | ||
180 | return -1; | ||
181 | return 0; | ||
182 | } | ||
183 | |||
184 | #ifdef CONFIG_NUMA_EMU | ||
185 | static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = { | ||
186 | [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE | ||
187 | }; | ||
188 | |||
189 | void __init amd_get_nodes(struct bootnode *physnodes) | ||
190 | { | ||
191 | int i; | ||
192 | |||
193 | for_each_node_mask(i, nodes_parsed) { | ||
194 | physnodes[i].start = nodes[i].start; | ||
195 | physnodes[i].end = nodes[i].end; | ||
196 | } | 171 | } |
197 | } | ||
198 | |||
199 | static int __init find_node_by_addr(unsigned long addr) | ||
200 | { | ||
201 | int ret = NUMA_NO_NODE; | ||
202 | int i; | ||
203 | |||
204 | for (i = 0; i < 8; i++) | ||
205 | if (addr >= nodes[i].start && addr < nodes[i].end) { | ||
206 | ret = i; | ||
207 | break; | ||
208 | } | ||
209 | return ret; | ||
210 | } | ||
211 | 172 | ||
212 | /* | 173 | if (!nodes_weight(numa_nodes_parsed)) |
213 | * For NUMA emulation, fake proximity domain (_PXM) to node id mappings must be | 174 | return -ENOENT; |
214 | * setup to represent the physical topology but reflect the emulated | ||
215 | * environment. For each emulated node, the real node which it appears on is | ||
216 | * found and a fake pxm to nid mapping is created which mirrors the actual | ||
217 | * locality. node_distance() then represents the correct distances between | ||
218 | * emulated nodes by using the fake acpi mappings to pxms. | ||
219 | */ | ||
220 | void __init amd_fake_nodes(const struct bootnode *nodes, int nr_nodes) | ||
221 | { | ||
222 | unsigned int bits; | ||
223 | unsigned int cores; | ||
224 | unsigned int apicid_base = 0; | ||
225 | int i; | ||
226 | 175 | ||
176 | /* | ||
177 | * We seem to have valid NUMA configuration. Map apicids to nodes | ||
178 | * using the coreid bits from early_identify_cpu. | ||
179 | */ | ||
227 | bits = boot_cpu_data.x86_coreid_bits; | 180 | bits = boot_cpu_data.x86_coreid_bits; |
228 | cores = 1 << bits; | 181 | cores = 1 << bits; |
229 | early_get_boot_cpu_id(); | ||
230 | if (boot_cpu_physical_apicid > 0) | ||
231 | apicid_base = boot_cpu_physical_apicid; | ||
232 | |||
233 | for (i = 0; i < nr_nodes; i++) { | ||
234 | int index; | ||
235 | int nid; | ||
236 | int j; | ||
237 | |||
238 | nid = find_node_by_addr(nodes[i].start); | ||
239 | if (nid == NUMA_NO_NODE) | ||
240 | continue; | ||
241 | |||
242 | index = nodeids[nid] << bits; | ||
243 | if (fake_apicid_to_node[index + apicid_base] == NUMA_NO_NODE) | ||
244 | for (j = apicid_base; j < cores + apicid_base; j++) | ||
245 | fake_apicid_to_node[index + j] = i; | ||
246 | #ifdef CONFIG_ACPI_NUMA | ||
247 | __acpi_map_pxm_to_node(nid, i); | ||
248 | #endif | ||
249 | } | ||
250 | memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node)); | ||
251 | } | ||
252 | #endif /* CONFIG_NUMA_EMU */ | ||
253 | |||
254 | int __init amd_scan_nodes(void) | ||
255 | { | ||
256 | unsigned int bits; | ||
257 | unsigned int cores; | ||
258 | unsigned int apicid_base; | ||
259 | int i; | ||
260 | |||
261 | BUG_ON(nodes_empty(nodes_parsed)); | ||
262 | node_possible_map = nodes_parsed; | ||
263 | memnode_shift = compute_hash_shift(nodes, 8, NULL); | ||
264 | if (memnode_shift < 0) { | ||
265 | pr_err("No NUMA node hash function found. Contact maintainer\n"); | ||
266 | return -1; | ||
267 | } | ||
268 | pr_info("Using node hash shift of %d\n", memnode_shift); | ||
269 | |||
270 | /* use the coreid bits from early_identify_cpu */ | ||
271 | bits = boot_cpu_data.x86_coreid_bits; | ||
272 | cores = (1<<bits); | ||
273 | apicid_base = 0; | 182 | apicid_base = 0; |
183 | |||
274 | /* get the APIC ID of the BSP early for systems with apicid lifting */ | 184 | /* get the APIC ID of the BSP early for systems with apicid lifting */ |
275 | early_get_boot_cpu_id(); | 185 | early_get_boot_cpu_id(); |
276 | if (boot_cpu_physical_apicid > 0) { | 186 | if (boot_cpu_physical_apicid > 0) { |
@@ -278,17 +188,9 @@ int __init amd_scan_nodes(void) | |||
278 | apicid_base = boot_cpu_physical_apicid; | 188 | apicid_base = boot_cpu_physical_apicid; |
279 | } | 189 | } |
280 | 190 | ||
281 | for_each_node_mask(i, node_possible_map) { | 191 | for_each_node_mask(i, numa_nodes_parsed) |
282 | int j; | ||
283 | |||
284 | memblock_x86_register_active_regions(i, | ||
285 | nodes[i].start >> PAGE_SHIFT, | ||
286 | nodes[i].end >> PAGE_SHIFT); | ||
287 | for (j = apicid_base; j < cores + apicid_base; j++) | 192 | for (j = apicid_base; j < cores + apicid_base; j++) |
288 | apicid_to_node[(i << bits) + j] = i; | 193 | set_apicid_to_node((i << bits) + j, i); |
289 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); | ||
290 | } | ||
291 | 194 | ||
292 | numa_init_array(); | ||
293 | return 0; | 195 | return 0; |
294 | } | 196 | } |
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 069ce7c37c01..d4203988504a 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c | |||
@@ -326,7 +326,7 @@ try_again: | |||
326 | if (mm->free_area_cache < len) | 326 | if (mm->free_area_cache < len) |
327 | goto fail; | 327 | goto fail; |
328 | 328 | ||
329 | /* either no address requested or cant fit in requested address hole */ | 329 | /* either no address requested or can't fit in requested address hole */ |
330 | addr = (mm->free_area_cache - len) & huge_page_mask(h); | 330 | addr = (mm->free_area_cache - len) & huge_page_mask(h); |
331 | do { | 331 | do { |
332 | /* | 332 | /* |
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 947f42abe820..286d289b039b 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -18,9 +18,9 @@ | |||
18 | 18 | ||
19 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | 19 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); |
20 | 20 | ||
21 | unsigned long __initdata e820_table_start; | 21 | unsigned long __initdata pgt_buf_start; |
22 | unsigned long __meminitdata e820_table_end; | 22 | unsigned long __meminitdata pgt_buf_end; |
23 | unsigned long __meminitdata e820_table_top; | 23 | unsigned long __meminitdata pgt_buf_top; |
24 | 24 | ||
25 | int after_bootmem; | 25 | int after_bootmem; |
26 | 26 | ||
@@ -33,7 +33,7 @@ int direct_gbpages | |||
33 | static void __init find_early_table_space(unsigned long end, int use_pse, | 33 | static void __init find_early_table_space(unsigned long end, int use_pse, |
34 | int use_gbpages) | 34 | int use_gbpages) |
35 | { | 35 | { |
36 | unsigned long puds, pmds, ptes, tables, start; | 36 | unsigned long puds, pmds, ptes, tables, start = 0, good_end = end; |
37 | phys_addr_t base; | 37 | phys_addr_t base; |
38 | 38 | ||
39 | puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; | 39 | puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; |
@@ -65,29 +65,20 @@ static void __init find_early_table_space(unsigned long end, int use_pse, | |||
65 | #ifdef CONFIG_X86_32 | 65 | #ifdef CONFIG_X86_32 |
66 | /* for fixmap */ | 66 | /* for fixmap */ |
67 | tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); | 67 | tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); |
68 | #endif | ||
69 | 68 | ||
70 | /* | 69 | good_end = max_pfn_mapped << PAGE_SHIFT; |
71 | * RED-PEN putting page tables only on node 0 could | ||
72 | * cause a hotspot and fill up ZONE_DMA. The page tables | ||
73 | * need roughly 0.5KB per GB. | ||
74 | */ | ||
75 | #ifdef CONFIG_X86_32 | ||
76 | start = 0x7000; | ||
77 | #else | ||
78 | start = 0x8000; | ||
79 | #endif | 70 | #endif |
80 | base = memblock_find_in_range(start, max_pfn_mapped<<PAGE_SHIFT, | 71 | |
81 | tables, PAGE_SIZE); | 72 | base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE); |
82 | if (base == MEMBLOCK_ERROR) | 73 | if (base == MEMBLOCK_ERROR) |
83 | panic("Cannot find space for the kernel page tables"); | 74 | panic("Cannot find space for the kernel page tables"); |
84 | 75 | ||
85 | e820_table_start = base >> PAGE_SHIFT; | 76 | pgt_buf_start = base >> PAGE_SHIFT; |
86 | e820_table_end = e820_table_start; | 77 | pgt_buf_end = pgt_buf_start; |
87 | e820_table_top = e820_table_start + (tables >> PAGE_SHIFT); | 78 | pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); |
88 | 79 | ||
89 | printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", | 80 | printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", |
90 | end, e820_table_start << PAGE_SHIFT, e820_table_top << PAGE_SHIFT); | 81 | end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT); |
91 | } | 82 | } |
92 | 83 | ||
93 | struct map_range { | 84 | struct map_range { |
@@ -279,30 +270,11 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
279 | load_cr3(swapper_pg_dir); | 270 | load_cr3(swapper_pg_dir); |
280 | #endif | 271 | #endif |
281 | 272 | ||
282 | #ifdef CONFIG_X86_64 | ||
283 | if (!after_bootmem && !start) { | ||
284 | pud_t *pud; | ||
285 | pmd_t *pmd; | ||
286 | |||
287 | mmu_cr4_features = read_cr4(); | ||
288 | |||
289 | /* | ||
290 | * _brk_end cannot change anymore, but it and _end may be | ||
291 | * located on different 2M pages. cleanup_highmap(), however, | ||
292 | * can only consider _end when it runs, so destroy any | ||
293 | * mappings beyond _brk_end here. | ||
294 | */ | ||
295 | pud = pud_offset(pgd_offset_k(_brk_end), _brk_end); | ||
296 | pmd = pmd_offset(pud, _brk_end - 1); | ||
297 | while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1)) | ||
298 | pmd_clear(pmd); | ||
299 | } | ||
300 | #endif | ||
301 | __flush_tlb_all(); | 273 | __flush_tlb_all(); |
302 | 274 | ||
303 | if (!after_bootmem && e820_table_end > e820_table_start) | 275 | if (!after_bootmem && pgt_buf_end > pgt_buf_start) |
304 | memblock_x86_reserve_range(e820_table_start << PAGE_SHIFT, | 276 | memblock_x86_reserve_range(pgt_buf_start << PAGE_SHIFT, |
305 | e820_table_end << PAGE_SHIFT, "PGTABLE"); | 277 | pgt_buf_end << PAGE_SHIFT, "PGTABLE"); |
306 | 278 | ||
307 | if (!after_bootmem) | 279 | if (!after_bootmem) |
308 | early_memtest(start, end); | 280 | early_memtest(start, end); |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index c821074b7f0b..80088f994193 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -62,10 +62,10 @@ bool __read_mostly __vmalloc_start_set = false; | |||
62 | 62 | ||
63 | static __init void *alloc_low_page(void) | 63 | static __init void *alloc_low_page(void) |
64 | { | 64 | { |
65 | unsigned long pfn = e820_table_end++; | 65 | unsigned long pfn = pgt_buf_end++; |
66 | void *adr; | 66 | void *adr; |
67 | 67 | ||
68 | if (pfn >= e820_table_top) | 68 | if (pfn >= pgt_buf_top) |
69 | panic("alloc_low_page: ran out of memory"); | 69 | panic("alloc_low_page: ran out of memory"); |
70 | 70 | ||
71 | adr = __va(pfn * PAGE_SIZE); | 71 | adr = __va(pfn * PAGE_SIZE); |
@@ -163,8 +163,8 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, | |||
163 | if (pmd_idx_kmap_begin != pmd_idx_kmap_end | 163 | if (pmd_idx_kmap_begin != pmd_idx_kmap_end |
164 | && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin | 164 | && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin |
165 | && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end | 165 | && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end |
166 | && ((__pa(pte) >> PAGE_SHIFT) < e820_table_start | 166 | && ((__pa(pte) >> PAGE_SHIFT) < pgt_buf_start |
167 | || (__pa(pte) >> PAGE_SHIFT) >= e820_table_end)) { | 167 | || (__pa(pte) >> PAGE_SHIFT) >= pgt_buf_end)) { |
168 | pte_t *newpte; | 168 | pte_t *newpte; |
169 | int i; | 169 | int i; |
170 | 170 | ||
@@ -644,8 +644,7 @@ void __init find_low_pfn_range(void) | |||
644 | } | 644 | } |
645 | 645 | ||
646 | #ifndef CONFIG_NEED_MULTIPLE_NODES | 646 | #ifndef CONFIG_NEED_MULTIPLE_NODES |
647 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, | 647 | void __init initmem_init(void) |
648 | int acpi, int k8) | ||
649 | { | 648 | { |
650 | #ifdef CONFIG_HIGHMEM | 649 | #ifdef CONFIG_HIGHMEM |
651 | highstart_pfn = highend_pfn = max_pfn; | 650 | highstart_pfn = highend_pfn = max_pfn; |
@@ -918,7 +917,7 @@ static void mark_nxdata_nx(void) | |||
918 | { | 917 | { |
919 | /* | 918 | /* |
920 | * When this called, init has already been executed and released, | 919 | * When this called, init has already been executed and released, |
921 | * so everything past _etext sould be NX. | 920 | * so everything past _etext should be NX. |
922 | */ | 921 | */ |
923 | unsigned long start = PFN_ALIGN(_etext); | 922 | unsigned long start = PFN_ALIGN(_etext); |
924 | /* | 923 | /* |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index c14a5422e152..2362b646178e 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -51,6 +51,8 @@ | |||
51 | #include <asm/numa.h> | 51 | #include <asm/numa.h> |
52 | #include <asm/cacheflush.h> | 52 | #include <asm/cacheflush.h> |
53 | #include <asm/init.h> | 53 | #include <asm/init.h> |
54 | #include <asm/uv/uv.h> | ||
55 | #include <asm/setup.h> | ||
54 | 56 | ||
55 | static int __init parse_direct_gbpages_off(char *arg) | 57 | static int __init parse_direct_gbpages_off(char *arg) |
56 | { | 58 | { |
@@ -293,18 +295,18 @@ void __init init_extra_mapping_uc(unsigned long phys, unsigned long size) | |||
293 | * to the compile time generated pmds. This results in invalid pmds up | 295 | * to the compile time generated pmds. This results in invalid pmds up |
294 | * to the point where we hit the physaddr 0 mapping. | 296 | * to the point where we hit the physaddr 0 mapping. |
295 | * | 297 | * |
296 | * We limit the mappings to the region from _text to _end. _end is | 298 | * We limit the mappings to the region from _text to _brk_end. _brk_end |
297 | * rounded up to the 2MB boundary. This catches the invalid pmds as | 299 | * is rounded up to the 2MB boundary. This catches the invalid pmds as |
298 | * well, as they are located before _text: | 300 | * well, as they are located before _text: |
299 | */ | 301 | */ |
300 | void __init cleanup_highmap(void) | 302 | void __init cleanup_highmap(void) |
301 | { | 303 | { |
302 | unsigned long vaddr = __START_KERNEL_map; | 304 | unsigned long vaddr = __START_KERNEL_map; |
303 | unsigned long end = roundup((unsigned long)_end, PMD_SIZE) - 1; | 305 | unsigned long vaddr_end = __START_KERNEL_map + (max_pfn_mapped << PAGE_SHIFT); |
306 | unsigned long end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1; | ||
304 | pmd_t *pmd = level2_kernel_pgt; | 307 | pmd_t *pmd = level2_kernel_pgt; |
305 | pmd_t *last_pmd = pmd + PTRS_PER_PMD; | ||
306 | 308 | ||
307 | for (; pmd < last_pmd; pmd++, vaddr += PMD_SIZE) { | 309 | for (; vaddr + PMD_SIZE - 1 < vaddr_end; pmd++, vaddr += PMD_SIZE) { |
308 | if (pmd_none(*pmd)) | 310 | if (pmd_none(*pmd)) |
309 | continue; | 311 | continue; |
310 | if (vaddr < (unsigned long) _text || vaddr > end) | 312 | if (vaddr < (unsigned long) _text || vaddr > end) |
@@ -314,7 +316,7 @@ void __init cleanup_highmap(void) | |||
314 | 316 | ||
315 | static __ref void *alloc_low_page(unsigned long *phys) | 317 | static __ref void *alloc_low_page(unsigned long *phys) |
316 | { | 318 | { |
317 | unsigned long pfn = e820_table_end++; | 319 | unsigned long pfn = pgt_buf_end++; |
318 | void *adr; | 320 | void *adr; |
319 | 321 | ||
320 | if (after_bootmem) { | 322 | if (after_bootmem) { |
@@ -324,7 +326,7 @@ static __ref void *alloc_low_page(unsigned long *phys) | |||
324 | return adr; | 326 | return adr; |
325 | } | 327 | } |
326 | 328 | ||
327 | if (pfn >= e820_table_top) | 329 | if (pfn >= pgt_buf_top) |
328 | panic("alloc_low_page: ran out of memory"); | 330 | panic("alloc_low_page: ran out of memory"); |
329 | 331 | ||
330 | adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); | 332 | adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); |
@@ -333,12 +335,28 @@ static __ref void *alloc_low_page(unsigned long *phys) | |||
333 | return adr; | 335 | return adr; |
334 | } | 336 | } |
335 | 337 | ||
338 | static __ref void *map_low_page(void *virt) | ||
339 | { | ||
340 | void *adr; | ||
341 | unsigned long phys, left; | ||
342 | |||
343 | if (after_bootmem) | ||
344 | return virt; | ||
345 | |||
346 | phys = __pa(virt); | ||
347 | left = phys & (PAGE_SIZE - 1); | ||
348 | adr = early_memremap(phys & PAGE_MASK, PAGE_SIZE); | ||
349 | adr = (void *)(((unsigned long)adr) | left); | ||
350 | |||
351 | return adr; | ||
352 | } | ||
353 | |||
336 | static __ref void unmap_low_page(void *adr) | 354 | static __ref void unmap_low_page(void *adr) |
337 | { | 355 | { |
338 | if (after_bootmem) | 356 | if (after_bootmem) |
339 | return; | 357 | return; |
340 | 358 | ||
341 | early_iounmap(adr, PAGE_SIZE); | 359 | early_iounmap((void *)((unsigned long)adr & PAGE_MASK), PAGE_SIZE); |
342 | } | 360 | } |
343 | 361 | ||
344 | static unsigned long __meminit | 362 | static unsigned long __meminit |
@@ -386,15 +404,6 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, | |||
386 | } | 404 | } |
387 | 405 | ||
388 | static unsigned long __meminit | 406 | static unsigned long __meminit |
389 | phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end, | ||
390 | pgprot_t prot) | ||
391 | { | ||
392 | pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd); | ||
393 | |||
394 | return phys_pte_init(pte, address, end, prot); | ||
395 | } | ||
396 | |||
397 | static unsigned long __meminit | ||
398 | phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, | 407 | phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, |
399 | unsigned long page_size_mask, pgprot_t prot) | 408 | unsigned long page_size_mask, pgprot_t prot) |
400 | { | 409 | { |
@@ -420,8 +429,10 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, | |||
420 | if (pmd_val(*pmd)) { | 429 | if (pmd_val(*pmd)) { |
421 | if (!pmd_large(*pmd)) { | 430 | if (!pmd_large(*pmd)) { |
422 | spin_lock(&init_mm.page_table_lock); | 431 | spin_lock(&init_mm.page_table_lock); |
423 | last_map_addr = phys_pte_update(pmd, address, | 432 | pte = map_low_page((pte_t *)pmd_page_vaddr(*pmd)); |
433 | last_map_addr = phys_pte_init(pte, address, | ||
424 | end, prot); | 434 | end, prot); |
435 | unmap_low_page(pte); | ||
425 | spin_unlock(&init_mm.page_table_lock); | 436 | spin_unlock(&init_mm.page_table_lock); |
426 | continue; | 437 | continue; |
427 | } | 438 | } |
@@ -468,18 +479,6 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, | |||
468 | } | 479 | } |
469 | 480 | ||
470 | static unsigned long __meminit | 481 | static unsigned long __meminit |
471 | phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end, | ||
472 | unsigned long page_size_mask, pgprot_t prot) | ||
473 | { | ||
474 | pmd_t *pmd = pmd_offset(pud, 0); | ||
475 | unsigned long last_map_addr; | ||
476 | |||
477 | last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask, prot); | ||
478 | __flush_tlb_all(); | ||
479 | return last_map_addr; | ||
480 | } | ||
481 | |||
482 | static unsigned long __meminit | ||
483 | phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, | 482 | phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, |
484 | unsigned long page_size_mask) | 483 | unsigned long page_size_mask) |
485 | { | 484 | { |
@@ -504,8 +503,11 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, | |||
504 | 503 | ||
505 | if (pud_val(*pud)) { | 504 | if (pud_val(*pud)) { |
506 | if (!pud_large(*pud)) { | 505 | if (!pud_large(*pud)) { |
507 | last_map_addr = phys_pmd_update(pud, addr, end, | 506 | pmd = map_low_page(pmd_offset(pud, 0)); |
507 | last_map_addr = phys_pmd_init(pmd, addr, end, | ||
508 | page_size_mask, prot); | 508 | page_size_mask, prot); |
509 | unmap_low_page(pmd); | ||
510 | __flush_tlb_all(); | ||
509 | continue; | 511 | continue; |
510 | } | 512 | } |
511 | /* | 513 | /* |
@@ -553,17 +555,6 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, | |||
553 | return last_map_addr; | 555 | return last_map_addr; |
554 | } | 556 | } |
555 | 557 | ||
556 | static unsigned long __meminit | ||
557 | phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end, | ||
558 | unsigned long page_size_mask) | ||
559 | { | ||
560 | pud_t *pud; | ||
561 | |||
562 | pud = (pud_t *)pgd_page_vaddr(*pgd); | ||
563 | |||
564 | return phys_pud_init(pud, addr, end, page_size_mask); | ||
565 | } | ||
566 | |||
567 | unsigned long __meminit | 558 | unsigned long __meminit |
568 | kernel_physical_mapping_init(unsigned long start, | 559 | kernel_physical_mapping_init(unsigned long start, |
569 | unsigned long end, | 560 | unsigned long end, |
@@ -587,8 +578,10 @@ kernel_physical_mapping_init(unsigned long start, | |||
587 | next = end; | 578 | next = end; |
588 | 579 | ||
589 | if (pgd_val(*pgd)) { | 580 | if (pgd_val(*pgd)) { |
590 | last_map_addr = phys_pud_update(pgd, __pa(start), | 581 | pud = map_low_page((pud_t *)pgd_page_vaddr(*pgd)); |
582 | last_map_addr = phys_pud_init(pud, __pa(start), | ||
591 | __pa(end), page_size_mask); | 583 | __pa(end), page_size_mask); |
584 | unmap_low_page(pud); | ||
592 | continue; | 585 | continue; |
593 | } | 586 | } |
594 | 587 | ||
@@ -612,10 +605,9 @@ kernel_physical_mapping_init(unsigned long start, | |||
612 | } | 605 | } |
613 | 606 | ||
614 | #ifndef CONFIG_NUMA | 607 | #ifndef CONFIG_NUMA |
615 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, | 608 | void __init initmem_init(void) |
616 | int acpi, int k8) | ||
617 | { | 609 | { |
618 | memblock_x86_register_active_regions(0, start_pfn, end_pfn); | 610 | memblock_x86_register_active_regions(0, 0, max_pfn); |
619 | } | 611 | } |
620 | #endif | 612 | #endif |
621 | 613 | ||
@@ -908,6 +900,19 @@ const char *arch_vma_name(struct vm_area_struct *vma) | |||
908 | return NULL; | 900 | return NULL; |
909 | } | 901 | } |
910 | 902 | ||
903 | #ifdef CONFIG_X86_UV | ||
904 | #define MIN_MEMORY_BLOCK_SIZE (1 << SECTION_SIZE_BITS) | ||
905 | |||
906 | unsigned long memory_block_size_bytes(void) | ||
907 | { | ||
908 | if (is_uv_system()) { | ||
909 | printk(KERN_INFO "UV: memory block size 2GB\n"); | ||
910 | return 2UL * 1024 * 1024 * 1024; | ||
911 | } | ||
912 | return MIN_MEMORY_BLOCK_SIZE; | ||
913 | } | ||
914 | #endif | ||
915 | |||
911 | #ifdef CONFIG_SPARSEMEM_VMEMMAP | 916 | #ifdef CONFIG_SPARSEMEM_VMEMMAP |
912 | /* | 917 | /* |
913 | * Initialise the sparsemem vmemmap using huge-pages at the PMD level. | 918 | * Initialise the sparsemem vmemmap using huge-pages at the PMD level. |
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index ebf6d7887a38..9559d360fde7 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c | |||
@@ -26,12 +26,50 @@ static __init int numa_setup(char *opt) | |||
26 | early_param("numa", numa_setup); | 26 | early_param("numa", numa_setup); |
27 | 27 | ||
28 | /* | 28 | /* |
29 | * Which logical CPUs are on which nodes | 29 | * apicid, cpu, node mappings |
30 | */ | 30 | */ |
31 | s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { | ||
32 | [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE | ||
33 | }; | ||
34 | |||
31 | cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; | 35 | cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; |
32 | EXPORT_SYMBOL(node_to_cpumask_map); | 36 | EXPORT_SYMBOL(node_to_cpumask_map); |
33 | 37 | ||
34 | /* | 38 | /* |
39 | * Map cpu index to node index | ||
40 | */ | ||
41 | DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); | ||
42 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); | ||
43 | |||
44 | void __cpuinit numa_set_node(int cpu, int node) | ||
45 | { | ||
46 | int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); | ||
47 | |||
48 | /* early setting, no percpu area yet */ | ||
49 | if (cpu_to_node_map) { | ||
50 | cpu_to_node_map[cpu] = node; | ||
51 | return; | ||
52 | } | ||
53 | |||
54 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS | ||
55 | if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) { | ||
56 | printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); | ||
57 | dump_stack(); | ||
58 | return; | ||
59 | } | ||
60 | #endif | ||
61 | per_cpu(x86_cpu_to_node_map, cpu) = node; | ||
62 | |||
63 | if (node != NUMA_NO_NODE) | ||
64 | set_cpu_numa_node(cpu, node); | ||
65 | } | ||
66 | |||
67 | void __cpuinit numa_clear_node(int cpu) | ||
68 | { | ||
69 | numa_set_node(cpu, NUMA_NO_NODE); | ||
70 | } | ||
71 | |||
72 | /* | ||
35 | * Allocate node_to_cpumask_map based on number of available nodes | 73 | * Allocate node_to_cpumask_map based on number of available nodes |
36 | * Requires node_possible_map to be valid. | 74 | * Requires node_possible_map to be valid. |
37 | * | 75 | * |
@@ -57,7 +95,174 @@ void __init setup_node_to_cpumask_map(void) | |||
57 | pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids); | 95 | pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids); |
58 | } | 96 | } |
59 | 97 | ||
60 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS | 98 | /* |
99 | * There are unfortunately some poorly designed mainboards around that | ||
100 | * only connect memory to a single CPU. This breaks the 1:1 cpu->node | ||
101 | * mapping. To avoid this fill in the mapping for all possible CPUs, | ||
102 | * as the number of CPUs is not known yet. We round robin the existing | ||
103 | * nodes. | ||
104 | */ | ||
105 | void __init numa_init_array(void) | ||
106 | { | ||
107 | int rr, i; | ||
108 | |||
109 | rr = first_node(node_online_map); | ||
110 | for (i = 0; i < nr_cpu_ids; i++) { | ||
111 | if (early_cpu_to_node(i) != NUMA_NO_NODE) | ||
112 | continue; | ||
113 | numa_set_node(i, rr); | ||
114 | rr = next_node(rr, node_online_map); | ||
115 | if (rr == MAX_NUMNODES) | ||
116 | rr = first_node(node_online_map); | ||
117 | } | ||
118 | } | ||
119 | |||
120 | static __init int find_near_online_node(int node) | ||
121 | { | ||
122 | int n, val; | ||
123 | int min_val = INT_MAX; | ||
124 | int best_node = -1; | ||
125 | |||
126 | for_each_online_node(n) { | ||
127 | val = node_distance(node, n); | ||
128 | |||
129 | if (val < min_val) { | ||
130 | min_val = val; | ||
131 | best_node = n; | ||
132 | } | ||
133 | } | ||
134 | |||
135 | return best_node; | ||
136 | } | ||
137 | |||
138 | /* | ||
139 | * Setup early cpu_to_node. | ||
140 | * | ||
141 | * Populate cpu_to_node[] only if x86_cpu_to_apicid[], | ||
142 | * and apicid_to_node[] tables have valid entries for a CPU. | ||
143 | * This means we skip cpu_to_node[] initialisation for NUMA | ||
144 | * emulation and faking node case (when running a kernel compiled | ||
145 | * for NUMA on a non NUMA box), which is OK as cpu_to_node[] | ||
146 | * is already initialized in a round robin manner at numa_init_array, | ||
147 | * prior to this call, and this initialization is good enough | ||
148 | * for the fake NUMA cases. | ||
149 | * | ||
150 | * Called before the per_cpu areas are setup. | ||
151 | */ | ||
152 | void __init init_cpu_to_node(void) | ||
153 | { | ||
154 | int cpu; | ||
155 | u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); | ||
156 | |||
157 | BUG_ON(cpu_to_apicid == NULL); | ||
158 | |||
159 | for_each_possible_cpu(cpu) { | ||
160 | int node = numa_cpu_node(cpu); | ||
161 | |||
162 | if (node == NUMA_NO_NODE) | ||
163 | continue; | ||
164 | if (!node_online(node)) | ||
165 | node = find_near_online_node(node); | ||
166 | numa_set_node(cpu, node); | ||
167 | } | ||
168 | } | ||
169 | |||
170 | #ifndef CONFIG_DEBUG_PER_CPU_MAPS | ||
171 | |||
172 | # ifndef CONFIG_NUMA_EMU | ||
173 | void __cpuinit numa_add_cpu(int cpu) | ||
174 | { | ||
175 | cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); | ||
176 | } | ||
177 | |||
178 | void __cpuinit numa_remove_cpu(int cpu) | ||
179 | { | ||
180 | cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); | ||
181 | } | ||
182 | # endif /* !CONFIG_NUMA_EMU */ | ||
183 | |||
184 | #else /* !CONFIG_DEBUG_PER_CPU_MAPS */ | ||
185 | |||
186 | int __cpu_to_node(int cpu) | ||
187 | { | ||
188 | if (early_per_cpu_ptr(x86_cpu_to_node_map)) { | ||
189 | printk(KERN_WARNING | ||
190 | "cpu_to_node(%d): usage too early!\n", cpu); | ||
191 | dump_stack(); | ||
192 | return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; | ||
193 | } | ||
194 | return per_cpu(x86_cpu_to_node_map, cpu); | ||
195 | } | ||
196 | EXPORT_SYMBOL(__cpu_to_node); | ||
197 | |||
198 | /* | ||
199 | * Same function as cpu_to_node() but used if called before the | ||
200 | * per_cpu areas are setup. | ||
201 | */ | ||
202 | int early_cpu_to_node(int cpu) | ||
203 | { | ||
204 | if (early_per_cpu_ptr(x86_cpu_to_node_map)) | ||
205 | return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; | ||
206 | |||
207 | if (!cpu_possible(cpu)) { | ||
208 | printk(KERN_WARNING | ||
209 | "early_cpu_to_node(%d): no per_cpu area!\n", cpu); | ||
210 | dump_stack(); | ||
211 | return NUMA_NO_NODE; | ||
212 | } | ||
213 | return per_cpu(x86_cpu_to_node_map, cpu); | ||
214 | } | ||
215 | |||
216 | struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable) | ||
217 | { | ||
218 | int node = early_cpu_to_node(cpu); | ||
219 | struct cpumask *mask; | ||
220 | char buf[64]; | ||
221 | |||
222 | if (node == NUMA_NO_NODE) { | ||
223 | /* early_cpu_to_node() already emits a warning and trace */ | ||
224 | return NULL; | ||
225 | } | ||
226 | mask = node_to_cpumask_map[node]; | ||
227 | if (!mask) { | ||
228 | pr_err("node_to_cpumask_map[%i] NULL\n", node); | ||
229 | dump_stack(); | ||
230 | return NULL; | ||
231 | } | ||
232 | |||
233 | cpulist_scnprintf(buf, sizeof(buf), mask); | ||
234 | printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", | ||
235 | enable ? "numa_add_cpu" : "numa_remove_cpu", | ||
236 | cpu, node, buf); | ||
237 | return mask; | ||
238 | } | ||
239 | |||
240 | # ifndef CONFIG_NUMA_EMU | ||
241 | static void __cpuinit numa_set_cpumask(int cpu, int enable) | ||
242 | { | ||
243 | struct cpumask *mask; | ||
244 | |||
245 | mask = debug_cpumask_set_cpu(cpu, enable); | ||
246 | if (!mask) | ||
247 | return; | ||
248 | |||
249 | if (enable) | ||
250 | cpumask_set_cpu(cpu, mask); | ||
251 | else | ||
252 | cpumask_clear_cpu(cpu, mask); | ||
253 | } | ||
254 | |||
255 | void __cpuinit numa_add_cpu(int cpu) | ||
256 | { | ||
257 | numa_set_cpumask(cpu, 1); | ||
258 | } | ||
259 | |||
260 | void __cpuinit numa_remove_cpu(int cpu) | ||
261 | { | ||
262 | numa_set_cpumask(cpu, 0); | ||
263 | } | ||
264 | # endif /* !CONFIG_NUMA_EMU */ | ||
265 | |||
61 | /* | 266 | /* |
62 | * Returns a pointer to the bitmask of CPUs on Node 'node'. | 267 | * Returns a pointer to the bitmask of CPUs on Node 'node'. |
63 | */ | 268 | */ |
@@ -80,4 +285,5 @@ const struct cpumask *cpumask_of_node(int node) | |||
80 | return node_to_cpumask_map[node]; | 285 | return node_to_cpumask_map[node]; |
81 | } | 286 | } |
82 | EXPORT_SYMBOL(cpumask_of_node); | 287 | EXPORT_SYMBOL(cpumask_of_node); |
83 | #endif | 288 | |
289 | #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ | ||
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 84a3e4c9f277..bde3906420df 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c | |||
@@ -110,6 +110,12 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); | |||
110 | 110 | ||
111 | static unsigned long kva_start_pfn; | 111 | static unsigned long kva_start_pfn; |
112 | static unsigned long kva_pages; | 112 | static unsigned long kva_pages; |
113 | |||
114 | int __cpuinit numa_cpu_node(int cpu) | ||
115 | { | ||
116 | return apic->x86_32_numa_cpu_node(cpu); | ||
117 | } | ||
118 | |||
113 | /* | 119 | /* |
114 | * FLAT - support for basic PC memory model with discontig enabled, essentially | 120 | * FLAT - support for basic PC memory model with discontig enabled, essentially |
115 | * a single node with all available processors in it with a flat | 121 | * a single node with all available processors in it with a flat |
@@ -346,8 +352,7 @@ static void init_remap_allocator(int nid) | |||
346 | (ulong) node_remap_end_vaddr[nid]); | 352 | (ulong) node_remap_end_vaddr[nid]); |
347 | } | 353 | } |
348 | 354 | ||
349 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, | 355 | void __init initmem_init(void) |
350 | int acpi, int k8) | ||
351 | { | 356 | { |
352 | int nid; | 357 | int nid; |
353 | long kva_target_pfn; | 358 | long kva_target_pfn; |
@@ -361,6 +366,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, | |||
361 | */ | 366 | */ |
362 | 367 | ||
363 | get_memcfg_numa(); | 368 | get_memcfg_numa(); |
369 | numa_init_array(); | ||
364 | 370 | ||
365 | kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE); | 371 | kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE); |
366 | 372 | ||
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 1337c51b07d7..e8c00cc72033 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -13,31 +13,30 @@ | |||
13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
14 | #include <linux/nodemask.h> | 14 | #include <linux/nodemask.h> |
15 | #include <linux/sched.h> | 15 | #include <linux/sched.h> |
16 | #include <linux/acpi.h> | ||
16 | 17 | ||
17 | #include <asm/e820.h> | 18 | #include <asm/e820.h> |
18 | #include <asm/proto.h> | 19 | #include <asm/proto.h> |
19 | #include <asm/dma.h> | 20 | #include <asm/dma.h> |
20 | #include <asm/numa.h> | ||
21 | #include <asm/acpi.h> | 21 | #include <asm/acpi.h> |
22 | #include <asm/amd_nb.h> | 22 | #include <asm/amd_nb.h> |
23 | 23 | ||
24 | #include "numa_internal.h" | ||
25 | |||
24 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; | 26 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; |
25 | EXPORT_SYMBOL(node_data); | 27 | EXPORT_SYMBOL(node_data); |
26 | 28 | ||
27 | struct memnode memnode; | 29 | nodemask_t numa_nodes_parsed __initdata; |
28 | 30 | ||
29 | s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { | 31 | struct memnode memnode; |
30 | [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE | ||
31 | }; | ||
32 | 32 | ||
33 | static unsigned long __initdata nodemap_addr; | 33 | static unsigned long __initdata nodemap_addr; |
34 | static unsigned long __initdata nodemap_size; | 34 | static unsigned long __initdata nodemap_size; |
35 | 35 | ||
36 | /* | 36 | static struct numa_meminfo numa_meminfo __initdata; |
37 | * Map cpu index to node index | 37 | |
38 | */ | 38 | static int numa_distance_cnt; |
39 | DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); | 39 | static u8 *numa_distance; |
40 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); | ||
41 | 40 | ||
42 | /* | 41 | /* |
43 | * Given a shift value, try to populate memnodemap[] | 42 | * Given a shift value, try to populate memnodemap[] |
@@ -46,16 +45,15 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); | |||
46 | * 0 if memnodmap[] too small (of shift too small) | 45 | * 0 if memnodmap[] too small (of shift too small) |
47 | * -1 if node overlap or lost ram (shift too big) | 46 | * -1 if node overlap or lost ram (shift too big) |
48 | */ | 47 | */ |
49 | static int __init populate_memnodemap(const struct bootnode *nodes, | 48 | static int __init populate_memnodemap(const struct numa_meminfo *mi, int shift) |
50 | int numnodes, int shift, int *nodeids) | ||
51 | { | 49 | { |
52 | unsigned long addr, end; | 50 | unsigned long addr, end; |
53 | int i, res = -1; | 51 | int i, res = -1; |
54 | 52 | ||
55 | memset(memnodemap, 0xff, sizeof(s16)*memnodemapsize); | 53 | memset(memnodemap, 0xff, sizeof(s16)*memnodemapsize); |
56 | for (i = 0; i < numnodes; i++) { | 54 | for (i = 0; i < mi->nr_blks; i++) { |
57 | addr = nodes[i].start; | 55 | addr = mi->blk[i].start; |
58 | end = nodes[i].end; | 56 | end = mi->blk[i].end; |
59 | if (addr >= end) | 57 | if (addr >= end) |
60 | continue; | 58 | continue; |
61 | if ((end >> shift) >= memnodemapsize) | 59 | if ((end >> shift) >= memnodemapsize) |
@@ -63,12 +61,7 @@ static int __init populate_memnodemap(const struct bootnode *nodes, | |||
63 | do { | 61 | do { |
64 | if (memnodemap[addr >> shift] != NUMA_NO_NODE) | 62 | if (memnodemap[addr >> shift] != NUMA_NO_NODE) |
65 | return -1; | 63 | return -1; |
66 | 64 | memnodemap[addr >> shift] = mi->blk[i].nid; | |
67 | if (!nodeids) | ||
68 | memnodemap[addr >> shift] = i; | ||
69 | else | ||
70 | memnodemap[addr >> shift] = nodeids[i]; | ||
71 | |||
72 | addr += (1UL << shift); | 65 | addr += (1UL << shift); |
73 | } while (addr < end); | 66 | } while (addr < end); |
74 | res = 1; | 67 | res = 1; |
@@ -86,7 +79,7 @@ static int __init allocate_cachealigned_memnodemap(void) | |||
86 | 79 | ||
87 | addr = 0x8000; | 80 | addr = 0x8000; |
88 | nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES); | 81 | nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES); |
89 | nodemap_addr = memblock_find_in_range(addr, max_pfn<<PAGE_SHIFT, | 82 | nodemap_addr = memblock_find_in_range(addr, get_max_mapped(), |
90 | nodemap_size, L1_CACHE_BYTES); | 83 | nodemap_size, L1_CACHE_BYTES); |
91 | if (nodemap_addr == MEMBLOCK_ERROR) { | 84 | if (nodemap_addr == MEMBLOCK_ERROR) { |
92 | printk(KERN_ERR | 85 | printk(KERN_ERR |
@@ -106,16 +99,15 @@ static int __init allocate_cachealigned_memnodemap(void) | |||
106 | * The LSB of all start and end addresses in the node map is the value of the | 99 | * The LSB of all start and end addresses in the node map is the value of the |
107 | * maximum possible shift. | 100 | * maximum possible shift. |
108 | */ | 101 | */ |
109 | static int __init extract_lsb_from_nodes(const struct bootnode *nodes, | 102 | static int __init extract_lsb_from_nodes(const struct numa_meminfo *mi) |
110 | int numnodes) | ||
111 | { | 103 | { |
112 | int i, nodes_used = 0; | 104 | int i, nodes_used = 0; |
113 | unsigned long start, end; | 105 | unsigned long start, end; |
114 | unsigned long bitfield = 0, memtop = 0; | 106 | unsigned long bitfield = 0, memtop = 0; |
115 | 107 | ||
116 | for (i = 0; i < numnodes; i++) { | 108 | for (i = 0; i < mi->nr_blks; i++) { |
117 | start = nodes[i].start; | 109 | start = mi->blk[i].start; |
118 | end = nodes[i].end; | 110 | end = mi->blk[i].end; |
119 | if (start >= end) | 111 | if (start >= end) |
120 | continue; | 112 | continue; |
121 | bitfield |= start; | 113 | bitfield |= start; |
@@ -131,18 +123,17 @@ static int __init extract_lsb_from_nodes(const struct bootnode *nodes, | |||
131 | return i; | 123 | return i; |
132 | } | 124 | } |
133 | 125 | ||
134 | int __init compute_hash_shift(struct bootnode *nodes, int numnodes, | 126 | static int __init compute_hash_shift(const struct numa_meminfo *mi) |
135 | int *nodeids) | ||
136 | { | 127 | { |
137 | int shift; | 128 | int shift; |
138 | 129 | ||
139 | shift = extract_lsb_from_nodes(nodes, numnodes); | 130 | shift = extract_lsb_from_nodes(mi); |
140 | if (allocate_cachealigned_memnodemap()) | 131 | if (allocate_cachealigned_memnodemap()) |
141 | return -1; | 132 | return -1; |
142 | printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", | 133 | printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", |
143 | shift); | 134 | shift); |
144 | 135 | ||
145 | if (populate_memnodemap(nodes, numnodes, shift, nodeids) != 1) { | 136 | if (populate_memnodemap(mi, shift) != 1) { |
146 | printk(KERN_INFO "Your memory is not aligned you need to " | 137 | printk(KERN_INFO "Your memory is not aligned you need to " |
147 | "rebuild your kernel with a bigger NODEMAPSIZE " | 138 | "rebuild your kernel with a bigger NODEMAPSIZE " |
148 | "shift=%d\n", shift); | 139 | "shift=%d\n", shift); |
@@ -188,6 +179,63 @@ static void * __init early_node_mem(int nodeid, unsigned long start, | |||
188 | return NULL; | 179 | return NULL; |
189 | } | 180 | } |
190 | 181 | ||
182 | static int __init numa_add_memblk_to(int nid, u64 start, u64 end, | ||
183 | struct numa_meminfo *mi) | ||
184 | { | ||
185 | /* ignore zero length blks */ | ||
186 | if (start == end) | ||
187 | return 0; | ||
188 | |||
189 | /* whine about and ignore invalid blks */ | ||
190 | if (start > end || nid < 0 || nid >= MAX_NUMNODES) { | ||
191 | pr_warning("NUMA: Warning: invalid memblk node %d (%Lx-%Lx)\n", | ||
192 | nid, start, end); | ||
193 | return 0; | ||
194 | } | ||
195 | |||
196 | if (mi->nr_blks >= NR_NODE_MEMBLKS) { | ||
197 | pr_err("NUMA: too many memblk ranges\n"); | ||
198 | return -EINVAL; | ||
199 | } | ||
200 | |||
201 | mi->blk[mi->nr_blks].start = start; | ||
202 | mi->blk[mi->nr_blks].end = end; | ||
203 | mi->blk[mi->nr_blks].nid = nid; | ||
204 | mi->nr_blks++; | ||
205 | return 0; | ||
206 | } | ||
207 | |||
208 | /** | ||
209 | * numa_remove_memblk_from - Remove one numa_memblk from a numa_meminfo | ||
210 | * @idx: Index of memblk to remove | ||
211 | * @mi: numa_meminfo to remove memblk from | ||
212 | * | ||
213 | * Remove @idx'th numa_memblk from @mi by shifting @mi->blk[] and | ||
214 | * decrementing @mi->nr_blks. | ||
215 | */ | ||
216 | void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi) | ||
217 | { | ||
218 | mi->nr_blks--; | ||
219 | memmove(&mi->blk[idx], &mi->blk[idx + 1], | ||
220 | (mi->nr_blks - idx) * sizeof(mi->blk[0])); | ||
221 | } | ||
222 | |||
223 | /** | ||
224 | * numa_add_memblk - Add one numa_memblk to numa_meminfo | ||
225 | * @nid: NUMA node ID of the new memblk | ||
226 | * @start: Start address of the new memblk | ||
227 | * @end: End address of the new memblk | ||
228 | * | ||
229 | * Add a new memblk to the default numa_meminfo. | ||
230 | * | ||
231 | * RETURNS: | ||
232 | * 0 on success, -errno on failure. | ||
233 | */ | ||
234 | int __init numa_add_memblk(int nid, u64 start, u64 end) | ||
235 | { | ||
236 | return numa_add_memblk_to(nid, start, end, &numa_meminfo); | ||
237 | } | ||
238 | |||
191 | /* Initialize bootmem allocator for a node */ | 239 | /* Initialize bootmem allocator for a node */ |
192 | void __init | 240 | void __init |
193 | setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | 241 | setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) |
@@ -234,692 +282,386 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
234 | node_set_online(nodeid); | 282 | node_set_online(nodeid); |
235 | } | 283 | } |
236 | 284 | ||
237 | /* | 285 | /** |
238 | * There are unfortunately some poorly designed mainboards around that | 286 | * numa_cleanup_meminfo - Cleanup a numa_meminfo |
239 | * only connect memory to a single CPU. This breaks the 1:1 cpu->node | 287 | * @mi: numa_meminfo to clean up |
240 | * mapping. To avoid this fill in the mapping for all possible CPUs, | 288 | * |
241 | * as the number of CPUs is not known yet. We round robin the existing | 289 | * Sanitize @mi by merging and removing unncessary memblks. Also check for |
242 | * nodes. | 290 | * conflicts and clear unused memblks. |
291 | * | ||
292 | * RETURNS: | ||
293 | * 0 on success, -errno on failure. | ||
243 | */ | 294 | */ |
244 | void __init numa_init_array(void) | 295 | int __init numa_cleanup_meminfo(struct numa_meminfo *mi) |
245 | { | 296 | { |
246 | int rr, i; | 297 | const u64 low = 0; |
298 | const u64 high = (u64)max_pfn << PAGE_SHIFT; | ||
299 | int i, j, k; | ||
247 | 300 | ||
248 | rr = first_node(node_online_map); | 301 | for (i = 0; i < mi->nr_blks; i++) { |
249 | for (i = 0; i < nr_cpu_ids; i++) { | 302 | struct numa_memblk *bi = &mi->blk[i]; |
250 | if (early_cpu_to_node(i) != NUMA_NO_NODE) | ||
251 | continue; | ||
252 | numa_set_node(i, rr); | ||
253 | rr = next_node(rr, node_online_map); | ||
254 | if (rr == MAX_NUMNODES) | ||
255 | rr = first_node(node_online_map); | ||
256 | } | ||
257 | } | ||
258 | |||
259 | #ifdef CONFIG_NUMA_EMU | ||
260 | /* Numa emulation */ | ||
261 | static struct bootnode nodes[MAX_NUMNODES] __initdata; | ||
262 | static struct bootnode physnodes[MAX_NUMNODES] __cpuinitdata; | ||
263 | static char *cmdline __initdata; | ||
264 | 303 | ||
265 | void __init numa_emu_cmdline(char *str) | 304 | /* make sure all blocks are inside the limits */ |
266 | { | 305 | bi->start = max(bi->start, low); |
267 | cmdline = str; | 306 | bi->end = min(bi->end, high); |
268 | } | ||
269 | 307 | ||
270 | static int __init setup_physnodes(unsigned long start, unsigned long end, | 308 | /* and there's no empty block */ |
271 | int acpi, int amd) | 309 | if (bi->start == bi->end) { |
272 | { | 310 | numa_remove_memblk_from(i--, mi); |
273 | int ret = 0; | ||
274 | int i; | ||
275 | |||
276 | memset(physnodes, 0, sizeof(physnodes)); | ||
277 | #ifdef CONFIG_ACPI_NUMA | ||
278 | if (acpi) | ||
279 | acpi_get_nodes(physnodes, start, end); | ||
280 | #endif | ||
281 | #ifdef CONFIG_AMD_NUMA | ||
282 | if (amd) | ||
283 | amd_get_nodes(physnodes); | ||
284 | #endif | ||
285 | /* | ||
286 | * Basic sanity checking on the physical node map: there may be errors | ||
287 | * if the SRAT or AMD code incorrectly reported the topology or the mem= | ||
288 | * kernel parameter is used. | ||
289 | */ | ||
290 | for (i = 0; i < MAX_NUMNODES; i++) { | ||
291 | if (physnodes[i].start == physnodes[i].end) | ||
292 | continue; | ||
293 | if (physnodes[i].start > end) { | ||
294 | physnodes[i].end = physnodes[i].start; | ||
295 | continue; | ||
296 | } | ||
297 | if (physnodes[i].end < start) { | ||
298 | physnodes[i].start = physnodes[i].end; | ||
299 | continue; | 311 | continue; |
300 | } | 312 | } |
301 | if (physnodes[i].start < start) | ||
302 | physnodes[i].start = start; | ||
303 | if (physnodes[i].end > end) | ||
304 | physnodes[i].end = end; | ||
305 | ret++; | ||
306 | } | ||
307 | 313 | ||
308 | /* | 314 | for (j = i + 1; j < mi->nr_blks; j++) { |
309 | * If no physical topology was detected, a single node is faked to cover | 315 | struct numa_memblk *bj = &mi->blk[j]; |
310 | * the entire address space. | 316 | unsigned long start, end; |
311 | */ | ||
312 | if (!ret) { | ||
313 | physnodes[ret].start = start; | ||
314 | physnodes[ret].end = end; | ||
315 | ret = 1; | ||
316 | } | ||
317 | return ret; | ||
318 | } | ||
319 | |||
320 | static void __init fake_physnodes(int acpi, int amd, int nr_nodes) | ||
321 | { | ||
322 | int i; | ||
323 | |||
324 | BUG_ON(acpi && amd); | ||
325 | #ifdef CONFIG_ACPI_NUMA | ||
326 | if (acpi) | ||
327 | acpi_fake_nodes(nodes, nr_nodes); | ||
328 | #endif | ||
329 | #ifdef CONFIG_AMD_NUMA | ||
330 | if (amd) | ||
331 | amd_fake_nodes(nodes, nr_nodes); | ||
332 | #endif | ||
333 | if (!acpi && !amd) | ||
334 | for (i = 0; i < nr_cpu_ids; i++) | ||
335 | numa_set_node(i, 0); | ||
336 | } | ||
337 | |||
338 | /* | ||
339 | * Setups up nid to range from addr to addr + size. If the end | ||
340 | * boundary is greater than max_addr, then max_addr is used instead. | ||
341 | * The return value is 0 if there is additional memory left for | ||
342 | * allocation past addr and -1 otherwise. addr is adjusted to be at | ||
343 | * the end of the node. | ||
344 | */ | ||
345 | static int __init setup_node_range(int nid, u64 *addr, u64 size, u64 max_addr) | ||
346 | { | ||
347 | int ret = 0; | ||
348 | nodes[nid].start = *addr; | ||
349 | *addr += size; | ||
350 | if (*addr >= max_addr) { | ||
351 | *addr = max_addr; | ||
352 | ret = -1; | ||
353 | } | ||
354 | nodes[nid].end = *addr; | ||
355 | node_set(nid, node_possible_map); | ||
356 | printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid, | ||
357 | nodes[nid].start, nodes[nid].end, | ||
358 | (nodes[nid].end - nodes[nid].start) >> 20); | ||
359 | return ret; | ||
360 | } | ||
361 | |||
362 | /* | ||
363 | * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr | ||
364 | * to max_addr. The return value is the number of nodes allocated. | ||
365 | */ | ||
366 | static int __init split_nodes_interleave(u64 addr, u64 max_addr, int nr_nodes) | ||
367 | { | ||
368 | nodemask_t physnode_mask = NODE_MASK_NONE; | ||
369 | u64 size; | ||
370 | int big; | ||
371 | int ret = 0; | ||
372 | int i; | ||
373 | |||
374 | if (nr_nodes <= 0) | ||
375 | return -1; | ||
376 | if (nr_nodes > MAX_NUMNODES) { | ||
377 | pr_info("numa=fake=%d too large, reducing to %d\n", | ||
378 | nr_nodes, MAX_NUMNODES); | ||
379 | nr_nodes = MAX_NUMNODES; | ||
380 | } | ||
381 | |||
382 | size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / nr_nodes; | ||
383 | /* | ||
384 | * Calculate the number of big nodes that can be allocated as a result | ||
385 | * of consolidating the remainder. | ||
386 | */ | ||
387 | big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * nr_nodes) / | ||
388 | FAKE_NODE_MIN_SIZE; | ||
389 | |||
390 | size &= FAKE_NODE_MIN_HASH_MASK; | ||
391 | if (!size) { | ||
392 | pr_err("Not enough memory for each node. " | ||
393 | "NUMA emulation disabled.\n"); | ||
394 | return -1; | ||
395 | } | ||
396 | |||
397 | for (i = 0; i < MAX_NUMNODES; i++) | ||
398 | if (physnodes[i].start != physnodes[i].end) | ||
399 | node_set(i, physnode_mask); | ||
400 | |||
401 | /* | ||
402 | * Continue to fill physical nodes with fake nodes until there is no | ||
403 | * memory left on any of them. | ||
404 | */ | ||
405 | while (nodes_weight(physnode_mask)) { | ||
406 | for_each_node_mask(i, physnode_mask) { | ||
407 | u64 end = physnodes[i].start + size; | ||
408 | u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN); | ||
409 | |||
410 | if (ret < big) | ||
411 | end += FAKE_NODE_MIN_SIZE; | ||
412 | 317 | ||
413 | /* | 318 | /* |
414 | * Continue to add memory to this fake node if its | 319 | * See whether there are overlapping blocks. Whine |
415 | * non-reserved memory is less than the per-node size. | 320 | * about but allow overlaps of the same nid. They |
321 | * will be merged below. | ||
416 | */ | 322 | */ |
417 | while (end - physnodes[i].start - | 323 | if (bi->end > bj->start && bi->start < bj->end) { |
418 | memblock_x86_hole_size(physnodes[i].start, end) < size) { | 324 | if (bi->nid != bj->nid) { |
419 | end += FAKE_NODE_MIN_SIZE; | 325 | pr_err("NUMA: node %d (%Lx-%Lx) overlaps with node %d (%Lx-%Lx)\n", |
420 | if (end > physnodes[i].end) { | 326 | bi->nid, bi->start, bi->end, |
421 | end = physnodes[i].end; | 327 | bj->nid, bj->start, bj->end); |
422 | break; | 328 | return -EINVAL; |
423 | } | 329 | } |
330 | pr_warning("NUMA: Warning: node %d (%Lx-%Lx) overlaps with itself (%Lx-%Lx)\n", | ||
331 | bi->nid, bi->start, bi->end, | ||
332 | bj->start, bj->end); | ||
424 | } | 333 | } |
425 | 334 | ||
426 | /* | 335 | /* |
427 | * If there won't be at least FAKE_NODE_MIN_SIZE of | 336 | * Join together blocks on the same node, holes |
428 | * non-reserved memory in ZONE_DMA32 for the next node, | 337 | * between which don't overlap with memory on other |
429 | * this one must extend to the boundary. | 338 | * nodes. |
430 | */ | ||
431 | if (end < dma32_end && dma32_end - end - | ||
432 | memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) | ||
433 | end = dma32_end; | ||
434 | |||
435 | /* | ||
436 | * If there won't be enough non-reserved memory for the | ||
437 | * next node, this one must extend to the end of the | ||
438 | * physical node. | ||
439 | */ | 339 | */ |
440 | if (physnodes[i].end - end - | 340 | if (bi->nid != bj->nid) |
441 | memblock_x86_hole_size(end, physnodes[i].end) < size) | 341 | continue; |
442 | end = physnodes[i].end; | 342 | start = max(min(bi->start, bj->start), low); |
443 | 343 | end = min(max(bi->end, bj->end), high); | |
444 | /* | 344 | for (k = 0; k < mi->nr_blks; k++) { |
445 | * Avoid allocating more nodes than requested, which can | 345 | struct numa_memblk *bk = &mi->blk[k]; |
446 | * happen as a result of rounding down each node's size | 346 | |
447 | * to FAKE_NODE_MIN_SIZE. | 347 | if (bi->nid == bk->nid) |
448 | */ | 348 | continue; |
449 | if (nodes_weight(physnode_mask) + ret >= nr_nodes) | 349 | if (start < bk->end && end > bk->start) |
450 | end = physnodes[i].end; | 350 | break; |
451 | 351 | } | |
452 | if (setup_node_range(ret++, &physnodes[i].start, | 352 | if (k < mi->nr_blks) |
453 | end - physnodes[i].start, | 353 | continue; |
454 | physnodes[i].end) < 0) | 354 | printk(KERN_INFO "NUMA: Node %d [%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n", |
455 | node_clear(i, physnode_mask); | 355 | bi->nid, bi->start, bi->end, bj->start, bj->end, |
356 | start, end); | ||
357 | bi->start = start; | ||
358 | bi->end = end; | ||
359 | numa_remove_memblk_from(j--, mi); | ||
456 | } | 360 | } |
457 | } | 361 | } |
458 | return ret; | ||
459 | } | ||
460 | |||
461 | /* | ||
462 | * Returns the end address of a node so that there is at least `size' amount of | ||
463 | * non-reserved memory or `max_addr' is reached. | ||
464 | */ | ||
465 | static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size) | ||
466 | { | ||
467 | u64 end = start + size; | ||
468 | 362 | ||
469 | while (end - start - memblock_x86_hole_size(start, end) < size) { | 363 | for (i = mi->nr_blks; i < ARRAY_SIZE(mi->blk); i++) { |
470 | end += FAKE_NODE_MIN_SIZE; | 364 | mi->blk[i].start = mi->blk[i].end = 0; |
471 | if (end > max_addr) { | 365 | mi->blk[i].nid = NUMA_NO_NODE; |
472 | end = max_addr; | ||
473 | break; | ||
474 | } | ||
475 | } | 366 | } |
476 | return end; | 367 | |
368 | return 0; | ||
477 | } | 369 | } |
478 | 370 | ||
479 | /* | 371 | /* |
480 | * Sets up fake nodes of `size' interleaved over physical nodes ranging from | 372 | * Set nodes, which have memory in @mi, in *@nodemask. |
481 | * `addr' to `max_addr'. The return value is the number of nodes allocated. | ||
482 | */ | 373 | */ |
483 | static int __init split_nodes_size_interleave(u64 addr, u64 max_addr, u64 size) | 374 | static void __init numa_nodemask_from_meminfo(nodemask_t *nodemask, |
375 | const struct numa_meminfo *mi) | ||
484 | { | 376 | { |
485 | nodemask_t physnode_mask = NODE_MASK_NONE; | ||
486 | u64 min_size; | ||
487 | int ret = 0; | ||
488 | int i; | 377 | int i; |
489 | 378 | ||
490 | if (!size) | 379 | for (i = 0; i < ARRAY_SIZE(mi->blk); i++) |
491 | return -1; | 380 | if (mi->blk[i].start != mi->blk[i].end && |
492 | /* | 381 | mi->blk[i].nid != NUMA_NO_NODE) |
493 | * The limit on emulated nodes is MAX_NUMNODES, so the size per node is | 382 | node_set(mi->blk[i].nid, *nodemask); |
494 | * increased accordingly if the requested size is too small. This | 383 | } |
495 | * creates a uniform distribution of node sizes across the entire | ||
496 | * machine (but not necessarily over physical nodes). | ||
497 | */ | ||
498 | min_size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / | ||
499 | MAX_NUMNODES; | ||
500 | min_size = max(min_size, FAKE_NODE_MIN_SIZE); | ||
501 | if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size) | ||
502 | min_size = (min_size + FAKE_NODE_MIN_SIZE) & | ||
503 | FAKE_NODE_MIN_HASH_MASK; | ||
504 | if (size < min_size) { | ||
505 | pr_err("Fake node size %LuMB too small, increasing to %LuMB\n", | ||
506 | size >> 20, min_size >> 20); | ||
507 | size = min_size; | ||
508 | } | ||
509 | size &= FAKE_NODE_MIN_HASH_MASK; | ||
510 | |||
511 | for (i = 0; i < MAX_NUMNODES; i++) | ||
512 | if (physnodes[i].start != physnodes[i].end) | ||
513 | node_set(i, physnode_mask); | ||
514 | /* | ||
515 | * Fill physical nodes with fake nodes of size until there is no memory | ||
516 | * left on any of them. | ||
517 | */ | ||
518 | while (nodes_weight(physnode_mask)) { | ||
519 | for_each_node_mask(i, physnode_mask) { | ||
520 | u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT; | ||
521 | u64 end; | ||
522 | |||
523 | end = find_end_of_node(physnodes[i].start, | ||
524 | physnodes[i].end, size); | ||
525 | /* | ||
526 | * If there won't be at least FAKE_NODE_MIN_SIZE of | ||
527 | * non-reserved memory in ZONE_DMA32 for the next node, | ||
528 | * this one must extend to the boundary. | ||
529 | */ | ||
530 | if (end < dma32_end && dma32_end - end - | ||
531 | memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) | ||
532 | end = dma32_end; | ||
533 | 384 | ||
534 | /* | 385 | /** |
535 | * If there won't be enough non-reserved memory for the | 386 | * numa_reset_distance - Reset NUMA distance table |
536 | * next node, this one must extend to the end of the | 387 | * |
537 | * physical node. | 388 | * The current table is freed. The next numa_set_distance() call will |
538 | */ | 389 | * create a new one. |
539 | if (physnodes[i].end - end - | 390 | */ |
540 | memblock_x86_hole_size(end, physnodes[i].end) < size) | 391 | void __init numa_reset_distance(void) |
541 | end = physnodes[i].end; | 392 | { |
393 | size_t size = numa_distance_cnt * numa_distance_cnt * sizeof(numa_distance[0]); | ||
542 | 394 | ||
543 | /* | 395 | /* numa_distance could be 1LU marking allocation failure, test cnt */ |
544 | * Setup the fake node that will be allocated as bootmem | 396 | if (numa_distance_cnt) |
545 | * later. If setup_node_range() returns non-zero, there | 397 | memblock_x86_free_range(__pa(numa_distance), |
546 | * is no more memory available on this physical node. | 398 | __pa(numa_distance) + size); |
547 | */ | 399 | numa_distance_cnt = 0; |
548 | if (setup_node_range(ret++, &physnodes[i].start, | 400 | numa_distance = NULL; /* enable table creation */ |
549 | end - physnodes[i].start, | ||
550 | physnodes[i].end) < 0) | ||
551 | node_clear(i, physnode_mask); | ||
552 | } | ||
553 | } | ||
554 | return ret; | ||
555 | } | 401 | } |
556 | 402 | ||
557 | /* | 403 | static int __init numa_alloc_distance(void) |
558 | * Sets up the system RAM area from start_pfn to last_pfn according to the | ||
559 | * numa=fake command-line option. | ||
560 | */ | ||
561 | static int __init numa_emulation(unsigned long start_pfn, | ||
562 | unsigned long last_pfn, int acpi, int amd) | ||
563 | { | 404 | { |
564 | u64 addr = start_pfn << PAGE_SHIFT; | 405 | nodemask_t nodes_parsed; |
565 | u64 max_addr = last_pfn << PAGE_SHIFT; | 406 | size_t size; |
566 | int num_nodes; | 407 | int i, j, cnt = 0; |
567 | int i; | 408 | u64 phys; |
568 | 409 | ||
569 | /* | 410 | /* size the new table and allocate it */ |
570 | * If the numa=fake command-line contains a 'M' or 'G', it represents | 411 | nodes_parsed = numa_nodes_parsed; |
571 | * the fixed node size. Otherwise, if it is just a single number N, | 412 | numa_nodemask_from_meminfo(&nodes_parsed, &numa_meminfo); |
572 | * split the system RAM into N fake nodes. | ||
573 | */ | ||
574 | if (strchr(cmdline, 'M') || strchr(cmdline, 'G')) { | ||
575 | u64 size; | ||
576 | 413 | ||
577 | size = memparse(cmdline, &cmdline); | 414 | for_each_node_mask(i, nodes_parsed) |
578 | num_nodes = split_nodes_size_interleave(addr, max_addr, size); | 415 | cnt = i; |
579 | } else { | 416 | cnt++; |
580 | unsigned long n; | 417 | size = cnt * cnt * sizeof(numa_distance[0]); |
581 | 418 | ||
582 | n = simple_strtoul(cmdline, NULL, 0); | 419 | phys = memblock_find_in_range(0, (u64)max_pfn_mapped << PAGE_SHIFT, |
583 | num_nodes = split_nodes_interleave(addr, max_addr, n); | 420 | size, PAGE_SIZE); |
421 | if (phys == MEMBLOCK_ERROR) { | ||
422 | pr_warning("NUMA: Warning: can't allocate distance table!\n"); | ||
423 | /* don't retry until explicitly reset */ | ||
424 | numa_distance = (void *)1LU; | ||
425 | return -ENOMEM; | ||
584 | } | 426 | } |
427 | memblock_x86_reserve_range(phys, phys + size, "NUMA DIST"); | ||
585 | 428 | ||
586 | if (num_nodes < 0) | 429 | numa_distance = __va(phys); |
587 | return num_nodes; | 430 | numa_distance_cnt = cnt; |
588 | memnode_shift = compute_hash_shift(nodes, num_nodes, NULL); | 431 | |
589 | if (memnode_shift < 0) { | 432 | /* fill with the default distances */ |
590 | memnode_shift = 0; | 433 | for (i = 0; i < cnt; i++) |
591 | printk(KERN_ERR "No NUMA hash function found. NUMA emulation " | 434 | for (j = 0; j < cnt; j++) |
592 | "disabled.\n"); | 435 | numa_distance[i * cnt + j] = i == j ? |
593 | return -1; | 436 | LOCAL_DISTANCE : REMOTE_DISTANCE; |
594 | } | 437 | printk(KERN_DEBUG "NUMA: Initialized distance table, cnt=%d\n", cnt); |
595 | 438 | ||
596 | /* | ||
597 | * We need to vacate all active ranges that may have been registered for | ||
598 | * the e820 memory map. | ||
599 | */ | ||
600 | remove_all_active_ranges(); | ||
601 | for_each_node_mask(i, node_possible_map) { | ||
602 | memblock_x86_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, | ||
603 | nodes[i].end >> PAGE_SHIFT); | ||
604 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); | ||
605 | } | ||
606 | setup_physnodes(addr, max_addr, acpi, amd); | ||
607 | fake_physnodes(acpi, amd, num_nodes); | ||
608 | numa_init_array(); | ||
609 | return 0; | 439 | return 0; |
610 | } | 440 | } |
611 | #endif /* CONFIG_NUMA_EMU */ | ||
612 | 441 | ||
613 | void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn, | 442 | /** |
614 | int acpi, int amd) | 443 | * numa_set_distance - Set NUMA distance from one NUMA to another |
444 | * @from: the 'from' node to set distance | ||
445 | * @to: the 'to' node to set distance | ||
446 | * @distance: NUMA distance | ||
447 | * | ||
448 | * Set the distance from node @from to @to to @distance. If distance table | ||
449 | * doesn't exist, one which is large enough to accommodate all the currently | ||
450 | * known nodes will be created. | ||
451 | * | ||
452 | * If such table cannot be allocated, a warning is printed and further | ||
453 | * calls are ignored until the distance table is reset with | ||
454 | * numa_reset_distance(). | ||
455 | * | ||
456 | * If @from or @to is higher than the highest known node at the time of | ||
457 | * table creation or @distance doesn't make sense, the call is ignored. | ||
458 | * This is to allow simplification of specific NUMA config implementations. | ||
459 | */ | ||
460 | void __init numa_set_distance(int from, int to, int distance) | ||
615 | { | 461 | { |
616 | int i; | 462 | if (!numa_distance && numa_alloc_distance() < 0) |
617 | |||
618 | nodes_clear(node_possible_map); | ||
619 | nodes_clear(node_online_map); | ||
620 | |||
621 | #ifdef CONFIG_NUMA_EMU | ||
622 | setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT, | ||
623 | acpi, amd); | ||
624 | if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, amd)) | ||
625 | return; | 463 | return; |
626 | setup_physnodes(start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT, | ||
627 | acpi, amd); | ||
628 | nodes_clear(node_possible_map); | ||
629 | nodes_clear(node_online_map); | ||
630 | #endif | ||
631 | 464 | ||
632 | #ifdef CONFIG_ACPI_NUMA | 465 | if (from >= numa_distance_cnt || to >= numa_distance_cnt) { |
633 | if (!numa_off && acpi && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, | 466 | printk_once(KERN_DEBUG "NUMA: Debug: distance out of bound, from=%d to=%d distance=%d\n", |
634 | last_pfn << PAGE_SHIFT)) | 467 | from, to, distance); |
635 | return; | 468 | return; |
636 | nodes_clear(node_possible_map); | 469 | } |
637 | nodes_clear(node_online_map); | ||
638 | #endif | ||
639 | 470 | ||
640 | #ifdef CONFIG_AMD_NUMA | 471 | if ((u8)distance != distance || |
641 | if (!numa_off && amd && !amd_scan_nodes()) | 472 | (from == to && distance != LOCAL_DISTANCE)) { |
473 | pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n", | ||
474 | from, to, distance); | ||
642 | return; | 475 | return; |
643 | nodes_clear(node_possible_map); | 476 | } |
644 | nodes_clear(node_online_map); | ||
645 | #endif | ||
646 | printk(KERN_INFO "%s\n", | ||
647 | numa_off ? "NUMA turned off" : "No NUMA configuration found"); | ||
648 | 477 | ||
649 | printk(KERN_INFO "Faking a node at %016lx-%016lx\n", | 478 | numa_distance[from * numa_distance_cnt + to] = distance; |
650 | start_pfn << PAGE_SHIFT, | ||
651 | last_pfn << PAGE_SHIFT); | ||
652 | /* setup dummy node covering all memory */ | ||
653 | memnode_shift = 63; | ||
654 | memnodemap = memnode.embedded_map; | ||
655 | memnodemap[0] = 0; | ||
656 | node_set_online(0); | ||
657 | node_set(0, node_possible_map); | ||
658 | for (i = 0; i < nr_cpu_ids; i++) | ||
659 | numa_set_node(i, 0); | ||
660 | memblock_x86_register_active_regions(0, start_pfn, last_pfn); | ||
661 | setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT); | ||
662 | } | 479 | } |
663 | 480 | ||
664 | unsigned long __init numa_free_all_bootmem(void) | 481 | int __node_distance(int from, int to) |
665 | { | 482 | { |
666 | unsigned long pages = 0; | 483 | if (from >= numa_distance_cnt || to >= numa_distance_cnt) |
667 | int i; | 484 | return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE; |
485 | return numa_distance[from * numa_distance_cnt + to]; | ||
486 | } | ||
487 | EXPORT_SYMBOL(__node_distance); | ||
668 | 488 | ||
669 | for_each_online_node(i) | 489 | /* |
670 | pages += free_all_bootmem_node(NODE_DATA(i)); | 490 | * Sanity check to catch more bad NUMA configurations (they are amazingly |
491 | * common). Make sure the nodes cover all memory. | ||
492 | */ | ||
493 | static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) | ||
494 | { | ||
495 | unsigned long numaram, e820ram; | ||
496 | int i; | ||
671 | 497 | ||
672 | pages += free_all_memory_core_early(MAX_NUMNODES); | 498 | numaram = 0; |
499 | for (i = 0; i < mi->nr_blks; i++) { | ||
500 | unsigned long s = mi->blk[i].start >> PAGE_SHIFT; | ||
501 | unsigned long e = mi->blk[i].end >> PAGE_SHIFT; | ||
502 | numaram += e - s; | ||
503 | numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e); | ||
504 | if ((long)numaram < 0) | ||
505 | numaram = 0; | ||
506 | } | ||
673 | 507 | ||
674 | return pages; | 508 | e820ram = max_pfn - (memblock_x86_hole_size(0, |
509 | max_pfn << PAGE_SHIFT) >> PAGE_SHIFT); | ||
510 | /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ | ||
511 | if ((long)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) { | ||
512 | printk(KERN_ERR "NUMA: nodes only cover %luMB of your %luMB e820 RAM. Not used.\n", | ||
513 | (numaram << PAGE_SHIFT) >> 20, | ||
514 | (e820ram << PAGE_SHIFT) >> 20); | ||
515 | return false; | ||
516 | } | ||
517 | return true; | ||
675 | } | 518 | } |
676 | 519 | ||
677 | #ifdef CONFIG_NUMA | 520 | static int __init numa_register_memblks(struct numa_meminfo *mi) |
678 | |||
679 | static __init int find_near_online_node(int node) | ||
680 | { | 521 | { |
681 | int n, val; | 522 | int i, nid; |
682 | int min_val = INT_MAX; | ||
683 | int best_node = -1; | ||
684 | 523 | ||
685 | for_each_online_node(n) { | 524 | /* Account for nodes with cpus and no memory */ |
686 | val = node_distance(node, n); | 525 | node_possible_map = numa_nodes_parsed; |
526 | numa_nodemask_from_meminfo(&node_possible_map, mi); | ||
527 | if (WARN_ON(nodes_empty(node_possible_map))) | ||
528 | return -EINVAL; | ||
687 | 529 | ||
688 | if (val < min_val) { | 530 | memnode_shift = compute_hash_shift(mi); |
689 | min_val = val; | 531 | if (memnode_shift < 0) { |
690 | best_node = n; | 532 | printk(KERN_ERR "NUMA: No NUMA node hash function found. Contact maintainer\n"); |
533 | return -EINVAL; | ||
534 | } | ||
535 | |||
536 | for (i = 0; i < mi->nr_blks; i++) | ||
537 | memblock_x86_register_active_regions(mi->blk[i].nid, | ||
538 | mi->blk[i].start >> PAGE_SHIFT, | ||
539 | mi->blk[i].end >> PAGE_SHIFT); | ||
540 | |||
541 | /* for out of order entries */ | ||
542 | sort_node_map(); | ||
543 | if (!numa_meminfo_cover_memory(mi)) | ||
544 | return -EINVAL; | ||
545 | |||
546 | /* Finally register nodes. */ | ||
547 | for_each_node_mask(nid, node_possible_map) { | ||
548 | u64 start = (u64)max_pfn << PAGE_SHIFT; | ||
549 | u64 end = 0; | ||
550 | |||
551 | for (i = 0; i < mi->nr_blks; i++) { | ||
552 | if (nid != mi->blk[i].nid) | ||
553 | continue; | ||
554 | start = min(mi->blk[i].start, start); | ||
555 | end = max(mi->blk[i].end, end); | ||
691 | } | 556 | } |
557 | |||
558 | if (start < end) | ||
559 | setup_node_bootmem(nid, start, end); | ||
692 | } | 560 | } |
693 | 561 | ||
694 | return best_node; | 562 | return 0; |
695 | } | 563 | } |
696 | 564 | ||
697 | /* | 565 | /** |
698 | * Setup early cpu_to_node. | 566 | * dummy_numma_init - Fallback dummy NUMA init |
699 | * | 567 | * |
700 | * Populate cpu_to_node[] only if x86_cpu_to_apicid[], | 568 | * Used if there's no underlying NUMA architecture, NUMA initialization |
701 | * and apicid_to_node[] tables have valid entries for a CPU. | 569 | * fails, or NUMA is disabled on the command line. |
702 | * This means we skip cpu_to_node[] initialisation for NUMA | ||
703 | * emulation and faking node case (when running a kernel compiled | ||
704 | * for NUMA on a non NUMA box), which is OK as cpu_to_node[] | ||
705 | * is already initialized in a round robin manner at numa_init_array, | ||
706 | * prior to this call, and this initialization is good enough | ||
707 | * for the fake NUMA cases. | ||
708 | * | 570 | * |
709 | * Called before the per_cpu areas are setup. | 571 | * Must online at least one node and add memory blocks that cover all |
572 | * allowed memory. This function must not fail. | ||
710 | */ | 573 | */ |
711 | void __init init_cpu_to_node(void) | 574 | static int __init dummy_numa_init(void) |
712 | { | 575 | { |
713 | int cpu; | 576 | printk(KERN_INFO "%s\n", |
714 | u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); | 577 | numa_off ? "NUMA turned off" : "No NUMA configuration found"); |
715 | 578 | printk(KERN_INFO "Faking a node at %016lx-%016lx\n", | |
716 | BUG_ON(cpu_to_apicid == NULL); | 579 | 0LU, max_pfn << PAGE_SHIFT); |
717 | 580 | ||
718 | for_each_possible_cpu(cpu) { | 581 | node_set(0, numa_nodes_parsed); |
719 | int node; | 582 | numa_add_memblk(0, 0, (u64)max_pfn << PAGE_SHIFT); |
720 | u16 apicid = cpu_to_apicid[cpu]; | ||
721 | 583 | ||
722 | if (apicid == BAD_APICID) | 584 | return 0; |
723 | continue; | ||
724 | node = apicid_to_node[apicid]; | ||
725 | if (node == NUMA_NO_NODE) | ||
726 | continue; | ||
727 | if (!node_online(node)) | ||
728 | node = find_near_online_node(node); | ||
729 | numa_set_node(cpu, node); | ||
730 | } | ||
731 | } | 585 | } |
732 | #endif | ||
733 | 586 | ||
734 | 587 | static int __init numa_init(int (*init_func)(void)) | |
735 | void __cpuinit numa_set_node(int cpu, int node) | ||
736 | { | 588 | { |
737 | int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); | 589 | int i; |
738 | 590 | int ret; | |
739 | /* early setting, no percpu area yet */ | ||
740 | if (cpu_to_node_map) { | ||
741 | cpu_to_node_map[cpu] = node; | ||
742 | return; | ||
743 | } | ||
744 | |||
745 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS | ||
746 | if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) { | ||
747 | printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); | ||
748 | dump_stack(); | ||
749 | return; | ||
750 | } | ||
751 | #endif | ||
752 | per_cpu(x86_cpu_to_node_map, cpu) = node; | ||
753 | 591 | ||
754 | if (node != NUMA_NO_NODE) | 592 | for (i = 0; i < MAX_LOCAL_APIC; i++) |
755 | set_cpu_numa_node(cpu, node); | 593 | set_apicid_to_node(i, NUMA_NO_NODE); |
756 | } | ||
757 | 594 | ||
758 | void __cpuinit numa_clear_node(int cpu) | 595 | nodes_clear(numa_nodes_parsed); |
759 | { | 596 | nodes_clear(node_possible_map); |
760 | numa_set_node(cpu, NUMA_NO_NODE); | 597 | nodes_clear(node_online_map); |
761 | } | 598 | memset(&numa_meminfo, 0, sizeof(numa_meminfo)); |
762 | 599 | remove_all_active_ranges(); | |
763 | #ifndef CONFIG_DEBUG_PER_CPU_MAPS | 600 | numa_reset_distance(); |
764 | 601 | ||
765 | #ifndef CONFIG_NUMA_EMU | 602 | ret = init_func(); |
766 | void __cpuinit numa_add_cpu(int cpu) | 603 | if (ret < 0) |
767 | { | 604 | return ret; |
768 | cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); | 605 | ret = numa_cleanup_meminfo(&numa_meminfo); |
769 | } | 606 | if (ret < 0) |
607 | return ret; | ||
770 | 608 | ||
771 | void __cpuinit numa_remove_cpu(int cpu) | 609 | numa_emulation(&numa_meminfo, numa_distance_cnt); |
772 | { | ||
773 | cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); | ||
774 | } | ||
775 | #else | ||
776 | void __cpuinit numa_add_cpu(int cpu) | ||
777 | { | ||
778 | unsigned long addr; | ||
779 | u16 apicid; | ||
780 | int physnid; | ||
781 | int nid = NUMA_NO_NODE; | ||
782 | 610 | ||
783 | nid = early_cpu_to_node(cpu); | 611 | ret = numa_register_memblks(&numa_meminfo); |
784 | BUG_ON(nid == NUMA_NO_NODE || !node_online(nid)); | 612 | if (ret < 0) |
613 | return ret; | ||
785 | 614 | ||
786 | /* | 615 | for (i = 0; i < nr_cpu_ids; i++) { |
787 | * Use the starting address of the emulated node to find which physical | 616 | int nid = early_cpu_to_node(i); |
788 | * node it is allocated on. | ||
789 | */ | ||
790 | addr = node_start_pfn(nid) << PAGE_SHIFT; | ||
791 | for (physnid = 0; physnid < MAX_NUMNODES; physnid++) | ||
792 | if (addr >= physnodes[physnid].start && | ||
793 | addr < physnodes[physnid].end) | ||
794 | break; | ||
795 | 617 | ||
796 | /* | 618 | if (nid == NUMA_NO_NODE) |
797 | * Map the cpu to each emulated node that is allocated on the physical | 619 | continue; |
798 | * node of the cpu's apic id. | 620 | if (!node_online(nid)) |
799 | */ | 621 | numa_clear_node(i); |
800 | for_each_online_node(nid) { | ||
801 | addr = node_start_pfn(nid) << PAGE_SHIFT; | ||
802 | if (addr >= physnodes[physnid].start && | ||
803 | addr < physnodes[physnid].end) | ||
804 | cpumask_set_cpu(cpu, node_to_cpumask_map[nid]); | ||
805 | } | 622 | } |
623 | numa_init_array(); | ||
624 | return 0; | ||
806 | } | 625 | } |
807 | 626 | ||
808 | void __cpuinit numa_remove_cpu(int cpu) | 627 | void __init initmem_init(void) |
809 | { | 628 | { |
810 | int i; | 629 | int ret; |
811 | 630 | ||
812 | for_each_online_node(i) | 631 | if (!numa_off) { |
813 | cpumask_clear_cpu(cpu, node_to_cpumask_map[i]); | 632 | #ifdef CONFIG_ACPI_NUMA |
814 | } | 633 | ret = numa_init(x86_acpi_numa_init); |
815 | #endif /* !CONFIG_NUMA_EMU */ | 634 | if (!ret) |
816 | 635 | return; | |
817 | #else /* CONFIG_DEBUG_PER_CPU_MAPS */ | 636 | #endif |
818 | static struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable) | 637 | #ifdef CONFIG_AMD_NUMA |
819 | { | 638 | ret = numa_init(amd_numa_init); |
820 | int node = early_cpu_to_node(cpu); | 639 | if (!ret) |
821 | struct cpumask *mask; | 640 | return; |
822 | char buf[64]; | 641 | #endif |
823 | |||
824 | mask = node_to_cpumask_map[node]; | ||
825 | if (!mask) { | ||
826 | pr_err("node_to_cpumask_map[%i] NULL\n", node); | ||
827 | dump_stack(); | ||
828 | return NULL; | ||
829 | } | 642 | } |
830 | 643 | ||
831 | cpulist_scnprintf(buf, sizeof(buf), mask); | 644 | numa_init(dummy_numa_init); |
832 | printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", | ||
833 | enable ? "numa_add_cpu" : "numa_remove_cpu", | ||
834 | cpu, node, buf); | ||
835 | return mask; | ||
836 | } | 645 | } |
837 | 646 | ||
838 | /* | 647 | unsigned long __init numa_free_all_bootmem(void) |
839 | * --------- debug versions of the numa functions --------- | ||
840 | */ | ||
841 | #ifndef CONFIG_NUMA_EMU | ||
842 | static void __cpuinit numa_set_cpumask(int cpu, int enable) | ||
843 | { | ||
844 | struct cpumask *mask; | ||
845 | |||
846 | mask = debug_cpumask_set_cpu(cpu, enable); | ||
847 | if (!mask) | ||
848 | return; | ||
849 | |||
850 | if (enable) | ||
851 | cpumask_set_cpu(cpu, mask); | ||
852 | else | ||
853 | cpumask_clear_cpu(cpu, mask); | ||
854 | } | ||
855 | #else | ||
856 | static void __cpuinit numa_set_cpumask(int cpu, int enable) | ||
857 | { | 648 | { |
858 | int node = early_cpu_to_node(cpu); | 649 | unsigned long pages = 0; |
859 | struct cpumask *mask; | ||
860 | int i; | 650 | int i; |
861 | 651 | ||
862 | for_each_online_node(i) { | 652 | for_each_online_node(i) |
863 | unsigned long addr; | 653 | pages += free_all_bootmem_node(NODE_DATA(i)); |
864 | |||
865 | addr = node_start_pfn(i) << PAGE_SHIFT; | ||
866 | if (addr < physnodes[node].start || | ||
867 | addr >= physnodes[node].end) | ||
868 | continue; | ||
869 | mask = debug_cpumask_set_cpu(cpu, enable); | ||
870 | if (!mask) | ||
871 | return; | ||
872 | |||
873 | if (enable) | ||
874 | cpumask_set_cpu(cpu, mask); | ||
875 | else | ||
876 | cpumask_clear_cpu(cpu, mask); | ||
877 | } | ||
878 | } | ||
879 | #endif /* CONFIG_NUMA_EMU */ | ||
880 | 654 | ||
881 | void __cpuinit numa_add_cpu(int cpu) | 655 | pages += free_all_memory_core_early(MAX_NUMNODES); |
882 | { | ||
883 | numa_set_cpumask(cpu, 1); | ||
884 | } | ||
885 | 656 | ||
886 | void __cpuinit numa_remove_cpu(int cpu) | 657 | return pages; |
887 | { | ||
888 | numa_set_cpumask(cpu, 0); | ||
889 | } | 658 | } |
890 | 659 | ||
891 | int __cpu_to_node(int cpu) | 660 | int __cpuinit numa_cpu_node(int cpu) |
892 | { | 661 | { |
893 | if (early_per_cpu_ptr(x86_cpu_to_node_map)) { | 662 | int apicid = early_per_cpu(x86_cpu_to_apicid, cpu); |
894 | printk(KERN_WARNING | ||
895 | "cpu_to_node(%d): usage too early!\n", cpu); | ||
896 | dump_stack(); | ||
897 | return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; | ||
898 | } | ||
899 | return per_cpu(x86_cpu_to_node_map, cpu); | ||
900 | } | ||
901 | EXPORT_SYMBOL(__cpu_to_node); | ||
902 | 663 | ||
903 | /* | 664 | if (apicid != BAD_APICID) |
904 | * Same function as cpu_to_node() but used if called before the | 665 | return __apicid_to_node[apicid]; |
905 | * per_cpu areas are setup. | 666 | return NUMA_NO_NODE; |
906 | */ | ||
907 | int early_cpu_to_node(int cpu) | ||
908 | { | ||
909 | if (early_per_cpu_ptr(x86_cpu_to_node_map)) | ||
910 | return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; | ||
911 | |||
912 | if (!cpu_possible(cpu)) { | ||
913 | printk(KERN_WARNING | ||
914 | "early_cpu_to_node(%d): no per_cpu area!\n", cpu); | ||
915 | dump_stack(); | ||
916 | return NUMA_NO_NODE; | ||
917 | } | ||
918 | return per_cpu(x86_cpu_to_node_map, cpu); | ||
919 | } | 667 | } |
920 | |||
921 | /* | ||
922 | * --------- end of debug versions of the numa functions --------- | ||
923 | */ | ||
924 | |||
925 | #endif /* CONFIG_DEBUG_PER_CPU_MAPS */ | ||
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c new file mode 100644 index 000000000000..ad091e4cff17 --- /dev/null +++ b/arch/x86/mm/numa_emulation.c | |||
@@ -0,0 +1,494 @@ | |||
1 | /* | ||
2 | * NUMA emulation | ||
3 | */ | ||
4 | #include <linux/kernel.h> | ||
5 | #include <linux/errno.h> | ||
6 | #include <linux/topology.h> | ||
7 | #include <linux/memblock.h> | ||
8 | #include <asm/dma.h> | ||
9 | |||
10 | #include "numa_internal.h" | ||
11 | |||
12 | static int emu_nid_to_phys[MAX_NUMNODES] __cpuinitdata; | ||
13 | static char *emu_cmdline __initdata; | ||
14 | |||
15 | void __init numa_emu_cmdline(char *str) | ||
16 | { | ||
17 | emu_cmdline = str; | ||
18 | } | ||
19 | |||
20 | static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi) | ||
21 | { | ||
22 | int i; | ||
23 | |||
24 | for (i = 0; i < mi->nr_blks; i++) | ||
25 | if (mi->blk[i].nid == nid) | ||
26 | return i; | ||
27 | return -ENOENT; | ||
28 | } | ||
29 | |||
30 | /* | ||
31 | * Sets up nid to range from @start to @end. The return value is -errno if | ||
32 | * something went wrong, 0 otherwise. | ||
33 | */ | ||
34 | static int __init emu_setup_memblk(struct numa_meminfo *ei, | ||
35 | struct numa_meminfo *pi, | ||
36 | int nid, int phys_blk, u64 size) | ||
37 | { | ||
38 | struct numa_memblk *eb = &ei->blk[ei->nr_blks]; | ||
39 | struct numa_memblk *pb = &pi->blk[phys_blk]; | ||
40 | |||
41 | if (ei->nr_blks >= NR_NODE_MEMBLKS) { | ||
42 | pr_err("NUMA: Too many emulated memblks, failing emulation\n"); | ||
43 | return -EINVAL; | ||
44 | } | ||
45 | |||
46 | ei->nr_blks++; | ||
47 | eb->start = pb->start; | ||
48 | eb->end = pb->start + size; | ||
49 | eb->nid = nid; | ||
50 | |||
51 | if (emu_nid_to_phys[nid] == NUMA_NO_NODE) | ||
52 | emu_nid_to_phys[nid] = pb->nid; | ||
53 | |||
54 | pb->start += size; | ||
55 | if (pb->start >= pb->end) { | ||
56 | WARN_ON_ONCE(pb->start > pb->end); | ||
57 | numa_remove_memblk_from(phys_blk, pi); | ||
58 | } | ||
59 | |||
60 | printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid, | ||
61 | eb->start, eb->end, (eb->end - eb->start) >> 20); | ||
62 | return 0; | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * Sets up nr_nodes fake nodes interleaved over physical nodes ranging from addr | ||
67 | * to max_addr. The return value is the number of nodes allocated. | ||
68 | */ | ||
69 | static int __init split_nodes_interleave(struct numa_meminfo *ei, | ||
70 | struct numa_meminfo *pi, | ||
71 | u64 addr, u64 max_addr, int nr_nodes) | ||
72 | { | ||
73 | nodemask_t physnode_mask = NODE_MASK_NONE; | ||
74 | u64 size; | ||
75 | int big; | ||
76 | int nid = 0; | ||
77 | int i, ret; | ||
78 | |||
79 | if (nr_nodes <= 0) | ||
80 | return -1; | ||
81 | if (nr_nodes > MAX_NUMNODES) { | ||
82 | pr_info("numa=fake=%d too large, reducing to %d\n", | ||
83 | nr_nodes, MAX_NUMNODES); | ||
84 | nr_nodes = MAX_NUMNODES; | ||
85 | } | ||
86 | |||
87 | size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / nr_nodes; | ||
88 | /* | ||
89 | * Calculate the number of big nodes that can be allocated as a result | ||
90 | * of consolidating the remainder. | ||
91 | */ | ||
92 | big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * nr_nodes) / | ||
93 | FAKE_NODE_MIN_SIZE; | ||
94 | |||
95 | size &= FAKE_NODE_MIN_HASH_MASK; | ||
96 | if (!size) { | ||
97 | pr_err("Not enough memory for each node. " | ||
98 | "NUMA emulation disabled.\n"); | ||
99 | return -1; | ||
100 | } | ||
101 | |||
102 | for (i = 0; i < pi->nr_blks; i++) | ||
103 | node_set(pi->blk[i].nid, physnode_mask); | ||
104 | |||
105 | /* | ||
106 | * Continue to fill physical nodes with fake nodes until there is no | ||
107 | * memory left on any of them. | ||
108 | */ | ||
109 | while (nodes_weight(physnode_mask)) { | ||
110 | for_each_node_mask(i, physnode_mask) { | ||
111 | u64 dma32_end = PFN_PHYS(MAX_DMA32_PFN); | ||
112 | u64 start, limit, end; | ||
113 | int phys_blk; | ||
114 | |||
115 | phys_blk = emu_find_memblk_by_nid(i, pi); | ||
116 | if (phys_blk < 0) { | ||
117 | node_clear(i, physnode_mask); | ||
118 | continue; | ||
119 | } | ||
120 | start = pi->blk[phys_blk].start; | ||
121 | limit = pi->blk[phys_blk].end; | ||
122 | end = start + size; | ||
123 | |||
124 | if (nid < big) | ||
125 | end += FAKE_NODE_MIN_SIZE; | ||
126 | |||
127 | /* | ||
128 | * Continue to add memory to this fake node if its | ||
129 | * non-reserved memory is less than the per-node size. | ||
130 | */ | ||
131 | while (end - start - | ||
132 | memblock_x86_hole_size(start, end) < size) { | ||
133 | end += FAKE_NODE_MIN_SIZE; | ||
134 | if (end > limit) { | ||
135 | end = limit; | ||
136 | break; | ||
137 | } | ||
138 | } | ||
139 | |||
140 | /* | ||
141 | * If there won't be at least FAKE_NODE_MIN_SIZE of | ||
142 | * non-reserved memory in ZONE_DMA32 for the next node, | ||
143 | * this one must extend to the boundary. | ||
144 | */ | ||
145 | if (end < dma32_end && dma32_end - end - | ||
146 | memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) | ||
147 | end = dma32_end; | ||
148 | |||
149 | /* | ||
150 | * If there won't be enough non-reserved memory for the | ||
151 | * next node, this one must extend to the end of the | ||
152 | * physical node. | ||
153 | */ | ||
154 | if (limit - end - | ||
155 | memblock_x86_hole_size(end, limit) < size) | ||
156 | end = limit; | ||
157 | |||
158 | ret = emu_setup_memblk(ei, pi, nid++ % nr_nodes, | ||
159 | phys_blk, | ||
160 | min(end, limit) - start); | ||
161 | if (ret < 0) | ||
162 | return ret; | ||
163 | } | ||
164 | } | ||
165 | return 0; | ||
166 | } | ||
167 | |||
168 | /* | ||
169 | * Returns the end address of a node so that there is at least `size' amount of | ||
170 | * non-reserved memory or `max_addr' is reached. | ||
171 | */ | ||
172 | static u64 __init find_end_of_node(u64 start, u64 max_addr, u64 size) | ||
173 | { | ||
174 | u64 end = start + size; | ||
175 | |||
176 | while (end - start - memblock_x86_hole_size(start, end) < size) { | ||
177 | end += FAKE_NODE_MIN_SIZE; | ||
178 | if (end > max_addr) { | ||
179 | end = max_addr; | ||
180 | break; | ||
181 | } | ||
182 | } | ||
183 | return end; | ||
184 | } | ||
185 | |||
186 | /* | ||
187 | * Sets up fake nodes of `size' interleaved over physical nodes ranging from | ||
188 | * `addr' to `max_addr'. The return value is the number of nodes allocated. | ||
189 | */ | ||
190 | static int __init split_nodes_size_interleave(struct numa_meminfo *ei, | ||
191 | struct numa_meminfo *pi, | ||
192 | u64 addr, u64 max_addr, u64 size) | ||
193 | { | ||
194 | nodemask_t physnode_mask = NODE_MASK_NONE; | ||
195 | u64 min_size; | ||
196 | int nid = 0; | ||
197 | int i, ret; | ||
198 | |||
199 | if (!size) | ||
200 | return -1; | ||
201 | /* | ||
202 | * The limit on emulated nodes is MAX_NUMNODES, so the size per node is | ||
203 | * increased accordingly if the requested size is too small. This | ||
204 | * creates a uniform distribution of node sizes across the entire | ||
205 | * machine (but not necessarily over physical nodes). | ||
206 | */ | ||
207 | min_size = (max_addr - addr - memblock_x86_hole_size(addr, max_addr)) / | ||
208 | MAX_NUMNODES; | ||
209 | min_size = max(min_size, FAKE_NODE_MIN_SIZE); | ||
210 | if ((min_size & FAKE_NODE_MIN_HASH_MASK) < min_size) | ||
211 | min_size = (min_size + FAKE_NODE_MIN_SIZE) & | ||
212 | FAKE_NODE_MIN_HASH_MASK; | ||
213 | if (size < min_size) { | ||
214 | pr_err("Fake node size %LuMB too small, increasing to %LuMB\n", | ||
215 | size >> 20, min_size >> 20); | ||
216 | size = min_size; | ||
217 | } | ||
218 | size &= FAKE_NODE_MIN_HASH_MASK; | ||
219 | |||
220 | for (i = 0; i < pi->nr_blks; i++) | ||
221 | node_set(pi->blk[i].nid, physnode_mask); | ||
222 | |||
223 | /* | ||
224 | * Fill physical nodes with fake nodes of size until there is no memory | ||
225 | * left on any of them. | ||
226 | */ | ||
227 | while (nodes_weight(physnode_mask)) { | ||
228 | for_each_node_mask(i, physnode_mask) { | ||
229 | u64 dma32_end = MAX_DMA32_PFN << PAGE_SHIFT; | ||
230 | u64 start, limit, end; | ||
231 | int phys_blk; | ||
232 | |||
233 | phys_blk = emu_find_memblk_by_nid(i, pi); | ||
234 | if (phys_blk < 0) { | ||
235 | node_clear(i, physnode_mask); | ||
236 | continue; | ||
237 | } | ||
238 | start = pi->blk[phys_blk].start; | ||
239 | limit = pi->blk[phys_blk].end; | ||
240 | |||
241 | end = find_end_of_node(start, limit, size); | ||
242 | /* | ||
243 | * If there won't be at least FAKE_NODE_MIN_SIZE of | ||
244 | * non-reserved memory in ZONE_DMA32 for the next node, | ||
245 | * this one must extend to the boundary. | ||
246 | */ | ||
247 | if (end < dma32_end && dma32_end - end - | ||
248 | memblock_x86_hole_size(end, dma32_end) < FAKE_NODE_MIN_SIZE) | ||
249 | end = dma32_end; | ||
250 | |||
251 | /* | ||
252 | * If there won't be enough non-reserved memory for the | ||
253 | * next node, this one must extend to the end of the | ||
254 | * physical node. | ||
255 | */ | ||
256 | if (limit - end - | ||
257 | memblock_x86_hole_size(end, limit) < size) | ||
258 | end = limit; | ||
259 | |||
260 | ret = emu_setup_memblk(ei, pi, nid++ % MAX_NUMNODES, | ||
261 | phys_blk, | ||
262 | min(end, limit) - start); | ||
263 | if (ret < 0) | ||
264 | return ret; | ||
265 | } | ||
266 | } | ||
267 | return 0; | ||
268 | } | ||
269 | |||
270 | /** | ||
271 | * numa_emulation - Emulate NUMA nodes | ||
272 | * @numa_meminfo: NUMA configuration to massage | ||
273 | * @numa_dist_cnt: The size of the physical NUMA distance table | ||
274 | * | ||
275 | * Emulate NUMA nodes according to the numa=fake kernel parameter. | ||
276 | * @numa_meminfo contains the physical memory configuration and is modified | ||
277 | * to reflect the emulated configuration on success. @numa_dist_cnt is | ||
278 | * used to determine the size of the physical distance table. | ||
279 | * | ||
280 | * On success, the following modifications are made. | ||
281 | * | ||
282 | * - @numa_meminfo is updated to reflect the emulated nodes. | ||
283 | * | ||
284 | * - __apicid_to_node[] is updated such that APIC IDs are mapped to the | ||
285 | * emulated nodes. | ||
286 | * | ||
287 | * - NUMA distance table is rebuilt to represent distances between emulated | ||
288 | * nodes. The distances are determined considering how emulated nodes | ||
289 | * are mapped to physical nodes and match the actual distances. | ||
290 | * | ||
291 | * - emu_nid_to_phys[] reflects how emulated nodes are mapped to physical | ||
292 | * nodes. This is used by numa_add_cpu() and numa_remove_cpu(). | ||
293 | * | ||
294 | * If emulation is not enabled or fails, emu_nid_to_phys[] is filled with | ||
295 | * identity mapping and no other modification is made. | ||
296 | */ | ||
297 | void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt) | ||
298 | { | ||
299 | static struct numa_meminfo ei __initdata; | ||
300 | static struct numa_meminfo pi __initdata; | ||
301 | const u64 max_addr = max_pfn << PAGE_SHIFT; | ||
302 | u8 *phys_dist = NULL; | ||
303 | size_t phys_size = numa_dist_cnt * numa_dist_cnt * sizeof(phys_dist[0]); | ||
304 | int max_emu_nid, dfl_phys_nid; | ||
305 | int i, j, ret; | ||
306 | |||
307 | if (!emu_cmdline) | ||
308 | goto no_emu; | ||
309 | |||
310 | memset(&ei, 0, sizeof(ei)); | ||
311 | pi = *numa_meminfo; | ||
312 | |||
313 | for (i = 0; i < MAX_NUMNODES; i++) | ||
314 | emu_nid_to_phys[i] = NUMA_NO_NODE; | ||
315 | |||
316 | /* | ||
317 | * If the numa=fake command-line contains a 'M' or 'G', it represents | ||
318 | * the fixed node size. Otherwise, if it is just a single number N, | ||
319 | * split the system RAM into N fake nodes. | ||
320 | */ | ||
321 | if (strchr(emu_cmdline, 'M') || strchr(emu_cmdline, 'G')) { | ||
322 | u64 size; | ||
323 | |||
324 | size = memparse(emu_cmdline, &emu_cmdline); | ||
325 | ret = split_nodes_size_interleave(&ei, &pi, 0, max_addr, size); | ||
326 | } else { | ||
327 | unsigned long n; | ||
328 | |||
329 | n = simple_strtoul(emu_cmdline, NULL, 0); | ||
330 | ret = split_nodes_interleave(&ei, &pi, 0, max_addr, n); | ||
331 | } | ||
332 | |||
333 | if (ret < 0) | ||
334 | goto no_emu; | ||
335 | |||
336 | if (numa_cleanup_meminfo(&ei) < 0) { | ||
337 | pr_warning("NUMA: Warning: constructed meminfo invalid, disabling emulation\n"); | ||
338 | goto no_emu; | ||
339 | } | ||
340 | |||
341 | /* copy the physical distance table */ | ||
342 | if (numa_dist_cnt) { | ||
343 | u64 phys; | ||
344 | |||
345 | phys = memblock_find_in_range(0, | ||
346 | (u64)max_pfn_mapped << PAGE_SHIFT, | ||
347 | phys_size, PAGE_SIZE); | ||
348 | if (phys == MEMBLOCK_ERROR) { | ||
349 | pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n"); | ||
350 | goto no_emu; | ||
351 | } | ||
352 | memblock_x86_reserve_range(phys, phys + phys_size, "TMP NUMA DIST"); | ||
353 | phys_dist = __va(phys); | ||
354 | |||
355 | for (i = 0; i < numa_dist_cnt; i++) | ||
356 | for (j = 0; j < numa_dist_cnt; j++) | ||
357 | phys_dist[i * numa_dist_cnt + j] = | ||
358 | node_distance(i, j); | ||
359 | } | ||
360 | |||
361 | /* | ||
362 | * Determine the max emulated nid and the default phys nid to use | ||
363 | * for unmapped nodes. | ||
364 | */ | ||
365 | max_emu_nid = 0; | ||
366 | dfl_phys_nid = NUMA_NO_NODE; | ||
367 | for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++) { | ||
368 | if (emu_nid_to_phys[i] != NUMA_NO_NODE) { | ||
369 | max_emu_nid = i; | ||
370 | if (dfl_phys_nid == NUMA_NO_NODE) | ||
371 | dfl_phys_nid = emu_nid_to_phys[i]; | ||
372 | } | ||
373 | } | ||
374 | if (dfl_phys_nid == NUMA_NO_NODE) { | ||
375 | pr_warning("NUMA: Warning: can't determine default physical node, disabling emulation\n"); | ||
376 | goto no_emu; | ||
377 | } | ||
378 | |||
379 | /* commit */ | ||
380 | *numa_meminfo = ei; | ||
381 | |||
382 | /* | ||
383 | * Transform __apicid_to_node table to use emulated nids by | ||
384 | * reverse-mapping phys_nid. The maps should always exist but fall | ||
385 | * back to zero just in case. | ||
386 | */ | ||
387 | for (i = 0; i < ARRAY_SIZE(__apicid_to_node); i++) { | ||
388 | if (__apicid_to_node[i] == NUMA_NO_NODE) | ||
389 | continue; | ||
390 | for (j = 0; j < ARRAY_SIZE(emu_nid_to_phys); j++) | ||
391 | if (__apicid_to_node[i] == emu_nid_to_phys[j]) | ||
392 | break; | ||
393 | __apicid_to_node[i] = j < ARRAY_SIZE(emu_nid_to_phys) ? j : 0; | ||
394 | } | ||
395 | |||
396 | /* make sure all emulated nodes are mapped to a physical node */ | ||
397 | for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++) | ||
398 | if (emu_nid_to_phys[i] == NUMA_NO_NODE) | ||
399 | emu_nid_to_phys[i] = dfl_phys_nid; | ||
400 | |||
401 | /* transform distance table */ | ||
402 | numa_reset_distance(); | ||
403 | for (i = 0; i < max_emu_nid + 1; i++) { | ||
404 | for (j = 0; j < max_emu_nid + 1; j++) { | ||
405 | int physi = emu_nid_to_phys[i]; | ||
406 | int physj = emu_nid_to_phys[j]; | ||
407 | int dist; | ||
408 | |||
409 | if (physi >= numa_dist_cnt || physj >= numa_dist_cnt) | ||
410 | dist = physi == physj ? | ||
411 | LOCAL_DISTANCE : REMOTE_DISTANCE; | ||
412 | else | ||
413 | dist = phys_dist[physi * numa_dist_cnt + physj]; | ||
414 | |||
415 | numa_set_distance(i, j, dist); | ||
416 | } | ||
417 | } | ||
418 | |||
419 | /* free the copied physical distance table */ | ||
420 | if (phys_dist) | ||
421 | memblock_x86_free_range(__pa(phys_dist), __pa(phys_dist) + phys_size); | ||
422 | return; | ||
423 | |||
424 | no_emu: | ||
425 | /* No emulation. Build identity emu_nid_to_phys[] for numa_add_cpu() */ | ||
426 | for (i = 0; i < ARRAY_SIZE(emu_nid_to_phys); i++) | ||
427 | emu_nid_to_phys[i] = i; | ||
428 | } | ||
429 | |||
430 | #ifndef CONFIG_DEBUG_PER_CPU_MAPS | ||
431 | void __cpuinit numa_add_cpu(int cpu) | ||
432 | { | ||
433 | int physnid, nid; | ||
434 | |||
435 | nid = early_cpu_to_node(cpu); | ||
436 | BUG_ON(nid == NUMA_NO_NODE || !node_online(nid)); | ||
437 | |||
438 | physnid = emu_nid_to_phys[nid]; | ||
439 | |||
440 | /* | ||
441 | * Map the cpu to each emulated node that is allocated on the physical | ||
442 | * node of the cpu's apic id. | ||
443 | */ | ||
444 | for_each_online_node(nid) | ||
445 | if (emu_nid_to_phys[nid] == physnid) | ||
446 | cpumask_set_cpu(cpu, node_to_cpumask_map[nid]); | ||
447 | } | ||
448 | |||
449 | void __cpuinit numa_remove_cpu(int cpu) | ||
450 | { | ||
451 | int i; | ||
452 | |||
453 | for_each_online_node(i) | ||
454 | cpumask_clear_cpu(cpu, node_to_cpumask_map[i]); | ||
455 | } | ||
456 | #else /* !CONFIG_DEBUG_PER_CPU_MAPS */ | ||
457 | static void __cpuinit numa_set_cpumask(int cpu, int enable) | ||
458 | { | ||
459 | struct cpumask *mask; | ||
460 | int nid, physnid, i; | ||
461 | |||
462 | nid = early_cpu_to_node(cpu); | ||
463 | if (nid == NUMA_NO_NODE) { | ||
464 | /* early_cpu_to_node() already emits a warning and trace */ | ||
465 | return; | ||
466 | } | ||
467 | |||
468 | physnid = emu_nid_to_phys[nid]; | ||
469 | |||
470 | for_each_online_node(i) { | ||
471 | if (emu_nid_to_phys[nid] != physnid) | ||
472 | continue; | ||
473 | |||
474 | mask = debug_cpumask_set_cpu(cpu, enable); | ||
475 | if (!mask) | ||
476 | return; | ||
477 | |||
478 | if (enable) | ||
479 | cpumask_set_cpu(cpu, mask); | ||
480 | else | ||
481 | cpumask_clear_cpu(cpu, mask); | ||
482 | } | ||
483 | } | ||
484 | |||
485 | void __cpuinit numa_add_cpu(int cpu) | ||
486 | { | ||
487 | numa_set_cpumask(cpu, 1); | ||
488 | } | ||
489 | |||
490 | void __cpuinit numa_remove_cpu(int cpu) | ||
491 | { | ||
492 | numa_set_cpumask(cpu, 0); | ||
493 | } | ||
494 | #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ | ||
diff --git a/arch/x86/mm/numa_internal.h b/arch/x86/mm/numa_internal.h new file mode 100644 index 000000000000..ef2d97377d7c --- /dev/null +++ b/arch/x86/mm/numa_internal.h | |||
@@ -0,0 +1,31 @@ | |||
1 | #ifndef __X86_MM_NUMA_INTERNAL_H | ||
2 | #define __X86_MM_NUMA_INTERNAL_H | ||
3 | |||
4 | #include <linux/types.h> | ||
5 | #include <asm/numa.h> | ||
6 | |||
7 | struct numa_memblk { | ||
8 | u64 start; | ||
9 | u64 end; | ||
10 | int nid; | ||
11 | }; | ||
12 | |||
13 | struct numa_meminfo { | ||
14 | int nr_blks; | ||
15 | struct numa_memblk blk[NR_NODE_MEMBLKS]; | ||
16 | }; | ||
17 | |||
18 | void __init numa_remove_memblk_from(int idx, struct numa_meminfo *mi); | ||
19 | int __init numa_cleanup_meminfo(struct numa_meminfo *mi); | ||
20 | void __init numa_reset_distance(void); | ||
21 | |||
22 | #ifdef CONFIG_NUMA_EMU | ||
23 | void __init numa_emulation(struct numa_meminfo *numa_meminfo, | ||
24 | int numa_dist_cnt); | ||
25 | #else | ||
26 | static inline void numa_emulation(struct numa_meminfo *numa_meminfo, | ||
27 | int numa_dist_cnt) | ||
28 | { } | ||
29 | #endif | ||
30 | |||
31 | #endif /* __X86_MM_NUMA_INTERNAL_H */ | ||
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 90825f2eb0f4..f9e526742fa1 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -310,7 +310,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, | |||
310 | * these shared mappings are made of small page mappings. | 310 | * these shared mappings are made of small page mappings. |
311 | * Thus this don't enforce !RW mapping for small page kernel | 311 | * Thus this don't enforce !RW mapping for small page kernel |
312 | * text mapping logic will help Linux Xen parvirt guest boot | 312 | * text mapping logic will help Linux Xen parvirt guest boot |
313 | * aswell. | 313 | * as well. |
314 | */ | 314 | */ |
315 | if (lookup_address(address, &level) && (level != PG_LEVEL_4K)) | 315 | if (lookup_address(address, &level) && (level != PG_LEVEL_4K)) |
316 | pgprot_val(forbidden) |= _PAGE_RW; | 316 | pgprot_val(forbidden) |= _PAGE_RW; |
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 0113d19c8aa6..8573b83a63d0 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
@@ -168,8 +168,7 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) | |||
168 | * section 8.1: in PAE mode we explicitly have to flush the | 168 | * section 8.1: in PAE mode we explicitly have to flush the |
169 | * TLB via cr3 if the top-level pgd is changed... | 169 | * TLB via cr3 if the top-level pgd is changed... |
170 | */ | 170 | */ |
171 | if (mm == current->active_mm) | 171 | flush_tlb_mm(mm); |
172 | write_cr3(read_cr3()); | ||
173 | } | 172 | } |
174 | #else /* !CONFIG_X86_PAE */ | 173 | #else /* !CONFIG_X86_PAE */ |
175 | 174 | ||
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c index ae96e7b8051d..48651c6f657d 100644 --- a/arch/x86/mm/srat_32.c +++ b/arch/x86/mm/srat_32.c | |||
@@ -57,7 +57,7 @@ struct node_memory_chunk_s { | |||
57 | static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS]; | 57 | static struct node_memory_chunk_s __initdata node_memory_chunk[MAXCHUNKS]; |
58 | 58 | ||
59 | static int __initdata num_memory_chunks; /* total number of memory chunks */ | 59 | static int __initdata num_memory_chunks; /* total number of memory chunks */ |
60 | static u8 __initdata apicid_to_pxm[MAX_APICID]; | 60 | static u8 __initdata apicid_to_pxm[MAX_LOCAL_APIC]; |
61 | 61 | ||
62 | int acpi_numa __initdata; | 62 | int acpi_numa __initdata; |
63 | 63 | ||
@@ -254,8 +254,8 @@ int __init get_memcfg_from_srat(void) | |||
254 | printk(KERN_DEBUG "Number of memory chunks in system = %d\n", | 254 | printk(KERN_DEBUG "Number of memory chunks in system = %d\n", |
255 | num_memory_chunks); | 255 | num_memory_chunks); |
256 | 256 | ||
257 | for (i = 0; i < MAX_APICID; i++) | 257 | for (i = 0; i < MAX_LOCAL_APIC; i++) |
258 | apicid_2_node[i] = pxm_to_node(apicid_to_pxm[i]); | 258 | set_apicid_to_node(i, pxm_to_node(apicid_to_pxm[i])); |
259 | 259 | ||
260 | for (j = 0; j < num_memory_chunks; j++){ | 260 | for (j = 0; j < num_memory_chunks; j++){ |
261 | struct node_memory_chunk_s * chunk = &node_memory_chunk[j]; | 261 | struct node_memory_chunk_s * chunk = &node_memory_chunk[j]; |
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 603d285d1daa..8e9d3394f6d4 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -26,88 +26,34 @@ | |||
26 | 26 | ||
27 | int acpi_numa __initdata; | 27 | int acpi_numa __initdata; |
28 | 28 | ||
29 | static struct acpi_table_slit *acpi_slit; | ||
30 | |||
31 | static nodemask_t nodes_parsed __initdata; | ||
32 | static nodemask_t cpu_nodes_parsed __initdata; | ||
33 | static struct bootnode nodes[MAX_NUMNODES] __initdata; | ||
34 | static struct bootnode nodes_add[MAX_NUMNODES]; | 29 | static struct bootnode nodes_add[MAX_NUMNODES]; |
35 | 30 | ||
36 | static int num_node_memblks __initdata; | ||
37 | static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata; | ||
38 | static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata; | ||
39 | |||
40 | static __init int setup_node(int pxm) | 31 | static __init int setup_node(int pxm) |
41 | { | 32 | { |
42 | return acpi_map_pxm_to_node(pxm); | 33 | return acpi_map_pxm_to_node(pxm); |
43 | } | 34 | } |
44 | 35 | ||
45 | static __init int conflicting_memblks(unsigned long start, unsigned long end) | ||
46 | { | ||
47 | int i; | ||
48 | for (i = 0; i < num_node_memblks; i++) { | ||
49 | struct bootnode *nd = &node_memblk_range[i]; | ||
50 | if (nd->start == nd->end) | ||
51 | continue; | ||
52 | if (nd->end > start && nd->start < end) | ||
53 | return memblk_nodeid[i]; | ||
54 | if (nd->end == end && nd->start == start) | ||
55 | return memblk_nodeid[i]; | ||
56 | } | ||
57 | return -1; | ||
58 | } | ||
59 | |||
60 | static __init void cutoff_node(int i, unsigned long start, unsigned long end) | ||
61 | { | ||
62 | struct bootnode *nd = &nodes[i]; | ||
63 | |||
64 | if (nd->start < start) { | ||
65 | nd->start = start; | ||
66 | if (nd->end < nd->start) | ||
67 | nd->start = nd->end; | ||
68 | } | ||
69 | if (nd->end > end) { | ||
70 | nd->end = end; | ||
71 | if (nd->start > nd->end) | ||
72 | nd->start = nd->end; | ||
73 | } | ||
74 | } | ||
75 | |||
76 | static __init void bad_srat(void) | 36 | static __init void bad_srat(void) |
77 | { | 37 | { |
78 | int i; | ||
79 | printk(KERN_ERR "SRAT: SRAT not used.\n"); | 38 | printk(KERN_ERR "SRAT: SRAT not used.\n"); |
80 | acpi_numa = -1; | 39 | acpi_numa = -1; |
81 | for (i = 0; i < MAX_LOCAL_APIC; i++) | 40 | memset(nodes_add, 0, sizeof(nodes_add)); |
82 | apicid_to_node[i] = NUMA_NO_NODE; | ||
83 | for (i = 0; i < MAX_NUMNODES; i++) { | ||
84 | nodes[i].start = nodes[i].end = 0; | ||
85 | nodes_add[i].start = nodes_add[i].end = 0; | ||
86 | } | ||
87 | remove_all_active_ranges(); | ||
88 | } | 41 | } |
89 | 42 | ||
90 | static __init inline int srat_disabled(void) | 43 | static __init inline int srat_disabled(void) |
91 | { | 44 | { |
92 | return numa_off || acpi_numa < 0; | 45 | return acpi_numa < 0; |
93 | } | 46 | } |
94 | 47 | ||
95 | /* Callback for SLIT parsing */ | 48 | /* Callback for SLIT parsing */ |
96 | void __init acpi_numa_slit_init(struct acpi_table_slit *slit) | 49 | void __init acpi_numa_slit_init(struct acpi_table_slit *slit) |
97 | { | 50 | { |
98 | unsigned length; | 51 | int i, j; |
99 | unsigned long phys; | ||
100 | |||
101 | length = slit->header.length; | ||
102 | phys = memblock_find_in_range(0, max_pfn_mapped<<PAGE_SHIFT, length, | ||
103 | PAGE_SIZE); | ||
104 | |||
105 | if (phys == MEMBLOCK_ERROR) | ||
106 | panic(" Can not save slit!\n"); | ||
107 | 52 | ||
108 | acpi_slit = __va(phys); | 53 | for (i = 0; i < slit->locality_count; i++) |
109 | memcpy(acpi_slit, slit, length); | 54 | for (j = 0; j < slit->locality_count; j++) |
110 | memblock_x86_reserve_range(phys, phys + length, "ACPI SLIT"); | 55 | numa_set_distance(pxm_to_node(i), pxm_to_node(j), |
56 | slit->entry[slit->locality_count * i + j]); | ||
111 | } | 57 | } |
112 | 58 | ||
113 | /* Callback for Proximity Domain -> x2APIC mapping */ | 59 | /* Callback for Proximity Domain -> x2APIC mapping */ |
@@ -138,8 +84,8 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa) | |||
138 | printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node); | 84 | printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u skipped apicid that is too big\n", pxm, apic_id, node); |
139 | return; | 85 | return; |
140 | } | 86 | } |
141 | apicid_to_node[apic_id] = node; | 87 | set_apicid_to_node(apic_id, node); |
142 | node_set(node, cpu_nodes_parsed); | 88 | node_set(node, numa_nodes_parsed); |
143 | acpi_numa = 1; | 89 | acpi_numa = 1; |
144 | printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n", | 90 | printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%04x -> Node %u\n", |
145 | pxm, apic_id, node); | 91 | pxm, apic_id, node); |
@@ -178,8 +124,8 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) | |||
178 | return; | 124 | return; |
179 | } | 125 | } |
180 | 126 | ||
181 | apicid_to_node[apic_id] = node; | 127 | set_apicid_to_node(apic_id, node); |
182 | node_set(node, cpu_nodes_parsed); | 128 | node_set(node, numa_nodes_parsed); |
183 | acpi_numa = 1; | 129 | acpi_numa = 1; |
184 | printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n", | 130 | printk(KERN_INFO "SRAT: PXM %u -> APIC 0x%02x -> Node %u\n", |
185 | pxm, apic_id, node); | 131 | pxm, apic_id, node); |
@@ -241,7 +187,7 @@ update_nodes_add(int node, unsigned long start, unsigned long end) | |||
241 | } | 187 | } |
242 | 188 | ||
243 | if (changed) { | 189 | if (changed) { |
244 | node_set(node, cpu_nodes_parsed); | 190 | node_set(node, numa_nodes_parsed); |
245 | printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", | 191 | printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", |
246 | nd->start, nd->end); | 192 | nd->start, nd->end); |
247 | } | 193 | } |
@@ -251,10 +197,8 @@ update_nodes_add(int node, unsigned long start, unsigned long end) | |||
251 | void __init | 197 | void __init |
252 | acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | 198 | acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) |
253 | { | 199 | { |
254 | struct bootnode *nd, oldnode; | ||
255 | unsigned long start, end; | 200 | unsigned long start, end; |
256 | int node, pxm; | 201 | int node, pxm; |
257 | int i; | ||
258 | 202 | ||
259 | if (srat_disabled()) | 203 | if (srat_disabled()) |
260 | return; | 204 | return; |
@@ -276,300 +220,31 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | |||
276 | bad_srat(); | 220 | bad_srat(); |
277 | return; | 221 | return; |
278 | } | 222 | } |
279 | i = conflicting_memblks(start, end); | 223 | |
280 | if (i == node) { | 224 | if (numa_add_memblk(node, start, end) < 0) { |
281 | printk(KERN_WARNING | ||
282 | "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n", | ||
283 | pxm, start, end, nodes[i].start, nodes[i].end); | ||
284 | } else if (i >= 0) { | ||
285 | printk(KERN_ERR | ||
286 | "SRAT: PXM %d (%lx-%lx) overlaps with PXM %d (%Lx-%Lx)\n", | ||
287 | pxm, start, end, node_to_pxm(i), | ||
288 | nodes[i].start, nodes[i].end); | ||
289 | bad_srat(); | 225 | bad_srat(); |
290 | return; | 226 | return; |
291 | } | 227 | } |
292 | nd = &nodes[node]; | ||
293 | oldnode = *nd; | ||
294 | if (!node_test_and_set(node, nodes_parsed)) { | ||
295 | nd->start = start; | ||
296 | nd->end = end; | ||
297 | } else { | ||
298 | if (start < nd->start) | ||
299 | nd->start = start; | ||
300 | if (nd->end < end) | ||
301 | nd->end = end; | ||
302 | } | ||
303 | 228 | ||
304 | printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, | 229 | printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, |
305 | start, end); | 230 | start, end); |
306 | 231 | ||
307 | if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) { | 232 | if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) |
308 | update_nodes_add(node, start, end); | 233 | update_nodes_add(node, start, end); |
309 | /* restore nodes[node] */ | ||
310 | *nd = oldnode; | ||
311 | if ((nd->start | nd->end) == 0) | ||
312 | node_clear(node, nodes_parsed); | ||
313 | } | ||
314 | |||
315 | node_memblk_range[num_node_memblks].start = start; | ||
316 | node_memblk_range[num_node_memblks].end = end; | ||
317 | memblk_nodeid[num_node_memblks] = node; | ||
318 | num_node_memblks++; | ||
319 | } | ||
320 | |||
321 | /* Sanity check to catch more bad SRATs (they are amazingly common). | ||
322 | Make sure the PXMs cover all memory. */ | ||
323 | static int __init nodes_cover_memory(const struct bootnode *nodes) | ||
324 | { | ||
325 | int i; | ||
326 | unsigned long pxmram, e820ram; | ||
327 | |||
328 | pxmram = 0; | ||
329 | for_each_node_mask(i, nodes_parsed) { | ||
330 | unsigned long s = nodes[i].start >> PAGE_SHIFT; | ||
331 | unsigned long e = nodes[i].end >> PAGE_SHIFT; | ||
332 | pxmram += e - s; | ||
333 | pxmram -= __absent_pages_in_range(i, s, e); | ||
334 | if ((long)pxmram < 0) | ||
335 | pxmram = 0; | ||
336 | } | ||
337 | |||
338 | e820ram = max_pfn - (memblock_x86_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT); | ||
339 | /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ | ||
340 | if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) { | ||
341 | printk(KERN_ERR | ||
342 | "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n", | ||
343 | (pxmram << PAGE_SHIFT) >> 20, | ||
344 | (e820ram << PAGE_SHIFT) >> 20); | ||
345 | return 0; | ||
346 | } | ||
347 | return 1; | ||
348 | } | 234 | } |
349 | 235 | ||
350 | void __init acpi_numa_arch_fixup(void) {} | 236 | void __init acpi_numa_arch_fixup(void) {} |
351 | 237 | ||
352 | #ifdef CONFIG_NUMA_EMU | 238 | int __init x86_acpi_numa_init(void) |
353 | void __init acpi_get_nodes(struct bootnode *physnodes, unsigned long start, | ||
354 | unsigned long end) | ||
355 | { | ||
356 | int i; | ||
357 | |||
358 | for_each_node_mask(i, nodes_parsed) { | ||
359 | cutoff_node(i, start, end); | ||
360 | physnodes[i].start = nodes[i].start; | ||
361 | physnodes[i].end = nodes[i].end; | ||
362 | } | ||
363 | } | ||
364 | #endif /* CONFIG_NUMA_EMU */ | ||
365 | |||
366 | /* Use the information discovered above to actually set up the nodes. */ | ||
367 | int __init acpi_scan_nodes(unsigned long start, unsigned long end) | ||
368 | { | 239 | { |
369 | int i; | 240 | int ret; |
370 | |||
371 | if (acpi_numa <= 0) | ||
372 | return -1; | ||
373 | |||
374 | /* First clean up the node list */ | ||
375 | for (i = 0; i < MAX_NUMNODES; i++) | ||
376 | cutoff_node(i, start, end); | ||
377 | |||
378 | /* | ||
379 | * Join together blocks on the same node, holes between | ||
380 | * which don't overlap with memory on other nodes. | ||
381 | */ | ||
382 | for (i = 0; i < num_node_memblks; ++i) { | ||
383 | int j, k; | ||
384 | |||
385 | for (j = i + 1; j < num_node_memblks; ++j) { | ||
386 | unsigned long start, end; | ||
387 | |||
388 | if (memblk_nodeid[i] != memblk_nodeid[j]) | ||
389 | continue; | ||
390 | start = min(node_memblk_range[i].end, | ||
391 | node_memblk_range[j].end); | ||
392 | end = max(node_memblk_range[i].start, | ||
393 | node_memblk_range[j].start); | ||
394 | for (k = 0; k < num_node_memblks; ++k) { | ||
395 | if (memblk_nodeid[i] == memblk_nodeid[k]) | ||
396 | continue; | ||
397 | if (start < node_memblk_range[k].end && | ||
398 | end > node_memblk_range[k].start) | ||
399 | break; | ||
400 | } | ||
401 | if (k < num_node_memblks) | ||
402 | continue; | ||
403 | start = min(node_memblk_range[i].start, | ||
404 | node_memblk_range[j].start); | ||
405 | end = max(node_memblk_range[i].end, | ||
406 | node_memblk_range[j].end); | ||
407 | printk(KERN_INFO "SRAT: Node %d " | ||
408 | "[%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n", | ||
409 | memblk_nodeid[i], | ||
410 | node_memblk_range[i].start, | ||
411 | node_memblk_range[i].end, | ||
412 | node_memblk_range[j].start, | ||
413 | node_memblk_range[j].end, | ||
414 | start, end); | ||
415 | node_memblk_range[i].start = start; | ||
416 | node_memblk_range[i].end = end; | ||
417 | k = --num_node_memblks - j; | ||
418 | memmove(memblk_nodeid + j, memblk_nodeid + j+1, | ||
419 | k * sizeof(*memblk_nodeid)); | ||
420 | memmove(node_memblk_range + j, node_memblk_range + j+1, | ||
421 | k * sizeof(*node_memblk_range)); | ||
422 | --j; | ||
423 | } | ||
424 | } | ||
425 | |||
426 | memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, | ||
427 | memblk_nodeid); | ||
428 | if (memnode_shift < 0) { | ||
429 | printk(KERN_ERR | ||
430 | "SRAT: No NUMA node hash function found. Contact maintainer\n"); | ||
431 | bad_srat(); | ||
432 | return -1; | ||
433 | } | ||
434 | |||
435 | for (i = 0; i < num_node_memblks; i++) | ||
436 | memblock_x86_register_active_regions(memblk_nodeid[i], | ||
437 | node_memblk_range[i].start >> PAGE_SHIFT, | ||
438 | node_memblk_range[i].end >> PAGE_SHIFT); | ||
439 | |||
440 | /* for out of order entries in SRAT */ | ||
441 | sort_node_map(); | ||
442 | if (!nodes_cover_memory(nodes)) { | ||
443 | bad_srat(); | ||
444 | return -1; | ||
445 | } | ||
446 | 241 | ||
447 | /* Account for nodes with cpus and no memory */ | 242 | ret = acpi_numa_init(); |
448 | nodes_or(node_possible_map, nodes_parsed, cpu_nodes_parsed); | 243 | if (ret < 0) |
449 | 244 | return ret; | |
450 | /* Finally register nodes */ | 245 | return srat_disabled() ? -EINVAL : 0; |
451 | for_each_node_mask(i, node_possible_map) | ||
452 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); | ||
453 | /* Try again in case setup_node_bootmem missed one due | ||
454 | to missing bootmem */ | ||
455 | for_each_node_mask(i, node_possible_map) | ||
456 | if (!node_online(i)) | ||
457 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); | ||
458 | |||
459 | for (i = 0; i < nr_cpu_ids; i++) { | ||
460 | int node = early_cpu_to_node(i); | ||
461 | |||
462 | if (node == NUMA_NO_NODE) | ||
463 | continue; | ||
464 | if (!node_online(node)) | ||
465 | numa_clear_node(i); | ||
466 | } | ||
467 | numa_init_array(); | ||
468 | return 0; | ||
469 | } | ||
470 | |||
471 | #ifdef CONFIG_NUMA_EMU | ||
472 | static int fake_node_to_pxm_map[MAX_NUMNODES] __initdata = { | ||
473 | [0 ... MAX_NUMNODES-1] = PXM_INVAL | ||
474 | }; | ||
475 | static s16 fake_apicid_to_node[MAX_LOCAL_APIC] __initdata = { | ||
476 | [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE | ||
477 | }; | ||
478 | static int __init find_node_by_addr(unsigned long addr) | ||
479 | { | ||
480 | int ret = NUMA_NO_NODE; | ||
481 | int i; | ||
482 | |||
483 | for_each_node_mask(i, nodes_parsed) { | ||
484 | /* | ||
485 | * Find the real node that this emulated node appears on. For | ||
486 | * the sake of simplicity, we only use a real node's starting | ||
487 | * address to determine which emulated node it appears on. | ||
488 | */ | ||
489 | if (addr >= nodes[i].start && addr < nodes[i].end) { | ||
490 | ret = i; | ||
491 | break; | ||
492 | } | ||
493 | } | ||
494 | return ret; | ||
495 | } | 246 | } |
496 | 247 | ||
497 | /* | ||
498 | * In NUMA emulation, we need to setup proximity domain (_PXM) to node ID | ||
499 | * mappings that respect the real ACPI topology but reflect our emulated | ||
500 | * environment. For each emulated node, we find which real node it appears on | ||
501 | * and create PXM to NID mappings for those fake nodes which mirror that | ||
502 | * locality. SLIT will now represent the correct distances between emulated | ||
503 | * nodes as a result of the real topology. | ||
504 | */ | ||
505 | void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) | ||
506 | { | ||
507 | int i, j; | ||
508 | |||
509 | for (i = 0; i < num_nodes; i++) { | ||
510 | int nid, pxm; | ||
511 | |||
512 | nid = find_node_by_addr(fake_nodes[i].start); | ||
513 | if (nid == NUMA_NO_NODE) | ||
514 | continue; | ||
515 | pxm = node_to_pxm(nid); | ||
516 | if (pxm == PXM_INVAL) | ||
517 | continue; | ||
518 | fake_node_to_pxm_map[i] = pxm; | ||
519 | /* | ||
520 | * For each apicid_to_node mapping that exists for this real | ||
521 | * node, it must now point to the fake node ID. | ||
522 | */ | ||
523 | for (j = 0; j < MAX_LOCAL_APIC; j++) | ||
524 | if (apicid_to_node[j] == nid && | ||
525 | fake_apicid_to_node[j] == NUMA_NO_NODE) | ||
526 | fake_apicid_to_node[j] = i; | ||
527 | } | ||
528 | |||
529 | /* | ||
530 | * If there are apicid-to-node mappings for physical nodes that do not | ||
531 | * have a corresponding emulated node, it should default to a guaranteed | ||
532 | * value. | ||
533 | */ | ||
534 | for (i = 0; i < MAX_LOCAL_APIC; i++) | ||
535 | if (apicid_to_node[i] != NUMA_NO_NODE && | ||
536 | fake_apicid_to_node[i] == NUMA_NO_NODE) | ||
537 | fake_apicid_to_node[i] = 0; | ||
538 | |||
539 | for (i = 0; i < num_nodes; i++) | ||
540 | __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i); | ||
541 | memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node)); | ||
542 | |||
543 | nodes_clear(nodes_parsed); | ||
544 | for (i = 0; i < num_nodes; i++) | ||
545 | if (fake_nodes[i].start != fake_nodes[i].end) | ||
546 | node_set(i, nodes_parsed); | ||
547 | } | ||
548 | |||
549 | static int null_slit_node_compare(int a, int b) | ||
550 | { | ||
551 | return node_to_pxm(a) == node_to_pxm(b); | ||
552 | } | ||
553 | #else | ||
554 | static int null_slit_node_compare(int a, int b) | ||
555 | { | ||
556 | return a == b; | ||
557 | } | ||
558 | #endif /* CONFIG_NUMA_EMU */ | ||
559 | |||
560 | int __node_distance(int a, int b) | ||
561 | { | ||
562 | int index; | ||
563 | |||
564 | if (!acpi_slit) | ||
565 | return null_slit_node_compare(a, b) ? LOCAL_DISTANCE : | ||
566 | REMOTE_DISTANCE; | ||
567 | index = acpi_slit->locality_count * node_to_pxm(a); | ||
568 | return acpi_slit->entry[index + node_to_pxm(b)]; | ||
569 | } | ||
570 | |||
571 | EXPORT_SYMBOL(__node_distance); | ||
572 | |||
573 | #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY) | 248 | #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || defined(CONFIG_ACPI_HOTPLUG_MEMORY) |
574 | int memory_add_physaddr_to_nid(u64 start) | 249 | int memory_add_physaddr_to_nid(u64 start) |
575 | { | 250 | { |
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 6acc724d5d8f..d6c0418c3e47 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c | |||
@@ -179,12 +179,8 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, | |||
179 | sender = this_cpu_read(tlb_vector_offset); | 179 | sender = this_cpu_read(tlb_vector_offset); |
180 | f = &flush_state[sender]; | 180 | f = &flush_state[sender]; |
181 | 181 | ||
182 | /* | 182 | if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS) |
183 | * Could avoid this lock when | 183 | raw_spin_lock(&f->tlbstate_lock); |
184 | * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is | ||
185 | * probably not worth checking this for a cache-hot lock. | ||
186 | */ | ||
187 | raw_spin_lock(&f->tlbstate_lock); | ||
188 | 184 | ||
189 | f->flush_mm = mm; | 185 | f->flush_mm = mm; |
190 | f->flush_va = va; | 186 | f->flush_va = va; |
@@ -202,7 +198,8 @@ static void flush_tlb_others_ipi(const struct cpumask *cpumask, | |||
202 | 198 | ||
203 | f->flush_mm = NULL; | 199 | f->flush_mm = NULL; |
204 | f->flush_va = 0; | 200 | f->flush_va = 0; |
205 | raw_spin_unlock(&f->tlbstate_lock); | 201 | if (nr_cpu_ids > NUM_INVALIDATE_TLB_VECTORS) |
202 | raw_spin_unlock(&f->tlbstate_lock); | ||
206 | } | 203 | } |
207 | 204 | ||
208 | void native_flush_tlb_others(const struct cpumask *cpumask, | 205 | void native_flush_tlb_others(const struct cpumask *cpumask, |
@@ -211,11 +208,10 @@ void native_flush_tlb_others(const struct cpumask *cpumask, | |||
211 | if (is_uv_system()) { | 208 | if (is_uv_system()) { |
212 | unsigned int cpu; | 209 | unsigned int cpu; |
213 | 210 | ||
214 | cpu = get_cpu(); | 211 | cpu = smp_processor_id(); |
215 | cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu); | 212 | cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu); |
216 | if (cpumask) | 213 | if (cpumask) |
217 | flush_tlb_others_ipi(cpumask, mm, va); | 214 | flush_tlb_others_ipi(cpumask, mm, va); |
218 | put_cpu(); | ||
219 | return; | 215 | return; |
220 | } | 216 | } |
221 | flush_tlb_others_ipi(cpumask, mm, va); | 217 | flush_tlb_others_ipi(cpumask, mm, va); |
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index 72cbec14d783..2d49d4e19a36 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c | |||
@@ -126,7 +126,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth) | |||
126 | if (!user_mode_vm(regs)) { | 126 | if (!user_mode_vm(regs)) { |
127 | unsigned long stack = kernel_stack_pointer(regs); | 127 | unsigned long stack = kernel_stack_pointer(regs); |
128 | if (depth) | 128 | if (depth) |
129 | dump_trace(NULL, regs, (unsigned long *)stack, | 129 | dump_trace(NULL, regs, (unsigned long *)stack, 0, |
130 | &backtrace_ops, &depth); | 130 | &backtrace_ops, &depth); |
131 | return; | 131 | return; |
132 | } | 132 | } |
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 9fadec074142..98ab13058f89 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c | |||
@@ -50,7 +50,7 @@ static inline void setup_num_counters(void) | |||
50 | #endif | 50 | #endif |
51 | } | 51 | } |
52 | 52 | ||
53 | static int inline addr_increment(void) | 53 | static inline int addr_increment(void) |
54 | { | 54 | { |
55 | #ifdef CONFIG_SMP | 55 | #ifdef CONFIG_SMP |
56 | return smp_num_siblings == 2 ? 2 : 1; | 56 | return smp_num_siblings == 2 ? 2 : 1; |
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index e27dffbbb1a7..026e4931d162 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c | |||
@@ -350,7 +350,7 @@ static int __init early_fill_mp_bus_info(void) | |||
350 | 350 | ||
351 | #define ENABLE_CF8_EXT_CFG (1ULL << 46) | 351 | #define ENABLE_CF8_EXT_CFG (1ULL << 46) |
352 | 352 | ||
353 | static void enable_pci_io_ecs(void *unused) | 353 | static void __cpuinit enable_pci_io_ecs(void *unused) |
354 | { | 354 | { |
355 | u64 reg; | 355 | u64 reg; |
356 | rdmsrl(MSR_AMD64_NB_CFG, reg); | 356 | rdmsrl(MSR_AMD64_NB_CFG, reg); |
diff --git a/arch/x86/pci/ce4100.c b/arch/x86/pci/ce4100.c index 9260b3eb18d4..67858be4b52b 100644 --- a/arch/x86/pci/ce4100.c +++ b/arch/x86/pci/ce4100.c | |||
@@ -255,7 +255,7 @@ int bridge_read(unsigned int devfn, int reg, int len, u32 *value) | |||
255 | static int ce4100_conf_read(unsigned int seg, unsigned int bus, | 255 | static int ce4100_conf_read(unsigned int seg, unsigned int bus, |
256 | unsigned int devfn, int reg, int len, u32 *value) | 256 | unsigned int devfn, int reg, int len, u32 *value) |
257 | { | 257 | { |
258 | int i, retval = 1; | 258 | int i; |
259 | 259 | ||
260 | if (bus == 1) { | 260 | if (bus == 1) { |
261 | for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) { | 261 | for (i = 0; i < ARRAY_SIZE(bus1_fixups); i++) { |
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index b1805b78842f..494f2e7ea2b4 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c | |||
@@ -241,7 +241,7 @@ void __init pcibios_resource_survey(void) | |||
241 | e820_reserve_resources_late(); | 241 | e820_reserve_resources_late(); |
242 | /* | 242 | /* |
243 | * Insert the IO APIC resources after PCI initialization has | 243 | * Insert the IO APIC resources after PCI initialization has |
244 | * occured to handle IO APICS that are mapped in on a BAR in | 244 | * occurred to handle IO APICS that are mapped in on a BAR in |
245 | * PCI space, but before trying to assign unassigned pci res. | 245 | * PCI space, but before trying to assign unassigned pci res. |
246 | */ | 246 | */ |
247 | ioapic_insert_resources(); | 247 | ioapic_insert_resources(); |
@@ -304,7 +304,7 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, | |||
304 | /* | 304 | /* |
305 | * ioremap() and ioremap_nocache() defaults to UC MINUS for now. | 305 | * ioremap() and ioremap_nocache() defaults to UC MINUS for now. |
306 | * To avoid attribute conflicts, request UC MINUS here | 306 | * To avoid attribute conflicts, request UC MINUS here |
307 | * aswell. | 307 | * as well. |
308 | */ | 308 | */ |
309 | prot |= _PAGE_CACHE_UC_MINUS; | 309 | prot |= _PAGE_CACHE_UC_MINUS; |
310 | 310 | ||
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 87e6c8323117..8201165bae28 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c | |||
@@ -597,21 +597,18 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route | |||
597 | return 1; | 597 | return 1; |
598 | } | 598 | } |
599 | 599 | ||
600 | if ((device >= PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MIN) && | 600 | if ((device >= PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MIN && |
601 | (device <= PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MAX)) { | 601 | device <= PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MAX) |
602 | || (device >= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN && | ||
603 | device <= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX) | ||
604 | || (device >= PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MIN && | ||
605 | device <= PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MAX)) { | ||
602 | r->name = "PIIX/ICH"; | 606 | r->name = "PIIX/ICH"; |
603 | r->get = pirq_piix_get; | 607 | r->get = pirq_piix_get; |
604 | r->set = pirq_piix_set; | 608 | r->set = pirq_piix_set; |
605 | return 1; | 609 | return 1; |
606 | } | 610 | } |
607 | 611 | ||
608 | if ((device >= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN) && | ||
609 | (device <= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX)) { | ||
610 | r->name = "PIIX/ICH"; | ||
611 | r->get = pirq_piix_get; | ||
612 | r->set = pirq_piix_set; | ||
613 | return 1; | ||
614 | } | ||
615 | return 0; | 612 | return 0; |
616 | } | 613 | } |
617 | 614 | ||
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 25cd4a07d09f..e37b407a0ee8 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c | |||
@@ -20,7 +20,8 @@ | |||
20 | #include <asm/xen/pci.h> | 20 | #include <asm/xen/pci.h> |
21 | 21 | ||
22 | #ifdef CONFIG_ACPI | 22 | #ifdef CONFIG_ACPI |
23 | static int xen_hvm_register_pirq(u32 gsi, int triggering) | 23 | static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, |
24 | int trigger, int polarity) | ||
24 | { | 25 | { |
25 | int rc, irq; | 26 | int rc, irq; |
26 | struct physdev_map_pirq map_irq; | 27 | struct physdev_map_pirq map_irq; |
@@ -41,7 +42,7 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering) | |||
41 | return -1; | 42 | return -1; |
42 | } | 43 | } |
43 | 44 | ||
44 | if (triggering == ACPI_EDGE_SENSITIVE) { | 45 | if (trigger == ACPI_EDGE_SENSITIVE) { |
45 | shareable = 0; | 46 | shareable = 0; |
46 | name = "ioapic-edge"; | 47 | name = "ioapic-edge"; |
47 | } else { | 48 | } else { |
@@ -49,18 +50,12 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering) | |||
49 | name = "ioapic-level"; | 50 | name = "ioapic-level"; |
50 | } | 51 | } |
51 | 52 | ||
52 | irq = xen_map_pirq_gsi(map_irq.pirq, gsi, shareable, name); | 53 | irq = xen_bind_pirq_gsi_to_irq(gsi, map_irq.pirq, shareable, name); |
53 | 54 | ||
54 | printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq); | 55 | printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq); |
55 | 56 | ||
56 | return irq; | 57 | return irq; |
57 | } | 58 | } |
58 | |||
59 | static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, | ||
60 | int trigger, int polarity) | ||
61 | { | ||
62 | return xen_hvm_register_pirq(gsi, trigger); | ||
63 | } | ||
64 | #endif | 59 | #endif |
65 | 60 | ||
66 | #if defined(CONFIG_PCI_MSI) | 61 | #if defined(CONFIG_PCI_MSI) |
@@ -91,7 +86,7 @@ static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq, | |||
91 | 86 | ||
92 | static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | 87 | static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) |
93 | { | 88 | { |
94 | int irq, pirq, ret = 0; | 89 | int irq, pirq; |
95 | struct msi_desc *msidesc; | 90 | struct msi_desc *msidesc; |
96 | struct msi_msg msg; | 91 | struct msi_msg msg; |
97 | 92 | ||
@@ -99,39 +94,32 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |||
99 | __read_msi_msg(msidesc, &msg); | 94 | __read_msi_msg(msidesc, &msg); |
100 | pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) | | 95 | pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) | |
101 | ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff); | 96 | ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff); |
102 | if (xen_irq_from_pirq(pirq) >= 0 && msg.data == XEN_PIRQ_MSI_DATA) { | 97 | if (msg.data != XEN_PIRQ_MSI_DATA || |
103 | xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? | 98 | xen_irq_from_pirq(pirq) < 0) { |
104 | "msi-x" : "msi", &irq, &pirq, XEN_ALLOC_IRQ); | 99 | pirq = xen_allocate_pirq_msi(dev, msidesc); |
105 | if (irq < 0) | 100 | if (pirq < 0) |
106 | goto error; | 101 | goto error; |
107 | ret = set_irq_msi(irq, msidesc); | 102 | xen_msi_compose_msg(dev, pirq, &msg); |
108 | if (ret < 0) | 103 | __write_msi_msg(msidesc, &msg); |
109 | goto error_while; | 104 | dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq); |
110 | printk(KERN_DEBUG "xen: msi already setup: msi --> irq=%d" | 105 | } else { |
111 | " pirq=%d\n", irq, pirq); | 106 | dev_dbg(&dev->dev, |
112 | return 0; | 107 | "xen: msi already bound to pirq=%d\n", pirq); |
113 | } | 108 | } |
114 | xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? | 109 | irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0, |
115 | "msi-x" : "msi", &irq, &pirq, (XEN_ALLOC_IRQ | XEN_ALLOC_PIRQ)); | 110 | (type == PCI_CAP_ID_MSIX) ? |
116 | if (irq < 0 || pirq < 0) | 111 | "msi-x" : "msi"); |
112 | if (irq < 0) | ||
117 | goto error; | 113 | goto error; |
118 | printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq); | 114 | dev_dbg(&dev->dev, |
119 | xen_msi_compose_msg(dev, pirq, &msg); | 115 | "xen: msi --> pirq=%d --> irq=%d\n", pirq, irq); |
120 | ret = set_irq_msi(irq, msidesc); | ||
121 | if (ret < 0) | ||
122 | goto error_while; | ||
123 | write_msi_msg(irq, &msg); | ||
124 | } | 116 | } |
125 | return 0; | 117 | return 0; |
126 | 118 | ||
127 | error_while: | ||
128 | unbind_from_irqhandler(irq, NULL); | ||
129 | error: | 119 | error: |
130 | if (ret == -ENODEV) | 120 | dev_err(&dev->dev, |
131 | dev_err(&dev->dev, "Xen PCI frontend has not registered" \ | 121 | "Xen PCI frontend has not registered MSI/MSI-X support!\n"); |
132 | " MSI/MSI-X support!\n"); | 122 | return -ENODEV; |
133 | |||
134 | return ret; | ||
135 | } | 123 | } |
136 | 124 | ||
137 | /* | 125 | /* |
@@ -150,35 +138,26 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |||
150 | return -ENOMEM; | 138 | return -ENOMEM; |
151 | 139 | ||
152 | if (type == PCI_CAP_ID_MSIX) | 140 | if (type == PCI_CAP_ID_MSIX) |
153 | ret = xen_pci_frontend_enable_msix(dev, &v, nvec); | 141 | ret = xen_pci_frontend_enable_msix(dev, v, nvec); |
154 | else | 142 | else |
155 | ret = xen_pci_frontend_enable_msi(dev, &v); | 143 | ret = xen_pci_frontend_enable_msi(dev, v); |
156 | if (ret) | 144 | if (ret) |
157 | goto error; | 145 | goto error; |
158 | i = 0; | 146 | i = 0; |
159 | list_for_each_entry(msidesc, &dev->msi_list, list) { | 147 | list_for_each_entry(msidesc, &dev->msi_list, list) { |
160 | irq = xen_allocate_pirq(v[i], 0, /* not sharable */ | 148 | irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0, |
161 | (type == PCI_CAP_ID_MSIX) ? | 149 | (type == PCI_CAP_ID_MSIX) ? |
162 | "pcifront-msi-x" : "pcifront-msi"); | 150 | "pcifront-msi-x" : |
163 | if (irq < 0) { | 151 | "pcifront-msi"); |
164 | ret = -1; | 152 | if (irq < 0) |
165 | goto free; | 153 | goto free; |
166 | } | ||
167 | |||
168 | ret = set_irq_msi(irq, msidesc); | ||
169 | if (ret) | ||
170 | goto error_while; | ||
171 | i++; | 154 | i++; |
172 | } | 155 | } |
173 | kfree(v); | 156 | kfree(v); |
174 | return 0; | 157 | return 0; |
175 | 158 | ||
176 | error_while: | ||
177 | unbind_from_irqhandler(irq, NULL); | ||
178 | error: | 159 | error: |
179 | if (ret == -ENODEV) | 160 | dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n"); |
180 | dev_err(&dev->dev, "Xen PCI frontend has not registered" \ | ||
181 | " MSI/MSI-X support!\n"); | ||
182 | free: | 161 | free: |
183 | kfree(v); | 162 | kfree(v); |
184 | return ret; | 163 | return ret; |
@@ -193,6 +172,9 @@ static void xen_teardown_msi_irqs(struct pci_dev *dev) | |||
193 | xen_pci_frontend_disable_msix(dev); | 172 | xen_pci_frontend_disable_msix(dev); |
194 | else | 173 | else |
195 | xen_pci_frontend_disable_msi(dev); | 174 | xen_pci_frontend_disable_msi(dev); |
175 | |||
176 | /* Free the IRQ's and the msidesc using the generic code. */ | ||
177 | default_teardown_msi_irqs(dev); | ||
196 | } | 178 | } |
197 | 179 | ||
198 | static void xen_teardown_msi_irq(unsigned int irq) | 180 | static void xen_teardown_msi_irq(unsigned int irq) |
@@ -200,47 +182,91 @@ static void xen_teardown_msi_irq(unsigned int irq) | |||
200 | xen_destroy_irq(irq); | 182 | xen_destroy_irq(irq); |
201 | } | 183 | } |
202 | 184 | ||
185 | #ifdef CONFIG_XEN_DOM0 | ||
203 | static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | 186 | static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) |
204 | { | 187 | { |
205 | int irq, ret; | 188 | int ret = 0; |
206 | struct msi_desc *msidesc; | 189 | struct msi_desc *msidesc; |
207 | 190 | ||
208 | list_for_each_entry(msidesc, &dev->msi_list, list) { | 191 | list_for_each_entry(msidesc, &dev->msi_list, list) { |
209 | irq = xen_create_msi_irq(dev, msidesc, type); | 192 | struct physdev_map_pirq map_irq; |
210 | if (irq < 0) | ||
211 | return -1; | ||
212 | 193 | ||
213 | ret = set_irq_msi(irq, msidesc); | 194 | memset(&map_irq, 0, sizeof(map_irq)); |
214 | if (ret) | 195 | map_irq.domid = DOMID_SELF; |
215 | goto error; | 196 | map_irq.type = MAP_PIRQ_TYPE_MSI; |
216 | } | 197 | map_irq.index = -1; |
217 | return 0; | 198 | map_irq.pirq = -1; |
199 | map_irq.bus = dev->bus->number; | ||
200 | map_irq.devfn = dev->devfn; | ||
218 | 201 | ||
219 | error: | 202 | if (type == PCI_CAP_ID_MSIX) { |
220 | xen_destroy_irq(irq); | 203 | int pos; |
204 | u32 table_offset, bir; | ||
205 | |||
206 | pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); | ||
207 | |||
208 | pci_read_config_dword(dev, pos + PCI_MSIX_TABLE, | ||
209 | &table_offset); | ||
210 | bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK); | ||
211 | |||
212 | map_irq.table_base = pci_resource_start(dev, bir); | ||
213 | map_irq.entry_nr = msidesc->msi_attrib.entry_nr; | ||
214 | } | ||
215 | |||
216 | ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); | ||
217 | if (ret) { | ||
218 | dev_warn(&dev->dev, "xen map irq failed %d\n", ret); | ||
219 | goto out; | ||
220 | } | ||
221 | |||
222 | ret = xen_bind_pirq_msi_to_irq(dev, msidesc, | ||
223 | map_irq.pirq, map_irq.index, | ||
224 | (type == PCI_CAP_ID_MSIX) ? | ||
225 | "msi-x" : "msi"); | ||
226 | if (ret < 0) | ||
227 | goto out; | ||
228 | } | ||
229 | ret = 0; | ||
230 | out: | ||
221 | return ret; | 231 | return ret; |
222 | } | 232 | } |
223 | #endif | 233 | #endif |
234 | #endif | ||
224 | 235 | ||
225 | static int xen_pcifront_enable_irq(struct pci_dev *dev) | 236 | static int xen_pcifront_enable_irq(struct pci_dev *dev) |
226 | { | 237 | { |
227 | int rc; | 238 | int rc; |
228 | int share = 1; | 239 | int share = 1; |
240 | int pirq; | ||
241 | u8 gsi; | ||
229 | 242 | ||
230 | dev_info(&dev->dev, "Xen PCI enabling IRQ: %d\n", dev->irq); | 243 | rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi); |
244 | if (rc < 0) { | ||
245 | dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n", | ||
246 | rc); | ||
247 | return rc; | ||
248 | } | ||
231 | 249 | ||
232 | if (dev->irq < 0) | 250 | rc = xen_allocate_pirq_gsi(gsi); |
233 | return -EINVAL; | 251 | if (rc < 0) { |
252 | dev_warn(&dev->dev, "Xen PCI: failed to allocate a PIRQ for GSI%d: %d\n", | ||
253 | gsi, rc); | ||
254 | return rc; | ||
255 | } | ||
256 | pirq = rc; | ||
234 | 257 | ||
235 | if (dev->irq < NR_IRQS_LEGACY) | 258 | if (gsi < NR_IRQS_LEGACY) |
236 | share = 0; | 259 | share = 0; |
237 | 260 | ||
238 | rc = xen_allocate_pirq(dev->irq, share, "pcifront"); | 261 | rc = xen_bind_pirq_gsi_to_irq(gsi, pirq, share, "pcifront"); |
239 | if (rc < 0) { | 262 | if (rc < 0) { |
240 | dev_warn(&dev->dev, "Xen PCI IRQ: %d, failed to register:%d\n", | 263 | dev_warn(&dev->dev, "Xen PCI: failed to bind GSI%d (PIRQ%d) to IRQ: %d\n", |
241 | dev->irq, rc); | 264 | gsi, pirq, rc); |
242 | return rc; | 265 | return rc; |
243 | } | 266 | } |
267 | |||
268 | dev->irq = rc; | ||
269 | dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq); | ||
244 | return 0; | 270 | return 0; |
245 | } | 271 | } |
246 | 272 | ||
@@ -292,7 +318,7 @@ int __init pci_xen_hvm_init(void) | |||
292 | #ifdef CONFIG_XEN_DOM0 | 318 | #ifdef CONFIG_XEN_DOM0 |
293 | static int xen_register_pirq(u32 gsi, int triggering) | 319 | static int xen_register_pirq(u32 gsi, int triggering) |
294 | { | 320 | { |
295 | int rc, irq; | 321 | int rc, pirq, irq = -1; |
296 | struct physdev_map_pirq map_irq; | 322 | struct physdev_map_pirq map_irq; |
297 | int shareable = 0; | 323 | int shareable = 0; |
298 | char *name; | 324 | char *name; |
@@ -308,17 +334,20 @@ static int xen_register_pirq(u32 gsi, int triggering) | |||
308 | name = "ioapic-level"; | 334 | name = "ioapic-level"; |
309 | } | 335 | } |
310 | 336 | ||
311 | irq = xen_allocate_pirq(gsi, shareable, name); | 337 | pirq = xen_allocate_pirq_gsi(gsi); |
312 | 338 | if (pirq < 0) | |
313 | printk(KERN_DEBUG "xen: --> irq=%d\n", irq); | 339 | goto out; |
314 | 340 | ||
341 | irq = xen_bind_pirq_gsi_to_irq(gsi, pirq, shareable, name); | ||
315 | if (irq < 0) | 342 | if (irq < 0) |
316 | goto out; | 343 | goto out; |
317 | 344 | ||
345 | printk(KERN_DEBUG "xen: --> pirq=%d -> irq=%d\n", pirq, irq); | ||
346 | |||
318 | map_irq.domid = DOMID_SELF; | 347 | map_irq.domid = DOMID_SELF; |
319 | map_irq.type = MAP_PIRQ_TYPE_GSI; | 348 | map_irq.type = MAP_PIRQ_TYPE_GSI; |
320 | map_irq.index = gsi; | 349 | map_irq.index = gsi; |
321 | map_irq.pirq = irq; | 350 | map_irq.pirq = pirq; |
322 | 351 | ||
323 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); | 352 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); |
324 | if (rc) { | 353 | if (rc) { |
@@ -405,13 +434,18 @@ static int __init pci_xen_initial_domain(void) | |||
405 | 434 | ||
406 | void __init xen_setup_pirqs(void) | 435 | void __init xen_setup_pirqs(void) |
407 | { | 436 | { |
408 | int irq; | 437 | int pirq, irq; |
409 | 438 | ||
410 | pci_xen_initial_domain(); | 439 | pci_xen_initial_domain(); |
411 | 440 | ||
412 | if (0 == nr_ioapics) { | 441 | if (0 == nr_ioapics) { |
413 | for (irq = 0; irq < NR_IRQS_LEGACY; irq++) | 442 | for (irq = 0; irq < NR_IRQS_LEGACY; irq++) { |
414 | xen_allocate_pirq(irq, 0, "xt-pic"); | 443 | pirq = xen_allocate_pirq_gsi(irq); |
444 | if (WARN(pirq < 0, | ||
445 | "Could not allocate PIRQ for legacy interrupt\n")) | ||
446 | break; | ||
447 | irq = xen_bind_pirq_gsi_to_irq(irq, pirq, 0, "xt-pic"); | ||
448 | } | ||
415 | return; | 449 | return; |
416 | } | 450 | } |
417 | 451 | ||
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c index cd6f184c3b3f..28071bb31db7 100644 --- a/arch/x86/platform/ce4100/ce4100.c +++ b/arch/x86/platform/ce4100/ce4100.c | |||
@@ -16,21 +16,19 @@ | |||
16 | #include <linux/serial_8250.h> | 16 | #include <linux/serial_8250.h> |
17 | 17 | ||
18 | #include <asm/ce4100.h> | 18 | #include <asm/ce4100.h> |
19 | #include <asm/prom.h> | ||
19 | #include <asm/setup.h> | 20 | #include <asm/setup.h> |
21 | #include <asm/i8259.h> | ||
20 | #include <asm/io.h> | 22 | #include <asm/io.h> |
23 | #include <asm/io_apic.h> | ||
21 | 24 | ||
22 | static int ce4100_i8042_detect(void) | 25 | static int ce4100_i8042_detect(void) |
23 | { | 26 | { |
24 | return 0; | 27 | return 0; |
25 | } | 28 | } |
26 | 29 | ||
27 | static void __init sdv_find_smp_config(void) | ||
28 | { | ||
29 | } | ||
30 | |||
31 | #ifdef CONFIG_SERIAL_8250 | 30 | #ifdef CONFIG_SERIAL_8250 |
32 | 31 | ||
33 | |||
34 | static unsigned int mem_serial_in(struct uart_port *p, int offset) | 32 | static unsigned int mem_serial_in(struct uart_port *p, int offset) |
35 | { | 33 | { |
36 | offset = offset << p->regshift; | 34 | offset = offset << p->regshift; |
@@ -119,6 +117,15 @@ static void __init sdv_arch_setup(void) | |||
119 | sdv_serial_fixup(); | 117 | sdv_serial_fixup(); |
120 | } | 118 | } |
121 | 119 | ||
120 | #ifdef CONFIG_X86_IO_APIC | ||
121 | static void __cpuinit sdv_pci_init(void) | ||
122 | { | ||
123 | x86_of_pci_init(); | ||
124 | /* We can't set this earlier, because we need to calibrate the timer */ | ||
125 | legacy_pic = &null_legacy_pic; | ||
126 | } | ||
127 | #endif | ||
128 | |||
122 | /* | 129 | /* |
123 | * CE4100 specific x86_init function overrides and early setup | 130 | * CE4100 specific x86_init function overrides and early setup |
124 | * calls. | 131 | * calls. |
@@ -129,6 +136,11 @@ void __init x86_ce4100_early_setup(void) | |||
129 | x86_platform.i8042_detect = ce4100_i8042_detect; | 136 | x86_platform.i8042_detect = ce4100_i8042_detect; |
130 | x86_init.resources.probe_roms = x86_init_noop; | 137 | x86_init.resources.probe_roms = x86_init_noop; |
131 | x86_init.mpparse.get_smp_config = x86_init_uint_noop; | 138 | x86_init.mpparse.get_smp_config = x86_init_uint_noop; |
132 | x86_init.mpparse.find_smp_config = sdv_find_smp_config; | 139 | x86_init.mpparse.find_smp_config = x86_init_noop; |
133 | x86_init.pci.init = ce4100_pci_init; | 140 | x86_init.pci.init = ce4100_pci_init; |
141 | |||
142 | #ifdef CONFIG_X86_IO_APIC | ||
143 | x86_init.pci.init_irq = sdv_pci_init; | ||
144 | x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc_nocheck; | ||
145 | #endif | ||
134 | } | 146 | } |
diff --git a/arch/x86/platform/ce4100/falconfalls.dts b/arch/x86/platform/ce4100/falconfalls.dts new file mode 100644 index 000000000000..dc701ea58546 --- /dev/null +++ b/arch/x86/platform/ce4100/falconfalls.dts | |||
@@ -0,0 +1,428 @@ | |||
1 | /* | ||
2 | * CE4100 on Falcon Falls | ||
3 | * | ||
4 | * (c) Copyright 2010 Intel Corporation | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU General Public License as published by the | ||
8 | * Free Software Foundation; version 2 of the License. | ||
9 | */ | ||
10 | /dts-v1/; | ||
11 | / { | ||
12 | model = "intel,falconfalls"; | ||
13 | compatible = "intel,falconfalls"; | ||
14 | #address-cells = <1>; | ||
15 | #size-cells = <1>; | ||
16 | |||
17 | cpus { | ||
18 | #address-cells = <1>; | ||
19 | #size-cells = <0>; | ||
20 | |||
21 | cpu@0 { | ||
22 | device_type = "cpu"; | ||
23 | compatible = "intel,ce4100"; | ||
24 | reg = <0>; | ||
25 | lapic = <&lapic0>; | ||
26 | }; | ||
27 | }; | ||
28 | |||
29 | soc@0 { | ||
30 | #address-cells = <1>; | ||
31 | #size-cells = <1>; | ||
32 | compatible = "intel,ce4100-cp"; | ||
33 | ranges; | ||
34 | |||
35 | ioapic1: interrupt-controller@fec00000 { | ||
36 | #interrupt-cells = <2>; | ||
37 | compatible = "intel,ce4100-ioapic"; | ||
38 | interrupt-controller; | ||
39 | reg = <0xfec00000 0x1000>; | ||
40 | }; | ||
41 | |||
42 | timer@fed00000 { | ||
43 | compatible = "intel,ce4100-hpet"; | ||
44 | reg = <0xfed00000 0x200>; | ||
45 | }; | ||
46 | |||
47 | lapic0: interrupt-controller@fee00000 { | ||
48 | compatible = "intel,ce4100-lapic"; | ||
49 | reg = <0xfee00000 0x1000>; | ||
50 | }; | ||
51 | |||
52 | pci@3fc { | ||
53 | #address-cells = <3>; | ||
54 | #size-cells = <2>; | ||
55 | compatible = "intel,ce4100-pci", "pci"; | ||
56 | device_type = "pci"; | ||
57 | bus-range = <0 0>; | ||
58 | ranges = <0x2000000 0 0xbffff000 0xbffff000 0 0x1000 | ||
59 | 0x2000000 0 0xdffe0000 0xdffe0000 0 0x1000 | ||
60 | 0x0000000 0 0x0 0x0 0 0x100>; | ||
61 | |||
62 | /* Secondary IO-APIC */ | ||
63 | ioapic2: interrupt-controller@0,1 { | ||
64 | #interrupt-cells = <2>; | ||
65 | compatible = "intel,ce4100-ioapic"; | ||
66 | interrupt-controller; | ||
67 | reg = <0x100 0x0 0x0 0x0 0x0>; | ||
68 | assigned-addresses = <0x02000000 0x0 0xbffff000 0x0 0x1000>; | ||
69 | }; | ||
70 | |||
71 | pci@1,0 { | ||
72 | #address-cells = <3>; | ||
73 | #size-cells = <2>; | ||
74 | compatible = "intel,ce4100-pci", "pci"; | ||
75 | device_type = "pci"; | ||
76 | bus-range = <1 1>; | ||
77 | ranges = <0x2000000 0 0xdffe0000 0x2000000 0 0xdffe0000 0 0x1000>; | ||
78 | |||
79 | interrupt-parent = <&ioapic2>; | ||
80 | |||
81 | display@2,0 { | ||
82 | compatible = "pci8086,2e5b.2", | ||
83 | "pci8086,2e5b", | ||
84 | "pciclass038000", | ||
85 | "pciclass0380"; | ||
86 | |||
87 | reg = <0x11000 0x0 0x0 0x0 0x0>; | ||
88 | interrupts = <0 1>; | ||
89 | }; | ||
90 | |||
91 | multimedia@3,0 { | ||
92 | compatible = "pci8086,2e5c.2", | ||
93 | "pci8086,2e5c", | ||
94 | "pciclass048000", | ||
95 | "pciclass0480"; | ||
96 | |||
97 | reg = <0x11800 0x0 0x0 0x0 0x0>; | ||
98 | interrupts = <2 1>; | ||
99 | }; | ||
100 | |||
101 | multimedia@4,0 { | ||
102 | compatible = "pci8086,2e5d.2", | ||
103 | "pci8086,2e5d", | ||
104 | "pciclass048000", | ||
105 | "pciclass0480"; | ||
106 | |||
107 | reg = <0x12000 0x0 0x0 0x0 0x0>; | ||
108 | interrupts = <4 1>; | ||
109 | }; | ||
110 | |||
111 | multimedia@4,1 { | ||
112 | compatible = "pci8086,2e5e.2", | ||
113 | "pci8086,2e5e", | ||
114 | "pciclass048000", | ||
115 | "pciclass0480"; | ||
116 | |||
117 | reg = <0x12100 0x0 0x0 0x0 0x0>; | ||
118 | interrupts = <5 1>; | ||
119 | }; | ||
120 | |||
121 | sound@6,0 { | ||
122 | compatible = "pci8086,2e5f.2", | ||
123 | "pci8086,2e5f", | ||
124 | "pciclass040100", | ||
125 | "pciclass0401"; | ||
126 | |||
127 | reg = <0x13000 0x0 0x0 0x0 0x0>; | ||
128 | interrupts = <6 1>; | ||
129 | }; | ||
130 | |||
131 | sound@6,1 { | ||
132 | compatible = "pci8086,2e5f.2", | ||
133 | "pci8086,2e5f", | ||
134 | "pciclass040100", | ||
135 | "pciclass0401"; | ||
136 | |||
137 | reg = <0x13100 0x0 0x0 0x0 0x0>; | ||
138 | interrupts = <7 1>; | ||
139 | }; | ||
140 | |||
141 | sound@6,2 { | ||
142 | compatible = "pci8086,2e60.2", | ||
143 | "pci8086,2e60", | ||
144 | "pciclass040100", | ||
145 | "pciclass0401"; | ||
146 | |||
147 | reg = <0x13200 0x0 0x0 0x0 0x0>; | ||
148 | interrupts = <8 1>; | ||
149 | }; | ||
150 | |||
151 | display@8,0 { | ||
152 | compatible = "pci8086,2e61.2", | ||
153 | "pci8086,2e61", | ||
154 | "pciclass038000", | ||
155 | "pciclass0380"; | ||
156 | |||
157 | reg = <0x14000 0x0 0x0 0x0 0x0>; | ||
158 | interrupts = <9 1>; | ||
159 | }; | ||
160 | |||
161 | display@8,1 { | ||
162 | compatible = "pci8086,2e62.2", | ||
163 | "pci8086,2e62", | ||
164 | "pciclass038000", | ||
165 | "pciclass0380"; | ||
166 | |||
167 | reg = <0x14100 0x0 0x0 0x0 0x0>; | ||
168 | interrupts = <10 1>; | ||
169 | }; | ||
170 | |||
171 | multimedia@8,2 { | ||
172 | compatible = "pci8086,2e63.2", | ||
173 | "pci8086,2e63", | ||
174 | "pciclass048000", | ||
175 | "pciclass0480"; | ||
176 | |||
177 | reg = <0x14200 0x0 0x0 0x0 0x0>; | ||
178 | interrupts = <11 1>; | ||
179 | }; | ||
180 | |||
181 | entertainment-encryption@9,0 { | ||
182 | compatible = "pci8086,2e64.2", | ||
183 | "pci8086,2e64", | ||
184 | "pciclass101000", | ||
185 | "pciclass1010"; | ||
186 | |||
187 | reg = <0x14800 0x0 0x0 0x0 0x0>; | ||
188 | interrupts = <12 1>; | ||
189 | }; | ||
190 | |||
191 | localbus@a,0 { | ||
192 | compatible = "pci8086,2e65.2", | ||
193 | "pci8086,2e65", | ||
194 | "pciclassff0000", | ||
195 | "pciclassff00"; | ||
196 | |||
197 | reg = <0x15000 0x0 0x0 0x0 0x0>; | ||
198 | }; | ||
199 | |||
200 | serial@b,0 { | ||
201 | compatible = "pci8086,2e66.2", | ||
202 | "pci8086,2e66", | ||
203 | "pciclass070003", | ||
204 | "pciclass0700"; | ||
205 | |||
206 | reg = <0x15800 0x0 0x0 0x0 0x0>; | ||
207 | interrupts = <14 1>; | ||
208 | }; | ||
209 | |||
210 | gpio@b,1 { | ||
211 | compatible = "pci8086,2e67.2", | ||
212 | "pci8086,2e67", | ||
213 | "pciclassff0000", | ||
214 | "pciclassff00"; | ||
215 | |||
216 | #gpio-cells = <2>; | ||
217 | reg = <0x15900 0x0 0x0 0x0 0x0>; | ||
218 | interrupts = <15 1>; | ||
219 | gpio-controller; | ||
220 | }; | ||
221 | |||
222 | i2c-controller@b,2 { | ||
223 | #address-cells = <2>; | ||
224 | #size-cells = <1>; | ||
225 | compatible = "pci8086,2e68.2", | ||
226 | "pci8086,2e68", | ||
227 | "pciclass,ff0000", | ||
228 | "pciclass,ff00"; | ||
229 | |||
230 | reg = <0x15a00 0x0 0x0 0x0 0x0>; | ||
231 | interrupts = <16 1>; | ||
232 | ranges = <0 0 0x02000000 0 0xdffe0500 0x100 | ||
233 | 1 0 0x02000000 0 0xdffe0600 0x100 | ||
234 | 2 0 0x02000000 0 0xdffe0700 0x100>; | ||
235 | |||
236 | i2c@0 { | ||
237 | #address-cells = <1>; | ||
238 | #size-cells = <0>; | ||
239 | compatible = "intel,ce4100-i2c-controller"; | ||
240 | reg = <0 0 0x100>; | ||
241 | }; | ||
242 | |||
243 | i2c@1 { | ||
244 | #address-cells = <1>; | ||
245 | #size-cells = <0>; | ||
246 | compatible = "intel,ce4100-i2c-controller"; | ||
247 | reg = <1 0 0x100>; | ||
248 | |||
249 | gpio@26 { | ||
250 | #gpio-cells = <2>; | ||
251 | compatible = "ti,pcf8575"; | ||
252 | reg = <0x26>; | ||
253 | gpio-controller; | ||
254 | }; | ||
255 | }; | ||
256 | |||
257 | i2c@2 { | ||
258 | #address-cells = <1>; | ||
259 | #size-cells = <0>; | ||
260 | compatible = "intel,ce4100-i2c-controller"; | ||
261 | reg = <2 0 0x100>; | ||
262 | |||
263 | gpio@26 { | ||
264 | #gpio-cells = <2>; | ||
265 | compatible = "ti,pcf8575"; | ||
266 | reg = <0x26>; | ||
267 | gpio-controller; | ||
268 | }; | ||
269 | }; | ||
270 | }; | ||
271 | |||
272 | smard-card@b,3 { | ||
273 | compatible = "pci8086,2e69.2", | ||
274 | "pci8086,2e69", | ||
275 | "pciclass070500", | ||
276 | "pciclass0705"; | ||
277 | |||
278 | reg = <0x15b00 0x0 0x0 0x0 0x0>; | ||
279 | interrupts = <15 1>; | ||
280 | }; | ||
281 | |||
282 | spi-controller@b,4 { | ||
283 | #address-cells = <1>; | ||
284 | #size-cells = <0>; | ||
285 | compatible = | ||
286 | "pci8086,2e6a.2", | ||
287 | "pci8086,2e6a", | ||
288 | "pciclass,ff0000", | ||
289 | "pciclass,ff00"; | ||
290 | |||
291 | reg = <0x15c00 0x0 0x0 0x0 0x0>; | ||
292 | interrupts = <15 1>; | ||
293 | |||
294 | dac@0 { | ||
295 | compatible = "ti,pcm1755"; | ||
296 | reg = <0>; | ||
297 | spi-max-frequency = <115200>; | ||
298 | }; | ||
299 | |||
300 | dac@1 { | ||
301 | compatible = "ti,pcm1609a"; | ||
302 | reg = <1>; | ||
303 | spi-max-frequency = <115200>; | ||
304 | }; | ||
305 | |||
306 | eeprom@2 { | ||
307 | compatible = "atmel,at93c46"; | ||
308 | reg = <2>; | ||
309 | spi-max-frequency = <115200>; | ||
310 | }; | ||
311 | }; | ||
312 | |||
313 | multimedia@b,7 { | ||
314 | compatible = "pci8086,2e6d.2", | ||
315 | "pci8086,2e6d", | ||
316 | "pciclassff0000", | ||
317 | "pciclassff00"; | ||
318 | |||
319 | reg = <0x15f00 0x0 0x0 0x0 0x0>; | ||
320 | }; | ||
321 | |||
322 | ethernet@c,0 { | ||
323 | compatible = "pci8086,2e6e.2", | ||
324 | "pci8086,2e6e", | ||
325 | "pciclass020000", | ||
326 | "pciclass0200"; | ||
327 | |||
328 | reg = <0x16000 0x0 0x0 0x0 0x0>; | ||
329 | interrupts = <21 1>; | ||
330 | }; | ||
331 | |||
332 | clock@c,1 { | ||
333 | compatible = "pci8086,2e6f.2", | ||
334 | "pci8086,2e6f", | ||
335 | "pciclassff0000", | ||
336 | "pciclassff00"; | ||
337 | |||
338 | reg = <0x16100 0x0 0x0 0x0 0x0>; | ||
339 | interrupts = <3 1>; | ||
340 | }; | ||
341 | |||
342 | usb@d,0 { | ||
343 | compatible = "pci8086,2e70.2", | ||
344 | "pci8086,2e70", | ||
345 | "pciclass0c0320", | ||
346 | "pciclass0c03"; | ||
347 | |||
348 | reg = <0x16800 0x0 0x0 0x0 0x0>; | ||
349 | interrupts = <22 3>; | ||
350 | }; | ||
351 | |||
352 | usb@d,1 { | ||
353 | compatible = "pci8086,2e70.2", | ||
354 | "pci8086,2e70", | ||
355 | "pciclass0c0320", | ||
356 | "pciclass0c03"; | ||
357 | |||
358 | reg = <0x16900 0x0 0x0 0x0 0x0>; | ||
359 | interrupts = <22 3>; | ||
360 | }; | ||
361 | |||
362 | sata@e,0 { | ||
363 | compatible = "pci8086,2e71.0", | ||
364 | "pci8086,2e71", | ||
365 | "pciclass010601", | ||
366 | "pciclass0106"; | ||
367 | |||
368 | reg = <0x17000 0x0 0x0 0x0 0x0>; | ||
369 | interrupts = <23 3>; | ||
370 | }; | ||
371 | |||
372 | flash@f,0 { | ||
373 | compatible = "pci8086,701.1", | ||
374 | "pci8086,701", | ||
375 | "pciclass050100", | ||
376 | "pciclass0501"; | ||
377 | |||
378 | reg = <0x17800 0x0 0x0 0x0 0x0>; | ||
379 | interrupts = <13 1>; | ||
380 | }; | ||
381 | |||
382 | entertainment-encryption@10,0 { | ||
383 | compatible = "pci8086,702.1", | ||
384 | "pci8086,702", | ||
385 | "pciclass101000", | ||
386 | "pciclass1010"; | ||
387 | |||
388 | reg = <0x18000 0x0 0x0 0x0 0x0>; | ||
389 | }; | ||
390 | |||
391 | co-processor@11,0 { | ||
392 | compatible = "pci8086,703.1", | ||
393 | "pci8086,703", | ||
394 | "pciclass0b4000", | ||
395 | "pciclass0b40"; | ||
396 | |||
397 | reg = <0x18800 0x0 0x0 0x0 0x0>; | ||
398 | interrupts = <1 1>; | ||
399 | }; | ||
400 | |||
401 | multimedia@12,0 { | ||
402 | compatible = "pci8086,704.0", | ||
403 | "pci8086,704", | ||
404 | "pciclass048000", | ||
405 | "pciclass0480"; | ||
406 | |||
407 | reg = <0x19000 0x0 0x0 0x0 0x0>; | ||
408 | }; | ||
409 | }; | ||
410 | |||
411 | isa@1f,0 { | ||
412 | #address-cells = <2>; | ||
413 | #size-cells = <1>; | ||
414 | compatible = "isa"; | ||
415 | ranges = <1 0 0 0 0 0x100>; | ||
416 | |||
417 | rtc@70 { | ||
418 | compatible = "intel,ce4100-rtc", "motorola,mc146818"; | ||
419 | interrupts = <8 3>; | ||
420 | interrupt-parent = <&ioapic1>; | ||
421 | ctrl-reg = <2>; | ||
422 | freq-reg = <0x26>; | ||
423 | reg = <1 0x70 2>; | ||
424 | }; | ||
425 | }; | ||
426 | }; | ||
427 | }; | ||
428 | }; | ||
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c index ea6529e93c6f..5c0207bf959b 100644 --- a/arch/x86/platform/mrst/mrst.c +++ b/arch/x86/platform/mrst/mrst.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <asm/apic.h> | 31 | #include <asm/apic.h> |
32 | #include <asm/io_apic.h> | 32 | #include <asm/io_apic.h> |
33 | #include <asm/mrst.h> | 33 | #include <asm/mrst.h> |
34 | #include <asm/mrst-vrtc.h> | ||
34 | #include <asm/io.h> | 35 | #include <asm/io.h> |
35 | #include <asm/i8259.h> | 36 | #include <asm/i8259.h> |
36 | #include <asm/intel_scu_ipc.h> | 37 | #include <asm/intel_scu_ipc.h> |
@@ -268,6 +269,7 @@ void __init x86_mrst_early_setup(void) | |||
268 | 269 | ||
269 | x86_platform.calibrate_tsc = mrst_calibrate_tsc; | 270 | x86_platform.calibrate_tsc = mrst_calibrate_tsc; |
270 | x86_platform.i8042_detect = mrst_i8042_detect; | 271 | x86_platform.i8042_detect = mrst_i8042_detect; |
272 | x86_init.timers.wallclock_init = mrst_rtc_init; | ||
271 | x86_init.pci.init = pci_mrst_init; | 273 | x86_init.pci.init = pci_mrst_init; |
272 | x86_init.pci.fixup_irqs = x86_init_noop; | 274 | x86_init.pci.fixup_irqs = x86_init_noop; |
273 | 275 | ||
diff --git a/arch/x86/platform/mrst/vrtc.c b/arch/x86/platform/mrst/vrtc.c index 32cd7edd71a0..04cf645feb92 100644 --- a/arch/x86/platform/mrst/vrtc.c +++ b/arch/x86/platform/mrst/vrtc.c | |||
@@ -100,22 +100,14 @@ int vrtc_set_mmss(unsigned long nowtime) | |||
100 | 100 | ||
101 | void __init mrst_rtc_init(void) | 101 | void __init mrst_rtc_init(void) |
102 | { | 102 | { |
103 | unsigned long rtc_paddr; | 103 | unsigned long vrtc_paddr = sfi_mrtc_array[0].phys_addr; |
104 | void __iomem *virt_base; | ||
105 | 104 | ||
106 | sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc); | 105 | sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc); |
107 | if (!sfi_mrtc_num) | 106 | if (!sfi_mrtc_num || !vrtc_paddr) |
108 | return; | 107 | return; |
109 | 108 | ||
110 | rtc_paddr = sfi_mrtc_array[0].phys_addr; | 109 | vrtc_virt_base = (void __iomem *)set_fixmap_offset_nocache(FIX_LNW_VRTC, |
111 | 110 | vrtc_paddr); | |
112 | /* vRTC's register address may not be page aligned */ | ||
113 | set_fixmap_nocache(FIX_LNW_VRTC, rtc_paddr); | ||
114 | |||
115 | virt_base = (void __iomem *)__fix_to_virt(FIX_LNW_VRTC); | ||
116 | virt_base += rtc_paddr & ~PAGE_MASK; | ||
117 | vrtc_virt_base = virt_base; | ||
118 | |||
119 | x86_platform.get_wallclock = vrtc_get_time; | 111 | x86_platform.get_wallclock = vrtc_get_time; |
120 | x86_platform.set_wallclock = vrtc_set_mmss; | 112 | x86_platform.set_wallclock = vrtc_set_mmss; |
121 | } | 113 | } |
diff --git a/arch/x86/platform/olpc/Makefile b/arch/x86/platform/olpc/Makefile index e797428b163b..c2a8cab65e5d 100644 --- a/arch/x86/platform/olpc/Makefile +++ b/arch/x86/platform/olpc/Makefile | |||
@@ -1,4 +1,4 @@ | |||
1 | obj-$(CONFIG_OLPC) += olpc.o | 1 | obj-$(CONFIG_OLPC) += olpc.o |
2 | obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o | 2 | obj-$(CONFIG_OLPC_XO1) += olpc-xo1.o |
3 | obj-$(CONFIG_OLPC_OPENFIRMWARE) += olpc_ofw.o | 3 | obj-$(CONFIG_OLPC) += olpc_ofw.o |
4 | obj-$(CONFIG_OLPC_OPENFIRMWARE_DT) += olpc_dt.o | 4 | obj-$(CONFIG_OF_PROMTREE) += olpc_dt.o |
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index 7b24460917d5..374a05d8ad22 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c | |||
@@ -131,7 +131,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | |||
131 | unsigned long mmr_offset, int limit) | 131 | unsigned long mmr_offset, int limit) |
132 | { | 132 | { |
133 | const struct cpumask *eligible_cpu = cpumask_of(cpu); | 133 | const struct cpumask *eligible_cpu = cpumask_of(cpu); |
134 | struct irq_cfg *cfg = get_irq_chip_data(irq); | 134 | struct irq_cfg *cfg = irq_get_chip_data(irq); |
135 | unsigned long mmr_value; | 135 | unsigned long mmr_value; |
136 | struct uv_IO_APIC_route_entry *entry; | 136 | struct uv_IO_APIC_route_entry *entry; |
137 | int mmr_pnode, err; | 137 | int mmr_pnode, err; |
@@ -148,7 +148,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | |||
148 | else | 148 | else |
149 | irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); | 149 | irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); |
150 | 150 | ||
151 | set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, | 151 | irq_set_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, |
152 | irq_name); | 152 | irq_name); |
153 | 153 | ||
154 | mmr_value = 0; | 154 | mmr_value = 0; |
diff --git a/arch/x86/platform/visws/visws_quirks.c b/arch/x86/platform/visws/visws_quirks.c index 632037671746..fe4cf8294878 100644 --- a/arch/x86/platform/visws/visws_quirks.c +++ b/arch/x86/platform/visws/visws_quirks.c | |||
@@ -569,11 +569,13 @@ out_unlock: | |||
569 | static struct irqaction master_action = { | 569 | static struct irqaction master_action = { |
570 | .handler = piix4_master_intr, | 570 | .handler = piix4_master_intr, |
571 | .name = "PIIX4-8259", | 571 | .name = "PIIX4-8259", |
572 | .flags = IRQF_NO_THREAD, | ||
572 | }; | 573 | }; |
573 | 574 | ||
574 | static struct irqaction cascade_action = { | 575 | static struct irqaction cascade_action = { |
575 | .handler = no_action, | 576 | .handler = no_action, |
576 | .name = "cascade", | 577 | .name = "cascade", |
578 | .flags = IRQF_NO_THREAD, | ||
577 | }; | 579 | }; |
578 | 580 | ||
579 | static inline void set_piix4_virtual_irq_type(void) | 581 | static inline void set_piix4_virtual_irq_type(void) |
@@ -606,7 +608,7 @@ static void __init visws_pre_intr_init(void) | |||
606 | chip = &cobalt_irq_type; | 608 | chip = &cobalt_irq_type; |
607 | 609 | ||
608 | if (chip) | 610 | if (chip) |
609 | set_irq_chip(i, chip); | 611 | irq_set_chip(i, chip); |
610 | } | 612 | } |
611 | 613 | ||
612 | setup_irq(CO_IRQ_8259, &master_action); | 614 | setup_irq(CO_IRQ_8259, &master_action); |
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 5b54892e4bc3..1c7121ba18ff 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig | |||
@@ -38,7 +38,7 @@ config XEN_MAX_DOMAIN_MEMORY | |||
38 | 38 | ||
39 | config XEN_SAVE_RESTORE | 39 | config XEN_SAVE_RESTORE |
40 | bool | 40 | bool |
41 | depends on XEN && PM | 41 | depends on XEN |
42 | default y | 42 | default y |
43 | 43 | ||
44 | config XEN_DEBUG_FS | 44 | config XEN_DEBUG_FS |
@@ -48,3 +48,11 @@ config XEN_DEBUG_FS | |||
48 | help | 48 | help |
49 | Enable statistics output and various tuning options in debugfs. | 49 | Enable statistics output and various tuning options in debugfs. |
50 | Enabling this option may incur a significant performance overhead. | 50 | Enabling this option may incur a significant performance overhead. |
51 | |||
52 | config XEN_DEBUG | ||
53 | bool "Enable Xen debug checks" | ||
54 | depends on XEN | ||
55 | default n | ||
56 | help | ||
57 | Enable various WARN_ON checks in the Xen MMU code. | ||
58 | Enabling this option WILL incur a significant performance overhead. | ||
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 50542efe45fb..49dbd78ec3cb 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -1284,15 +1284,14 @@ static int init_hvm_pv_info(int *major, int *minor) | |||
1284 | 1284 | ||
1285 | xen_setup_features(); | 1285 | xen_setup_features(); |
1286 | 1286 | ||
1287 | pv_info = xen_info; | 1287 | pv_info.name = "Xen HVM"; |
1288 | pv_info.kernel_rpl = 0; | ||
1289 | 1288 | ||
1290 | xen_domain_type = XEN_HVM_DOMAIN; | 1289 | xen_domain_type = XEN_HVM_DOMAIN; |
1291 | 1290 | ||
1292 | return 0; | 1291 | return 0; |
1293 | } | 1292 | } |
1294 | 1293 | ||
1295 | void xen_hvm_init_shared_info(void) | 1294 | void __ref xen_hvm_init_shared_info(void) |
1296 | { | 1295 | { |
1297 | int cpu; | 1296 | int cpu; |
1298 | struct xen_add_to_physmap xatp; | 1297 | struct xen_add_to_physmap xatp; |
@@ -1331,6 +1330,8 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self, | |||
1331 | switch (action) { | 1330 | switch (action) { |
1332 | case CPU_UP_PREPARE: | 1331 | case CPU_UP_PREPARE: |
1333 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; | 1332 | per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; |
1333 | if (xen_have_vector_callback) | ||
1334 | xen_init_lock_cpu(cpu); | ||
1334 | break; | 1335 | break; |
1335 | default: | 1336 | default: |
1336 | break; | 1337 | break; |
@@ -1355,6 +1356,7 @@ static void __init xen_hvm_guest_init(void) | |||
1355 | 1356 | ||
1356 | if (xen_feature(XENFEAT_hvm_callback_vector)) | 1357 | if (xen_feature(XENFEAT_hvm_callback_vector)) |
1357 | xen_have_vector_callback = 1; | 1358 | xen_have_vector_callback = 1; |
1359 | xen_hvm_smp_init(); | ||
1358 | register_cpu_notifier(&xen_hvm_cpu_notifier); | 1360 | register_cpu_notifier(&xen_hvm_cpu_notifier); |
1359 | xen_unplug_emulated_devices(); | 1361 | xen_unplug_emulated_devices(); |
1360 | have_vcpu_info_placement = 0; | 1362 | have_vcpu_info_placement = 0; |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index f6089421147a..c82df6c9c0f0 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -46,6 +46,7 @@ | |||
46 | #include <linux/module.h> | 46 | #include <linux/module.h> |
47 | #include <linux/gfp.h> | 47 | #include <linux/gfp.h> |
48 | #include <linux/memblock.h> | 48 | #include <linux/memblock.h> |
49 | #include <linux/seq_file.h> | ||
49 | 50 | ||
50 | #include <asm/pgtable.h> | 51 | #include <asm/pgtable.h> |
51 | #include <asm/tlbflush.h> | 52 | #include <asm/tlbflush.h> |
@@ -78,8 +79,7 @@ | |||
78 | 79 | ||
79 | /* | 80 | /* |
80 | * Protects atomic reservation decrease/increase against concurrent increases. | 81 | * Protects atomic reservation decrease/increase against concurrent increases. |
81 | * Also protects non-atomic updates of current_pages and driver_pages, and | 82 | * Also protects non-atomic updates of current_pages and balloon lists. |
82 | * balloon lists. | ||
83 | */ | 83 | */ |
84 | DEFINE_SPINLOCK(xen_reservation_lock); | 84 | DEFINE_SPINLOCK(xen_reservation_lock); |
85 | 85 | ||
@@ -416,8 +416,12 @@ static pteval_t pte_pfn_to_mfn(pteval_t val) | |||
416 | if (val & _PAGE_PRESENT) { | 416 | if (val & _PAGE_PRESENT) { |
417 | unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; | 417 | unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; |
418 | pteval_t flags = val & PTE_FLAGS_MASK; | 418 | pteval_t flags = val & PTE_FLAGS_MASK; |
419 | unsigned long mfn = pfn_to_mfn(pfn); | 419 | unsigned long mfn; |
420 | 420 | ||
421 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | ||
422 | mfn = get_phys_to_machine(pfn); | ||
423 | else | ||
424 | mfn = pfn; | ||
421 | /* | 425 | /* |
422 | * If there's no mfn for the pfn, then just create an | 426 | * If there's no mfn for the pfn, then just create an |
423 | * empty non-present pte. Unfortunately this loses | 427 | * empty non-present pte. Unfortunately this loses |
@@ -427,8 +431,18 @@ static pteval_t pte_pfn_to_mfn(pteval_t val) | |||
427 | if (unlikely(mfn == INVALID_P2M_ENTRY)) { | 431 | if (unlikely(mfn == INVALID_P2M_ENTRY)) { |
428 | mfn = 0; | 432 | mfn = 0; |
429 | flags = 0; | 433 | flags = 0; |
434 | } else { | ||
435 | /* | ||
436 | * Paramount to do this test _after_ the | ||
437 | * INVALID_P2M_ENTRY as INVALID_P2M_ENTRY & | ||
438 | * IDENTITY_FRAME_BIT resolves to true. | ||
439 | */ | ||
440 | mfn &= ~FOREIGN_FRAME_BIT; | ||
441 | if (mfn & IDENTITY_FRAME_BIT) { | ||
442 | mfn &= ~IDENTITY_FRAME_BIT; | ||
443 | flags |= _PAGE_IOMAP; | ||
444 | } | ||
430 | } | 445 | } |
431 | |||
432 | val = ((pteval_t)mfn << PAGE_SHIFT) | flags; | 446 | val = ((pteval_t)mfn << PAGE_SHIFT) | flags; |
433 | } | 447 | } |
434 | 448 | ||
@@ -532,6 +546,41 @@ pte_t xen_make_pte(pteval_t pte) | |||
532 | } | 546 | } |
533 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); | 547 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); |
534 | 548 | ||
549 | #ifdef CONFIG_XEN_DEBUG | ||
550 | pte_t xen_make_pte_debug(pteval_t pte) | ||
551 | { | ||
552 | phys_addr_t addr = (pte & PTE_PFN_MASK); | ||
553 | phys_addr_t other_addr; | ||
554 | bool io_page = false; | ||
555 | pte_t _pte; | ||
556 | |||
557 | if (pte & _PAGE_IOMAP) | ||
558 | io_page = true; | ||
559 | |||
560 | _pte = xen_make_pte(pte); | ||
561 | |||
562 | if (!addr) | ||
563 | return _pte; | ||
564 | |||
565 | if (io_page && | ||
566 | (xen_initial_domain() || addr >= ISA_END_ADDRESS)) { | ||
567 | other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT; | ||
568 | WARN(addr != other_addr, | ||
569 | "0x%lx is using VM_IO, but it is 0x%lx!\n", | ||
570 | (unsigned long)addr, (unsigned long)other_addr); | ||
571 | } else { | ||
572 | pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP; | ||
573 | other_addr = (_pte.pte & PTE_PFN_MASK); | ||
574 | WARN((addr == other_addr) && (!io_page) && (!iomap_set), | ||
575 | "0x%lx is missing VM_IO (and wasn't fixed)!\n", | ||
576 | (unsigned long)addr); | ||
577 | } | ||
578 | |||
579 | return _pte; | ||
580 | } | ||
581 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug); | ||
582 | #endif | ||
583 | |||
535 | pgd_t xen_make_pgd(pgdval_t pgd) | 584 | pgd_t xen_make_pgd(pgdval_t pgd) |
536 | { | 585 | { |
537 | pgd = pte_pfn_to_mfn(pgd); | 586 | pgd = pte_pfn_to_mfn(pgd); |
@@ -1438,10 +1487,12 @@ static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) | |||
1438 | /* | 1487 | /* |
1439 | * If the new pfn is within the range of the newly allocated | 1488 | * If the new pfn is within the range of the newly allocated |
1440 | * kernel pagetable, and it isn't being mapped into an | 1489 | * kernel pagetable, and it isn't being mapped into an |
1441 | * early_ioremap fixmap slot, make sure it is RO. | 1490 | * early_ioremap fixmap slot as a freshly allocated page, make sure |
1491 | * it is RO. | ||
1442 | */ | 1492 | */ |
1443 | if (!is_early_ioremap_ptep(ptep) && | 1493 | if (((!is_early_ioremap_ptep(ptep) && |
1444 | pfn >= e820_table_start && pfn < e820_table_end) | 1494 | pfn >= pgt_buf_start && pfn < pgt_buf_end)) || |
1495 | (is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1))) | ||
1445 | pte = pte_wrprotect(pte); | 1496 | pte = pte_wrprotect(pte); |
1446 | 1497 | ||
1447 | return pte; | 1498 | return pte; |
@@ -1651,9 +1702,6 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) | |||
1651 | for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { | 1702 | for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { |
1652 | pte_t pte; | 1703 | pte_t pte; |
1653 | 1704 | ||
1654 | if (pfn > max_pfn_mapped) | ||
1655 | max_pfn_mapped = pfn; | ||
1656 | |||
1657 | if (!pte_none(pte_page[pteidx])) | 1705 | if (!pte_none(pte_page[pteidx])) |
1658 | continue; | 1706 | continue; |
1659 | 1707 | ||
@@ -1695,7 +1743,7 @@ static void convert_pfn_mfn(void *v) | |||
1695 | } | 1743 | } |
1696 | 1744 | ||
1697 | /* | 1745 | /* |
1698 | * Set up the inital kernel pagetable. | 1746 | * Set up the initial kernel pagetable. |
1699 | * | 1747 | * |
1700 | * We can construct this by grafting the Xen provided pagetable into | 1748 | * We can construct this by grafting the Xen provided pagetable into |
1701 | * head_64.S's preconstructed pagetables. We copy the Xen L2's into | 1749 | * head_64.S's preconstructed pagetables. We copy the Xen L2's into |
@@ -1711,6 +1759,12 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, | |||
1711 | pud_t *l3; | 1759 | pud_t *l3; |
1712 | pmd_t *l2; | 1760 | pmd_t *l2; |
1713 | 1761 | ||
1762 | /* max_pfn_mapped is the last pfn mapped in the initial memory | ||
1763 | * mappings. Considering that on Xen after the kernel mappings we | ||
1764 | * have the mappings of some pages that don't exist in pfn space, we | ||
1765 | * set max_pfn_mapped to the last real pfn mapped. */ | ||
1766 | max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); | ||
1767 | |||
1714 | /* Zap identity mapping */ | 1768 | /* Zap identity mapping */ |
1715 | init_level4_pgt[0] = __pgd(0); | 1769 | init_level4_pgt[0] = __pgd(0); |
1716 | 1770 | ||
@@ -1815,9 +1869,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, | |||
1815 | initial_kernel_pmd = | 1869 | initial_kernel_pmd = |
1816 | extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); | 1870 | extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE); |
1817 | 1871 | ||
1818 | max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + | 1872 | max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list)); |
1819 | xen_start_info->nr_pt_frames * PAGE_SIZE + | ||
1820 | 512*1024); | ||
1821 | 1873 | ||
1822 | kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); | 1874 | kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); |
1823 | memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); | 1875 | memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); |
@@ -1940,6 +1992,9 @@ __init void xen_ident_map_ISA(void) | |||
1940 | 1992 | ||
1941 | static __init void xen_post_allocator_init(void) | 1993 | static __init void xen_post_allocator_init(void) |
1942 | { | 1994 | { |
1995 | #ifdef CONFIG_XEN_DEBUG | ||
1996 | pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug); | ||
1997 | #endif | ||
1943 | pv_mmu_ops.set_pte = xen_set_pte; | 1998 | pv_mmu_ops.set_pte = xen_set_pte; |
1944 | pv_mmu_ops.set_pmd = xen_set_pmd; | 1999 | pv_mmu_ops.set_pmd = xen_set_pmd; |
1945 | pv_mmu_ops.set_pud = xen_set_pud; | 2000 | pv_mmu_ops.set_pud = xen_set_pud; |
@@ -2072,7 +2127,7 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order, | |||
2072 | in_frames[i] = virt_to_mfn(vaddr); | 2127 | in_frames[i] = virt_to_mfn(vaddr); |
2073 | 2128 | ||
2074 | MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0); | 2129 | MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0); |
2075 | set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY); | 2130 | __set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY); |
2076 | 2131 | ||
2077 | if (out_frames) | 2132 | if (out_frames) |
2078 | out_frames[i] = virt_to_pfn(vaddr); | 2133 | out_frames[i] = virt_to_pfn(vaddr); |
@@ -2351,6 +2406,18 @@ EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); | |||
2351 | 2406 | ||
2352 | #ifdef CONFIG_XEN_DEBUG_FS | 2407 | #ifdef CONFIG_XEN_DEBUG_FS |
2353 | 2408 | ||
2409 | static int p2m_dump_open(struct inode *inode, struct file *filp) | ||
2410 | { | ||
2411 | return single_open(filp, p2m_dump_show, NULL); | ||
2412 | } | ||
2413 | |||
2414 | static const struct file_operations p2m_dump_fops = { | ||
2415 | .open = p2m_dump_open, | ||
2416 | .read = seq_read, | ||
2417 | .llseek = seq_lseek, | ||
2418 | .release = single_release, | ||
2419 | }; | ||
2420 | |||
2354 | static struct dentry *d_mmu_debug; | 2421 | static struct dentry *d_mmu_debug; |
2355 | 2422 | ||
2356 | static int __init xen_mmu_debugfs(void) | 2423 | static int __init xen_mmu_debugfs(void) |
@@ -2406,6 +2473,7 @@ static int __init xen_mmu_debugfs(void) | |||
2406 | debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug, | 2473 | debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug, |
2407 | &mmu_stats.prot_commit_batched); | 2474 | &mmu_stats.prot_commit_batched); |
2408 | 2475 | ||
2476 | debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops); | ||
2409 | return 0; | 2477 | return 0; |
2410 | } | 2478 | } |
2411 | fs_initcall(xen_mmu_debugfs); | 2479 | fs_initcall(xen_mmu_debugfs); |
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index fd12d7ce7ff9..215a3ce61068 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c | |||
@@ -23,6 +23,129 @@ | |||
23 | * P2M_PER_PAGE depends on the architecture, as a mfn is always | 23 | * P2M_PER_PAGE depends on the architecture, as a mfn is always |
24 | * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to | 24 | * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to |
25 | * 512 and 1024 entries respectively. | 25 | * 512 and 1024 entries respectively. |
26 | * | ||
27 | * In short, these structures contain the Machine Frame Number (MFN) of the PFN. | ||
28 | * | ||
29 | * However not all entries are filled with MFNs. Specifically for all other | ||
30 | * leaf entries, or for the top root, or middle one, for which there is a void | ||
31 | * entry, we assume it is "missing". So (for example) | ||
32 | * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY. | ||
33 | * | ||
34 | * We also have the possibility of setting 1-1 mappings on certain regions, so | ||
35 | * that: | ||
36 | * pfn_to_mfn(0xc0000)=0xc0000 | ||
37 | * | ||
38 | * The benefit of this is, that we can assume for non-RAM regions (think | ||
39 | * PCI BARs, or ACPI spaces), we can create mappings easily b/c we | ||
40 | * get the PFN value to match the MFN. | ||
41 | * | ||
42 | * For this to work efficiently we have one new page p2m_identity and | ||
43 | * allocate (via reserved_brk) any other pages we need to cover the sides | ||
44 | * (1GB or 4MB boundary violations). All entries in p2m_identity are set to | ||
45 | * INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs, | ||
46 | * no other fancy value). | ||
47 | * | ||
48 | * On lookup we spot that the entry points to p2m_identity and return the | ||
49 | * identity value instead of dereferencing and returning INVALID_P2M_ENTRY. | ||
50 | * If the entry points to an allocated page, we just proceed as before and | ||
51 | * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in | ||
52 | * appropriate functions (pfn_to_mfn). | ||
53 | * | ||
54 | * The reason for having the IDENTITY_FRAME_BIT instead of just returning the | ||
55 | * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a | ||
56 | * non-identity pfn. To protect ourselves against we elect to set (and get) the | ||
57 | * IDENTITY_FRAME_BIT on all identity mapped PFNs. | ||
58 | * | ||
59 | * This simplistic diagram is used to explain the more subtle piece of code. | ||
60 | * There is also a digram of the P2M at the end that can help. | ||
61 | * Imagine your E820 looking as so: | ||
62 | * | ||
63 | * 1GB 2GB | ||
64 | * /-------------------+---------\/----\ /----------\ /---+-----\ | ||
65 | * | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM | | ||
66 | * \-------------------+---------/\----/ \----------/ \---+-----/ | ||
67 | * ^- 1029MB ^- 2001MB | ||
68 | * | ||
69 | * [1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100), | ||
70 | * 2048MB = 524288 (0x80000)] | ||
71 | * | ||
72 | * And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB | ||
73 | * is actually not present (would have to kick the balloon driver to put it in). | ||
74 | * | ||
75 | * When we are told to set the PFNs for identity mapping (see patch: "xen/setup: | ||
76 | * Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start | ||
77 | * of the PFN and the end PFN (263424 and 512256 respectively). The first step | ||
78 | * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page | ||
79 | * covers 512^2 of page estate (1GB) and in case the start or end PFN is not | ||
80 | * aligned on 512^2*PAGE_SIZE (1GB) we loop on aligned 1GB PFNs from start pfn | ||
81 | * to end pfn. We reserve_brk top leaf pages if they are missing (means they | ||
82 | * point to p2m_mid_missing). | ||
83 | * | ||
84 | * With the E820 example above, 263424 is not 1GB aligned so we allocate a | ||
85 | * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000. | ||
86 | * Each entry in the allocate page is "missing" (points to p2m_missing). | ||
87 | * | ||
88 | * Next stage is to determine if we need to do a more granular boundary check | ||
89 | * on the 4MB (or 2MB depending on architecture) off the start and end pfn's. | ||
90 | * We check if the start pfn and end pfn violate that boundary check, and if | ||
91 | * so reserve_brk a middle (p2m[x][y]) leaf page. This way we have a much finer | ||
92 | * granularity of setting which PFNs are missing and which ones are identity. | ||
93 | * In our example 263424 and 512256 both fail the check so we reserve_brk two | ||
94 | * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing" | ||
95 | * values) and assign them to p2m[1][2] and p2m[1][488] respectively. | ||
96 | * | ||
97 | * At this point we would at minimum reserve_brk one page, but could be up to | ||
98 | * three. Each call to set_phys_range_identity has at maximum a three page | ||
99 | * cost. If we were to query the P2M at this stage, all those entries from | ||
100 | * start PFN through end PFN (so 1029MB -> 2001MB) would return | ||
101 | * INVALID_P2M_ENTRY ("missing"). | ||
102 | * | ||
103 | * The next step is to walk from the start pfn to the end pfn setting | ||
104 | * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity. | ||
105 | * If we find that the middle leaf is pointing to p2m_missing we can swap it | ||
106 | * over to p2m_identity - this way covering 4MB (or 2MB) PFN space. At this | ||
107 | * point we do not need to worry about boundary aligment (so no need to | ||
108 | * reserve_brk a middle page, figure out which PFNs are "missing" and which | ||
109 | * ones are identity), as that has been done earlier. If we find that the | ||
110 | * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference | ||
111 | * that page (which covers 512 PFNs) and set the appropriate PFN with | ||
112 | * IDENTITY_FRAME_BIT. In our example 263424 and 512256 end up there, and we | ||
113 | * set from p2m[1][2][256->511] and p2m[1][488][0->256] with | ||
114 | * IDENTITY_FRAME_BIT set. | ||
115 | * | ||
116 | * All other regions that are void (or not filled) either point to p2m_missing | ||
117 | * (considered missing) or have the default value of INVALID_P2M_ENTRY (also | ||
118 | * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511] | ||
119 | * contain the INVALID_P2M_ENTRY value and are considered "missing." | ||
120 | * | ||
121 | * This is what the p2m ends up looking (for the E820 above) with this | ||
122 | * fabulous drawing: | ||
123 | * | ||
124 | * p2m /--------------\ | ||
125 | * /-----\ | &mfn_list[0],| /-----------------\ | ||
126 | * | 0 |------>| &mfn_list[1],| /---------------\ | ~0, ~0, .. | | ||
127 | * |-----| | ..., ~0, ~0 | | ~0, ~0, [x]---+----->| IDENTITY [@256] | | ||
128 | * | 1 |---\ \--------------/ | [p2m_identity]+\ | IDENTITY [@257] | | ||
129 | * |-----| \ | [p2m_identity]+\\ | .... | | ||
130 | * | 2 |--\ \-------------------->| ... | \\ \----------------/ | ||
131 | * |-----| \ \---------------/ \\ | ||
132 | * | 3 |\ \ \\ p2m_identity | ||
133 | * |-----| \ \-------------------->/---------------\ /-----------------\ | ||
134 | * | .. +->+ | [p2m_identity]+-->| ~0, ~0, ~0, ... | | ||
135 | * \-----/ / | [p2m_identity]+-->| ..., ~0 | | ||
136 | * / /---------------\ | .... | \-----------------/ | ||
137 | * / | IDENTITY[@0] | /-+-[x], ~0, ~0.. | | ||
138 | * / | IDENTITY[@256]|<----/ \---------------/ | ||
139 | * / | ~0, ~0, .... | | ||
140 | * | \---------------/ | ||
141 | * | | ||
142 | * p2m_missing p2m_missing | ||
143 | * /------------------\ /------------\ | ||
144 | * | [p2m_mid_missing]+---->| ~0, ~0, ~0 | | ||
145 | * | [p2m_mid_missing]+---->| ..., ~0 | | ||
146 | * \------------------/ \------------/ | ||
147 | * | ||
148 | * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT) | ||
26 | */ | 149 | */ |
27 | 150 | ||
28 | #include <linux/init.h> | 151 | #include <linux/init.h> |
@@ -30,6 +153,7 @@ | |||
30 | #include <linux/list.h> | 153 | #include <linux/list.h> |
31 | #include <linux/hash.h> | 154 | #include <linux/hash.h> |
32 | #include <linux/sched.h> | 155 | #include <linux/sched.h> |
156 | #include <linux/seq_file.h> | ||
33 | 157 | ||
34 | #include <asm/cache.h> | 158 | #include <asm/cache.h> |
35 | #include <asm/setup.h> | 159 | #include <asm/setup.h> |
@@ -59,9 +183,15 @@ static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE); | |||
59 | static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE); | 183 | static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE); |
60 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE); | 184 | static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE); |
61 | 185 | ||
186 | static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE); | ||
187 | |||
62 | RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); | 188 | RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); |
63 | RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); | 189 | RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); |
64 | 190 | ||
191 | /* We might hit two boundary violations at the start and end, at max each | ||
192 | * boundary violation will require three middle nodes. */ | ||
193 | RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3); | ||
194 | |||
65 | static inline unsigned p2m_top_index(unsigned long pfn) | 195 | static inline unsigned p2m_top_index(unsigned long pfn) |
66 | { | 196 | { |
67 | BUG_ON(pfn >= MAX_P2M_PFN); | 197 | BUG_ON(pfn >= MAX_P2M_PFN); |
@@ -136,7 +266,7 @@ static void p2m_init(unsigned long *p2m) | |||
136 | * - After resume we're called from within stop_machine, but the mfn | 266 | * - After resume we're called from within stop_machine, but the mfn |
137 | * tree should alreay be completely allocated. | 267 | * tree should alreay be completely allocated. |
138 | */ | 268 | */ |
139 | void xen_build_mfn_list_list(void) | 269 | void __ref xen_build_mfn_list_list(void) |
140 | { | 270 | { |
141 | unsigned long pfn; | 271 | unsigned long pfn; |
142 | 272 | ||
@@ -221,6 +351,9 @@ void __init xen_build_dynamic_phys_to_machine(void) | |||
221 | p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE); | 351 | p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE); |
222 | p2m_top_init(p2m_top); | 352 | p2m_top_init(p2m_top); |
223 | 353 | ||
354 | p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
355 | p2m_init(p2m_identity); | ||
356 | |||
224 | /* | 357 | /* |
225 | * The domain builder gives us a pre-constructed p2m array in | 358 | * The domain builder gives us a pre-constructed p2m array in |
226 | * mfn_list for all the pages initially given to us, so we just | 359 | * mfn_list for all the pages initially given to us, so we just |
@@ -266,6 +399,14 @@ unsigned long get_phys_to_machine(unsigned long pfn) | |||
266 | mididx = p2m_mid_index(pfn); | 399 | mididx = p2m_mid_index(pfn); |
267 | idx = p2m_index(pfn); | 400 | idx = p2m_index(pfn); |
268 | 401 | ||
402 | /* | ||
403 | * The INVALID_P2M_ENTRY is filled in both p2m_*identity | ||
404 | * and in p2m_*missing, so returning the INVALID_P2M_ENTRY | ||
405 | * would be wrong. | ||
406 | */ | ||
407 | if (p2m_top[topidx][mididx] == p2m_identity) | ||
408 | return IDENTITY_FRAME(pfn); | ||
409 | |||
269 | return p2m_top[topidx][mididx][idx]; | 410 | return p2m_top[topidx][mididx][idx]; |
270 | } | 411 | } |
271 | EXPORT_SYMBOL_GPL(get_phys_to_machine); | 412 | EXPORT_SYMBOL_GPL(get_phys_to_machine); |
@@ -335,9 +476,11 @@ static bool alloc_p2m(unsigned long pfn) | |||
335 | p2m_top_mfn_p[topidx] = mid_mfn; | 476 | p2m_top_mfn_p[topidx] = mid_mfn; |
336 | } | 477 | } |
337 | 478 | ||
338 | if (p2m_top[topidx][mididx] == p2m_missing) { | 479 | if (p2m_top[topidx][mididx] == p2m_identity || |
480 | p2m_top[topidx][mididx] == p2m_missing) { | ||
339 | /* p2m leaf page is missing */ | 481 | /* p2m leaf page is missing */ |
340 | unsigned long *p2m; | 482 | unsigned long *p2m; |
483 | unsigned long *p2m_orig = p2m_top[topidx][mididx]; | ||
341 | 484 | ||
342 | p2m = alloc_p2m_page(); | 485 | p2m = alloc_p2m_page(); |
343 | if (!p2m) | 486 | if (!p2m) |
@@ -345,7 +488,7 @@ static bool alloc_p2m(unsigned long pfn) | |||
345 | 488 | ||
346 | p2m_init(p2m); | 489 | p2m_init(p2m); |
347 | 490 | ||
348 | if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing) | 491 | if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig) |
349 | free_p2m_page(p2m); | 492 | free_p2m_page(p2m); |
350 | else | 493 | else |
351 | mid_mfn[mididx] = virt_to_mfn(p2m); | 494 | mid_mfn[mididx] = virt_to_mfn(p2m); |
@@ -354,11 +497,91 @@ static bool alloc_p2m(unsigned long pfn) | |||
354 | return true; | 497 | return true; |
355 | } | 498 | } |
356 | 499 | ||
500 | bool __early_alloc_p2m(unsigned long pfn) | ||
501 | { | ||
502 | unsigned topidx, mididx, idx; | ||
503 | |||
504 | topidx = p2m_top_index(pfn); | ||
505 | mididx = p2m_mid_index(pfn); | ||
506 | idx = p2m_index(pfn); | ||
507 | |||
508 | /* Pfff.. No boundary cross-over, lets get out. */ | ||
509 | if (!idx) | ||
510 | return false; | ||
511 | |||
512 | WARN(p2m_top[topidx][mididx] == p2m_identity, | ||
513 | "P2M[%d][%d] == IDENTITY, should be MISSING (or alloced)!\n", | ||
514 | topidx, mididx); | ||
515 | |||
516 | /* | ||
517 | * Could be done by xen_build_dynamic_phys_to_machine.. | ||
518 | */ | ||
519 | if (p2m_top[topidx][mididx] != p2m_missing) | ||
520 | return false; | ||
521 | |||
522 | /* Boundary cross-over for the edges: */ | ||
523 | if (idx) { | ||
524 | unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
525 | |||
526 | p2m_init(p2m); | ||
527 | |||
528 | p2m_top[topidx][mididx] = p2m; | ||
529 | |||
530 | } | ||
531 | return idx != 0; | ||
532 | } | ||
533 | unsigned long set_phys_range_identity(unsigned long pfn_s, | ||
534 | unsigned long pfn_e) | ||
535 | { | ||
536 | unsigned long pfn; | ||
537 | |||
538 | if (unlikely(pfn_s >= MAX_P2M_PFN || pfn_e >= MAX_P2M_PFN)) | ||
539 | return 0; | ||
540 | |||
541 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) | ||
542 | return pfn_e - pfn_s; | ||
543 | |||
544 | if (pfn_s > pfn_e) | ||
545 | return 0; | ||
546 | |||
547 | for (pfn = (pfn_s & ~(P2M_MID_PER_PAGE * P2M_PER_PAGE - 1)); | ||
548 | pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE)); | ||
549 | pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE) | ||
550 | { | ||
551 | unsigned topidx = p2m_top_index(pfn); | ||
552 | if (p2m_top[topidx] == p2m_mid_missing) { | ||
553 | unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); | ||
554 | |||
555 | p2m_mid_init(mid); | ||
556 | |||
557 | p2m_top[topidx] = mid; | ||
558 | } | ||
559 | } | ||
560 | |||
561 | __early_alloc_p2m(pfn_s); | ||
562 | __early_alloc_p2m(pfn_e); | ||
563 | |||
564 | for (pfn = pfn_s; pfn < pfn_e; pfn++) | ||
565 | if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn))) | ||
566 | break; | ||
567 | |||
568 | if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s), | ||
569 | "Identity mapping failed. We are %ld short of 1-1 mappings!\n", | ||
570 | (pfn_e - pfn_s) - (pfn - pfn_s))) | ||
571 | printk(KERN_DEBUG "1-1 mapping on %lx->%lx\n", pfn_s, pfn); | ||
572 | |||
573 | return pfn - pfn_s; | ||
574 | } | ||
575 | |||
357 | /* Try to install p2m mapping; fail if intermediate bits missing */ | 576 | /* Try to install p2m mapping; fail if intermediate bits missing */ |
358 | bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) | 577 | bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) |
359 | { | 578 | { |
360 | unsigned topidx, mididx, idx; | 579 | unsigned topidx, mididx, idx; |
361 | 580 | ||
581 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { | ||
582 | BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); | ||
583 | return true; | ||
584 | } | ||
362 | if (unlikely(pfn >= MAX_P2M_PFN)) { | 585 | if (unlikely(pfn >= MAX_P2M_PFN)) { |
363 | BUG_ON(mfn != INVALID_P2M_ENTRY); | 586 | BUG_ON(mfn != INVALID_P2M_ENTRY); |
364 | return true; | 587 | return true; |
@@ -368,6 +591,21 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) | |||
368 | mididx = p2m_mid_index(pfn); | 591 | mididx = p2m_mid_index(pfn); |
369 | idx = p2m_index(pfn); | 592 | idx = p2m_index(pfn); |
370 | 593 | ||
594 | /* For sparse holes were the p2m leaf has real PFN along with | ||
595 | * PCI holes, stick in the PFN as the MFN value. | ||
596 | */ | ||
597 | if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) { | ||
598 | if (p2m_top[topidx][mididx] == p2m_identity) | ||
599 | return true; | ||
600 | |||
601 | /* Swap over from MISSING to IDENTITY if needed. */ | ||
602 | if (p2m_top[topidx][mididx] == p2m_missing) { | ||
603 | WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing, | ||
604 | p2m_identity) != p2m_missing); | ||
605 | return true; | ||
606 | } | ||
607 | } | ||
608 | |||
371 | if (p2m_top[topidx][mididx] == p2m_missing) | 609 | if (p2m_top[topidx][mididx] == p2m_missing) |
372 | return mfn == INVALID_P2M_ENTRY; | 610 | return mfn == INVALID_P2M_ENTRY; |
373 | 611 | ||
@@ -378,11 +616,6 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) | |||
378 | 616 | ||
379 | bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) | 617 | bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) |
380 | { | 618 | { |
381 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { | ||
382 | BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); | ||
383 | return true; | ||
384 | } | ||
385 | |||
386 | if (unlikely(!__set_phys_to_machine(pfn, mfn))) { | 619 | if (unlikely(!__set_phys_to_machine(pfn, mfn))) { |
387 | if (!alloc_p2m(pfn)) | 620 | if (!alloc_p2m(pfn)) |
388 | return false; | 621 | return false; |
@@ -421,7 +654,7 @@ int m2p_add_override(unsigned long mfn, struct page *page) | |||
421 | { | 654 | { |
422 | unsigned long flags; | 655 | unsigned long flags; |
423 | unsigned long pfn; | 656 | unsigned long pfn; |
424 | unsigned long address; | 657 | unsigned long uninitialized_var(address); |
425 | unsigned level; | 658 | unsigned level; |
426 | pte_t *ptep = NULL; | 659 | pte_t *ptep = NULL; |
427 | 660 | ||
@@ -455,7 +688,7 @@ int m2p_remove_override(struct page *page) | |||
455 | unsigned long flags; | 688 | unsigned long flags; |
456 | unsigned long mfn; | 689 | unsigned long mfn; |
457 | unsigned long pfn; | 690 | unsigned long pfn; |
458 | unsigned long address; | 691 | unsigned long uninitialized_var(address); |
459 | unsigned level; | 692 | unsigned level; |
460 | pte_t *ptep = NULL; | 693 | pte_t *ptep = NULL; |
461 | 694 | ||
@@ -520,3 +753,80 @@ unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn) | |||
520 | return ret; | 753 | return ret; |
521 | } | 754 | } |
522 | EXPORT_SYMBOL_GPL(m2p_find_override_pfn); | 755 | EXPORT_SYMBOL_GPL(m2p_find_override_pfn); |
756 | |||
757 | #ifdef CONFIG_XEN_DEBUG_FS | ||
758 | |||
759 | int p2m_dump_show(struct seq_file *m, void *v) | ||
760 | { | ||
761 | static const char * const level_name[] = { "top", "middle", | ||
762 | "entry", "abnormal" }; | ||
763 | static const char * const type_name[] = { "identity", "missing", | ||
764 | "pfn", "abnormal"}; | ||
765 | #define TYPE_IDENTITY 0 | ||
766 | #define TYPE_MISSING 1 | ||
767 | #define TYPE_PFN 2 | ||
768 | #define TYPE_UNKNOWN 3 | ||
769 | unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0; | ||
770 | unsigned int uninitialized_var(prev_level); | ||
771 | unsigned int uninitialized_var(prev_type); | ||
772 | |||
773 | if (!p2m_top) | ||
774 | return 0; | ||
775 | |||
776 | for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) { | ||
777 | unsigned topidx = p2m_top_index(pfn); | ||
778 | unsigned mididx = p2m_mid_index(pfn); | ||
779 | unsigned idx = p2m_index(pfn); | ||
780 | unsigned lvl, type; | ||
781 | |||
782 | lvl = 4; | ||
783 | type = TYPE_UNKNOWN; | ||
784 | if (p2m_top[topidx] == p2m_mid_missing) { | ||
785 | lvl = 0; type = TYPE_MISSING; | ||
786 | } else if (p2m_top[topidx] == NULL) { | ||
787 | lvl = 0; type = TYPE_UNKNOWN; | ||
788 | } else if (p2m_top[topidx][mididx] == NULL) { | ||
789 | lvl = 1; type = TYPE_UNKNOWN; | ||
790 | } else if (p2m_top[topidx][mididx] == p2m_identity) { | ||
791 | lvl = 1; type = TYPE_IDENTITY; | ||
792 | } else if (p2m_top[topidx][mididx] == p2m_missing) { | ||
793 | lvl = 1; type = TYPE_MISSING; | ||
794 | } else if (p2m_top[topidx][mididx][idx] == 0) { | ||
795 | lvl = 2; type = TYPE_UNKNOWN; | ||
796 | } else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) { | ||
797 | lvl = 2; type = TYPE_IDENTITY; | ||
798 | } else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) { | ||
799 | lvl = 2; type = TYPE_MISSING; | ||
800 | } else if (p2m_top[topidx][mididx][idx] == pfn) { | ||
801 | lvl = 2; type = TYPE_PFN; | ||
802 | } else if (p2m_top[topidx][mididx][idx] != pfn) { | ||
803 | lvl = 2; type = TYPE_PFN; | ||
804 | } | ||
805 | if (pfn == 0) { | ||
806 | prev_level = lvl; | ||
807 | prev_type = type; | ||
808 | } | ||
809 | if (pfn == MAX_DOMAIN_PAGES-1) { | ||
810 | lvl = 3; | ||
811 | type = TYPE_UNKNOWN; | ||
812 | } | ||
813 | if (prev_type != type) { | ||
814 | seq_printf(m, " [0x%lx->0x%lx] %s\n", | ||
815 | prev_pfn_type, pfn, type_name[prev_type]); | ||
816 | prev_pfn_type = pfn; | ||
817 | prev_type = type; | ||
818 | } | ||
819 | if (prev_level != lvl) { | ||
820 | seq_printf(m, " [0x%lx->0x%lx] level %s\n", | ||
821 | prev_pfn_level, pfn, level_name[prev_level]); | ||
822 | prev_pfn_level = pfn; | ||
823 | prev_level = lvl; | ||
824 | } | ||
825 | } | ||
826 | return 0; | ||
827 | #undef TYPE_IDENTITY | ||
828 | #undef TYPE_MISSING | ||
829 | #undef TYPE_PFN | ||
830 | #undef TYPE_UNKNOWN | ||
831 | } | ||
832 | #endif | ||
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index a8a66a50d446..fa0269a99377 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -52,6 +52,8 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size; | |||
52 | 52 | ||
53 | static __init void xen_add_extra_mem(unsigned long pages) | 53 | static __init void xen_add_extra_mem(unsigned long pages) |
54 | { | 54 | { |
55 | unsigned long pfn; | ||
56 | |||
55 | u64 size = (u64)pages * PAGE_SIZE; | 57 | u64 size = (u64)pages * PAGE_SIZE; |
56 | u64 extra_start = xen_extra_mem_start + xen_extra_mem_size; | 58 | u64 extra_start = xen_extra_mem_start + xen_extra_mem_size; |
57 | 59 | ||
@@ -66,6 +68,9 @@ static __init void xen_add_extra_mem(unsigned long pages) | |||
66 | xen_extra_mem_size += size; | 68 | xen_extra_mem_size += size; |
67 | 69 | ||
68 | xen_max_p2m_pfn = PFN_DOWN(extra_start + size); | 70 | xen_max_p2m_pfn = PFN_DOWN(extra_start + size); |
71 | |||
72 | for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++) | ||
73 | __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); | ||
69 | } | 74 | } |
70 | 75 | ||
71 | static unsigned long __init xen_release_chunk(phys_addr_t start_addr, | 76 | static unsigned long __init xen_release_chunk(phys_addr_t start_addr, |
@@ -104,7 +109,7 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr, | |||
104 | WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n", | 109 | WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n", |
105 | start, end, ret); | 110 | start, end, ret); |
106 | if (ret == 1) { | 111 | if (ret == 1) { |
107 | set_phys_to_machine(pfn, INVALID_P2M_ENTRY); | 112 | __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); |
108 | len++; | 113 | len++; |
109 | } | 114 | } |
110 | } | 115 | } |
@@ -138,12 +143,55 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn, | |||
138 | return released; | 143 | return released; |
139 | } | 144 | } |
140 | 145 | ||
146 | static unsigned long __init xen_set_identity(const struct e820entry *list, | ||
147 | ssize_t map_size) | ||
148 | { | ||
149 | phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS; | ||
150 | phys_addr_t start_pci = last; | ||
151 | const struct e820entry *entry; | ||
152 | unsigned long identity = 0; | ||
153 | int i; | ||
154 | |||
155 | for (i = 0, entry = list; i < map_size; i++, entry++) { | ||
156 | phys_addr_t start = entry->addr; | ||
157 | phys_addr_t end = start + entry->size; | ||
158 | |||
159 | if (start < last) | ||
160 | start = last; | ||
161 | |||
162 | if (end <= start) | ||
163 | continue; | ||
164 | |||
165 | /* Skip over the 1MB region. */ | ||
166 | if (last > end) | ||
167 | continue; | ||
168 | |||
169 | if (entry->type == E820_RAM) { | ||
170 | if (start > start_pci) | ||
171 | identity += set_phys_range_identity( | ||
172 | PFN_UP(start_pci), PFN_DOWN(start)); | ||
173 | |||
174 | /* Without saving 'last' we would gooble RAM too | ||
175 | * at the end of the loop. */ | ||
176 | last = end; | ||
177 | start_pci = end; | ||
178 | continue; | ||
179 | } | ||
180 | start_pci = min(start, start_pci); | ||
181 | last = end; | ||
182 | } | ||
183 | if (last > start_pci) | ||
184 | identity += set_phys_range_identity( | ||
185 | PFN_UP(start_pci), PFN_DOWN(last)); | ||
186 | return identity; | ||
187 | } | ||
141 | /** | 188 | /** |
142 | * machine_specific_memory_setup - Hook for machine specific memory setup. | 189 | * machine_specific_memory_setup - Hook for machine specific memory setup. |
143 | **/ | 190 | **/ |
144 | char * __init xen_memory_setup(void) | 191 | char * __init xen_memory_setup(void) |
145 | { | 192 | { |
146 | static struct e820entry map[E820MAX] __initdata; | 193 | static struct e820entry map[E820MAX] __initdata; |
194 | static struct e820entry map_raw[E820MAX] __initdata; | ||
147 | 195 | ||
148 | unsigned long max_pfn = xen_start_info->nr_pages; | 196 | unsigned long max_pfn = xen_start_info->nr_pages; |
149 | unsigned long long mem_end; | 197 | unsigned long long mem_end; |
@@ -151,6 +199,7 @@ char * __init xen_memory_setup(void) | |||
151 | struct xen_memory_map memmap; | 199 | struct xen_memory_map memmap; |
152 | unsigned long extra_pages = 0; | 200 | unsigned long extra_pages = 0; |
153 | unsigned long extra_limit; | 201 | unsigned long extra_limit; |
202 | unsigned long identity_pages = 0; | ||
154 | int i; | 203 | int i; |
155 | int op; | 204 | int op; |
156 | 205 | ||
@@ -176,6 +225,7 @@ char * __init xen_memory_setup(void) | |||
176 | } | 225 | } |
177 | BUG_ON(rc); | 226 | BUG_ON(rc); |
178 | 227 | ||
228 | memcpy(map_raw, map, sizeof(map)); | ||
179 | e820.nr_map = 0; | 229 | e820.nr_map = 0; |
180 | xen_extra_mem_start = mem_end; | 230 | xen_extra_mem_start = mem_end; |
181 | for (i = 0; i < memmap.nr_entries; i++) { | 231 | for (i = 0; i < memmap.nr_entries; i++) { |
@@ -194,6 +244,15 @@ char * __init xen_memory_setup(void) | |||
194 | end -= delta; | 244 | end -= delta; |
195 | 245 | ||
196 | extra_pages += PFN_DOWN(delta); | 246 | extra_pages += PFN_DOWN(delta); |
247 | /* | ||
248 | * Set RAM below 4GB that is not for us to be unusable. | ||
249 | * This prevents "System RAM" address space from being | ||
250 | * used as potential resource for I/O address (happens | ||
251 | * when 'allocate_resource' is called). | ||
252 | */ | ||
253 | if (delta && | ||
254 | (xen_initial_domain() && end < 0x100000000ULL)) | ||
255 | e820_add_region(end, delta, E820_UNUSABLE); | ||
197 | } | 256 | } |
198 | 257 | ||
199 | if (map[i].size > 0 && end > xen_extra_mem_start) | 258 | if (map[i].size > 0 && end > xen_extra_mem_start) |
@@ -251,6 +310,13 @@ char * __init xen_memory_setup(void) | |||
251 | 310 | ||
252 | xen_add_extra_mem(extra_pages); | 311 | xen_add_extra_mem(extra_pages); |
253 | 312 | ||
313 | /* | ||
314 | * Set P2M for all non-RAM pages and E820 gaps to be identity | ||
315 | * type PFNs. We supply it with the non-sanitized version | ||
316 | * of the E820. | ||
317 | */ | ||
318 | identity_pages = xen_set_identity(map_raw, memmap.nr_entries); | ||
319 | printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages); | ||
254 | return "Xen"; | 320 | return "Xen"; |
255 | } | 321 | } |
256 | 322 | ||
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 72a4c7959045..30612441ed99 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -509,3 +509,41 @@ void __init xen_smp_init(void) | |||
509 | xen_fill_possible_map(); | 509 | xen_fill_possible_map(); |
510 | xen_init_spinlocks(); | 510 | xen_init_spinlocks(); |
511 | } | 511 | } |
512 | |||
513 | static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) | ||
514 | { | ||
515 | native_smp_prepare_cpus(max_cpus); | ||
516 | WARN_ON(xen_smp_intr_init(0)); | ||
517 | |||
518 | if (!xen_have_vector_callback) | ||
519 | return; | ||
520 | xen_init_lock_cpu(0); | ||
521 | xen_init_spinlocks(); | ||
522 | } | ||
523 | |||
524 | static int __cpuinit xen_hvm_cpu_up(unsigned int cpu) | ||
525 | { | ||
526 | int rc; | ||
527 | rc = native_cpu_up(cpu); | ||
528 | WARN_ON (xen_smp_intr_init(cpu)); | ||
529 | return rc; | ||
530 | } | ||
531 | |||
532 | static void xen_hvm_cpu_die(unsigned int cpu) | ||
533 | { | ||
534 | unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL); | ||
535 | unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL); | ||
536 | unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL); | ||
537 | unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL); | ||
538 | native_cpu_die(cpu); | ||
539 | } | ||
540 | |||
541 | void __init xen_hvm_smp_init(void) | ||
542 | { | ||
543 | smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; | ||
544 | smp_ops.smp_send_reschedule = xen_smp_send_reschedule; | ||
545 | smp_ops.cpu_up = xen_hvm_cpu_up; | ||
546 | smp_ops.cpu_die = xen_hvm_cpu_die; | ||
547 | smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi; | ||
548 | smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi; | ||
549 | } | ||
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 9bbd63a129b5..45329c8c226e 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c | |||
@@ -12,7 +12,7 @@ | |||
12 | #include "xen-ops.h" | 12 | #include "xen-ops.h" |
13 | #include "mmu.h" | 13 | #include "mmu.h" |
14 | 14 | ||
15 | void xen_pre_suspend(void) | 15 | void xen_arch_pre_suspend(void) |
16 | { | 16 | { |
17 | xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); | 17 | xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); |
18 | xen_start_info->console.domU.mfn = | 18 | xen_start_info->console.domU.mfn = |
@@ -26,8 +26,9 @@ void xen_pre_suspend(void) | |||
26 | BUG(); | 26 | BUG(); |
27 | } | 27 | } |
28 | 28 | ||
29 | void xen_hvm_post_suspend(int suspend_cancelled) | 29 | void xen_arch_hvm_post_suspend(int suspend_cancelled) |
30 | { | 30 | { |
31 | #ifdef CONFIG_XEN_PVHVM | ||
31 | int cpu; | 32 | int cpu; |
32 | xen_hvm_init_shared_info(); | 33 | xen_hvm_init_shared_info(); |
33 | xen_callback_vector(); | 34 | xen_callback_vector(); |
@@ -37,9 +38,10 @@ void xen_hvm_post_suspend(int suspend_cancelled) | |||
37 | xen_setup_runstate_info(cpu); | 38 | xen_setup_runstate_info(cpu); |
38 | } | 39 | } |
39 | } | 40 | } |
41 | #endif | ||
40 | } | 42 | } |
41 | 43 | ||
42 | void xen_post_suspend(int suspend_cancelled) | 44 | void xen_arch_post_suspend(int suspend_cancelled) |
43 | { | 45 | { |
44 | xen_build_mfn_list_list(); | 46 | xen_build_mfn_list_list(); |
45 | 47 | ||
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 067759e3d6a5..2e2d370a47b1 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -397,7 +397,9 @@ void xen_setup_timer(int cpu) | |||
397 | name = "<timer kasprintf failed>"; | 397 | name = "<timer kasprintf failed>"; |
398 | 398 | ||
399 | irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, | 399 | irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, |
400 | IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER, | 400 | IRQF_DISABLED|IRQF_PERCPU| |
401 | IRQF_NOBALANCING|IRQF_TIMER| | ||
402 | IRQF_FORCE_RESUME, | ||
401 | name, NULL); | 403 | name, NULL); |
402 | 404 | ||
403 | evt = &per_cpu(xen_clock_events, cpu); | 405 | evt = &per_cpu(xen_clock_events, cpu); |
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 1a5ff24e29c0..aaa7291c9259 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S | |||
@@ -28,9 +28,9 @@ ENTRY(startup_xen) | |||
28 | __FINIT | 28 | __FINIT |
29 | 29 | ||
30 | .pushsection .text | 30 | .pushsection .text |
31 | .align PAGE_SIZE_asm | 31 | .align PAGE_SIZE |
32 | ENTRY(hypercall_page) | 32 | ENTRY(hypercall_page) |
33 | .skip PAGE_SIZE_asm | 33 | .skip PAGE_SIZE |
34 | .popsection | 34 | .popsection |
35 | 35 | ||
36 | ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") | 36 | ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9d41bf985757..3112f55638c4 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -64,10 +64,12 @@ void xen_setup_vcpu_info_placement(void); | |||
64 | 64 | ||
65 | #ifdef CONFIG_SMP | 65 | #ifdef CONFIG_SMP |
66 | void xen_smp_init(void); | 66 | void xen_smp_init(void); |
67 | void __init xen_hvm_smp_init(void); | ||
67 | 68 | ||
68 | extern cpumask_var_t xen_cpu_initialized_map; | 69 | extern cpumask_var_t xen_cpu_initialized_map; |
69 | #else | 70 | #else |
70 | static inline void xen_smp_init(void) {} | 71 | static inline void xen_smp_init(void) {} |
72 | static inline void xen_hvm_smp_init(void) {} | ||
71 | #endif | 73 | #endif |
72 | 74 | ||
73 | #ifdef CONFIG_PARAVIRT_SPINLOCKS | 75 | #ifdef CONFIG_PARAVIRT_SPINLOCKS |