diff options
author | David S. Miller <davem@davemloft.net> | 2010-05-19 02:01:55 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-05-19 02:01:55 -0400 |
commit | 2ec8c6bb5d8f3a62a79f463525054bae1e3d4487 (patch) | |
tree | fa7f8400ac685fb52e96f64997c7c682fc2aa021 /arch | |
parent | 7b39f90fabcf9e2af0cd79d0a60440d821e22b56 (diff) | |
parent | 537b60d17894b7c19a6060feae40299d7109d6e7 (diff) |
Merge branch 'master' of /home/davem/src/GIT/linux-2.6/
Conflicts:
include/linux/mod_devicetable.h
scripts/mod/file2alias.c
Diffstat (limited to 'arch')
200 files changed, 7852 insertions, 6314 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index e5eb1337a537..acda512da2e2 100644 --- a/arch/Kconfig +++ b/arch/Kconfig | |||
@@ -42,15 +42,10 @@ config KPROBES | |||
42 | If in doubt, say "N". | 42 | If in doubt, say "N". |
43 | 43 | ||
44 | config OPTPROBES | 44 | config OPTPROBES |
45 | bool "Kprobes jump optimization support (EXPERIMENTAL)" | 45 | def_bool y |
46 | default y | 46 | depends on KPROBES && HAVE_OPTPROBES |
47 | depends on KPROBES | ||
48 | depends on !PREEMPT | 47 | depends on !PREEMPT |
49 | depends on HAVE_OPTPROBES | ||
50 | select KALLSYMS_ALL | 48 | select KALLSYMS_ALL |
51 | help | ||
52 | This option will allow kprobes to optimize breakpoint to | ||
53 | a jump for reducing its overhead. | ||
54 | 49 | ||
55 | config HAVE_EFFICIENT_UNALIGNED_ACCESS | 50 | config HAVE_EFFICIENT_UNALIGNED_ACCESS |
56 | bool | 51 | bool |
@@ -142,6 +137,17 @@ config HAVE_HW_BREAKPOINT | |||
142 | bool | 137 | bool |
143 | depends on PERF_EVENTS | 138 | depends on PERF_EVENTS |
144 | 139 | ||
140 | config HAVE_MIXED_BREAKPOINTS_REGS | ||
141 | bool | ||
142 | depends on HAVE_HW_BREAKPOINT | ||
143 | help | ||
144 | Depending on the arch implementation of hardware breakpoints, | ||
145 | some of them have separate registers for data and instruction | ||
146 | breakpoints addresses, others have mixed registers to store | ||
147 | them but define the access type in a control register. | ||
148 | Select this option if your arch implements breakpoints under the | ||
149 | latter fashion. | ||
150 | |||
145 | config HAVE_USER_RETURN_NOTIFIER | 151 | config HAVE_USER_RETURN_NOTIFIER |
146 | bool | 152 | bool |
147 | 153 | ||
diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index 610dff44d94b..e756d04b6cd5 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h | |||
@@ -17,8 +17,8 @@ | |||
17 | #define ATOMIC_INIT(i) ( (atomic_t) { (i) } ) | 17 | #define ATOMIC_INIT(i) ( (atomic_t) { (i) } ) |
18 | #define ATOMIC64_INIT(i) ( (atomic64_t) { (i) } ) | 18 | #define ATOMIC64_INIT(i) ( (atomic64_t) { (i) } ) |
19 | 19 | ||
20 | #define atomic_read(v) ((v)->counter + 0) | 20 | #define atomic_read(v) (*(volatile int *)&(v)->counter) |
21 | #define atomic64_read(v) ((v)->counter + 0) | 21 | #define atomic64_read(v) (*(volatile long *)&(v)->counter) |
22 | 22 | ||
23 | #define atomic_set(v,i) ((v)->counter = (i)) | 23 | #define atomic_set(v,i) ((v)->counter = (i)) |
24 | #define atomic64_set(v,i) ((v)->counter = (i)) | 24 | #define atomic64_set(v,i) ((v)->counter = (i)) |
diff --git a/arch/alpha/include/asm/bitops.h b/arch/alpha/include/asm/bitops.h index 15f3ae25c511..296da1d5ed57 100644 --- a/arch/alpha/include/asm/bitops.h +++ b/arch/alpha/include/asm/bitops.h | |||
@@ -405,29 +405,31 @@ static inline int fls(int x) | |||
405 | 405 | ||
406 | #if defined(CONFIG_ALPHA_EV6) && defined(CONFIG_ALPHA_EV67) | 406 | #if defined(CONFIG_ALPHA_EV6) && defined(CONFIG_ALPHA_EV67) |
407 | /* Whee. EV67 can calculate it directly. */ | 407 | /* Whee. EV67 can calculate it directly. */ |
408 | static inline unsigned long hweight64(unsigned long w) | 408 | static inline unsigned long __arch_hweight64(unsigned long w) |
409 | { | 409 | { |
410 | return __kernel_ctpop(w); | 410 | return __kernel_ctpop(w); |
411 | } | 411 | } |
412 | 412 | ||
413 | static inline unsigned int hweight32(unsigned int w) | 413 | static inline unsigned int __arch_weight32(unsigned int w) |
414 | { | 414 | { |
415 | return hweight64(w); | 415 | return __arch_hweight64(w); |
416 | } | 416 | } |
417 | 417 | ||
418 | static inline unsigned int hweight16(unsigned int w) | 418 | static inline unsigned int __arch_hweight16(unsigned int w) |
419 | { | 419 | { |
420 | return hweight64(w & 0xffff); | 420 | return __arch_hweight64(w & 0xffff); |
421 | } | 421 | } |
422 | 422 | ||
423 | static inline unsigned int hweight8(unsigned int w) | 423 | static inline unsigned int __arch_hweight8(unsigned int w) |
424 | { | 424 | { |
425 | return hweight64(w & 0xff); | 425 | return __arch_hweight64(w & 0xff); |
426 | } | 426 | } |
427 | #else | 427 | #else |
428 | #include <asm-generic/bitops/hweight.h> | 428 | #include <asm-generic/bitops/arch_hweight.h> |
429 | #endif | 429 | #endif |
430 | 430 | ||
431 | #include <asm-generic/bitops/const_hweight.h> | ||
432 | |||
431 | #endif /* __KERNEL__ */ | 433 | #endif /* __KERNEL__ */ |
432 | 434 | ||
433 | #include <asm-generic/bitops/find.h> | 435 | #include <asm-generic/bitops/find.h> |
diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index e8ddec2cb158..a0162fa94564 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h | |||
@@ -24,7 +24,7 @@ | |||
24 | * strex/ldrex monitor on some implementations. The reason we can use it for | 24 | * strex/ldrex monitor on some implementations. The reason we can use it for |
25 | * atomic_set() is the clrex or dummy strex done on every exception return. | 25 | * atomic_set() is the clrex or dummy strex done on every exception return. |
26 | */ | 26 | */ |
27 | #define atomic_read(v) ((v)->counter) | 27 | #define atomic_read(v) (*(volatile int *)&(v)->counter) |
28 | #define atomic_set(v,i) (((v)->counter) = (i)) | 28 | #define atomic_set(v,i) (((v)->counter) = (i)) |
29 | 29 | ||
30 | #if __LINUX_ARM_ARCH__ >= 6 | 30 | #if __LINUX_ARM_ARCH__ >= 6 |
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 0d08d4170b64..4656a24058d2 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h | |||
@@ -371,6 +371,10 @@ static inline void __flush_icache_all(void) | |||
371 | #ifdef CONFIG_ARM_ERRATA_411920 | 371 | #ifdef CONFIG_ARM_ERRATA_411920 |
372 | extern void v6_icache_inval_all(void); | 372 | extern void v6_icache_inval_all(void); |
373 | v6_icache_inval_all(); | 373 | v6_icache_inval_all(); |
374 | #elif defined(CONFIG_SMP) && __LINUX_ARM_ARCH__ >= 7 | ||
375 | asm("mcr p15, 0, %0, c7, c1, 0 @ invalidate I-cache inner shareable\n" | ||
376 | : | ||
377 | : "r" (0)); | ||
374 | #else | 378 | #else |
375 | asm("mcr p15, 0, %0, c7, c5, 0 @ invalidate I-cache\n" | 379 | asm("mcr p15, 0, %0, c7, c5, 0 @ invalidate I-cache\n" |
376 | : | 380 | : |
diff --git a/arch/arm/include/asm/smp_twd.h b/arch/arm/include/asm/smp_twd.h index 7be0978b2625..634f357be6bb 100644 --- a/arch/arm/include/asm/smp_twd.h +++ b/arch/arm/include/asm/smp_twd.h | |||
@@ -1,6 +1,23 @@ | |||
1 | #ifndef __ASMARM_SMP_TWD_H | 1 | #ifndef __ASMARM_SMP_TWD_H |
2 | #define __ASMARM_SMP_TWD_H | 2 | #define __ASMARM_SMP_TWD_H |
3 | 3 | ||
4 | #define TWD_TIMER_LOAD 0x00 | ||
5 | #define TWD_TIMER_COUNTER 0x04 | ||
6 | #define TWD_TIMER_CONTROL 0x08 | ||
7 | #define TWD_TIMER_INTSTAT 0x0C | ||
8 | |||
9 | #define TWD_WDOG_LOAD 0x20 | ||
10 | #define TWD_WDOG_COUNTER 0x24 | ||
11 | #define TWD_WDOG_CONTROL 0x28 | ||
12 | #define TWD_WDOG_INTSTAT 0x2C | ||
13 | #define TWD_WDOG_RESETSTAT 0x30 | ||
14 | #define TWD_WDOG_DISABLE 0x34 | ||
15 | |||
16 | #define TWD_TIMER_CONTROL_ENABLE (1 << 0) | ||
17 | #define TWD_TIMER_CONTROL_ONESHOT (0 << 1) | ||
18 | #define TWD_TIMER_CONTROL_PERIODIC (1 << 1) | ||
19 | #define TWD_TIMER_CONTROL_IT_ENABLE (1 << 2) | ||
20 | |||
4 | struct clock_event_device; | 21 | struct clock_event_device; |
5 | 22 | ||
6 | extern void __iomem *twd_base; | 23 | extern void __iomem *twd_base; |
diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index e085e2c545eb..bd863d8608cd 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h | |||
@@ -46,6 +46,9 @@ | |||
46 | #define TLB_V7_UIS_FULL (1 << 20) | 46 | #define TLB_V7_UIS_FULL (1 << 20) |
47 | #define TLB_V7_UIS_ASID (1 << 21) | 47 | #define TLB_V7_UIS_ASID (1 << 21) |
48 | 48 | ||
49 | /* Inner Shareable BTB operation (ARMv7 MP extensions) */ | ||
50 | #define TLB_V7_IS_BTB (1 << 22) | ||
51 | |||
49 | #define TLB_L2CLEAN_FR (1 << 29) /* Feroceon */ | 52 | #define TLB_L2CLEAN_FR (1 << 29) /* Feroceon */ |
50 | #define TLB_DCLEAN (1 << 30) | 53 | #define TLB_DCLEAN (1 << 30) |
51 | #define TLB_WB (1 << 31) | 54 | #define TLB_WB (1 << 31) |
@@ -183,7 +186,7 @@ | |||
183 | #endif | 186 | #endif |
184 | 187 | ||
185 | #ifdef CONFIG_SMP | 188 | #ifdef CONFIG_SMP |
186 | #define v7wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_BTB | \ | 189 | #define v7wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_V7_IS_BTB | \ |
187 | TLB_V7_UIS_FULL | TLB_V7_UIS_PAGE | TLB_V7_UIS_ASID) | 190 | TLB_V7_UIS_FULL | TLB_V7_UIS_PAGE | TLB_V7_UIS_ASID) |
188 | #else | 191 | #else |
189 | #define v7wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_BTB | \ | 192 | #define v7wbi_tlb_flags (TLB_WB | TLB_DCLEAN | TLB_BTB | \ |
@@ -339,6 +342,12 @@ static inline void local_flush_tlb_all(void) | |||
339 | dsb(); | 342 | dsb(); |
340 | isb(); | 343 | isb(); |
341 | } | 344 | } |
345 | if (tlb_flag(TLB_V7_IS_BTB)) { | ||
346 | /* flush the branch target cache */ | ||
347 | asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc"); | ||
348 | dsb(); | ||
349 | isb(); | ||
350 | } | ||
342 | } | 351 | } |
343 | 352 | ||
344 | static inline void local_flush_tlb_mm(struct mm_struct *mm) | 353 | static inline void local_flush_tlb_mm(struct mm_struct *mm) |
@@ -376,6 +385,12 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm) | |||
376 | asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero) : "cc"); | 385 | asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero) : "cc"); |
377 | dsb(); | 386 | dsb(); |
378 | } | 387 | } |
388 | if (tlb_flag(TLB_V7_IS_BTB)) { | ||
389 | /* flush the branch target cache */ | ||
390 | asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc"); | ||
391 | dsb(); | ||
392 | isb(); | ||
393 | } | ||
379 | } | 394 | } |
380 | 395 | ||
381 | static inline void | 396 | static inline void |
@@ -416,6 +431,12 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) | |||
416 | asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero) : "cc"); | 431 | asm("mcr p15, 0, %0, c7, c5, 6" : : "r" (zero) : "cc"); |
417 | dsb(); | 432 | dsb(); |
418 | } | 433 | } |
434 | if (tlb_flag(TLB_V7_IS_BTB)) { | ||
435 | /* flush the branch target cache */ | ||
436 | asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc"); | ||
437 | dsb(); | ||
438 | isb(); | ||
439 | } | ||
419 | } | 440 | } |
420 | 441 | ||
421 | static inline void local_flush_tlb_kernel_page(unsigned long kaddr) | 442 | static inline void local_flush_tlb_kernel_page(unsigned long kaddr) |
@@ -454,6 +475,12 @@ static inline void local_flush_tlb_kernel_page(unsigned long kaddr) | |||
454 | dsb(); | 475 | dsb(); |
455 | isb(); | 476 | isb(); |
456 | } | 477 | } |
478 | if (tlb_flag(TLB_V7_IS_BTB)) { | ||
479 | /* flush the branch target cache */ | ||
480 | asm("mcr p15, 0, %0, c7, c1, 6" : : "r" (zero) : "cc"); | ||
481 | dsb(); | ||
482 | isb(); | ||
483 | } | ||
457 | } | 484 | } |
458 | 485 | ||
459 | /* | 486 | /* |
diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index ea02a7b1c244..7c5f0c024db7 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c | |||
@@ -21,23 +21,6 @@ | |||
21 | #include <asm/smp_twd.h> | 21 | #include <asm/smp_twd.h> |
22 | #include <asm/hardware/gic.h> | 22 | #include <asm/hardware/gic.h> |
23 | 23 | ||
24 | #define TWD_TIMER_LOAD 0x00 | ||
25 | #define TWD_TIMER_COUNTER 0x04 | ||
26 | #define TWD_TIMER_CONTROL 0x08 | ||
27 | #define TWD_TIMER_INTSTAT 0x0C | ||
28 | |||
29 | #define TWD_WDOG_LOAD 0x20 | ||
30 | #define TWD_WDOG_COUNTER 0x24 | ||
31 | #define TWD_WDOG_CONTROL 0x28 | ||
32 | #define TWD_WDOG_INTSTAT 0x2C | ||
33 | #define TWD_WDOG_RESETSTAT 0x30 | ||
34 | #define TWD_WDOG_DISABLE 0x34 | ||
35 | |||
36 | #define TWD_TIMER_CONTROL_ENABLE (1 << 0) | ||
37 | #define TWD_TIMER_CONTROL_ONESHOT (0 << 1) | ||
38 | #define TWD_TIMER_CONTROL_PERIODIC (1 << 1) | ||
39 | #define TWD_TIMER_CONTROL_IT_ENABLE (1 << 2) | ||
40 | |||
41 | /* set up by the platform code */ | 24 | /* set up by the platform code */ |
42 | void __iomem *twd_base; | 25 | void __iomem *twd_base; |
43 | 26 | ||
diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S index 5e3f99620c04..14a0d988c82c 100644 --- a/arch/arm/lib/clear_user.S +++ b/arch/arm/lib/clear_user.S | |||
@@ -45,6 +45,7 @@ USER( strnebt r2, [r0]) | |||
45 | mov r0, #0 | 45 | mov r0, #0 |
46 | ldmfd sp!, {r1, pc} | 46 | ldmfd sp!, {r1, pc} |
47 | ENDPROC(__clear_user) | 47 | ENDPROC(__clear_user) |
48 | ENDPROC(__clear_user_std) | ||
48 | 49 | ||
49 | .pushsection .fixup,"ax" | 50 | .pushsection .fixup,"ax" |
50 | .align 0 | 51 | .align 0 |
diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S index 027b69bdbad1..d066df686e17 100644 --- a/arch/arm/lib/copy_to_user.S +++ b/arch/arm/lib/copy_to_user.S | |||
@@ -93,6 +93,7 @@ WEAK(__copy_to_user) | |||
93 | #include "copy_template.S" | 93 | #include "copy_template.S" |
94 | 94 | ||
95 | ENDPROC(__copy_to_user) | 95 | ENDPROC(__copy_to_user) |
96 | ENDPROC(__copy_to_user_std) | ||
96 | 97 | ||
97 | .pushsection .fixup,"ax" | 98 | .pushsection .fixup,"ax" |
98 | .align 0 | 99 | .align 0 |
diff --git a/arch/arm/mach-davinci/da830.c b/arch/arm/mach-davinci/da830.c index 122e61a9f505..e8cb982f5e8e 100644 --- a/arch/arm/mach-davinci/da830.c +++ b/arch/arm/mach-davinci/da830.c | |||
@@ -410,7 +410,7 @@ static struct clk_lookup da830_clks[] = { | |||
410 | CLK("davinci-mcasp.0", NULL, &mcasp0_clk), | 410 | CLK("davinci-mcasp.0", NULL, &mcasp0_clk), |
411 | CLK("davinci-mcasp.1", NULL, &mcasp1_clk), | 411 | CLK("davinci-mcasp.1", NULL, &mcasp1_clk), |
412 | CLK("davinci-mcasp.2", NULL, &mcasp2_clk), | 412 | CLK("davinci-mcasp.2", NULL, &mcasp2_clk), |
413 | CLK("musb_hdrc", NULL, &usb20_clk), | 413 | CLK(NULL, "usb20", &usb20_clk), |
414 | CLK(NULL, "aemif", &aemif_clk), | 414 | CLK(NULL, "aemif", &aemif_clk), |
415 | CLK(NULL, "aintc", &aintc_clk), | 415 | CLK(NULL, "aintc", &aintc_clk), |
416 | CLK(NULL, "secu_mgr", &secu_mgr_clk), | 416 | CLK(NULL, "secu_mgr", &secu_mgr_clk), |
diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S index 9d89c67a1cc3..e46ecd847138 100644 --- a/arch/arm/mm/cache-v6.S +++ b/arch/arm/mm/cache-v6.S | |||
@@ -211,6 +211,9 @@ v6_dma_inv_range: | |||
211 | mcrne p15, 0, r1, c7, c15, 1 @ clean & invalidate unified line | 211 | mcrne p15, 0, r1, c7, c15, 1 @ clean & invalidate unified line |
212 | #endif | 212 | #endif |
213 | 1: | 213 | 1: |
214 | #ifdef CONFIG_SMP | ||
215 | str r0, [r0] @ write for ownership | ||
216 | #endif | ||
214 | #ifdef HARVARD_CACHE | 217 | #ifdef HARVARD_CACHE |
215 | mcr p15, 0, r0, c7, c6, 1 @ invalidate D line | 218 | mcr p15, 0, r0, c7, c6, 1 @ invalidate D line |
216 | #else | 219 | #else |
@@ -231,6 +234,9 @@ v6_dma_inv_range: | |||
231 | v6_dma_clean_range: | 234 | v6_dma_clean_range: |
232 | bic r0, r0, #D_CACHE_LINE_SIZE - 1 | 235 | bic r0, r0, #D_CACHE_LINE_SIZE - 1 |
233 | 1: | 236 | 1: |
237 | #ifdef CONFIG_SMP | ||
238 | ldr r2, [r0] @ read for ownership | ||
239 | #endif | ||
234 | #ifdef HARVARD_CACHE | 240 | #ifdef HARVARD_CACHE |
235 | mcr p15, 0, r0, c7, c10, 1 @ clean D line | 241 | mcr p15, 0, r0, c7, c10, 1 @ clean D line |
236 | #else | 242 | #else |
@@ -251,6 +257,10 @@ v6_dma_clean_range: | |||
251 | ENTRY(v6_dma_flush_range) | 257 | ENTRY(v6_dma_flush_range) |
252 | bic r0, r0, #D_CACHE_LINE_SIZE - 1 | 258 | bic r0, r0, #D_CACHE_LINE_SIZE - 1 |
253 | 1: | 259 | 1: |
260 | #ifdef CONFIG_SMP | ||
261 | ldr r2, [r0] @ read for ownership | ||
262 | str r2, [r0] @ write for ownership | ||
263 | #endif | ||
254 | #ifdef HARVARD_CACHE | 264 | #ifdef HARVARD_CACHE |
255 | mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line | 265 | mcr p15, 0, r0, c7, c14, 1 @ clean & invalidate D line |
256 | #else | 266 | #else |
@@ -273,7 +283,9 @@ ENTRY(v6_dma_map_area) | |||
273 | add r1, r1, r0 | 283 | add r1, r1, r0 |
274 | teq r2, #DMA_FROM_DEVICE | 284 | teq r2, #DMA_FROM_DEVICE |
275 | beq v6_dma_inv_range | 285 | beq v6_dma_inv_range |
276 | b v6_dma_clean_range | 286 | teq r2, #DMA_TO_DEVICE |
287 | beq v6_dma_clean_range | ||
288 | b v6_dma_flush_range | ||
277 | ENDPROC(v6_dma_map_area) | 289 | ENDPROC(v6_dma_map_area) |
278 | 290 | ||
279 | /* | 291 | /* |
@@ -283,9 +295,6 @@ ENDPROC(v6_dma_map_area) | |||
283 | * - dir - DMA direction | 295 | * - dir - DMA direction |
284 | */ | 296 | */ |
285 | ENTRY(v6_dma_unmap_area) | 297 | ENTRY(v6_dma_unmap_area) |
286 | add r1, r1, r0 | ||
287 | teq r2, #DMA_TO_DEVICE | ||
288 | bne v6_dma_inv_range | ||
289 | mov pc, lr | 298 | mov pc, lr |
290 | ENDPROC(v6_dma_unmap_area) | 299 | ENDPROC(v6_dma_unmap_area) |
291 | 300 | ||
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index bcd64f265870..06a90dcfc60a 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S | |||
@@ -167,7 +167,11 @@ ENTRY(v7_coherent_user_range) | |||
167 | cmp r0, r1 | 167 | cmp r0, r1 |
168 | blo 1b | 168 | blo 1b |
169 | mov r0, #0 | 169 | mov r0, #0 |
170 | #ifdef CONFIG_SMP | ||
171 | mcr p15, 0, r0, c7, c1, 6 @ invalidate BTB Inner Shareable | ||
172 | #else | ||
170 | mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB | 173 | mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB |
174 | #endif | ||
171 | dsb | 175 | dsb |
172 | isb | 176 | isb |
173 | mov pc, lr | 177 | mov pc, lr |
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index 9bfeb6b9509a..33b327379f07 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c | |||
@@ -65,6 +65,15 @@ void flush_dcache_page(struct page *page) | |||
65 | } | 65 | } |
66 | EXPORT_SYMBOL(flush_dcache_page); | 66 | EXPORT_SYMBOL(flush_dcache_page); |
67 | 67 | ||
68 | void copy_to_user_page(struct vm_area_struct *vma, struct page *page, | ||
69 | unsigned long uaddr, void *dst, const void *src, | ||
70 | unsigned long len) | ||
71 | { | ||
72 | memcpy(dst, src, len); | ||
73 | if (vma->vm_flags & VM_EXEC) | ||
74 | __cpuc_coherent_user_range(uaddr, uaddr + len); | ||
75 | } | ||
76 | |||
68 | void __iomem *__arm_ioremap_pfn(unsigned long pfn, unsigned long offset, | 77 | void __iomem *__arm_ioremap_pfn(unsigned long pfn, unsigned long offset, |
69 | size_t size, unsigned int mtype) | 78 | size_t size, unsigned int mtype) |
70 | { | 79 | { |
@@ -87,8 +96,8 @@ void __iomem *__arm_ioremap(unsigned long phys_addr, size_t size, | |||
87 | } | 96 | } |
88 | EXPORT_SYMBOL(__arm_ioremap); | 97 | EXPORT_SYMBOL(__arm_ioremap); |
89 | 98 | ||
90 | void __iomem *__arm_ioremap(unsigned long phys_addr, size_t size, | 99 | void __iomem *__arm_ioremap_caller(unsigned long phys_addr, size_t size, |
91 | unsigned int mtype, void *caller) | 100 | unsigned int mtype, void *caller) |
92 | { | 101 | { |
93 | return __arm_ioremap(phys_addr, size, mtype); | 102 | return __arm_ioremap(phys_addr, size, mtype); |
94 | } | 103 | } |
diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S index 0cb1848bd876..f3f288a9546d 100644 --- a/arch/arm/mm/tlb-v7.S +++ b/arch/arm/mm/tlb-v7.S | |||
@@ -50,7 +50,11 @@ ENTRY(v7wbi_flush_user_tlb_range) | |||
50 | cmp r0, r1 | 50 | cmp r0, r1 |
51 | blo 1b | 51 | blo 1b |
52 | mov ip, #0 | 52 | mov ip, #0 |
53 | #ifdef CONFIG_SMP | ||
54 | mcr p15, 0, ip, c7, c1, 6 @ flush BTAC/BTB Inner Shareable | ||
55 | #else | ||
53 | mcr p15, 0, ip, c7, c5, 6 @ flush BTAC/BTB | 56 | mcr p15, 0, ip, c7, c5, 6 @ flush BTAC/BTB |
57 | #endif | ||
54 | dsb | 58 | dsb |
55 | mov pc, lr | 59 | mov pc, lr |
56 | ENDPROC(v7wbi_flush_user_tlb_range) | 60 | ENDPROC(v7wbi_flush_user_tlb_range) |
@@ -79,7 +83,11 @@ ENTRY(v7wbi_flush_kern_tlb_range) | |||
79 | cmp r0, r1 | 83 | cmp r0, r1 |
80 | blo 1b | 84 | blo 1b |
81 | mov r2, #0 | 85 | mov r2, #0 |
86 | #ifdef CONFIG_SMP | ||
87 | mcr p15, 0, r2, c7, c1, 6 @ flush BTAC/BTB Inner Shareable | ||
88 | #else | ||
82 | mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB | 89 | mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB |
90 | #endif | ||
83 | dsb | 91 | dsb |
84 | isb | 92 | isb |
85 | mov pc, lr | 93 | mov pc, lr |
diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h index b131c27ddf57..bbce6a1c6bb6 100644 --- a/arch/avr32/include/asm/atomic.h +++ b/arch/avr32/include/asm/atomic.h | |||
@@ -19,7 +19,7 @@ | |||
19 | 19 | ||
20 | #define ATOMIC_INIT(i) { (i) } | 20 | #define ATOMIC_INIT(i) { (i) } |
21 | 21 | ||
22 | #define atomic_read(v) ((v)->counter) | 22 | #define atomic_read(v) (*(volatile int *)&(v)->counter) |
23 | #define atomic_set(v, i) (((v)->counter) = i) | 23 | #define atomic_set(v, i) (((v)->counter) = i) |
24 | 24 | ||
25 | /* | 25 | /* |
diff --git a/arch/cris/include/asm/atomic.h b/arch/cris/include/asm/atomic.h index a6aca819e9f3..88dc9b9c4ba0 100644 --- a/arch/cris/include/asm/atomic.h +++ b/arch/cris/include/asm/atomic.h | |||
@@ -15,7 +15,7 @@ | |||
15 | 15 | ||
16 | #define ATOMIC_INIT(i) { (i) } | 16 | #define ATOMIC_INIT(i) { (i) } |
17 | 17 | ||
18 | #define atomic_read(v) ((v)->counter) | 18 | #define atomic_read(v) (*(volatile int *)&(v)->counter) |
19 | #define atomic_set(v,i) (((v)->counter) = (i)) | 19 | #define atomic_set(v,i) (((v)->counter) = (i)) |
20 | 20 | ||
21 | /* These should be written in asm but we do it in C for now. */ | 21 | /* These should be written in asm but we do it in C for now. */ |
diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h index 00a57af79afc..fae32c7fdcb6 100644 --- a/arch/frv/include/asm/atomic.h +++ b/arch/frv/include/asm/atomic.h | |||
@@ -36,7 +36,7 @@ | |||
36 | #define smp_mb__after_atomic_inc() barrier() | 36 | #define smp_mb__after_atomic_inc() barrier() |
37 | 37 | ||
38 | #define ATOMIC_INIT(i) { (i) } | 38 | #define ATOMIC_INIT(i) { (i) } |
39 | #define atomic_read(v) ((v)->counter) | 39 | #define atomic_read(v) (*(volatile int *)&(v)->counter) |
40 | #define atomic_set(v, i) (((v)->counter) = (i)) | 40 | #define atomic_set(v, i) (((v)->counter) = (i)) |
41 | 41 | ||
42 | #ifndef CONFIG_FRV_OUTOFLINE_ATOMIC_OPS | 42 | #ifndef CONFIG_FRV_OUTOFLINE_ATOMIC_OPS |
diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h index 33c8c0fa9583..e936804b7508 100644 --- a/arch/h8300/include/asm/atomic.h +++ b/arch/h8300/include/asm/atomic.h | |||
@@ -10,7 +10,7 @@ | |||
10 | 10 | ||
11 | #define ATOMIC_INIT(i) { (i) } | 11 | #define ATOMIC_INIT(i) { (i) } |
12 | 12 | ||
13 | #define atomic_read(v) ((v)->counter) | 13 | #define atomic_read(v) (*(volatile int *)&(v)->counter) |
14 | #define atomic_set(v, i) (((v)->counter) = i) | 14 | #define atomic_set(v, i) (((v)->counter) = i) |
15 | 15 | ||
16 | #include <asm/system.h> | 16 | #include <asm/system.h> |
diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h index 88405cb0832a..4e1948447a00 100644 --- a/arch/ia64/include/asm/atomic.h +++ b/arch/ia64/include/asm/atomic.h | |||
@@ -21,8 +21,8 @@ | |||
21 | #define ATOMIC_INIT(i) ((atomic_t) { (i) }) | 21 | #define ATOMIC_INIT(i) ((atomic_t) { (i) }) |
22 | #define ATOMIC64_INIT(i) ((atomic64_t) { (i) }) | 22 | #define ATOMIC64_INIT(i) ((atomic64_t) { (i) }) |
23 | 23 | ||
24 | #define atomic_read(v) ((v)->counter) | 24 | #define atomic_read(v) (*(volatile int *)&(v)->counter) |
25 | #define atomic64_read(v) ((v)->counter) | 25 | #define atomic64_read(v) (*(volatile long *)&(v)->counter) |
26 | 26 | ||
27 | #define atomic_set(v,i) (((v)->counter) = (i)) | 27 | #define atomic_set(v,i) (((v)->counter) = (i)) |
28 | #define atomic64_set(v,i) (((v)->counter) = (i)) | 28 | #define atomic64_set(v,i) (((v)->counter) = (i)) |
diff --git a/arch/ia64/include/asm/bitops.h b/arch/ia64/include/asm/bitops.h index 6ebc229a1c51..9da3df6f1a52 100644 --- a/arch/ia64/include/asm/bitops.h +++ b/arch/ia64/include/asm/bitops.h | |||
@@ -437,17 +437,18 @@ __fls (unsigned long x) | |||
437 | * hweightN: returns the hamming weight (i.e. the number | 437 | * hweightN: returns the hamming weight (i.e. the number |
438 | * of bits set) of a N-bit word | 438 | * of bits set) of a N-bit word |
439 | */ | 439 | */ |
440 | static __inline__ unsigned long | 440 | static __inline__ unsigned long __arch_hweight64(unsigned long x) |
441 | hweight64 (unsigned long x) | ||
442 | { | 441 | { |
443 | unsigned long result; | 442 | unsigned long result; |
444 | result = ia64_popcnt(x); | 443 | result = ia64_popcnt(x); |
445 | return result; | 444 | return result; |
446 | } | 445 | } |
447 | 446 | ||
448 | #define hweight32(x) (unsigned int) hweight64((x) & 0xfffffffful) | 447 | #define __arch_hweight32(x) ((unsigned int) __arch_hweight64((x) & 0xfffffffful)) |
449 | #define hweight16(x) (unsigned int) hweight64((x) & 0xfffful) | 448 | #define __arch_hweight16(x) ((unsigned int) __arch_hweight64((x) & 0xfffful)) |
450 | #define hweight8(x) (unsigned int) hweight64((x) & 0xfful) | 449 | #define __arch_hweight8(x) ((unsigned int) __arch_hweight64((x) & 0xfful)) |
450 | |||
451 | #include <asm-generic/bitops/const_hweight.h> | ||
451 | 452 | ||
452 | #endif /* __KERNEL__ */ | 453 | #endif /* __KERNEL__ */ |
453 | 454 | ||
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 4d1a7e9314cf..c6c90f39f4d9 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c | |||
@@ -785,6 +785,14 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) | |||
785 | return 0; | 785 | return 0; |
786 | } | 786 | } |
787 | 787 | ||
788 | int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi) | ||
789 | { | ||
790 | if (isa_irq >= 16) | ||
791 | return -1; | ||
792 | *gsi = isa_irq; | ||
793 | return 0; | ||
794 | } | ||
795 | |||
788 | /* | 796 | /* |
789 | * ACPI based hotplug CPU support | 797 | * ACPI based hotplug CPU support |
790 | */ | 798 | */ |
diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h index 63f0cf0f50dd..d44a51e5271b 100644 --- a/arch/m32r/include/asm/atomic.h +++ b/arch/m32r/include/asm/atomic.h | |||
@@ -26,7 +26,7 @@ | |||
26 | * | 26 | * |
27 | * Atomically reads the value of @v. | 27 | * Atomically reads the value of @v. |
28 | */ | 28 | */ |
29 | #define atomic_read(v) ((v)->counter) | 29 | #define atomic_read(v) (*(volatile int *)&(v)->counter) |
30 | 30 | ||
31 | /** | 31 | /** |
32 | * atomic_set - set atomic variable | 32 | * atomic_set - set atomic variable |
diff --git a/arch/m68k/amiga/Makefile b/arch/m68k/amiga/Makefile index 6a0d7650f980..11dd30b16b3b 100644 --- a/arch/m68k/amiga/Makefile +++ b/arch/m68k/amiga/Makefile | |||
@@ -2,6 +2,6 @@ | |||
2 | # Makefile for Linux arch/m68k/amiga source directory | 2 | # Makefile for Linux arch/m68k/amiga source directory |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-y := config.o amiints.o cia.o chipram.o amisound.o | 5 | obj-y := config.o amiints.o cia.o chipram.o amisound.o platform.o |
6 | 6 | ||
7 | obj-$(CONFIG_AMIGA_PCMCIA) += pcmcia.o | 7 | obj-$(CONFIG_AMIGA_PCMCIA) += pcmcia.o |
diff --git a/arch/m68k/amiga/platform.c b/arch/m68k/amiga/platform.c new file mode 100644 index 000000000000..38f18bf14737 --- /dev/null +++ b/arch/m68k/amiga/platform.c | |||
@@ -0,0 +1,83 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2007-2009 Geert Uytterhoeven | ||
3 | * | ||
4 | * This file is subject to the terms and conditions of the GNU General Public | ||
5 | * License. See the file COPYING in the main directory of this archive | ||
6 | * for more details. | ||
7 | */ | ||
8 | |||
9 | #include <linux/init.h> | ||
10 | #include <linux/platform_device.h> | ||
11 | #include <linux/zorro.h> | ||
12 | |||
13 | #include <asm/amigahw.h> | ||
14 | |||
15 | |||
16 | #ifdef CONFIG_ZORRO | ||
17 | |||
18 | static const struct resource zorro_resources[] __initconst = { | ||
19 | /* Zorro II regions (on Zorro II/III) */ | ||
20 | { | ||
21 | .name = "Zorro II exp", | ||
22 | .start = 0x00e80000, | ||
23 | .end = 0x00efffff, | ||
24 | .flags = IORESOURCE_MEM, | ||
25 | }, { | ||
26 | .name = "Zorro II mem", | ||
27 | .start = 0x00200000, | ||
28 | .end = 0x009fffff, | ||
29 | .flags = IORESOURCE_MEM, | ||
30 | }, | ||
31 | /* Zorro III regions (on Zorro III only) */ | ||
32 | { | ||
33 | .name = "Zorro III exp", | ||
34 | .start = 0xff000000, | ||
35 | .end = 0xffffffff, | ||
36 | .flags = IORESOURCE_MEM, | ||
37 | }, { | ||
38 | .name = "Zorro III cfg", | ||
39 | .start = 0x40000000, | ||
40 | .end = 0x7fffffff, | ||
41 | .flags = IORESOURCE_MEM, | ||
42 | } | ||
43 | }; | ||
44 | |||
45 | |||
46 | static int __init amiga_init_bus(void) | ||
47 | { | ||
48 | if (!MACH_IS_AMIGA || !AMIGAHW_PRESENT(ZORRO)) | ||
49 | return -ENODEV; | ||
50 | |||
51 | platform_device_register_simple("amiga-zorro", -1, zorro_resources, | ||
52 | AMIGAHW_PRESENT(ZORRO3) ? 4 : 2); | ||
53 | return 0; | ||
54 | } | ||
55 | |||
56 | subsys_initcall(amiga_init_bus); | ||
57 | |||
58 | #endif /* CONFIG_ZORRO */ | ||
59 | |||
60 | |||
61 | static int __init amiga_init_devices(void) | ||
62 | { | ||
63 | if (!MACH_IS_AMIGA) | ||
64 | return -ENODEV; | ||
65 | |||
66 | /* video hardware */ | ||
67 | if (AMIGAHW_PRESENT(AMI_VIDEO)) | ||
68 | platform_device_register_simple("amiga-video", -1, NULL, 0); | ||
69 | |||
70 | |||
71 | /* sound hardware */ | ||
72 | if (AMIGAHW_PRESENT(AMI_AUDIO)) | ||
73 | platform_device_register_simple("amiga-audio", -1, NULL, 0); | ||
74 | |||
75 | |||
76 | /* storage interfaces */ | ||
77 | if (AMIGAHW_PRESENT(AMI_FLOPPY)) | ||
78 | platform_device_register_simple("amiga-floppy", -1, NULL, 0); | ||
79 | |||
80 | return 0; | ||
81 | } | ||
82 | |||
83 | device_initcall(amiga_init_devices); | ||
diff --git a/arch/m68k/bvme6000/rtc.c b/arch/m68k/bvme6000/rtc.c index b46ea1714a89..cb8617bb194b 100644 --- a/arch/m68k/bvme6000/rtc.c +++ b/arch/m68k/bvme6000/rtc.c | |||
@@ -9,7 +9,6 @@ | |||
9 | #include <linux/types.h> | 9 | #include <linux/types.h> |
10 | #include <linux/errno.h> | 10 | #include <linux/errno.h> |
11 | #include <linux/miscdevice.h> | 11 | #include <linux/miscdevice.h> |
12 | #include <linux/smp_lock.h> | ||
13 | #include <linux/ioport.h> | 12 | #include <linux/ioport.h> |
14 | #include <linux/capability.h> | 13 | #include <linux/capability.h> |
15 | #include <linux/fcntl.h> | 14 | #include <linux/fcntl.h> |
@@ -35,10 +34,9 @@ | |||
35 | static unsigned char days_in_mo[] = | 34 | static unsigned char days_in_mo[] = |
36 | {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; | 35 | {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; |
37 | 36 | ||
38 | static char rtc_status; | 37 | static atomic_t rtc_status = ATOMIC_INIT(1); |
39 | 38 | ||
40 | static int rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, | 39 | static long rtc_ioctl(struct file *file, unsigned int cmd, unsigned long arg) |
41 | unsigned long arg) | ||
42 | { | 40 | { |
43 | volatile RtcPtr_t rtc = (RtcPtr_t)BVME_RTC_BASE; | 41 | volatile RtcPtr_t rtc = (RtcPtr_t)BVME_RTC_BASE; |
44 | unsigned char msr; | 42 | unsigned char msr; |
@@ -132,29 +130,20 @@ static int rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, | |||
132 | } | 130 | } |
133 | 131 | ||
134 | /* | 132 | /* |
135 | * We enforce only one user at a time here with the open/close. | 133 | * We enforce only one user at a time here with the open/close. |
136 | * Also clear the previous interrupt data on an open, and clean | ||
137 | * up things on a close. | ||
138 | */ | 134 | */ |
139 | |||
140 | static int rtc_open(struct inode *inode, struct file *file) | 135 | static int rtc_open(struct inode *inode, struct file *file) |
141 | { | 136 | { |
142 | lock_kernel(); | 137 | if (!atomic_dec_and_test(&rtc_status)) { |
143 | if(rtc_status) { | 138 | atomic_inc(&rtc_status); |
144 | unlock_kernel(); | ||
145 | return -EBUSY; | 139 | return -EBUSY; |
146 | } | 140 | } |
147 | |||
148 | rtc_status = 1; | ||
149 | unlock_kernel(); | ||
150 | return 0; | 141 | return 0; |
151 | } | 142 | } |
152 | 143 | ||
153 | static int rtc_release(struct inode *inode, struct file *file) | 144 | static int rtc_release(struct inode *inode, struct file *file) |
154 | { | 145 | { |
155 | lock_kernel(); | 146 | atomic_inc(&rtc_status); |
156 | rtc_status = 0; | ||
157 | unlock_kernel(); | ||
158 | return 0; | 147 | return 0; |
159 | } | 148 | } |
160 | 149 | ||
@@ -163,9 +152,9 @@ static int rtc_release(struct inode *inode, struct file *file) | |||
163 | */ | 152 | */ |
164 | 153 | ||
165 | static const struct file_operations rtc_fops = { | 154 | static const struct file_operations rtc_fops = { |
166 | .ioctl = rtc_ioctl, | 155 | .unlocked_ioctl = rtc_ioctl, |
167 | .open = rtc_open, | 156 | .open = rtc_open, |
168 | .release = rtc_release, | 157 | .release = rtc_release, |
169 | }; | 158 | }; |
170 | 159 | ||
171 | static struct miscdevice rtc_dev = { | 160 | static struct miscdevice rtc_dev = { |
diff --git a/arch/m68k/hp300/time.h b/arch/m68k/hp300/time.h index f5b3d098b0f5..7b98242960de 100644 --- a/arch/m68k/hp300/time.h +++ b/arch/m68k/hp300/time.h | |||
@@ -1,4 +1,2 @@ | |||
1 | extern void hp300_sched_init(irq_handler_t vector); | 1 | extern void hp300_sched_init(irq_handler_t vector); |
2 | extern unsigned long hp300_gettimeoffset (void); | 2 | extern unsigned long hp300_gettimeoffset(void); |
3 | |||
4 | |||
diff --git a/arch/m68k/include/asm/atomic_mm.h b/arch/m68k/include/asm/atomic_mm.h index d9d2ed647435..6a223b3f7e74 100644 --- a/arch/m68k/include/asm/atomic_mm.h +++ b/arch/m68k/include/asm/atomic_mm.h | |||
@@ -15,7 +15,7 @@ | |||
15 | 15 | ||
16 | #define ATOMIC_INIT(i) { (i) } | 16 | #define ATOMIC_INIT(i) { (i) } |
17 | 17 | ||
18 | #define atomic_read(v) ((v)->counter) | 18 | #define atomic_read(v) (*(volatile int *)&(v)->counter) |
19 | #define atomic_set(v, i) (((v)->counter) = i) | 19 | #define atomic_set(v, i) (((v)->counter) = i) |
20 | 20 | ||
21 | static inline void atomic_add(int i, atomic_t *v) | 21 | static inline void atomic_add(int i, atomic_t *v) |
diff --git a/arch/m68k/include/asm/atomic_no.h b/arch/m68k/include/asm/atomic_no.h index 5674cb9449bd..289310c63a8a 100644 --- a/arch/m68k/include/asm/atomic_no.h +++ b/arch/m68k/include/asm/atomic_no.h | |||
@@ -15,7 +15,7 @@ | |||
15 | 15 | ||
16 | #define ATOMIC_INIT(i) { (i) } | 16 | #define ATOMIC_INIT(i) { (i) } |
17 | 17 | ||
18 | #define atomic_read(v) ((v)->counter) | 18 | #define atomic_read(v) (*(volatile int *)&(v)->counter) |
19 | #define atomic_set(v, i) (((v)->counter) = i) | 19 | #define atomic_set(v, i) (((v)->counter) = i) |
20 | 20 | ||
21 | static __inline__ void atomic_add(int i, atomic_t *v) | 21 | static __inline__ void atomic_add(int i, atomic_t *v) |
diff --git a/arch/m68k/include/asm/bitops_mm.h b/arch/m68k/include/asm/bitops_mm.h index 9bde784e7bad..b4ecdaada520 100644 --- a/arch/m68k/include/asm/bitops_mm.h +++ b/arch/m68k/include/asm/bitops_mm.h | |||
@@ -365,6 +365,10 @@ static inline int minix_test_bit(int nr, const void *vaddr) | |||
365 | #define ext2_set_bit_atomic(lock, nr, addr) test_and_set_bit((nr) ^ 24, (unsigned long *)(addr)) | 365 | #define ext2_set_bit_atomic(lock, nr, addr) test_and_set_bit((nr) ^ 24, (unsigned long *)(addr)) |
366 | #define ext2_clear_bit(nr, addr) __test_and_clear_bit((nr) ^ 24, (unsigned long *)(addr)) | 366 | #define ext2_clear_bit(nr, addr) __test_and_clear_bit((nr) ^ 24, (unsigned long *)(addr)) |
367 | #define ext2_clear_bit_atomic(lock, nr, addr) test_and_clear_bit((nr) ^ 24, (unsigned long *)(addr)) | 367 | #define ext2_clear_bit_atomic(lock, nr, addr) test_and_clear_bit((nr) ^ 24, (unsigned long *)(addr)) |
368 | #define ext2_find_next_zero_bit(addr, size, offset) \ | ||
369 | generic_find_next_zero_le_bit((unsigned long *)addr, size, offset) | ||
370 | #define ext2_find_next_bit(addr, size, offset) \ | ||
371 | generic_find_next_le_bit((unsigned long *)addr, size, offset) | ||
368 | 372 | ||
369 | static inline int ext2_test_bit(int nr, const void *vaddr) | 373 | static inline int ext2_test_bit(int nr, const void *vaddr) |
370 | { | 374 | { |
@@ -394,10 +398,9 @@ static inline int ext2_find_first_zero_bit(const void *vaddr, unsigned size) | |||
394 | return (p - addr) * 32 + res; | 398 | return (p - addr) * 32 + res; |
395 | } | 399 | } |
396 | 400 | ||
397 | static inline int ext2_find_next_zero_bit(const void *vaddr, unsigned size, | 401 | static inline unsigned long generic_find_next_zero_le_bit(const unsigned long *addr, |
398 | unsigned offset) | 402 | unsigned long size, unsigned long offset) |
399 | { | 403 | { |
400 | const unsigned long *addr = vaddr; | ||
401 | const unsigned long *p = addr + (offset >> 5); | 404 | const unsigned long *p = addr + (offset >> 5); |
402 | int bit = offset & 31UL, res; | 405 | int bit = offset & 31UL, res; |
403 | 406 | ||
@@ -437,10 +440,9 @@ static inline int ext2_find_first_bit(const void *vaddr, unsigned size) | |||
437 | return (p - addr) * 32 + res; | 440 | return (p - addr) * 32 + res; |
438 | } | 441 | } |
439 | 442 | ||
440 | static inline int ext2_find_next_bit(const void *vaddr, unsigned size, | 443 | static inline unsigned long generic_find_next_le_bit(const unsigned long *addr, |
441 | unsigned offset) | 444 | unsigned long size, unsigned long offset) |
442 | { | 445 | { |
443 | const unsigned long *addr = vaddr; | ||
444 | const unsigned long *p = addr + (offset >> 5); | 446 | const unsigned long *p = addr + (offset >> 5); |
445 | int bit = offset & 31UL, res; | 447 | int bit = offset & 31UL, res; |
446 | 448 | ||
diff --git a/arch/m68k/include/asm/param.h b/arch/m68k/include/asm/param.h index 85c41b75aa78..36265ccf5c7b 100644 --- a/arch/m68k/include/asm/param.h +++ b/arch/m68k/include/asm/param.h | |||
@@ -1,26 +1,12 @@ | |||
1 | #ifndef _M68K_PARAM_H | 1 | #ifndef _M68K_PARAM_H |
2 | #define _M68K_PARAM_H | 2 | #define _M68K_PARAM_H |
3 | 3 | ||
4 | #ifdef __KERNEL__ | ||
5 | # define HZ CONFIG_HZ /* Internal kernel timer frequency */ | ||
6 | # define USER_HZ 100 /* .. some user interfaces are in "ticks" */ | ||
7 | # define CLOCKS_PER_SEC (USER_HZ) /* like times() */ | ||
8 | #endif | ||
9 | |||
10 | #ifndef HZ | ||
11 | #define HZ 100 | ||
12 | #endif | ||
13 | |||
14 | #ifdef __uClinux__ | 4 | #ifdef __uClinux__ |
15 | #define EXEC_PAGESIZE 4096 | 5 | #define EXEC_PAGESIZE 4096 |
16 | #else | 6 | #else |
17 | #define EXEC_PAGESIZE 8192 | 7 | #define EXEC_PAGESIZE 8192 |
18 | #endif | 8 | #endif |
19 | 9 | ||
20 | #ifndef NOGROUP | 10 | #include <asm-generic/param.h> |
21 | #define NOGROUP (-1) | ||
22 | #endif | ||
23 | |||
24 | #define MAXHOSTNAMELEN 64 /* max length of hostname */ | ||
25 | 11 | ||
26 | #endif /* _M68K_PARAM_H */ | 12 | #endif /* _M68K_PARAM_H */ |
diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c index aacd6d17b833..ada4f4cca811 100644 --- a/arch/m68k/kernel/traps.c +++ b/arch/m68k/kernel/traps.c | |||
@@ -455,7 +455,7 @@ static inline void access_error040(struct frame *fp) | |||
455 | 455 | ||
456 | if (do_page_fault(&fp->ptregs, addr, errorcode)) { | 456 | if (do_page_fault(&fp->ptregs, addr, errorcode)) { |
457 | #ifdef DEBUG | 457 | #ifdef DEBUG |
458 | printk("do_page_fault() !=0 \n"); | 458 | printk("do_page_fault() !=0\n"); |
459 | #endif | 459 | #endif |
460 | if (user_mode(&fp->ptregs)){ | 460 | if (user_mode(&fp->ptregs)){ |
461 | /* delay writebacks after signal delivery */ | 461 | /* delay writebacks after signal delivery */ |
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index 0356da9bf763..1c16b1baf8db 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c | |||
@@ -148,7 +148,7 @@ static void mac_cache_card_flush(int writeback) | |||
148 | void __init config_mac(void) | 148 | void __init config_mac(void) |
149 | { | 149 | { |
150 | if (!MACH_IS_MAC) | 150 | if (!MACH_IS_MAC) |
151 | printk(KERN_ERR "ERROR: no Mac, but config_mac() called!! \n"); | 151 | printk(KERN_ERR "ERROR: no Mac, but config_mac() called!!\n"); |
152 | 152 | ||
153 | mach_sched_init = mac_sched_init; | 153 | mach_sched_init = mac_sched_init; |
154 | mach_init_IRQ = mac_init_IRQ; | 154 | mach_init_IRQ = mac_init_IRQ; |
@@ -867,7 +867,7 @@ static void __init mac_identify(void) | |||
867 | */ | 867 | */ |
868 | iop_preinit(); | 868 | iop_preinit(); |
869 | 869 | ||
870 | printk(KERN_INFO "Detected Macintosh model: %d \n", model); | 870 | printk(KERN_INFO "Detected Macintosh model: %d\n", model); |
871 | 871 | ||
872 | /* | 872 | /* |
873 | * Report booter data: | 873 | * Report booter data: |
@@ -878,12 +878,12 @@ static void __init mac_identify(void) | |||
878 | mac_bi_data.videoaddr, mac_bi_data.videorow, | 878 | mac_bi_data.videoaddr, mac_bi_data.videorow, |
879 | mac_bi_data.videodepth, mac_bi_data.dimensions & 0xFFFF, | 879 | mac_bi_data.videodepth, mac_bi_data.dimensions & 0xFFFF, |
880 | mac_bi_data.dimensions >> 16); | 880 | mac_bi_data.dimensions >> 16); |
881 | printk(KERN_DEBUG " Videological 0x%lx phys. 0x%lx, SCC at 0x%lx \n", | 881 | printk(KERN_DEBUG " Videological 0x%lx phys. 0x%lx, SCC at 0x%lx\n", |
882 | mac_bi_data.videological, mac_orig_videoaddr, | 882 | mac_bi_data.videological, mac_orig_videoaddr, |
883 | mac_bi_data.sccbase); | 883 | mac_bi_data.sccbase); |
884 | printk(KERN_DEBUG " Boottime: 0x%lx GMTBias: 0x%lx \n", | 884 | printk(KERN_DEBUG " Boottime: 0x%lx GMTBias: 0x%lx\n", |
885 | mac_bi_data.boottime, mac_bi_data.gmtbias); | 885 | mac_bi_data.boottime, mac_bi_data.gmtbias); |
886 | printk(KERN_DEBUG " Machine ID: %ld CPUid: 0x%lx memory size: 0x%lx \n", | 886 | printk(KERN_DEBUG " Machine ID: %ld CPUid: 0x%lx memory size: 0x%lx\n", |
887 | mac_bi_data.id, mac_bi_data.cpuid, mac_bi_data.memsize); | 887 | mac_bi_data.id, mac_bi_data.cpuid, mac_bi_data.memsize); |
888 | 888 | ||
889 | iop_init(); | 889 | iop_init(); |
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index d0e35cf99fc6..a96394a0333d 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c | |||
@@ -154,7 +154,6 @@ good_area: | |||
154 | * the fault. | 154 | * the fault. |
155 | */ | 155 | */ |
156 | 156 | ||
157 | survive: | ||
158 | fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0); | 157 | fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0); |
159 | #ifdef DEBUG | 158 | #ifdef DEBUG |
160 | printk("handle_mm_fault returns %d\n",fault); | 159 | printk("handle_mm_fault returns %d\n",fault); |
@@ -180,15 +179,10 @@ good_area: | |||
180 | */ | 179 | */ |
181 | out_of_memory: | 180 | out_of_memory: |
182 | up_read(&mm->mmap_sem); | 181 | up_read(&mm->mmap_sem); |
183 | if (is_global_init(current)) { | 182 | if (!user_mode(regs)) |
184 | yield(); | 183 | goto no_context; |
185 | down_read(&mm->mmap_sem); | 184 | pagefault_out_of_memory(); |
186 | goto survive; | 185 | return 0; |
187 | } | ||
188 | |||
189 | printk("VM: killing process %s\n", current->comm); | ||
190 | if (user_mode(regs)) | ||
191 | do_group_exit(SIGKILL); | ||
192 | 186 | ||
193 | no_context: | 187 | no_context: |
194 | current->thread.signo = SIGBUS; | 188 | current->thread.signo = SIGBUS; |
diff --git a/arch/m68k/mvme16x/rtc.c b/arch/m68k/mvme16x/rtc.c index 8da9c250d3e1..11ac6f63967a 100644 --- a/arch/m68k/mvme16x/rtc.c +++ b/arch/m68k/mvme16x/rtc.c | |||
@@ -9,7 +9,6 @@ | |||
9 | #include <linux/types.h> | 9 | #include <linux/types.h> |
10 | #include <linux/errno.h> | 10 | #include <linux/errno.h> |
11 | #include <linux/miscdevice.h> | 11 | #include <linux/miscdevice.h> |
12 | #include <linux/smp_lock.h> | ||
13 | #include <linux/ioport.h> | 12 | #include <linux/ioport.h> |
14 | #include <linux/capability.h> | 13 | #include <linux/capability.h> |
15 | #include <linux/fcntl.h> | 14 | #include <linux/fcntl.h> |
@@ -36,8 +35,7 @@ static const unsigned char days_in_mo[] = | |||
36 | 35 | ||
37 | static atomic_t rtc_ready = ATOMIC_INIT(1); | 36 | static atomic_t rtc_ready = ATOMIC_INIT(1); |
38 | 37 | ||
39 | static int rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, | 38 | static long rtc_ioctl(struct file *file, unsigned int cmd, unsigned long arg) |
40 | unsigned long arg) | ||
41 | { | 39 | { |
42 | volatile MK48T08ptr_t rtc = (MK48T08ptr_t)MVME_RTC_BASE; | 40 | volatile MK48T08ptr_t rtc = (MK48T08ptr_t)MVME_RTC_BASE; |
43 | unsigned long flags; | 41 | unsigned long flags; |
@@ -120,22 +118,15 @@ static int rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, | |||
120 | } | 118 | } |
121 | 119 | ||
122 | /* | 120 | /* |
123 | * We enforce only one user at a time here with the open/close. | 121 | * We enforce only one user at a time here with the open/close. |
124 | * Also clear the previous interrupt data on an open, and clean | ||
125 | * up things on a close. | ||
126 | */ | 122 | */ |
127 | |||
128 | static int rtc_open(struct inode *inode, struct file *file) | 123 | static int rtc_open(struct inode *inode, struct file *file) |
129 | { | 124 | { |
130 | lock_kernel(); | ||
131 | if( !atomic_dec_and_test(&rtc_ready) ) | 125 | if( !atomic_dec_and_test(&rtc_ready) ) |
132 | { | 126 | { |
133 | atomic_inc( &rtc_ready ); | 127 | atomic_inc( &rtc_ready ); |
134 | unlock_kernel(); | ||
135 | return -EBUSY; | 128 | return -EBUSY; |
136 | } | 129 | } |
137 | unlock_kernel(); | ||
138 | |||
139 | return 0; | 130 | return 0; |
140 | } | 131 | } |
141 | 132 | ||
@@ -150,9 +141,9 @@ static int rtc_release(struct inode *inode, struct file *file) | |||
150 | */ | 141 | */ |
151 | 142 | ||
152 | static const struct file_operations rtc_fops = { | 143 | static const struct file_operations rtc_fops = { |
153 | .ioctl = rtc_ioctl, | 144 | .unlocked_ioctl = rtc_ioctl, |
154 | .open = rtc_open, | 145 | .open = rtc_open, |
155 | .release = rtc_release, | 146 | .release = rtc_release, |
156 | }; | 147 | }; |
157 | 148 | ||
158 | static struct miscdevice rtc_dev= | 149 | static struct miscdevice rtc_dev= |
diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c index 31ab3f08bbda..ad10fecec2fe 100644 --- a/arch/m68k/q40/config.c +++ b/arch/m68k/q40/config.c | |||
@@ -126,7 +126,7 @@ static void q40_reset(void) | |||
126 | { | 126 | { |
127 | halted = 1; | 127 | halted = 1; |
128 | printk("\n\n*******************************************\n" | 128 | printk("\n\n*******************************************\n" |
129 | "Called q40_reset : press the RESET button!! \n" | 129 | "Called q40_reset : press the RESET button!!\n" |
130 | "*******************************************\n"); | 130 | "*******************************************\n"); |
131 | Q40_LED_ON(); | 131 | Q40_LED_ON(); |
132 | while (1) | 132 | while (1) |
diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h index 446bec29b142..26460d15b338 100644 --- a/arch/microblaze/include/asm/uaccess.h +++ b/arch/microblaze/include/asm/uaccess.h | |||
@@ -182,6 +182,39 @@ extern long __user_bad(void); | |||
182 | * Returns zero on success, or -EFAULT on error. | 182 | * Returns zero on success, or -EFAULT on error. |
183 | * On error, the variable @x is set to zero. | 183 | * On error, the variable @x is set to zero. |
184 | */ | 184 | */ |
185 | #define get_user(x, ptr) \ | ||
186 | __get_user_check((x), (ptr), sizeof(*(ptr))) | ||
187 | |||
188 | #define __get_user_check(x, ptr, size) \ | ||
189 | ({ \ | ||
190 | unsigned long __gu_val = 0; \ | ||
191 | const typeof(*(ptr)) __user *__gu_addr = (ptr); \ | ||
192 | int __gu_err = 0; \ | ||
193 | \ | ||
194 | if (access_ok(VERIFY_READ, __gu_addr, size)) { \ | ||
195 | switch (size) { \ | ||
196 | case 1: \ | ||
197 | __get_user_asm("lbu", __gu_addr, __gu_val, \ | ||
198 | __gu_err); \ | ||
199 | break; \ | ||
200 | case 2: \ | ||
201 | __get_user_asm("lhu", __gu_addr, __gu_val, \ | ||
202 | __gu_err); \ | ||
203 | break; \ | ||
204 | case 4: \ | ||
205 | __get_user_asm("lw", __gu_addr, __gu_val, \ | ||
206 | __gu_err); \ | ||
207 | break; \ | ||
208 | default: \ | ||
209 | __gu_err = __user_bad(); \ | ||
210 | break; \ | ||
211 | } \ | ||
212 | } else { \ | ||
213 | __gu_err = -EFAULT; \ | ||
214 | } \ | ||
215 | x = (typeof(*(ptr)))__gu_val; \ | ||
216 | __gu_err; \ | ||
217 | }) | ||
185 | 218 | ||
186 | #define __get_user(x, ptr) \ | 219 | #define __get_user(x, ptr) \ |
187 | ({ \ | 220 | ({ \ |
@@ -206,12 +239,6 @@ extern long __user_bad(void); | |||
206 | }) | 239 | }) |
207 | 240 | ||
208 | 241 | ||
209 | #define get_user(x, ptr) \ | ||
210 | ({ \ | ||
211 | access_ok(VERIFY_READ, (ptr), sizeof(*(ptr))) \ | ||
212 | ? __get_user((x), (ptr)) : -EFAULT; \ | ||
213 | }) | ||
214 | |||
215 | #define __put_user_asm(insn, __gu_ptr, __gu_val, __gu_err) \ | 242 | #define __put_user_asm(insn, __gu_ptr, __gu_val, __gu_err) \ |
216 | ({ \ | 243 | ({ \ |
217 | __asm__ __volatile__ ( \ | 244 | __asm__ __volatile__ ( \ |
@@ -266,6 +293,42 @@ extern long __user_bad(void); | |||
266 | * | 293 | * |
267 | * Returns zero on success, or -EFAULT on error. | 294 | * Returns zero on success, or -EFAULT on error. |
268 | */ | 295 | */ |
296 | #define put_user(x, ptr) \ | ||
297 | __put_user_check((x), (ptr), sizeof(*(ptr))) | ||
298 | |||
299 | #define __put_user_check(x, ptr, size) \ | ||
300 | ({ \ | ||
301 | typeof(*(ptr)) __pu_val; \ | ||
302 | typeof(*(ptr)) __user *__pu_addr = (ptr); \ | ||
303 | int __pu_err = 0; \ | ||
304 | \ | ||
305 | __pu_val = (x); \ | ||
306 | if (access_ok(VERIFY_WRITE, __pu_addr, size)) { \ | ||
307 | switch (size) { \ | ||
308 | case 1: \ | ||
309 | __put_user_asm("sb", __pu_addr, __pu_val, \ | ||
310 | __pu_err); \ | ||
311 | break; \ | ||
312 | case 2: \ | ||
313 | __put_user_asm("sh", __pu_addr, __pu_val, \ | ||
314 | __pu_err); \ | ||
315 | break; \ | ||
316 | case 4: \ | ||
317 | __put_user_asm("sw", __pu_addr, __pu_val, \ | ||
318 | __pu_err); \ | ||
319 | break; \ | ||
320 | case 8: \ | ||
321 | __put_user_asm_8(__pu_addr, __pu_val, __pu_err);\ | ||
322 | break; \ | ||
323 | default: \ | ||
324 | __pu_err = __user_bad(); \ | ||
325 | break; \ | ||
326 | } \ | ||
327 | } else { \ | ||
328 | __pu_err = -EFAULT; \ | ||
329 | } \ | ||
330 | __pu_err; \ | ||
331 | }) | ||
269 | 332 | ||
270 | #define __put_user(x, ptr) \ | 333 | #define __put_user(x, ptr) \ |
271 | ({ \ | 334 | ({ \ |
@@ -290,18 +353,6 @@ extern long __user_bad(void); | |||
290 | __gu_err; \ | 353 | __gu_err; \ |
291 | }) | 354 | }) |
292 | 355 | ||
293 | #ifndef CONFIG_MMU | ||
294 | |||
295 | #define put_user(x, ptr) __put_user((x), (ptr)) | ||
296 | |||
297 | #else /* CONFIG_MMU */ | ||
298 | |||
299 | #define put_user(x, ptr) \ | ||
300 | ({ \ | ||
301 | access_ok(VERIFY_WRITE, (ptr), sizeof(*(ptr))) \ | ||
302 | ? __put_user((x), (ptr)) : -EFAULT; \ | ||
303 | }) | ||
304 | #endif /* CONFIG_MMU */ | ||
305 | 356 | ||
306 | /* copy_to_from_user */ | 357 | /* copy_to_from_user */ |
307 | #define __copy_from_user(to, from, n) \ | 358 | #define __copy_from_user(to, from, n) \ |
diff --git a/arch/microblaze/kernel/cpu/cache.c b/arch/microblaze/kernel/cpu/cache.c index 21c3a92394de..109876e8d643 100644 --- a/arch/microblaze/kernel/cpu/cache.c +++ b/arch/microblaze/kernel/cpu/cache.c | |||
@@ -137,8 +137,9 @@ do { \ | |||
137 | do { \ | 137 | do { \ |
138 | int step = -line_length; \ | 138 | int step = -line_length; \ |
139 | int align = ~(line_length - 1); \ | 139 | int align = ~(line_length - 1); \ |
140 | int count; \ | ||
140 | end = ((end & align) == end) ? end - line_length : end & align; \ | 141 | end = ((end & align) == end) ? end - line_length : end & align; \ |
141 | int count = end - start; \ | 142 | count = end - start; \ |
142 | WARN_ON(count < 0); \ | 143 | WARN_ON(count < 0); \ |
143 | \ | 144 | \ |
144 | __asm__ __volatile__ (" 1: " #op " %0, %1; \ | 145 | __asm__ __volatile__ (" 1: " #op " %0, %1; \ |
diff --git a/arch/microblaze/kernel/entry-nommu.S b/arch/microblaze/kernel/entry-nommu.S index 391d6197fc3b..8cc18cd2cce6 100644 --- a/arch/microblaze/kernel/entry-nommu.S +++ b/arch/microblaze/kernel/entry-nommu.S | |||
@@ -476,6 +476,8 @@ ENTRY(ret_from_fork) | |||
476 | nop | 476 | nop |
477 | 477 | ||
478 | work_pending: | 478 | work_pending: |
479 | enable_irq | ||
480 | |||
479 | andi r11, r19, _TIF_NEED_RESCHED | 481 | andi r11, r19, _TIF_NEED_RESCHED |
480 | beqi r11, 1f | 482 | beqi r11, 1f |
481 | bralid r15, schedule | 483 | bralid r15, schedule |
diff --git a/arch/microblaze/kernel/microblaze_ksyms.c b/arch/microblaze/kernel/microblaze_ksyms.c index bc4dcb7d3861..ff85f7718035 100644 --- a/arch/microblaze/kernel/microblaze_ksyms.c +++ b/arch/microblaze/kernel/microblaze_ksyms.c | |||
@@ -52,3 +52,14 @@ EXPORT_SYMBOL_GPL(_ebss); | |||
52 | extern void _mcount(void); | 52 | extern void _mcount(void); |
53 | EXPORT_SYMBOL(_mcount); | 53 | EXPORT_SYMBOL(_mcount); |
54 | #endif | 54 | #endif |
55 | |||
56 | /* | ||
57 | * Assembly functions that may be used (directly or indirectly) by modules | ||
58 | */ | ||
59 | EXPORT_SYMBOL(__copy_tofrom_user); | ||
60 | EXPORT_SYMBOL(__strncpy_user); | ||
61 | |||
62 | #ifdef CONFIG_OPT_LIB_ASM | ||
63 | EXPORT_SYMBOL(memcpy); | ||
64 | EXPORT_SYMBOL(memmove); | ||
65 | #endif | ||
diff --git a/arch/microblaze/kernel/module.c b/arch/microblaze/kernel/module.c index cbecf110dc30..0e73f6606547 100644 --- a/arch/microblaze/kernel/module.c +++ b/arch/microblaze/kernel/module.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/string.h> | 16 | #include <linux/string.h> |
17 | 17 | ||
18 | #include <asm/pgtable.h> | 18 | #include <asm/pgtable.h> |
19 | #include <asm/cacheflush.h> | ||
19 | 20 | ||
20 | void *module_alloc(unsigned long size) | 21 | void *module_alloc(unsigned long size) |
21 | { | 22 | { |
@@ -151,6 +152,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab, | |||
151 | int module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs, | 152 | int module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs, |
152 | struct module *module) | 153 | struct module *module) |
153 | { | 154 | { |
155 | flush_dcache(); | ||
154 | return 0; | 156 | return 0; |
155 | } | 157 | } |
156 | 158 | ||
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index f42c2dde8b1c..cca3579d4268 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c | |||
@@ -47,6 +47,7 @@ unsigned long memory_start; | |||
47 | EXPORT_SYMBOL(memory_start); | 47 | EXPORT_SYMBOL(memory_start); |
48 | unsigned long memory_end; /* due to mm/nommu.c */ | 48 | unsigned long memory_end; /* due to mm/nommu.c */ |
49 | unsigned long memory_size; | 49 | unsigned long memory_size; |
50 | EXPORT_SYMBOL(memory_size); | ||
50 | 51 | ||
51 | /* | 52 | /* |
52 | * paging_init() sets up the page tables - in fact we've already done this. | 53 | * paging_init() sets up the page tables - in fact we've already done this. |
diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c index 784557fb28cf..59bf2335a4ce 100644 --- a/arch/microblaze/mm/pgtable.c +++ b/arch/microblaze/mm/pgtable.c | |||
@@ -42,6 +42,7 @@ | |||
42 | 42 | ||
43 | unsigned long ioremap_base; | 43 | unsigned long ioremap_base; |
44 | unsigned long ioremap_bot; | 44 | unsigned long ioremap_bot; |
45 | EXPORT_SYMBOL(ioremap_bot); | ||
45 | 46 | ||
46 | /* The maximum lowmem defaults to 768Mb, but this can be configured to | 47 | /* The maximum lowmem defaults to 768Mb, but this can be configured to |
47 | * another value. | 48 | * another value. |
diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c index 01c8c97c15b7..9cb782b8e036 100644 --- a/arch/microblaze/pci/pci-common.c +++ b/arch/microblaze/pci/pci-common.c | |||
@@ -1507,7 +1507,7 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus) | |||
1507 | pci_bus_add_devices(bus); | 1507 | pci_bus_add_devices(bus); |
1508 | 1508 | ||
1509 | /* Fixup EEH */ | 1509 | /* Fixup EEH */ |
1510 | eeh_add_device_tree_late(bus); | 1510 | /* eeh_add_device_tree_late(bus); */ |
1511 | } | 1511 | } |
1512 | EXPORT_SYMBOL_GPL(pcibios_finish_adding_to_bus); | 1512 | EXPORT_SYMBOL_GPL(pcibios_finish_adding_to_bus); |
1513 | 1513 | ||
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 519197ede089..59dc0c7ef733 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h | |||
@@ -29,7 +29,7 @@ | |||
29 | * | 29 | * |
30 | * Atomically reads the value of @v. | 30 | * Atomically reads the value of @v. |
31 | */ | 31 | */ |
32 | #define atomic_read(v) ((v)->counter) | 32 | #define atomic_read(v) (*(volatile int *)&(v)->counter) |
33 | 33 | ||
34 | /* | 34 | /* |
35 | * atomic_set - set atomic variable | 35 | * atomic_set - set atomic variable |
@@ -410,7 +410,7 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) | |||
410 | * @v: pointer of type atomic64_t | 410 | * @v: pointer of type atomic64_t |
411 | * | 411 | * |
412 | */ | 412 | */ |
413 | #define atomic64_read(v) ((v)->counter) | 413 | #define atomic64_read(v) (*(volatile long *)&(v)->counter) |
414 | 414 | ||
415 | /* | 415 | /* |
416 | * atomic64_set - set atomic variable | 416 | * atomic64_set - set atomic variable |
diff --git a/arch/mips/include/asm/i8253.h b/arch/mips/include/asm/i8253.h index 032ca73f181b..48bb82372994 100644 --- a/arch/mips/include/asm/i8253.h +++ b/arch/mips/include/asm/i8253.h | |||
@@ -12,7 +12,7 @@ | |||
12 | #define PIT_CH0 0x40 | 12 | #define PIT_CH0 0x40 |
13 | #define PIT_CH2 0x42 | 13 | #define PIT_CH2 0x42 |
14 | 14 | ||
15 | extern spinlock_t i8253_lock; | 15 | extern raw_spinlock_t i8253_lock; |
16 | 16 | ||
17 | extern void setup_pit_timer(void); | 17 | extern void setup_pit_timer(void); |
18 | 18 | ||
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index 49382d5e891a..c6e3c93ce7c7 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h | |||
@@ -135,6 +135,12 @@ | |||
135 | #define FPU_CSR_COND7 0x80000000 /* $fcc7 */ | 135 | #define FPU_CSR_COND7 0x80000000 /* $fcc7 */ |
136 | 136 | ||
137 | /* | 137 | /* |
138 | * Bits 18 - 20 of the FPU Status Register will be read as 0, | ||
139 | * and should be written as zero. | ||
140 | */ | ||
141 | #define FPU_CSR_RSVD 0x001c0000 | ||
142 | |||
143 | /* | ||
138 | * X the exception cause indicator | 144 | * X the exception cause indicator |
139 | * E the exception enable | 145 | * E the exception enable |
140 | * S the sticky/flag bit | 146 | * S the sticky/flag bit |
@@ -161,7 +167,8 @@ | |||
161 | #define FPU_CSR_UDF_S 0x00000008 | 167 | #define FPU_CSR_UDF_S 0x00000008 |
162 | #define FPU_CSR_INE_S 0x00000004 | 168 | #define FPU_CSR_INE_S 0x00000004 |
163 | 169 | ||
164 | /* rounding mode */ | 170 | /* Bits 0 and 1 of FPU Status Register specify the rounding mode */ |
171 | #define FPU_CSR_RM 0x00000003 | ||
165 | #define FPU_CSR_RN 0x0 /* nearest */ | 172 | #define FPU_CSR_RN 0x0 /* nearest */ |
166 | #define FPU_CSR_RZ 0x1 /* towards zero */ | 173 | #define FPU_CSR_RZ 0x1 /* towards zero */ |
167 | #define FPU_CSR_RU 0x2 /* towards +Infinity */ | 174 | #define FPU_CSR_RU 0x2 /* towards +Infinity */ |
diff --git a/arch/mips/kernel/i8253.c b/arch/mips/kernel/i8253.c index ed5c441615e4..94794062a177 100644 --- a/arch/mips/kernel/i8253.c +++ b/arch/mips/kernel/i8253.c | |||
@@ -15,7 +15,7 @@ | |||
15 | #include <asm/io.h> | 15 | #include <asm/io.h> |
16 | #include <asm/time.h> | 16 | #include <asm/time.h> |
17 | 17 | ||
18 | DEFINE_SPINLOCK(i8253_lock); | 18 | DEFINE_RAW_SPINLOCK(i8253_lock); |
19 | EXPORT_SYMBOL(i8253_lock); | 19 | EXPORT_SYMBOL(i8253_lock); |
20 | 20 | ||
21 | /* | 21 | /* |
@@ -26,7 +26,7 @@ EXPORT_SYMBOL(i8253_lock); | |||
26 | static void init_pit_timer(enum clock_event_mode mode, | 26 | static void init_pit_timer(enum clock_event_mode mode, |
27 | struct clock_event_device *evt) | 27 | struct clock_event_device *evt) |
28 | { | 28 | { |
29 | spin_lock(&i8253_lock); | 29 | raw_spin_lock(&i8253_lock); |
30 | 30 | ||
31 | switch(mode) { | 31 | switch(mode) { |
32 | case CLOCK_EVT_MODE_PERIODIC: | 32 | case CLOCK_EVT_MODE_PERIODIC: |
@@ -55,7 +55,7 @@ static void init_pit_timer(enum clock_event_mode mode, | |||
55 | /* Nothing to do here */ | 55 | /* Nothing to do here */ |
56 | break; | 56 | break; |
57 | } | 57 | } |
58 | spin_unlock(&i8253_lock); | 58 | raw_spin_unlock(&i8253_lock); |
59 | } | 59 | } |
60 | 60 | ||
61 | /* | 61 | /* |
@@ -65,10 +65,10 @@ static void init_pit_timer(enum clock_event_mode mode, | |||
65 | */ | 65 | */ |
66 | static int pit_next_event(unsigned long delta, struct clock_event_device *evt) | 66 | static int pit_next_event(unsigned long delta, struct clock_event_device *evt) |
67 | { | 67 | { |
68 | spin_lock(&i8253_lock); | 68 | raw_spin_lock(&i8253_lock); |
69 | outb_p(delta & 0xff , PIT_CH0); /* LSB */ | 69 | outb_p(delta & 0xff , PIT_CH0); /* LSB */ |
70 | outb(delta >> 8 , PIT_CH0); /* MSB */ | 70 | outb(delta >> 8 , PIT_CH0); /* MSB */ |
71 | spin_unlock(&i8253_lock); | 71 | raw_spin_unlock(&i8253_lock); |
72 | 72 | ||
73 | return 0; | 73 | return 0; |
74 | } | 74 | } |
@@ -137,7 +137,7 @@ static cycle_t pit_read(struct clocksource *cs) | |||
137 | static int old_count; | 137 | static int old_count; |
138 | static u32 old_jifs; | 138 | static u32 old_jifs; |
139 | 139 | ||
140 | spin_lock_irqsave(&i8253_lock, flags); | 140 | raw_spin_lock_irqsave(&i8253_lock, flags); |
141 | /* | 141 | /* |
142 | * Although our caller may have the read side of xtime_lock, | 142 | * Although our caller may have the read side of xtime_lock, |
143 | * this is now a seqlock, and we are cheating in this routine | 143 | * this is now a seqlock, and we are cheating in this routine |
@@ -183,7 +183,7 @@ static cycle_t pit_read(struct clocksource *cs) | |||
183 | old_count = count; | 183 | old_count = count; |
184 | old_jifs = jifs; | 184 | old_jifs = jifs; |
185 | 185 | ||
186 | spin_unlock_irqrestore(&i8253_lock, flags); | 186 | raw_spin_unlock_irqrestore(&i8253_lock, flags); |
187 | 187 | ||
188 | count = (LATCH - 1) - count; | 188 | count = (LATCH - 1) - count; |
189 | 189 | ||
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 44337ba03717..a5297e2a353a 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S | |||
@@ -385,7 +385,7 @@ EXPORT(sysn32_call_table) | |||
385 | PTR sys_fchmodat | 385 | PTR sys_fchmodat |
386 | PTR sys_faccessat | 386 | PTR sys_faccessat |
387 | PTR compat_sys_pselect6 | 387 | PTR compat_sys_pselect6 |
388 | PTR sys_ppoll /* 6265 */ | 388 | PTR compat_sys_ppoll /* 6265 */ |
389 | PTR sys_unshare | 389 | PTR sys_unshare |
390 | PTR sys_splice | 390 | PTR sys_splice |
391 | PTR sys_sync_file_range | 391 | PTR sys_sync_file_range |
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 8f2f8e9d8b21..f2338d1c0b48 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c | |||
@@ -78,6 +78,9 @@ DEFINE_PER_CPU(struct mips_fpu_emulator_stats, fpuemustats); | |||
78 | #define FPCREG_RID 0 /* $0 = revision id */ | 78 | #define FPCREG_RID 0 /* $0 = revision id */ |
79 | #define FPCREG_CSR 31 /* $31 = csr */ | 79 | #define FPCREG_CSR 31 /* $31 = csr */ |
80 | 80 | ||
81 | /* Determine rounding mode from the RM bits of the FCSR */ | ||
82 | #define modeindex(v) ((v) & FPU_CSR_RM) | ||
83 | |||
81 | /* Convert Mips rounding mode (0..3) to IEEE library modes. */ | 84 | /* Convert Mips rounding mode (0..3) to IEEE library modes. */ |
82 | static const unsigned char ieee_rm[4] = { | 85 | static const unsigned char ieee_rm[4] = { |
83 | [FPU_CSR_RN] = IEEE754_RN, | 86 | [FPU_CSR_RN] = IEEE754_RN, |
@@ -384,10 +387,14 @@ static int cop1Emulate(struct pt_regs *xcp, struct mips_fpu_struct *ctx) | |||
384 | (void *) (xcp->cp0_epc), | 387 | (void *) (xcp->cp0_epc), |
385 | MIPSInst_RT(ir), value); | 388 | MIPSInst_RT(ir), value); |
386 | #endif | 389 | #endif |
387 | value &= (FPU_CSR_FLUSH | FPU_CSR_ALL_E | FPU_CSR_ALL_S | 0x03); | 390 | |
388 | ctx->fcr31 &= ~(FPU_CSR_FLUSH | FPU_CSR_ALL_E | FPU_CSR_ALL_S | 0x03); | 391 | /* |
389 | /* convert to ieee library modes */ | 392 | * Don't write reserved bits, |
390 | ctx->fcr31 |= (value & ~0x3) | ieee_rm[value & 0x3]; | 393 | * and convert to ieee library modes |
394 | */ | ||
395 | ctx->fcr31 = (value & | ||
396 | ~(FPU_CSR_RSVD | FPU_CSR_RM)) | | ||
397 | ieee_rm[modeindex(value)]; | ||
391 | } | 398 | } |
392 | if ((ctx->fcr31 >> 5) & ctx->fcr31 & FPU_CSR_ALL_E) { | 399 | if ((ctx->fcr31 >> 5) & ctx->fcr31 & FPU_CSR_ALL_E) { |
393 | return SIGFPE; | 400 | return SIGFPE; |
diff --git a/arch/mips/oprofile/op_model_loongson2.c b/arch/mips/oprofile/op_model_loongson2.c index 29e2326b6257..fa3bf661ae29 100644 --- a/arch/mips/oprofile/op_model_loongson2.c +++ b/arch/mips/oprofile/op_model_loongson2.c | |||
@@ -122,7 +122,7 @@ static irqreturn_t loongson2_perfcount_handler(int irq, void *dev_id) | |||
122 | */ | 122 | */ |
123 | 123 | ||
124 | /* Check whether the irq belongs to me */ | 124 | /* Check whether the irq belongs to me */ |
125 | enabled = read_c0_perfcnt() & LOONGSON2_PERFCNT_INT_EN; | 125 | enabled = read_c0_perfctrl() & LOONGSON2_PERFCNT_INT_EN; |
126 | if (!enabled) | 126 | if (!enabled) |
127 | return IRQ_NONE; | 127 | return IRQ_NONE; |
128 | enabled = reg.cnt1_enabled | reg.cnt2_enabled; | 128 | enabled = reg.cnt1_enabled | reg.cnt2_enabled; |
diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h index 5bf5be9566de..e41222d6c2fd 100644 --- a/arch/mn10300/include/asm/atomic.h +++ b/arch/mn10300/include/asm/atomic.h | |||
@@ -31,7 +31,7 @@ | |||
31 | * Atomically reads the value of @v. Note that the guaranteed | 31 | * Atomically reads the value of @v. Note that the guaranteed |
32 | * useful range of an atomic_t is only 24 bits. | 32 | * useful range of an atomic_t is only 24 bits. |
33 | */ | 33 | */ |
34 | #define atomic_read(v) ((v)->counter) | 34 | #define atomic_read(v) (*(volatile int *)&(v)->counter) |
35 | 35 | ||
36 | /** | 36 | /** |
37 | * atomic_set - set atomic variable | 37 | * atomic_set - set atomic variable |
diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index 716634d1f546..f81955934aeb 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h | |||
@@ -189,7 +189,7 @@ static __inline__ void atomic_set(atomic_t *v, int i) | |||
189 | 189 | ||
190 | static __inline__ int atomic_read(const atomic_t *v) | 190 | static __inline__ int atomic_read(const atomic_t *v) |
191 | { | 191 | { |
192 | return v->counter; | 192 | return (*(volatile int *)&(v)->counter); |
193 | } | 193 | } |
194 | 194 | ||
195 | /* exported interface */ | 195 | /* exported interface */ |
@@ -286,7 +286,7 @@ atomic64_set(atomic64_t *v, s64 i) | |||
286 | static __inline__ s64 | 286 | static __inline__ s64 |
287 | atomic64_read(const atomic64_t *v) | 287 | atomic64_read(const atomic64_t *v) |
288 | { | 288 | { |
289 | return v->counter; | 289 | return (*(volatile long *)&(v)->counter); |
290 | } | 290 | } |
291 | 291 | ||
292 | #define atomic64_add(i,v) ((void)(__atomic64_add_return( ((s64)(i)),(v)))) | 292 | #define atomic64_add(i,v) ((void)(__atomic64_add_return( ((s64)(i)),(v)))) |
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 9f4c9d4f5803..bd100fcf40d0 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h | |||
@@ -130,43 +130,5 @@ static inline int irqs_disabled_flags(unsigned long flags) | |||
130 | */ | 130 | */ |
131 | struct irq_chip; | 131 | struct irq_chip; |
132 | 132 | ||
133 | #ifdef CONFIG_PERF_EVENTS | ||
134 | |||
135 | #ifdef CONFIG_PPC64 | ||
136 | static inline unsigned long test_perf_event_pending(void) | ||
137 | { | ||
138 | unsigned long x; | ||
139 | |||
140 | asm volatile("lbz %0,%1(13)" | ||
141 | : "=r" (x) | ||
142 | : "i" (offsetof(struct paca_struct, perf_event_pending))); | ||
143 | return x; | ||
144 | } | ||
145 | |||
146 | static inline void set_perf_event_pending(void) | ||
147 | { | ||
148 | asm volatile("stb %0,%1(13)" : : | ||
149 | "r" (1), | ||
150 | "i" (offsetof(struct paca_struct, perf_event_pending))); | ||
151 | } | ||
152 | |||
153 | static inline void clear_perf_event_pending(void) | ||
154 | { | ||
155 | asm volatile("stb %0,%1(13)" : : | ||
156 | "r" (0), | ||
157 | "i" (offsetof(struct paca_struct, perf_event_pending))); | ||
158 | } | ||
159 | #endif /* CONFIG_PPC64 */ | ||
160 | |||
161 | #else /* CONFIG_PERF_EVENTS */ | ||
162 | |||
163 | static inline unsigned long test_perf_event_pending(void) | ||
164 | { | ||
165 | return 0; | ||
166 | } | ||
167 | |||
168 | static inline void clear_perf_event_pending(void) {} | ||
169 | #endif /* CONFIG_PERF_EVENTS */ | ||
170 | |||
171 | #endif /* __KERNEL__ */ | 133 | #endif /* __KERNEL__ */ |
172 | #endif /* _ASM_POWERPC_HW_IRQ_H */ | 134 | #endif /* _ASM_POWERPC_HW_IRQ_H */ |
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 957ceb7059c5..c09138d150d4 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c | |||
@@ -133,7 +133,6 @@ int main(void) | |||
133 | DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); | 133 | DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); |
134 | DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); | 134 | DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); |
135 | DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); | 135 | DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); |
136 | DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_event_pending)); | ||
137 | DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); | 136 | DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); |
138 | #ifdef CONFIG_PPC_MM_SLICES | 137 | #ifdef CONFIG_PPC_MM_SLICES |
139 | DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct, | 138 | DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct, |
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 59c928564a03..4ff4da2c238b 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c | |||
@@ -1,7 +1,8 @@ | |||
1 | /* | 1 | /* |
2 | * Contains routines needed to support swiotlb for ppc. | 2 | * Contains routines needed to support swiotlb for ppc. |
3 | * | 3 | * |
4 | * Copyright (C) 2009 Becky Bruce, Freescale Semiconductor | 4 | * Copyright (C) 2009-2010 Freescale Semiconductor, Inc. |
5 | * Author: Becky Bruce | ||
5 | * | 6 | * |
6 | * This program is free software; you can redistribute it and/or modify it | 7 | * This program is free software; you can redistribute it and/or modify it |
7 | * under the terms of the GNU General Public License as published by the | 8 | * under the terms of the GNU General Public License as published by the |
@@ -70,7 +71,7 @@ static int ppc_swiotlb_bus_notify(struct notifier_block *nb, | |||
70 | sd->max_direct_dma_addr = 0; | 71 | sd->max_direct_dma_addr = 0; |
71 | 72 | ||
72 | /* May need to bounce if the device can't address all of DRAM */ | 73 | /* May need to bounce if the device can't address all of DRAM */ |
73 | if (dma_get_mask(dev) < lmb_end_of_DRAM()) | 74 | if ((dma_get_mask(dev) + 1) < lmb_end_of_DRAM()) |
74 | set_dma_ops(dev, &swiotlb_dma_ops); | 75 | set_dma_ops(dev, &swiotlb_dma_ops); |
75 | 76 | ||
76 | return NOTIFY_DONE; | 77 | return NOTIFY_DONE; |
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 07109d843787..42e9d908914a 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S | |||
@@ -556,15 +556,6 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) | |||
556 | 2: | 556 | 2: |
557 | TRACE_AND_RESTORE_IRQ(r5); | 557 | TRACE_AND_RESTORE_IRQ(r5); |
558 | 558 | ||
559 | #ifdef CONFIG_PERF_EVENTS | ||
560 | /* check paca->perf_event_pending if we're enabling ints */ | ||
561 | lbz r3,PACAPERFPEND(r13) | ||
562 | and. r3,r3,r5 | ||
563 | beq 27f | ||
564 | bl .perf_event_do_pending | ||
565 | 27: | ||
566 | #endif /* CONFIG_PERF_EVENTS */ | ||
567 | |||
568 | /* extract EE bit and use it to restore paca->hard_enabled */ | 559 | /* extract EE bit and use it to restore paca->hard_enabled */ |
569 | ld r3,_MSR(r1) | 560 | ld r3,_MSR(r1) |
570 | rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */ | 561 | rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */ |
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 64f6f2031c22..066bd31551d5 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c | |||
@@ -53,7 +53,6 @@ | |||
53 | #include <linux/bootmem.h> | 53 | #include <linux/bootmem.h> |
54 | #include <linux/pci.h> | 54 | #include <linux/pci.h> |
55 | #include <linux/debugfs.h> | 55 | #include <linux/debugfs.h> |
56 | #include <linux/perf_event.h> | ||
57 | 56 | ||
58 | #include <asm/uaccess.h> | 57 | #include <asm/uaccess.h> |
59 | #include <asm/system.h> | 58 | #include <asm/system.h> |
@@ -145,11 +144,6 @@ notrace void raw_local_irq_restore(unsigned long en) | |||
145 | } | 144 | } |
146 | #endif /* CONFIG_PPC_STD_MMU_64 */ | 145 | #endif /* CONFIG_PPC_STD_MMU_64 */ |
147 | 146 | ||
148 | if (test_perf_event_pending()) { | ||
149 | clear_perf_event_pending(); | ||
150 | perf_event_do_pending(); | ||
151 | } | ||
152 | |||
153 | /* | 147 | /* |
154 | * if (get_paca()->hard_enabled) return; | 148 | * if (get_paca()->hard_enabled) return; |
155 | * But again we need to take care that gcc gets hard_enabled directly | 149 | * But again we need to take care that gcc gets hard_enabled directly |
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 08460a2e9f41..43b83c35cf54 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c | |||
@@ -35,6 +35,9 @@ struct cpu_hw_events { | |||
35 | u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; | 35 | u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; |
36 | unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; | 36 | unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; |
37 | unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; | 37 | unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; |
38 | |||
39 | unsigned int group_flag; | ||
40 | int n_txn_start; | ||
38 | }; | 41 | }; |
39 | DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); | 42 | DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); |
40 | 43 | ||
@@ -718,66 +721,6 @@ static int collect_events(struct perf_event *group, int max_count, | |||
718 | return n; | 721 | return n; |
719 | } | 722 | } |
720 | 723 | ||
721 | static void event_sched_in(struct perf_event *event) | ||
722 | { | ||
723 | event->state = PERF_EVENT_STATE_ACTIVE; | ||
724 | event->oncpu = smp_processor_id(); | ||
725 | event->tstamp_running += event->ctx->time - event->tstamp_stopped; | ||
726 | if (is_software_event(event)) | ||
727 | event->pmu->enable(event); | ||
728 | } | ||
729 | |||
730 | /* | ||
731 | * Called to enable a whole group of events. | ||
732 | * Returns 1 if the group was enabled, or -EAGAIN if it could not be. | ||
733 | * Assumes the caller has disabled interrupts and has | ||
734 | * frozen the PMU with hw_perf_save_disable. | ||
735 | */ | ||
736 | int hw_perf_group_sched_in(struct perf_event *group_leader, | ||
737 | struct perf_cpu_context *cpuctx, | ||
738 | struct perf_event_context *ctx) | ||
739 | { | ||
740 | struct cpu_hw_events *cpuhw; | ||
741 | long i, n, n0; | ||
742 | struct perf_event *sub; | ||
743 | |||
744 | if (!ppmu) | ||
745 | return 0; | ||
746 | cpuhw = &__get_cpu_var(cpu_hw_events); | ||
747 | n0 = cpuhw->n_events; | ||
748 | n = collect_events(group_leader, ppmu->n_counter - n0, | ||
749 | &cpuhw->event[n0], &cpuhw->events[n0], | ||
750 | &cpuhw->flags[n0]); | ||
751 | if (n < 0) | ||
752 | return -EAGAIN; | ||
753 | if (check_excludes(cpuhw->event, cpuhw->flags, n0, n)) | ||
754 | return -EAGAIN; | ||
755 | i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n + n0); | ||
756 | if (i < 0) | ||
757 | return -EAGAIN; | ||
758 | cpuhw->n_events = n0 + n; | ||
759 | cpuhw->n_added += n; | ||
760 | |||
761 | /* | ||
762 | * OK, this group can go on; update event states etc., | ||
763 | * and enable any software events | ||
764 | */ | ||
765 | for (i = n0; i < n0 + n; ++i) | ||
766 | cpuhw->event[i]->hw.config = cpuhw->events[i]; | ||
767 | cpuctx->active_oncpu += n; | ||
768 | n = 1; | ||
769 | event_sched_in(group_leader); | ||
770 | list_for_each_entry(sub, &group_leader->sibling_list, group_entry) { | ||
771 | if (sub->state != PERF_EVENT_STATE_OFF) { | ||
772 | event_sched_in(sub); | ||
773 | ++n; | ||
774 | } | ||
775 | } | ||
776 | ctx->nr_active += n; | ||
777 | |||
778 | return 1; | ||
779 | } | ||
780 | |||
781 | /* | 724 | /* |
782 | * Add a event to the PMU. | 725 | * Add a event to the PMU. |
783 | * If all events are not already frozen, then we disable and | 726 | * If all events are not already frozen, then we disable and |
@@ -805,12 +748,22 @@ static int power_pmu_enable(struct perf_event *event) | |||
805 | cpuhw->event[n0] = event; | 748 | cpuhw->event[n0] = event; |
806 | cpuhw->events[n0] = event->hw.config; | 749 | cpuhw->events[n0] = event->hw.config; |
807 | cpuhw->flags[n0] = event->hw.event_base; | 750 | cpuhw->flags[n0] = event->hw.event_base; |
751 | |||
752 | /* | ||
753 | * If group events scheduling transaction was started, | ||
754 | * skip the schedulability test here, it will be peformed | ||
755 | * at commit time(->commit_txn) as a whole | ||
756 | */ | ||
757 | if (cpuhw->group_flag & PERF_EVENT_TXN_STARTED) | ||
758 | goto nocheck; | ||
759 | |||
808 | if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1)) | 760 | if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1)) |
809 | goto out; | 761 | goto out; |
810 | if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1)) | 762 | if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1)) |
811 | goto out; | 763 | goto out; |
812 | |||
813 | event->hw.config = cpuhw->events[n0]; | 764 | event->hw.config = cpuhw->events[n0]; |
765 | |||
766 | nocheck: | ||
814 | ++cpuhw->n_events; | 767 | ++cpuhw->n_events; |
815 | ++cpuhw->n_added; | 768 | ++cpuhw->n_added; |
816 | 769 | ||
@@ -896,11 +849,65 @@ static void power_pmu_unthrottle(struct perf_event *event) | |||
896 | local_irq_restore(flags); | 849 | local_irq_restore(flags); |
897 | } | 850 | } |
898 | 851 | ||
852 | /* | ||
853 | * Start group events scheduling transaction | ||
854 | * Set the flag to make pmu::enable() not perform the | ||
855 | * schedulability test, it will be performed at commit time | ||
856 | */ | ||
857 | void power_pmu_start_txn(const struct pmu *pmu) | ||
858 | { | ||
859 | struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); | ||
860 | |||
861 | cpuhw->group_flag |= PERF_EVENT_TXN_STARTED; | ||
862 | cpuhw->n_txn_start = cpuhw->n_events; | ||
863 | } | ||
864 | |||
865 | /* | ||
866 | * Stop group events scheduling transaction | ||
867 | * Clear the flag and pmu::enable() will perform the | ||
868 | * schedulability test. | ||
869 | */ | ||
870 | void power_pmu_cancel_txn(const struct pmu *pmu) | ||
871 | { | ||
872 | struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); | ||
873 | |||
874 | cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED; | ||
875 | } | ||
876 | |||
877 | /* | ||
878 | * Commit group events scheduling transaction | ||
879 | * Perform the group schedulability test as a whole | ||
880 | * Return 0 if success | ||
881 | */ | ||
882 | int power_pmu_commit_txn(const struct pmu *pmu) | ||
883 | { | ||
884 | struct cpu_hw_events *cpuhw; | ||
885 | long i, n; | ||
886 | |||
887 | if (!ppmu) | ||
888 | return -EAGAIN; | ||
889 | cpuhw = &__get_cpu_var(cpu_hw_events); | ||
890 | n = cpuhw->n_events; | ||
891 | if (check_excludes(cpuhw->event, cpuhw->flags, 0, n)) | ||
892 | return -EAGAIN; | ||
893 | i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n); | ||
894 | if (i < 0) | ||
895 | return -EAGAIN; | ||
896 | |||
897 | for (i = cpuhw->n_txn_start; i < n; ++i) | ||
898 | cpuhw->event[i]->hw.config = cpuhw->events[i]; | ||
899 | |||
900 | return 0; | ||
901 | } | ||
902 | |||
899 | struct pmu power_pmu = { | 903 | struct pmu power_pmu = { |
900 | .enable = power_pmu_enable, | 904 | .enable = power_pmu_enable, |
901 | .disable = power_pmu_disable, | 905 | .disable = power_pmu_disable, |
902 | .read = power_pmu_read, | 906 | .read = power_pmu_read, |
903 | .unthrottle = power_pmu_unthrottle, | 907 | .unthrottle = power_pmu_unthrottle, |
908 | .start_txn = power_pmu_start_txn, | ||
909 | .cancel_txn = power_pmu_cancel_txn, | ||
910 | .commit_txn = power_pmu_commit_txn, | ||
904 | }; | 911 | }; |
905 | 912 | ||
906 | /* | 913 | /* |
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 1b16b9a3e49a..0441bbdadbd1 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c | |||
@@ -532,25 +532,60 @@ void __init iSeries_time_init_early(void) | |||
532 | } | 532 | } |
533 | #endif /* CONFIG_PPC_ISERIES */ | 533 | #endif /* CONFIG_PPC_ISERIES */ |
534 | 534 | ||
535 | #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_PPC32) | 535 | #ifdef CONFIG_PERF_EVENTS |
536 | DEFINE_PER_CPU(u8, perf_event_pending); | ||
537 | 536 | ||
538 | void set_perf_event_pending(void) | 537 | /* |
538 | * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... | ||
539 | */ | ||
540 | #ifdef CONFIG_PPC64 | ||
541 | static inline unsigned long test_perf_event_pending(void) | ||
539 | { | 542 | { |
540 | get_cpu_var(perf_event_pending) = 1; | 543 | unsigned long x; |
541 | set_dec(1); | 544 | |
542 | put_cpu_var(perf_event_pending); | 545 | asm volatile("lbz %0,%1(13)" |
546 | : "=r" (x) | ||
547 | : "i" (offsetof(struct paca_struct, perf_event_pending))); | ||
548 | return x; | ||
543 | } | 549 | } |
544 | 550 | ||
551 | static inline void set_perf_event_pending_flag(void) | ||
552 | { | ||
553 | asm volatile("stb %0,%1(13)" : : | ||
554 | "r" (1), | ||
555 | "i" (offsetof(struct paca_struct, perf_event_pending))); | ||
556 | } | ||
557 | |||
558 | static inline void clear_perf_event_pending(void) | ||
559 | { | ||
560 | asm volatile("stb %0,%1(13)" : : | ||
561 | "r" (0), | ||
562 | "i" (offsetof(struct paca_struct, perf_event_pending))); | ||
563 | } | ||
564 | |||
565 | #else /* 32-bit */ | ||
566 | |||
567 | DEFINE_PER_CPU(u8, perf_event_pending); | ||
568 | |||
569 | #define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1 | ||
545 | #define test_perf_event_pending() __get_cpu_var(perf_event_pending) | 570 | #define test_perf_event_pending() __get_cpu_var(perf_event_pending) |
546 | #define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 | 571 | #define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 |
547 | 572 | ||
548 | #else /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */ | 573 | #endif /* 32 vs 64 bit */ |
574 | |||
575 | void set_perf_event_pending(void) | ||
576 | { | ||
577 | preempt_disable(); | ||
578 | set_perf_event_pending_flag(); | ||
579 | set_dec(1); | ||
580 | preempt_enable(); | ||
581 | } | ||
582 | |||
583 | #else /* CONFIG_PERF_EVENTS */ | ||
549 | 584 | ||
550 | #define test_perf_event_pending() 0 | 585 | #define test_perf_event_pending() 0 |
551 | #define clear_perf_event_pending() | 586 | #define clear_perf_event_pending() |
552 | 587 | ||
553 | #endif /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */ | 588 | #endif /* CONFIG_PERF_EVENTS */ |
554 | 589 | ||
555 | /* | 590 | /* |
556 | * For iSeries shared processors, we have to let the hypervisor | 591 | * For iSeries shared processors, we have to let the hypervisor |
@@ -582,10 +617,6 @@ void timer_interrupt(struct pt_regs * regs) | |||
582 | set_dec(DECREMENTER_MAX); | 617 | set_dec(DECREMENTER_MAX); |
583 | 618 | ||
584 | #ifdef CONFIG_PPC32 | 619 | #ifdef CONFIG_PPC32 |
585 | if (test_perf_event_pending()) { | ||
586 | clear_perf_event_pending(); | ||
587 | perf_event_do_pending(); | ||
588 | } | ||
589 | if (atomic_read(&ppc_n_lost_interrupts) != 0) | 620 | if (atomic_read(&ppc_n_lost_interrupts) != 0) |
590 | do_IRQ(regs); | 621 | do_IRQ(regs); |
591 | #endif | 622 | #endif |
@@ -604,6 +635,11 @@ void timer_interrupt(struct pt_regs * regs) | |||
604 | 635 | ||
605 | calculate_steal_time(); | 636 | calculate_steal_time(); |
606 | 637 | ||
638 | if (test_perf_event_pending()) { | ||
639 | clear_perf_event_pending(); | ||
640 | perf_event_do_pending(); | ||
641 | } | ||
642 | |||
607 | #ifdef CONFIG_PPC_ISERIES | 643 | #ifdef CONFIG_PPC_ISERIES |
608 | if (firmware_has_feature(FW_FEATURE_ISERIES)) | 644 | if (firmware_has_feature(FW_FEATURE_ISERIES)) |
609 | get_lppaca()->int_dword.fields.decr_int = 0; | 645 | get_lppaca()->int_dword.fields.decr_int = 0; |
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index 2570fcc7665d..812312542e50 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c | |||
@@ -440,7 +440,7 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws) | |||
440 | unsigned int gtlb_index; | 440 | unsigned int gtlb_index; |
441 | 441 | ||
442 | gtlb_index = kvmppc_get_gpr(vcpu, ra); | 442 | gtlb_index = kvmppc_get_gpr(vcpu, ra); |
443 | if (gtlb_index > KVM44x_GUEST_TLB_SIZE) { | 443 | if (gtlb_index >= KVM44x_GUEST_TLB_SIZE) { |
444 | printk("%s: index %d\n", __func__, gtlb_index); | 444 | printk("%s: index %d\n", __func__, gtlb_index); |
445 | kvmppc_dump_vcpu(vcpu); | 445 | kvmppc_dump_vcpu(vcpu); |
446 | return EMULATE_FAIL; | 446 | return EMULATE_FAIL; |
diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S index 1bbcc499d455..b8f8dc126102 100644 --- a/arch/s390/kernel/head31.S +++ b/arch/s390/kernel/head31.S | |||
@@ -82,7 +82,7 @@ startup_continue: | |||
82 | _ehead: | 82 | _ehead: |
83 | 83 | ||
84 | #ifdef CONFIG_SHARED_KERNEL | 84 | #ifdef CONFIG_SHARED_KERNEL |
85 | .org 0x100000 | 85 | .org 0x100000 - 0x11000 # head.o ends at 0x11000 |
86 | #endif | 86 | #endif |
87 | 87 | ||
88 | # | 88 | # |
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 1f70970de0aa..cdef68717416 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S | |||
@@ -80,7 +80,7 @@ startup_continue: | |||
80 | _ehead: | 80 | _ehead: |
81 | 81 | ||
82 | #ifdef CONFIG_SHARED_KERNEL | 82 | #ifdef CONFIG_SHARED_KERNEL |
83 | .org 0x100000 | 83 | .org 0x100000 - 0x11000 # head.o ends at 0x11000 |
84 | #endif | 84 | #endif |
85 | 85 | ||
86 | # | 86 | # |
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 33fdc5a79764..9f654da4cecc 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c | |||
@@ -640,7 +640,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, | |||
640 | 640 | ||
641 | asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) | 641 | asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) |
642 | { | 642 | { |
643 | long ret; | 643 | long ret = 0; |
644 | 644 | ||
645 | /* Do the secure computing check first. */ | 645 | /* Do the secure computing check first. */ |
646 | secure_computing(regs->gprs[2]); | 646 | secure_computing(regs->gprs[2]); |
@@ -649,7 +649,6 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) | |||
649 | * The sysc_tracesys code in entry.S stored the system | 649 | * The sysc_tracesys code in entry.S stored the system |
650 | * call number to gprs[2]. | 650 | * call number to gprs[2]. |
651 | */ | 651 | */ |
652 | ret = regs->gprs[2]; | ||
653 | if (test_thread_flag(TIF_SYSCALL_TRACE) && | 652 | if (test_thread_flag(TIF_SYSCALL_TRACE) && |
654 | (tracehook_report_syscall_entry(regs) || | 653 | (tracehook_report_syscall_entry(regs) || |
655 | regs->gprs[2] >= NR_syscalls)) { | 654 | regs->gprs[2] >= NR_syscalls)) { |
@@ -671,7 +670,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) | |||
671 | regs->gprs[2], regs->orig_gpr2, | 670 | regs->gprs[2], regs->orig_gpr2, |
672 | regs->gprs[3], regs->gprs[4], | 671 | regs->gprs[3], regs->gprs[4], |
673 | regs->gprs[5]); | 672 | regs->gprs[5]); |
674 | return ret; | 673 | return ret ?: regs->gprs[2]; |
675 | } | 674 | } |
676 | 675 | ||
677 | asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) | 676 | asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) |
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index d906bf19c14a..a2163c95eb98 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c | |||
@@ -391,7 +391,6 @@ static void __init time_init_wq(void) | |||
391 | if (time_sync_wq) | 391 | if (time_sync_wq) |
392 | return; | 392 | return; |
393 | time_sync_wq = create_singlethread_workqueue("timesync"); | 393 | time_sync_wq = create_singlethread_workqueue("timesync"); |
394 | stop_machine_create(); | ||
395 | } | 394 | } |
396 | 395 | ||
397 | /* | 396 | /* |
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 8d90564c2bcf..e6d8ab5cfa9d 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig | |||
@@ -44,6 +44,7 @@ config SUPERH32 | |||
44 | select HAVE_FUNCTION_GRAPH_TRACER | 44 | select HAVE_FUNCTION_GRAPH_TRACER |
45 | select HAVE_ARCH_KGDB | 45 | select HAVE_ARCH_KGDB |
46 | select HAVE_HW_BREAKPOINT | 46 | select HAVE_HW_BREAKPOINT |
47 | select HAVE_MIXED_BREAKPOINTS_REGS | ||
47 | select PERF_EVENTS if HAVE_HW_BREAKPOINT | 48 | select PERF_EVENTS if HAVE_HW_BREAKPOINT |
48 | select ARCH_HIBERNATION_POSSIBLE if MMU | 49 | select ARCH_HIBERNATION_POSSIBLE if MMU |
49 | 50 | ||
diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h index 275a448ae8c2..c7983124d99d 100644 --- a/arch/sh/include/asm/atomic.h +++ b/arch/sh/include/asm/atomic.h | |||
@@ -13,7 +13,7 @@ | |||
13 | 13 | ||
14 | #define ATOMIC_INIT(i) ( (atomic_t) { (i) } ) | 14 | #define ATOMIC_INIT(i) ( (atomic_t) { (i) } ) |
15 | 15 | ||
16 | #define atomic_read(v) ((v)->counter) | 16 | #define atomic_read(v) (*(volatile int *)&(v)->counter) |
17 | #define atomic_set(v,i) ((v)->counter = (i)) | 17 | #define atomic_set(v,i) ((v)->counter = (i)) |
18 | 18 | ||
19 | #if defined(CONFIG_GUSA_RB) | 19 | #if defined(CONFIG_GUSA_RB) |
diff --git a/arch/sh/include/asm/hw_breakpoint.h b/arch/sh/include/asm/hw_breakpoint.h index 965dd780d51b..e14cad96798f 100644 --- a/arch/sh/include/asm/hw_breakpoint.h +++ b/arch/sh/include/asm/hw_breakpoint.h | |||
@@ -46,10 +46,14 @@ struct pmu; | |||
46 | /* Maximum number of UBC channels */ | 46 | /* Maximum number of UBC channels */ |
47 | #define HBP_NUM 2 | 47 | #define HBP_NUM 2 |
48 | 48 | ||
49 | static inline int hw_breakpoint_slots(int type) | ||
50 | { | ||
51 | return HBP_NUM; | ||
52 | } | ||
53 | |||
49 | /* arch/sh/kernel/hw_breakpoint.c */ | 54 | /* arch/sh/kernel/hw_breakpoint.c */ |
50 | extern int arch_check_va_in_userspace(unsigned long va, u16 hbp_len); | 55 | extern int arch_check_bp_in_kernelspace(struct perf_event *bp); |
51 | extern int arch_validate_hwbkpt_settings(struct perf_event *bp, | 56 | extern int arch_validate_hwbkpt_settings(struct perf_event *bp); |
52 | struct task_struct *tsk); | ||
53 | extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, | 57 | extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, |
54 | unsigned long val, void *data); | 58 | unsigned long val, void *data); |
55 | 59 | ||
diff --git a/arch/sh/kernel/hw_breakpoint.c b/arch/sh/kernel/hw_breakpoint.c index 675eea7785d9..1f2cf6229862 100644 --- a/arch/sh/kernel/hw_breakpoint.c +++ b/arch/sh/kernel/hw_breakpoint.c | |||
@@ -120,25 +120,16 @@ static int get_hbp_len(u16 hbp_len) | |||
120 | } | 120 | } |
121 | 121 | ||
122 | /* | 122 | /* |
123 | * Check for virtual address in user space. | ||
124 | */ | ||
125 | int arch_check_va_in_userspace(unsigned long va, u16 hbp_len) | ||
126 | { | ||
127 | unsigned int len; | ||
128 | |||
129 | len = get_hbp_len(hbp_len); | ||
130 | |||
131 | return (va <= TASK_SIZE - len); | ||
132 | } | ||
133 | |||
134 | /* | ||
135 | * Check for virtual address in kernel space. | 123 | * Check for virtual address in kernel space. |
136 | */ | 124 | */ |
137 | static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) | 125 | int arch_check_bp_in_kernelspace(struct perf_event *bp) |
138 | { | 126 | { |
139 | unsigned int len; | 127 | unsigned int len; |
128 | unsigned long va; | ||
129 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
140 | 130 | ||
141 | len = get_hbp_len(hbp_len); | 131 | va = info->address; |
132 | len = get_hbp_len(info->len); | ||
142 | 133 | ||
143 | return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); | 134 | return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); |
144 | } | 135 | } |
@@ -226,8 +217,7 @@ static int arch_build_bp_info(struct perf_event *bp) | |||
226 | /* | 217 | /* |
227 | * Validate the arch-specific HW Breakpoint register settings | 218 | * Validate the arch-specific HW Breakpoint register settings |
228 | */ | 219 | */ |
229 | int arch_validate_hwbkpt_settings(struct perf_event *bp, | 220 | int arch_validate_hwbkpt_settings(struct perf_event *bp) |
230 | struct task_struct *tsk) | ||
231 | { | 221 | { |
232 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | 222 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); |
233 | unsigned int align; | 223 | unsigned int align; |
@@ -270,15 +260,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp, | |||
270 | if (info->address & align) | 260 | if (info->address & align) |
271 | return -EINVAL; | 261 | return -EINVAL; |
272 | 262 | ||
273 | /* Check that the virtual address is in the proper range */ | ||
274 | if (tsk) { | ||
275 | if (!arch_check_va_in_userspace(info->address, info->len)) | ||
276 | return -EFAULT; | ||
277 | } else { | ||
278 | if (!arch_check_va_in_kernelspace(info->address, info->len)) | ||
279 | return -EFAULT; | ||
280 | } | ||
281 | |||
282 | return 0; | 263 | return 0; |
283 | } | 264 | } |
284 | 265 | ||
@@ -363,8 +344,7 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) | |||
363 | perf_bp_event(bp, args->regs); | 344 | perf_bp_event(bp, args->regs); |
364 | 345 | ||
365 | /* Deliver the signal to userspace */ | 346 | /* Deliver the signal to userspace */ |
366 | if (arch_check_va_in_userspace(bp->attr.bp_addr, | 347 | if (!arch_check_bp_in_kernelspace(bp)) { |
367 | bp->attr.bp_len)) { | ||
368 | siginfo_t info; | 348 | siginfo_t info; |
369 | 349 | ||
370 | info.si_signo = args->signr; | 350 | info.si_signo = args->signr; |
diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c index 7759a9a93211..d4104ce9fe53 100644 --- a/arch/sh/kernel/ptrace_32.c +++ b/arch/sh/kernel/ptrace_32.c | |||
@@ -85,7 +85,7 @@ static int set_single_step(struct task_struct *tsk, unsigned long addr) | |||
85 | 85 | ||
86 | bp = thread->ptrace_bps[0]; | 86 | bp = thread->ptrace_bps[0]; |
87 | if (!bp) { | 87 | if (!bp) { |
88 | hw_breakpoint_init(&attr); | 88 | ptrace_breakpoint_init(&attr); |
89 | 89 | ||
90 | attr.bp_addr = addr; | 90 | attr.bp_addr = addr; |
91 | attr.bp_len = HW_BREAKPOINT_LEN_2; | 91 | attr.bp_len = HW_BREAKPOINT_LEN_2; |
diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index f0d343c3b956..7ae128b19d3f 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h | |||
@@ -25,7 +25,7 @@ extern int atomic_cmpxchg(atomic_t *, int, int); | |||
25 | extern int atomic_add_unless(atomic_t *, int, int); | 25 | extern int atomic_add_unless(atomic_t *, int, int); |
26 | extern void atomic_set(atomic_t *, int); | 26 | extern void atomic_set(atomic_t *, int); |
27 | 27 | ||
28 | #define atomic_read(v) ((v)->counter) | 28 | #define atomic_read(v) (*(volatile int *)&(v)->counter) |
29 | 29 | ||
30 | #define atomic_add(i, v) ((void)__atomic_add_return( (int)(i), (v))) | 30 | #define atomic_add(i, v) ((void)__atomic_add_return( (int)(i), (v))) |
31 | #define atomic_sub(i, v) ((void)__atomic_add_return(-(int)(i), (v))) | 31 | #define atomic_sub(i, v) ((void)__atomic_add_return(-(int)(i), (v))) |
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h index f2e48009989e..2050ca02c423 100644 --- a/arch/sparc/include/asm/atomic_64.h +++ b/arch/sparc/include/asm/atomic_64.h | |||
@@ -13,8 +13,8 @@ | |||
13 | #define ATOMIC_INIT(i) { (i) } | 13 | #define ATOMIC_INIT(i) { (i) } |
14 | #define ATOMIC64_INIT(i) { (i) } | 14 | #define ATOMIC64_INIT(i) { (i) } |
15 | 15 | ||
16 | #define atomic_read(v) ((v)->counter) | 16 | #define atomic_read(v) (*(volatile int *)&(v)->counter) |
17 | #define atomic64_read(v) ((v)->counter) | 17 | #define atomic64_read(v) (*(volatile long *)&(v)->counter) |
18 | 18 | ||
19 | #define atomic_set(v, i) (((v)->counter) = i) | 19 | #define atomic_set(v, i) (((v)->counter) = i) |
20 | #define atomic64_set(v, i) (((v)->counter) = i) | 20 | #define atomic64_set(v, i) (((v)->counter) = i) |
diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h index e72ac9cdfb98..766121a67a24 100644 --- a/arch/sparc/include/asm/bitops_64.h +++ b/arch/sparc/include/asm/bitops_64.h | |||
@@ -44,7 +44,7 @@ extern void change_bit(unsigned long nr, volatile unsigned long *addr); | |||
44 | 44 | ||
45 | #ifdef ULTRA_HAS_POPULATION_COUNT | 45 | #ifdef ULTRA_HAS_POPULATION_COUNT |
46 | 46 | ||
47 | static inline unsigned int hweight64(unsigned long w) | 47 | static inline unsigned int __arch_hweight64(unsigned long w) |
48 | { | 48 | { |
49 | unsigned int res; | 49 | unsigned int res; |
50 | 50 | ||
@@ -52,7 +52,7 @@ static inline unsigned int hweight64(unsigned long w) | |||
52 | return res; | 52 | return res; |
53 | } | 53 | } |
54 | 54 | ||
55 | static inline unsigned int hweight32(unsigned int w) | 55 | static inline unsigned int __arch_hweight32(unsigned int w) |
56 | { | 56 | { |
57 | unsigned int res; | 57 | unsigned int res; |
58 | 58 | ||
@@ -60,7 +60,7 @@ static inline unsigned int hweight32(unsigned int w) | |||
60 | return res; | 60 | return res; |
61 | } | 61 | } |
62 | 62 | ||
63 | static inline unsigned int hweight16(unsigned int w) | 63 | static inline unsigned int __arch_hweight16(unsigned int w) |
64 | { | 64 | { |
65 | unsigned int res; | 65 | unsigned int res; |
66 | 66 | ||
@@ -68,7 +68,7 @@ static inline unsigned int hweight16(unsigned int w) | |||
68 | return res; | 68 | return res; |
69 | } | 69 | } |
70 | 70 | ||
71 | static inline unsigned int hweight8(unsigned int w) | 71 | static inline unsigned int __arch_hweight8(unsigned int w) |
72 | { | 72 | { |
73 | unsigned int res; | 73 | unsigned int res; |
74 | 74 | ||
@@ -78,9 +78,10 @@ static inline unsigned int hweight8(unsigned int w) | |||
78 | 78 | ||
79 | #else | 79 | #else |
80 | 80 | ||
81 | #include <asm-generic/bitops/hweight.h> | 81 | #include <asm-generic/bitops/arch_hweight.h> |
82 | 82 | ||
83 | #endif | 83 | #endif |
84 | #include <asm-generic/bitops/const_hweight.h> | ||
84 | #include <asm-generic/bitops/lock.h> | 85 | #include <asm-generic/bitops/lock.h> |
85 | #endif /* __KERNEL__ */ | 86 | #endif /* __KERNEL__ */ |
86 | 87 | ||
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9458685902bd..a2d3a5fbeeda 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -53,11 +53,15 @@ config X86 | |||
53 | select HAVE_KERNEL_LZMA | 53 | select HAVE_KERNEL_LZMA |
54 | select HAVE_KERNEL_LZO | 54 | select HAVE_KERNEL_LZO |
55 | select HAVE_HW_BREAKPOINT | 55 | select HAVE_HW_BREAKPOINT |
56 | select HAVE_MIXED_BREAKPOINTS_REGS | ||
56 | select PERF_EVENTS | 57 | select PERF_EVENTS |
57 | select ANON_INODES | 58 | select ANON_INODES |
58 | select HAVE_ARCH_KMEMCHECK | 59 | select HAVE_ARCH_KMEMCHECK |
59 | select HAVE_USER_RETURN_NOTIFIER | 60 | select HAVE_USER_RETURN_NOTIFIER |
60 | 61 | ||
62 | config INSTRUCTION_DECODER | ||
63 | def_bool (KPROBES || PERF_EVENTS) | ||
64 | |||
61 | config OUTPUT_FORMAT | 65 | config OUTPUT_FORMAT |
62 | string | 66 | string |
63 | default "elf32-i386" if X86_32 | 67 | default "elf32-i386" if X86_32 |
@@ -197,20 +201,17 @@ config HAVE_INTEL_TXT | |||
197 | 201 | ||
198 | # Use the generic interrupt handling code in kernel/irq/: | 202 | # Use the generic interrupt handling code in kernel/irq/: |
199 | config GENERIC_HARDIRQS | 203 | config GENERIC_HARDIRQS |
200 | bool | 204 | def_bool y |
201 | default y | ||
202 | 205 | ||
203 | config GENERIC_HARDIRQS_NO__DO_IRQ | 206 | config GENERIC_HARDIRQS_NO__DO_IRQ |
204 | def_bool y | 207 | def_bool y |
205 | 208 | ||
206 | config GENERIC_IRQ_PROBE | 209 | config GENERIC_IRQ_PROBE |
207 | bool | 210 | def_bool y |
208 | default y | ||
209 | 211 | ||
210 | config GENERIC_PENDING_IRQ | 212 | config GENERIC_PENDING_IRQ |
211 | bool | 213 | def_bool y |
212 | depends on GENERIC_HARDIRQS && SMP | 214 | depends on GENERIC_HARDIRQS && SMP |
213 | default y | ||
214 | 215 | ||
215 | config USE_GENERIC_SMP_HELPERS | 216 | config USE_GENERIC_SMP_HELPERS |
216 | def_bool y | 217 | def_bool y |
@@ -225,19 +226,22 @@ config X86_64_SMP | |||
225 | depends on X86_64 && SMP | 226 | depends on X86_64 && SMP |
226 | 227 | ||
227 | config X86_HT | 228 | config X86_HT |
228 | bool | 229 | def_bool y |
229 | depends on SMP | 230 | depends on SMP |
230 | default y | ||
231 | 231 | ||
232 | config X86_TRAMPOLINE | 232 | config X86_TRAMPOLINE |
233 | bool | 233 | def_bool y |
234 | depends on SMP || (64BIT && ACPI_SLEEP) | 234 | depends on SMP || (64BIT && ACPI_SLEEP) |
235 | default y | ||
236 | 235 | ||
237 | config X86_32_LAZY_GS | 236 | config X86_32_LAZY_GS |
238 | def_bool y | 237 | def_bool y |
239 | depends on X86_32 && !CC_STACKPROTECTOR | 238 | depends on X86_32 && !CC_STACKPROTECTOR |
240 | 239 | ||
240 | config ARCH_HWEIGHT_CFLAGS | ||
241 | string | ||
242 | default "-fcall-saved-ecx -fcall-saved-edx" if X86_32 | ||
243 | default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64 | ||
244 | |||
241 | config KTIME_SCALAR | 245 | config KTIME_SCALAR |
242 | def_bool X86_32 | 246 | def_bool X86_32 |
243 | source "init/Kconfig" | 247 | source "init/Kconfig" |
@@ -447,7 +451,7 @@ config X86_NUMAQ | |||
447 | firmware with - send email to <Martin.Bligh@us.ibm.com>. | 451 | firmware with - send email to <Martin.Bligh@us.ibm.com>. |
448 | 452 | ||
449 | config X86_SUPPORTS_MEMORY_FAILURE | 453 | config X86_SUPPORTS_MEMORY_FAILURE |
450 | bool | 454 | def_bool y |
451 | # MCE code calls memory_failure(): | 455 | # MCE code calls memory_failure(): |
452 | depends on X86_MCE | 456 | depends on X86_MCE |
453 | # On 32-bit this adds too big of NODES_SHIFT and we run out of page flags: | 457 | # On 32-bit this adds too big of NODES_SHIFT and we run out of page flags: |
@@ -455,7 +459,6 @@ config X86_SUPPORTS_MEMORY_FAILURE | |||
455 | # On 32-bit SPARSEMEM adds too big of SECTIONS_WIDTH: | 459 | # On 32-bit SPARSEMEM adds too big of SECTIONS_WIDTH: |
456 | depends on X86_64 || !SPARSEMEM | 460 | depends on X86_64 || !SPARSEMEM |
457 | select ARCH_SUPPORTS_MEMORY_FAILURE | 461 | select ARCH_SUPPORTS_MEMORY_FAILURE |
458 | default y | ||
459 | 462 | ||
460 | config X86_VISWS | 463 | config X86_VISWS |
461 | bool "SGI 320/540 (Visual Workstation)" | 464 | bool "SGI 320/540 (Visual Workstation)" |
@@ -570,7 +573,6 @@ config PARAVIRT_SPINLOCKS | |||
570 | 573 | ||
571 | config PARAVIRT_CLOCK | 574 | config PARAVIRT_CLOCK |
572 | bool | 575 | bool |
573 | default n | ||
574 | 576 | ||
575 | endif | 577 | endif |
576 | 578 | ||
@@ -749,7 +751,6 @@ config MAXSMP | |||
749 | bool "Configure Maximum number of SMP Processors and NUMA Nodes" | 751 | bool "Configure Maximum number of SMP Processors and NUMA Nodes" |
750 | depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL | 752 | depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL |
751 | select CPUMASK_OFFSTACK | 753 | select CPUMASK_OFFSTACK |
752 | default n | ||
753 | ---help--- | 754 | ---help--- |
754 | Configure maximum number of CPUS and NUMA Nodes for this architecture. | 755 | Configure maximum number of CPUS and NUMA Nodes for this architecture. |
755 | If unsure, say N. | 756 | If unsure, say N. |
@@ -829,7 +830,6 @@ config X86_VISWS_APIC | |||
829 | 830 | ||
830 | config X86_REROUTE_FOR_BROKEN_BOOT_IRQS | 831 | config X86_REROUTE_FOR_BROKEN_BOOT_IRQS |
831 | bool "Reroute for broken boot IRQs" | 832 | bool "Reroute for broken boot IRQs" |
832 | default n | ||
833 | depends on X86_IO_APIC | 833 | depends on X86_IO_APIC |
834 | ---help--- | 834 | ---help--- |
835 | This option enables a workaround that fixes a source of | 835 | This option enables a workaround that fixes a source of |
@@ -876,9 +876,8 @@ config X86_MCE_AMD | |||
876 | the DRAM Error Threshold. | 876 | the DRAM Error Threshold. |
877 | 877 | ||
878 | config X86_ANCIENT_MCE | 878 | config X86_ANCIENT_MCE |
879 | def_bool n | 879 | bool "Support for old Pentium 5 / WinChip machine checks" |
880 | depends on X86_32 && X86_MCE | 880 | depends on X86_32 && X86_MCE |
881 | prompt "Support for old Pentium 5 / WinChip machine checks" | ||
882 | ---help--- | 881 | ---help--- |
883 | Include support for machine check handling on old Pentium 5 or WinChip | 882 | Include support for machine check handling on old Pentium 5 or WinChip |
884 | systems. These typically need to be enabled explicitely on the command | 883 | systems. These typically need to be enabled explicitely on the command |
@@ -886,8 +885,7 @@ config X86_ANCIENT_MCE | |||
886 | 885 | ||
887 | config X86_MCE_THRESHOLD | 886 | config X86_MCE_THRESHOLD |
888 | depends on X86_MCE_AMD || X86_MCE_INTEL | 887 | depends on X86_MCE_AMD || X86_MCE_INTEL |
889 | bool | 888 | def_bool y |
890 | default y | ||
891 | 889 | ||
892 | config X86_MCE_INJECT | 890 | config X86_MCE_INJECT |
893 | depends on X86_MCE | 891 | depends on X86_MCE |
@@ -1026,8 +1024,8 @@ config X86_CPUID | |||
1026 | 1024 | ||
1027 | choice | 1025 | choice |
1028 | prompt "High Memory Support" | 1026 | prompt "High Memory Support" |
1029 | default HIGHMEM4G if !X86_NUMAQ | ||
1030 | default HIGHMEM64G if X86_NUMAQ | 1027 | default HIGHMEM64G if X86_NUMAQ |
1028 | default HIGHMEM4G | ||
1031 | depends on X86_32 | 1029 | depends on X86_32 |
1032 | 1030 | ||
1033 | config NOHIGHMEM | 1031 | config NOHIGHMEM |
@@ -1285,7 +1283,7 @@ source "mm/Kconfig" | |||
1285 | 1283 | ||
1286 | config HIGHPTE | 1284 | config HIGHPTE |
1287 | bool "Allocate 3rd-level pagetables from highmem" | 1285 | bool "Allocate 3rd-level pagetables from highmem" |
1288 | depends on X86_32 && (HIGHMEM4G || HIGHMEM64G) | 1286 | depends on HIGHMEM |
1289 | ---help--- | 1287 | ---help--- |
1290 | The VM uses one page table entry for each page of physical memory. | 1288 | The VM uses one page table entry for each page of physical memory. |
1291 | For systems with a lot of RAM, this can be wasteful of precious | 1289 | For systems with a lot of RAM, this can be wasteful of precious |
@@ -1369,8 +1367,7 @@ config MATH_EMULATION | |||
1369 | kernel, it won't hurt. | 1367 | kernel, it won't hurt. |
1370 | 1368 | ||
1371 | config MTRR | 1369 | config MTRR |
1372 | bool | 1370 | def_bool y |
1373 | default y | ||
1374 | prompt "MTRR (Memory Type Range Register) support" if EMBEDDED | 1371 | prompt "MTRR (Memory Type Range Register) support" if EMBEDDED |
1375 | ---help--- | 1372 | ---help--- |
1376 | On Intel P6 family processors (Pentium Pro, Pentium II and later) | 1373 | On Intel P6 family processors (Pentium Pro, Pentium II and later) |
@@ -1436,8 +1433,7 @@ config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT | |||
1436 | mtrr_spare_reg_nr=N on the kernel command line. | 1433 | mtrr_spare_reg_nr=N on the kernel command line. |
1437 | 1434 | ||
1438 | config X86_PAT | 1435 | config X86_PAT |
1439 | bool | 1436 | def_bool y |
1440 | default y | ||
1441 | prompt "x86 PAT support" if EMBEDDED | 1437 | prompt "x86 PAT support" if EMBEDDED |
1442 | depends on MTRR | 1438 | depends on MTRR |
1443 | ---help--- | 1439 | ---help--- |
@@ -1605,8 +1601,7 @@ config X86_NEED_RELOCS | |||
1605 | depends on X86_32 && RELOCATABLE | 1601 | depends on X86_32 && RELOCATABLE |
1606 | 1602 | ||
1607 | config PHYSICAL_ALIGN | 1603 | config PHYSICAL_ALIGN |
1608 | hex | 1604 | hex "Alignment value to which kernel should be aligned" if X86_32 |
1609 | prompt "Alignment value to which kernel should be aligned" if X86_32 | ||
1610 | default "0x1000000" | 1605 | default "0x1000000" |
1611 | range 0x2000 0x1000000 | 1606 | range 0x2000 0x1000000 |
1612 | ---help--- | 1607 | ---help--- |
@@ -1653,7 +1648,6 @@ config COMPAT_VDSO | |||
1653 | 1648 | ||
1654 | config CMDLINE_BOOL | 1649 | config CMDLINE_BOOL |
1655 | bool "Built-in kernel command line" | 1650 | bool "Built-in kernel command line" |
1656 | default n | ||
1657 | ---help--- | 1651 | ---help--- |
1658 | Allow for specifying boot arguments to the kernel at | 1652 | Allow for specifying boot arguments to the kernel at |
1659 | build time. On some systems (e.g. embedded ones), it is | 1653 | build time. On some systems (e.g. embedded ones), it is |
@@ -1687,7 +1681,6 @@ config CMDLINE | |||
1687 | 1681 | ||
1688 | config CMDLINE_OVERRIDE | 1682 | config CMDLINE_OVERRIDE |
1689 | bool "Built-in command line overrides boot loader arguments" | 1683 | bool "Built-in command line overrides boot loader arguments" |
1690 | default n | ||
1691 | depends on CMDLINE_BOOL | 1684 | depends on CMDLINE_BOOL |
1692 | ---help--- | 1685 | ---help--- |
1693 | Set this option to 'Y' to have the kernel ignore the boot loader | 1686 | Set this option to 'Y' to have the kernel ignore the boot loader |
@@ -1723,8 +1716,7 @@ source "drivers/acpi/Kconfig" | |||
1723 | source "drivers/sfi/Kconfig" | 1716 | source "drivers/sfi/Kconfig" |
1724 | 1717 | ||
1725 | config X86_APM_BOOT | 1718 | config X86_APM_BOOT |
1726 | bool | 1719 | def_bool y |
1727 | default y | ||
1728 | depends on APM || APM_MODULE | 1720 | depends on APM || APM_MODULE |
1729 | 1721 | ||
1730 | menuconfig APM | 1722 | menuconfig APM |
@@ -1953,8 +1945,7 @@ config DMAR_DEFAULT_ON | |||
1953 | experimental. | 1945 | experimental. |
1954 | 1946 | ||
1955 | config DMAR_BROKEN_GFX_WA | 1947 | config DMAR_BROKEN_GFX_WA |
1956 | def_bool n | 1948 | bool "Workaround broken graphics drivers (going away soon)" |
1957 | prompt "Workaround broken graphics drivers (going away soon)" | ||
1958 | depends on DMAR && BROKEN | 1949 | depends on DMAR && BROKEN |
1959 | ---help--- | 1950 | ---help--- |
1960 | Current Graphics drivers tend to use physical address | 1951 | Current Graphics drivers tend to use physical address |
@@ -2052,7 +2043,6 @@ config SCx200HR_TIMER | |||
2052 | config OLPC | 2043 | config OLPC |
2053 | bool "One Laptop Per Child support" | 2044 | bool "One Laptop Per Child support" |
2054 | select GPIOLIB | 2045 | select GPIOLIB |
2055 | default n | ||
2056 | ---help--- | 2046 | ---help--- |
2057 | Add support for detecting the unique features of the OLPC | 2047 | Add support for detecting the unique features of the OLPC |
2058 | XO hardware. | 2048 | XO hardware. |
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index a19829374e6a..2ac9069890cd 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu | |||
@@ -338,6 +338,10 @@ config X86_F00F_BUG | |||
338 | def_bool y | 338 | def_bool y |
339 | depends on M586MMX || M586TSC || M586 || M486 || M386 | 339 | depends on M586MMX || M586TSC || M586 || M486 || M386 |
340 | 340 | ||
341 | config X86_INVD_BUG | ||
342 | def_bool y | ||
343 | depends on M486 || M386 | ||
344 | |||
341 | config X86_WP_WORKS_OK | 345 | config X86_WP_WORKS_OK |
342 | def_bool y | 346 | def_bool y |
343 | depends on !M386 | 347 | depends on !M386 |
@@ -502,23 +506,3 @@ config CPU_SUP_UMC_32 | |||
502 | CPU might render the kernel unbootable. | 506 | CPU might render the kernel unbootable. |
503 | 507 | ||
504 | If unsure, say N. | 508 | If unsure, say N. |
505 | |||
506 | config X86_DS | ||
507 | def_bool X86_PTRACE_BTS | ||
508 | depends on X86_DEBUGCTLMSR | ||
509 | select HAVE_HW_BRANCH_TRACER | ||
510 | |||
511 | config X86_PTRACE_BTS | ||
512 | bool "Branch Trace Store" | ||
513 | default y | ||
514 | depends on X86_DEBUGCTLMSR | ||
515 | depends on BROKEN | ||
516 | ---help--- | ||
517 | This adds a ptrace interface to the hardware's branch trace store. | ||
518 | |||
519 | Debuggers may use it to collect an execution trace of the debugged | ||
520 | application in order to answer the question 'how did I get here?'. | ||
521 | Debuggers may trace user mode as well as kernel mode. | ||
522 | |||
523 | Say Y unless there is no application development on this machine | ||
524 | and you want to save a small amount of code size. | ||
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index bc01e3ebfeb2..75085080b63e 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug | |||
@@ -45,7 +45,6 @@ config EARLY_PRINTK | |||
45 | 45 | ||
46 | config EARLY_PRINTK_DBGP | 46 | config EARLY_PRINTK_DBGP |
47 | bool "Early printk via EHCI debug port" | 47 | bool "Early printk via EHCI debug port" |
48 | default n | ||
49 | depends on EARLY_PRINTK && PCI | 48 | depends on EARLY_PRINTK && PCI |
50 | ---help--- | 49 | ---help--- |
51 | Write kernel log output directly into the EHCI debug port. | 50 | Write kernel log output directly into the EHCI debug port. |
@@ -76,7 +75,6 @@ config DEBUG_PER_CPU_MAPS | |||
76 | bool "Debug access to per_cpu maps" | 75 | bool "Debug access to per_cpu maps" |
77 | depends on DEBUG_KERNEL | 76 | depends on DEBUG_KERNEL |
78 | depends on SMP | 77 | depends on SMP |
79 | default n | ||
80 | ---help--- | 78 | ---help--- |
81 | Say Y to verify that the per_cpu map being accessed has | 79 | Say Y to verify that the per_cpu map being accessed has |
82 | been setup. Adds a fair amount of code to kernel memory | 80 | been setup. Adds a fair amount of code to kernel memory |
@@ -174,15 +172,6 @@ config IOMMU_LEAK | |||
174 | Add a simple leak tracer to the IOMMU code. This is useful when you | 172 | Add a simple leak tracer to the IOMMU code. This is useful when you |
175 | are debugging a buggy device driver that leaks IOMMU mappings. | 173 | are debugging a buggy device driver that leaks IOMMU mappings. |
176 | 174 | ||
177 | config X86_DS_SELFTEST | ||
178 | bool "DS selftest" | ||
179 | default y | ||
180 | depends on DEBUG_KERNEL | ||
181 | depends on X86_DS | ||
182 | ---help--- | ||
183 | Perform Debug Store selftests at boot time. | ||
184 | If in doubt, say "N". | ||
185 | |||
186 | config HAVE_MMIOTRACE_SUPPORT | 175 | config HAVE_MMIOTRACE_SUPPORT |
187 | def_bool y | 176 | def_bool y |
188 | 177 | ||
diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 0a43dc515e4c..8aa1b59b9074 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile | |||
@@ -95,8 +95,9 @@ sp-$(CONFIG_X86_64) := rsp | |||
95 | cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_endproc,-DCONFIG_AS_CFI=1) | 95 | cfi := $(call as-instr,.cfi_startproc\n.cfi_rel_offset $(sp-y)$(comma)0\n.cfi_endproc,-DCONFIG_AS_CFI=1) |
96 | # is .cfi_signal_frame supported too? | 96 | # is .cfi_signal_frame supported too? |
97 | cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1) | 97 | cfi-sigframe := $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1) |
98 | KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) | 98 | cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1) |
99 | KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) | 99 | KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) |
100 | KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) | ||
100 | 101 | ||
101 | LDFLAGS := -m elf_$(UTS_MACHINE) | 102 | LDFLAGS := -m elf_$(UTS_MACHINE) |
102 | 103 | ||
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h index b97f786a48d5..a63a68be1cce 100644 --- a/arch/x86/include/asm/alternative-asm.h +++ b/arch/x86/include/asm/alternative-asm.h | |||
@@ -6,8 +6,8 @@ | |||
6 | .macro LOCK_PREFIX | 6 | .macro LOCK_PREFIX |
7 | 1: lock | 7 | 1: lock |
8 | .section .smp_locks,"a" | 8 | .section .smp_locks,"a" |
9 | _ASM_ALIGN | 9 | .balign 4 |
10 | _ASM_PTR 1b | 10 | .long 1b - . |
11 | .previous | 11 | .previous |
12 | .endm | 12 | .endm |
13 | #else | 13 | #else |
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index b09ec55650b3..03b6bb5394a0 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h | |||
@@ -28,20 +28,20 @@ | |||
28 | */ | 28 | */ |
29 | 29 | ||
30 | #ifdef CONFIG_SMP | 30 | #ifdef CONFIG_SMP |
31 | #define LOCK_PREFIX \ | 31 | #define LOCK_PREFIX_HERE \ |
32 | ".section .smp_locks,\"a\"\n" \ | 32 | ".section .smp_locks,\"a\"\n" \ |
33 | _ASM_ALIGN "\n" \ | 33 | ".balign 4\n" \ |
34 | _ASM_PTR "661f\n" /* address */ \ | 34 | ".long 671f - .\n" /* offset */ \ |
35 | ".previous\n" \ | 35 | ".previous\n" \ |
36 | "661:\n\tlock; " | 36 | "671:" |
37 | |||
38 | #define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; " | ||
37 | 39 | ||
38 | #else /* ! CONFIG_SMP */ | 40 | #else /* ! CONFIG_SMP */ |
41 | #define LOCK_PREFIX_HERE "" | ||
39 | #define LOCK_PREFIX "" | 42 | #define LOCK_PREFIX "" |
40 | #endif | 43 | #endif |
41 | 44 | ||
42 | /* This must be included *after* the definition of LOCK_PREFIX */ | ||
43 | #include <asm/cpufeature.h> | ||
44 | |||
45 | struct alt_instr { | 45 | struct alt_instr { |
46 | u8 *instr; /* original instruction */ | 46 | u8 *instr; /* original instruction */ |
47 | u8 *replacement; | 47 | u8 *replacement; |
@@ -96,6 +96,12 @@ static inline int alternatives_text_reserved(void *start, void *end) | |||
96 | ".previous" | 96 | ".previous" |
97 | 97 | ||
98 | /* | 98 | /* |
99 | * This must be included *after* the definition of ALTERNATIVE due to | ||
100 | * <asm/arch_hweight.h> | ||
101 | */ | ||
102 | #include <asm/cpufeature.h> | ||
103 | |||
104 | /* | ||
99 | * Alternative instructions for different CPU types or capabilities. | 105 | * Alternative instructions for different CPU types or capabilities. |
100 | * | 106 | * |
101 | * This allows to use optimized instructions even on generic binary | 107 | * This allows to use optimized instructions even on generic binary |
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 86a0ff0aeac7..7014e88bc779 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h | |||
@@ -174,6 +174,40 @@ | |||
174 | (~((1ULL << (12 + ((lvl) * 9))) - 1))) | 174 | (~((1ULL << (12 + ((lvl) * 9))) - 1))) |
175 | #define PM_ALIGNED(lvl, addr) ((PM_MAP_MASK(lvl) & (addr)) == (addr)) | 175 | #define PM_ALIGNED(lvl, addr) ((PM_MAP_MASK(lvl) & (addr)) == (addr)) |
176 | 176 | ||
177 | /* | ||
178 | * Returns the page table level to use for a given page size | ||
179 | * Pagesize is expected to be a power-of-two | ||
180 | */ | ||
181 | #define PAGE_SIZE_LEVEL(pagesize) \ | ||
182 | ((__ffs(pagesize) - 12) / 9) | ||
183 | /* | ||
184 | * Returns the number of ptes to use for a given page size | ||
185 | * Pagesize is expected to be a power-of-two | ||
186 | */ | ||
187 | #define PAGE_SIZE_PTE_COUNT(pagesize) \ | ||
188 | (1ULL << ((__ffs(pagesize) - 12) % 9)) | ||
189 | |||
190 | /* | ||
191 | * Aligns a given io-virtual address to a given page size | ||
192 | * Pagesize is expected to be a power-of-two | ||
193 | */ | ||
194 | #define PAGE_SIZE_ALIGN(address, pagesize) \ | ||
195 | ((address) & ~((pagesize) - 1)) | ||
196 | /* | ||
197 | * Creates an IOMMU PTE for an address an a given pagesize | ||
198 | * The PTE has no permission bits set | ||
199 | * Pagesize is expected to be a power-of-two larger than 4096 | ||
200 | */ | ||
201 | #define PAGE_SIZE_PTE(address, pagesize) \ | ||
202 | (((address) | ((pagesize) - 1)) & \ | ||
203 | (~(pagesize >> 1)) & PM_ADDR_MASK) | ||
204 | |||
205 | /* | ||
206 | * Takes a PTE value with mode=0x07 and returns the page size it maps | ||
207 | */ | ||
208 | #define PTE_PAGE_SIZE(pte) \ | ||
209 | (1ULL << (1 + ffz(((pte) | 0xfffULL)))) | ||
210 | |||
177 | #define IOMMU_PTE_P (1ULL << 0) | 211 | #define IOMMU_PTE_P (1ULL << 0) |
178 | #define IOMMU_PTE_TV (1ULL << 1) | 212 | #define IOMMU_PTE_TV (1ULL << 1) |
179 | #define IOMMU_PTE_U (1ULL << 59) | 213 | #define IOMMU_PTE_U (1ULL << 59) |
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index b4ac2cdcb64f..1fa03e04ae44 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h | |||
@@ -373,6 +373,7 @@ extern atomic_t init_deasserted; | |||
373 | extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); | 373 | extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); |
374 | #endif | 374 | #endif |
375 | 375 | ||
376 | #ifdef CONFIG_X86_LOCAL_APIC | ||
376 | static inline u32 apic_read(u32 reg) | 377 | static inline u32 apic_read(u32 reg) |
377 | { | 378 | { |
378 | return apic->read(reg); | 379 | return apic->read(reg); |
@@ -403,10 +404,19 @@ static inline u32 safe_apic_wait_icr_idle(void) | |||
403 | return apic->safe_wait_icr_idle(); | 404 | return apic->safe_wait_icr_idle(); |
404 | } | 405 | } |
405 | 406 | ||
407 | #else /* CONFIG_X86_LOCAL_APIC */ | ||
408 | |||
409 | static inline u32 apic_read(u32 reg) { return 0; } | ||
410 | static inline void apic_write(u32 reg, u32 val) { } | ||
411 | static inline u64 apic_icr_read(void) { return 0; } | ||
412 | static inline void apic_icr_write(u32 low, u32 high) { } | ||
413 | static inline void apic_wait_icr_idle(void) { } | ||
414 | static inline u32 safe_apic_wait_icr_idle(void) { return 0; } | ||
415 | |||
416 | #endif /* CONFIG_X86_LOCAL_APIC */ | ||
406 | 417 | ||
407 | static inline void ack_APIC_irq(void) | 418 | static inline void ack_APIC_irq(void) |
408 | { | 419 | { |
409 | #ifdef CONFIG_X86_LOCAL_APIC | ||
410 | /* | 420 | /* |
411 | * ack_APIC_irq() actually gets compiled as a single instruction | 421 | * ack_APIC_irq() actually gets compiled as a single instruction |
412 | * ... yummie. | 422 | * ... yummie. |
@@ -414,7 +424,6 @@ static inline void ack_APIC_irq(void) | |||
414 | 424 | ||
415 | /* Docs say use 0 for future compatibility */ | 425 | /* Docs say use 0 for future compatibility */ |
416 | apic_write(APIC_EOI, 0); | 426 | apic_write(APIC_EOI, 0); |
417 | #endif | ||
418 | } | 427 | } |
419 | 428 | ||
420 | static inline unsigned default_get_apic_id(unsigned long x) | 429 | static inline unsigned default_get_apic_id(unsigned long x) |
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h new file mode 100644 index 000000000000..9686c3d9ff73 --- /dev/null +++ b/arch/x86/include/asm/arch_hweight.h | |||
@@ -0,0 +1,61 @@ | |||
1 | #ifndef _ASM_X86_HWEIGHT_H | ||
2 | #define _ASM_X86_HWEIGHT_H | ||
3 | |||
4 | #ifdef CONFIG_64BIT | ||
5 | /* popcnt %edi, %eax -- redundant REX prefix for alignment */ | ||
6 | #define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7" | ||
7 | /* popcnt %rdi, %rax */ | ||
8 | #define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7" | ||
9 | #define REG_IN "D" | ||
10 | #define REG_OUT "a" | ||
11 | #else | ||
12 | /* popcnt %eax, %eax */ | ||
13 | #define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc0" | ||
14 | #define REG_IN "a" | ||
15 | #define REG_OUT "a" | ||
16 | #endif | ||
17 | |||
18 | /* | ||
19 | * __sw_hweightXX are called from within the alternatives below | ||
20 | * and callee-clobbered registers need to be taken care of. See | ||
21 | * ARCH_HWEIGHT_CFLAGS in <arch/x86/Kconfig> for the respective | ||
22 | * compiler switches. | ||
23 | */ | ||
24 | static inline unsigned int __arch_hweight32(unsigned int w) | ||
25 | { | ||
26 | unsigned int res = 0; | ||
27 | |||
28 | asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT) | ||
29 | : "="REG_OUT (res) | ||
30 | : REG_IN (w)); | ||
31 | |||
32 | return res; | ||
33 | } | ||
34 | |||
35 | static inline unsigned int __arch_hweight16(unsigned int w) | ||
36 | { | ||
37 | return __arch_hweight32(w & 0xffff); | ||
38 | } | ||
39 | |||
40 | static inline unsigned int __arch_hweight8(unsigned int w) | ||
41 | { | ||
42 | return __arch_hweight32(w & 0xff); | ||
43 | } | ||
44 | |||
45 | static inline unsigned long __arch_hweight64(__u64 w) | ||
46 | { | ||
47 | unsigned long res = 0; | ||
48 | |||
49 | #ifdef CONFIG_X86_32 | ||
50 | return __arch_hweight32((u32)w) + | ||
51 | __arch_hweight32((u32)(w >> 32)); | ||
52 | #else | ||
53 | asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT) | ||
54 | : "="REG_OUT (res) | ||
55 | : REG_IN (w)); | ||
56 | #endif /* CONFIG_X86_32 */ | ||
57 | |||
58 | return res; | ||
59 | } | ||
60 | |||
61 | #endif | ||
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 8f8217b9bdac..952a826ac4e5 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h | |||
@@ -22,7 +22,7 @@ | |||
22 | */ | 22 | */ |
23 | static inline int atomic_read(const atomic_t *v) | 23 | static inline int atomic_read(const atomic_t *v) |
24 | { | 24 | { |
25 | return v->counter; | 25 | return (*(volatile int *)&(v)->counter); |
26 | } | 26 | } |
27 | 27 | ||
28 | /** | 28 | /** |
@@ -246,6 +246,29 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) | |||
246 | 246 | ||
247 | #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) | 247 | #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) |
248 | 248 | ||
249 | /* | ||
250 | * atomic_dec_if_positive - decrement by 1 if old value positive | ||
251 | * @v: pointer of type atomic_t | ||
252 | * | ||
253 | * The function returns the old value of *v minus 1, even if | ||
254 | * the atomic variable, v, was not decremented. | ||
255 | */ | ||
256 | static inline int atomic_dec_if_positive(atomic_t *v) | ||
257 | { | ||
258 | int c, old, dec; | ||
259 | c = atomic_read(v); | ||
260 | for (;;) { | ||
261 | dec = c - 1; | ||
262 | if (unlikely(dec < 0)) | ||
263 | break; | ||
264 | old = atomic_cmpxchg((v), c, dec); | ||
265 | if (likely(old == c)) | ||
266 | break; | ||
267 | c = old; | ||
268 | } | ||
269 | return dec; | ||
270 | } | ||
271 | |||
249 | /** | 272 | /** |
250 | * atomic_inc_short - increment of a short integer | 273 | * atomic_inc_short - increment of a short integer |
251 | * @v: pointer to type int | 274 | * @v: pointer to type int |
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index 03027bf28de5..2a934aa19a43 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h | |||
@@ -14,109 +14,193 @@ typedef struct { | |||
14 | 14 | ||
15 | #define ATOMIC64_INIT(val) { (val) } | 15 | #define ATOMIC64_INIT(val) { (val) } |
16 | 16 | ||
17 | extern u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val); | 17 | #ifdef CONFIG_X86_CMPXCHG64 |
18 | #define ATOMIC64_ALTERNATIVE_(f, g) "call atomic64_" #g "_cx8" | ||
19 | #else | ||
20 | #define ATOMIC64_ALTERNATIVE_(f, g) ALTERNATIVE("call atomic64_" #f "_386", "call atomic64_" #g "_cx8", X86_FEATURE_CX8) | ||
21 | #endif | ||
22 | |||
23 | #define ATOMIC64_ALTERNATIVE(f) ATOMIC64_ALTERNATIVE_(f, f) | ||
24 | |||
25 | /** | ||
26 | * atomic64_cmpxchg - cmpxchg atomic64 variable | ||
27 | * @p: pointer to type atomic64_t | ||
28 | * @o: expected value | ||
29 | * @n: new value | ||
30 | * | ||
31 | * Atomically sets @v to @n if it was equal to @o and returns | ||
32 | * the old value. | ||
33 | */ | ||
34 | |||
35 | static inline long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n) | ||
36 | { | ||
37 | return cmpxchg64(&v->counter, o, n); | ||
38 | } | ||
18 | 39 | ||
19 | /** | 40 | /** |
20 | * atomic64_xchg - xchg atomic64 variable | 41 | * atomic64_xchg - xchg atomic64 variable |
21 | * @ptr: pointer to type atomic64_t | 42 | * @v: pointer to type atomic64_t |
22 | * @new_val: value to assign | 43 | * @n: value to assign |
23 | * | 44 | * |
24 | * Atomically xchgs the value of @ptr to @new_val and returns | 45 | * Atomically xchgs the value of @v to @n and returns |
25 | * the old value. | 46 | * the old value. |
26 | */ | 47 | */ |
27 | extern u64 atomic64_xchg(atomic64_t *ptr, u64 new_val); | 48 | static inline long long atomic64_xchg(atomic64_t *v, long long n) |
49 | { | ||
50 | long long o; | ||
51 | unsigned high = (unsigned)(n >> 32); | ||
52 | unsigned low = (unsigned)n; | ||
53 | asm volatile(ATOMIC64_ALTERNATIVE(xchg) | ||
54 | : "=A" (o), "+b" (low), "+c" (high) | ||
55 | : "S" (v) | ||
56 | : "memory" | ||
57 | ); | ||
58 | return o; | ||
59 | } | ||
28 | 60 | ||
29 | /** | 61 | /** |
30 | * atomic64_set - set atomic64 variable | 62 | * atomic64_set - set atomic64 variable |
31 | * @ptr: pointer to type atomic64_t | 63 | * @v: pointer to type atomic64_t |
32 | * @new_val: value to assign | 64 | * @n: value to assign |
33 | * | 65 | * |
34 | * Atomically sets the value of @ptr to @new_val. | 66 | * Atomically sets the value of @v to @n. |
35 | */ | 67 | */ |
36 | extern void atomic64_set(atomic64_t *ptr, u64 new_val); | 68 | static inline void atomic64_set(atomic64_t *v, long long i) |
69 | { | ||
70 | unsigned high = (unsigned)(i >> 32); | ||
71 | unsigned low = (unsigned)i; | ||
72 | asm volatile(ATOMIC64_ALTERNATIVE(set) | ||
73 | : "+b" (low), "+c" (high) | ||
74 | : "S" (v) | ||
75 | : "eax", "edx", "memory" | ||
76 | ); | ||
77 | } | ||
37 | 78 | ||
38 | /** | 79 | /** |
39 | * atomic64_read - read atomic64 variable | 80 | * atomic64_read - read atomic64 variable |
40 | * @ptr: pointer to type atomic64_t | 81 | * @v: pointer to type atomic64_t |
41 | * | 82 | * |
42 | * Atomically reads the value of @ptr and returns it. | 83 | * Atomically reads the value of @v and returns it. |
43 | */ | 84 | */ |
44 | static inline u64 atomic64_read(atomic64_t *ptr) | 85 | static inline long long atomic64_read(atomic64_t *v) |
45 | { | 86 | { |
46 | u64 res; | 87 | long long r; |
47 | 88 | asm volatile(ATOMIC64_ALTERNATIVE(read) | |
48 | /* | 89 | : "=A" (r), "+c" (v) |
49 | * Note, we inline this atomic64_t primitive because | 90 | : : "memory" |
50 | * it only clobbers EAX/EDX and leaves the others | 91 | ); |
51 | * untouched. We also (somewhat subtly) rely on the | 92 | return r; |
52 | * fact that cmpxchg8b returns the current 64-bit value | 93 | } |
53 | * of the memory location we are touching: | ||
54 | */ | ||
55 | asm volatile( | ||
56 | "mov %%ebx, %%eax\n\t" | ||
57 | "mov %%ecx, %%edx\n\t" | ||
58 | LOCK_PREFIX "cmpxchg8b %1\n" | ||
59 | : "=&A" (res) | ||
60 | : "m" (*ptr) | ||
61 | ); | ||
62 | |||
63 | return res; | ||
64 | } | ||
65 | |||
66 | extern u64 atomic64_read(atomic64_t *ptr); | ||
67 | 94 | ||
68 | /** | 95 | /** |
69 | * atomic64_add_return - add and return | 96 | * atomic64_add_return - add and return |
70 | * @delta: integer value to add | 97 | * @i: integer value to add |
71 | * @ptr: pointer to type atomic64_t | 98 | * @v: pointer to type atomic64_t |
72 | * | 99 | * |
73 | * Atomically adds @delta to @ptr and returns @delta + *@ptr | 100 | * Atomically adds @i to @v and returns @i + *@v |
74 | */ | 101 | */ |
75 | extern u64 atomic64_add_return(u64 delta, atomic64_t *ptr); | 102 | static inline long long atomic64_add_return(long long i, atomic64_t *v) |
103 | { | ||
104 | asm volatile(ATOMIC64_ALTERNATIVE(add_return) | ||
105 | : "+A" (i), "+c" (v) | ||
106 | : : "memory" | ||
107 | ); | ||
108 | return i; | ||
109 | } | ||
76 | 110 | ||
77 | /* | 111 | /* |
78 | * Other variants with different arithmetic operators: | 112 | * Other variants with different arithmetic operators: |
79 | */ | 113 | */ |
80 | extern u64 atomic64_sub_return(u64 delta, atomic64_t *ptr); | 114 | static inline long long atomic64_sub_return(long long i, atomic64_t *v) |
81 | extern u64 atomic64_inc_return(atomic64_t *ptr); | 115 | { |
82 | extern u64 atomic64_dec_return(atomic64_t *ptr); | 116 | asm volatile(ATOMIC64_ALTERNATIVE(sub_return) |
117 | : "+A" (i), "+c" (v) | ||
118 | : : "memory" | ||
119 | ); | ||
120 | return i; | ||
121 | } | ||
122 | |||
123 | static inline long long atomic64_inc_return(atomic64_t *v) | ||
124 | { | ||
125 | long long a; | ||
126 | asm volatile(ATOMIC64_ALTERNATIVE(inc_return) | ||
127 | : "=A" (a) | ||
128 | : "S" (v) | ||
129 | : "memory", "ecx" | ||
130 | ); | ||
131 | return a; | ||
132 | } | ||
133 | |||
134 | static inline long long atomic64_dec_return(atomic64_t *v) | ||
135 | { | ||
136 | long long a; | ||
137 | asm volatile(ATOMIC64_ALTERNATIVE(dec_return) | ||
138 | : "=A" (a) | ||
139 | : "S" (v) | ||
140 | : "memory", "ecx" | ||
141 | ); | ||
142 | return a; | ||
143 | } | ||
83 | 144 | ||
84 | /** | 145 | /** |
85 | * atomic64_add - add integer to atomic64 variable | 146 | * atomic64_add - add integer to atomic64 variable |
86 | * @delta: integer value to add | 147 | * @i: integer value to add |
87 | * @ptr: pointer to type atomic64_t | 148 | * @v: pointer to type atomic64_t |
88 | * | 149 | * |
89 | * Atomically adds @delta to @ptr. | 150 | * Atomically adds @i to @v. |
90 | */ | 151 | */ |
91 | extern void atomic64_add(u64 delta, atomic64_t *ptr); | 152 | static inline long long atomic64_add(long long i, atomic64_t *v) |
153 | { | ||
154 | asm volatile(ATOMIC64_ALTERNATIVE_(add, add_return) | ||
155 | : "+A" (i), "+c" (v) | ||
156 | : : "memory" | ||
157 | ); | ||
158 | return i; | ||
159 | } | ||
92 | 160 | ||
93 | /** | 161 | /** |
94 | * atomic64_sub - subtract the atomic64 variable | 162 | * atomic64_sub - subtract the atomic64 variable |
95 | * @delta: integer value to subtract | 163 | * @i: integer value to subtract |
96 | * @ptr: pointer to type atomic64_t | 164 | * @v: pointer to type atomic64_t |
97 | * | 165 | * |
98 | * Atomically subtracts @delta from @ptr. | 166 | * Atomically subtracts @i from @v. |
99 | */ | 167 | */ |
100 | extern void atomic64_sub(u64 delta, atomic64_t *ptr); | 168 | static inline long long atomic64_sub(long long i, atomic64_t *v) |
169 | { | ||
170 | asm volatile(ATOMIC64_ALTERNATIVE_(sub, sub_return) | ||
171 | : "+A" (i), "+c" (v) | ||
172 | : : "memory" | ||
173 | ); | ||
174 | return i; | ||
175 | } | ||
101 | 176 | ||
102 | /** | 177 | /** |
103 | * atomic64_sub_and_test - subtract value from variable and test result | 178 | * atomic64_sub_and_test - subtract value from variable and test result |
104 | * @delta: integer value to subtract | 179 | * @i: integer value to subtract |
105 | * @ptr: pointer to type atomic64_t | 180 | * @v: pointer to type atomic64_t |
106 | * | 181 | * |
107 | * Atomically subtracts @delta from @ptr and returns | 182 | * Atomically subtracts @i from @v and returns |
108 | * true if the result is zero, or false for all | 183 | * true if the result is zero, or false for all |
109 | * other cases. | 184 | * other cases. |
110 | */ | 185 | */ |
111 | extern int atomic64_sub_and_test(u64 delta, atomic64_t *ptr); | 186 | static inline int atomic64_sub_and_test(long long i, atomic64_t *v) |
187 | { | ||
188 | return atomic64_sub_return(i, v) == 0; | ||
189 | } | ||
112 | 190 | ||
113 | /** | 191 | /** |
114 | * atomic64_inc - increment atomic64 variable | 192 | * atomic64_inc - increment atomic64 variable |
115 | * @ptr: pointer to type atomic64_t | 193 | * @v: pointer to type atomic64_t |
116 | * | 194 | * |
117 | * Atomically increments @ptr by 1. | 195 | * Atomically increments @v by 1. |
118 | */ | 196 | */ |
119 | extern void atomic64_inc(atomic64_t *ptr); | 197 | static inline void atomic64_inc(atomic64_t *v) |
198 | { | ||
199 | asm volatile(ATOMIC64_ALTERNATIVE_(inc, inc_return) | ||
200 | : : "S" (v) | ||
201 | : "memory", "eax", "ecx", "edx" | ||
202 | ); | ||
203 | } | ||
120 | 204 | ||
121 | /** | 205 | /** |
122 | * atomic64_dec - decrement atomic64 variable | 206 | * atomic64_dec - decrement atomic64 variable |
@@ -124,37 +208,97 @@ extern void atomic64_inc(atomic64_t *ptr); | |||
124 | * | 208 | * |
125 | * Atomically decrements @ptr by 1. | 209 | * Atomically decrements @ptr by 1. |
126 | */ | 210 | */ |
127 | extern void atomic64_dec(atomic64_t *ptr); | 211 | static inline void atomic64_dec(atomic64_t *v) |
212 | { | ||
213 | asm volatile(ATOMIC64_ALTERNATIVE_(dec, dec_return) | ||
214 | : : "S" (v) | ||
215 | : "memory", "eax", "ecx", "edx" | ||
216 | ); | ||
217 | } | ||
128 | 218 | ||
129 | /** | 219 | /** |
130 | * atomic64_dec_and_test - decrement and test | 220 | * atomic64_dec_and_test - decrement and test |
131 | * @ptr: pointer to type atomic64_t | 221 | * @v: pointer to type atomic64_t |
132 | * | 222 | * |
133 | * Atomically decrements @ptr by 1 and | 223 | * Atomically decrements @v by 1 and |
134 | * returns true if the result is 0, or false for all other | 224 | * returns true if the result is 0, or false for all other |
135 | * cases. | 225 | * cases. |
136 | */ | 226 | */ |
137 | extern int atomic64_dec_and_test(atomic64_t *ptr); | 227 | static inline int atomic64_dec_and_test(atomic64_t *v) |
228 | { | ||
229 | return atomic64_dec_return(v) == 0; | ||
230 | } | ||
138 | 231 | ||
139 | /** | 232 | /** |
140 | * atomic64_inc_and_test - increment and test | 233 | * atomic64_inc_and_test - increment and test |
141 | * @ptr: pointer to type atomic64_t | 234 | * @v: pointer to type atomic64_t |
142 | * | 235 | * |
143 | * Atomically increments @ptr by 1 | 236 | * Atomically increments @v by 1 |
144 | * and returns true if the result is zero, or false for all | 237 | * and returns true if the result is zero, or false for all |
145 | * other cases. | 238 | * other cases. |
146 | */ | 239 | */ |
147 | extern int atomic64_inc_and_test(atomic64_t *ptr); | 240 | static inline int atomic64_inc_and_test(atomic64_t *v) |
241 | { | ||
242 | return atomic64_inc_return(v) == 0; | ||
243 | } | ||
148 | 244 | ||
149 | /** | 245 | /** |
150 | * atomic64_add_negative - add and test if negative | 246 | * atomic64_add_negative - add and test if negative |
151 | * @delta: integer value to add | 247 | * @i: integer value to add |
152 | * @ptr: pointer to type atomic64_t | 248 | * @v: pointer to type atomic64_t |
153 | * | 249 | * |
154 | * Atomically adds @delta to @ptr and returns true | 250 | * Atomically adds @i to @v and returns true |
155 | * if the result is negative, or false when | 251 | * if the result is negative, or false when |
156 | * result is greater than or equal to zero. | 252 | * result is greater than or equal to zero. |
157 | */ | 253 | */ |
158 | extern int atomic64_add_negative(u64 delta, atomic64_t *ptr); | 254 | static inline int atomic64_add_negative(long long i, atomic64_t *v) |
255 | { | ||
256 | return atomic64_add_return(i, v) < 0; | ||
257 | } | ||
258 | |||
259 | /** | ||
260 | * atomic64_add_unless - add unless the number is a given value | ||
261 | * @v: pointer of type atomic64_t | ||
262 | * @a: the amount to add to v... | ||
263 | * @u: ...unless v is equal to u. | ||
264 | * | ||
265 | * Atomically adds @a to @v, so long as it was not @u. | ||
266 | * Returns non-zero if @v was not @u, and zero otherwise. | ||
267 | */ | ||
268 | static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) | ||
269 | { | ||
270 | unsigned low = (unsigned)u; | ||
271 | unsigned high = (unsigned)(u >> 32); | ||
272 | asm volatile(ATOMIC64_ALTERNATIVE(add_unless) "\n\t" | ||
273 | : "+A" (a), "+c" (v), "+S" (low), "+D" (high) | ||
274 | : : "memory"); | ||
275 | return (int)a; | ||
276 | } | ||
277 | |||
278 | |||
279 | static inline int atomic64_inc_not_zero(atomic64_t *v) | ||
280 | { | ||
281 | int r; | ||
282 | asm volatile(ATOMIC64_ALTERNATIVE(inc_not_zero) | ||
283 | : "=a" (r) | ||
284 | : "S" (v) | ||
285 | : "ecx", "edx", "memory" | ||
286 | ); | ||
287 | return r; | ||
288 | } | ||
289 | |||
290 | static inline long long atomic64_dec_if_positive(atomic64_t *v) | ||
291 | { | ||
292 | long long r; | ||
293 | asm volatile(ATOMIC64_ALTERNATIVE(dec_if_positive) | ||
294 | : "=A" (r) | ||
295 | : "S" (v) | ||
296 | : "ecx", "memory" | ||
297 | ); | ||
298 | return r; | ||
299 | } | ||
300 | |||
301 | #undef ATOMIC64_ALTERNATIVE | ||
302 | #undef ATOMIC64_ALTERNATIVE_ | ||
159 | 303 | ||
160 | #endif /* _ASM_X86_ATOMIC64_32_H */ | 304 | #endif /* _ASM_X86_ATOMIC64_32_H */ |
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 51c5b4056929..49fd1ea22951 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h | |||
@@ -18,7 +18,7 @@ | |||
18 | */ | 18 | */ |
19 | static inline long atomic64_read(const atomic64_t *v) | 19 | static inline long atomic64_read(const atomic64_t *v) |
20 | { | 20 | { |
21 | return v->counter; | 21 | return (*(volatile long *)&(v)->counter); |
22 | } | 22 | } |
23 | 23 | ||
24 | /** | 24 | /** |
@@ -221,4 +221,27 @@ static inline int atomic64_add_unless(atomic64_t *v, long a, long u) | |||
221 | 221 | ||
222 | #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) | 222 | #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) |
223 | 223 | ||
224 | /* | ||
225 | * atomic64_dec_if_positive - decrement by 1 if old value positive | ||
226 | * @v: pointer of type atomic_t | ||
227 | * | ||
228 | * The function returns the old value of *v minus 1, even if | ||
229 | * the atomic variable, v, was not decremented. | ||
230 | */ | ||
231 | static inline long atomic64_dec_if_positive(atomic64_t *v) | ||
232 | { | ||
233 | long c, old, dec; | ||
234 | c = atomic64_read(v); | ||
235 | for (;;) { | ||
236 | dec = c - 1; | ||
237 | if (unlikely(dec < 0)) | ||
238 | break; | ||
239 | old = atomic64_cmpxchg((v), c, dec); | ||
240 | if (likely(old == c)) | ||
241 | break; | ||
242 | c = old; | ||
243 | } | ||
244 | return dec; | ||
245 | } | ||
246 | |||
224 | #endif /* _ASM_X86_ATOMIC64_64_H */ | 247 | #endif /* _ASM_X86_ATOMIC64_64_H */ |
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 02b47a603fc8..545776efeb16 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h | |||
@@ -444,7 +444,9 @@ static inline int fls(int x) | |||
444 | 444 | ||
445 | #define ARCH_HAS_FAST_MULTIPLIER 1 | 445 | #define ARCH_HAS_FAST_MULTIPLIER 1 |
446 | 446 | ||
447 | #include <asm-generic/bitops/hweight.h> | 447 | #include <asm/arch_hweight.h> |
448 | |||
449 | #include <asm-generic/bitops/const_hweight.h> | ||
448 | 450 | ||
449 | #endif /* __KERNEL__ */ | 451 | #endif /* __KERNEL__ */ |
450 | 452 | ||
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 7a1065958ba9..3b62ab56c7a0 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h | |||
@@ -24,7 +24,7 @@ | |||
24 | #define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2) | 24 | #define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2) |
25 | 25 | ||
26 | #if (CONFIG_PHYSICAL_ALIGN & (CONFIG_PHYSICAL_ALIGN-1)) || \ | 26 | #if (CONFIG_PHYSICAL_ALIGN & (CONFIG_PHYSICAL_ALIGN-1)) || \ |
27 | (CONFIG_PHYSICAL_ALIGN < (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2)) | 27 | (CONFIG_PHYSICAL_ALIGN < MIN_KERNEL_ALIGN) |
28 | #error "Invalid value for CONFIG_PHYSICAL_ALIGN" | 28 | #error "Invalid value for CONFIG_PHYSICAL_ALIGN" |
29 | #endif | 29 | #endif |
30 | 30 | ||
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index 634c40a739a6..c70068d05f70 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h | |||
@@ -44,9 +44,6 @@ static inline void copy_from_user_page(struct vm_area_struct *vma, | |||
44 | memcpy(dst, src, len); | 44 | memcpy(dst, src, len); |
45 | } | 45 | } |
46 | 46 | ||
47 | #define PG_WC PG_arch_1 | ||
48 | PAGEFLAG(WC, WC) | ||
49 | |||
50 | #ifdef CONFIG_X86_PAT | 47 | #ifdef CONFIG_X86_PAT |
51 | /* | 48 | /* |
52 | * X86 PAT uses page flags WC and Uncached together to keep track of | 49 | * X86 PAT uses page flags WC and Uncached together to keep track of |
@@ -55,16 +52,24 @@ PAGEFLAG(WC, WC) | |||
55 | * _PAGE_CACHE_UC_MINUS and fourth state where page's memory type has not | 52 | * _PAGE_CACHE_UC_MINUS and fourth state where page's memory type has not |
56 | * been changed from its default (value of -1 used to denote this). | 53 | * been changed from its default (value of -1 used to denote this). |
57 | * Note we do not support _PAGE_CACHE_UC here. | 54 | * Note we do not support _PAGE_CACHE_UC here. |
58 | * | ||
59 | * Caller must hold memtype_lock for atomicity. | ||
60 | */ | 55 | */ |
56 | |||
57 | #define _PGMT_DEFAULT 0 | ||
58 | #define _PGMT_WC (1UL << PG_arch_1) | ||
59 | #define _PGMT_UC_MINUS (1UL << PG_uncached) | ||
60 | #define _PGMT_WB (1UL << PG_uncached | 1UL << PG_arch_1) | ||
61 | #define _PGMT_MASK (1UL << PG_uncached | 1UL << PG_arch_1) | ||
62 | #define _PGMT_CLEAR_MASK (~_PGMT_MASK) | ||
63 | |||
61 | static inline unsigned long get_page_memtype(struct page *pg) | 64 | static inline unsigned long get_page_memtype(struct page *pg) |
62 | { | 65 | { |
63 | if (!PageUncached(pg) && !PageWC(pg)) | 66 | unsigned long pg_flags = pg->flags & _PGMT_MASK; |
67 | |||
68 | if (pg_flags == _PGMT_DEFAULT) | ||
64 | return -1; | 69 | return -1; |
65 | else if (!PageUncached(pg) && PageWC(pg)) | 70 | else if (pg_flags == _PGMT_WC) |
66 | return _PAGE_CACHE_WC; | 71 | return _PAGE_CACHE_WC; |
67 | else if (PageUncached(pg) && !PageWC(pg)) | 72 | else if (pg_flags == _PGMT_UC_MINUS) |
68 | return _PAGE_CACHE_UC_MINUS; | 73 | return _PAGE_CACHE_UC_MINUS; |
69 | else | 74 | else |
70 | return _PAGE_CACHE_WB; | 75 | return _PAGE_CACHE_WB; |
@@ -72,25 +77,26 @@ static inline unsigned long get_page_memtype(struct page *pg) | |||
72 | 77 | ||
73 | static inline void set_page_memtype(struct page *pg, unsigned long memtype) | 78 | static inline void set_page_memtype(struct page *pg, unsigned long memtype) |
74 | { | 79 | { |
80 | unsigned long memtype_flags = _PGMT_DEFAULT; | ||
81 | unsigned long old_flags; | ||
82 | unsigned long new_flags; | ||
83 | |||
75 | switch (memtype) { | 84 | switch (memtype) { |
76 | case _PAGE_CACHE_WC: | 85 | case _PAGE_CACHE_WC: |
77 | ClearPageUncached(pg); | 86 | memtype_flags = _PGMT_WC; |
78 | SetPageWC(pg); | ||
79 | break; | 87 | break; |
80 | case _PAGE_CACHE_UC_MINUS: | 88 | case _PAGE_CACHE_UC_MINUS: |
81 | SetPageUncached(pg); | 89 | memtype_flags = _PGMT_UC_MINUS; |
82 | ClearPageWC(pg); | ||
83 | break; | 90 | break; |
84 | case _PAGE_CACHE_WB: | 91 | case _PAGE_CACHE_WB: |
85 | SetPageUncached(pg); | 92 | memtype_flags = _PGMT_WB; |
86 | SetPageWC(pg); | ||
87 | break; | ||
88 | default: | ||
89 | case -1: | ||
90 | ClearPageUncached(pg); | ||
91 | ClearPageWC(pg); | ||
92 | break; | 93 | break; |
93 | } | 94 | } |
95 | |||
96 | do { | ||
97 | old_flags = pg->flags; | ||
98 | new_flags = (old_flags & _PGMT_CLEAR_MASK) | memtype_flags; | ||
99 | } while (cmpxchg(&pg->flags, old_flags, new_flags) != old_flags); | ||
94 | } | 100 | } |
95 | #else | 101 | #else |
96 | static inline unsigned long get_page_memtype(struct page *pg) { return -1; } | 102 | static inline unsigned long get_page_memtype(struct page *pg) { return -1; } |
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index ffb9bb6b6c37..8859e12dd3cf 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h | |||
@@ -271,7 +271,8 @@ extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64); | |||
271 | __typeof__(*(ptr)) __ret; \ | 271 | __typeof__(*(ptr)) __ret; \ |
272 | __typeof__(*(ptr)) __old = (o); \ | 272 | __typeof__(*(ptr)) __old = (o); \ |
273 | __typeof__(*(ptr)) __new = (n); \ | 273 | __typeof__(*(ptr)) __new = (n); \ |
274 | alternative_io("call cmpxchg8b_emu", \ | 274 | alternative_io(LOCK_PREFIX_HERE \ |
275 | "call cmpxchg8b_emu", \ | ||
275 | "lock; cmpxchg8b (%%esi)" , \ | 276 | "lock; cmpxchg8b (%%esi)" , \ |
276 | X86_FEATURE_CX8, \ | 277 | X86_FEATURE_CX8, \ |
277 | "=A" (__ret), \ | 278 | "=A" (__ret), \ |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 0cd82d068613..dca9c545f44e 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
@@ -161,6 +161,7 @@ | |||
161 | */ | 161 | */ |
162 | #define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */ | 162 | #define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */ |
163 | #define X86_FEATURE_ARAT (7*32+ 1) /* Always Running APIC Timer */ | 163 | #define X86_FEATURE_ARAT (7*32+ 1) /* Always Running APIC Timer */ |
164 | #define X86_FEATURE_CPB (7*32+ 2) /* AMD Core Performance Boost */ | ||
164 | 165 | ||
165 | /* Virtualization flags: Linux defined */ | 166 | /* Virtualization flags: Linux defined */ |
166 | #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ | 167 | #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ |
@@ -175,6 +176,7 @@ | |||
175 | 176 | ||
176 | #if defined(__KERNEL__) && !defined(__ASSEMBLY__) | 177 | #if defined(__KERNEL__) && !defined(__ASSEMBLY__) |
177 | 178 | ||
179 | #include <asm/asm.h> | ||
178 | #include <linux/bitops.h> | 180 | #include <linux/bitops.h> |
179 | 181 | ||
180 | extern const char * const x86_cap_flags[NCAPINTS*32]; | 182 | extern const char * const x86_cap_flags[NCAPINTS*32]; |
@@ -283,6 +285,62 @@ extern const char * const x86_power_flags[32]; | |||
283 | 285 | ||
284 | #endif /* CONFIG_X86_64 */ | 286 | #endif /* CONFIG_X86_64 */ |
285 | 287 | ||
288 | /* | ||
289 | * Static testing of CPU features. Used the same as boot_cpu_has(). | ||
290 | * These are only valid after alternatives have run, but will statically | ||
291 | * patch the target code for additional performance. | ||
292 | * | ||
293 | */ | ||
294 | static __always_inline __pure bool __static_cpu_has(u8 bit) | ||
295 | { | ||
296 | #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5) | ||
297 | asm goto("1: jmp %l[t_no]\n" | ||
298 | "2:\n" | ||
299 | ".section .altinstructions,\"a\"\n" | ||
300 | _ASM_ALIGN "\n" | ||
301 | _ASM_PTR "1b\n" | ||
302 | _ASM_PTR "0\n" /* no replacement */ | ||
303 | " .byte %P0\n" /* feature bit */ | ||
304 | " .byte 2b - 1b\n" /* source len */ | ||
305 | " .byte 0\n" /* replacement len */ | ||
306 | " .byte 0xff + 0 - (2b-1b)\n" /* padding */ | ||
307 | ".previous\n" | ||
308 | : : "i" (bit) : : t_no); | ||
309 | return true; | ||
310 | t_no: | ||
311 | return false; | ||
312 | #else | ||
313 | u8 flag; | ||
314 | /* Open-coded due to __stringify() in ALTERNATIVE() */ | ||
315 | asm volatile("1: movb $0,%0\n" | ||
316 | "2:\n" | ||
317 | ".section .altinstructions,\"a\"\n" | ||
318 | _ASM_ALIGN "\n" | ||
319 | _ASM_PTR "1b\n" | ||
320 | _ASM_PTR "3f\n" | ||
321 | " .byte %P1\n" /* feature bit */ | ||
322 | " .byte 2b - 1b\n" /* source len */ | ||
323 | " .byte 4f - 3f\n" /* replacement len */ | ||
324 | " .byte 0xff + (4f-3f) - (2b-1b)\n" /* padding */ | ||
325 | ".previous\n" | ||
326 | ".section .altinstr_replacement,\"ax\"\n" | ||
327 | "3: movb $1,%0\n" | ||
328 | "4:\n" | ||
329 | ".previous\n" | ||
330 | : "=qm" (flag) : "i" (bit)); | ||
331 | return flag; | ||
332 | #endif | ||
333 | } | ||
334 | |||
335 | #define static_cpu_has(bit) \ | ||
336 | ( \ | ||
337 | __builtin_constant_p(boot_cpu_has(bit)) ? \ | ||
338 | boot_cpu_has(bit) : \ | ||
339 | (__builtin_constant_p(bit) && !((bit) & ~0xff)) ? \ | ||
340 | __static_cpu_has(bit) : \ | ||
341 | boot_cpu_has(bit) \ | ||
342 | ) | ||
343 | |||
286 | #endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ | 344 | #endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ |
287 | 345 | ||
288 | #endif /* _ASM_X86_CPUFEATURE_H */ | 346 | #endif /* _ASM_X86_CPUFEATURE_H */ |
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h deleted file mode 100644 index 70dac199b093..000000000000 --- a/arch/x86/include/asm/ds.h +++ /dev/null | |||
@@ -1,302 +0,0 @@ | |||
1 | /* | ||
2 | * Debug Store (DS) support | ||
3 | * | ||
4 | * This provides a low-level interface to the hardware's Debug Store | ||
5 | * feature that is used for branch trace store (BTS) and | ||
6 | * precise-event based sampling (PEBS). | ||
7 | * | ||
8 | * It manages: | ||
9 | * - DS and BTS hardware configuration | ||
10 | * - buffer overflow handling (to be done) | ||
11 | * - buffer access | ||
12 | * | ||
13 | * It does not do: | ||
14 | * - security checking (is the caller allowed to trace the task) | ||
15 | * - buffer allocation (memory accounting) | ||
16 | * | ||
17 | * | ||
18 | * Copyright (C) 2007-2009 Intel Corporation. | ||
19 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009 | ||
20 | */ | ||
21 | |||
22 | #ifndef _ASM_X86_DS_H | ||
23 | #define _ASM_X86_DS_H | ||
24 | |||
25 | |||
26 | #include <linux/types.h> | ||
27 | #include <linux/init.h> | ||
28 | #include <linux/err.h> | ||
29 | |||
30 | |||
31 | #ifdef CONFIG_X86_DS | ||
32 | |||
33 | struct task_struct; | ||
34 | struct ds_context; | ||
35 | struct ds_tracer; | ||
36 | struct bts_tracer; | ||
37 | struct pebs_tracer; | ||
38 | |||
39 | typedef void (*bts_ovfl_callback_t)(struct bts_tracer *); | ||
40 | typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *); | ||
41 | |||
42 | |||
43 | /* | ||
44 | * A list of features plus corresponding macros to talk about them in | ||
45 | * the ds_request function's flags parameter. | ||
46 | * | ||
47 | * We use the enum to index an array of corresponding control bits; | ||
48 | * we use the macro to index a flags bit-vector. | ||
49 | */ | ||
50 | enum ds_feature { | ||
51 | dsf_bts = 0, | ||
52 | dsf_bts_kernel, | ||
53 | #define BTS_KERNEL (1 << dsf_bts_kernel) | ||
54 | /* trace kernel-mode branches */ | ||
55 | |||
56 | dsf_bts_user, | ||
57 | #define BTS_USER (1 << dsf_bts_user) | ||
58 | /* trace user-mode branches */ | ||
59 | |||
60 | dsf_bts_overflow, | ||
61 | dsf_bts_max, | ||
62 | dsf_pebs = dsf_bts_max, | ||
63 | |||
64 | dsf_pebs_max, | ||
65 | dsf_ctl_max = dsf_pebs_max, | ||
66 | dsf_bts_timestamps = dsf_ctl_max, | ||
67 | #define BTS_TIMESTAMPS (1 << dsf_bts_timestamps) | ||
68 | /* add timestamps into BTS trace */ | ||
69 | |||
70 | #define BTS_USER_FLAGS (BTS_KERNEL | BTS_USER | BTS_TIMESTAMPS) | ||
71 | }; | ||
72 | |||
73 | |||
74 | /* | ||
75 | * Request BTS or PEBS | ||
76 | * | ||
77 | * Due to alignement constraints, the actual buffer may be slightly | ||
78 | * smaller than the requested or provided buffer. | ||
79 | * | ||
80 | * Returns a pointer to a tracer structure on success, or | ||
81 | * ERR_PTR(errcode) on failure. | ||
82 | * | ||
83 | * The interrupt threshold is independent from the overflow callback | ||
84 | * to allow users to use their own overflow interrupt handling mechanism. | ||
85 | * | ||
86 | * The function might sleep. | ||
87 | * | ||
88 | * task: the task to request recording for | ||
89 | * cpu: the cpu to request recording for | ||
90 | * base: the base pointer for the (non-pageable) buffer; | ||
91 | * size: the size of the provided buffer in bytes | ||
92 | * ovfl: pointer to a function to be called on buffer overflow; | ||
93 | * NULL if cyclic buffer requested | ||
94 | * th: the interrupt threshold in records from the end of the buffer; | ||
95 | * -1 if no interrupt threshold is requested. | ||
96 | * flags: a bit-mask of the above flags | ||
97 | */ | ||
98 | extern struct bts_tracer *ds_request_bts_task(struct task_struct *task, | ||
99 | void *base, size_t size, | ||
100 | bts_ovfl_callback_t ovfl, | ||
101 | size_t th, unsigned int flags); | ||
102 | extern struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size, | ||
103 | bts_ovfl_callback_t ovfl, | ||
104 | size_t th, unsigned int flags); | ||
105 | extern struct pebs_tracer *ds_request_pebs_task(struct task_struct *task, | ||
106 | void *base, size_t size, | ||
107 | pebs_ovfl_callback_t ovfl, | ||
108 | size_t th, unsigned int flags); | ||
109 | extern struct pebs_tracer *ds_request_pebs_cpu(int cpu, | ||
110 | void *base, size_t size, | ||
111 | pebs_ovfl_callback_t ovfl, | ||
112 | size_t th, unsigned int flags); | ||
113 | |||
114 | /* | ||
115 | * Release BTS or PEBS resources | ||
116 | * Suspend and resume BTS or PEBS tracing | ||
117 | * | ||
118 | * Must be called with irq's enabled. | ||
119 | * | ||
120 | * tracer: the tracer handle returned from ds_request_~() | ||
121 | */ | ||
122 | extern void ds_release_bts(struct bts_tracer *tracer); | ||
123 | extern void ds_suspend_bts(struct bts_tracer *tracer); | ||
124 | extern void ds_resume_bts(struct bts_tracer *tracer); | ||
125 | extern void ds_release_pebs(struct pebs_tracer *tracer); | ||
126 | extern void ds_suspend_pebs(struct pebs_tracer *tracer); | ||
127 | extern void ds_resume_pebs(struct pebs_tracer *tracer); | ||
128 | |||
129 | /* | ||
130 | * Release BTS or PEBS resources | ||
131 | * Suspend and resume BTS or PEBS tracing | ||
132 | * | ||
133 | * Cpu tracers must call this on the traced cpu. | ||
134 | * Task tracers must call ds_release_~_noirq() for themselves. | ||
135 | * | ||
136 | * May be called with irq's disabled. | ||
137 | * | ||
138 | * Returns 0 if successful; | ||
139 | * -EPERM if the cpu tracer does not trace the current cpu. | ||
140 | * -EPERM if the task tracer does not trace itself. | ||
141 | * | ||
142 | * tracer: the tracer handle returned from ds_request_~() | ||
143 | */ | ||
144 | extern int ds_release_bts_noirq(struct bts_tracer *tracer); | ||
145 | extern int ds_suspend_bts_noirq(struct bts_tracer *tracer); | ||
146 | extern int ds_resume_bts_noirq(struct bts_tracer *tracer); | ||
147 | extern int ds_release_pebs_noirq(struct pebs_tracer *tracer); | ||
148 | extern int ds_suspend_pebs_noirq(struct pebs_tracer *tracer); | ||
149 | extern int ds_resume_pebs_noirq(struct pebs_tracer *tracer); | ||
150 | |||
151 | |||
152 | /* | ||
153 | * The raw DS buffer state as it is used for BTS and PEBS recording. | ||
154 | * | ||
155 | * This is the low-level, arch-dependent interface for working | ||
156 | * directly on the raw trace data. | ||
157 | */ | ||
158 | struct ds_trace { | ||
159 | /* the number of bts/pebs records */ | ||
160 | size_t n; | ||
161 | /* the size of a bts/pebs record in bytes */ | ||
162 | size_t size; | ||
163 | /* pointers into the raw buffer: | ||
164 | - to the first entry */ | ||
165 | void *begin; | ||
166 | /* - one beyond the last entry */ | ||
167 | void *end; | ||
168 | /* - one beyond the newest entry */ | ||
169 | void *top; | ||
170 | /* - the interrupt threshold */ | ||
171 | void *ith; | ||
172 | /* flags given on ds_request() */ | ||
173 | unsigned int flags; | ||
174 | }; | ||
175 | |||
176 | /* | ||
177 | * An arch-independent view on branch trace data. | ||
178 | */ | ||
179 | enum bts_qualifier { | ||
180 | bts_invalid, | ||
181 | #define BTS_INVALID bts_invalid | ||
182 | |||
183 | bts_branch, | ||
184 | #define BTS_BRANCH bts_branch | ||
185 | |||
186 | bts_task_arrives, | ||
187 | #define BTS_TASK_ARRIVES bts_task_arrives | ||
188 | |||
189 | bts_task_departs, | ||
190 | #define BTS_TASK_DEPARTS bts_task_departs | ||
191 | |||
192 | bts_qual_bit_size = 4, | ||
193 | bts_qual_max = (1 << bts_qual_bit_size), | ||
194 | }; | ||
195 | |||
196 | struct bts_struct { | ||
197 | __u64 qualifier; | ||
198 | union { | ||
199 | /* BTS_BRANCH */ | ||
200 | struct { | ||
201 | __u64 from; | ||
202 | __u64 to; | ||
203 | } lbr; | ||
204 | /* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */ | ||
205 | struct { | ||
206 | __u64 clock; | ||
207 | pid_t pid; | ||
208 | } event; | ||
209 | } variant; | ||
210 | }; | ||
211 | |||
212 | |||
213 | /* | ||
214 | * The BTS state. | ||
215 | * | ||
216 | * This gives access to the raw DS state and adds functions to provide | ||
217 | * an arch-independent view of the BTS data. | ||
218 | */ | ||
219 | struct bts_trace { | ||
220 | struct ds_trace ds; | ||
221 | |||
222 | int (*read)(struct bts_tracer *tracer, const void *at, | ||
223 | struct bts_struct *out); | ||
224 | int (*write)(struct bts_tracer *tracer, const struct bts_struct *in); | ||
225 | }; | ||
226 | |||
227 | |||
228 | /* | ||
229 | * The PEBS state. | ||
230 | * | ||
231 | * This gives access to the raw DS state and the PEBS-specific counter | ||
232 | * reset value. | ||
233 | */ | ||
234 | struct pebs_trace { | ||
235 | struct ds_trace ds; | ||
236 | |||
237 | /* the number of valid counters in the below array */ | ||
238 | unsigned int counters; | ||
239 | |||
240 | #define MAX_PEBS_COUNTERS 4 | ||
241 | /* the counter reset value */ | ||
242 | unsigned long long counter_reset[MAX_PEBS_COUNTERS]; | ||
243 | }; | ||
244 | |||
245 | |||
246 | /* | ||
247 | * Read the BTS or PEBS trace. | ||
248 | * | ||
249 | * Returns a view on the trace collected for the parameter tracer. | ||
250 | * | ||
251 | * The view remains valid as long as the traced task is not running or | ||
252 | * the tracer is suspended. | ||
253 | * Writes into the trace buffer are not reflected. | ||
254 | * | ||
255 | * tracer: the tracer handle returned from ds_request_~() | ||
256 | */ | ||
257 | extern const struct bts_trace *ds_read_bts(struct bts_tracer *tracer); | ||
258 | extern const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer); | ||
259 | |||
260 | |||
261 | /* | ||
262 | * Reset the write pointer of the BTS/PEBS buffer. | ||
263 | * | ||
264 | * Returns 0 on success; -Eerrno on error | ||
265 | * | ||
266 | * tracer: the tracer handle returned from ds_request_~() | ||
267 | */ | ||
268 | extern int ds_reset_bts(struct bts_tracer *tracer); | ||
269 | extern int ds_reset_pebs(struct pebs_tracer *tracer); | ||
270 | |||
271 | /* | ||
272 | * Set the PEBS counter reset value. | ||
273 | * | ||
274 | * Returns 0 on success; -Eerrno on error | ||
275 | * | ||
276 | * tracer: the tracer handle returned from ds_request_pebs() | ||
277 | * counter: the index of the counter | ||
278 | * value: the new counter reset value | ||
279 | */ | ||
280 | extern int ds_set_pebs_reset(struct pebs_tracer *tracer, | ||
281 | unsigned int counter, u64 value); | ||
282 | |||
283 | /* | ||
284 | * Initialization | ||
285 | */ | ||
286 | struct cpuinfo_x86; | ||
287 | extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); | ||
288 | |||
289 | /* | ||
290 | * Context switch work | ||
291 | */ | ||
292 | extern void ds_switch_to(struct task_struct *prev, struct task_struct *next); | ||
293 | |||
294 | #else /* CONFIG_X86_DS */ | ||
295 | |||
296 | struct cpuinfo_x86; | ||
297 | static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} | ||
298 | static inline void ds_switch_to(struct task_struct *prev, | ||
299 | struct task_struct *next) {} | ||
300 | |||
301 | #endif /* CONFIG_X86_DS */ | ||
302 | #endif /* _ASM_X86_DS_H */ | ||
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index ae6253ab9029..733f7e91e7a9 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h | |||
@@ -34,6 +34,18 @@ | |||
34 | #define CFI_SIGNAL_FRAME | 34 | #define CFI_SIGNAL_FRAME |
35 | #endif | 35 | #endif |
36 | 36 | ||
37 | #if defined(CONFIG_AS_CFI_SECTIONS) && defined(__ASSEMBLY__) | ||
38 | /* | ||
39 | * Emit CFI data in .debug_frame sections, not .eh_frame sections. | ||
40 | * The latter we currently just discard since we don't do DWARF | ||
41 | * unwinding at runtime. So only the offline DWARF information is | ||
42 | * useful to anyone. Note we should not use this directive if this | ||
43 | * file is used in the vDSO assembly, or if vmlinux.lds.S gets | ||
44 | * changed so it doesn't discard .eh_frame. | ||
45 | */ | ||
46 | .cfi_sections .debug_frame | ||
47 | #endif | ||
48 | |||
37 | #else | 49 | #else |
38 | 50 | ||
39 | /* | 51 | /* |
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 0e22296790d3..ec8a52d14ab1 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h | |||
@@ -45,7 +45,12 @@ | |||
45 | #define E820_NVS 4 | 45 | #define E820_NVS 4 |
46 | #define E820_UNUSABLE 5 | 46 | #define E820_UNUSABLE 5 |
47 | 47 | ||
48 | /* reserved RAM used by kernel itself */ | 48 | /* |
49 | * reserved RAM used by kernel itself | ||
50 | * if CONFIG_INTEL_TXT is enabled, memory of this type will be | ||
51 | * included in the S3 integrity calculation and so should not include | ||
52 | * any memory that BIOS might alter over the S3 transition | ||
53 | */ | ||
49 | #define E820_RESERVED_KERN 128 | 54 | #define E820_RESERVED_KERN 128 |
50 | 55 | ||
51 | #ifndef __ASSEMBLY__ | 56 | #ifndef __ASSEMBLY__ |
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 0f8576427cfe..aeab29aee617 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h | |||
@@ -35,7 +35,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); | |||
35 | 35 | ||
36 | #define __ARCH_IRQ_STAT | 36 | #define __ARCH_IRQ_STAT |
37 | 37 | ||
38 | #define inc_irq_stat(member) percpu_add(irq_stat.member, 1) | 38 | #define inc_irq_stat(member) percpu_inc(irq_stat.member) |
39 | 39 | ||
40 | #define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) | 40 | #define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) |
41 | 41 | ||
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 2a1bd8f4f23a..942255310e6a 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h | |||
@@ -41,12 +41,16 @@ struct arch_hw_breakpoint { | |||
41 | /* Total number of available HW breakpoint registers */ | 41 | /* Total number of available HW breakpoint registers */ |
42 | #define HBP_NUM 4 | 42 | #define HBP_NUM 4 |
43 | 43 | ||
44 | static inline int hw_breakpoint_slots(int type) | ||
45 | { | ||
46 | return HBP_NUM; | ||
47 | } | ||
48 | |||
44 | struct perf_event; | 49 | struct perf_event; |
45 | struct pmu; | 50 | struct pmu; |
46 | 51 | ||
47 | extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len); | 52 | extern int arch_check_bp_in_kernelspace(struct perf_event *bp); |
48 | extern int arch_validate_hwbkpt_settings(struct perf_event *bp, | 53 | extern int arch_validate_hwbkpt_settings(struct perf_event *bp); |
49 | struct task_struct *tsk); | ||
50 | extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, | 54 | extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, |
51 | unsigned long val, void *data); | 55 | unsigned long val, void *data); |
52 | 56 | ||
diff --git a/arch/x86/include/asm/hyperv.h b/arch/x86/include/asm/hyperv.h index e153a2b3889a..5df477ac3af7 100644 --- a/arch/x86/include/asm/hyperv.h +++ b/arch/x86/include/asm/hyperv.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifndef _ASM_X86_KVM_HYPERV_H | 1 | #ifndef _ASM_X86_HYPERV_H |
2 | #define _ASM_X86_KVM_HYPERV_H | 2 | #define _ASM_X86_HYPERV_H |
3 | 3 | ||
4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
5 | 5 | ||
@@ -14,6 +14,10 @@ | |||
14 | #define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 | 14 | #define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004 |
15 | #define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 | 15 | #define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005 |
16 | 16 | ||
17 | #define HYPERV_HYPERVISOR_PRESENT_BIT 0x80000000 | ||
18 | #define HYPERV_CPUID_MIN 0x40000005 | ||
19 | #define HYPERV_CPUID_MAX 0x4000ffff | ||
20 | |||
17 | /* | 21 | /* |
18 | * Feature identification. EAX indicates which features are available | 22 | * Feature identification. EAX indicates which features are available |
19 | * to the partition based upon the current partition privileges. | 23 | * to the partition based upon the current partition privileges. |
@@ -129,6 +133,9 @@ | |||
129 | /* MSR used to provide vcpu index */ | 133 | /* MSR used to provide vcpu index */ |
130 | #define HV_X64_MSR_VP_INDEX 0x40000002 | 134 | #define HV_X64_MSR_VP_INDEX 0x40000002 |
131 | 135 | ||
136 | /* MSR used to read the per-partition time reference counter */ | ||
137 | #define HV_X64_MSR_TIME_REF_COUNT 0x40000020 | ||
138 | |||
132 | /* Define the virtual APIC registers */ | 139 | /* Define the virtual APIC registers */ |
133 | #define HV_X64_MSR_EOI 0x40000070 | 140 | #define HV_X64_MSR_EOI 0x40000070 |
134 | #define HV_X64_MSR_ICR 0x40000071 | 141 | #define HV_X64_MSR_ICR 0x40000071 |
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index b78c0941e422..70abda7058c8 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h | |||
@@ -17,10 +17,33 @@ | |||
17 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | 17 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
18 | * | 18 | * |
19 | */ | 19 | */ |
20 | #ifndef ASM_X86__HYPERVISOR_H | 20 | #ifndef _ASM_X86_HYPERVISOR_H |
21 | #define ASM_X86__HYPERVISOR_H | 21 | #define _ASM_X86_HYPERVISOR_H |
22 | 22 | ||
23 | extern void init_hypervisor(struct cpuinfo_x86 *c); | 23 | extern void init_hypervisor(struct cpuinfo_x86 *c); |
24 | extern void init_hypervisor_platform(void); | 24 | extern void init_hypervisor_platform(void); |
25 | 25 | ||
26 | /* | ||
27 | * x86 hypervisor information | ||
28 | */ | ||
29 | struct hypervisor_x86 { | ||
30 | /* Hypervisor name */ | ||
31 | const char *name; | ||
32 | |||
33 | /* Detection routine */ | ||
34 | bool (*detect)(void); | ||
35 | |||
36 | /* Adjust CPU feature bits (run once per CPU) */ | ||
37 | void (*set_cpu_features)(struct cpuinfo_x86 *); | ||
38 | |||
39 | /* Platform setup (run once per boot) */ | ||
40 | void (*init_platform)(void); | ||
41 | }; | ||
42 | |||
43 | extern const struct hypervisor_x86 *x86_hyper; | ||
44 | |||
45 | /* Recognized hypervisors */ | ||
46 | extern const struct hypervisor_x86 x86_hyper_vmware; | ||
47 | extern const struct hypervisor_x86 x86_hyper_ms_hyperv; | ||
48 | |||
26 | #endif | 49 | #endif |
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index da2930924501..c991b3a7b904 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h | |||
@@ -16,7 +16,9 @@ | |||
16 | #include <linux/kernel_stat.h> | 16 | #include <linux/kernel_stat.h> |
17 | #include <linux/regset.h> | 17 | #include <linux/regset.h> |
18 | #include <linux/hardirq.h> | 18 | #include <linux/hardirq.h> |
19 | #include <linux/slab.h> | ||
19 | #include <asm/asm.h> | 20 | #include <asm/asm.h> |
21 | #include <asm/cpufeature.h> | ||
20 | #include <asm/processor.h> | 22 | #include <asm/processor.h> |
21 | #include <asm/sigcontext.h> | 23 | #include <asm/sigcontext.h> |
22 | #include <asm/user.h> | 24 | #include <asm/user.h> |
@@ -56,6 +58,11 @@ extern int restore_i387_xstate_ia32(void __user *buf); | |||
56 | 58 | ||
57 | #define X87_FSW_ES (1 << 7) /* Exception Summary */ | 59 | #define X87_FSW_ES (1 << 7) /* Exception Summary */ |
58 | 60 | ||
61 | static __always_inline __pure bool use_xsave(void) | ||
62 | { | ||
63 | return static_cpu_has(X86_FEATURE_XSAVE); | ||
64 | } | ||
65 | |||
59 | #ifdef CONFIG_X86_64 | 66 | #ifdef CONFIG_X86_64 |
60 | 67 | ||
61 | /* Ignore delayed exceptions from user space */ | 68 | /* Ignore delayed exceptions from user space */ |
@@ -91,15 +98,15 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) | |||
91 | values. The kernel data segment can be sometimes 0 and sometimes | 98 | values. The kernel data segment can be sometimes 0 and sometimes |
92 | new user value. Both should be ok. | 99 | new user value. Both should be ok. |
93 | Use the PDA as safe address because it should be already in L1. */ | 100 | Use the PDA as safe address because it should be already in L1. */ |
94 | static inline void clear_fpu_state(struct task_struct *tsk) | 101 | static inline void fpu_clear(struct fpu *fpu) |
95 | { | 102 | { |
96 | struct xsave_struct *xstate = &tsk->thread.xstate->xsave; | 103 | struct xsave_struct *xstate = &fpu->state->xsave; |
97 | struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; | 104 | struct i387_fxsave_struct *fx = &fpu->state->fxsave; |
98 | 105 | ||
99 | /* | 106 | /* |
100 | * xsave header may indicate the init state of the FP. | 107 | * xsave header may indicate the init state of the FP. |
101 | */ | 108 | */ |
102 | if ((task_thread_info(tsk)->status & TS_XSAVE) && | 109 | if (use_xsave() && |
103 | !(xstate->xsave_hdr.xstate_bv & XSTATE_FP)) | 110 | !(xstate->xsave_hdr.xstate_bv & XSTATE_FP)) |
104 | return; | 111 | return; |
105 | 112 | ||
@@ -111,6 +118,11 @@ static inline void clear_fpu_state(struct task_struct *tsk) | |||
111 | X86_FEATURE_FXSAVE_LEAK); | 118 | X86_FEATURE_FXSAVE_LEAK); |
112 | } | 119 | } |
113 | 120 | ||
121 | static inline void clear_fpu_state(struct task_struct *tsk) | ||
122 | { | ||
123 | fpu_clear(&tsk->thread.fpu); | ||
124 | } | ||
125 | |||
114 | static inline int fxsave_user(struct i387_fxsave_struct __user *fx) | 126 | static inline int fxsave_user(struct i387_fxsave_struct __user *fx) |
115 | { | 127 | { |
116 | int err; | 128 | int err; |
@@ -135,7 +147,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx) | |||
135 | return err; | 147 | return err; |
136 | } | 148 | } |
137 | 149 | ||
138 | static inline void fxsave(struct task_struct *tsk) | 150 | static inline void fpu_fxsave(struct fpu *fpu) |
139 | { | 151 | { |
140 | /* Using "rex64; fxsave %0" is broken because, if the memory operand | 152 | /* Using "rex64; fxsave %0" is broken because, if the memory operand |
141 | uses any extended registers for addressing, a second REX prefix | 153 | uses any extended registers for addressing, a second REX prefix |
@@ -145,42 +157,45 @@ static inline void fxsave(struct task_struct *tsk) | |||
145 | /* Using "fxsaveq %0" would be the ideal choice, but is only supported | 157 | /* Using "fxsaveq %0" would be the ideal choice, but is only supported |
146 | starting with gas 2.16. */ | 158 | starting with gas 2.16. */ |
147 | __asm__ __volatile__("fxsaveq %0" | 159 | __asm__ __volatile__("fxsaveq %0" |
148 | : "=m" (tsk->thread.xstate->fxsave)); | 160 | : "=m" (fpu->state->fxsave)); |
149 | #elif 0 | 161 | #elif 0 |
150 | /* Using, as a workaround, the properly prefixed form below isn't | 162 | /* Using, as a workaround, the properly prefixed form below isn't |
151 | accepted by any binutils version so far released, complaining that | 163 | accepted by any binutils version so far released, complaining that |
152 | the same type of prefix is used twice if an extended register is | 164 | the same type of prefix is used twice if an extended register is |
153 | needed for addressing (fix submitted to mainline 2005-11-21). */ | 165 | needed for addressing (fix submitted to mainline 2005-11-21). */ |
154 | __asm__ __volatile__("rex64/fxsave %0" | 166 | __asm__ __volatile__("rex64/fxsave %0" |
155 | : "=m" (tsk->thread.xstate->fxsave)); | 167 | : "=m" (fpu->state->fxsave)); |
156 | #else | 168 | #else |
157 | /* This, however, we can work around by forcing the compiler to select | 169 | /* This, however, we can work around by forcing the compiler to select |
158 | an addressing mode that doesn't require extended registers. */ | 170 | an addressing mode that doesn't require extended registers. */ |
159 | __asm__ __volatile__("rex64/fxsave (%1)" | 171 | __asm__ __volatile__("rex64/fxsave (%1)" |
160 | : "=m" (tsk->thread.xstate->fxsave) | 172 | : "=m" (fpu->state->fxsave) |
161 | : "cdaSDb" (&tsk->thread.xstate->fxsave)); | 173 | : "cdaSDb" (&fpu->state->fxsave)); |
162 | #endif | 174 | #endif |
163 | } | 175 | } |
164 | 176 | ||
165 | static inline void __save_init_fpu(struct task_struct *tsk) | 177 | static inline void fpu_save_init(struct fpu *fpu) |
166 | { | 178 | { |
167 | if (task_thread_info(tsk)->status & TS_XSAVE) | 179 | if (use_xsave()) |
168 | xsave(tsk); | 180 | fpu_xsave(fpu); |
169 | else | 181 | else |
170 | fxsave(tsk); | 182 | fpu_fxsave(fpu); |
183 | |||
184 | fpu_clear(fpu); | ||
185 | } | ||
171 | 186 | ||
172 | clear_fpu_state(tsk); | 187 | static inline void __save_init_fpu(struct task_struct *tsk) |
188 | { | ||
189 | fpu_save_init(&tsk->thread.fpu); | ||
173 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | 190 | task_thread_info(tsk)->status &= ~TS_USEDFPU; |
174 | } | 191 | } |
175 | 192 | ||
176 | #else /* CONFIG_X86_32 */ | 193 | #else /* CONFIG_X86_32 */ |
177 | 194 | ||
178 | #ifdef CONFIG_MATH_EMULATION | 195 | #ifdef CONFIG_MATH_EMULATION |
179 | extern void finit_task(struct task_struct *tsk); | 196 | extern void finit_soft_fpu(struct i387_soft_struct *soft); |
180 | #else | 197 | #else |
181 | static inline void finit_task(struct task_struct *tsk) | 198 | static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} |
182 | { | ||
183 | } | ||
184 | #endif | 199 | #endif |
185 | 200 | ||
186 | static inline void tolerant_fwait(void) | 201 | static inline void tolerant_fwait(void) |
@@ -216,13 +231,13 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) | |||
216 | /* | 231 | /* |
217 | * These must be called with preempt disabled | 232 | * These must be called with preempt disabled |
218 | */ | 233 | */ |
219 | static inline void __save_init_fpu(struct task_struct *tsk) | 234 | static inline void fpu_save_init(struct fpu *fpu) |
220 | { | 235 | { |
221 | if (task_thread_info(tsk)->status & TS_XSAVE) { | 236 | if (use_xsave()) { |
222 | struct xsave_struct *xstate = &tsk->thread.xstate->xsave; | 237 | struct xsave_struct *xstate = &fpu->state->xsave; |
223 | struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; | 238 | struct i387_fxsave_struct *fx = &fpu->state->fxsave; |
224 | 239 | ||
225 | xsave(tsk); | 240 | fpu_xsave(fpu); |
226 | 241 | ||
227 | /* | 242 | /* |
228 | * xsave header may indicate the init state of the FP. | 243 | * xsave header may indicate the init state of the FP. |
@@ -246,8 +261,8 @@ static inline void __save_init_fpu(struct task_struct *tsk) | |||
246 | "fxsave %[fx]\n" | 261 | "fxsave %[fx]\n" |
247 | "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:", | 262 | "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:", |
248 | X86_FEATURE_FXSR, | 263 | X86_FEATURE_FXSR, |
249 | [fx] "m" (tsk->thread.xstate->fxsave), | 264 | [fx] "m" (fpu->state->fxsave), |
250 | [fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory"); | 265 | [fsw] "m" (fpu->state->fxsave.swd) : "memory"); |
251 | clear_state: | 266 | clear_state: |
252 | /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception | 267 | /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception |
253 | is pending. Clear the x87 state here by setting it to fixed | 268 | is pending. Clear the x87 state here by setting it to fixed |
@@ -259,17 +274,34 @@ clear_state: | |||
259 | X86_FEATURE_FXSAVE_LEAK, | 274 | X86_FEATURE_FXSAVE_LEAK, |
260 | [addr] "m" (safe_address)); | 275 | [addr] "m" (safe_address)); |
261 | end: | 276 | end: |
277 | ; | ||
278 | } | ||
279 | |||
280 | static inline void __save_init_fpu(struct task_struct *tsk) | ||
281 | { | ||
282 | fpu_save_init(&tsk->thread.fpu); | ||
262 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | 283 | task_thread_info(tsk)->status &= ~TS_USEDFPU; |
263 | } | 284 | } |
264 | 285 | ||
286 | |||
265 | #endif /* CONFIG_X86_64 */ | 287 | #endif /* CONFIG_X86_64 */ |
266 | 288 | ||
267 | static inline int restore_fpu_checking(struct task_struct *tsk) | 289 | static inline int fpu_fxrstor_checking(struct fpu *fpu) |
268 | { | 290 | { |
269 | if (task_thread_info(tsk)->status & TS_XSAVE) | 291 | return fxrstor_checking(&fpu->state->fxsave); |
270 | return xrstor_checking(&tsk->thread.xstate->xsave); | 292 | } |
293 | |||
294 | static inline int fpu_restore_checking(struct fpu *fpu) | ||
295 | { | ||
296 | if (use_xsave()) | ||
297 | return fpu_xrstor_checking(fpu); | ||
271 | else | 298 | else |
272 | return fxrstor_checking(&tsk->thread.xstate->fxsave); | 299 | return fpu_fxrstor_checking(fpu); |
300 | } | ||
301 | |||
302 | static inline int restore_fpu_checking(struct task_struct *tsk) | ||
303 | { | ||
304 | return fpu_restore_checking(&tsk->thread.fpu); | ||
273 | } | 305 | } |
274 | 306 | ||
275 | /* | 307 | /* |
@@ -397,30 +429,59 @@ static inline void clear_fpu(struct task_struct *tsk) | |||
397 | static inline unsigned short get_fpu_cwd(struct task_struct *tsk) | 429 | static inline unsigned short get_fpu_cwd(struct task_struct *tsk) |
398 | { | 430 | { |
399 | if (cpu_has_fxsr) { | 431 | if (cpu_has_fxsr) { |
400 | return tsk->thread.xstate->fxsave.cwd; | 432 | return tsk->thread.fpu.state->fxsave.cwd; |
401 | } else { | 433 | } else { |
402 | return (unsigned short)tsk->thread.xstate->fsave.cwd; | 434 | return (unsigned short)tsk->thread.fpu.state->fsave.cwd; |
403 | } | 435 | } |
404 | } | 436 | } |
405 | 437 | ||
406 | static inline unsigned short get_fpu_swd(struct task_struct *tsk) | 438 | static inline unsigned short get_fpu_swd(struct task_struct *tsk) |
407 | { | 439 | { |
408 | if (cpu_has_fxsr) { | 440 | if (cpu_has_fxsr) { |
409 | return tsk->thread.xstate->fxsave.swd; | 441 | return tsk->thread.fpu.state->fxsave.swd; |
410 | } else { | 442 | } else { |
411 | return (unsigned short)tsk->thread.xstate->fsave.swd; | 443 | return (unsigned short)tsk->thread.fpu.state->fsave.swd; |
412 | } | 444 | } |
413 | } | 445 | } |
414 | 446 | ||
415 | static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) | 447 | static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) |
416 | { | 448 | { |
417 | if (cpu_has_xmm) { | 449 | if (cpu_has_xmm) { |
418 | return tsk->thread.xstate->fxsave.mxcsr; | 450 | return tsk->thread.fpu.state->fxsave.mxcsr; |
419 | } else { | 451 | } else { |
420 | return MXCSR_DEFAULT; | 452 | return MXCSR_DEFAULT; |
421 | } | 453 | } |
422 | } | 454 | } |
423 | 455 | ||
456 | static bool fpu_allocated(struct fpu *fpu) | ||
457 | { | ||
458 | return fpu->state != NULL; | ||
459 | } | ||
460 | |||
461 | static inline int fpu_alloc(struct fpu *fpu) | ||
462 | { | ||
463 | if (fpu_allocated(fpu)) | ||
464 | return 0; | ||
465 | fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); | ||
466 | if (!fpu->state) | ||
467 | return -ENOMEM; | ||
468 | WARN_ON((unsigned long)fpu->state & 15); | ||
469 | return 0; | ||
470 | } | ||
471 | |||
472 | static inline void fpu_free(struct fpu *fpu) | ||
473 | { | ||
474 | if (fpu->state) { | ||
475 | kmem_cache_free(task_xstate_cachep, fpu->state); | ||
476 | fpu->state = NULL; | ||
477 | } | ||
478 | } | ||
479 | |||
480 | static inline void fpu_copy(struct fpu *dst, struct fpu *src) | ||
481 | { | ||
482 | memcpy(dst->state, src->state, xstate_size); | ||
483 | } | ||
484 | |||
424 | #endif /* __ASSEMBLY__ */ | 485 | #endif /* __ASSEMBLY__ */ |
425 | 486 | ||
426 | #define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 | 487 | #define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 |
diff --git a/arch/x86/include/asm/i8253.h b/arch/x86/include/asm/i8253.h index 1edbf89680fd..fc1f579fb965 100644 --- a/arch/x86/include/asm/i8253.h +++ b/arch/x86/include/asm/i8253.h | |||
@@ -6,7 +6,7 @@ | |||
6 | #define PIT_CH0 0x40 | 6 | #define PIT_CH0 0x40 |
7 | #define PIT_CH2 0x42 | 7 | #define PIT_CH2 0x42 |
8 | 8 | ||
9 | extern spinlock_t i8253_lock; | 9 | extern raw_spinlock_t i8253_lock; |
10 | 10 | ||
11 | extern struct clock_event_device *global_clock_event; | 11 | extern struct clock_event_device *global_clock_event; |
12 | 12 | ||
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 96c2e0ad04ca..88c765e16410 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h | |||
@@ -68,6 +68,8 @@ struct insn { | |||
68 | const insn_byte_t *next_byte; | 68 | const insn_byte_t *next_byte; |
69 | }; | 69 | }; |
70 | 70 | ||
71 | #define MAX_INSN_SIZE 16 | ||
72 | |||
71 | #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) | 73 | #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) |
72 | #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) | 74 | #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) |
73 | #define X86_MODRM_RM(modrm) ((modrm) & 0x07) | 75 | #define X86_MODRM_RM(modrm) ((modrm) & 0x07) |
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 35832a03a515..63cb4096c3dc 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h | |||
@@ -159,7 +159,6 @@ struct io_apic_irq_attr; | |||
159 | extern int io_apic_set_pci_routing(struct device *dev, int irq, | 159 | extern int io_apic_set_pci_routing(struct device *dev, int irq, |
160 | struct io_apic_irq_attr *irq_attr); | 160 | struct io_apic_irq_attr *irq_attr); |
161 | void setup_IO_APIC_irq_extra(u32 gsi); | 161 | void setup_IO_APIC_irq_extra(u32 gsi); |
162 | extern int (*ioapic_renumber_irq)(int ioapic, int irq); | ||
163 | extern void ioapic_init_mappings(void); | 162 | extern void ioapic_init_mappings(void); |
164 | extern void ioapic_insert_resources(void); | 163 | extern void ioapic_insert_resources(void); |
165 | 164 | ||
@@ -180,12 +179,13 @@ extern void ioapic_write_entry(int apic, int pin, | |||
180 | extern void setup_ioapic_ids_from_mpc(void); | 179 | extern void setup_ioapic_ids_from_mpc(void); |
181 | 180 | ||
182 | struct mp_ioapic_gsi{ | 181 | struct mp_ioapic_gsi{ |
183 | int gsi_base; | 182 | u32 gsi_base; |
184 | int gsi_end; | 183 | u32 gsi_end; |
185 | }; | 184 | }; |
186 | extern struct mp_ioapic_gsi mp_gsi_routing[]; | 185 | extern struct mp_ioapic_gsi mp_gsi_routing[]; |
187 | int mp_find_ioapic(int gsi); | 186 | extern u32 gsi_end; |
188 | int mp_find_ioapic_pin(int ioapic, int gsi); | 187 | int mp_find_ioapic(u32 gsi); |
188 | int mp_find_ioapic_pin(int ioapic, u32 gsi); | ||
189 | void __init mp_register_ioapic(int id, u32 address, u32 gsi_base); | 189 | void __init mp_register_ioapic(int id, u32 address, u32 gsi_base); |
190 | extern void __init pre_init_apic_IRQ0(void); | 190 | extern void __init pre_init_apic_IRQ0(void); |
191 | 191 | ||
@@ -197,7 +197,8 @@ static const int timer_through_8259 = 0; | |||
197 | static inline void ioapic_init_mappings(void) { } | 197 | static inline void ioapic_init_mappings(void) { } |
198 | static inline void ioapic_insert_resources(void) { } | 198 | static inline void ioapic_insert_resources(void) { } |
199 | static inline void probe_nr_irqs_gsi(void) { } | 199 | static inline void probe_nr_irqs_gsi(void) { } |
200 | static inline int mp_find_ioapic(int gsi) { return 0; } | 200 | #define gsi_end (NR_IRQS_LEGACY - 1) |
201 | static inline int mp_find_ioapic(u32 gsi) { return 0; } | ||
201 | 202 | ||
202 | struct io_apic_irq_attr; | 203 | struct io_apic_irq_attr; |
203 | static inline int io_apic_set_pci_routing(struct device *dev, int irq, | 204 | static inline int io_apic_set_pci_routing(struct device *dev, int irq, |
diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h index f70e60071fe8..af00bd1d2089 100644 --- a/arch/x86/include/asm/k8.h +++ b/arch/x86/include/asm/k8.h | |||
@@ -16,11 +16,16 @@ extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn); | |||
16 | extern int k8_scan_nodes(void); | 16 | extern int k8_scan_nodes(void); |
17 | 17 | ||
18 | #ifdef CONFIG_K8_NB | 18 | #ifdef CONFIG_K8_NB |
19 | extern int num_k8_northbridges; | ||
20 | |||
19 | static inline struct pci_dev *node_to_k8_nb_misc(int node) | 21 | static inline struct pci_dev *node_to_k8_nb_misc(int node) |
20 | { | 22 | { |
21 | return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL; | 23 | return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL; |
22 | } | 24 | } |
25 | |||
23 | #else | 26 | #else |
27 | #define num_k8_northbridges 0 | ||
28 | |||
24 | static inline struct pci_dev *node_to_k8_nb_misc(int node) | 29 | static inline struct pci_dev *node_to_k8_nb_misc(int node) |
25 | { | 30 | { |
26 | return NULL; | 31 | return NULL; |
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 4ffa345a8ccb..547882539157 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/types.h> | 24 | #include <linux/types.h> |
25 | #include <linux/ptrace.h> | 25 | #include <linux/ptrace.h> |
26 | #include <linux/percpu.h> | 26 | #include <linux/percpu.h> |
27 | #include <asm/insn.h> | ||
27 | 28 | ||
28 | #define __ARCH_WANT_KPROBES_INSN_SLOT | 29 | #define __ARCH_WANT_KPROBES_INSN_SLOT |
29 | 30 | ||
@@ -36,7 +37,6 @@ typedef u8 kprobe_opcode_t; | |||
36 | #define RELATIVEJUMP_SIZE 5 | 37 | #define RELATIVEJUMP_SIZE 5 |
37 | #define RELATIVECALL_OPCODE 0xe8 | 38 | #define RELATIVECALL_OPCODE 0xe8 |
38 | #define RELATIVE_ADDR_SIZE 4 | 39 | #define RELATIVE_ADDR_SIZE 4 |
39 | #define MAX_INSN_SIZE 16 | ||
40 | #define MAX_STACK_SIZE 64 | 40 | #define MAX_STACK_SIZE 64 |
41 | #define MIN_STACK_SIZE(ADDR) \ | 41 | #define MIN_STACK_SIZE(ADDR) \ |
42 | (((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \ | 42 | (((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \ |
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index d8bf23a88d05..c82868e9f905 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h | |||
@@ -105,16 +105,6 @@ extern void mp_config_acpi_legacy_irqs(void); | |||
105 | struct device; | 105 | struct device; |
106 | extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level, | 106 | extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level, |
107 | int active_high_low); | 107 | int active_high_low); |
108 | extern int acpi_probe_gsi(void); | ||
109 | #ifdef CONFIG_X86_IO_APIC | ||
110 | extern int mp_find_ioapic(int gsi); | ||
111 | extern int mp_find_ioapic_pin(int ioapic, int gsi); | ||
112 | #endif | ||
113 | #else /* !CONFIG_ACPI: */ | ||
114 | static inline int acpi_probe_gsi(void) | ||
115 | { | ||
116 | return 0; | ||
117 | } | ||
118 | #endif /* CONFIG_ACPI */ | 108 | #endif /* CONFIG_ACPI */ |
119 | 109 | ||
120 | #define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) | 110 | #define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) |
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h new file mode 100644 index 000000000000..79ce5685ab64 --- /dev/null +++ b/arch/x86/include/asm/mshyperv.h | |||
@@ -0,0 +1,14 @@ | |||
1 | #ifndef _ASM_X86_MSHYPER_H | ||
2 | #define _ASM_X86_MSHYPER_H | ||
3 | |||
4 | #include <linux/types.h> | ||
5 | #include <asm/hyperv.h> | ||
6 | |||
7 | struct ms_hyperv_info { | ||
8 | u32 features; | ||
9 | u32 hints; | ||
10 | }; | ||
11 | |||
12 | extern struct ms_hyperv_info ms_hyperv; | ||
13 | |||
14 | #endif | ||
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 4604e6a54d36..bc473acfa7f9 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -71,11 +71,14 @@ | |||
71 | #define MSR_IA32_LASTINTTOIP 0x000001de | 71 | #define MSR_IA32_LASTINTTOIP 0x000001de |
72 | 72 | ||
73 | /* DEBUGCTLMSR bits (others vary by model): */ | 73 | /* DEBUGCTLMSR bits (others vary by model): */ |
74 | #define _DEBUGCTLMSR_LBR 0 /* last branch recording */ | 74 | #define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ |
75 | #define _DEBUGCTLMSR_BTF 1 /* single-step on branches */ | 75 | #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ |
76 | 76 | #define DEBUGCTLMSR_TR (1UL << 6) | |
77 | #define DEBUGCTLMSR_LBR (1UL << _DEBUGCTLMSR_LBR) | 77 | #define DEBUGCTLMSR_BTS (1UL << 7) |
78 | #define DEBUGCTLMSR_BTF (1UL << _DEBUGCTLMSR_BTF) | 78 | #define DEBUGCTLMSR_BTINT (1UL << 8) |
79 | #define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9) | ||
80 | #define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) | ||
81 | #define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) | ||
79 | 82 | ||
80 | #define MSR_IA32_MC0_CTL 0x00000400 | 83 | #define MSR_IA32_MC0_CTL 0x00000400 |
81 | #define MSR_IA32_MC0_STATUS 0x00000401 | 84 | #define MSR_IA32_MC0_STATUS 0x00000401 |
@@ -359,6 +362,8 @@ | |||
359 | #define MSR_P4_U2L_ESCR0 0x000003b0 | 362 | #define MSR_P4_U2L_ESCR0 0x000003b0 |
360 | #define MSR_P4_U2L_ESCR1 0x000003b1 | 363 | #define MSR_P4_U2L_ESCR1 0x000003b1 |
361 | 364 | ||
365 | #define MSR_P4_PEBS_MATRIX_VERT 0x000003f2 | ||
366 | |||
362 | /* Intel Core-based CPU performance counters */ | 367 | /* Intel Core-based CPU performance counters */ |
363 | #define MSR_CORE_PERF_FIXED_CTR0 0x00000309 | 368 | #define MSR_CORE_PERF_FIXED_CTR0 0x00000309 |
364 | #define MSR_CORE_PERF_FIXED_CTR1 0x0000030a | 369 | #define MSR_CORE_PERF_FIXED_CTR1 0x0000030a |
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 66a272dfd8b8..0ec6d12d84e6 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h | |||
@@ -190,6 +190,29 @@ do { \ | |||
190 | pfo_ret__; \ | 190 | pfo_ret__; \ |
191 | }) | 191 | }) |
192 | 192 | ||
193 | #define percpu_unary_op(op, var) \ | ||
194 | ({ \ | ||
195 | switch (sizeof(var)) { \ | ||
196 | case 1: \ | ||
197 | asm(op "b "__percpu_arg(0) \ | ||
198 | : "+m" (var)); \ | ||
199 | break; \ | ||
200 | case 2: \ | ||
201 | asm(op "w "__percpu_arg(0) \ | ||
202 | : "+m" (var)); \ | ||
203 | break; \ | ||
204 | case 4: \ | ||
205 | asm(op "l "__percpu_arg(0) \ | ||
206 | : "+m" (var)); \ | ||
207 | break; \ | ||
208 | case 8: \ | ||
209 | asm(op "q "__percpu_arg(0) \ | ||
210 | : "+m" (var)); \ | ||
211 | break; \ | ||
212 | default: __bad_percpu_size(); \ | ||
213 | } \ | ||
214 | }) | ||
215 | |||
193 | /* | 216 | /* |
194 | * percpu_read() makes gcc load the percpu variable every time it is | 217 | * percpu_read() makes gcc load the percpu variable every time it is |
195 | * accessed while percpu_read_stable() allows the value to be cached. | 218 | * accessed while percpu_read_stable() allows the value to be cached. |
@@ -207,6 +230,7 @@ do { \ | |||
207 | #define percpu_and(var, val) percpu_to_op("and", var, val) | 230 | #define percpu_and(var, val) percpu_to_op("and", var, val) |
208 | #define percpu_or(var, val) percpu_to_op("or", var, val) | 231 | #define percpu_or(var, val) percpu_to_op("or", var, val) |
209 | #define percpu_xor(var, val) percpu_to_op("xor", var, val) | 232 | #define percpu_xor(var, val) percpu_to_op("xor", var, val) |
233 | #define percpu_inc(var) percpu_unary_op("inc", var) | ||
210 | 234 | ||
211 | #define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 235 | #define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
212 | #define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 236 | #define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index db6109a885a7..254883d0c7e0 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h | |||
@@ -5,7 +5,7 @@ | |||
5 | * Performance event hw details: | 5 | * Performance event hw details: |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #define X86_PMC_MAX_GENERIC 8 | 8 | #define X86_PMC_MAX_GENERIC 32 |
9 | #define X86_PMC_MAX_FIXED 3 | 9 | #define X86_PMC_MAX_FIXED 3 |
10 | 10 | ||
11 | #define X86_PMC_IDX_GENERIC 0 | 11 | #define X86_PMC_IDX_GENERIC 0 |
@@ -18,39 +18,31 @@ | |||
18 | #define MSR_ARCH_PERFMON_EVENTSEL0 0x186 | 18 | #define MSR_ARCH_PERFMON_EVENTSEL0 0x186 |
19 | #define MSR_ARCH_PERFMON_EVENTSEL1 0x187 | 19 | #define MSR_ARCH_PERFMON_EVENTSEL1 0x187 |
20 | 20 | ||
21 | #define ARCH_PERFMON_EVENTSEL_ENABLE (1 << 22) | 21 | #define ARCH_PERFMON_EVENTSEL_EVENT 0x000000FFULL |
22 | #define ARCH_PERFMON_EVENTSEL_ANY (1 << 21) | 22 | #define ARCH_PERFMON_EVENTSEL_UMASK 0x0000FF00ULL |
23 | #define ARCH_PERFMON_EVENTSEL_INT (1 << 20) | 23 | #define ARCH_PERFMON_EVENTSEL_USR (1ULL << 16) |
24 | #define ARCH_PERFMON_EVENTSEL_OS (1 << 17) | 24 | #define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17) |
25 | #define ARCH_PERFMON_EVENTSEL_USR (1 << 16) | 25 | #define ARCH_PERFMON_EVENTSEL_EDGE (1ULL << 18) |
26 | 26 | #define ARCH_PERFMON_EVENTSEL_INT (1ULL << 20) | |
27 | /* | 27 | #define ARCH_PERFMON_EVENTSEL_ANY (1ULL << 21) |
28 | * Includes eventsel and unit mask as well: | 28 | #define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22) |
29 | */ | 29 | #define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) |
30 | 30 | #define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL | |
31 | 31 | ||
32 | #define INTEL_ARCH_EVTSEL_MASK 0x000000FFULL | 32 | #define AMD64_EVENTSEL_EVENT \ |
33 | #define INTEL_ARCH_UNIT_MASK 0x0000FF00ULL | 33 | (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32)) |
34 | #define INTEL_ARCH_EDGE_MASK 0x00040000ULL | 34 | #define INTEL_ARCH_EVENT_MASK \ |
35 | #define INTEL_ARCH_INV_MASK 0x00800000ULL | 35 | (ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT) |
36 | #define INTEL_ARCH_CNT_MASK 0xFF000000ULL | 36 | |
37 | #define INTEL_ARCH_EVENT_MASK (INTEL_ARCH_UNIT_MASK|INTEL_ARCH_EVTSEL_MASK) | 37 | #define X86_RAW_EVENT_MASK \ |
38 | 38 | (ARCH_PERFMON_EVENTSEL_EVENT | \ | |
39 | /* | 39 | ARCH_PERFMON_EVENTSEL_UMASK | \ |
40 | * filter mask to validate fixed counter events. | 40 | ARCH_PERFMON_EVENTSEL_EDGE | \ |
41 | * the following filters disqualify for fixed counters: | 41 | ARCH_PERFMON_EVENTSEL_INV | \ |
42 | * - inv | 42 | ARCH_PERFMON_EVENTSEL_CMASK) |
43 | * - edge | 43 | #define AMD64_RAW_EVENT_MASK \ |
44 | * - cnt-mask | 44 | (X86_RAW_EVENT_MASK | \ |
45 | * The other filters are supported by fixed counters. | 45 | AMD64_EVENTSEL_EVENT) |
46 | * The any-thread option is supported starting with v3. | ||
47 | */ | ||
48 | #define INTEL_ARCH_FIXED_MASK \ | ||
49 | (INTEL_ARCH_CNT_MASK| \ | ||
50 | INTEL_ARCH_INV_MASK| \ | ||
51 | INTEL_ARCH_EDGE_MASK|\ | ||
52 | INTEL_ARCH_UNIT_MASK|\ | ||
53 | INTEL_ARCH_EVENT_MASK) | ||
54 | 46 | ||
55 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c | 47 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c |
56 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) | 48 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) |
@@ -67,7 +59,7 @@ | |||
67 | union cpuid10_eax { | 59 | union cpuid10_eax { |
68 | struct { | 60 | struct { |
69 | unsigned int version_id:8; | 61 | unsigned int version_id:8; |
70 | unsigned int num_events:8; | 62 | unsigned int num_counters:8; |
71 | unsigned int bit_width:8; | 63 | unsigned int bit_width:8; |
72 | unsigned int mask_length:8; | 64 | unsigned int mask_length:8; |
73 | } split; | 65 | } split; |
@@ -76,7 +68,7 @@ union cpuid10_eax { | |||
76 | 68 | ||
77 | union cpuid10_edx { | 69 | union cpuid10_edx { |
78 | struct { | 70 | struct { |
79 | unsigned int num_events_fixed:4; | 71 | unsigned int num_counters_fixed:4; |
80 | unsigned int reserved:28; | 72 | unsigned int reserved:28; |
81 | } split; | 73 | } split; |
82 | unsigned int full; | 74 | unsigned int full; |
@@ -136,6 +128,18 @@ extern void perf_events_lapic_init(void); | |||
136 | 128 | ||
137 | #define PERF_EVENT_INDEX_OFFSET 0 | 129 | #define PERF_EVENT_INDEX_OFFSET 0 |
138 | 130 | ||
131 | /* | ||
132 | * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups. | ||
133 | * This flag is otherwise unused and ABI specified to be 0, so nobody should | ||
134 | * care what we do with it. | ||
135 | */ | ||
136 | #define PERF_EFLAGS_EXACT (1UL << 3) | ||
137 | |||
138 | struct pt_regs; | ||
139 | extern unsigned long perf_instruction_pointer(struct pt_regs *regs); | ||
140 | extern unsigned long perf_misc_flags(struct pt_regs *regs); | ||
141 | #define perf_misc_flags(regs) perf_misc_flags(regs) | ||
142 | |||
139 | #else | 143 | #else |
140 | static inline void init_hw_perf_events(void) { } | 144 | static inline void init_hw_perf_events(void) { } |
141 | static inline void perf_events_lapic_init(void) { } | 145 | static inline void perf_events_lapic_init(void) { } |
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h new file mode 100644 index 000000000000..b05400a542ff --- /dev/null +++ b/arch/x86/include/asm/perf_event_p4.h | |||
@@ -0,0 +1,794 @@ | |||
1 | /* | ||
2 | * Netburst Perfomance Events (P4, old Xeon) | ||
3 | */ | ||
4 | |||
5 | #ifndef PERF_EVENT_P4_H | ||
6 | #define PERF_EVENT_P4_H | ||
7 | |||
8 | #include <linux/cpu.h> | ||
9 | #include <linux/bitops.h> | ||
10 | |||
11 | /* | ||
12 | * NetBurst has perfomance MSRs shared between | ||
13 | * threads if HT is turned on, ie for both logical | ||
14 | * processors (mem: in turn in Atom with HT support | ||
15 | * perf-MSRs are not shared and every thread has its | ||
16 | * own perf-MSRs set) | ||
17 | */ | ||
18 | #define ARCH_P4_TOTAL_ESCR (46) | ||
19 | #define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */ | ||
20 | #define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) | ||
21 | #define ARCH_P4_MAX_CCCR (18) | ||
22 | #define ARCH_P4_MAX_COUNTER (ARCH_P4_MAX_CCCR / 2) | ||
23 | |||
24 | #define P4_ESCR_EVENT_MASK 0x7e000000U | ||
25 | #define P4_ESCR_EVENT_SHIFT 25 | ||
26 | #define P4_ESCR_EVENTMASK_MASK 0x01fffe00U | ||
27 | #define P4_ESCR_EVENTMASK_SHIFT 9 | ||
28 | #define P4_ESCR_TAG_MASK 0x000001e0U | ||
29 | #define P4_ESCR_TAG_SHIFT 5 | ||
30 | #define P4_ESCR_TAG_ENABLE 0x00000010U | ||
31 | #define P4_ESCR_T0_OS 0x00000008U | ||
32 | #define P4_ESCR_T0_USR 0x00000004U | ||
33 | #define P4_ESCR_T1_OS 0x00000002U | ||
34 | #define P4_ESCR_T1_USR 0x00000001U | ||
35 | |||
36 | #define P4_ESCR_EVENT(v) ((v) << P4_ESCR_EVENT_SHIFT) | ||
37 | #define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT) | ||
38 | #define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT) | ||
39 | |||
40 | /* Non HT mask */ | ||
41 | #define P4_ESCR_MASK \ | ||
42 | (P4_ESCR_EVENT_MASK | \ | ||
43 | P4_ESCR_EVENTMASK_MASK | \ | ||
44 | P4_ESCR_TAG_MASK | \ | ||
45 | P4_ESCR_TAG_ENABLE | \ | ||
46 | P4_ESCR_T0_OS | \ | ||
47 | P4_ESCR_T0_USR) | ||
48 | |||
49 | /* HT mask */ | ||
50 | #define P4_ESCR_MASK_HT \ | ||
51 | (P4_ESCR_MASK | P4_ESCR_T1_OS | P4_ESCR_T1_USR) | ||
52 | |||
53 | #define P4_CCCR_OVF 0x80000000U | ||
54 | #define P4_CCCR_CASCADE 0x40000000U | ||
55 | #define P4_CCCR_OVF_PMI_T0 0x04000000U | ||
56 | #define P4_CCCR_OVF_PMI_T1 0x08000000U | ||
57 | #define P4_CCCR_FORCE_OVF 0x02000000U | ||
58 | #define P4_CCCR_EDGE 0x01000000U | ||
59 | #define P4_CCCR_THRESHOLD_MASK 0x00f00000U | ||
60 | #define P4_CCCR_THRESHOLD_SHIFT 20 | ||
61 | #define P4_CCCR_COMPLEMENT 0x00080000U | ||
62 | #define P4_CCCR_COMPARE 0x00040000U | ||
63 | #define P4_CCCR_ESCR_SELECT_MASK 0x0000e000U | ||
64 | #define P4_CCCR_ESCR_SELECT_SHIFT 13 | ||
65 | #define P4_CCCR_ENABLE 0x00001000U | ||
66 | #define P4_CCCR_THREAD_SINGLE 0x00010000U | ||
67 | #define P4_CCCR_THREAD_BOTH 0x00020000U | ||
68 | #define P4_CCCR_THREAD_ANY 0x00030000U | ||
69 | #define P4_CCCR_RESERVED 0x00000fffU | ||
70 | |||
71 | #define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) | ||
72 | #define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) | ||
73 | |||
74 | /* Custom bits in reerved CCCR area */ | ||
75 | #define P4_CCCR_CACHE_OPS_MASK 0x0000003fU | ||
76 | |||
77 | |||
78 | /* Non HT mask */ | ||
79 | #define P4_CCCR_MASK \ | ||
80 | (P4_CCCR_OVF | \ | ||
81 | P4_CCCR_CASCADE | \ | ||
82 | P4_CCCR_OVF_PMI_T0 | \ | ||
83 | P4_CCCR_FORCE_OVF | \ | ||
84 | P4_CCCR_EDGE | \ | ||
85 | P4_CCCR_THRESHOLD_MASK | \ | ||
86 | P4_CCCR_COMPLEMENT | \ | ||
87 | P4_CCCR_COMPARE | \ | ||
88 | P4_CCCR_ESCR_SELECT_MASK | \ | ||
89 | P4_CCCR_ENABLE) | ||
90 | |||
91 | /* HT mask */ | ||
92 | #define P4_CCCR_MASK_HT (P4_CCCR_MASK | P4_CCCR_THREAD_ANY) | ||
93 | |||
94 | #define P4_GEN_ESCR_EMASK(class, name, bit) \ | ||
95 | class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT) | ||
96 | #define P4_ESCR_EMASK_BIT(class, name) class##__##name | ||
97 | |||
98 | /* | ||
99 | * config field is 64bit width and consists of | ||
100 | * HT << 63 | ESCR << 32 | CCCR | ||
101 | * where HT is HyperThreading bit (since ESCR | ||
102 | * has it reserved we may use it for own purpose) | ||
103 | * | ||
104 | * note that this is NOT the addresses of respective | ||
105 | * ESCR and CCCR but rather an only packed value should | ||
106 | * be unpacked and written to a proper addresses | ||
107 | * | ||
108 | * the base idea is to pack as much info as | ||
109 | * possible | ||
110 | */ | ||
111 | #define p4_config_pack_escr(v) (((u64)(v)) << 32) | ||
112 | #define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL) | ||
113 | #define p4_config_unpack_escr(v) (((u64)(v)) >> 32) | ||
114 | #define p4_config_unpack_cccr(v) (((u64)(v)) & 0xffffffffULL) | ||
115 | |||
116 | #define p4_config_unpack_emask(v) \ | ||
117 | ({ \ | ||
118 | u32 t = p4_config_unpack_escr((v)); \ | ||
119 | t = t & P4_ESCR_EVENTMASK_MASK; \ | ||
120 | t = t >> P4_ESCR_EVENTMASK_SHIFT; \ | ||
121 | t; \ | ||
122 | }) | ||
123 | |||
124 | #define p4_config_unpack_event(v) \ | ||
125 | ({ \ | ||
126 | u32 t = p4_config_unpack_escr((v)); \ | ||
127 | t = t & P4_ESCR_EVENT_MASK; \ | ||
128 | t = t >> P4_ESCR_EVENT_SHIFT; \ | ||
129 | t; \ | ||
130 | }) | ||
131 | |||
132 | #define p4_config_unpack_cache_event(v) (((u64)(v)) & P4_CCCR_CACHE_OPS_MASK) | ||
133 | |||
134 | #define P4_CONFIG_HT_SHIFT 63 | ||
135 | #define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) | ||
136 | |||
137 | static inline bool p4_is_event_cascaded(u64 config) | ||
138 | { | ||
139 | u32 cccr = p4_config_unpack_cccr(config); | ||
140 | return !!(cccr & P4_CCCR_CASCADE); | ||
141 | } | ||
142 | |||
143 | static inline int p4_ht_config_thread(u64 config) | ||
144 | { | ||
145 | return !!(config & P4_CONFIG_HT); | ||
146 | } | ||
147 | |||
148 | static inline u64 p4_set_ht_bit(u64 config) | ||
149 | { | ||
150 | return config | P4_CONFIG_HT; | ||
151 | } | ||
152 | |||
153 | static inline u64 p4_clear_ht_bit(u64 config) | ||
154 | { | ||
155 | return config & ~P4_CONFIG_HT; | ||
156 | } | ||
157 | |||
158 | static inline int p4_ht_active(void) | ||
159 | { | ||
160 | #ifdef CONFIG_SMP | ||
161 | return smp_num_siblings > 1; | ||
162 | #endif | ||
163 | return 0; | ||
164 | } | ||
165 | |||
166 | static inline int p4_ht_thread(int cpu) | ||
167 | { | ||
168 | #ifdef CONFIG_SMP | ||
169 | if (smp_num_siblings == 2) | ||
170 | return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map)); | ||
171 | #endif | ||
172 | return 0; | ||
173 | } | ||
174 | |||
175 | static inline int p4_should_swap_ts(u64 config, int cpu) | ||
176 | { | ||
177 | return p4_ht_config_thread(config) ^ p4_ht_thread(cpu); | ||
178 | } | ||
179 | |||
180 | static inline u32 p4_default_cccr_conf(int cpu) | ||
181 | { | ||
182 | /* | ||
183 | * Note that P4_CCCR_THREAD_ANY is "required" on | ||
184 | * non-HT machines (on HT machines we count TS events | ||
185 | * regardless the state of second logical processor | ||
186 | */ | ||
187 | u32 cccr = P4_CCCR_THREAD_ANY; | ||
188 | |||
189 | if (!p4_ht_thread(cpu)) | ||
190 | cccr |= P4_CCCR_OVF_PMI_T0; | ||
191 | else | ||
192 | cccr |= P4_CCCR_OVF_PMI_T1; | ||
193 | |||
194 | return cccr; | ||
195 | } | ||
196 | |||
197 | static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr) | ||
198 | { | ||
199 | u32 escr = 0; | ||
200 | |||
201 | if (!p4_ht_thread(cpu)) { | ||
202 | if (!exclude_os) | ||
203 | escr |= P4_ESCR_T0_OS; | ||
204 | if (!exclude_usr) | ||
205 | escr |= P4_ESCR_T0_USR; | ||
206 | } else { | ||
207 | if (!exclude_os) | ||
208 | escr |= P4_ESCR_T1_OS; | ||
209 | if (!exclude_usr) | ||
210 | escr |= P4_ESCR_T1_USR; | ||
211 | } | ||
212 | |||
213 | return escr; | ||
214 | } | ||
215 | |||
216 | enum P4_EVENTS { | ||
217 | P4_EVENT_TC_DELIVER_MODE, | ||
218 | P4_EVENT_BPU_FETCH_REQUEST, | ||
219 | P4_EVENT_ITLB_REFERENCE, | ||
220 | P4_EVENT_MEMORY_CANCEL, | ||
221 | P4_EVENT_MEMORY_COMPLETE, | ||
222 | P4_EVENT_LOAD_PORT_REPLAY, | ||
223 | P4_EVENT_STORE_PORT_REPLAY, | ||
224 | P4_EVENT_MOB_LOAD_REPLAY, | ||
225 | P4_EVENT_PAGE_WALK_TYPE, | ||
226 | P4_EVENT_BSQ_CACHE_REFERENCE, | ||
227 | P4_EVENT_IOQ_ALLOCATION, | ||
228 | P4_EVENT_IOQ_ACTIVE_ENTRIES, | ||
229 | P4_EVENT_FSB_DATA_ACTIVITY, | ||
230 | P4_EVENT_BSQ_ALLOCATION, | ||
231 | P4_EVENT_BSQ_ACTIVE_ENTRIES, | ||
232 | P4_EVENT_SSE_INPUT_ASSIST, | ||
233 | P4_EVENT_PACKED_SP_UOP, | ||
234 | P4_EVENT_PACKED_DP_UOP, | ||
235 | P4_EVENT_SCALAR_SP_UOP, | ||
236 | P4_EVENT_SCALAR_DP_UOP, | ||
237 | P4_EVENT_64BIT_MMX_UOP, | ||
238 | P4_EVENT_128BIT_MMX_UOP, | ||
239 | P4_EVENT_X87_FP_UOP, | ||
240 | P4_EVENT_TC_MISC, | ||
241 | P4_EVENT_GLOBAL_POWER_EVENTS, | ||
242 | P4_EVENT_TC_MS_XFER, | ||
243 | P4_EVENT_UOP_QUEUE_WRITES, | ||
244 | P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, | ||
245 | P4_EVENT_RETIRED_BRANCH_TYPE, | ||
246 | P4_EVENT_RESOURCE_STALL, | ||
247 | P4_EVENT_WC_BUFFER, | ||
248 | P4_EVENT_B2B_CYCLES, | ||
249 | P4_EVENT_BNR, | ||
250 | P4_EVENT_SNOOP, | ||
251 | P4_EVENT_RESPONSE, | ||
252 | P4_EVENT_FRONT_END_EVENT, | ||
253 | P4_EVENT_EXECUTION_EVENT, | ||
254 | P4_EVENT_REPLAY_EVENT, | ||
255 | P4_EVENT_INSTR_RETIRED, | ||
256 | P4_EVENT_UOPS_RETIRED, | ||
257 | P4_EVENT_UOP_TYPE, | ||
258 | P4_EVENT_BRANCH_RETIRED, | ||
259 | P4_EVENT_MISPRED_BRANCH_RETIRED, | ||
260 | P4_EVENT_X87_ASSIST, | ||
261 | P4_EVENT_MACHINE_CLEAR, | ||
262 | P4_EVENT_INSTR_COMPLETED, | ||
263 | }; | ||
264 | |||
265 | #define P4_OPCODE(event) event##_OPCODE | ||
266 | #define P4_OPCODE_ESEL(opcode) ((opcode & 0x00ff) >> 0) | ||
267 | #define P4_OPCODE_EVNT(opcode) ((opcode & 0xff00) >> 8) | ||
268 | #define P4_OPCODE_PACK(event, sel) (((event) << 8) | sel) | ||
269 | |||
270 | /* | ||
271 | * Comments below the event represent ESCR restriction | ||
272 | * for this event and counter index per ESCR | ||
273 | * | ||
274 | * MSR_P4_IQ_ESCR0 and MSR_P4_IQ_ESCR1 are available only on early | ||
275 | * processor builds (family 0FH, models 01H-02H). These MSRs | ||
276 | * are not available on later versions, so that we don't use | ||
277 | * them completely | ||
278 | * | ||
279 | * Also note that CCCR1 do not have P4_CCCR_ENABLE bit properly | ||
280 | * working so that we should not use this CCCR and respective | ||
281 | * counter as result | ||
282 | */ | ||
283 | enum P4_EVENT_OPCODES { | ||
284 | P4_OPCODE(P4_EVENT_TC_DELIVER_MODE) = P4_OPCODE_PACK(0x01, 0x01), | ||
285 | /* | ||
286 | * MSR_P4_TC_ESCR0: 4, 5 | ||
287 | * MSR_P4_TC_ESCR1: 6, 7 | ||
288 | */ | ||
289 | |||
290 | P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST) = P4_OPCODE_PACK(0x03, 0x00), | ||
291 | /* | ||
292 | * MSR_P4_BPU_ESCR0: 0, 1 | ||
293 | * MSR_P4_BPU_ESCR1: 2, 3 | ||
294 | */ | ||
295 | |||
296 | P4_OPCODE(P4_EVENT_ITLB_REFERENCE) = P4_OPCODE_PACK(0x18, 0x03), | ||
297 | /* | ||
298 | * MSR_P4_ITLB_ESCR0: 0, 1 | ||
299 | * MSR_P4_ITLB_ESCR1: 2, 3 | ||
300 | */ | ||
301 | |||
302 | P4_OPCODE(P4_EVENT_MEMORY_CANCEL) = P4_OPCODE_PACK(0x02, 0x05), | ||
303 | /* | ||
304 | * MSR_P4_DAC_ESCR0: 8, 9 | ||
305 | * MSR_P4_DAC_ESCR1: 10, 11 | ||
306 | */ | ||
307 | |||
308 | P4_OPCODE(P4_EVENT_MEMORY_COMPLETE) = P4_OPCODE_PACK(0x08, 0x02), | ||
309 | /* | ||
310 | * MSR_P4_SAAT_ESCR0: 8, 9 | ||
311 | * MSR_P4_SAAT_ESCR1: 10, 11 | ||
312 | */ | ||
313 | |||
314 | P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY) = P4_OPCODE_PACK(0x04, 0x02), | ||
315 | /* | ||
316 | * MSR_P4_SAAT_ESCR0: 8, 9 | ||
317 | * MSR_P4_SAAT_ESCR1: 10, 11 | ||
318 | */ | ||
319 | |||
320 | P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY) = P4_OPCODE_PACK(0x05, 0x02), | ||
321 | /* | ||
322 | * MSR_P4_SAAT_ESCR0: 8, 9 | ||
323 | * MSR_P4_SAAT_ESCR1: 10, 11 | ||
324 | */ | ||
325 | |||
326 | P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY) = P4_OPCODE_PACK(0x03, 0x02), | ||
327 | /* | ||
328 | * MSR_P4_MOB_ESCR0: 0, 1 | ||
329 | * MSR_P4_MOB_ESCR1: 2, 3 | ||
330 | */ | ||
331 | |||
332 | P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE) = P4_OPCODE_PACK(0x01, 0x04), | ||
333 | /* | ||
334 | * MSR_P4_PMH_ESCR0: 0, 1 | ||
335 | * MSR_P4_PMH_ESCR1: 2, 3 | ||
336 | */ | ||
337 | |||
338 | P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE) = P4_OPCODE_PACK(0x0c, 0x07), | ||
339 | /* | ||
340 | * MSR_P4_BSU_ESCR0: 0, 1 | ||
341 | * MSR_P4_BSU_ESCR1: 2, 3 | ||
342 | */ | ||
343 | |||
344 | P4_OPCODE(P4_EVENT_IOQ_ALLOCATION) = P4_OPCODE_PACK(0x03, 0x06), | ||
345 | /* | ||
346 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
347 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
348 | */ | ||
349 | |||
350 | P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES) = P4_OPCODE_PACK(0x1a, 0x06), | ||
351 | /* | ||
352 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
353 | */ | ||
354 | |||
355 | P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY) = P4_OPCODE_PACK(0x17, 0x06), | ||
356 | /* | ||
357 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
358 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
359 | */ | ||
360 | |||
361 | P4_OPCODE(P4_EVENT_BSQ_ALLOCATION) = P4_OPCODE_PACK(0x05, 0x07), | ||
362 | /* | ||
363 | * MSR_P4_BSU_ESCR0: 0, 1 | ||
364 | */ | ||
365 | |||
366 | P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES) = P4_OPCODE_PACK(0x06, 0x07), | ||
367 | /* | ||
368 | * NOTE: no ESCR name in docs, it's guessed | ||
369 | * MSR_P4_BSU_ESCR1: 2, 3 | ||
370 | */ | ||
371 | |||
372 | P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST) = P4_OPCODE_PACK(0x34, 0x01), | ||
373 | /* | ||
374 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
375 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
376 | */ | ||
377 | |||
378 | P4_OPCODE(P4_EVENT_PACKED_SP_UOP) = P4_OPCODE_PACK(0x08, 0x01), | ||
379 | /* | ||
380 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
381 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
382 | */ | ||
383 | |||
384 | P4_OPCODE(P4_EVENT_PACKED_DP_UOP) = P4_OPCODE_PACK(0x0c, 0x01), | ||
385 | /* | ||
386 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
387 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
388 | */ | ||
389 | |||
390 | P4_OPCODE(P4_EVENT_SCALAR_SP_UOP) = P4_OPCODE_PACK(0x0a, 0x01), | ||
391 | /* | ||
392 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
393 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
394 | */ | ||
395 | |||
396 | P4_OPCODE(P4_EVENT_SCALAR_DP_UOP) = P4_OPCODE_PACK(0x0e, 0x01), | ||
397 | /* | ||
398 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
399 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
400 | */ | ||
401 | |||
402 | P4_OPCODE(P4_EVENT_64BIT_MMX_UOP) = P4_OPCODE_PACK(0x02, 0x01), | ||
403 | /* | ||
404 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
405 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
406 | */ | ||
407 | |||
408 | P4_OPCODE(P4_EVENT_128BIT_MMX_UOP) = P4_OPCODE_PACK(0x1a, 0x01), | ||
409 | /* | ||
410 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
411 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
412 | */ | ||
413 | |||
414 | P4_OPCODE(P4_EVENT_X87_FP_UOP) = P4_OPCODE_PACK(0x04, 0x01), | ||
415 | /* | ||
416 | * MSR_P4_FIRM_ESCR0: 8, 9 | ||
417 | * MSR_P4_FIRM_ESCR1: 10, 11 | ||
418 | */ | ||
419 | |||
420 | P4_OPCODE(P4_EVENT_TC_MISC) = P4_OPCODE_PACK(0x06, 0x01), | ||
421 | /* | ||
422 | * MSR_P4_TC_ESCR0: 4, 5 | ||
423 | * MSR_P4_TC_ESCR1: 6, 7 | ||
424 | */ | ||
425 | |||
426 | P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS) = P4_OPCODE_PACK(0x13, 0x06), | ||
427 | /* | ||
428 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
429 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
430 | */ | ||
431 | |||
432 | P4_OPCODE(P4_EVENT_TC_MS_XFER) = P4_OPCODE_PACK(0x05, 0x00), | ||
433 | /* | ||
434 | * MSR_P4_MS_ESCR0: 4, 5 | ||
435 | * MSR_P4_MS_ESCR1: 6, 7 | ||
436 | */ | ||
437 | |||
438 | P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES) = P4_OPCODE_PACK(0x09, 0x00), | ||
439 | /* | ||
440 | * MSR_P4_MS_ESCR0: 4, 5 | ||
441 | * MSR_P4_MS_ESCR1: 6, 7 | ||
442 | */ | ||
443 | |||
444 | P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE) = P4_OPCODE_PACK(0x05, 0x02), | ||
445 | /* | ||
446 | * MSR_P4_TBPU_ESCR0: 4, 5 | ||
447 | * MSR_P4_TBPU_ESCR1: 6, 7 | ||
448 | */ | ||
449 | |||
450 | P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE) = P4_OPCODE_PACK(0x04, 0x02), | ||
451 | /* | ||
452 | * MSR_P4_TBPU_ESCR0: 4, 5 | ||
453 | * MSR_P4_TBPU_ESCR1: 6, 7 | ||
454 | */ | ||
455 | |||
456 | P4_OPCODE(P4_EVENT_RESOURCE_STALL) = P4_OPCODE_PACK(0x01, 0x01), | ||
457 | /* | ||
458 | * MSR_P4_ALF_ESCR0: 12, 13, 16 | ||
459 | * MSR_P4_ALF_ESCR1: 14, 15, 17 | ||
460 | */ | ||
461 | |||
462 | P4_OPCODE(P4_EVENT_WC_BUFFER) = P4_OPCODE_PACK(0x05, 0x05), | ||
463 | /* | ||
464 | * MSR_P4_DAC_ESCR0: 8, 9 | ||
465 | * MSR_P4_DAC_ESCR1: 10, 11 | ||
466 | */ | ||
467 | |||
468 | P4_OPCODE(P4_EVENT_B2B_CYCLES) = P4_OPCODE_PACK(0x16, 0x03), | ||
469 | /* | ||
470 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
471 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
472 | */ | ||
473 | |||
474 | P4_OPCODE(P4_EVENT_BNR) = P4_OPCODE_PACK(0x08, 0x03), | ||
475 | /* | ||
476 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
477 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
478 | */ | ||
479 | |||
480 | P4_OPCODE(P4_EVENT_SNOOP) = P4_OPCODE_PACK(0x06, 0x03), | ||
481 | /* | ||
482 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
483 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
484 | */ | ||
485 | |||
486 | P4_OPCODE(P4_EVENT_RESPONSE) = P4_OPCODE_PACK(0x04, 0x03), | ||
487 | /* | ||
488 | * MSR_P4_FSB_ESCR0: 0, 1 | ||
489 | * MSR_P4_FSB_ESCR1: 2, 3 | ||
490 | */ | ||
491 | |||
492 | P4_OPCODE(P4_EVENT_FRONT_END_EVENT) = P4_OPCODE_PACK(0x08, 0x05), | ||
493 | /* | ||
494 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
495 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
496 | */ | ||
497 | |||
498 | P4_OPCODE(P4_EVENT_EXECUTION_EVENT) = P4_OPCODE_PACK(0x0c, 0x05), | ||
499 | /* | ||
500 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
501 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
502 | */ | ||
503 | |||
504 | P4_OPCODE(P4_EVENT_REPLAY_EVENT) = P4_OPCODE_PACK(0x09, 0x05), | ||
505 | /* | ||
506 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
507 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
508 | */ | ||
509 | |||
510 | P4_OPCODE(P4_EVENT_INSTR_RETIRED) = P4_OPCODE_PACK(0x02, 0x04), | ||
511 | /* | ||
512 | * MSR_P4_CRU_ESCR0: 12, 13, 16 | ||
513 | * MSR_P4_CRU_ESCR1: 14, 15, 17 | ||
514 | */ | ||
515 | |||
516 | P4_OPCODE(P4_EVENT_UOPS_RETIRED) = P4_OPCODE_PACK(0x01, 0x04), | ||
517 | /* | ||
518 | * MSR_P4_CRU_ESCR0: 12, 13, 16 | ||
519 | * MSR_P4_CRU_ESCR1: 14, 15, 17 | ||
520 | */ | ||
521 | |||
522 | P4_OPCODE(P4_EVENT_UOP_TYPE) = P4_OPCODE_PACK(0x02, 0x02), | ||
523 | /* | ||
524 | * MSR_P4_RAT_ESCR0: 12, 13, 16 | ||
525 | * MSR_P4_RAT_ESCR1: 14, 15, 17 | ||
526 | */ | ||
527 | |||
528 | P4_OPCODE(P4_EVENT_BRANCH_RETIRED) = P4_OPCODE_PACK(0x06, 0x05), | ||
529 | /* | ||
530 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
531 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
532 | */ | ||
533 | |||
534 | P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED) = P4_OPCODE_PACK(0x03, 0x04), | ||
535 | /* | ||
536 | * MSR_P4_CRU_ESCR0: 12, 13, 16 | ||
537 | * MSR_P4_CRU_ESCR1: 14, 15, 17 | ||
538 | */ | ||
539 | |||
540 | P4_OPCODE(P4_EVENT_X87_ASSIST) = P4_OPCODE_PACK(0x03, 0x05), | ||
541 | /* | ||
542 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
543 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
544 | */ | ||
545 | |||
546 | P4_OPCODE(P4_EVENT_MACHINE_CLEAR) = P4_OPCODE_PACK(0x02, 0x05), | ||
547 | /* | ||
548 | * MSR_P4_CRU_ESCR2: 12, 13, 16 | ||
549 | * MSR_P4_CRU_ESCR3: 14, 15, 17 | ||
550 | */ | ||
551 | |||
552 | P4_OPCODE(P4_EVENT_INSTR_COMPLETED) = P4_OPCODE_PACK(0x07, 0x04), | ||
553 | /* | ||
554 | * MSR_P4_CRU_ESCR0: 12, 13, 16 | ||
555 | * MSR_P4_CRU_ESCR1: 14, 15, 17 | ||
556 | */ | ||
557 | }; | ||
558 | |||
559 | /* | ||
560 | * a caller should use P4_ESCR_EMASK_NAME helper to | ||
561 | * pick the EventMask needed, for example | ||
562 | * | ||
563 | * P4_ESCR_EMASK_NAME(P4_EVENT_TC_DELIVER_MODE, DD) | ||
564 | */ | ||
565 | enum P4_ESCR_EMASKS { | ||
566 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DD, 0), | ||
567 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DB, 1), | ||
568 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DI, 2), | ||
569 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, BD, 3), | ||
570 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, BB, 4), | ||
571 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, BI, 5), | ||
572 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, ID, 6), | ||
573 | |||
574 | P4_GEN_ESCR_EMASK(P4_EVENT_BPU_FETCH_REQUEST, TCMISS, 0), | ||
575 | |||
576 | P4_GEN_ESCR_EMASK(P4_EVENT_ITLB_REFERENCE, HIT, 0), | ||
577 | P4_GEN_ESCR_EMASK(P4_EVENT_ITLB_REFERENCE, MISS, 1), | ||
578 | P4_GEN_ESCR_EMASK(P4_EVENT_ITLB_REFERENCE, HIT_UK, 2), | ||
579 | |||
580 | P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL, 2), | ||
581 | P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_CANCEL, 64K_CONF, 3), | ||
582 | |||
583 | P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_COMPLETE, LSC, 0), | ||
584 | P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_COMPLETE, SSC, 1), | ||
585 | |||
586 | P4_GEN_ESCR_EMASK(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD, 1), | ||
587 | |||
588 | P4_GEN_ESCR_EMASK(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST, 1), | ||
589 | |||
590 | P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, NO_STA, 1), | ||
591 | P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, NO_STD, 3), | ||
592 | P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA, 4), | ||
593 | P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR, 5), | ||
594 | |||
595 | P4_GEN_ESCR_EMASK(P4_EVENT_PAGE_WALK_TYPE, DTMISS, 0), | ||
596 | P4_GEN_ESCR_EMASK(P4_EVENT_PAGE_WALK_TYPE, ITMISS, 1), | ||
597 | |||
598 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS, 0), | ||
599 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE, 1), | ||
600 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM, 2), | ||
601 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS, 3), | ||
602 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE, 4), | ||
603 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM, 5), | ||
604 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS, 8), | ||
605 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS, 9), | ||
606 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS, 10), | ||
607 | |||
608 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, DEFAULT, 0), | ||
609 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, ALL_READ, 5), | ||
610 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE, 6), | ||
611 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_UC, 7), | ||
612 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WC, 8), | ||
613 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WT, 9), | ||
614 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WP, 10), | ||
615 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WB, 11), | ||
616 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, OWN, 13), | ||
617 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, OTHER, 14), | ||
618 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, PREFETCH, 15), | ||
619 | |||
620 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT, 0), | ||
621 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ, 5), | ||
622 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE, 6), | ||
623 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC, 7), | ||
624 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC, 8), | ||
625 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT, 9), | ||
626 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP, 10), | ||
627 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB, 11), | ||
628 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN, 13), | ||
629 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER, 14), | ||
630 | P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH, 15), | ||
631 | |||
632 | P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV, 0), | ||
633 | P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN, 1), | ||
634 | P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER, 2), | ||
635 | P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV, 3), | ||
636 | P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN, 4), | ||
637 | P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER, 5), | ||
638 | |||
639 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0, 0), | ||
640 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1, 1), | ||
641 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0, 2), | ||
642 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1, 3), | ||
643 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE, 5), | ||
644 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE, 6), | ||
645 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE, 7), | ||
646 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE, 8), | ||
647 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE, 9), | ||
648 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE, 10), | ||
649 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0, 11), | ||
650 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1, 12), | ||
651 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2, 13), | ||
652 | |||
653 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0, 0), | ||
654 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1, 1), | ||
655 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0, 2), | ||
656 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1, 3), | ||
657 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE, 5), | ||
658 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE, 6), | ||
659 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE, 7), | ||
660 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE, 8), | ||
661 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE, 9), | ||
662 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE, 10), | ||
663 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0, 11), | ||
664 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1, 12), | ||
665 | P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2, 13), | ||
666 | |||
667 | P4_GEN_ESCR_EMASK(P4_EVENT_SSE_INPUT_ASSIST, ALL, 15), | ||
668 | |||
669 | P4_GEN_ESCR_EMASK(P4_EVENT_PACKED_SP_UOP, ALL, 15), | ||
670 | |||
671 | P4_GEN_ESCR_EMASK(P4_EVENT_PACKED_DP_UOP, ALL, 15), | ||
672 | |||
673 | P4_GEN_ESCR_EMASK(P4_EVENT_SCALAR_SP_UOP, ALL, 15), | ||
674 | |||
675 | P4_GEN_ESCR_EMASK(P4_EVENT_SCALAR_DP_UOP, ALL, 15), | ||
676 | |||
677 | P4_GEN_ESCR_EMASK(P4_EVENT_64BIT_MMX_UOP, ALL, 15), | ||
678 | |||
679 | P4_GEN_ESCR_EMASK(P4_EVENT_128BIT_MMX_UOP, ALL, 15), | ||
680 | |||
681 | P4_GEN_ESCR_EMASK(P4_EVENT_X87_FP_UOP, ALL, 15), | ||
682 | |||
683 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_MISC, FLUSH, 4), | ||
684 | |||
685 | P4_GEN_ESCR_EMASK(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING, 0), | ||
686 | |||
687 | P4_GEN_ESCR_EMASK(P4_EVENT_TC_MS_XFER, CISC, 0), | ||
688 | |||
689 | P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD, 0), | ||
690 | P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER, 1), | ||
691 | P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM, 2), | ||
692 | |||
693 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL, 1), | ||
694 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL, 2), | ||
695 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN, 3), | ||
696 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT, 4), | ||
697 | |||
698 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL, 1), | ||
699 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, CALL, 2), | ||
700 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN, 3), | ||
701 | P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT, 4), | ||
702 | |||
703 | P4_GEN_ESCR_EMASK(P4_EVENT_RESOURCE_STALL, SBFULL, 5), | ||
704 | |||
705 | P4_GEN_ESCR_EMASK(P4_EVENT_WC_BUFFER, WCB_EVICTS, 0), | ||
706 | P4_GEN_ESCR_EMASK(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS, 1), | ||
707 | |||
708 | P4_GEN_ESCR_EMASK(P4_EVENT_FRONT_END_EVENT, NBOGUS, 0), | ||
709 | P4_GEN_ESCR_EMASK(P4_EVENT_FRONT_END_EVENT, BOGUS, 1), | ||
710 | |||
711 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS0, 0), | ||
712 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS1, 1), | ||
713 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS2, 2), | ||
714 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS3, 3), | ||
715 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS0, 4), | ||
716 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS1, 5), | ||
717 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS2, 6), | ||
718 | P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS3, 7), | ||
719 | |||
720 | P4_GEN_ESCR_EMASK(P4_EVENT_REPLAY_EVENT, NBOGUS, 0), | ||
721 | P4_GEN_ESCR_EMASK(P4_EVENT_REPLAY_EVENT, BOGUS, 1), | ||
722 | |||
723 | P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG, 0), | ||
724 | P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, NBOGUSTAG, 1), | ||
725 | P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, BOGUSNTAG, 2), | ||
726 | P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, BOGUSTAG, 3), | ||
727 | |||
728 | P4_GEN_ESCR_EMASK(P4_EVENT_UOPS_RETIRED, NBOGUS, 0), | ||
729 | P4_GEN_ESCR_EMASK(P4_EVENT_UOPS_RETIRED, BOGUS, 1), | ||
730 | |||
731 | P4_GEN_ESCR_EMASK(P4_EVENT_UOP_TYPE, TAGLOADS, 1), | ||
732 | P4_GEN_ESCR_EMASK(P4_EVENT_UOP_TYPE, TAGSTORES, 2), | ||
733 | |||
734 | P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMNP, 0), | ||
735 | P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMNM, 1), | ||
736 | P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMTP, 2), | ||
737 | P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMTM, 3), | ||
738 | |||
739 | P4_GEN_ESCR_EMASK(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS, 0), | ||
740 | |||
741 | P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, FPSU, 0), | ||
742 | P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, FPSO, 1), | ||
743 | P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, POAO, 2), | ||
744 | P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, POAU, 3), | ||
745 | P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, PREA, 4), | ||
746 | |||
747 | P4_GEN_ESCR_EMASK(P4_EVENT_MACHINE_CLEAR, CLEAR, 0), | ||
748 | P4_GEN_ESCR_EMASK(P4_EVENT_MACHINE_CLEAR, MOCLEAR, 1), | ||
749 | P4_GEN_ESCR_EMASK(P4_EVENT_MACHINE_CLEAR, SMCLEAR, 2), | ||
750 | |||
751 | P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, NBOGUS, 0), | ||
752 | P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1), | ||
753 | }; | ||
754 | |||
755 | /* P4 PEBS: stale for a while */ | ||
756 | #define P4_PEBS_METRIC_MASK 0x00001fffU | ||
757 | #define P4_PEBS_UOB_TAG 0x01000000U | ||
758 | #define P4_PEBS_ENABLE 0x02000000U | ||
759 | |||
760 | /* Replay metrics for MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT */ | ||
761 | #define P4_PEBS__1stl_cache_load_miss_retired 0x3000001 | ||
762 | #define P4_PEBS__2ndl_cache_load_miss_retired 0x3000002 | ||
763 | #define P4_PEBS__dtlb_load_miss_retired 0x3000004 | ||
764 | #define P4_PEBS__dtlb_store_miss_retired 0x3000004 | ||
765 | #define P4_PEBS__dtlb_all_miss_retired 0x3000004 | ||
766 | #define P4_PEBS__tagged_mispred_branch 0x3018000 | ||
767 | #define P4_PEBS__mob_load_replay_retired 0x3000200 | ||
768 | #define P4_PEBS__split_load_retired 0x3000400 | ||
769 | #define P4_PEBS__split_store_retired 0x3000400 | ||
770 | |||
771 | #define P4_VERT__1stl_cache_load_miss_retired 0x0000001 | ||
772 | #define P4_VERT__2ndl_cache_load_miss_retired 0x0000001 | ||
773 | #define P4_VERT__dtlb_load_miss_retired 0x0000001 | ||
774 | #define P4_VERT__dtlb_store_miss_retired 0x0000002 | ||
775 | #define P4_VERT__dtlb_all_miss_retired 0x0000003 | ||
776 | #define P4_VERT__tagged_mispred_branch 0x0000010 | ||
777 | #define P4_VERT__mob_load_replay_retired 0x0000001 | ||
778 | #define P4_VERT__split_load_retired 0x0000001 | ||
779 | #define P4_VERT__split_store_retired 0x0000002 | ||
780 | |||
781 | enum P4_CACHE_EVENTS { | ||
782 | P4_CACHE__NONE, | ||
783 | |||
784 | P4_CACHE__1stl_cache_load_miss_retired, | ||
785 | P4_CACHE__2ndl_cache_load_miss_retired, | ||
786 | P4_CACHE__dtlb_load_miss_retired, | ||
787 | P4_CACHE__dtlb_store_miss_retired, | ||
788 | P4_CACHE__itlb_reference_hit, | ||
789 | P4_CACHE__itlb_reference_miss, | ||
790 | |||
791 | P4_CACHE__MAX | ||
792 | }; | ||
793 | |||
794 | #endif /* PERF_EVENT_P4_H */ | ||
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index b753ea59703a..5a51379dcbe4 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -21,7 +21,6 @@ struct mm_struct; | |||
21 | #include <asm/msr.h> | 21 | #include <asm/msr.h> |
22 | #include <asm/desc_defs.h> | 22 | #include <asm/desc_defs.h> |
23 | #include <asm/nops.h> | 23 | #include <asm/nops.h> |
24 | #include <asm/ds.h> | ||
25 | 24 | ||
26 | #include <linux/personality.h> | 25 | #include <linux/personality.h> |
27 | #include <linux/cpumask.h> | 26 | #include <linux/cpumask.h> |
@@ -29,6 +28,7 @@ struct mm_struct; | |||
29 | #include <linux/threads.h> | 28 | #include <linux/threads.h> |
30 | #include <linux/math64.h> | 29 | #include <linux/math64.h> |
31 | #include <linux/init.h> | 30 | #include <linux/init.h> |
31 | #include <linux/err.h> | ||
32 | 32 | ||
33 | #define HBP_NUM 4 | 33 | #define HBP_NUM 4 |
34 | /* | 34 | /* |
@@ -113,7 +113,6 @@ struct cpuinfo_x86 { | |||
113 | /* Index into per_cpu list: */ | 113 | /* Index into per_cpu list: */ |
114 | u16 cpu_index; | 114 | u16 cpu_index; |
115 | #endif | 115 | #endif |
116 | unsigned int x86_hyper_vendor; | ||
117 | } __attribute__((__aligned__(SMP_CACHE_BYTES))); | 116 | } __attribute__((__aligned__(SMP_CACHE_BYTES))); |
118 | 117 | ||
119 | #define X86_VENDOR_INTEL 0 | 118 | #define X86_VENDOR_INTEL 0 |
@@ -127,9 +126,6 @@ struct cpuinfo_x86 { | |||
127 | 126 | ||
128 | #define X86_VENDOR_UNKNOWN 0xff | 127 | #define X86_VENDOR_UNKNOWN 0xff |
129 | 128 | ||
130 | #define X86_HYPER_VENDOR_NONE 0 | ||
131 | #define X86_HYPER_VENDOR_VMWARE 1 | ||
132 | |||
133 | /* | 129 | /* |
134 | * capabilities of CPUs | 130 | * capabilities of CPUs |
135 | */ | 131 | */ |
@@ -380,6 +376,10 @@ union thread_xstate { | |||
380 | struct xsave_struct xsave; | 376 | struct xsave_struct xsave; |
381 | }; | 377 | }; |
382 | 378 | ||
379 | struct fpu { | ||
380 | union thread_xstate *state; | ||
381 | }; | ||
382 | |||
383 | #ifdef CONFIG_X86_64 | 383 | #ifdef CONFIG_X86_64 |
384 | DECLARE_PER_CPU(struct orig_ist, orig_ist); | 384 | DECLARE_PER_CPU(struct orig_ist, orig_ist); |
385 | 385 | ||
@@ -457,7 +457,7 @@ struct thread_struct { | |||
457 | unsigned long trap_no; | 457 | unsigned long trap_no; |
458 | unsigned long error_code; | 458 | unsigned long error_code; |
459 | /* floating point and extended processor state */ | 459 | /* floating point and extended processor state */ |
460 | union thread_xstate *xstate; | 460 | struct fpu fpu; |
461 | #ifdef CONFIG_X86_32 | 461 | #ifdef CONFIG_X86_32 |
462 | /* Virtual 86 mode info */ | 462 | /* Virtual 86 mode info */ |
463 | struct vm86_struct __user *vm86_info; | 463 | struct vm86_struct __user *vm86_info; |
@@ -473,10 +473,6 @@ struct thread_struct { | |||
473 | unsigned long iopl; | 473 | unsigned long iopl; |
474 | /* Max allowed port in the bitmap, in bytes: */ | 474 | /* Max allowed port in the bitmap, in bytes: */ |
475 | unsigned io_bitmap_max; | 475 | unsigned io_bitmap_max; |
476 | /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ | ||
477 | unsigned long debugctlmsr; | ||
478 | /* Debug Store context; see asm/ds.h */ | ||
479 | struct ds_context *ds_ctx; | ||
480 | }; | 476 | }; |
481 | 477 | ||
482 | static inline unsigned long native_get_debugreg(int regno) | 478 | static inline unsigned long native_get_debugreg(int regno) |
@@ -803,7 +799,7 @@ extern void cpu_init(void); | |||
803 | 799 | ||
804 | static inline unsigned long get_debugctlmsr(void) | 800 | static inline unsigned long get_debugctlmsr(void) |
805 | { | 801 | { |
806 | unsigned long debugctlmsr = 0; | 802 | unsigned long debugctlmsr = 0; |
807 | 803 | ||
808 | #ifndef CONFIG_X86_DEBUGCTLMSR | 804 | #ifndef CONFIG_X86_DEBUGCTLMSR |
809 | if (boot_cpu_data.x86 < 6) | 805 | if (boot_cpu_data.x86 < 6) |
@@ -811,21 +807,6 @@ static inline unsigned long get_debugctlmsr(void) | |||
811 | #endif | 807 | #endif |
812 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); | 808 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); |
813 | 809 | ||
814 | return debugctlmsr; | ||
815 | } | ||
816 | |||
817 | static inline unsigned long get_debugctlmsr_on_cpu(int cpu) | ||
818 | { | ||
819 | u64 debugctlmsr = 0; | ||
820 | u32 val1, val2; | ||
821 | |||
822 | #ifndef CONFIG_X86_DEBUGCTLMSR | ||
823 | if (boot_cpu_data.x86 < 6) | ||
824 | return 0; | ||
825 | #endif | ||
826 | rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2); | ||
827 | debugctlmsr = val1 | ((u64)val2 << 32); | ||
828 | |||
829 | return debugctlmsr; | 810 | return debugctlmsr; |
830 | } | 811 | } |
831 | 812 | ||
@@ -838,18 +819,6 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr) | |||
838 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); | 819 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); |
839 | } | 820 | } |
840 | 821 | ||
841 | static inline void update_debugctlmsr_on_cpu(int cpu, | ||
842 | unsigned long debugctlmsr) | ||
843 | { | ||
844 | #ifndef CONFIG_X86_DEBUGCTLMSR | ||
845 | if (boot_cpu_data.x86 < 6) | ||
846 | return; | ||
847 | #endif | ||
848 | wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, | ||
849 | (u32)((u64)debugctlmsr), | ||
850 | (u32)((u64)debugctlmsr >> 32)); | ||
851 | } | ||
852 | |||
853 | /* | 822 | /* |
854 | * from system description table in BIOS. Mostly for MCA use, but | 823 | * from system description table in BIOS. Mostly for MCA use, but |
855 | * others may find it useful: | 824 | * others may find it useful: |
diff --git a/arch/x86/include/asm/ptrace-abi.h b/arch/x86/include/asm/ptrace-abi.h index 86723035a515..52b098a6eebb 100644 --- a/arch/x86/include/asm/ptrace-abi.h +++ b/arch/x86/include/asm/ptrace-abi.h | |||
@@ -82,61 +82,6 @@ | |||
82 | 82 | ||
83 | #ifndef __ASSEMBLY__ | 83 | #ifndef __ASSEMBLY__ |
84 | #include <linux/types.h> | 84 | #include <linux/types.h> |
85 | 85 | #endif | |
86 | /* configuration/status structure used in PTRACE_BTS_CONFIG and | ||
87 | PTRACE_BTS_STATUS commands. | ||
88 | */ | ||
89 | struct ptrace_bts_config { | ||
90 | /* requested or actual size of BTS buffer in bytes */ | ||
91 | __u32 size; | ||
92 | /* bitmask of below flags */ | ||
93 | __u32 flags; | ||
94 | /* buffer overflow signal */ | ||
95 | __u32 signal; | ||
96 | /* actual size of bts_struct in bytes */ | ||
97 | __u32 bts_size; | ||
98 | }; | ||
99 | #endif /* __ASSEMBLY__ */ | ||
100 | |||
101 | #define PTRACE_BTS_O_TRACE 0x1 /* branch trace */ | ||
102 | #define PTRACE_BTS_O_SCHED 0x2 /* scheduling events w/ jiffies */ | ||
103 | #define PTRACE_BTS_O_SIGNAL 0x4 /* send SIG<signal> on buffer overflow | ||
104 | instead of wrapping around */ | ||
105 | #define PTRACE_BTS_O_ALLOC 0x8 /* (re)allocate buffer */ | ||
106 | |||
107 | #define PTRACE_BTS_CONFIG 40 | ||
108 | /* Configure branch trace recording. | ||
109 | ADDR points to a struct ptrace_bts_config. | ||
110 | DATA gives the size of that buffer. | ||
111 | A new buffer is allocated, if requested in the flags. | ||
112 | An overflow signal may only be requested for new buffers. | ||
113 | Returns the number of bytes read. | ||
114 | */ | ||
115 | #define PTRACE_BTS_STATUS 41 | ||
116 | /* Return the current configuration in a struct ptrace_bts_config | ||
117 | pointed to by ADDR; DATA gives the size of that buffer. | ||
118 | Returns the number of bytes written. | ||
119 | */ | ||
120 | #define PTRACE_BTS_SIZE 42 | ||
121 | /* Return the number of available BTS records for draining. | ||
122 | DATA and ADDR are ignored. | ||
123 | */ | ||
124 | #define PTRACE_BTS_GET 43 | ||
125 | /* Get a single BTS record. | ||
126 | DATA defines the index into the BTS array, where 0 is the newest | ||
127 | entry, and higher indices refer to older entries. | ||
128 | ADDR is pointing to struct bts_struct (see asm/ds.h). | ||
129 | */ | ||
130 | #define PTRACE_BTS_CLEAR 44 | ||
131 | /* Clear the BTS buffer. | ||
132 | DATA and ADDR are ignored. | ||
133 | */ | ||
134 | #define PTRACE_BTS_DRAIN 45 | ||
135 | /* Read all available BTS records and clear the buffer. | ||
136 | ADDR points to an array of struct bts_struct. | ||
137 | DATA gives the size of that buffer. | ||
138 | BTS records are read from oldest to newest. | ||
139 | Returns number of BTS records drained. | ||
140 | */ | ||
141 | 86 | ||
142 | #endif /* _ASM_X86_PTRACE_ABI_H */ | 87 | #endif /* _ASM_X86_PTRACE_ABI_H */ |
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 69a686a7dff0..78cd1ea94500 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h | |||
@@ -289,12 +289,6 @@ extern int do_get_thread_area(struct task_struct *p, int idx, | |||
289 | extern int do_set_thread_area(struct task_struct *p, int idx, | 289 | extern int do_set_thread_area(struct task_struct *p, int idx, |
290 | struct user_desc __user *info, int can_allocate); | 290 | struct user_desc __user *info, int can_allocate); |
291 | 291 | ||
292 | #ifdef CONFIG_X86_PTRACE_BTS | ||
293 | extern void ptrace_bts_untrace(struct task_struct *tsk); | ||
294 | |||
295 | #define arch_ptrace_untrace(tsk) ptrace_bts_untrace(tsk) | ||
296 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
297 | |||
298 | #endif /* __KERNEL__ */ | 292 | #endif /* __KERNEL__ */ |
299 | 293 | ||
300 | #endif /* !__ASSEMBLY__ */ | 294 | #endif /* !__ASSEMBLY__ */ |
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index e0d28901e969..d4092fac226b 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
@@ -92,8 +92,7 @@ struct thread_info { | |||
92 | #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ | 92 | #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ |
93 | #define TIF_FREEZE 23 /* is freezing for suspend */ | 93 | #define TIF_FREEZE 23 /* is freezing for suspend */ |
94 | #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ | 94 | #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ |
95 | #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ | 95 | #define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */ |
96 | #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ | ||
97 | #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ | 96 | #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ |
98 | #define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ | 97 | #define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ |
99 | 98 | ||
@@ -115,8 +114,7 @@ struct thread_info { | |||
115 | #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) | 114 | #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) |
116 | #define _TIF_FREEZE (1 << TIF_FREEZE) | 115 | #define _TIF_FREEZE (1 << TIF_FREEZE) |
117 | #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) | 116 | #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) |
118 | #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) | 117 | #define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP) |
119 | #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) | ||
120 | #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) | 118 | #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) |
121 | #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) | 119 | #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) |
122 | 120 | ||
@@ -147,7 +145,7 @@ struct thread_info { | |||
147 | 145 | ||
148 | /* flags to check in __switch_to() */ | 146 | /* flags to check in __switch_to() */ |
149 | #define _TIF_WORK_CTXSW \ | 147 | #define _TIF_WORK_CTXSW \ |
150 | (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC) | 148 | (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) |
151 | 149 | ||
152 | #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) | 150 | #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) |
153 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) | 151 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) |
@@ -244,7 +242,6 @@ static inline struct thread_info *current_thread_info(void) | |||
244 | #define TS_POLLING 0x0004 /* true if in idle loop | 242 | #define TS_POLLING 0x0004 /* true if in idle loop |
245 | and not sleeping */ | 243 | and not sleeping */ |
246 | #define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ | 244 | #define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ |
247 | #define TS_XSAVE 0x0010 /* Use xsave/xrstor */ | ||
248 | 245 | ||
249 | #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) | 246 | #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) |
250 | 247 | ||
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 4da91ad69e0d..f66cda56781d 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h | |||
@@ -79,7 +79,7 @@ static inline int get_si_code(unsigned long condition) | |||
79 | 79 | ||
80 | extern int panic_on_unrecovered_nmi; | 80 | extern int panic_on_unrecovered_nmi; |
81 | 81 | ||
82 | void math_error(void __user *); | 82 | void math_error(struct pt_regs *, int, int); |
83 | void math_emulate(struct math_emu_info *); | 83 | void math_emulate(struct math_emu_info *); |
84 | #ifndef CONFIG_X86_32 | 84 | #ifndef CONFIG_X86_32 |
85 | asmlinkage void smp_thermal_interrupt(void); | 85 | asmlinkage void smp_thermal_interrupt(void); |
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index b414d2b401f6..aa558ac0306e 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h | |||
@@ -27,13 +27,14 @@ | |||
27 | * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on. | 27 | * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on. |
28 | * | 28 | * |
29 | * We will use 31 sets, one for sending BAU messages from each of the 32 | 29 | * We will use 31 sets, one for sending BAU messages from each of the 32 |
30 | * cpu's on the node. | 30 | * cpu's on the uvhub. |
31 | * | 31 | * |
32 | * TLB shootdown will use the first of the 8 descriptors of each set. | 32 | * TLB shootdown will use the first of the 8 descriptors of each set. |
33 | * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set). | 33 | * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set). |
34 | */ | 34 | */ |
35 | 35 | ||
36 | #define UV_ITEMS_PER_DESCRIPTOR 8 | 36 | #define UV_ITEMS_PER_DESCRIPTOR 8 |
37 | #define MAX_BAU_CONCURRENT 3 | ||
37 | #define UV_CPUS_PER_ACT_STATUS 32 | 38 | #define UV_CPUS_PER_ACT_STATUS 32 |
38 | #define UV_ACT_STATUS_MASK 0x3 | 39 | #define UV_ACT_STATUS_MASK 0x3 |
39 | #define UV_ACT_STATUS_SIZE 2 | 40 | #define UV_ACT_STATUS_SIZE 2 |
@@ -45,6 +46,9 @@ | |||
45 | #define UV_PAYLOADQ_PNODE_SHIFT 49 | 46 | #define UV_PAYLOADQ_PNODE_SHIFT 49 |
46 | #define UV_PTC_BASENAME "sgi_uv/ptc_statistics" | 47 | #define UV_PTC_BASENAME "sgi_uv/ptc_statistics" |
47 | #define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask)) | 48 | #define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask)) |
49 | #define UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT 15 | ||
50 | #define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT 16 | ||
51 | #define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 0x000000000bUL | ||
48 | 52 | ||
49 | /* | 53 | /* |
50 | * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1 | 54 | * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1 |
@@ -55,15 +59,29 @@ | |||
55 | #define DESC_STATUS_SOURCE_TIMEOUT 3 | 59 | #define DESC_STATUS_SOURCE_TIMEOUT 3 |
56 | 60 | ||
57 | /* | 61 | /* |
58 | * source side thresholds at which message retries print a warning | 62 | * source side threshholds at which message retries print a warning |
59 | */ | 63 | */ |
60 | #define SOURCE_TIMEOUT_LIMIT 20 | 64 | #define SOURCE_TIMEOUT_LIMIT 20 |
61 | #define DESTINATION_TIMEOUT_LIMIT 20 | 65 | #define DESTINATION_TIMEOUT_LIMIT 20 |
62 | 66 | ||
63 | /* | 67 | /* |
68 | * misc. delays, in microseconds | ||
69 | */ | ||
70 | #define THROTTLE_DELAY 10 | ||
71 | #define TIMEOUT_DELAY 10 | ||
72 | #define BIOS_TO 1000 | ||
73 | /* BIOS is assumed to set the destination timeout to 1003520 nanoseconds */ | ||
74 | |||
75 | /* | ||
76 | * threshholds at which to use IPI to free resources | ||
77 | */ | ||
78 | #define PLUGSB4RESET 100 | ||
79 | #define TIMEOUTSB4RESET 100 | ||
80 | |||
81 | /* | ||
64 | * number of entries in the destination side payload queue | 82 | * number of entries in the destination side payload queue |
65 | */ | 83 | */ |
66 | #define DEST_Q_SIZE 17 | 84 | #define DEST_Q_SIZE 20 |
67 | /* | 85 | /* |
68 | * number of destination side software ack resources | 86 | * number of destination side software ack resources |
69 | */ | 87 | */ |
@@ -72,9 +90,10 @@ | |||
72 | /* | 90 | /* |
73 | * completion statuses for sending a TLB flush message | 91 | * completion statuses for sending a TLB flush message |
74 | */ | 92 | */ |
75 | #define FLUSH_RETRY 1 | 93 | #define FLUSH_RETRY_PLUGGED 1 |
76 | #define FLUSH_GIVEUP 2 | 94 | #define FLUSH_RETRY_TIMEOUT 2 |
77 | #define FLUSH_COMPLETE 3 | 95 | #define FLUSH_GIVEUP 3 |
96 | #define FLUSH_COMPLETE 4 | ||
78 | 97 | ||
79 | /* | 98 | /* |
80 | * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor) | 99 | * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor) |
@@ -86,14 +105,14 @@ | |||
86 | * 'base_dest_nodeid' field of the header corresponds to the | 105 | * 'base_dest_nodeid' field of the header corresponds to the |
87 | * destination nodeID associated with that specified bit. | 106 | * destination nodeID associated with that specified bit. |
88 | */ | 107 | */ |
89 | struct bau_target_nodemask { | 108 | struct bau_target_uvhubmask { |
90 | unsigned long bits[BITS_TO_LONGS(256)]; | 109 | unsigned long bits[BITS_TO_LONGS(UV_DISTRIBUTION_SIZE)]; |
91 | }; | 110 | }; |
92 | 111 | ||
93 | /* | 112 | /* |
94 | * mask of cpu's on a node | 113 | * mask of cpu's on a uvhub |
95 | * (during initialization we need to check that unsigned long has | 114 | * (during initialization we need to check that unsigned long has |
96 | * enough bits for max. cpu's per node) | 115 | * enough bits for max. cpu's per uvhub) |
97 | */ | 116 | */ |
98 | struct bau_local_cpumask { | 117 | struct bau_local_cpumask { |
99 | unsigned long bits; | 118 | unsigned long bits; |
@@ -135,8 +154,8 @@ struct bau_msg_payload { | |||
135 | struct bau_msg_header { | 154 | struct bau_msg_header { |
136 | unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ | 155 | unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ |
137 | /* bits 5:0 */ | 156 | /* bits 5:0 */ |
138 | unsigned int base_dest_nodeid:15; /* nasid>>1 (pnode) of */ | 157 | unsigned int base_dest_nodeid:15; /* nasid (pnode<<1) of */ |
139 | /* bits 20:6 */ /* first bit in node_map */ | 158 | /* bits 20:6 */ /* first bit in uvhub map */ |
140 | unsigned int command:8; /* message type */ | 159 | unsigned int command:8; /* message type */ |
141 | /* bits 28:21 */ | 160 | /* bits 28:21 */ |
142 | /* 0x38: SN3net EndPoint Message */ | 161 | /* 0x38: SN3net EndPoint Message */ |
@@ -146,26 +165,38 @@ struct bau_msg_header { | |||
146 | unsigned int rsvd_2:9; /* must be zero */ | 165 | unsigned int rsvd_2:9; /* must be zero */ |
147 | /* bits 40:32 */ | 166 | /* bits 40:32 */ |
148 | /* Suppl_A is 56-41 */ | 167 | /* Suppl_A is 56-41 */ |
149 | unsigned int payload_2a:8;/* becomes byte 16 of msg */ | 168 | unsigned int sequence:16;/* message sequence number */ |
150 | /* bits 48:41 */ /* not currently using */ | 169 | /* bits 56:41 */ /* becomes bytes 16-17 of msg */ |
151 | unsigned int payload_2b:8;/* becomes byte 17 of msg */ | ||
152 | /* bits 56:49 */ /* not currently using */ | ||
153 | /* Address field (96:57) is never used as an | 170 | /* Address field (96:57) is never used as an |
154 | address (these are address bits 42:3) */ | 171 | address (these are address bits 42:3) */ |
172 | |||
155 | unsigned int rsvd_3:1; /* must be zero */ | 173 | unsigned int rsvd_3:1; /* must be zero */ |
156 | /* bit 57 */ | 174 | /* bit 57 */ |
157 | /* address bits 27:4 are payload */ | 175 | /* address bits 27:4 are payload */ |
158 | /* these 24 bits become bytes 12-14 of msg */ | 176 | /* these next 24 (58-81) bits become bytes 12-14 of msg */ |
177 | |||
178 | /* bits 65:58 land in byte 12 */ | ||
159 | unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */ | 179 | unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */ |
160 | /* bit 58 */ | 180 | /* bit 58 */ |
161 | 181 | unsigned int msg_type:3; /* software type of the message*/ | |
162 | unsigned int payload_1a:5;/* not currently used */ | 182 | /* bits 61:59 */ |
163 | /* bits 63:59 */ | 183 | unsigned int canceled:1; /* message canceled, resource to be freed*/ |
164 | unsigned int payload_1b:8;/* not currently used */ | 184 | /* bit 62 */ |
165 | /* bits 71:64 */ | 185 | unsigned int payload_1a:1;/* not currently used */ |
166 | unsigned int payload_1c:8;/* not currently used */ | 186 | /* bit 63 */ |
167 | /* bits 79:72 */ | 187 | unsigned int payload_1b:2;/* not currently used */ |
168 | unsigned int payload_1d:2;/* not currently used */ | 188 | /* bits 65:64 */ |
189 | |||
190 | /* bits 73:66 land in byte 13 */ | ||
191 | unsigned int payload_1ca:6;/* not currently used */ | ||
192 | /* bits 71:66 */ | ||
193 | unsigned int payload_1c:2;/* not currently used */ | ||
194 | /* bits 73:72 */ | ||
195 | |||
196 | /* bits 81:74 land in byte 14 */ | ||
197 | unsigned int payload_1d:6;/* not currently used */ | ||
198 | /* bits 79:74 */ | ||
199 | unsigned int payload_1e:2;/* not currently used */ | ||
169 | /* bits 81:80 */ | 200 | /* bits 81:80 */ |
170 | 201 | ||
171 | unsigned int rsvd_4:7; /* must be zero */ | 202 | unsigned int rsvd_4:7; /* must be zero */ |
@@ -178,7 +209,7 @@ struct bau_msg_header { | |||
178 | /* bits 95:90 */ | 209 | /* bits 95:90 */ |
179 | unsigned int rsvd_6:5; /* must be zero */ | 210 | unsigned int rsvd_6:5; /* must be zero */ |
180 | /* bits 100:96 */ | 211 | /* bits 100:96 */ |
181 | unsigned int int_both:1;/* if 1, interrupt both sockets on the blade */ | 212 | unsigned int int_both:1;/* if 1, interrupt both sockets on the uvhub */ |
182 | /* bit 101*/ | 213 | /* bit 101*/ |
183 | unsigned int fairness:3;/* usually zero */ | 214 | unsigned int fairness:3;/* usually zero */ |
184 | /* bits 104:102 */ | 215 | /* bits 104:102 */ |
@@ -191,13 +222,18 @@ struct bau_msg_header { | |||
191 | /* bits 127:107 */ | 222 | /* bits 127:107 */ |
192 | }; | 223 | }; |
193 | 224 | ||
225 | /* see msg_type: */ | ||
226 | #define MSG_NOOP 0 | ||
227 | #define MSG_REGULAR 1 | ||
228 | #define MSG_RETRY 2 | ||
229 | |||
194 | /* | 230 | /* |
195 | * The activation descriptor: | 231 | * The activation descriptor: |
196 | * The format of the message to send, plus all accompanying control | 232 | * The format of the message to send, plus all accompanying control |
197 | * Should be 64 bytes | 233 | * Should be 64 bytes |
198 | */ | 234 | */ |
199 | struct bau_desc { | 235 | struct bau_desc { |
200 | struct bau_target_nodemask distribution; | 236 | struct bau_target_uvhubmask distribution; |
201 | /* | 237 | /* |
202 | * message template, consisting of header and payload: | 238 | * message template, consisting of header and payload: |
203 | */ | 239 | */ |
@@ -237,19 +273,25 @@ struct bau_payload_queue_entry { | |||
237 | unsigned short acknowledge_count; /* filled in by destination */ | 273 | unsigned short acknowledge_count; /* filled in by destination */ |
238 | /* 16 bits, bytes 10-11 */ | 274 | /* 16 bits, bytes 10-11 */ |
239 | 275 | ||
240 | unsigned short replied_to:1; /* sent as 0 by the source */ | 276 | /* these next 3 bytes come from bits 58-81 of the message header */ |
241 | /* 1 bit */ | 277 | unsigned short replied_to:1; /* sent as 0 by the source */ |
242 | unsigned short unused1:7; /* not currently using */ | 278 | unsigned short msg_type:3; /* software message type */ |
243 | /* 7 bits: byte 12) */ | 279 | unsigned short canceled:1; /* sent as 0 by the source */ |
280 | unsigned short unused1:3; /* not currently using */ | ||
281 | /* byte 12 */ | ||
244 | 282 | ||
245 | unsigned char unused2[2]; /* not currently using */ | 283 | unsigned char unused2a; /* not currently using */ |
246 | /* bytes 13-14 */ | 284 | /* byte 13 */ |
285 | unsigned char unused2; /* not currently using */ | ||
286 | /* byte 14 */ | ||
247 | 287 | ||
248 | unsigned char sw_ack_vector; /* filled in by the hardware */ | 288 | unsigned char sw_ack_vector; /* filled in by the hardware */ |
249 | /* byte 15 (bits 127:120) */ | 289 | /* byte 15 (bits 127:120) */ |
250 | 290 | ||
251 | unsigned char unused4[3]; /* not currently using bytes 17-19 */ | 291 | unsigned short sequence; /* message sequence number */ |
252 | /* bytes 17-19 */ | 292 | /* bytes 16-17 */ |
293 | unsigned char unused4[2]; /* not currently using bytes 18-19 */ | ||
294 | /* bytes 18-19 */ | ||
253 | 295 | ||
254 | int number_of_cpus; /* filled in at destination */ | 296 | int number_of_cpus; /* filled in at destination */ |
255 | /* 32 bits, bytes 20-23 (aligned) */ | 297 | /* 32 bits, bytes 20-23 (aligned) */ |
@@ -259,63 +301,93 @@ struct bau_payload_queue_entry { | |||
259 | }; | 301 | }; |
260 | 302 | ||
261 | /* | 303 | /* |
262 | * one for every slot in the destination payload queue | 304 | * one per-cpu; to locate the software tables |
263 | */ | ||
264 | struct bau_msg_status { | ||
265 | struct bau_local_cpumask seen_by; /* map of cpu's */ | ||
266 | }; | ||
267 | |||
268 | /* | ||
269 | * one for every slot in the destination software ack resources | ||
270 | */ | ||
271 | struct bau_sw_ack_status { | ||
272 | struct bau_payload_queue_entry *msg; /* associated message */ | ||
273 | int watcher; /* cpu monitoring, or -1 */ | ||
274 | }; | ||
275 | |||
276 | /* | ||
277 | * one on every node and per-cpu; to locate the software tables | ||
278 | */ | 305 | */ |
279 | struct bau_control { | 306 | struct bau_control { |
280 | struct bau_desc *descriptor_base; | 307 | struct bau_desc *descriptor_base; |
281 | struct bau_payload_queue_entry *bau_msg_head; | ||
282 | struct bau_payload_queue_entry *va_queue_first; | 308 | struct bau_payload_queue_entry *va_queue_first; |
283 | struct bau_payload_queue_entry *va_queue_last; | 309 | struct bau_payload_queue_entry *va_queue_last; |
284 | struct bau_msg_status *msg_statuses; | 310 | struct bau_payload_queue_entry *bau_msg_head; |
285 | int *watching; /* pointer to array */ | 311 | struct bau_control *uvhub_master; |
312 | struct bau_control *socket_master; | ||
313 | unsigned long timeout_interval; | ||
314 | atomic_t active_descriptor_count; | ||
315 | int max_concurrent; | ||
316 | int max_concurrent_constant; | ||
317 | int retry_message_scans; | ||
318 | int plugged_tries; | ||
319 | int timeout_tries; | ||
320 | int ipi_attempts; | ||
321 | int conseccompletes; | ||
322 | short cpu; | ||
323 | short uvhub_cpu; | ||
324 | short uvhub; | ||
325 | short cpus_in_socket; | ||
326 | short cpus_in_uvhub; | ||
327 | unsigned short message_number; | ||
328 | unsigned short uvhub_quiesce; | ||
329 | short socket_acknowledge_count[DEST_Q_SIZE]; | ||
330 | cycles_t send_message; | ||
331 | spinlock_t masks_lock; | ||
332 | spinlock_t uvhub_lock; | ||
333 | spinlock_t queue_lock; | ||
286 | }; | 334 | }; |
287 | 335 | ||
288 | /* | 336 | /* |
289 | * This structure is allocated per_cpu for UV TLB shootdown statistics. | 337 | * This structure is allocated per_cpu for UV TLB shootdown statistics. |
290 | */ | 338 | */ |
291 | struct ptc_stats { | 339 | struct ptc_stats { |
292 | unsigned long ptc_i; /* number of IPI-style flushes */ | 340 | /* sender statistics */ |
293 | unsigned long requestor; /* number of nodes this cpu sent to */ | 341 | unsigned long s_giveup; /* number of fall backs to IPI-style flushes */ |
294 | unsigned long requestee; /* times cpu was remotely requested */ | 342 | unsigned long s_requestor; /* number of shootdown requests */ |
295 | unsigned long alltlb; /* times all tlb's on this cpu were flushed */ | 343 | unsigned long s_stimeout; /* source side timeouts */ |
296 | unsigned long onetlb; /* times just one tlb on this cpu was flushed */ | 344 | unsigned long s_dtimeout; /* destination side timeouts */ |
297 | unsigned long s_retry; /* retries on source side timeouts */ | 345 | unsigned long s_time; /* time spent in sending side */ |
298 | unsigned long d_retry; /* retries on destination side timeouts */ | 346 | unsigned long s_retriesok; /* successful retries */ |
299 | unsigned long sflush; /* cycles spent in uv_flush_tlb_others */ | 347 | unsigned long s_ntargcpu; /* number of cpus targeted */ |
300 | unsigned long dflush; /* cycles spent on destination side */ | 348 | unsigned long s_ntarguvhub; /* number of uvhubs targeted */ |
301 | unsigned long retriesok; /* successes on retries */ | 349 | unsigned long s_ntarguvhub16; /* number of times >= 16 target hubs */ |
302 | unsigned long nomsg; /* interrupts with no message */ | 350 | unsigned long s_ntarguvhub8; /* number of times >= 8 target hubs */ |
303 | unsigned long multmsg; /* interrupts with multiple messages */ | 351 | unsigned long s_ntarguvhub4; /* number of times >= 4 target hubs */ |
304 | unsigned long ntargeted;/* nodes targeted */ | 352 | unsigned long s_ntarguvhub2; /* number of times >= 2 target hubs */ |
353 | unsigned long s_ntarguvhub1; /* number of times == 1 target hub */ | ||
354 | unsigned long s_resets_plug; /* ipi-style resets from plug state */ | ||
355 | unsigned long s_resets_timeout; /* ipi-style resets from timeouts */ | ||
356 | unsigned long s_busy; /* status stayed busy past s/w timer */ | ||
357 | unsigned long s_throttles; /* waits in throttle */ | ||
358 | unsigned long s_retry_messages; /* retry broadcasts */ | ||
359 | /* destination statistics */ | ||
360 | unsigned long d_alltlb; /* times all tlb's on this cpu were flushed */ | ||
361 | unsigned long d_onetlb; /* times just one tlb on this cpu was flushed */ | ||
362 | unsigned long d_multmsg; /* interrupts with multiple messages */ | ||
363 | unsigned long d_nomsg; /* interrupts with no message */ | ||
364 | unsigned long d_time; /* time spent on destination side */ | ||
365 | unsigned long d_requestee; /* number of messages processed */ | ||
366 | unsigned long d_retries; /* number of retry messages processed */ | ||
367 | unsigned long d_canceled; /* number of messages canceled by retries */ | ||
368 | unsigned long d_nocanceled; /* retries that found nothing to cancel */ | ||
369 | unsigned long d_resets; /* number of ipi-style requests processed */ | ||
370 | unsigned long d_rcanceled; /* number of messages canceled by resets */ | ||
305 | }; | 371 | }; |
306 | 372 | ||
307 | static inline int bau_node_isset(int node, struct bau_target_nodemask *dstp) | 373 | static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp) |
308 | { | 374 | { |
309 | return constant_test_bit(node, &dstp->bits[0]); | 375 | return constant_test_bit(uvhub, &dstp->bits[0]); |
310 | } | 376 | } |
311 | static inline void bau_node_set(int node, struct bau_target_nodemask *dstp) | 377 | static inline void bau_uvhub_set(int uvhub, struct bau_target_uvhubmask *dstp) |
312 | { | 378 | { |
313 | __set_bit(node, &dstp->bits[0]); | 379 | __set_bit(uvhub, &dstp->bits[0]); |
314 | } | 380 | } |
315 | static inline void bau_nodes_clear(struct bau_target_nodemask *dstp, int nbits) | 381 | static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp, |
382 | int nbits) | ||
316 | { | 383 | { |
317 | bitmap_zero(&dstp->bits[0], nbits); | 384 | bitmap_zero(&dstp->bits[0], nbits); |
318 | } | 385 | } |
386 | static inline int bau_uvhub_weight(struct bau_target_uvhubmask *dstp) | ||
387 | { | ||
388 | return bitmap_weight((unsigned long *)&dstp->bits[0], | ||
389 | UV_DISTRIBUTION_SIZE); | ||
390 | } | ||
319 | 391 | ||
320 | static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) | 392 | static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) |
321 | { | 393 | { |
@@ -328,4 +400,35 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) | |||
328 | extern void uv_bau_message_intr1(void); | 400 | extern void uv_bau_message_intr1(void); |
329 | extern void uv_bau_timeout_intr1(void); | 401 | extern void uv_bau_timeout_intr1(void); |
330 | 402 | ||
403 | struct atomic_short { | ||
404 | short counter; | ||
405 | }; | ||
406 | |||
407 | /** | ||
408 | * atomic_read_short - read a short atomic variable | ||
409 | * @v: pointer of type atomic_short | ||
410 | * | ||
411 | * Atomically reads the value of @v. | ||
412 | */ | ||
413 | static inline int atomic_read_short(const struct atomic_short *v) | ||
414 | { | ||
415 | return v->counter; | ||
416 | } | ||
417 | |||
418 | /** | ||
419 | * atomic_add_short_return - add and return a short int | ||
420 | * @i: short value to add | ||
421 | * @v: pointer of type atomic_short | ||
422 | * | ||
423 | * Atomically adds @i to @v and returns @i + @v | ||
424 | */ | ||
425 | static inline int atomic_add_short_return(short i, struct atomic_short *v) | ||
426 | { | ||
427 | short __i = i; | ||
428 | asm volatile(LOCK_PREFIX "xaddw %0, %1" | ||
429 | : "+r" (i), "+m" (v->counter) | ||
430 | : : "memory"); | ||
431 | return i + __i; | ||
432 | } | ||
433 | |||
331 | #endif /* _ASM_X86_UV_UV_BAU_H */ | 434 | #endif /* _ASM_X86_UV_UV_BAU_H */ |
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 14cc74ba5d23..bf6b88ef8eeb 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h | |||
@@ -307,7 +307,7 @@ static inline unsigned long uv_read_global_mmr32(int pnode, unsigned long offset | |||
307 | * Access Global MMR space using the MMR space located at the top of physical | 307 | * Access Global MMR space using the MMR space located at the top of physical |
308 | * memory. | 308 | * memory. |
309 | */ | 309 | */ |
310 | static inline unsigned long *uv_global_mmr64_address(int pnode, unsigned long offset) | 310 | static inline volatile void __iomem *uv_global_mmr64_address(int pnode, unsigned long offset) |
311 | { | 311 | { |
312 | return __va(UV_GLOBAL_MMR64_BASE | | 312 | return __va(UV_GLOBAL_MMR64_BASE | |
313 | UV_GLOBAL_MMR64_PNODE_BITS(pnode) | offset); | 313 | UV_GLOBAL_MMR64_PNODE_BITS(pnode) | offset); |
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index 2cae46c7c8a2..b2f2d2e05cec 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h | |||
@@ -1,4 +1,3 @@ | |||
1 | |||
2 | /* | 1 | /* |
3 | * This file is subject to the terms and conditions of the GNU General Public | 2 | * This file is subject to the terms and conditions of the GNU General Public |
4 | * License. See the file "COPYING" in the main directory of this archive | 3 | * License. See the file "COPYING" in the main directory of this archive |
@@ -15,13 +14,25 @@ | |||
15 | #define UV_MMR_ENABLE (1UL << 63) | 14 | #define UV_MMR_ENABLE (1UL << 63) |
16 | 15 | ||
17 | /* ========================================================================= */ | 16 | /* ========================================================================= */ |
17 | /* UVH_BAU_DATA_BROADCAST */ | ||
18 | /* ========================================================================= */ | ||
19 | #define UVH_BAU_DATA_BROADCAST 0x61688UL | ||
20 | #define UVH_BAU_DATA_BROADCAST_32 0x0440 | ||
21 | |||
22 | #define UVH_BAU_DATA_BROADCAST_ENABLE_SHFT 0 | ||
23 | #define UVH_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL | ||
24 | |||
25 | union uvh_bau_data_broadcast_u { | ||
26 | unsigned long v; | ||
27 | struct uvh_bau_data_broadcast_s { | ||
28 | unsigned long enable : 1; /* RW */ | ||
29 | unsigned long rsvd_1_63: 63; /* */ | ||
30 | } s; | ||
31 | }; | ||
32 | |||
33 | /* ========================================================================= */ | ||
18 | /* UVH_BAU_DATA_CONFIG */ | 34 | /* UVH_BAU_DATA_CONFIG */ |
19 | /* ========================================================================= */ | 35 | /* ========================================================================= */ |
20 | #define UVH_LB_BAU_MISC_CONTROL 0x320170UL | ||
21 | #define UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT 15 | ||
22 | #define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT 16 | ||
23 | #define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 0x000000000bUL | ||
24 | /* 1011 timebase 7 (168millisec) * 3 ticks -> 500ms */ | ||
25 | #define UVH_BAU_DATA_CONFIG 0x61680UL | 36 | #define UVH_BAU_DATA_CONFIG 0x61680UL |
26 | #define UVH_BAU_DATA_CONFIG_32 0x0438 | 37 | #define UVH_BAU_DATA_CONFIG_32 0x0438 |
27 | 38 | ||
@@ -604,6 +615,68 @@ union uvh_lb_bau_intd_software_acknowledge_u { | |||
604 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0x0a70 | 615 | #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0x0a70 |
605 | 616 | ||
606 | /* ========================================================================= */ | 617 | /* ========================================================================= */ |
618 | /* UVH_LB_BAU_MISC_CONTROL */ | ||
619 | /* ========================================================================= */ | ||
620 | #define UVH_LB_BAU_MISC_CONTROL 0x320170UL | ||
621 | #define UVH_LB_BAU_MISC_CONTROL_32 0x00a10 | ||
622 | |||
623 | #define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 | ||
624 | #define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL | ||
625 | #define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 | ||
626 | #define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL | ||
627 | #define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 | ||
628 | #define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL | ||
629 | #define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 | ||
630 | #define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL | ||
631 | #define UVH_LB_BAU_MISC_CONTROL_CSI_AGENT_PRESENCE_VECTOR_SHFT 11 | ||
632 | #define UVH_LB_BAU_MISC_CONTROL_CSI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL | ||
633 | #define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 | ||
634 | #define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL | ||
635 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15 | ||
636 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL | ||
637 | #define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16 | ||
638 | #define UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL | ||
639 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 | ||
640 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL | ||
641 | #define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 | ||
642 | #define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL | ||
643 | #define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 | ||
644 | #define UVH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL | ||
645 | #define UVH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 | ||
646 | #define UVH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL | ||
647 | #define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 | ||
648 | #define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL | ||
649 | #define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 | ||
650 | #define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL | ||
651 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 | ||
652 | #define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL | ||
653 | #define UVH_LB_BAU_MISC_CONTROL_FUN_SHFT 48 | ||
654 | #define UVH_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL | ||
655 | |||
656 | union uvh_lb_bau_misc_control_u { | ||
657 | unsigned long v; | ||
658 | struct uvh_lb_bau_misc_control_s { | ||
659 | unsigned long rejection_delay : 8; /* RW */ | ||
660 | unsigned long apic_mode : 1; /* RW */ | ||
661 | unsigned long force_broadcast : 1; /* RW */ | ||
662 | unsigned long force_lock_nop : 1; /* RW */ | ||
663 | unsigned long csi_agent_presence_vector : 3; /* RW */ | ||
664 | unsigned long descriptor_fetch_mode : 1; /* RW */ | ||
665 | unsigned long enable_intd_soft_ack_mode : 1; /* RW */ | ||
666 | unsigned long intd_soft_ack_timeout_period : 4; /* RW */ | ||
667 | unsigned long enable_dual_mapping_mode : 1; /* RW */ | ||
668 | unsigned long vga_io_port_decode_enable : 1; /* RW */ | ||
669 | unsigned long vga_io_port_16_bit_decode : 1; /* RW */ | ||
670 | unsigned long suppress_dest_registration : 1; /* RW */ | ||
671 | unsigned long programmed_initial_priority : 3; /* RW */ | ||
672 | unsigned long use_incoming_priority : 1; /* RW */ | ||
673 | unsigned long enable_programmed_initial_priority : 1; /* RW */ | ||
674 | unsigned long rsvd_29_47 : 19; /* */ | ||
675 | unsigned long fun : 16; /* RW */ | ||
676 | } s; | ||
677 | }; | ||
678 | |||
679 | /* ========================================================================= */ | ||
607 | /* UVH_LB_BAU_SB_ACTIVATION_CONTROL */ | 680 | /* UVH_LB_BAU_SB_ACTIVATION_CONTROL */ |
608 | /* ========================================================================= */ | 681 | /* ========================================================================= */ |
609 | #define UVH_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL | 682 | #define UVH_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL |
@@ -681,334 +754,6 @@ union uvh_lb_bau_sb_descriptor_base_u { | |||
681 | }; | 754 | }; |
682 | 755 | ||
683 | /* ========================================================================= */ | 756 | /* ========================================================================= */ |
684 | /* UVH_LB_MCAST_AOERR0_RPT_ENABLE */ | ||
685 | /* ========================================================================= */ | ||
686 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE 0x50b20UL | ||
687 | |||
688 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_OBESE_MSG_SHFT 0 | ||
689 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_OBESE_MSG_MASK 0x0000000000000001UL | ||
690 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_DATA_SB_ERR_SHFT 1 | ||
691 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_DATA_SB_ERR_MASK 0x0000000000000002UL | ||
692 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_NACK_BUFF_PARITY_SHFT 2 | ||
693 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_NACK_BUFF_PARITY_MASK 0x0000000000000004UL | ||
694 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_TIMEOUT_SHFT 3 | ||
695 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_TIMEOUT_MASK 0x0000000000000008UL | ||
696 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_INACTIVE_REPLY_SHFT 4 | ||
697 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_INACTIVE_REPLY_MASK 0x0000000000000010UL | ||
698 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_UPGRADE_ERROR_SHFT 5 | ||
699 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_UPGRADE_ERROR_MASK 0x0000000000000020UL | ||
700 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REG_COUNT_UNDERFLOW_SHFT 6 | ||
701 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REG_COUNT_UNDERFLOW_MASK 0x0000000000000040UL | ||
702 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REP_OBESE_MSG_SHFT 7 | ||
703 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MCAST_REP_OBESE_MSG_MASK 0x0000000000000080UL | ||
704 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_RUNT_MSG_SHFT 8 | ||
705 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_RUNT_MSG_MASK 0x0000000000000100UL | ||
706 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_OBESE_MSG_SHFT 9 | ||
707 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_OBESE_MSG_MASK 0x0000000000000200UL | ||
708 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_DATA_SB_ERR_SHFT 10 | ||
709 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REQ_DATA_SB_ERR_MASK 0x0000000000000400UL | ||
710 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_RUNT_MSG_SHFT 11 | ||
711 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_RUNT_MSG_MASK 0x0000000000000800UL | ||
712 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_OBESE_MSG_SHFT 12 | ||
713 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_OBESE_MSG_MASK 0x0000000000001000UL | ||
714 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_DATA_SB_ERR_SHFT 13 | ||
715 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_DATA_SB_ERR_MASK 0x0000000000002000UL | ||
716 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_COMMAND_ERR_SHFT 14 | ||
717 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_REP_COMMAND_ERR_MASK 0x0000000000004000UL | ||
718 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_PEND_TIMEOUT_SHFT 15 | ||
719 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_UCACHE_PEND_TIMEOUT_MASK 0x0000000000008000UL | ||
720 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_RUNT_MSG_SHFT 16 | ||
721 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_RUNT_MSG_MASK 0x0000000000010000UL | ||
722 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_OBESE_MSG_SHFT 17 | ||
723 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_OBESE_MSG_MASK 0x0000000000020000UL | ||
724 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_DATA_SB_ERR_SHFT 18 | ||
725 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REQ_DATA_SB_ERR_MASK 0x0000000000040000UL | ||
726 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_RUNT_MSG_SHFT 19 | ||
727 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_RUNT_MSG_MASK 0x0000000000080000UL | ||
728 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_OBESE_MSG_SHFT 20 | ||
729 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_OBESE_MSG_MASK 0x0000000000100000UL | ||
730 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_DATA_SB_ERR_SHFT 21 | ||
731 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_REP_DATA_SB_ERR_MASK 0x0000000000200000UL | ||
732 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_AMO_TIMEOUT_SHFT 22 | ||
733 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_AMO_TIMEOUT_MASK 0x0000000000400000UL | ||
734 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_PUT_TIMEOUT_SHFT 23 | ||
735 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_PUT_TIMEOUT_MASK 0x0000000000800000UL | ||
736 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_SPURIOUS_EVENT_SHFT 24 | ||
737 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_MACC_SPURIOUS_EVENT_MASK 0x0000000001000000UL | ||
738 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IOH_DESTINATION_TABLE_PARITY_SHFT 25 | ||
739 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IOH_DESTINATION_TABLE_PARITY_MASK 0x0000000002000000UL | ||
740 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_HAD_ERROR_REPLY_SHFT 26 | ||
741 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_HAD_ERROR_REPLY_MASK 0x0000000004000000UL | ||
742 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_TIMEOUT_SHFT 27 | ||
743 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_GET_TIMEOUT_MASK 0x0000000008000000UL | ||
744 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_LOCK_MANAGER_HAD_ERROR_REPLY_SHFT 28 | ||
745 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_LOCK_MANAGER_HAD_ERROR_REPLY_MASK 0x0000000010000000UL | ||
746 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_HAD_ERROR_REPLY_SHFT 29 | ||
747 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_HAD_ERROR_REPLY_MASK 0x0000000020000000UL | ||
748 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_TIMEOUT_SHFT 30 | ||
749 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_PUT_TIMEOUT_MASK 0x0000000040000000UL | ||
750 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SB_ACTIVATION_OVERRUN_SHFT 31 | ||
751 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SB_ACTIVATION_OVERRUN_MASK 0x0000000080000000UL | ||
752 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_HAD_ERROR_REPLY_SHFT 32 | ||
753 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_HAD_ERROR_REPLY_MASK 0x0000000100000000UL | ||
754 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_TIMEOUT_SHFT 33 | ||
755 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_COMPLETED_GB_ACTIVATION_TIMEOUT_MASK 0x0000000200000000UL | ||
756 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_0_PARITY_SHFT 34 | ||
757 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_0_PARITY_MASK 0x0000000400000000UL | ||
758 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_1_PARITY_SHFT 35 | ||
759 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_DESCRIPTOR_BUFFER_1_PARITY_MASK 0x0000000800000000UL | ||
760 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SOCKET_DESTINATION_TABLE_PARITY_SHFT 36 | ||
761 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_SOCKET_DESTINATION_TABLE_PARITY_MASK 0x0000001000000000UL | ||
762 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_BAU_REPLY_PAYLOAD_CORRUPTION_SHFT 37 | ||
763 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_BAU_REPLY_PAYLOAD_CORRUPTION_MASK 0x0000002000000000UL | ||
764 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IO_PORT_DESTINATION_TABLE_PARITY_SHFT 38 | ||
765 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_IO_PORT_DESTINATION_TABLE_PARITY_MASK 0x0000004000000000UL | ||
766 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INTD_SOFT_ACK_TIMEOUT_SHFT 39 | ||
767 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INTD_SOFT_ACK_TIMEOUT_MASK 0x0000008000000000UL | ||
768 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_OBESE_MSG_SHFT 40 | ||
769 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_OBESE_MSG_MASK 0x0000010000000000UL | ||
770 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_COMMAND_ERR_SHFT 41 | ||
771 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_REP_COMMAND_ERR_MASK 0x0000020000000000UL | ||
772 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_TIMEOUT_SHFT 42 | ||
773 | #define UVH_LB_MCAST_AOERR0_RPT_ENABLE_INT_TIMEOUT_MASK 0x0000040000000000UL | ||
774 | |||
775 | union uvh_lb_mcast_aoerr0_rpt_enable_u { | ||
776 | unsigned long v; | ||
777 | struct uvh_lb_mcast_aoerr0_rpt_enable_s { | ||
778 | unsigned long mcast_obese_msg : 1; /* RW */ | ||
779 | unsigned long mcast_data_sb_err : 1; /* RW */ | ||
780 | unsigned long mcast_nack_buff_parity : 1; /* RW */ | ||
781 | unsigned long mcast_timeout : 1; /* RW */ | ||
782 | unsigned long mcast_inactive_reply : 1; /* RW */ | ||
783 | unsigned long mcast_upgrade_error : 1; /* RW */ | ||
784 | unsigned long mcast_reg_count_underflow : 1; /* RW */ | ||
785 | unsigned long mcast_rep_obese_msg : 1; /* RW */ | ||
786 | unsigned long ucache_req_runt_msg : 1; /* RW */ | ||
787 | unsigned long ucache_req_obese_msg : 1; /* RW */ | ||
788 | unsigned long ucache_req_data_sb_err : 1; /* RW */ | ||
789 | unsigned long ucache_rep_runt_msg : 1; /* RW */ | ||
790 | unsigned long ucache_rep_obese_msg : 1; /* RW */ | ||
791 | unsigned long ucache_rep_data_sb_err : 1; /* RW */ | ||
792 | unsigned long ucache_rep_command_err : 1; /* RW */ | ||
793 | unsigned long ucache_pend_timeout : 1; /* RW */ | ||
794 | unsigned long macc_req_runt_msg : 1; /* RW */ | ||
795 | unsigned long macc_req_obese_msg : 1; /* RW */ | ||
796 | unsigned long macc_req_data_sb_err : 1; /* RW */ | ||
797 | unsigned long macc_rep_runt_msg : 1; /* RW */ | ||
798 | unsigned long macc_rep_obese_msg : 1; /* RW */ | ||
799 | unsigned long macc_rep_data_sb_err : 1; /* RW */ | ||
800 | unsigned long macc_amo_timeout : 1; /* RW */ | ||
801 | unsigned long macc_put_timeout : 1; /* RW */ | ||
802 | unsigned long macc_spurious_event : 1; /* RW */ | ||
803 | unsigned long ioh_destination_table_parity : 1; /* RW */ | ||
804 | unsigned long get_had_error_reply : 1; /* RW */ | ||
805 | unsigned long get_timeout : 1; /* RW */ | ||
806 | unsigned long lock_manager_had_error_reply : 1; /* RW */ | ||
807 | unsigned long put_had_error_reply : 1; /* RW */ | ||
808 | unsigned long put_timeout : 1; /* RW */ | ||
809 | unsigned long sb_activation_overrun : 1; /* RW */ | ||
810 | unsigned long completed_gb_activation_had_error_reply : 1; /* RW */ | ||
811 | unsigned long completed_gb_activation_timeout : 1; /* RW */ | ||
812 | unsigned long descriptor_buffer_0_parity : 1; /* RW */ | ||
813 | unsigned long descriptor_buffer_1_parity : 1; /* RW */ | ||
814 | unsigned long socket_destination_table_parity : 1; /* RW */ | ||
815 | unsigned long bau_reply_payload_corruption : 1; /* RW */ | ||
816 | unsigned long io_port_destination_table_parity : 1; /* RW */ | ||
817 | unsigned long intd_soft_ack_timeout : 1; /* RW */ | ||
818 | unsigned long int_rep_obese_msg : 1; /* RW */ | ||
819 | unsigned long int_rep_command_err : 1; /* RW */ | ||
820 | unsigned long int_timeout : 1; /* RW */ | ||
821 | unsigned long rsvd_43_63 : 21; /* */ | ||
822 | } s; | ||
823 | }; | ||
824 | |||
825 | /* ========================================================================= */ | ||
826 | /* UVH_LOCAL_INT0_CONFIG */ | ||
827 | /* ========================================================================= */ | ||
828 | #define UVH_LOCAL_INT0_CONFIG 0x61000UL | ||
829 | |||
830 | #define UVH_LOCAL_INT0_CONFIG_VECTOR_SHFT 0 | ||
831 | #define UVH_LOCAL_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL | ||
832 | #define UVH_LOCAL_INT0_CONFIG_DM_SHFT 8 | ||
833 | #define UVH_LOCAL_INT0_CONFIG_DM_MASK 0x0000000000000700UL | ||
834 | #define UVH_LOCAL_INT0_CONFIG_DESTMODE_SHFT 11 | ||
835 | #define UVH_LOCAL_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL | ||
836 | #define UVH_LOCAL_INT0_CONFIG_STATUS_SHFT 12 | ||
837 | #define UVH_LOCAL_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL | ||
838 | #define UVH_LOCAL_INT0_CONFIG_P_SHFT 13 | ||
839 | #define UVH_LOCAL_INT0_CONFIG_P_MASK 0x0000000000002000UL | ||
840 | #define UVH_LOCAL_INT0_CONFIG_T_SHFT 15 | ||
841 | #define UVH_LOCAL_INT0_CONFIG_T_MASK 0x0000000000008000UL | ||
842 | #define UVH_LOCAL_INT0_CONFIG_M_SHFT 16 | ||
843 | #define UVH_LOCAL_INT0_CONFIG_M_MASK 0x0000000000010000UL | ||
844 | #define UVH_LOCAL_INT0_CONFIG_APIC_ID_SHFT 32 | ||
845 | #define UVH_LOCAL_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL | ||
846 | |||
847 | union uvh_local_int0_config_u { | ||
848 | unsigned long v; | ||
849 | struct uvh_local_int0_config_s { | ||
850 | unsigned long vector_ : 8; /* RW */ | ||
851 | unsigned long dm : 3; /* RW */ | ||
852 | unsigned long destmode : 1; /* RW */ | ||
853 | unsigned long status : 1; /* RO */ | ||
854 | unsigned long p : 1; /* RO */ | ||
855 | unsigned long rsvd_14 : 1; /* */ | ||
856 | unsigned long t : 1; /* RO */ | ||
857 | unsigned long m : 1; /* RW */ | ||
858 | unsigned long rsvd_17_31: 15; /* */ | ||
859 | unsigned long apic_id : 32; /* RW */ | ||
860 | } s; | ||
861 | }; | ||
862 | |||
863 | /* ========================================================================= */ | ||
864 | /* UVH_LOCAL_INT0_ENABLE */ | ||
865 | /* ========================================================================= */ | ||
866 | #define UVH_LOCAL_INT0_ENABLE 0x65000UL | ||
867 | |||
868 | #define UVH_LOCAL_INT0_ENABLE_LB_HCERR_SHFT 0 | ||
869 | #define UVH_LOCAL_INT0_ENABLE_LB_HCERR_MASK 0x0000000000000001UL | ||
870 | #define UVH_LOCAL_INT0_ENABLE_GR0_HCERR_SHFT 1 | ||
871 | #define UVH_LOCAL_INT0_ENABLE_GR0_HCERR_MASK 0x0000000000000002UL | ||
872 | #define UVH_LOCAL_INT0_ENABLE_GR1_HCERR_SHFT 2 | ||
873 | #define UVH_LOCAL_INT0_ENABLE_GR1_HCERR_MASK 0x0000000000000004UL | ||
874 | #define UVH_LOCAL_INT0_ENABLE_LH_HCERR_SHFT 3 | ||
875 | #define UVH_LOCAL_INT0_ENABLE_LH_HCERR_MASK 0x0000000000000008UL | ||
876 | #define UVH_LOCAL_INT0_ENABLE_RH_HCERR_SHFT 4 | ||
877 | #define UVH_LOCAL_INT0_ENABLE_RH_HCERR_MASK 0x0000000000000010UL | ||
878 | #define UVH_LOCAL_INT0_ENABLE_XN_HCERR_SHFT 5 | ||
879 | #define UVH_LOCAL_INT0_ENABLE_XN_HCERR_MASK 0x0000000000000020UL | ||
880 | #define UVH_LOCAL_INT0_ENABLE_SI_HCERR_SHFT 6 | ||
881 | #define UVH_LOCAL_INT0_ENABLE_SI_HCERR_MASK 0x0000000000000040UL | ||
882 | #define UVH_LOCAL_INT0_ENABLE_LB_AOERR0_SHFT 7 | ||
883 | #define UVH_LOCAL_INT0_ENABLE_LB_AOERR0_MASK 0x0000000000000080UL | ||
884 | #define UVH_LOCAL_INT0_ENABLE_GR0_AOERR0_SHFT 8 | ||
885 | #define UVH_LOCAL_INT0_ENABLE_GR0_AOERR0_MASK 0x0000000000000100UL | ||
886 | #define UVH_LOCAL_INT0_ENABLE_GR1_AOERR0_SHFT 9 | ||
887 | #define UVH_LOCAL_INT0_ENABLE_GR1_AOERR0_MASK 0x0000000000000200UL | ||
888 | #define UVH_LOCAL_INT0_ENABLE_LH_AOERR0_SHFT 10 | ||
889 | #define UVH_LOCAL_INT0_ENABLE_LH_AOERR0_MASK 0x0000000000000400UL | ||
890 | #define UVH_LOCAL_INT0_ENABLE_RH_AOERR0_SHFT 11 | ||
891 | #define UVH_LOCAL_INT0_ENABLE_RH_AOERR0_MASK 0x0000000000000800UL | ||
892 | #define UVH_LOCAL_INT0_ENABLE_XN_AOERR0_SHFT 12 | ||
893 | #define UVH_LOCAL_INT0_ENABLE_XN_AOERR0_MASK 0x0000000000001000UL | ||
894 | #define UVH_LOCAL_INT0_ENABLE_SI_AOERR0_SHFT 13 | ||
895 | #define UVH_LOCAL_INT0_ENABLE_SI_AOERR0_MASK 0x0000000000002000UL | ||
896 | #define UVH_LOCAL_INT0_ENABLE_LB_AOERR1_SHFT 14 | ||
897 | #define UVH_LOCAL_INT0_ENABLE_LB_AOERR1_MASK 0x0000000000004000UL | ||
898 | #define UVH_LOCAL_INT0_ENABLE_GR0_AOERR1_SHFT 15 | ||
899 | #define UVH_LOCAL_INT0_ENABLE_GR0_AOERR1_MASK 0x0000000000008000UL | ||
900 | #define UVH_LOCAL_INT0_ENABLE_GR1_AOERR1_SHFT 16 | ||
901 | #define UVH_LOCAL_INT0_ENABLE_GR1_AOERR1_MASK 0x0000000000010000UL | ||
902 | #define UVH_LOCAL_INT0_ENABLE_LH_AOERR1_SHFT 17 | ||
903 | #define UVH_LOCAL_INT0_ENABLE_LH_AOERR1_MASK 0x0000000000020000UL | ||
904 | #define UVH_LOCAL_INT0_ENABLE_RH_AOERR1_SHFT 18 | ||
905 | #define UVH_LOCAL_INT0_ENABLE_RH_AOERR1_MASK 0x0000000000040000UL | ||
906 | #define UVH_LOCAL_INT0_ENABLE_XN_AOERR1_SHFT 19 | ||
907 | #define UVH_LOCAL_INT0_ENABLE_XN_AOERR1_MASK 0x0000000000080000UL | ||
908 | #define UVH_LOCAL_INT0_ENABLE_SI_AOERR1_SHFT 20 | ||
909 | #define UVH_LOCAL_INT0_ENABLE_SI_AOERR1_MASK 0x0000000000100000UL | ||
910 | #define UVH_LOCAL_INT0_ENABLE_RH_VPI_INT_SHFT 21 | ||
911 | #define UVH_LOCAL_INT0_ENABLE_RH_VPI_INT_MASK 0x0000000000200000UL | ||
912 | #define UVH_LOCAL_INT0_ENABLE_SYSTEM_SHUTDOWN_INT_SHFT 22 | ||
913 | #define UVH_LOCAL_INT0_ENABLE_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL | ||
914 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_0_SHFT 23 | ||
915 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_0_MASK 0x0000000000800000UL | ||
916 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_1_SHFT 24 | ||
917 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_1_MASK 0x0000000001000000UL | ||
918 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_2_SHFT 25 | ||
919 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_2_MASK 0x0000000002000000UL | ||
920 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_3_SHFT 26 | ||
921 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_3_MASK 0x0000000004000000UL | ||
922 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_4_SHFT 27 | ||
923 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_4_MASK 0x0000000008000000UL | ||
924 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_5_SHFT 28 | ||
925 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_5_MASK 0x0000000010000000UL | ||
926 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_6_SHFT 29 | ||
927 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_6_MASK 0x0000000020000000UL | ||
928 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_7_SHFT 30 | ||
929 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_7_MASK 0x0000000040000000UL | ||
930 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_8_SHFT 31 | ||
931 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_8_MASK 0x0000000080000000UL | ||
932 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_9_SHFT 32 | ||
933 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_9_MASK 0x0000000100000000UL | ||
934 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_10_SHFT 33 | ||
935 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_10_MASK 0x0000000200000000UL | ||
936 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_11_SHFT 34 | ||
937 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_11_MASK 0x0000000400000000UL | ||
938 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_12_SHFT 35 | ||
939 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_12_MASK 0x0000000800000000UL | ||
940 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_13_SHFT 36 | ||
941 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_13_MASK 0x0000001000000000UL | ||
942 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_14_SHFT 37 | ||
943 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_14_MASK 0x0000002000000000UL | ||
944 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_15_SHFT 38 | ||
945 | #define UVH_LOCAL_INT0_ENABLE_LB_IRQ_INT_15_MASK 0x0000004000000000UL | ||
946 | #define UVH_LOCAL_INT0_ENABLE_L1_NMI_INT_SHFT 39 | ||
947 | #define UVH_LOCAL_INT0_ENABLE_L1_NMI_INT_MASK 0x0000008000000000UL | ||
948 | #define UVH_LOCAL_INT0_ENABLE_STOP_CLOCK_SHFT 40 | ||
949 | #define UVH_LOCAL_INT0_ENABLE_STOP_CLOCK_MASK 0x0000010000000000UL | ||
950 | #define UVH_LOCAL_INT0_ENABLE_ASIC_TO_L1_SHFT 41 | ||
951 | #define UVH_LOCAL_INT0_ENABLE_ASIC_TO_L1_MASK 0x0000020000000000UL | ||
952 | #define UVH_LOCAL_INT0_ENABLE_L1_TO_ASIC_SHFT 42 | ||
953 | #define UVH_LOCAL_INT0_ENABLE_L1_TO_ASIC_MASK 0x0000040000000000UL | ||
954 | #define UVH_LOCAL_INT0_ENABLE_LTC_INT_SHFT 43 | ||
955 | #define UVH_LOCAL_INT0_ENABLE_LTC_INT_MASK 0x0000080000000000UL | ||
956 | #define UVH_LOCAL_INT0_ENABLE_LA_SEQ_TRIGGER_SHFT 44 | ||
957 | #define UVH_LOCAL_INT0_ENABLE_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL | ||
958 | |||
959 | union uvh_local_int0_enable_u { | ||
960 | unsigned long v; | ||
961 | struct uvh_local_int0_enable_s { | ||
962 | unsigned long lb_hcerr : 1; /* RW */ | ||
963 | unsigned long gr0_hcerr : 1; /* RW */ | ||
964 | unsigned long gr1_hcerr : 1; /* RW */ | ||
965 | unsigned long lh_hcerr : 1; /* RW */ | ||
966 | unsigned long rh_hcerr : 1; /* RW */ | ||
967 | unsigned long xn_hcerr : 1; /* RW */ | ||
968 | unsigned long si_hcerr : 1; /* RW */ | ||
969 | unsigned long lb_aoerr0 : 1; /* RW */ | ||
970 | unsigned long gr0_aoerr0 : 1; /* RW */ | ||
971 | unsigned long gr1_aoerr0 : 1; /* RW */ | ||
972 | unsigned long lh_aoerr0 : 1; /* RW */ | ||
973 | unsigned long rh_aoerr0 : 1; /* RW */ | ||
974 | unsigned long xn_aoerr0 : 1; /* RW */ | ||
975 | unsigned long si_aoerr0 : 1; /* RW */ | ||
976 | unsigned long lb_aoerr1 : 1; /* RW */ | ||
977 | unsigned long gr0_aoerr1 : 1; /* RW */ | ||
978 | unsigned long gr1_aoerr1 : 1; /* RW */ | ||
979 | unsigned long lh_aoerr1 : 1; /* RW */ | ||
980 | unsigned long rh_aoerr1 : 1; /* RW */ | ||
981 | unsigned long xn_aoerr1 : 1; /* RW */ | ||
982 | unsigned long si_aoerr1 : 1; /* RW */ | ||
983 | unsigned long rh_vpi_int : 1; /* RW */ | ||
984 | unsigned long system_shutdown_int : 1; /* RW */ | ||
985 | unsigned long lb_irq_int_0 : 1; /* RW */ | ||
986 | unsigned long lb_irq_int_1 : 1; /* RW */ | ||
987 | unsigned long lb_irq_int_2 : 1; /* RW */ | ||
988 | unsigned long lb_irq_int_3 : 1; /* RW */ | ||
989 | unsigned long lb_irq_int_4 : 1; /* RW */ | ||
990 | unsigned long lb_irq_int_5 : 1; /* RW */ | ||
991 | unsigned long lb_irq_int_6 : 1; /* RW */ | ||
992 | unsigned long lb_irq_int_7 : 1; /* RW */ | ||
993 | unsigned long lb_irq_int_8 : 1; /* RW */ | ||
994 | unsigned long lb_irq_int_9 : 1; /* RW */ | ||
995 | unsigned long lb_irq_int_10 : 1; /* RW */ | ||
996 | unsigned long lb_irq_int_11 : 1; /* RW */ | ||
997 | unsigned long lb_irq_int_12 : 1; /* RW */ | ||
998 | unsigned long lb_irq_int_13 : 1; /* RW */ | ||
999 | unsigned long lb_irq_int_14 : 1; /* RW */ | ||
1000 | unsigned long lb_irq_int_15 : 1; /* RW */ | ||
1001 | unsigned long l1_nmi_int : 1; /* RW */ | ||
1002 | unsigned long stop_clock : 1; /* RW */ | ||
1003 | unsigned long asic_to_l1 : 1; /* RW */ | ||
1004 | unsigned long l1_to_asic : 1; /* RW */ | ||
1005 | unsigned long ltc_int : 1; /* RW */ | ||
1006 | unsigned long la_seq_trigger : 1; /* RW */ | ||
1007 | unsigned long rsvd_45_63 : 19; /* */ | ||
1008 | } s; | ||
1009 | }; | ||
1010 | |||
1011 | /* ========================================================================= */ | ||
1012 | /* UVH_NODE_ID */ | 757 | /* UVH_NODE_ID */ |
1013 | /* ========================================================================= */ | 758 | /* ========================================================================= */ |
1014 | #define UVH_NODE_ID 0x0UL | 759 | #define UVH_NODE_ID 0x0UL |
@@ -1112,26 +857,6 @@ union uvh_rh_gam_alias210_redirect_config_2_mmr_u { | |||
1112 | }; | 857 | }; |
1113 | 858 | ||
1114 | /* ========================================================================= */ | 859 | /* ========================================================================= */ |
1115 | /* UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR */ | ||
1116 | /* ========================================================================= */ | ||
1117 | #define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR 0x1600020UL | ||
1118 | |||
1119 | #define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT 26 | ||
1120 | #define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL | ||
1121 | #define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 | ||
1122 | #define UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL | ||
1123 | |||
1124 | union uvh_rh_gam_cfg_overlay_config_mmr_u { | ||
1125 | unsigned long v; | ||
1126 | struct uvh_rh_gam_cfg_overlay_config_mmr_s { | ||
1127 | unsigned long rsvd_0_25: 26; /* */ | ||
1128 | unsigned long base : 20; /* RW */ | ||
1129 | unsigned long rsvd_46_62: 17; /* */ | ||
1130 | unsigned long enable : 1; /* RW */ | ||
1131 | } s; | ||
1132 | }; | ||
1133 | |||
1134 | /* ========================================================================= */ | ||
1135 | /* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */ | 860 | /* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */ |
1136 | /* ========================================================================= */ | 861 | /* ========================================================================= */ |
1137 | #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL | 862 | #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL |
@@ -1263,101 +988,6 @@ union uvh_rtc1_int_config_u { | |||
1263 | }; | 988 | }; |
1264 | 989 | ||
1265 | /* ========================================================================= */ | 990 | /* ========================================================================= */ |
1266 | /* UVH_RTC2_INT_CONFIG */ | ||
1267 | /* ========================================================================= */ | ||
1268 | #define UVH_RTC2_INT_CONFIG 0x61600UL | ||
1269 | |||
1270 | #define UVH_RTC2_INT_CONFIG_VECTOR_SHFT 0 | ||
1271 | #define UVH_RTC2_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL | ||
1272 | #define UVH_RTC2_INT_CONFIG_DM_SHFT 8 | ||
1273 | #define UVH_RTC2_INT_CONFIG_DM_MASK 0x0000000000000700UL | ||
1274 | #define UVH_RTC2_INT_CONFIG_DESTMODE_SHFT 11 | ||
1275 | #define UVH_RTC2_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL | ||
1276 | #define UVH_RTC2_INT_CONFIG_STATUS_SHFT 12 | ||
1277 | #define UVH_RTC2_INT_CONFIG_STATUS_MASK 0x0000000000001000UL | ||
1278 | #define UVH_RTC2_INT_CONFIG_P_SHFT 13 | ||
1279 | #define UVH_RTC2_INT_CONFIG_P_MASK 0x0000000000002000UL | ||
1280 | #define UVH_RTC2_INT_CONFIG_T_SHFT 15 | ||
1281 | #define UVH_RTC2_INT_CONFIG_T_MASK 0x0000000000008000UL | ||
1282 | #define UVH_RTC2_INT_CONFIG_M_SHFT 16 | ||
1283 | #define UVH_RTC2_INT_CONFIG_M_MASK 0x0000000000010000UL | ||
1284 | #define UVH_RTC2_INT_CONFIG_APIC_ID_SHFT 32 | ||
1285 | #define UVH_RTC2_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL | ||
1286 | |||
1287 | union uvh_rtc2_int_config_u { | ||
1288 | unsigned long v; | ||
1289 | struct uvh_rtc2_int_config_s { | ||
1290 | unsigned long vector_ : 8; /* RW */ | ||
1291 | unsigned long dm : 3; /* RW */ | ||
1292 | unsigned long destmode : 1; /* RW */ | ||
1293 | unsigned long status : 1; /* RO */ | ||
1294 | unsigned long p : 1; /* RO */ | ||
1295 | unsigned long rsvd_14 : 1; /* */ | ||
1296 | unsigned long t : 1; /* RO */ | ||
1297 | unsigned long m : 1; /* RW */ | ||
1298 | unsigned long rsvd_17_31: 15; /* */ | ||
1299 | unsigned long apic_id : 32; /* RW */ | ||
1300 | } s; | ||
1301 | }; | ||
1302 | |||
1303 | /* ========================================================================= */ | ||
1304 | /* UVH_RTC3_INT_CONFIG */ | ||
1305 | /* ========================================================================= */ | ||
1306 | #define UVH_RTC3_INT_CONFIG 0x61640UL | ||
1307 | |||
1308 | #define UVH_RTC3_INT_CONFIG_VECTOR_SHFT 0 | ||
1309 | #define UVH_RTC3_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL | ||
1310 | #define UVH_RTC3_INT_CONFIG_DM_SHFT 8 | ||
1311 | #define UVH_RTC3_INT_CONFIG_DM_MASK 0x0000000000000700UL | ||
1312 | #define UVH_RTC3_INT_CONFIG_DESTMODE_SHFT 11 | ||
1313 | #define UVH_RTC3_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL | ||
1314 | #define UVH_RTC3_INT_CONFIG_STATUS_SHFT 12 | ||
1315 | #define UVH_RTC3_INT_CONFIG_STATUS_MASK 0x0000000000001000UL | ||
1316 | #define UVH_RTC3_INT_CONFIG_P_SHFT 13 | ||
1317 | #define UVH_RTC3_INT_CONFIG_P_MASK 0x0000000000002000UL | ||
1318 | #define UVH_RTC3_INT_CONFIG_T_SHFT 15 | ||
1319 | #define UVH_RTC3_INT_CONFIG_T_MASK 0x0000000000008000UL | ||
1320 | #define UVH_RTC3_INT_CONFIG_M_SHFT 16 | ||
1321 | #define UVH_RTC3_INT_CONFIG_M_MASK 0x0000000000010000UL | ||
1322 | #define UVH_RTC3_INT_CONFIG_APIC_ID_SHFT 32 | ||
1323 | #define UVH_RTC3_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL | ||
1324 | |||
1325 | union uvh_rtc3_int_config_u { | ||
1326 | unsigned long v; | ||
1327 | struct uvh_rtc3_int_config_s { | ||
1328 | unsigned long vector_ : 8; /* RW */ | ||
1329 | unsigned long dm : 3; /* RW */ | ||
1330 | unsigned long destmode : 1; /* RW */ | ||
1331 | unsigned long status : 1; /* RO */ | ||
1332 | unsigned long p : 1; /* RO */ | ||
1333 | unsigned long rsvd_14 : 1; /* */ | ||
1334 | unsigned long t : 1; /* RO */ | ||
1335 | unsigned long m : 1; /* RW */ | ||
1336 | unsigned long rsvd_17_31: 15; /* */ | ||
1337 | unsigned long apic_id : 32; /* RW */ | ||
1338 | } s; | ||
1339 | }; | ||
1340 | |||
1341 | /* ========================================================================= */ | ||
1342 | /* UVH_RTC_INC_RATIO */ | ||
1343 | /* ========================================================================= */ | ||
1344 | #define UVH_RTC_INC_RATIO 0x350000UL | ||
1345 | |||
1346 | #define UVH_RTC_INC_RATIO_FRACTION_SHFT 0 | ||
1347 | #define UVH_RTC_INC_RATIO_FRACTION_MASK 0x00000000000fffffUL | ||
1348 | #define UVH_RTC_INC_RATIO_RATIO_SHFT 20 | ||
1349 | #define UVH_RTC_INC_RATIO_RATIO_MASK 0x0000000000700000UL | ||
1350 | |||
1351 | union uvh_rtc_inc_ratio_u { | ||
1352 | unsigned long v; | ||
1353 | struct uvh_rtc_inc_ratio_s { | ||
1354 | unsigned long fraction : 20; /* RW */ | ||
1355 | unsigned long ratio : 3; /* RW */ | ||
1356 | unsigned long rsvd_23_63: 41; /* */ | ||
1357 | } s; | ||
1358 | }; | ||
1359 | |||
1360 | /* ========================================================================= */ | ||
1361 | /* UVH_SI_ADDR_MAP_CONFIG */ | 991 | /* UVH_SI_ADDR_MAP_CONFIG */ |
1362 | /* ========================================================================= */ | 992 | /* ========================================================================= */ |
1363 | #define UVH_SI_ADDR_MAP_CONFIG 0xc80000UL | 993 | #define UVH_SI_ADDR_MAP_CONFIG 0xc80000UL |
diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h deleted file mode 100644 index e49ed6d2fd4e..000000000000 --- a/arch/x86/include/asm/vmware.h +++ /dev/null | |||
@@ -1,27 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008, VMware, Inc. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, but | ||
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
12 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
13 | * details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | ||
18 | * | ||
19 | */ | ||
20 | #ifndef ASM_X86__VMWARE_H | ||
21 | #define ASM_X86__VMWARE_H | ||
22 | |||
23 | extern void vmware_platform_setup(void); | ||
24 | extern int vmware_platform(void); | ||
25 | extern void vmware_set_feature_bits(struct cpuinfo_x86 *c); | ||
26 | |||
27 | #endif | ||
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index ddc04ccad03b..2c4390cae228 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h | |||
@@ -37,8 +37,9 @@ extern int check_for_xstate(struct i387_fxsave_struct __user *buf, | |||
37 | void __user *fpstate, | 37 | void __user *fpstate, |
38 | struct _fpx_sw_bytes *sw); | 38 | struct _fpx_sw_bytes *sw); |
39 | 39 | ||
40 | static inline int xrstor_checking(struct xsave_struct *fx) | 40 | static inline int fpu_xrstor_checking(struct fpu *fpu) |
41 | { | 41 | { |
42 | struct xsave_struct *fx = &fpu->state->xsave; | ||
42 | int err; | 43 | int err; |
43 | 44 | ||
44 | asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" | 45 | asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" |
@@ -110,12 +111,12 @@ static inline void xrstor_state(struct xsave_struct *fx, u64 mask) | |||
110 | : "memory"); | 111 | : "memory"); |
111 | } | 112 | } |
112 | 113 | ||
113 | static inline void xsave(struct task_struct *tsk) | 114 | static inline void fpu_xsave(struct fpu *fpu) |
114 | { | 115 | { |
115 | /* This, however, we can work around by forcing the compiler to select | 116 | /* This, however, we can work around by forcing the compiler to select |
116 | an addressing mode that doesn't require extended registers. */ | 117 | an addressing mode that doesn't require extended registers. */ |
117 | __asm__ __volatile__(".byte " REX_PREFIX "0x0f,0xae,0x27" | 118 | __asm__ __volatile__(".byte " REX_PREFIX "0x0f,0xae,0x27" |
118 | : : "D" (&(tsk->thread.xstate->xsave)), | 119 | : : "D" (&(fpu->state->xsave)), |
119 | "a" (-1), "d"(-1) : "memory"); | 120 | "a" (-1), "d"(-1) : "memory"); |
120 | } | 121 | } |
121 | #endif | 122 | #endif |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 4c58352209e0..e77b22083721 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -47,8 +47,6 @@ obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o | |||
47 | obj-y += process.o | 47 | obj-y += process.o |
48 | obj-y += i387.o xsave.o | 48 | obj-y += i387.o xsave.o |
49 | obj-y += ptrace.o | 49 | obj-y += ptrace.o |
50 | obj-$(CONFIG_X86_DS) += ds.o | ||
51 | obj-$(CONFIG_X86_DS_SELFTEST) += ds_selftest.o | ||
52 | obj-$(CONFIG_X86_32) += tls.o | 50 | obj-$(CONFIG_X86_32) += tls.o |
53 | obj-$(CONFIG_IA32_EMULATION) += tls.o | 51 | obj-$(CONFIG_IA32_EMULATION) += tls.o |
54 | obj-y += step.o | 52 | obj-y += step.o |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index cd40aba6aa95..9a5ed58f09dc 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -94,6 +94,53 @@ enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC; | |||
94 | 94 | ||
95 | 95 | ||
96 | /* | 96 | /* |
97 | * ISA irqs by default are the first 16 gsis but can be | ||
98 | * any gsi as specified by an interrupt source override. | ||
99 | */ | ||
100 | static u32 isa_irq_to_gsi[NR_IRQS_LEGACY] __read_mostly = { | ||
101 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 | ||
102 | }; | ||
103 | |||
104 | static unsigned int gsi_to_irq(unsigned int gsi) | ||
105 | { | ||
106 | unsigned int irq = gsi + NR_IRQS_LEGACY; | ||
107 | unsigned int i; | ||
108 | |||
109 | for (i = 0; i < NR_IRQS_LEGACY; i++) { | ||
110 | if (isa_irq_to_gsi[i] == gsi) { | ||
111 | return i; | ||
112 | } | ||
113 | } | ||
114 | |||
115 | /* Provide an identity mapping of gsi == irq | ||
116 | * except on truly weird platforms that have | ||
117 | * non isa irqs in the first 16 gsis. | ||
118 | */ | ||
119 | if (gsi >= NR_IRQS_LEGACY) | ||
120 | irq = gsi; | ||
121 | else | ||
122 | irq = gsi_end + 1 + gsi; | ||
123 | |||
124 | return irq; | ||
125 | } | ||
126 | |||
127 | static u32 irq_to_gsi(int irq) | ||
128 | { | ||
129 | unsigned int gsi; | ||
130 | |||
131 | if (irq < NR_IRQS_LEGACY) | ||
132 | gsi = isa_irq_to_gsi[irq]; | ||
133 | else if (irq <= gsi_end) | ||
134 | gsi = irq; | ||
135 | else if (irq <= (gsi_end + NR_IRQS_LEGACY)) | ||
136 | gsi = irq - gsi_end; | ||
137 | else | ||
138 | gsi = 0xffffffff; | ||
139 | |||
140 | return gsi; | ||
141 | } | ||
142 | |||
143 | /* | ||
97 | * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END, | 144 | * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END, |
98 | * to map the target physical address. The problem is that set_fixmap() | 145 | * to map the target physical address. The problem is that set_fixmap() |
99 | * provides a single page, and it is possible that the page is not | 146 | * provides a single page, and it is possible that the page is not |
@@ -313,7 +360,7 @@ acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) | |||
313 | /* | 360 | /* |
314 | * Parse Interrupt Source Override for the ACPI SCI | 361 | * Parse Interrupt Source Override for the ACPI SCI |
315 | */ | 362 | */ |
316 | static void __init acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) | 363 | static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger, u32 gsi) |
317 | { | 364 | { |
318 | if (trigger == 0) /* compatible SCI trigger is level */ | 365 | if (trigger == 0) /* compatible SCI trigger is level */ |
319 | trigger = 3; | 366 | trigger = 3; |
@@ -333,7 +380,7 @@ static void __init acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) | |||
333 | * If GSI is < 16, this will update its flags, | 380 | * If GSI is < 16, this will update its flags, |
334 | * else it will create a new mp_irqs[] entry. | 381 | * else it will create a new mp_irqs[] entry. |
335 | */ | 382 | */ |
336 | mp_override_legacy_irq(gsi, polarity, trigger, gsi); | 383 | mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); |
337 | 384 | ||
338 | /* | 385 | /* |
339 | * stash over-ride to indicate we've been here | 386 | * stash over-ride to indicate we've been here |
@@ -357,9 +404,10 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header, | |||
357 | acpi_table_print_madt_entry(header); | 404 | acpi_table_print_madt_entry(header); |
358 | 405 | ||
359 | if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) { | 406 | if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) { |
360 | acpi_sci_ioapic_setup(intsrc->global_irq, | 407 | acpi_sci_ioapic_setup(intsrc->source_irq, |
361 | intsrc->inti_flags & ACPI_MADT_POLARITY_MASK, | 408 | intsrc->inti_flags & ACPI_MADT_POLARITY_MASK, |
362 | (intsrc->inti_flags & ACPI_MADT_TRIGGER_MASK) >> 2); | 409 | (intsrc->inti_flags & ACPI_MADT_TRIGGER_MASK) >> 2, |
410 | intsrc->global_irq); | ||
363 | return 0; | 411 | return 0; |
364 | } | 412 | } |
365 | 413 | ||
@@ -448,7 +496,7 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) | |||
448 | 496 | ||
449 | int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) | 497 | int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) |
450 | { | 498 | { |
451 | *irq = gsi; | 499 | *irq = gsi_to_irq(gsi); |
452 | 500 | ||
453 | #ifdef CONFIG_X86_IO_APIC | 501 | #ifdef CONFIG_X86_IO_APIC |
454 | if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) | 502 | if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) |
@@ -458,6 +506,14 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) | |||
458 | return 0; | 506 | return 0; |
459 | } | 507 | } |
460 | 508 | ||
509 | int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi) | ||
510 | { | ||
511 | if (isa_irq >= 16) | ||
512 | return -1; | ||
513 | *gsi = irq_to_gsi(isa_irq); | ||
514 | return 0; | ||
515 | } | ||
516 | |||
461 | /* | 517 | /* |
462 | * success: return IRQ number (>=0) | 518 | * success: return IRQ number (>=0) |
463 | * failure: return < 0 | 519 | * failure: return < 0 |
@@ -482,7 +538,7 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) | |||
482 | plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity); | 538 | plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity); |
483 | } | 539 | } |
484 | #endif | 540 | #endif |
485 | irq = plat_gsi; | 541 | irq = gsi_to_irq(plat_gsi); |
486 | 542 | ||
487 | return irq; | 543 | return irq; |
488 | } | 544 | } |
@@ -867,29 +923,6 @@ static int __init acpi_parse_madt_lapic_entries(void) | |||
867 | extern int es7000_plat; | 923 | extern int es7000_plat; |
868 | #endif | 924 | #endif |
869 | 925 | ||
870 | int __init acpi_probe_gsi(void) | ||
871 | { | ||
872 | int idx; | ||
873 | int gsi; | ||
874 | int max_gsi = 0; | ||
875 | |||
876 | if (acpi_disabled) | ||
877 | return 0; | ||
878 | |||
879 | if (!acpi_ioapic) | ||
880 | return 0; | ||
881 | |||
882 | max_gsi = 0; | ||
883 | for (idx = 0; idx < nr_ioapics; idx++) { | ||
884 | gsi = mp_gsi_routing[idx].gsi_end; | ||
885 | |||
886 | if (gsi > max_gsi) | ||
887 | max_gsi = gsi; | ||
888 | } | ||
889 | |||
890 | return max_gsi + 1; | ||
891 | } | ||
892 | |||
893 | static void assign_to_mp_irq(struct mpc_intsrc *m, | 926 | static void assign_to_mp_irq(struct mpc_intsrc *m, |
894 | struct mpc_intsrc *mp_irq) | 927 | struct mpc_intsrc *mp_irq) |
895 | { | 928 | { |
@@ -947,13 +980,13 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) | |||
947 | mp_irq.dstirq = pin; /* INTIN# */ | 980 | mp_irq.dstirq = pin; /* INTIN# */ |
948 | 981 | ||
949 | save_mp_irq(&mp_irq); | 982 | save_mp_irq(&mp_irq); |
983 | |||
984 | isa_irq_to_gsi[bus_irq] = gsi; | ||
950 | } | 985 | } |
951 | 986 | ||
952 | void __init mp_config_acpi_legacy_irqs(void) | 987 | void __init mp_config_acpi_legacy_irqs(void) |
953 | { | 988 | { |
954 | int i; | 989 | int i; |
955 | int ioapic; | ||
956 | unsigned int dstapic; | ||
957 | struct mpc_intsrc mp_irq; | 990 | struct mpc_intsrc mp_irq; |
958 | 991 | ||
959 | #if defined (CONFIG_MCA) || defined (CONFIG_EISA) | 992 | #if defined (CONFIG_MCA) || defined (CONFIG_EISA) |
@@ -974,19 +1007,27 @@ void __init mp_config_acpi_legacy_irqs(void) | |||
974 | #endif | 1007 | #endif |
975 | 1008 | ||
976 | /* | 1009 | /* |
977 | * Locate the IOAPIC that manages the ISA IRQs (0-15). | ||
978 | */ | ||
979 | ioapic = mp_find_ioapic(0); | ||
980 | if (ioapic < 0) | ||
981 | return; | ||
982 | dstapic = mp_ioapics[ioapic].apicid; | ||
983 | |||
984 | /* | ||
985 | * Use the default configuration for the IRQs 0-15. Unless | 1010 | * Use the default configuration for the IRQs 0-15. Unless |
986 | * overridden by (MADT) interrupt source override entries. | 1011 | * overridden by (MADT) interrupt source override entries. |
987 | */ | 1012 | */ |
988 | for (i = 0; i < 16; i++) { | 1013 | for (i = 0; i < 16; i++) { |
1014 | int ioapic, pin; | ||
1015 | unsigned int dstapic; | ||
989 | int idx; | 1016 | int idx; |
1017 | u32 gsi; | ||
1018 | |||
1019 | /* Locate the gsi that irq i maps to. */ | ||
1020 | if (acpi_isa_irq_to_gsi(i, &gsi)) | ||
1021 | continue; | ||
1022 | |||
1023 | /* | ||
1024 | * Locate the IOAPIC that manages the ISA IRQ. | ||
1025 | */ | ||
1026 | ioapic = mp_find_ioapic(gsi); | ||
1027 | if (ioapic < 0) | ||
1028 | continue; | ||
1029 | pin = mp_find_ioapic_pin(ioapic, gsi); | ||
1030 | dstapic = mp_ioapics[ioapic].apicid; | ||
990 | 1031 | ||
991 | for (idx = 0; idx < mp_irq_entries; idx++) { | 1032 | for (idx = 0; idx < mp_irq_entries; idx++) { |
992 | struct mpc_intsrc *irq = mp_irqs + idx; | 1033 | struct mpc_intsrc *irq = mp_irqs + idx; |
@@ -996,7 +1037,7 @@ void __init mp_config_acpi_legacy_irqs(void) | |||
996 | break; | 1037 | break; |
997 | 1038 | ||
998 | /* Do we already have a mapping for this IOAPIC pin */ | 1039 | /* Do we already have a mapping for this IOAPIC pin */ |
999 | if (irq->dstapic == dstapic && irq->dstirq == i) | 1040 | if (irq->dstapic == dstapic && irq->dstirq == pin) |
1000 | break; | 1041 | break; |
1001 | } | 1042 | } |
1002 | 1043 | ||
@@ -1011,7 +1052,7 @@ void __init mp_config_acpi_legacy_irqs(void) | |||
1011 | mp_irq.dstapic = dstapic; | 1052 | mp_irq.dstapic = dstapic; |
1012 | mp_irq.irqtype = mp_INT; | 1053 | mp_irq.irqtype = mp_INT; |
1013 | mp_irq.srcbusirq = i; /* Identity mapped */ | 1054 | mp_irq.srcbusirq = i; /* Identity mapped */ |
1014 | mp_irq.dstirq = i; | 1055 | mp_irq.dstirq = pin; |
1015 | 1056 | ||
1016 | save_mp_irq(&mp_irq); | 1057 | save_mp_irq(&mp_irq); |
1017 | } | 1058 | } |
@@ -1076,11 +1117,6 @@ int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) | |||
1076 | 1117 | ||
1077 | ioapic_pin = mp_find_ioapic_pin(ioapic, gsi); | 1118 | ioapic_pin = mp_find_ioapic_pin(ioapic, gsi); |
1078 | 1119 | ||
1079 | #ifdef CONFIG_X86_32 | ||
1080 | if (ioapic_renumber_irq) | ||
1081 | gsi = ioapic_renumber_irq(ioapic, gsi); | ||
1082 | #endif | ||
1083 | |||
1084 | if (ioapic_pin > MP_MAX_IOAPIC_PIN) { | 1120 | if (ioapic_pin > MP_MAX_IOAPIC_PIN) { |
1085 | printk(KERN_ERR "Invalid reference to IOAPIC pin " | 1121 | printk(KERN_ERR "Invalid reference to IOAPIC pin " |
1086 | "%d-%d\n", mp_ioapics[ioapic].apicid, | 1122 | "%d-%d\n", mp_ioapics[ioapic].apicid, |
@@ -1094,7 +1130,7 @@ int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) | |||
1094 | set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin, | 1130 | set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin, |
1095 | trigger == ACPI_EDGE_SENSITIVE ? 0 : 1, | 1131 | trigger == ACPI_EDGE_SENSITIVE ? 0 : 1, |
1096 | polarity == ACPI_ACTIVE_HIGH ? 0 : 1); | 1132 | polarity == ACPI_ACTIVE_HIGH ? 0 : 1); |
1097 | io_apic_set_pci_routing(dev, gsi, &irq_attr); | 1133 | io_apic_set_pci_routing(dev, gsi_to_irq(gsi), &irq_attr); |
1098 | 1134 | ||
1099 | return gsi; | 1135 | return gsi; |
1100 | } | 1136 | } |
@@ -1154,7 +1190,8 @@ static int __init acpi_parse_madt_ioapic_entries(void) | |||
1154 | * pretend we got one so we can set the SCI flags. | 1190 | * pretend we got one so we can set the SCI flags. |
1155 | */ | 1191 | */ |
1156 | if (!acpi_sci_override_gsi) | 1192 | if (!acpi_sci_override_gsi) |
1157 | acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0); | 1193 | acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0, |
1194 | acpi_gbl_FADT.sci_interrupt); | ||
1158 | 1195 | ||
1159 | /* Fill in identity legacy mappings where no override */ | 1196 | /* Fill in identity legacy mappings where no override */ |
1160 | mp_config_acpi_legacy_irqs(); | 1197 | mp_config_acpi_legacy_irqs(); |
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 1a160d5d44d0..70237732a6c7 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -194,7 +194,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) | |||
194 | } | 194 | } |
195 | 195 | ||
196 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; | 196 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; |
197 | extern u8 *__smp_locks[], *__smp_locks_end[]; | 197 | extern s32 __smp_locks[], __smp_locks_end[]; |
198 | static void *text_poke_early(void *addr, const void *opcode, size_t len); | 198 | static void *text_poke_early(void *addr, const void *opcode, size_t len); |
199 | 199 | ||
200 | /* Replace instructions with better alternatives for this CPU type. | 200 | /* Replace instructions with better alternatives for this CPU type. |
@@ -235,37 +235,41 @@ void __init_or_module apply_alternatives(struct alt_instr *start, | |||
235 | 235 | ||
236 | #ifdef CONFIG_SMP | 236 | #ifdef CONFIG_SMP |
237 | 237 | ||
238 | static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) | 238 | static void alternatives_smp_lock(const s32 *start, const s32 *end, |
239 | u8 *text, u8 *text_end) | ||
239 | { | 240 | { |
240 | u8 **ptr; | 241 | const s32 *poff; |
241 | 242 | ||
242 | mutex_lock(&text_mutex); | 243 | mutex_lock(&text_mutex); |
243 | for (ptr = start; ptr < end; ptr++) { | 244 | for (poff = start; poff < end; poff++) { |
244 | if (*ptr < text) | 245 | u8 *ptr = (u8 *)poff + *poff; |
245 | continue; | 246 | |
246 | if (*ptr > text_end) | 247 | if (!*poff || ptr < text || ptr >= text_end) |
247 | continue; | 248 | continue; |
248 | /* turn DS segment override prefix into lock prefix */ | 249 | /* turn DS segment override prefix into lock prefix */ |
249 | text_poke(*ptr, ((unsigned char []){0xf0}), 1); | 250 | if (*ptr == 0x3e) |
251 | text_poke(ptr, ((unsigned char []){0xf0}), 1); | ||
250 | }; | 252 | }; |
251 | mutex_unlock(&text_mutex); | 253 | mutex_unlock(&text_mutex); |
252 | } | 254 | } |
253 | 255 | ||
254 | static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) | 256 | static void alternatives_smp_unlock(const s32 *start, const s32 *end, |
257 | u8 *text, u8 *text_end) | ||
255 | { | 258 | { |
256 | u8 **ptr; | 259 | const s32 *poff; |
257 | 260 | ||
258 | if (noreplace_smp) | 261 | if (noreplace_smp) |
259 | return; | 262 | return; |
260 | 263 | ||
261 | mutex_lock(&text_mutex); | 264 | mutex_lock(&text_mutex); |
262 | for (ptr = start; ptr < end; ptr++) { | 265 | for (poff = start; poff < end; poff++) { |
263 | if (*ptr < text) | 266 | u8 *ptr = (u8 *)poff + *poff; |
264 | continue; | 267 | |
265 | if (*ptr > text_end) | 268 | if (!*poff || ptr < text || ptr >= text_end) |
266 | continue; | 269 | continue; |
267 | /* turn lock prefix into DS segment override prefix */ | 270 | /* turn lock prefix into DS segment override prefix */ |
268 | text_poke(*ptr, ((unsigned char []){0x3E}), 1); | 271 | if (*ptr == 0xf0) |
272 | text_poke(ptr, ((unsigned char []){0x3E}), 1); | ||
269 | }; | 273 | }; |
270 | mutex_unlock(&text_mutex); | 274 | mutex_unlock(&text_mutex); |
271 | } | 275 | } |
@@ -276,8 +280,8 @@ struct smp_alt_module { | |||
276 | char *name; | 280 | char *name; |
277 | 281 | ||
278 | /* ptrs to lock prefixes */ | 282 | /* ptrs to lock prefixes */ |
279 | u8 **locks; | 283 | const s32 *locks; |
280 | u8 **locks_end; | 284 | const s32 *locks_end; |
281 | 285 | ||
282 | /* .text segment, needed to avoid patching init code ;) */ | 286 | /* .text segment, needed to avoid patching init code ;) */ |
283 | u8 *text; | 287 | u8 *text; |
@@ -398,16 +402,19 @@ void alternatives_smp_switch(int smp) | |||
398 | int alternatives_text_reserved(void *start, void *end) | 402 | int alternatives_text_reserved(void *start, void *end) |
399 | { | 403 | { |
400 | struct smp_alt_module *mod; | 404 | struct smp_alt_module *mod; |
401 | u8 **ptr; | 405 | const s32 *poff; |
402 | u8 *text_start = start; | 406 | u8 *text_start = start; |
403 | u8 *text_end = end; | 407 | u8 *text_end = end; |
404 | 408 | ||
405 | list_for_each_entry(mod, &smp_alt_modules, next) { | 409 | list_for_each_entry(mod, &smp_alt_modules, next) { |
406 | if (mod->text > text_end || mod->text_end < text_start) | 410 | if (mod->text > text_end || mod->text_end < text_start) |
407 | continue; | 411 | continue; |
408 | for (ptr = mod->locks; ptr < mod->locks_end; ptr++) | 412 | for (poff = mod->locks; poff < mod->locks_end; poff++) { |
409 | if (text_start <= *ptr && text_end >= *ptr) | 413 | const u8 *ptr = (const u8 *)poff + *poff; |
414 | |||
415 | if (text_start <= ptr && text_end > ptr) | ||
410 | return 1; | 416 | return 1; |
417 | } | ||
411 | } | 418 | } |
412 | 419 | ||
413 | return 0; | 420 | return 0; |
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index f854d89b7edf..fa5a1474cd18 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
@@ -731,18 +731,22 @@ static bool increase_address_space(struct protection_domain *domain, | |||
731 | 731 | ||
732 | static u64 *alloc_pte(struct protection_domain *domain, | 732 | static u64 *alloc_pte(struct protection_domain *domain, |
733 | unsigned long address, | 733 | unsigned long address, |
734 | int end_lvl, | 734 | unsigned long page_size, |
735 | u64 **pte_page, | 735 | u64 **pte_page, |
736 | gfp_t gfp) | 736 | gfp_t gfp) |
737 | { | 737 | { |
738 | int level, end_lvl; | ||
738 | u64 *pte, *page; | 739 | u64 *pte, *page; |
739 | int level; | 740 | |
741 | BUG_ON(!is_power_of_2(page_size)); | ||
740 | 742 | ||
741 | while (address > PM_LEVEL_SIZE(domain->mode)) | 743 | while (address > PM_LEVEL_SIZE(domain->mode)) |
742 | increase_address_space(domain, gfp); | 744 | increase_address_space(domain, gfp); |
743 | 745 | ||
744 | level = domain->mode - 1; | 746 | level = domain->mode - 1; |
745 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; | 747 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; |
748 | address = PAGE_SIZE_ALIGN(address, page_size); | ||
749 | end_lvl = PAGE_SIZE_LEVEL(page_size); | ||
746 | 750 | ||
747 | while (level > end_lvl) { | 751 | while (level > end_lvl) { |
748 | if (!IOMMU_PTE_PRESENT(*pte)) { | 752 | if (!IOMMU_PTE_PRESENT(*pte)) { |
@@ -752,6 +756,10 @@ static u64 *alloc_pte(struct protection_domain *domain, | |||
752 | *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); | 756 | *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); |
753 | } | 757 | } |
754 | 758 | ||
759 | /* No level skipping support yet */ | ||
760 | if (PM_PTE_LEVEL(*pte) != level) | ||
761 | return NULL; | ||
762 | |||
755 | level -= 1; | 763 | level -= 1; |
756 | 764 | ||
757 | pte = IOMMU_PTE_PAGE(*pte); | 765 | pte = IOMMU_PTE_PAGE(*pte); |
@@ -769,28 +777,47 @@ static u64 *alloc_pte(struct protection_domain *domain, | |||
769 | * This function checks if there is a PTE for a given dma address. If | 777 | * This function checks if there is a PTE for a given dma address. If |
770 | * there is one, it returns the pointer to it. | 778 | * there is one, it returns the pointer to it. |
771 | */ | 779 | */ |
772 | static u64 *fetch_pte(struct protection_domain *domain, | 780 | static u64 *fetch_pte(struct protection_domain *domain, unsigned long address) |
773 | unsigned long address, int map_size) | ||
774 | { | 781 | { |
775 | int level; | 782 | int level; |
776 | u64 *pte; | 783 | u64 *pte; |
777 | 784 | ||
778 | level = domain->mode - 1; | 785 | if (address > PM_LEVEL_SIZE(domain->mode)) |
779 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; | 786 | return NULL; |
787 | |||
788 | level = domain->mode - 1; | ||
789 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; | ||
780 | 790 | ||
781 | while (level > map_size) { | 791 | while (level > 0) { |
792 | |||
793 | /* Not Present */ | ||
782 | if (!IOMMU_PTE_PRESENT(*pte)) | 794 | if (!IOMMU_PTE_PRESENT(*pte)) |
783 | return NULL; | 795 | return NULL; |
784 | 796 | ||
797 | /* Large PTE */ | ||
798 | if (PM_PTE_LEVEL(*pte) == 0x07) { | ||
799 | unsigned long pte_mask, __pte; | ||
800 | |||
801 | /* | ||
802 | * If we have a series of large PTEs, make | ||
803 | * sure to return a pointer to the first one. | ||
804 | */ | ||
805 | pte_mask = PTE_PAGE_SIZE(*pte); | ||
806 | pte_mask = ~((PAGE_SIZE_PTE_COUNT(pte_mask) << 3) - 1); | ||
807 | __pte = ((unsigned long)pte) & pte_mask; | ||
808 | |||
809 | return (u64 *)__pte; | ||
810 | } | ||
811 | |||
812 | /* No level skipping support yet */ | ||
813 | if (PM_PTE_LEVEL(*pte) != level) | ||
814 | return NULL; | ||
815 | |||
785 | level -= 1; | 816 | level -= 1; |
786 | 817 | ||
818 | /* Walk to the next level */ | ||
787 | pte = IOMMU_PTE_PAGE(*pte); | 819 | pte = IOMMU_PTE_PAGE(*pte); |
788 | pte = &pte[PM_LEVEL_INDEX(level, address)]; | 820 | pte = &pte[PM_LEVEL_INDEX(level, address)]; |
789 | |||
790 | if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) { | ||
791 | pte = NULL; | ||
792 | break; | ||
793 | } | ||
794 | } | 821 | } |
795 | 822 | ||
796 | return pte; | 823 | return pte; |
@@ -807,44 +834,84 @@ static int iommu_map_page(struct protection_domain *dom, | |||
807 | unsigned long bus_addr, | 834 | unsigned long bus_addr, |
808 | unsigned long phys_addr, | 835 | unsigned long phys_addr, |
809 | int prot, | 836 | int prot, |
810 | int map_size) | 837 | unsigned long page_size) |
811 | { | 838 | { |
812 | u64 __pte, *pte; | 839 | u64 __pte, *pte; |
813 | 840 | int i, count; | |
814 | bus_addr = PAGE_ALIGN(bus_addr); | ||
815 | phys_addr = PAGE_ALIGN(phys_addr); | ||
816 | |||
817 | BUG_ON(!PM_ALIGNED(map_size, bus_addr)); | ||
818 | BUG_ON(!PM_ALIGNED(map_size, phys_addr)); | ||
819 | 841 | ||
820 | if (!(prot & IOMMU_PROT_MASK)) | 842 | if (!(prot & IOMMU_PROT_MASK)) |
821 | return -EINVAL; | 843 | return -EINVAL; |
822 | 844 | ||
823 | pte = alloc_pte(dom, bus_addr, map_size, NULL, GFP_KERNEL); | 845 | bus_addr = PAGE_ALIGN(bus_addr); |
846 | phys_addr = PAGE_ALIGN(phys_addr); | ||
847 | count = PAGE_SIZE_PTE_COUNT(page_size); | ||
848 | pte = alloc_pte(dom, bus_addr, page_size, NULL, GFP_KERNEL); | ||
849 | |||
850 | for (i = 0; i < count; ++i) | ||
851 | if (IOMMU_PTE_PRESENT(pte[i])) | ||
852 | return -EBUSY; | ||
824 | 853 | ||
825 | if (IOMMU_PTE_PRESENT(*pte)) | 854 | if (page_size > PAGE_SIZE) { |
826 | return -EBUSY; | 855 | __pte = PAGE_SIZE_PTE(phys_addr, page_size); |
856 | __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC; | ||
857 | } else | ||
858 | __pte = phys_addr | IOMMU_PTE_P | IOMMU_PTE_FC; | ||
827 | 859 | ||
828 | __pte = phys_addr | IOMMU_PTE_P; | ||
829 | if (prot & IOMMU_PROT_IR) | 860 | if (prot & IOMMU_PROT_IR) |
830 | __pte |= IOMMU_PTE_IR; | 861 | __pte |= IOMMU_PTE_IR; |
831 | if (prot & IOMMU_PROT_IW) | 862 | if (prot & IOMMU_PROT_IW) |
832 | __pte |= IOMMU_PTE_IW; | 863 | __pte |= IOMMU_PTE_IW; |
833 | 864 | ||
834 | *pte = __pte; | 865 | for (i = 0; i < count; ++i) |
866 | pte[i] = __pte; | ||
835 | 867 | ||
836 | update_domain(dom); | 868 | update_domain(dom); |
837 | 869 | ||
838 | return 0; | 870 | return 0; |
839 | } | 871 | } |
840 | 872 | ||
841 | static void iommu_unmap_page(struct protection_domain *dom, | 873 | static unsigned long iommu_unmap_page(struct protection_domain *dom, |
842 | unsigned long bus_addr, int map_size) | 874 | unsigned long bus_addr, |
875 | unsigned long page_size) | ||
843 | { | 876 | { |
844 | u64 *pte = fetch_pte(dom, bus_addr, map_size); | 877 | unsigned long long unmap_size, unmapped; |
878 | u64 *pte; | ||
879 | |||
880 | BUG_ON(!is_power_of_2(page_size)); | ||
881 | |||
882 | unmapped = 0; | ||
845 | 883 | ||
846 | if (pte) | 884 | while (unmapped < page_size) { |
847 | *pte = 0; | 885 | |
886 | pte = fetch_pte(dom, bus_addr); | ||
887 | |||
888 | if (!pte) { | ||
889 | /* | ||
890 | * No PTE for this address | ||
891 | * move forward in 4kb steps | ||
892 | */ | ||
893 | unmap_size = PAGE_SIZE; | ||
894 | } else if (PM_PTE_LEVEL(*pte) == 0) { | ||
895 | /* 4kb PTE found for this address */ | ||
896 | unmap_size = PAGE_SIZE; | ||
897 | *pte = 0ULL; | ||
898 | } else { | ||
899 | int count, i; | ||
900 | |||
901 | /* Large PTE found which maps this address */ | ||
902 | unmap_size = PTE_PAGE_SIZE(*pte); | ||
903 | count = PAGE_SIZE_PTE_COUNT(unmap_size); | ||
904 | for (i = 0; i < count; i++) | ||
905 | pte[i] = 0ULL; | ||
906 | } | ||
907 | |||
908 | bus_addr = (bus_addr & ~(unmap_size - 1)) + unmap_size; | ||
909 | unmapped += unmap_size; | ||
910 | } | ||
911 | |||
912 | BUG_ON(!is_power_of_2(unmapped)); | ||
913 | |||
914 | return unmapped; | ||
848 | } | 915 | } |
849 | 916 | ||
850 | /* | 917 | /* |
@@ -878,7 +945,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | |||
878 | for (addr = e->address_start; addr < e->address_end; | 945 | for (addr = e->address_start; addr < e->address_end; |
879 | addr += PAGE_SIZE) { | 946 | addr += PAGE_SIZE) { |
880 | ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot, | 947 | ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot, |
881 | PM_MAP_4k); | 948 | PAGE_SIZE); |
882 | if (ret) | 949 | if (ret) |
883 | return ret; | 950 | return ret; |
884 | /* | 951 | /* |
@@ -1006,7 +1073,7 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, | |||
1006 | u64 *pte, *pte_page; | 1073 | u64 *pte, *pte_page; |
1007 | 1074 | ||
1008 | for (i = 0; i < num_ptes; ++i) { | 1075 | for (i = 0; i < num_ptes; ++i) { |
1009 | pte = alloc_pte(&dma_dom->domain, address, PM_MAP_4k, | 1076 | pte = alloc_pte(&dma_dom->domain, address, PAGE_SIZE, |
1010 | &pte_page, gfp); | 1077 | &pte_page, gfp); |
1011 | if (!pte) | 1078 | if (!pte) |
1012 | goto out_free; | 1079 | goto out_free; |
@@ -1042,7 +1109,7 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom, | |||
1042 | for (i = dma_dom->aperture[index]->offset; | 1109 | for (i = dma_dom->aperture[index]->offset; |
1043 | i < dma_dom->aperture_size; | 1110 | i < dma_dom->aperture_size; |
1044 | i += PAGE_SIZE) { | 1111 | i += PAGE_SIZE) { |
1045 | u64 *pte = fetch_pte(&dma_dom->domain, i, PM_MAP_4k); | 1112 | u64 *pte = fetch_pte(&dma_dom->domain, i); |
1046 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) | 1113 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) |
1047 | continue; | 1114 | continue; |
1048 | 1115 | ||
@@ -1712,7 +1779,7 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom, | |||
1712 | 1779 | ||
1713 | pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; | 1780 | pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; |
1714 | if (!pte) { | 1781 | if (!pte) { |
1715 | pte = alloc_pte(&dom->domain, address, PM_MAP_4k, &pte_page, | 1782 | pte = alloc_pte(&dom->domain, address, PAGE_SIZE, &pte_page, |
1716 | GFP_ATOMIC); | 1783 | GFP_ATOMIC); |
1717 | aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; | 1784 | aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; |
1718 | } else | 1785 | } else |
@@ -2439,12 +2506,11 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, | |||
2439 | return ret; | 2506 | return ret; |
2440 | } | 2507 | } |
2441 | 2508 | ||
2442 | static int amd_iommu_map_range(struct iommu_domain *dom, | 2509 | static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, |
2443 | unsigned long iova, phys_addr_t paddr, | 2510 | phys_addr_t paddr, int gfp_order, int iommu_prot) |
2444 | size_t size, int iommu_prot) | ||
2445 | { | 2511 | { |
2512 | unsigned long page_size = 0x1000UL << gfp_order; | ||
2446 | struct protection_domain *domain = dom->priv; | 2513 | struct protection_domain *domain = dom->priv; |
2447 | unsigned long i, npages = iommu_num_pages(paddr, size, PAGE_SIZE); | ||
2448 | int prot = 0; | 2514 | int prot = 0; |
2449 | int ret; | 2515 | int ret; |
2450 | 2516 | ||
@@ -2453,61 +2519,50 @@ static int amd_iommu_map_range(struct iommu_domain *dom, | |||
2453 | if (iommu_prot & IOMMU_WRITE) | 2519 | if (iommu_prot & IOMMU_WRITE) |
2454 | prot |= IOMMU_PROT_IW; | 2520 | prot |= IOMMU_PROT_IW; |
2455 | 2521 | ||
2456 | iova &= PAGE_MASK; | ||
2457 | paddr &= PAGE_MASK; | ||
2458 | |||
2459 | mutex_lock(&domain->api_lock); | 2522 | mutex_lock(&domain->api_lock); |
2460 | 2523 | ret = iommu_map_page(domain, iova, paddr, prot, page_size); | |
2461 | for (i = 0; i < npages; ++i) { | ||
2462 | ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k); | ||
2463 | if (ret) | ||
2464 | return ret; | ||
2465 | |||
2466 | iova += PAGE_SIZE; | ||
2467 | paddr += PAGE_SIZE; | ||
2468 | } | ||
2469 | |||
2470 | mutex_unlock(&domain->api_lock); | 2524 | mutex_unlock(&domain->api_lock); |
2471 | 2525 | ||
2472 | return 0; | 2526 | return ret; |
2473 | } | 2527 | } |
2474 | 2528 | ||
2475 | static void amd_iommu_unmap_range(struct iommu_domain *dom, | 2529 | static int amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, |
2476 | unsigned long iova, size_t size) | 2530 | int gfp_order) |
2477 | { | 2531 | { |
2478 | |||
2479 | struct protection_domain *domain = dom->priv; | 2532 | struct protection_domain *domain = dom->priv; |
2480 | unsigned long i, npages = iommu_num_pages(iova, size, PAGE_SIZE); | 2533 | unsigned long page_size, unmap_size; |
2481 | 2534 | ||
2482 | iova &= PAGE_MASK; | 2535 | page_size = 0x1000UL << gfp_order; |
2483 | 2536 | ||
2484 | mutex_lock(&domain->api_lock); | 2537 | mutex_lock(&domain->api_lock); |
2485 | 2538 | unmap_size = iommu_unmap_page(domain, iova, page_size); | |
2486 | for (i = 0; i < npages; ++i) { | 2539 | mutex_unlock(&domain->api_lock); |
2487 | iommu_unmap_page(domain, iova, PM_MAP_4k); | ||
2488 | iova += PAGE_SIZE; | ||
2489 | } | ||
2490 | 2540 | ||
2491 | iommu_flush_tlb_pde(domain); | 2541 | iommu_flush_tlb_pde(domain); |
2492 | 2542 | ||
2493 | mutex_unlock(&domain->api_lock); | 2543 | return get_order(unmap_size); |
2494 | } | 2544 | } |
2495 | 2545 | ||
2496 | static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, | 2546 | static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, |
2497 | unsigned long iova) | 2547 | unsigned long iova) |
2498 | { | 2548 | { |
2499 | struct protection_domain *domain = dom->priv; | 2549 | struct protection_domain *domain = dom->priv; |
2500 | unsigned long offset = iova & ~PAGE_MASK; | 2550 | unsigned long offset_mask; |
2501 | phys_addr_t paddr; | 2551 | phys_addr_t paddr; |
2502 | u64 *pte; | 2552 | u64 *pte, __pte; |
2503 | 2553 | ||
2504 | pte = fetch_pte(domain, iova, PM_MAP_4k); | 2554 | pte = fetch_pte(domain, iova); |
2505 | 2555 | ||
2506 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) | 2556 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) |
2507 | return 0; | 2557 | return 0; |
2508 | 2558 | ||
2509 | paddr = *pte & IOMMU_PAGE_MASK; | 2559 | if (PM_PTE_LEVEL(*pte) == 0) |
2510 | paddr |= offset; | 2560 | offset_mask = PAGE_SIZE - 1; |
2561 | else | ||
2562 | offset_mask = PTE_PAGE_SIZE(*pte) - 1; | ||
2563 | |||
2564 | __pte = *pte & PM_ADDR_MASK; | ||
2565 | paddr = (__pte & ~offset_mask) | (iova & offset_mask); | ||
2511 | 2566 | ||
2512 | return paddr; | 2567 | return paddr; |
2513 | } | 2568 | } |
@@ -2523,8 +2578,8 @@ static struct iommu_ops amd_iommu_ops = { | |||
2523 | .domain_destroy = amd_iommu_domain_destroy, | 2578 | .domain_destroy = amd_iommu_domain_destroy, |
2524 | .attach_dev = amd_iommu_attach_device, | 2579 | .attach_dev = amd_iommu_attach_device, |
2525 | .detach_dev = amd_iommu_detach_device, | 2580 | .detach_dev = amd_iommu_detach_device, |
2526 | .map = amd_iommu_map_range, | 2581 | .map = amd_iommu_map, |
2527 | .unmap = amd_iommu_unmap_range, | 2582 | .unmap = amd_iommu_unmap, |
2528 | .iova_to_phys = amd_iommu_iova_to_phys, | 2583 | .iova_to_phys = amd_iommu_iova_to_phys, |
2529 | .domain_has_cap = amd_iommu_domain_has_cap, | 2584 | .domain_has_cap = amd_iommu_domain_has_cap, |
2530 | }; | 2585 | }; |
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 6360abf993d4..3bacb4d0844c 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c | |||
@@ -120,6 +120,7 @@ struct ivmd_header { | |||
120 | bool amd_iommu_dump; | 120 | bool amd_iommu_dump; |
121 | 121 | ||
122 | static int __initdata amd_iommu_detected; | 122 | static int __initdata amd_iommu_detected; |
123 | static bool __initdata amd_iommu_disabled; | ||
123 | 124 | ||
124 | u16 amd_iommu_last_bdf; /* largest PCI device id we have | 125 | u16 amd_iommu_last_bdf; /* largest PCI device id we have |
125 | to handle */ | 126 | to handle */ |
@@ -1372,6 +1373,9 @@ void __init amd_iommu_detect(void) | |||
1372 | if (no_iommu || (iommu_detected && !gart_iommu_aperture)) | 1373 | if (no_iommu || (iommu_detected && !gart_iommu_aperture)) |
1373 | return; | 1374 | return; |
1374 | 1375 | ||
1376 | if (amd_iommu_disabled) | ||
1377 | return; | ||
1378 | |||
1375 | if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { | 1379 | if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { |
1376 | iommu_detected = 1; | 1380 | iommu_detected = 1; |
1377 | amd_iommu_detected = 1; | 1381 | amd_iommu_detected = 1; |
@@ -1401,6 +1405,8 @@ static int __init parse_amd_iommu_options(char *str) | |||
1401 | for (; *str; ++str) { | 1405 | for (; *str; ++str) { |
1402 | if (strncmp(str, "fullflush", 9) == 0) | 1406 | if (strncmp(str, "fullflush", 9) == 0) |
1403 | amd_iommu_unmap_flush = true; | 1407 | amd_iommu_unmap_flush = true; |
1408 | if (strncmp(str, "off", 3) == 0) | ||
1409 | amd_iommu_disabled = true; | ||
1404 | } | 1410 | } |
1405 | 1411 | ||
1406 | return 1; | 1412 | return 1; |
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 03ba1b895f5e..425e53a87feb 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c | |||
@@ -131,24 +131,6 @@ int es7000_plat; | |||
131 | 131 | ||
132 | static unsigned int base; | 132 | static unsigned int base; |
133 | 133 | ||
134 | static int | ||
135 | es7000_rename_gsi(int ioapic, int gsi) | ||
136 | { | ||
137 | if (es7000_plat == ES7000_ZORRO) | ||
138 | return gsi; | ||
139 | |||
140 | if (!base) { | ||
141 | int i; | ||
142 | for (i = 0; i < nr_ioapics; i++) | ||
143 | base += nr_ioapic_registers[i]; | ||
144 | } | ||
145 | |||
146 | if (!ioapic && (gsi < 16)) | ||
147 | gsi += base; | ||
148 | |||
149 | return gsi; | ||
150 | } | ||
151 | |||
152 | static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) | 134 | static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) |
153 | { | 135 | { |
154 | unsigned long vect = 0, psaival = 0; | 136 | unsigned long vect = 0, psaival = 0; |
@@ -190,7 +172,6 @@ static void setup_unisys(void) | |||
190 | es7000_plat = ES7000_ZORRO; | 172 | es7000_plat = ES7000_ZORRO; |
191 | else | 173 | else |
192 | es7000_plat = ES7000_CLASSIC; | 174 | es7000_plat = ES7000_CLASSIC; |
193 | ioapic_renumber_irq = es7000_rename_gsi; | ||
194 | } | 175 | } |
195 | 176 | ||
196 | /* | 177 | /* |
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index eb2789c3f721..33f3563a2a52 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -89,6 +89,9 @@ int nr_ioapics; | |||
89 | /* IO APIC gsi routing info */ | 89 | /* IO APIC gsi routing info */ |
90 | struct mp_ioapic_gsi mp_gsi_routing[MAX_IO_APICS]; | 90 | struct mp_ioapic_gsi mp_gsi_routing[MAX_IO_APICS]; |
91 | 91 | ||
92 | /* The last gsi number used */ | ||
93 | u32 gsi_end; | ||
94 | |||
92 | /* MP IRQ source entries */ | 95 | /* MP IRQ source entries */ |
93 | struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; | 96 | struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; |
94 | 97 | ||
@@ -1013,10 +1016,9 @@ static inline int irq_trigger(int idx) | |||
1013 | return MPBIOS_trigger(idx); | 1016 | return MPBIOS_trigger(idx); |
1014 | } | 1017 | } |
1015 | 1018 | ||
1016 | int (*ioapic_renumber_irq)(int ioapic, int irq); | ||
1017 | static int pin_2_irq(int idx, int apic, int pin) | 1019 | static int pin_2_irq(int idx, int apic, int pin) |
1018 | { | 1020 | { |
1019 | int irq, i; | 1021 | int irq; |
1020 | int bus = mp_irqs[idx].srcbus; | 1022 | int bus = mp_irqs[idx].srcbus; |
1021 | 1023 | ||
1022 | /* | 1024 | /* |
@@ -1028,18 +1030,12 @@ static int pin_2_irq(int idx, int apic, int pin) | |||
1028 | if (test_bit(bus, mp_bus_not_pci)) { | 1030 | if (test_bit(bus, mp_bus_not_pci)) { |
1029 | irq = mp_irqs[idx].srcbusirq; | 1031 | irq = mp_irqs[idx].srcbusirq; |
1030 | } else { | 1032 | } else { |
1031 | /* | 1033 | u32 gsi = mp_gsi_routing[apic].gsi_base + pin; |
1032 | * PCI IRQs are mapped in order | 1034 | |
1033 | */ | 1035 | if (gsi >= NR_IRQS_LEGACY) |
1034 | i = irq = 0; | 1036 | irq = gsi; |
1035 | while (i < apic) | 1037 | else |
1036 | irq += nr_ioapic_registers[i++]; | 1038 | irq = gsi_end + 1 + gsi; |
1037 | irq += pin; | ||
1038 | /* | ||
1039 | * For MPS mode, so far only needed by ES7000 platform | ||
1040 | */ | ||
1041 | if (ioapic_renumber_irq) | ||
1042 | irq = ioapic_renumber_irq(apic, irq); | ||
1043 | } | 1039 | } |
1044 | 1040 | ||
1045 | #ifdef CONFIG_X86_32 | 1041 | #ifdef CONFIG_X86_32 |
@@ -1950,20 +1946,8 @@ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; | |||
1950 | 1946 | ||
1951 | void __init enable_IO_APIC(void) | 1947 | void __init enable_IO_APIC(void) |
1952 | { | 1948 | { |
1953 | union IO_APIC_reg_01 reg_01; | ||
1954 | int i8259_apic, i8259_pin; | 1949 | int i8259_apic, i8259_pin; |
1955 | int apic; | 1950 | int apic; |
1956 | unsigned long flags; | ||
1957 | |||
1958 | /* | ||
1959 | * The number of IO-APIC IRQ registers (== #pins): | ||
1960 | */ | ||
1961 | for (apic = 0; apic < nr_ioapics; apic++) { | ||
1962 | raw_spin_lock_irqsave(&ioapic_lock, flags); | ||
1963 | reg_01.raw = io_apic_read(apic, 1); | ||
1964 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | ||
1965 | nr_ioapic_registers[apic] = reg_01.bits.entries+1; | ||
1966 | } | ||
1967 | 1951 | ||
1968 | if (!legacy_pic->nr_legacy_irqs) | 1952 | if (!legacy_pic->nr_legacy_irqs) |
1969 | return; | 1953 | return; |
@@ -3858,27 +3842,20 @@ int __init io_apic_get_redir_entries (int ioapic) | |||
3858 | reg_01.raw = io_apic_read(ioapic, 1); | 3842 | reg_01.raw = io_apic_read(ioapic, 1); |
3859 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | 3843 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
3860 | 3844 | ||
3861 | return reg_01.bits.entries; | 3845 | /* The register returns the maximum index redir index |
3846 | * supported, which is one less than the total number of redir | ||
3847 | * entries. | ||
3848 | */ | ||
3849 | return reg_01.bits.entries + 1; | ||
3862 | } | 3850 | } |
3863 | 3851 | ||
3864 | void __init probe_nr_irqs_gsi(void) | 3852 | void __init probe_nr_irqs_gsi(void) |
3865 | { | 3853 | { |
3866 | int nr = 0; | 3854 | int nr; |
3867 | 3855 | ||
3868 | nr = acpi_probe_gsi(); | 3856 | nr = gsi_end + 1 + NR_IRQS_LEGACY; |
3869 | if (nr > nr_irqs_gsi) { | 3857 | if (nr > nr_irqs_gsi) |
3870 | nr_irqs_gsi = nr; | 3858 | nr_irqs_gsi = nr; |
3871 | } else { | ||
3872 | /* for acpi=off or acpi is not compiled in */ | ||
3873 | int idx; | ||
3874 | |||
3875 | nr = 0; | ||
3876 | for (idx = 0; idx < nr_ioapics; idx++) | ||
3877 | nr += io_apic_get_redir_entries(idx) + 1; | ||
3878 | |||
3879 | if (nr > nr_irqs_gsi) | ||
3880 | nr_irqs_gsi = nr; | ||
3881 | } | ||
3882 | 3859 | ||
3883 | printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); | 3860 | printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); |
3884 | } | 3861 | } |
@@ -4085,22 +4062,27 @@ int __init io_apic_get_version(int ioapic) | |||
4085 | return reg_01.bits.version; | 4062 | return reg_01.bits.version; |
4086 | } | 4063 | } |
4087 | 4064 | ||
4088 | int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) | 4065 | int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity) |
4089 | { | 4066 | { |
4090 | int i; | 4067 | int ioapic, pin, idx; |
4091 | 4068 | ||
4092 | if (skip_ioapic_setup) | 4069 | if (skip_ioapic_setup) |
4093 | return -1; | 4070 | return -1; |
4094 | 4071 | ||
4095 | for (i = 0; i < mp_irq_entries; i++) | 4072 | ioapic = mp_find_ioapic(gsi); |
4096 | if (mp_irqs[i].irqtype == mp_INT && | 4073 | if (ioapic < 0) |
4097 | mp_irqs[i].srcbusirq == bus_irq) | ||
4098 | break; | ||
4099 | if (i >= mp_irq_entries) | ||
4100 | return -1; | 4074 | return -1; |
4101 | 4075 | ||
4102 | *trigger = irq_trigger(i); | 4076 | pin = mp_find_ioapic_pin(ioapic, gsi); |
4103 | *polarity = irq_polarity(i); | 4077 | if (pin < 0) |
4078 | return -1; | ||
4079 | |||
4080 | idx = find_irq_entry(ioapic, pin, mp_INT); | ||
4081 | if (idx < 0) | ||
4082 | return -1; | ||
4083 | |||
4084 | *trigger = irq_trigger(idx); | ||
4085 | *polarity = irq_polarity(idx); | ||
4104 | return 0; | 4086 | return 0; |
4105 | } | 4087 | } |
4106 | 4088 | ||
@@ -4241,7 +4223,7 @@ void __init ioapic_insert_resources(void) | |||
4241 | } | 4223 | } |
4242 | } | 4224 | } |
4243 | 4225 | ||
4244 | int mp_find_ioapic(int gsi) | 4226 | int mp_find_ioapic(u32 gsi) |
4245 | { | 4227 | { |
4246 | int i = 0; | 4228 | int i = 0; |
4247 | 4229 | ||
@@ -4256,7 +4238,7 @@ int mp_find_ioapic(int gsi) | |||
4256 | return -1; | 4238 | return -1; |
4257 | } | 4239 | } |
4258 | 4240 | ||
4259 | int mp_find_ioapic_pin(int ioapic, int gsi) | 4241 | int mp_find_ioapic_pin(int ioapic, u32 gsi) |
4260 | { | 4242 | { |
4261 | if (WARN_ON(ioapic == -1)) | 4243 | if (WARN_ON(ioapic == -1)) |
4262 | return -1; | 4244 | return -1; |
@@ -4284,6 +4266,7 @@ static int bad_ioapic(unsigned long address) | |||
4284 | void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) | 4266 | void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) |
4285 | { | 4267 | { |
4286 | int idx = 0; | 4268 | int idx = 0; |
4269 | int entries; | ||
4287 | 4270 | ||
4288 | if (bad_ioapic(address)) | 4271 | if (bad_ioapic(address)) |
4289 | return; | 4272 | return; |
@@ -4302,9 +4285,17 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) | |||
4302 | * Build basic GSI lookup table to facilitate gsi->io_apic lookups | 4285 | * Build basic GSI lookup table to facilitate gsi->io_apic lookups |
4303 | * and to prevent reprogramming of IOAPIC pins (PCI GSIs). | 4286 | * and to prevent reprogramming of IOAPIC pins (PCI GSIs). |
4304 | */ | 4287 | */ |
4288 | entries = io_apic_get_redir_entries(idx); | ||
4305 | mp_gsi_routing[idx].gsi_base = gsi_base; | 4289 | mp_gsi_routing[idx].gsi_base = gsi_base; |
4306 | mp_gsi_routing[idx].gsi_end = gsi_base + | 4290 | mp_gsi_routing[idx].gsi_end = gsi_base + entries - 1; |
4307 | io_apic_get_redir_entries(idx); | 4291 | |
4292 | /* | ||
4293 | * The number of IO-APIC IRQ registers (== #pins): | ||
4294 | */ | ||
4295 | nr_ioapic_registers[idx] = entries; | ||
4296 | |||
4297 | if (mp_gsi_routing[idx].gsi_end > gsi_end) | ||
4298 | gsi_end = mp_gsi_routing[idx].gsi_end; | ||
4308 | 4299 | ||
4309 | printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " | 4300 | printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " |
4310 | "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, | 4301 | "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index c085d52dbaf2..e46f98f36e31 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
@@ -735,9 +735,6 @@ void __init uv_system_init(void) | |||
735 | uv_node_to_blade[nid] = blade; | 735 | uv_node_to_blade[nid] = blade; |
736 | uv_cpu_to_blade[cpu] = blade; | 736 | uv_cpu_to_blade[cpu] = blade; |
737 | max_pnode = max(pnode, max_pnode); | 737 | max_pnode = max(pnode, max_pnode); |
738 | |||
739 | printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, lcpu %d, blade %d\n", | ||
740 | cpu, apicid, pnode, nid, lcpu, blade); | ||
741 | } | 738 | } |
742 | 739 | ||
743 | /* Add blade/pnode info for nodes without cpus */ | 740 | /* Add blade/pnode info for nodes without cpus */ |
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 031aa887b0eb..c4f9182ca3ac 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -1224,7 +1224,7 @@ static void reinit_timer(void) | |||
1224 | #ifdef INIT_TIMER_AFTER_SUSPEND | 1224 | #ifdef INIT_TIMER_AFTER_SUSPEND |
1225 | unsigned long flags; | 1225 | unsigned long flags; |
1226 | 1226 | ||
1227 | spin_lock_irqsave(&i8253_lock, flags); | 1227 | raw_spin_lock_irqsave(&i8253_lock, flags); |
1228 | /* set the clock to HZ */ | 1228 | /* set the clock to HZ */ |
1229 | outb_pit(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ | 1229 | outb_pit(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */ |
1230 | udelay(10); | 1230 | udelay(10); |
@@ -1232,7 +1232,7 @@ static void reinit_timer(void) | |||
1232 | udelay(10); | 1232 | udelay(10); |
1233 | outb_pit(LATCH >> 8, PIT_CH0); /* MSB */ | 1233 | outb_pit(LATCH >> 8, PIT_CH0); /* MSB */ |
1234 | udelay(10); | 1234 | udelay(10); |
1235 | spin_unlock_irqrestore(&i8253_lock, flags); | 1235 | raw_spin_unlock_irqrestore(&i8253_lock, flags); |
1236 | #endif | 1236 | #endif |
1237 | } | 1237 | } |
1238 | 1238 | ||
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index c202b62f3671..3a785da34b6f 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -14,7 +14,7 @@ CFLAGS_common.o := $(nostackp) | |||
14 | 14 | ||
15 | obj-y := intel_cacheinfo.o addon_cpuid_features.o | 15 | obj-y := intel_cacheinfo.o addon_cpuid_features.o |
16 | obj-y += proc.o capflags.o powerflags.o common.o | 16 | obj-y += proc.o capflags.o powerflags.o common.o |
17 | obj-y += vmware.o hypervisor.o sched.o | 17 | obj-y += vmware.o hypervisor.o sched.o mshyperv.o |
18 | 18 | ||
19 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o | 19 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o |
20 | obj-$(CONFIG_X86_64) += bugs_64.o | 20 | obj-$(CONFIG_X86_64) += bugs_64.o |
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 97ad79cdf688..10fa5684a662 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c | |||
@@ -30,12 +30,14 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) | |||
30 | const struct cpuid_bit *cb; | 30 | const struct cpuid_bit *cb; |
31 | 31 | ||
32 | static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { | 32 | static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { |
33 | { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, | 33 | { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, |
34 | { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 }, | 34 | { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 }, |
35 | { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a }, | 35 | { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006 }, |
36 | { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a }, | 36 | { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007 }, |
37 | { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a }, | 37 | { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a }, |
38 | { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a }, | 38 | { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a }, |
39 | { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a }, | ||
40 | { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a }, | ||
39 | { 0, 0, 0, 0 } | 41 | { 0, 0, 0, 0 } |
40 | }; | 42 | }; |
41 | 43 | ||
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 01a265212395..c39576cb3018 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c | |||
@@ -86,7 +86,7 @@ static void __init check_fpu(void) | |||
86 | 86 | ||
87 | static void __init check_hlt(void) | 87 | static void __init check_hlt(void) |
88 | { | 88 | { |
89 | if (paravirt_enabled()) | 89 | if (boot_cpu_data.x86 >= 5 || paravirt_enabled()) |
90 | return; | 90 | return; |
91 | 91 | ||
92 | printk(KERN_INFO "Checking 'hlt' instruction... "); | 92 | printk(KERN_INFO "Checking 'hlt' instruction... "); |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4868e4a951ee..c1c00d0b1692 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -1243,10 +1243,7 @@ void __cpuinit cpu_init(void) | |||
1243 | /* | 1243 | /* |
1244 | * Force FPU initialization: | 1244 | * Force FPU initialization: |
1245 | */ | 1245 | */ |
1246 | if (cpu_has_xsave) | 1246 | current_thread_info()->status = 0; |
1247 | current_thread_info()->status = TS_XSAVE; | ||
1248 | else | ||
1249 | current_thread_info()->status = 0; | ||
1250 | clear_used_math(); | 1247 | clear_used_math(); |
1251 | mxcsr_feature_mask_init(); | 1248 | mxcsr_feature_mask_init(); |
1252 | 1249 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile index 1840c0a5170b..bd54bf67e6fb 100644 --- a/arch/x86/kernel/cpu/cpufreq/Makefile +++ b/arch/x86/kernel/cpu/cpufreq/Makefile | |||
@@ -2,8 +2,8 @@ | |||
2 | # K8 systems. ACPI is preferred to all other hardware-specific drivers. | 2 | # K8 systems. ACPI is preferred to all other hardware-specific drivers. |
3 | # speedstep-* is preferred over p4-clockmod. | 3 | # speedstep-* is preferred over p4-clockmod. |
4 | 4 | ||
5 | obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o | 5 | obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o mperf.o |
6 | obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o | 6 | obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o mperf.o |
7 | obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o | 7 | obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o |
8 | obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o | 8 | obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o |
9 | obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o | 9 | obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o |
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 459168083b77..1d3cddaa40ee 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | |||
@@ -46,6 +46,7 @@ | |||
46 | #include <asm/msr.h> | 46 | #include <asm/msr.h> |
47 | #include <asm/processor.h> | 47 | #include <asm/processor.h> |
48 | #include <asm/cpufeature.h> | 48 | #include <asm/cpufeature.h> |
49 | #include "mperf.h" | ||
49 | 50 | ||
50 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | 51 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ |
51 | "acpi-cpufreq", msg) | 52 | "acpi-cpufreq", msg) |
@@ -71,8 +72,6 @@ struct acpi_cpufreq_data { | |||
71 | 72 | ||
72 | static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data); | 73 | static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data); |
73 | 74 | ||
74 | static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf); | ||
75 | |||
76 | /* acpi_perf_data is a pointer to percpu data. */ | 75 | /* acpi_perf_data is a pointer to percpu data. */ |
77 | static struct acpi_processor_performance *acpi_perf_data; | 76 | static struct acpi_processor_performance *acpi_perf_data; |
78 | 77 | ||
@@ -240,45 +239,6 @@ static u32 get_cur_val(const struct cpumask *mask) | |||
240 | return cmd.val; | 239 | return cmd.val; |
241 | } | 240 | } |
242 | 241 | ||
243 | /* Called via smp_call_function_single(), on the target CPU */ | ||
244 | static void read_measured_perf_ctrs(void *_cur) | ||
245 | { | ||
246 | struct aperfmperf *am = _cur; | ||
247 | |||
248 | get_aperfmperf(am); | ||
249 | } | ||
250 | |||
251 | /* | ||
252 | * Return the measured active (C0) frequency on this CPU since last call | ||
253 | * to this function. | ||
254 | * Input: cpu number | ||
255 | * Return: Average CPU frequency in terms of max frequency (zero on error) | ||
256 | * | ||
257 | * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance | ||
258 | * over a period of time, while CPU is in C0 state. | ||
259 | * IA32_MPERF counts at the rate of max advertised frequency | ||
260 | * IA32_APERF counts at the rate of actual CPU frequency | ||
261 | * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and | ||
262 | * no meaning should be associated with absolute values of these MSRs. | ||
263 | */ | ||
264 | static unsigned int get_measured_perf(struct cpufreq_policy *policy, | ||
265 | unsigned int cpu) | ||
266 | { | ||
267 | struct aperfmperf perf; | ||
268 | unsigned long ratio; | ||
269 | unsigned int retval; | ||
270 | |||
271 | if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1)) | ||
272 | return 0; | ||
273 | |||
274 | ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf); | ||
275 | per_cpu(acfreq_old_perf, cpu) = perf; | ||
276 | |||
277 | retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT; | ||
278 | |||
279 | return retval; | ||
280 | } | ||
281 | |||
282 | static unsigned int get_cur_freq_on_cpu(unsigned int cpu) | 242 | static unsigned int get_cur_freq_on_cpu(unsigned int cpu) |
283 | { | 243 | { |
284 | struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu); | 244 | struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu); |
@@ -702,7 +662,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) | |||
702 | 662 | ||
703 | /* Check for APERF/MPERF support in hardware */ | 663 | /* Check for APERF/MPERF support in hardware */ |
704 | if (cpu_has(c, X86_FEATURE_APERFMPERF)) | 664 | if (cpu_has(c, X86_FEATURE_APERFMPERF)) |
705 | acpi_cpufreq_driver.getavg = get_measured_perf; | 665 | acpi_cpufreq_driver.getavg = cpufreq_get_measured_perf; |
706 | 666 | ||
707 | dprintk("CPU%u - ACPI performance management activated.\n", cpu); | 667 | dprintk("CPU%u - ACPI performance management activated.\n", cpu); |
708 | for (i = 0; i < perf->state_count; i++) | 668 | for (i = 0; i < perf->state_count; i++) |
diff --git a/arch/x86/kernel/cpu/cpufreq/mperf.c b/arch/x86/kernel/cpu/cpufreq/mperf.c new file mode 100644 index 000000000000..911e193018ae --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/mperf.c | |||
@@ -0,0 +1,51 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/smp.h> | ||
3 | #include <linux/module.h> | ||
4 | #include <linux/init.h> | ||
5 | #include <linux/cpufreq.h> | ||
6 | #include <linux/slab.h> | ||
7 | |||
8 | #include "mperf.h" | ||
9 | |||
10 | static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf); | ||
11 | |||
12 | /* Called via smp_call_function_single(), on the target CPU */ | ||
13 | static void read_measured_perf_ctrs(void *_cur) | ||
14 | { | ||
15 | struct aperfmperf *am = _cur; | ||
16 | |||
17 | get_aperfmperf(am); | ||
18 | } | ||
19 | |||
20 | /* | ||
21 | * Return the measured active (C0) frequency on this CPU since last call | ||
22 | * to this function. | ||
23 | * Input: cpu number | ||
24 | * Return: Average CPU frequency in terms of max frequency (zero on error) | ||
25 | * | ||
26 | * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance | ||
27 | * over a period of time, while CPU is in C0 state. | ||
28 | * IA32_MPERF counts at the rate of max advertised frequency | ||
29 | * IA32_APERF counts at the rate of actual CPU frequency | ||
30 | * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and | ||
31 | * no meaning should be associated with absolute values of these MSRs. | ||
32 | */ | ||
33 | unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy, | ||
34 | unsigned int cpu) | ||
35 | { | ||
36 | struct aperfmperf perf; | ||
37 | unsigned long ratio; | ||
38 | unsigned int retval; | ||
39 | |||
40 | if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1)) | ||
41 | return 0; | ||
42 | |||
43 | ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf); | ||
44 | per_cpu(acfreq_old_perf, cpu) = perf; | ||
45 | |||
46 | retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT; | ||
47 | |||
48 | return retval; | ||
49 | } | ||
50 | EXPORT_SYMBOL_GPL(cpufreq_get_measured_perf); | ||
51 | MODULE_LICENSE("GPL"); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/mperf.h b/arch/x86/kernel/cpu/cpufreq/mperf.h new file mode 100644 index 000000000000..5dbf2950dc22 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/mperf.h | |||
@@ -0,0 +1,9 @@ | |||
1 | /* | ||
2 | * (c) 2010 Advanced Micro Devices, Inc. | ||
3 | * Your use of this code is subject to the terms and conditions of the | ||
4 | * GNU general public license version 2. See "COPYING" or | ||
5 | * http://www.gnu.org/licenses/gpl.html | ||
6 | */ | ||
7 | |||
8 | unsigned int cpufreq_get_measured_perf(struct cpufreq_policy *policy, | ||
9 | unsigned int cpu); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index b6215b9798e2..6f3dc8fbbfdc 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
@@ -1,6 +1,5 @@ | |||
1 | |||
2 | /* | 1 | /* |
3 | * (c) 2003-2006 Advanced Micro Devices, Inc. | 2 | * (c) 2003-2010 Advanced Micro Devices, Inc. |
4 | * Your use of this code is subject to the terms and conditions of the | 3 | * Your use of this code is subject to the terms and conditions of the |
5 | * GNU general public license version 2. See "COPYING" or | 4 | * GNU general public license version 2. See "COPYING" or |
6 | * http://www.gnu.org/licenses/gpl.html | 5 | * http://www.gnu.org/licenses/gpl.html |
@@ -46,6 +45,7 @@ | |||
46 | #define PFX "powernow-k8: " | 45 | #define PFX "powernow-k8: " |
47 | #define VERSION "version 2.20.00" | 46 | #define VERSION "version 2.20.00" |
48 | #include "powernow-k8.h" | 47 | #include "powernow-k8.h" |
48 | #include "mperf.h" | ||
49 | 49 | ||
50 | /* serialize freq changes */ | 50 | /* serialize freq changes */ |
51 | static DEFINE_MUTEX(fidvid_mutex); | 51 | static DEFINE_MUTEX(fidvid_mutex); |
@@ -54,6 +54,12 @@ static DEFINE_PER_CPU(struct powernow_k8_data *, powernow_data); | |||
54 | 54 | ||
55 | static int cpu_family = CPU_OPTERON; | 55 | static int cpu_family = CPU_OPTERON; |
56 | 56 | ||
57 | /* core performance boost */ | ||
58 | static bool cpb_capable, cpb_enabled; | ||
59 | static struct msr __percpu *msrs; | ||
60 | |||
61 | static struct cpufreq_driver cpufreq_amd64_driver; | ||
62 | |||
57 | #ifndef CONFIG_SMP | 63 | #ifndef CONFIG_SMP |
58 | static inline const struct cpumask *cpu_core_mask(int cpu) | 64 | static inline const struct cpumask *cpu_core_mask(int cpu) |
59 | { | 65 | { |
@@ -1249,6 +1255,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1249 | struct powernow_k8_data *data; | 1255 | struct powernow_k8_data *data; |
1250 | struct init_on_cpu init_on_cpu; | 1256 | struct init_on_cpu init_on_cpu; |
1251 | int rc; | 1257 | int rc; |
1258 | struct cpuinfo_x86 *c = &cpu_data(pol->cpu); | ||
1252 | 1259 | ||
1253 | if (!cpu_online(pol->cpu)) | 1260 | if (!cpu_online(pol->cpu)) |
1254 | return -ENODEV; | 1261 | return -ENODEV; |
@@ -1323,6 +1330,10 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1323 | return -EINVAL; | 1330 | return -EINVAL; |
1324 | } | 1331 | } |
1325 | 1332 | ||
1333 | /* Check for APERF/MPERF support in hardware */ | ||
1334 | if (cpu_has(c, X86_FEATURE_APERFMPERF)) | ||
1335 | cpufreq_amd64_driver.getavg = cpufreq_get_measured_perf; | ||
1336 | |||
1326 | cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); | 1337 | cpufreq_frequency_table_get_attr(data->powernow_table, pol->cpu); |
1327 | 1338 | ||
1328 | if (cpu_family == CPU_HW_PSTATE) | 1339 | if (cpu_family == CPU_HW_PSTATE) |
@@ -1394,8 +1405,77 @@ out: | |||
1394 | return khz; | 1405 | return khz; |
1395 | } | 1406 | } |
1396 | 1407 | ||
1408 | static void _cpb_toggle_msrs(bool t) | ||
1409 | { | ||
1410 | int cpu; | ||
1411 | |||
1412 | get_online_cpus(); | ||
1413 | |||
1414 | rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); | ||
1415 | |||
1416 | for_each_cpu(cpu, cpu_online_mask) { | ||
1417 | struct msr *reg = per_cpu_ptr(msrs, cpu); | ||
1418 | if (t) | ||
1419 | reg->l &= ~BIT(25); | ||
1420 | else | ||
1421 | reg->l |= BIT(25); | ||
1422 | } | ||
1423 | wrmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); | ||
1424 | |||
1425 | put_online_cpus(); | ||
1426 | } | ||
1427 | |||
1428 | /* | ||
1429 | * Switch on/off core performance boosting. | ||
1430 | * | ||
1431 | * 0=disable | ||
1432 | * 1=enable. | ||
1433 | */ | ||
1434 | static void cpb_toggle(bool t) | ||
1435 | { | ||
1436 | if (!cpb_capable) | ||
1437 | return; | ||
1438 | |||
1439 | if (t && !cpb_enabled) { | ||
1440 | cpb_enabled = true; | ||
1441 | _cpb_toggle_msrs(t); | ||
1442 | printk(KERN_INFO PFX "Core Boosting enabled.\n"); | ||
1443 | } else if (!t && cpb_enabled) { | ||
1444 | cpb_enabled = false; | ||
1445 | _cpb_toggle_msrs(t); | ||
1446 | printk(KERN_INFO PFX "Core Boosting disabled.\n"); | ||
1447 | } | ||
1448 | } | ||
1449 | |||
1450 | static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf, | ||
1451 | size_t count) | ||
1452 | { | ||
1453 | int ret = -EINVAL; | ||
1454 | unsigned long val = 0; | ||
1455 | |||
1456 | ret = strict_strtoul(buf, 10, &val); | ||
1457 | if (!ret && (val == 0 || val == 1) && cpb_capable) | ||
1458 | cpb_toggle(val); | ||
1459 | else | ||
1460 | return -EINVAL; | ||
1461 | |||
1462 | return count; | ||
1463 | } | ||
1464 | |||
1465 | static ssize_t show_cpb(struct cpufreq_policy *policy, char *buf) | ||
1466 | { | ||
1467 | return sprintf(buf, "%u\n", cpb_enabled); | ||
1468 | } | ||
1469 | |||
1470 | #define define_one_rw(_name) \ | ||
1471 | static struct freq_attr _name = \ | ||
1472 | __ATTR(_name, 0644, show_##_name, store_##_name) | ||
1473 | |||
1474 | define_one_rw(cpb); | ||
1475 | |||
1397 | static struct freq_attr *powernow_k8_attr[] = { | 1476 | static struct freq_attr *powernow_k8_attr[] = { |
1398 | &cpufreq_freq_attr_scaling_available_freqs, | 1477 | &cpufreq_freq_attr_scaling_available_freqs, |
1478 | &cpb, | ||
1399 | NULL, | 1479 | NULL, |
1400 | }; | 1480 | }; |
1401 | 1481 | ||
@@ -1411,10 +1491,51 @@ static struct cpufreq_driver cpufreq_amd64_driver = { | |||
1411 | .attr = powernow_k8_attr, | 1491 | .attr = powernow_k8_attr, |
1412 | }; | 1492 | }; |
1413 | 1493 | ||
1494 | /* | ||
1495 | * Clear the boost-disable flag on the CPU_DOWN path so that this cpu | ||
1496 | * cannot block the remaining ones from boosting. On the CPU_UP path we | ||
1497 | * simply keep the boost-disable flag in sync with the current global | ||
1498 | * state. | ||
1499 | */ | ||
1500 | static int __cpuinit cpb_notify(struct notifier_block *nb, unsigned long action, | ||
1501 | void *hcpu) | ||
1502 | { | ||
1503 | unsigned cpu = (long)hcpu; | ||
1504 | u32 lo, hi; | ||
1505 | |||
1506 | switch (action) { | ||
1507 | case CPU_UP_PREPARE: | ||
1508 | case CPU_UP_PREPARE_FROZEN: | ||
1509 | |||
1510 | if (!cpb_enabled) { | ||
1511 | rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi); | ||
1512 | lo |= BIT(25); | ||
1513 | wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi); | ||
1514 | } | ||
1515 | break; | ||
1516 | |||
1517 | case CPU_DOWN_PREPARE: | ||
1518 | case CPU_DOWN_PREPARE_FROZEN: | ||
1519 | rdmsr_on_cpu(cpu, MSR_K7_HWCR, &lo, &hi); | ||
1520 | lo &= ~BIT(25); | ||
1521 | wrmsr_on_cpu(cpu, MSR_K7_HWCR, lo, hi); | ||
1522 | break; | ||
1523 | |||
1524 | default: | ||
1525 | break; | ||
1526 | } | ||
1527 | |||
1528 | return NOTIFY_OK; | ||
1529 | } | ||
1530 | |||
1531 | static struct notifier_block __cpuinitdata cpb_nb = { | ||
1532 | .notifier_call = cpb_notify, | ||
1533 | }; | ||
1534 | |||
1414 | /* driver entry point for init */ | 1535 | /* driver entry point for init */ |
1415 | static int __cpuinit powernowk8_init(void) | 1536 | static int __cpuinit powernowk8_init(void) |
1416 | { | 1537 | { |
1417 | unsigned int i, supported_cpus = 0; | 1538 | unsigned int i, supported_cpus = 0, cpu; |
1418 | 1539 | ||
1419 | for_each_online_cpu(i) { | 1540 | for_each_online_cpu(i) { |
1420 | int rc; | 1541 | int rc; |
@@ -1423,15 +1544,36 @@ static int __cpuinit powernowk8_init(void) | |||
1423 | supported_cpus++; | 1544 | supported_cpus++; |
1424 | } | 1545 | } |
1425 | 1546 | ||
1426 | if (supported_cpus == num_online_cpus()) { | 1547 | if (supported_cpus != num_online_cpus()) |
1427 | printk(KERN_INFO PFX "Found %d %s " | 1548 | return -ENODEV; |
1428 | "processors (%d cpu cores) (" VERSION ")\n", | 1549 | |
1429 | num_online_nodes(), | 1550 | printk(KERN_INFO PFX "Found %d %s (%d cpu cores) (" VERSION ")\n", |
1430 | boot_cpu_data.x86_model_id, supported_cpus); | 1551 | num_online_nodes(), boot_cpu_data.x86_model_id, supported_cpus); |
1431 | return cpufreq_register_driver(&cpufreq_amd64_driver); | 1552 | |
1553 | if (boot_cpu_has(X86_FEATURE_CPB)) { | ||
1554 | |||
1555 | cpb_capable = true; | ||
1556 | |||
1557 | register_cpu_notifier(&cpb_nb); | ||
1558 | |||
1559 | msrs = msrs_alloc(); | ||
1560 | if (!msrs) { | ||
1561 | printk(KERN_ERR "%s: Error allocating msrs!\n", __func__); | ||
1562 | return -ENOMEM; | ||
1563 | } | ||
1564 | |||
1565 | rdmsr_on_cpus(cpu_online_mask, MSR_K7_HWCR, msrs); | ||
1566 | |||
1567 | for_each_cpu(cpu, cpu_online_mask) { | ||
1568 | struct msr *reg = per_cpu_ptr(msrs, cpu); | ||
1569 | cpb_enabled |= !(!!(reg->l & BIT(25))); | ||
1570 | } | ||
1571 | |||
1572 | printk(KERN_INFO PFX "Core Performance Boosting: %s.\n", | ||
1573 | (cpb_enabled ? "on" : "off")); | ||
1432 | } | 1574 | } |
1433 | 1575 | ||
1434 | return -ENODEV; | 1576 | return cpufreq_register_driver(&cpufreq_amd64_driver); |
1435 | } | 1577 | } |
1436 | 1578 | ||
1437 | /* driver entry point for term */ | 1579 | /* driver entry point for term */ |
@@ -1439,6 +1581,13 @@ static void __exit powernowk8_exit(void) | |||
1439 | { | 1581 | { |
1440 | dprintk("exit\n"); | 1582 | dprintk("exit\n"); |
1441 | 1583 | ||
1584 | if (boot_cpu_has(X86_FEATURE_CPB)) { | ||
1585 | msrs_free(msrs); | ||
1586 | msrs = NULL; | ||
1587 | |||
1588 | unregister_cpu_notifier(&cpb_nb); | ||
1589 | } | ||
1590 | |||
1442 | cpufreq_unregister_driver(&cpufreq_amd64_driver); | 1591 | cpufreq_unregister_driver(&cpufreq_amd64_driver); |
1443 | } | 1592 | } |
1444 | 1593 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index 02ce824073cb..df3529b1c02d 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h | |||
@@ -5,7 +5,6 @@ | |||
5 | * http://www.gnu.org/licenses/gpl.html | 5 | * http://www.gnu.org/licenses/gpl.html |
6 | */ | 6 | */ |
7 | 7 | ||
8 | |||
9 | enum pstate { | 8 | enum pstate { |
10 | HW_PSTATE_INVALID = 0xff, | 9 | HW_PSTATE_INVALID = 0xff, |
11 | HW_PSTATE_0 = 0, | 10 | HW_PSTATE_0 = 0, |
@@ -55,7 +54,6 @@ struct powernow_k8_data { | |||
55 | struct cpumask *available_cores; | 54 | struct cpumask *available_cores; |
56 | }; | 55 | }; |
57 | 56 | ||
58 | |||
59 | /* processor's cpuid instruction support */ | 57 | /* processor's cpuid instruction support */ |
60 | #define CPUID_PROCESSOR_SIGNATURE 1 /* function 1 */ | 58 | #define CPUID_PROCESSOR_SIGNATURE 1 /* function 1 */ |
61 | #define CPUID_XFAM 0x0ff00000 /* extended family */ | 59 | #define CPUID_XFAM 0x0ff00000 /* extended family */ |
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 08be922de33a..dd531cc56a8f 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c | |||
@@ -21,37 +21,55 @@ | |||
21 | * | 21 | * |
22 | */ | 22 | */ |
23 | 23 | ||
24 | #include <linux/module.h> | ||
24 | #include <asm/processor.h> | 25 | #include <asm/processor.h> |
25 | #include <asm/vmware.h> | ||
26 | #include <asm/hypervisor.h> | 26 | #include <asm/hypervisor.h> |
27 | 27 | ||
28 | static inline void __cpuinit | 28 | /* |
29 | detect_hypervisor_vendor(struct cpuinfo_x86 *c) | 29 | * Hypervisor detect order. This is specified explicitly here because |
30 | * some hypervisors might implement compatibility modes for other | ||
31 | * hypervisors and therefore need to be detected in specific sequence. | ||
32 | */ | ||
33 | static const __initconst struct hypervisor_x86 * const hypervisors[] = | ||
30 | { | 34 | { |
31 | if (vmware_platform()) | 35 | &x86_hyper_vmware, |
32 | c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE; | 36 | &x86_hyper_ms_hyperv, |
33 | else | 37 | }; |
34 | c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE; | ||
35 | } | ||
36 | 38 | ||
37 | static inline void __cpuinit | 39 | const struct hypervisor_x86 *x86_hyper; |
38 | hypervisor_set_feature_bits(struct cpuinfo_x86 *c) | 40 | EXPORT_SYMBOL(x86_hyper); |
41 | |||
42 | static inline void __init | ||
43 | detect_hypervisor_vendor(void) | ||
39 | { | 44 | { |
40 | if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) { | 45 | const struct hypervisor_x86 *h, * const *p; |
41 | vmware_set_feature_bits(c); | 46 | |
42 | return; | 47 | for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) { |
48 | h = *p; | ||
49 | if (h->detect()) { | ||
50 | x86_hyper = h; | ||
51 | printk(KERN_INFO "Hypervisor detected: %s\n", h->name); | ||
52 | break; | ||
53 | } | ||
43 | } | 54 | } |
44 | } | 55 | } |
45 | 56 | ||
46 | void __cpuinit init_hypervisor(struct cpuinfo_x86 *c) | 57 | void __cpuinit init_hypervisor(struct cpuinfo_x86 *c) |
47 | { | 58 | { |
48 | detect_hypervisor_vendor(c); | 59 | if (x86_hyper && x86_hyper->set_cpu_features) |
49 | hypervisor_set_feature_bits(c); | 60 | x86_hyper->set_cpu_features(c); |
50 | } | 61 | } |
51 | 62 | ||
52 | void __init init_hypervisor_platform(void) | 63 | void __init init_hypervisor_platform(void) |
53 | { | 64 | { |
65 | |||
66 | detect_hypervisor_vendor(); | ||
67 | |||
68 | if (!x86_hyper) | ||
69 | return; | ||
70 | |||
54 | init_hypervisor(&boot_cpu_data); | 71 | init_hypervisor(&boot_cpu_data); |
55 | if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) | 72 | |
56 | vmware_platform_setup(); | 73 | if (x86_hyper->init_platform) |
74 | x86_hyper->init_platform(); | ||
57 | } | 75 | } |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 1366c7cfd483..85f69cdeae10 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -12,7 +12,6 @@ | |||
12 | #include <asm/processor.h> | 12 | #include <asm/processor.h> |
13 | #include <asm/pgtable.h> | 13 | #include <asm/pgtable.h> |
14 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
15 | #include <asm/ds.h> | ||
16 | #include <asm/bugs.h> | 15 | #include <asm/bugs.h> |
17 | #include <asm/cpu.h> | 16 | #include <asm/cpu.h> |
18 | 17 | ||
@@ -373,12 +372,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
373 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); | 372 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); |
374 | } | 373 | } |
375 | 374 | ||
376 | if (c->cpuid_level > 6) { | ||
377 | unsigned ecx = cpuid_ecx(6); | ||
378 | if (ecx & 0x01) | ||
379 | set_cpu_cap(c, X86_FEATURE_APERFMPERF); | ||
380 | } | ||
381 | |||
382 | if (cpu_has_xmm2) | 375 | if (cpu_has_xmm2) |
383 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); | 376 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); |
384 | if (cpu_has_ds) { | 377 | if (cpu_has_ds) { |
@@ -388,7 +381,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
388 | set_cpu_cap(c, X86_FEATURE_BTS); | 381 | set_cpu_cap(c, X86_FEATURE_BTS); |
389 | if (!(l1 & (1<<12))) | 382 | if (!(l1 & (1<<12))) |
390 | set_cpu_cap(c, X86_FEATURE_PEBS); | 383 | set_cpu_cap(c, X86_FEATURE_PEBS); |
391 | ds_init_intel(c); | ||
392 | } | 384 | } |
393 | 385 | ||
394 | if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush) | 386 | if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush) |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index b3eeb66c0a51..33eae2062cf5 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -148,13 +148,19 @@ union _cpuid4_leaf_ecx { | |||
148 | u32 full; | 148 | u32 full; |
149 | }; | 149 | }; |
150 | 150 | ||
151 | struct amd_l3_cache { | ||
152 | struct pci_dev *dev; | ||
153 | bool can_disable; | ||
154 | unsigned indices; | ||
155 | u8 subcaches[4]; | ||
156 | }; | ||
157 | |||
151 | struct _cpuid4_info { | 158 | struct _cpuid4_info { |
152 | union _cpuid4_leaf_eax eax; | 159 | union _cpuid4_leaf_eax eax; |
153 | union _cpuid4_leaf_ebx ebx; | 160 | union _cpuid4_leaf_ebx ebx; |
154 | union _cpuid4_leaf_ecx ecx; | 161 | union _cpuid4_leaf_ecx ecx; |
155 | unsigned long size; | 162 | unsigned long size; |
156 | bool can_disable; | 163 | struct amd_l3_cache *l3; |
157 | unsigned int l3_indices; | ||
158 | DECLARE_BITMAP(shared_cpu_map, NR_CPUS); | 164 | DECLARE_BITMAP(shared_cpu_map, NR_CPUS); |
159 | }; | 165 | }; |
160 | 166 | ||
@@ -164,8 +170,7 @@ struct _cpuid4_info_regs { | |||
164 | union _cpuid4_leaf_ebx ebx; | 170 | union _cpuid4_leaf_ebx ebx; |
165 | union _cpuid4_leaf_ecx ecx; | 171 | union _cpuid4_leaf_ecx ecx; |
166 | unsigned long size; | 172 | unsigned long size; |
167 | bool can_disable; | 173 | struct amd_l3_cache *l3; |
168 | unsigned int l3_indices; | ||
169 | }; | 174 | }; |
170 | 175 | ||
171 | unsigned short num_cache_leaves; | 176 | unsigned short num_cache_leaves; |
@@ -302,87 +307,163 @@ struct _cache_attr { | |||
302 | }; | 307 | }; |
303 | 308 | ||
304 | #ifdef CONFIG_CPU_SUP_AMD | 309 | #ifdef CONFIG_CPU_SUP_AMD |
305 | static unsigned int __cpuinit amd_calc_l3_indices(void) | 310 | |
311 | /* | ||
312 | * L3 cache descriptors | ||
313 | */ | ||
314 | static struct amd_l3_cache **__cpuinitdata l3_caches; | ||
315 | |||
316 | static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3) | ||
306 | { | 317 | { |
307 | /* | ||
308 | * We're called over smp_call_function_single() and therefore | ||
309 | * are on the correct cpu. | ||
310 | */ | ||
311 | int cpu = smp_processor_id(); | ||
312 | int node = cpu_to_node(cpu); | ||
313 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
314 | unsigned int sc0, sc1, sc2, sc3; | 318 | unsigned int sc0, sc1, sc2, sc3; |
315 | u32 val = 0; | 319 | u32 val = 0; |
316 | 320 | ||
317 | pci_read_config_dword(dev, 0x1C4, &val); | 321 | pci_read_config_dword(l3->dev, 0x1C4, &val); |
318 | 322 | ||
319 | /* calculate subcache sizes */ | 323 | /* calculate subcache sizes */ |
320 | sc0 = !(val & BIT(0)); | 324 | l3->subcaches[0] = sc0 = !(val & BIT(0)); |
321 | sc1 = !(val & BIT(4)); | 325 | l3->subcaches[1] = sc1 = !(val & BIT(4)); |
322 | sc2 = !(val & BIT(8)) + !(val & BIT(9)); | 326 | l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9)); |
323 | sc3 = !(val & BIT(12)) + !(val & BIT(13)); | 327 | l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13)); |
324 | 328 | ||
325 | return (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1; | 329 | l3->indices = (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1; |
330 | } | ||
331 | |||
332 | static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node) | ||
333 | { | ||
334 | struct amd_l3_cache *l3; | ||
335 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
336 | |||
337 | l3 = kzalloc(sizeof(struct amd_l3_cache), GFP_ATOMIC); | ||
338 | if (!l3) { | ||
339 | printk(KERN_WARNING "Error allocating L3 struct\n"); | ||
340 | return NULL; | ||
341 | } | ||
342 | |||
343 | l3->dev = dev; | ||
344 | |||
345 | amd_calc_l3_indices(l3); | ||
346 | |||
347 | return l3; | ||
326 | } | 348 | } |
327 | 349 | ||
328 | static void __cpuinit | 350 | static void __cpuinit |
329 | amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) | 351 | amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) |
330 | { | 352 | { |
331 | if (index < 3) | 353 | int node; |
354 | |||
355 | if (boot_cpu_data.x86 != 0x10) | ||
332 | return; | 356 | return; |
333 | 357 | ||
334 | if (boot_cpu_data.x86 == 0x11) | 358 | if (index < 3) |
335 | return; | 359 | return; |
336 | 360 | ||
337 | /* see errata #382 and #388 */ | 361 | /* see errata #382 and #388 */ |
338 | if ((boot_cpu_data.x86 == 0x10) && | 362 | if (boot_cpu_data.x86_model < 0x8) |
339 | ((boot_cpu_data.x86_model < 0x8) || | 363 | return; |
340 | (boot_cpu_data.x86_mask < 0x1))) | 364 | |
365 | if ((boot_cpu_data.x86_model == 0x8 || | ||
366 | boot_cpu_data.x86_model == 0x9) | ||
367 | && | ||
368 | boot_cpu_data.x86_mask < 0x1) | ||
369 | return; | ||
370 | |||
371 | /* not in virtualized environments */ | ||
372 | if (num_k8_northbridges == 0) | ||
341 | return; | 373 | return; |
342 | 374 | ||
343 | this_leaf->can_disable = true; | 375 | /* |
344 | this_leaf->l3_indices = amd_calc_l3_indices(); | 376 | * Strictly speaking, the amount in @size below is leaked since it is |
377 | * never freed but this is done only on shutdown so it doesn't matter. | ||
378 | */ | ||
379 | if (!l3_caches) { | ||
380 | int size = num_k8_northbridges * sizeof(struct amd_l3_cache *); | ||
381 | |||
382 | l3_caches = kzalloc(size, GFP_ATOMIC); | ||
383 | if (!l3_caches) | ||
384 | return; | ||
385 | } | ||
386 | |||
387 | node = amd_get_nb_id(smp_processor_id()); | ||
388 | |||
389 | if (!l3_caches[node]) { | ||
390 | l3_caches[node] = amd_init_l3_cache(node); | ||
391 | l3_caches[node]->can_disable = true; | ||
392 | } | ||
393 | |||
394 | WARN_ON(!l3_caches[node]); | ||
395 | |||
396 | this_leaf->l3 = l3_caches[node]; | ||
345 | } | 397 | } |
346 | 398 | ||
347 | static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, | 399 | static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, |
348 | unsigned int index) | 400 | unsigned int slot) |
349 | { | 401 | { |
350 | int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); | 402 | struct pci_dev *dev = this_leaf->l3->dev; |
351 | int node = amd_get_nb_id(cpu); | ||
352 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
353 | unsigned int reg = 0; | 403 | unsigned int reg = 0; |
354 | 404 | ||
355 | if (!this_leaf->can_disable) | 405 | if (!this_leaf->l3 || !this_leaf->l3->can_disable) |
356 | return -EINVAL; | 406 | return -EINVAL; |
357 | 407 | ||
358 | if (!dev) | 408 | if (!dev) |
359 | return -EINVAL; | 409 | return -EINVAL; |
360 | 410 | ||
361 | pci_read_config_dword(dev, 0x1BC + index * 4, ®); | 411 | pci_read_config_dword(dev, 0x1BC + slot * 4, ®); |
362 | return sprintf(buf, "0x%08x\n", reg); | 412 | return sprintf(buf, "0x%08x\n", reg); |
363 | } | 413 | } |
364 | 414 | ||
365 | #define SHOW_CACHE_DISABLE(index) \ | 415 | #define SHOW_CACHE_DISABLE(slot) \ |
366 | static ssize_t \ | 416 | static ssize_t \ |
367 | show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \ | 417 | show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf) \ |
368 | { \ | 418 | { \ |
369 | return show_cache_disable(this_leaf, buf, index); \ | 419 | return show_cache_disable(this_leaf, buf, slot); \ |
370 | } | 420 | } |
371 | SHOW_CACHE_DISABLE(0) | 421 | SHOW_CACHE_DISABLE(0) |
372 | SHOW_CACHE_DISABLE(1) | 422 | SHOW_CACHE_DISABLE(1) |
373 | 423 | ||
424 | static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu, | ||
425 | unsigned slot, unsigned long idx) | ||
426 | { | ||
427 | int i; | ||
428 | |||
429 | idx |= BIT(30); | ||
430 | |||
431 | /* | ||
432 | * disable index in all 4 subcaches | ||
433 | */ | ||
434 | for (i = 0; i < 4; i++) { | ||
435 | u32 reg = idx | (i << 20); | ||
436 | |||
437 | if (!l3->subcaches[i]) | ||
438 | continue; | ||
439 | |||
440 | pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); | ||
441 | |||
442 | /* | ||
443 | * We need to WBINVD on a core on the node containing the L3 | ||
444 | * cache which indices we disable therefore a simple wbinvd() | ||
445 | * is not sufficient. | ||
446 | */ | ||
447 | wbinvd_on_cpu(cpu); | ||
448 | |||
449 | reg |= BIT(31); | ||
450 | pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg); | ||
451 | } | ||
452 | } | ||
453 | |||
454 | |||
374 | static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, | 455 | static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, |
375 | const char *buf, size_t count, unsigned int index) | 456 | const char *buf, size_t count, |
457 | unsigned int slot) | ||
376 | { | 458 | { |
459 | struct pci_dev *dev = this_leaf->l3->dev; | ||
377 | int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); | 460 | int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); |
378 | int node = amd_get_nb_id(cpu); | ||
379 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
380 | unsigned long val = 0; | 461 | unsigned long val = 0; |
381 | 462 | ||
382 | #define SUBCACHE_MASK (3UL << 20) | 463 | #define SUBCACHE_MASK (3UL << 20) |
383 | #define SUBCACHE_INDEX 0xfff | 464 | #define SUBCACHE_INDEX 0xfff |
384 | 465 | ||
385 | if (!this_leaf->can_disable) | 466 | if (!this_leaf->l3 || !this_leaf->l3->can_disable) |
386 | return -EINVAL; | 467 | return -EINVAL; |
387 | 468 | ||
388 | if (!capable(CAP_SYS_ADMIN)) | 469 | if (!capable(CAP_SYS_ADMIN)) |
@@ -396,26 +477,20 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, | |||
396 | 477 | ||
397 | /* do not allow writes outside of allowed bits */ | 478 | /* do not allow writes outside of allowed bits */ |
398 | if ((val & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) || | 479 | if ((val & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) || |
399 | ((val & SUBCACHE_INDEX) > this_leaf->l3_indices)) | 480 | ((val & SUBCACHE_INDEX) > this_leaf->l3->indices)) |
400 | return -EINVAL; | 481 | return -EINVAL; |
401 | 482 | ||
402 | val |= BIT(30); | 483 | amd_l3_disable_index(this_leaf->l3, cpu, slot, val); |
403 | pci_write_config_dword(dev, 0x1BC + index * 4, val); | 484 | |
404 | /* | ||
405 | * We need to WBINVD on a core on the node containing the L3 cache which | ||
406 | * indices we disable therefore a simple wbinvd() is not sufficient. | ||
407 | */ | ||
408 | wbinvd_on_cpu(cpu); | ||
409 | pci_write_config_dword(dev, 0x1BC + index * 4, val | BIT(31)); | ||
410 | return count; | 485 | return count; |
411 | } | 486 | } |
412 | 487 | ||
413 | #define STORE_CACHE_DISABLE(index) \ | 488 | #define STORE_CACHE_DISABLE(slot) \ |
414 | static ssize_t \ | 489 | static ssize_t \ |
415 | store_cache_disable_##index(struct _cpuid4_info *this_leaf, \ | 490 | store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \ |
416 | const char *buf, size_t count) \ | 491 | const char *buf, size_t count) \ |
417 | { \ | 492 | { \ |
418 | return store_cache_disable(this_leaf, buf, count, index); \ | 493 | return store_cache_disable(this_leaf, buf, count, slot); \ |
419 | } | 494 | } |
420 | STORE_CACHE_DISABLE(0) | 495 | STORE_CACHE_DISABLE(0) |
421 | STORE_CACHE_DISABLE(1) | 496 | STORE_CACHE_DISABLE(1) |
@@ -443,8 +518,7 @@ __cpuinit cpuid4_cache_lookup_regs(int index, | |||
443 | 518 | ||
444 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { | 519 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { |
445 | amd_cpuid4(index, &eax, &ebx, &ecx); | 520 | amd_cpuid4(index, &eax, &ebx, &ecx); |
446 | if (boot_cpu_data.x86 >= 0x10) | 521 | amd_check_l3_disable(index, this_leaf); |
447 | amd_check_l3_disable(index, this_leaf); | ||
448 | } else { | 522 | } else { |
449 | cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); | 523 | cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); |
450 | } | 524 | } |
@@ -701,6 +775,7 @@ static void __cpuinit free_cache_attributes(unsigned int cpu) | |||
701 | for (i = 0; i < num_cache_leaves; i++) | 775 | for (i = 0; i < num_cache_leaves; i++) |
702 | cache_remove_shared_cpu_map(cpu, i); | 776 | cache_remove_shared_cpu_map(cpu, i); |
703 | 777 | ||
778 | kfree(per_cpu(ici_cpuid4_info, cpu)->l3); | ||
704 | kfree(per_cpu(ici_cpuid4_info, cpu)); | 779 | kfree(per_cpu(ici_cpuid4_info, cpu)); |
705 | per_cpu(ici_cpuid4_info, cpu) = NULL; | 780 | per_cpu(ici_cpuid4_info, cpu) = NULL; |
706 | } | 781 | } |
@@ -985,7 +1060,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
985 | 1060 | ||
986 | this_leaf = CPUID4_INFO_IDX(cpu, i); | 1061 | this_leaf = CPUID4_INFO_IDX(cpu, i); |
987 | 1062 | ||
988 | if (this_leaf->can_disable) | 1063 | if (this_leaf->l3 && this_leaf->l3->can_disable) |
989 | ktype_cache.default_attrs = default_l3_attrs; | 1064 | ktype_cache.default_attrs = default_l3_attrs; |
990 | else | 1065 | else |
991 | ktype_cache.default_attrs = default_attrs; | 1066 | ktype_cache.default_attrs = default_attrs; |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 8a6f0afa767e..7a355ddcc64b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -539,7 +539,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
539 | struct mce m; | 539 | struct mce m; |
540 | int i; | 540 | int i; |
541 | 541 | ||
542 | __get_cpu_var(mce_poll_count)++; | 542 | percpu_inc(mce_poll_count); |
543 | 543 | ||
544 | mce_setup(&m); | 544 | mce_setup(&m); |
545 | 545 | ||
@@ -934,7 +934,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
934 | 934 | ||
935 | atomic_inc(&mce_entry); | 935 | atomic_inc(&mce_entry); |
936 | 936 | ||
937 | __get_cpu_var(mce_exception_count)++; | 937 | percpu_inc(mce_exception_count); |
938 | 938 | ||
939 | if (notify_die(DIE_NMI, "machine check", regs, error_code, | 939 | if (notify_die(DIE_NMI, "machine check", regs, error_code, |
940 | 18, SIGKILL) == NOTIFY_STOP) | 940 | 18, SIGKILL) == NOTIFY_STOP) |
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c new file mode 100644 index 000000000000..16f41bbe46b6 --- /dev/null +++ b/arch/x86/kernel/cpu/mshyperv.c | |||
@@ -0,0 +1,55 @@ | |||
1 | /* | ||
2 | * HyperV Detection code. | ||
3 | * | ||
4 | * Copyright (C) 2010, Novell, Inc. | ||
5 | * Author : K. Y. Srinivasan <ksrinivasan@novell.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; version 2 of the License. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <linux/types.h> | ||
14 | #include <linux/module.h> | ||
15 | #include <asm/processor.h> | ||
16 | #include <asm/hypervisor.h> | ||
17 | #include <asm/hyperv.h> | ||
18 | #include <asm/mshyperv.h> | ||
19 | |||
20 | struct ms_hyperv_info ms_hyperv; | ||
21 | |||
22 | static bool __init ms_hyperv_platform(void) | ||
23 | { | ||
24 | u32 eax; | ||
25 | u32 hyp_signature[3]; | ||
26 | |||
27 | if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) | ||
28 | return false; | ||
29 | |||
30 | cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, | ||
31 | &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]); | ||
32 | |||
33 | return eax >= HYPERV_CPUID_MIN && | ||
34 | eax <= HYPERV_CPUID_MAX && | ||
35 | !memcmp("Microsoft Hv", hyp_signature, 12); | ||
36 | } | ||
37 | |||
38 | static void __init ms_hyperv_init_platform(void) | ||
39 | { | ||
40 | /* | ||
41 | * Extract the features and hints | ||
42 | */ | ||
43 | ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES); | ||
44 | ms_hyperv.hints = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO); | ||
45 | |||
46 | printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n", | ||
47 | ms_hyperv.features, ms_hyperv.hints); | ||
48 | } | ||
49 | |||
50 | const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { | ||
51 | .name = "Microsoft HyperV", | ||
52 | .detect = ms_hyperv_platform, | ||
53 | .init_platform = ms_hyperv_init_platform, | ||
54 | }; | ||
55 | EXPORT_SYMBOL(x86_hyper_ms_hyperv); | ||
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index db5bdc8addf8..fd4db0db3708 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -31,46 +31,51 @@ | |||
31 | #include <asm/nmi.h> | 31 | #include <asm/nmi.h> |
32 | #include <asm/compat.h> | 32 | #include <asm/compat.h> |
33 | 33 | ||
34 | static u64 perf_event_mask __read_mostly; | 34 | #if 0 |
35 | #undef wrmsrl | ||
36 | #define wrmsrl(msr, val) \ | ||
37 | do { \ | ||
38 | trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\ | ||
39 | (unsigned long)(val)); \ | ||
40 | native_write_msr((msr), (u32)((u64)(val)), \ | ||
41 | (u32)((u64)(val) >> 32)); \ | ||
42 | } while (0) | ||
43 | #endif | ||
35 | 44 | ||
36 | /* The maximal number of PEBS events: */ | 45 | /* |
37 | #define MAX_PEBS_EVENTS 4 | 46 | * best effort, GUP based copy_from_user() that assumes IRQ or NMI context |
47 | */ | ||
48 | static unsigned long | ||
49 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | ||
50 | { | ||
51 | unsigned long offset, addr = (unsigned long)from; | ||
52 | int type = in_nmi() ? KM_NMI : KM_IRQ0; | ||
53 | unsigned long size, len = 0; | ||
54 | struct page *page; | ||
55 | void *map; | ||
56 | int ret; | ||
38 | 57 | ||
39 | /* The size of a BTS record in bytes: */ | 58 | do { |
40 | #define BTS_RECORD_SIZE 24 | 59 | ret = __get_user_pages_fast(addr, 1, 0, &page); |
60 | if (!ret) | ||
61 | break; | ||
41 | 62 | ||
42 | /* The size of a per-cpu BTS buffer in bytes: */ | 63 | offset = addr & (PAGE_SIZE - 1); |
43 | #define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048) | 64 | size = min(PAGE_SIZE - offset, n - len); |
44 | 65 | ||
45 | /* The BTS overflow threshold in bytes from the end of the buffer: */ | 66 | map = kmap_atomic(page, type); |
46 | #define BTS_OVFL_TH (BTS_RECORD_SIZE * 128) | 67 | memcpy(to, map+offset, size); |
68 | kunmap_atomic(map, type); | ||
69 | put_page(page); | ||
47 | 70 | ||
71 | len += size; | ||
72 | to += size; | ||
73 | addr += size; | ||
48 | 74 | ||
49 | /* | 75 | } while (len < n); |
50 | * Bits in the debugctlmsr controlling branch tracing. | ||
51 | */ | ||
52 | #define X86_DEBUGCTL_TR (1 << 6) | ||
53 | #define X86_DEBUGCTL_BTS (1 << 7) | ||
54 | #define X86_DEBUGCTL_BTINT (1 << 8) | ||
55 | #define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) | ||
56 | #define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) | ||
57 | 76 | ||
58 | /* | 77 | return len; |
59 | * A debug store configuration. | 78 | } |
60 | * | ||
61 | * We only support architectures that use 64bit fields. | ||
62 | */ | ||
63 | struct debug_store { | ||
64 | u64 bts_buffer_base; | ||
65 | u64 bts_index; | ||
66 | u64 bts_absolute_maximum; | ||
67 | u64 bts_interrupt_threshold; | ||
68 | u64 pebs_buffer_base; | ||
69 | u64 pebs_index; | ||
70 | u64 pebs_absolute_maximum; | ||
71 | u64 pebs_interrupt_threshold; | ||
72 | u64 pebs_event_reset[MAX_PEBS_EVENTS]; | ||
73 | }; | ||
74 | 79 | ||
75 | struct event_constraint { | 80 | struct event_constraint { |
76 | union { | 81 | union { |
@@ -89,18 +94,41 @@ struct amd_nb { | |||
89 | struct event_constraint event_constraints[X86_PMC_IDX_MAX]; | 94 | struct event_constraint event_constraints[X86_PMC_IDX_MAX]; |
90 | }; | 95 | }; |
91 | 96 | ||
97 | #define MAX_LBR_ENTRIES 16 | ||
98 | |||
92 | struct cpu_hw_events { | 99 | struct cpu_hw_events { |
100 | /* | ||
101 | * Generic x86 PMC bits | ||
102 | */ | ||
93 | struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ | 103 | struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ |
94 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 104 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
95 | unsigned long interrupts; | ||
96 | int enabled; | 105 | int enabled; |
97 | struct debug_store *ds; | ||
98 | 106 | ||
99 | int n_events; | 107 | int n_events; |
100 | int n_added; | 108 | int n_added; |
101 | int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ | 109 | int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ |
102 | u64 tags[X86_PMC_IDX_MAX]; | 110 | u64 tags[X86_PMC_IDX_MAX]; |
103 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ | 111 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ |
112 | |||
113 | unsigned int group_flag; | ||
114 | |||
115 | /* | ||
116 | * Intel DebugStore bits | ||
117 | */ | ||
118 | struct debug_store *ds; | ||
119 | u64 pebs_enabled; | ||
120 | |||
121 | /* | ||
122 | * Intel LBR bits | ||
123 | */ | ||
124 | int lbr_users; | ||
125 | void *lbr_context; | ||
126 | struct perf_branch_stack lbr_stack; | ||
127 | struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; | ||
128 | |||
129 | /* | ||
130 | * AMD specific bits | ||
131 | */ | ||
104 | struct amd_nb *amd_nb; | 132 | struct amd_nb *amd_nb; |
105 | }; | 133 | }; |
106 | 134 | ||
@@ -114,44 +142,75 @@ struct cpu_hw_events { | |||
114 | #define EVENT_CONSTRAINT(c, n, m) \ | 142 | #define EVENT_CONSTRAINT(c, n, m) \ |
115 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) | 143 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) |
116 | 144 | ||
145 | /* | ||
146 | * Constraint on the Event code. | ||
147 | */ | ||
117 | #define INTEL_EVENT_CONSTRAINT(c, n) \ | 148 | #define INTEL_EVENT_CONSTRAINT(c, n) \ |
118 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) | 149 | EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT) |
119 | 150 | ||
151 | /* | ||
152 | * Constraint on the Event code + UMask + fixed-mask | ||
153 | * | ||
154 | * filter mask to validate fixed counter events. | ||
155 | * the following filters disqualify for fixed counters: | ||
156 | * - inv | ||
157 | * - edge | ||
158 | * - cnt-mask | ||
159 | * The other filters are supported by fixed counters. | ||
160 | * The any-thread option is supported starting with v3. | ||
161 | */ | ||
120 | #define FIXED_EVENT_CONSTRAINT(c, n) \ | 162 | #define FIXED_EVENT_CONSTRAINT(c, n) \ |
121 | EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK) | 163 | EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK) |
164 | |||
165 | /* | ||
166 | * Constraint on the Event code + UMask | ||
167 | */ | ||
168 | #define PEBS_EVENT_CONSTRAINT(c, n) \ | ||
169 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) | ||
122 | 170 | ||
123 | #define EVENT_CONSTRAINT_END \ | 171 | #define EVENT_CONSTRAINT_END \ |
124 | EVENT_CONSTRAINT(0, 0, 0) | 172 | EVENT_CONSTRAINT(0, 0, 0) |
125 | 173 | ||
126 | #define for_each_event_constraint(e, c) \ | 174 | #define for_each_event_constraint(e, c) \ |
127 | for ((e) = (c); (e)->cmask; (e)++) | 175 | for ((e) = (c); (e)->weight; (e)++) |
176 | |||
177 | union perf_capabilities { | ||
178 | struct { | ||
179 | u64 lbr_format : 6; | ||
180 | u64 pebs_trap : 1; | ||
181 | u64 pebs_arch_reg : 1; | ||
182 | u64 pebs_format : 4; | ||
183 | u64 smm_freeze : 1; | ||
184 | }; | ||
185 | u64 capabilities; | ||
186 | }; | ||
128 | 187 | ||
129 | /* | 188 | /* |
130 | * struct x86_pmu - generic x86 pmu | 189 | * struct x86_pmu - generic x86 pmu |
131 | */ | 190 | */ |
132 | struct x86_pmu { | 191 | struct x86_pmu { |
192 | /* | ||
193 | * Generic x86 PMC bits | ||
194 | */ | ||
133 | const char *name; | 195 | const char *name; |
134 | int version; | 196 | int version; |
135 | int (*handle_irq)(struct pt_regs *); | 197 | int (*handle_irq)(struct pt_regs *); |
136 | void (*disable_all)(void); | 198 | void (*disable_all)(void); |
137 | void (*enable_all)(void); | 199 | void (*enable_all)(int added); |
138 | void (*enable)(struct perf_event *); | 200 | void (*enable)(struct perf_event *); |
139 | void (*disable)(struct perf_event *); | 201 | void (*disable)(struct perf_event *); |
202 | int (*hw_config)(struct perf_event *event); | ||
203 | int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); | ||
140 | unsigned eventsel; | 204 | unsigned eventsel; |
141 | unsigned perfctr; | 205 | unsigned perfctr; |
142 | u64 (*event_map)(int); | 206 | u64 (*event_map)(int); |
143 | u64 (*raw_event)(u64); | ||
144 | int max_events; | 207 | int max_events; |
145 | int num_events; | 208 | int num_counters; |
146 | int num_events_fixed; | 209 | int num_counters_fixed; |
147 | int event_bits; | 210 | int cntval_bits; |
148 | u64 event_mask; | 211 | u64 cntval_mask; |
149 | int apic; | 212 | int apic; |
150 | u64 max_period; | 213 | u64 max_period; |
151 | u64 intel_ctrl; | ||
152 | void (*enable_bts)(u64 config); | ||
153 | void (*disable_bts)(void); | ||
154 | |||
155 | struct event_constraint * | 214 | struct event_constraint * |
156 | (*get_event_constraints)(struct cpu_hw_events *cpuc, | 215 | (*get_event_constraints)(struct cpu_hw_events *cpuc, |
157 | struct perf_event *event); | 216 | struct perf_event *event); |
@@ -159,11 +218,32 @@ struct x86_pmu { | |||
159 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, | 218 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, |
160 | struct perf_event *event); | 219 | struct perf_event *event); |
161 | struct event_constraint *event_constraints; | 220 | struct event_constraint *event_constraints; |
221 | void (*quirks)(void); | ||
162 | 222 | ||
163 | int (*cpu_prepare)(int cpu); | 223 | int (*cpu_prepare)(int cpu); |
164 | void (*cpu_starting)(int cpu); | 224 | void (*cpu_starting)(int cpu); |
165 | void (*cpu_dying)(int cpu); | 225 | void (*cpu_dying)(int cpu); |
166 | void (*cpu_dead)(int cpu); | 226 | void (*cpu_dead)(int cpu); |
227 | |||
228 | /* | ||
229 | * Intel Arch Perfmon v2+ | ||
230 | */ | ||
231 | u64 intel_ctrl; | ||
232 | union perf_capabilities intel_cap; | ||
233 | |||
234 | /* | ||
235 | * Intel DebugStore bits | ||
236 | */ | ||
237 | int bts, pebs; | ||
238 | int pebs_record_size; | ||
239 | void (*drain_pebs)(struct pt_regs *regs); | ||
240 | struct event_constraint *pebs_constraints; | ||
241 | |||
242 | /* | ||
243 | * Intel LBR | ||
244 | */ | ||
245 | unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ | ||
246 | int lbr_nr; /* hardware stack size */ | ||
167 | }; | 247 | }; |
168 | 248 | ||
169 | static struct x86_pmu x86_pmu __read_mostly; | 249 | static struct x86_pmu x86_pmu __read_mostly; |
@@ -198,7 +278,7 @@ static u64 | |||
198 | x86_perf_event_update(struct perf_event *event) | 278 | x86_perf_event_update(struct perf_event *event) |
199 | { | 279 | { |
200 | struct hw_perf_event *hwc = &event->hw; | 280 | struct hw_perf_event *hwc = &event->hw; |
201 | int shift = 64 - x86_pmu.event_bits; | 281 | int shift = 64 - x86_pmu.cntval_bits; |
202 | u64 prev_raw_count, new_raw_count; | 282 | u64 prev_raw_count, new_raw_count; |
203 | int idx = hwc->idx; | 283 | int idx = hwc->idx; |
204 | s64 delta; | 284 | s64 delta; |
@@ -241,33 +321,32 @@ again: | |||
241 | static atomic_t active_events; | 321 | static atomic_t active_events; |
242 | static DEFINE_MUTEX(pmc_reserve_mutex); | 322 | static DEFINE_MUTEX(pmc_reserve_mutex); |
243 | 323 | ||
324 | #ifdef CONFIG_X86_LOCAL_APIC | ||
325 | |||
244 | static bool reserve_pmc_hardware(void) | 326 | static bool reserve_pmc_hardware(void) |
245 | { | 327 | { |
246 | #ifdef CONFIG_X86_LOCAL_APIC | ||
247 | int i; | 328 | int i; |
248 | 329 | ||
249 | if (nmi_watchdog == NMI_LOCAL_APIC) | 330 | if (nmi_watchdog == NMI_LOCAL_APIC) |
250 | disable_lapic_nmi_watchdog(); | 331 | disable_lapic_nmi_watchdog(); |
251 | 332 | ||
252 | for (i = 0; i < x86_pmu.num_events; i++) { | 333 | for (i = 0; i < x86_pmu.num_counters; i++) { |
253 | if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) | 334 | if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) |
254 | goto perfctr_fail; | 335 | goto perfctr_fail; |
255 | } | 336 | } |
256 | 337 | ||
257 | for (i = 0; i < x86_pmu.num_events; i++) { | 338 | for (i = 0; i < x86_pmu.num_counters; i++) { |
258 | if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) | 339 | if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) |
259 | goto eventsel_fail; | 340 | goto eventsel_fail; |
260 | } | 341 | } |
261 | #endif | ||
262 | 342 | ||
263 | return true; | 343 | return true; |
264 | 344 | ||
265 | #ifdef CONFIG_X86_LOCAL_APIC | ||
266 | eventsel_fail: | 345 | eventsel_fail: |
267 | for (i--; i >= 0; i--) | 346 | for (i--; i >= 0; i--) |
268 | release_evntsel_nmi(x86_pmu.eventsel + i); | 347 | release_evntsel_nmi(x86_pmu.eventsel + i); |
269 | 348 | ||
270 | i = x86_pmu.num_events; | 349 | i = x86_pmu.num_counters; |
271 | 350 | ||
272 | perfctr_fail: | 351 | perfctr_fail: |
273 | for (i--; i >= 0; i--) | 352 | for (i--; i >= 0; i--) |
@@ -277,128 +356,36 @@ perfctr_fail: | |||
277 | enable_lapic_nmi_watchdog(); | 356 | enable_lapic_nmi_watchdog(); |
278 | 357 | ||
279 | return false; | 358 | return false; |
280 | #endif | ||
281 | } | 359 | } |
282 | 360 | ||
283 | static void release_pmc_hardware(void) | 361 | static void release_pmc_hardware(void) |
284 | { | 362 | { |
285 | #ifdef CONFIG_X86_LOCAL_APIC | ||
286 | int i; | 363 | int i; |
287 | 364 | ||
288 | for (i = 0; i < x86_pmu.num_events; i++) { | 365 | for (i = 0; i < x86_pmu.num_counters; i++) { |
289 | release_perfctr_nmi(x86_pmu.perfctr + i); | 366 | release_perfctr_nmi(x86_pmu.perfctr + i); |
290 | release_evntsel_nmi(x86_pmu.eventsel + i); | 367 | release_evntsel_nmi(x86_pmu.eventsel + i); |
291 | } | 368 | } |
292 | 369 | ||
293 | if (nmi_watchdog == NMI_LOCAL_APIC) | 370 | if (nmi_watchdog == NMI_LOCAL_APIC) |
294 | enable_lapic_nmi_watchdog(); | 371 | enable_lapic_nmi_watchdog(); |
295 | #endif | ||
296 | } | ||
297 | |||
298 | static inline bool bts_available(void) | ||
299 | { | ||
300 | return x86_pmu.enable_bts != NULL; | ||
301 | } | 372 | } |
302 | 373 | ||
303 | static void init_debug_store_on_cpu(int cpu) | 374 | #else |
304 | { | ||
305 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
306 | |||
307 | if (!ds) | ||
308 | return; | ||
309 | |||
310 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, | ||
311 | (u32)((u64)(unsigned long)ds), | ||
312 | (u32)((u64)(unsigned long)ds >> 32)); | ||
313 | } | ||
314 | |||
315 | static void fini_debug_store_on_cpu(int cpu) | ||
316 | { | ||
317 | if (!per_cpu(cpu_hw_events, cpu).ds) | ||
318 | return; | ||
319 | |||
320 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); | ||
321 | } | ||
322 | |||
323 | static void release_bts_hardware(void) | ||
324 | { | ||
325 | int cpu; | ||
326 | |||
327 | if (!bts_available()) | ||
328 | return; | ||
329 | |||
330 | get_online_cpus(); | ||
331 | |||
332 | for_each_online_cpu(cpu) | ||
333 | fini_debug_store_on_cpu(cpu); | ||
334 | |||
335 | for_each_possible_cpu(cpu) { | ||
336 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
337 | |||
338 | if (!ds) | ||
339 | continue; | ||
340 | |||
341 | per_cpu(cpu_hw_events, cpu).ds = NULL; | ||
342 | |||
343 | kfree((void *)(unsigned long)ds->bts_buffer_base); | ||
344 | kfree(ds); | ||
345 | } | ||
346 | |||
347 | put_online_cpus(); | ||
348 | } | ||
349 | |||
350 | static int reserve_bts_hardware(void) | ||
351 | { | ||
352 | int cpu, err = 0; | ||
353 | |||
354 | if (!bts_available()) | ||
355 | return 0; | ||
356 | |||
357 | get_online_cpus(); | ||
358 | |||
359 | for_each_possible_cpu(cpu) { | ||
360 | struct debug_store *ds; | ||
361 | void *buffer; | ||
362 | |||
363 | err = -ENOMEM; | ||
364 | buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); | ||
365 | if (unlikely(!buffer)) | ||
366 | break; | ||
367 | |||
368 | ds = kzalloc(sizeof(*ds), GFP_KERNEL); | ||
369 | if (unlikely(!ds)) { | ||
370 | kfree(buffer); | ||
371 | break; | ||
372 | } | ||
373 | |||
374 | ds->bts_buffer_base = (u64)(unsigned long)buffer; | ||
375 | ds->bts_index = ds->bts_buffer_base; | ||
376 | ds->bts_absolute_maximum = | ||
377 | ds->bts_buffer_base + BTS_BUFFER_SIZE; | ||
378 | ds->bts_interrupt_threshold = | ||
379 | ds->bts_absolute_maximum - BTS_OVFL_TH; | ||
380 | |||
381 | per_cpu(cpu_hw_events, cpu).ds = ds; | ||
382 | err = 0; | ||
383 | } | ||
384 | 375 | ||
385 | if (err) | 376 | static bool reserve_pmc_hardware(void) { return true; } |
386 | release_bts_hardware(); | 377 | static void release_pmc_hardware(void) {} |
387 | else { | ||
388 | for_each_online_cpu(cpu) | ||
389 | init_debug_store_on_cpu(cpu); | ||
390 | } | ||
391 | 378 | ||
392 | put_online_cpus(); | 379 | #endif |
393 | 380 | ||
394 | return err; | 381 | static int reserve_ds_buffers(void); |
395 | } | 382 | static void release_ds_buffers(void); |
396 | 383 | ||
397 | static void hw_perf_event_destroy(struct perf_event *event) | 384 | static void hw_perf_event_destroy(struct perf_event *event) |
398 | { | 385 | { |
399 | if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { | 386 | if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { |
400 | release_pmc_hardware(); | 387 | release_pmc_hardware(); |
401 | release_bts_hardware(); | 388 | release_ds_buffers(); |
402 | mutex_unlock(&pmc_reserve_mutex); | 389 | mutex_unlock(&pmc_reserve_mutex); |
403 | } | 390 | } |
404 | } | 391 | } |
@@ -441,54 +428,11 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) | |||
441 | return 0; | 428 | return 0; |
442 | } | 429 | } |
443 | 430 | ||
444 | /* | 431 | static int x86_setup_perfctr(struct perf_event *event) |
445 | * Setup the hardware configuration for a given attr_type | ||
446 | */ | ||
447 | static int __hw_perf_event_init(struct perf_event *event) | ||
448 | { | 432 | { |
449 | struct perf_event_attr *attr = &event->attr; | 433 | struct perf_event_attr *attr = &event->attr; |
450 | struct hw_perf_event *hwc = &event->hw; | 434 | struct hw_perf_event *hwc = &event->hw; |
451 | u64 config; | 435 | u64 config; |
452 | int err; | ||
453 | |||
454 | if (!x86_pmu_initialized()) | ||
455 | return -ENODEV; | ||
456 | |||
457 | err = 0; | ||
458 | if (!atomic_inc_not_zero(&active_events)) { | ||
459 | mutex_lock(&pmc_reserve_mutex); | ||
460 | if (atomic_read(&active_events) == 0) { | ||
461 | if (!reserve_pmc_hardware()) | ||
462 | err = -EBUSY; | ||
463 | else | ||
464 | err = reserve_bts_hardware(); | ||
465 | } | ||
466 | if (!err) | ||
467 | atomic_inc(&active_events); | ||
468 | mutex_unlock(&pmc_reserve_mutex); | ||
469 | } | ||
470 | if (err) | ||
471 | return err; | ||
472 | |||
473 | event->destroy = hw_perf_event_destroy; | ||
474 | |||
475 | /* | ||
476 | * Generate PMC IRQs: | ||
477 | * (keep 'enabled' bit clear for now) | ||
478 | */ | ||
479 | hwc->config = ARCH_PERFMON_EVENTSEL_INT; | ||
480 | |||
481 | hwc->idx = -1; | ||
482 | hwc->last_cpu = -1; | ||
483 | hwc->last_tag = ~0ULL; | ||
484 | |||
485 | /* | ||
486 | * Count user and OS events unless requested not to. | ||
487 | */ | ||
488 | if (!attr->exclude_user) | ||
489 | hwc->config |= ARCH_PERFMON_EVENTSEL_USR; | ||
490 | if (!attr->exclude_kernel) | ||
491 | hwc->config |= ARCH_PERFMON_EVENTSEL_OS; | ||
492 | 436 | ||
493 | if (!hwc->sample_period) { | 437 | if (!hwc->sample_period) { |
494 | hwc->sample_period = x86_pmu.max_period; | 438 | hwc->sample_period = x86_pmu.max_period; |
@@ -505,16 +449,8 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
505 | return -EOPNOTSUPP; | 449 | return -EOPNOTSUPP; |
506 | } | 450 | } |
507 | 451 | ||
508 | /* | 452 | if (attr->type == PERF_TYPE_RAW) |
509 | * Raw hw_event type provide the config in the hw_event structure | ||
510 | */ | ||
511 | if (attr->type == PERF_TYPE_RAW) { | ||
512 | hwc->config |= x86_pmu.raw_event(attr->config); | ||
513 | if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) && | ||
514 | perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | ||
515 | return -EACCES; | ||
516 | return 0; | 453 | return 0; |
517 | } | ||
518 | 454 | ||
519 | if (attr->type == PERF_TYPE_HW_CACHE) | 455 | if (attr->type == PERF_TYPE_HW_CACHE) |
520 | return set_ext_hw_attr(hwc, attr); | 456 | return set_ext_hw_attr(hwc, attr); |
@@ -539,11 +475,11 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
539 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && | 475 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && |
540 | (hwc->sample_period == 1)) { | 476 | (hwc->sample_period == 1)) { |
541 | /* BTS is not supported by this architecture. */ | 477 | /* BTS is not supported by this architecture. */ |
542 | if (!bts_available()) | 478 | if (!x86_pmu.bts) |
543 | return -EOPNOTSUPP; | 479 | return -EOPNOTSUPP; |
544 | 480 | ||
545 | /* BTS is currently only allowed for user-mode. */ | 481 | /* BTS is currently only allowed for user-mode. */ |
546 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) | 482 | if (!attr->exclude_kernel) |
547 | return -EOPNOTSUPP; | 483 | return -EOPNOTSUPP; |
548 | } | 484 | } |
549 | 485 | ||
@@ -552,12 +488,87 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
552 | return 0; | 488 | return 0; |
553 | } | 489 | } |
554 | 490 | ||
491 | static int x86_pmu_hw_config(struct perf_event *event) | ||
492 | { | ||
493 | if (event->attr.precise_ip) { | ||
494 | int precise = 0; | ||
495 | |||
496 | /* Support for constant skid */ | ||
497 | if (x86_pmu.pebs) | ||
498 | precise++; | ||
499 | |||
500 | /* Support for IP fixup */ | ||
501 | if (x86_pmu.lbr_nr) | ||
502 | precise++; | ||
503 | |||
504 | if (event->attr.precise_ip > precise) | ||
505 | return -EOPNOTSUPP; | ||
506 | } | ||
507 | |||
508 | /* | ||
509 | * Generate PMC IRQs: | ||
510 | * (keep 'enabled' bit clear for now) | ||
511 | */ | ||
512 | event->hw.config = ARCH_PERFMON_EVENTSEL_INT; | ||
513 | |||
514 | /* | ||
515 | * Count user and OS events unless requested not to | ||
516 | */ | ||
517 | if (!event->attr.exclude_user) | ||
518 | event->hw.config |= ARCH_PERFMON_EVENTSEL_USR; | ||
519 | if (!event->attr.exclude_kernel) | ||
520 | event->hw.config |= ARCH_PERFMON_EVENTSEL_OS; | ||
521 | |||
522 | if (event->attr.type == PERF_TYPE_RAW) | ||
523 | event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK; | ||
524 | |||
525 | return x86_setup_perfctr(event); | ||
526 | } | ||
527 | |||
528 | /* | ||
529 | * Setup the hardware configuration for a given attr_type | ||
530 | */ | ||
531 | static int __hw_perf_event_init(struct perf_event *event) | ||
532 | { | ||
533 | int err; | ||
534 | |||
535 | if (!x86_pmu_initialized()) | ||
536 | return -ENODEV; | ||
537 | |||
538 | err = 0; | ||
539 | if (!atomic_inc_not_zero(&active_events)) { | ||
540 | mutex_lock(&pmc_reserve_mutex); | ||
541 | if (atomic_read(&active_events) == 0) { | ||
542 | if (!reserve_pmc_hardware()) | ||
543 | err = -EBUSY; | ||
544 | else { | ||
545 | err = reserve_ds_buffers(); | ||
546 | if (err) | ||
547 | release_pmc_hardware(); | ||
548 | } | ||
549 | } | ||
550 | if (!err) | ||
551 | atomic_inc(&active_events); | ||
552 | mutex_unlock(&pmc_reserve_mutex); | ||
553 | } | ||
554 | if (err) | ||
555 | return err; | ||
556 | |||
557 | event->destroy = hw_perf_event_destroy; | ||
558 | |||
559 | event->hw.idx = -1; | ||
560 | event->hw.last_cpu = -1; | ||
561 | event->hw.last_tag = ~0ULL; | ||
562 | |||
563 | return x86_pmu.hw_config(event); | ||
564 | } | ||
565 | |||
555 | static void x86_pmu_disable_all(void) | 566 | static void x86_pmu_disable_all(void) |
556 | { | 567 | { |
557 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 568 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
558 | int idx; | 569 | int idx; |
559 | 570 | ||
560 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 571 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
561 | u64 val; | 572 | u64 val; |
562 | 573 | ||
563 | if (!test_bit(idx, cpuc->active_mask)) | 574 | if (!test_bit(idx, cpuc->active_mask)) |
@@ -587,12 +598,12 @@ void hw_perf_disable(void) | |||
587 | x86_pmu.disable_all(); | 598 | x86_pmu.disable_all(); |
588 | } | 599 | } |
589 | 600 | ||
590 | static void x86_pmu_enable_all(void) | 601 | static void x86_pmu_enable_all(int added) |
591 | { | 602 | { |
592 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 603 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
593 | int idx; | 604 | int idx; |
594 | 605 | ||
595 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 606 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
596 | struct perf_event *event = cpuc->events[idx]; | 607 | struct perf_event *event = cpuc->events[idx]; |
597 | u64 val; | 608 | u64 val; |
598 | 609 | ||
@@ -667,14 +678,14 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | |||
667 | * assign events to counters starting with most | 678 | * assign events to counters starting with most |
668 | * constrained events. | 679 | * constrained events. |
669 | */ | 680 | */ |
670 | wmax = x86_pmu.num_events; | 681 | wmax = x86_pmu.num_counters; |
671 | 682 | ||
672 | /* | 683 | /* |
673 | * when fixed event counters are present, | 684 | * when fixed event counters are present, |
674 | * wmax is incremented by 1 to account | 685 | * wmax is incremented by 1 to account |
675 | * for one more choice | 686 | * for one more choice |
676 | */ | 687 | */ |
677 | if (x86_pmu.num_events_fixed) | 688 | if (x86_pmu.num_counters_fixed) |
678 | wmax++; | 689 | wmax++; |
679 | 690 | ||
680 | for (w = 1, num = n; num && w <= wmax; w++) { | 691 | for (w = 1, num = n; num && w <= wmax; w++) { |
@@ -724,7 +735,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, | |||
724 | struct perf_event *event; | 735 | struct perf_event *event; |
725 | int n, max_count; | 736 | int n, max_count; |
726 | 737 | ||
727 | max_count = x86_pmu.num_events + x86_pmu.num_events_fixed; | 738 | max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed; |
728 | 739 | ||
729 | /* current number of events already accepted */ | 740 | /* current number of events already accepted */ |
730 | n = cpuc->n_events; | 741 | n = cpuc->n_events; |
@@ -795,7 +806,7 @@ void hw_perf_enable(void) | |||
795 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 806 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
796 | struct perf_event *event; | 807 | struct perf_event *event; |
797 | struct hw_perf_event *hwc; | 808 | struct hw_perf_event *hwc; |
798 | int i; | 809 | int i, added = cpuc->n_added; |
799 | 810 | ||
800 | if (!x86_pmu_initialized()) | 811 | if (!x86_pmu_initialized()) |
801 | return; | 812 | return; |
@@ -847,19 +858,20 @@ void hw_perf_enable(void) | |||
847 | cpuc->enabled = 1; | 858 | cpuc->enabled = 1; |
848 | barrier(); | 859 | barrier(); |
849 | 860 | ||
850 | x86_pmu.enable_all(); | 861 | x86_pmu.enable_all(added); |
851 | } | 862 | } |
852 | 863 | ||
853 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc) | 864 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, |
865 | u64 enable_mask) | ||
854 | { | 866 | { |
855 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | 867 | wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask); |
856 | hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE); | ||
857 | } | 868 | } |
858 | 869 | ||
859 | static inline void x86_pmu_disable_event(struct perf_event *event) | 870 | static inline void x86_pmu_disable_event(struct perf_event *event) |
860 | { | 871 | { |
861 | struct hw_perf_event *hwc = &event->hw; | 872 | struct hw_perf_event *hwc = &event->hw; |
862 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config); | 873 | |
874 | wrmsrl(hwc->config_base + hwc->idx, hwc->config); | ||
863 | } | 875 | } |
864 | 876 | ||
865 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); | 877 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); |
@@ -874,7 +886,7 @@ x86_perf_event_set_period(struct perf_event *event) | |||
874 | struct hw_perf_event *hwc = &event->hw; | 886 | struct hw_perf_event *hwc = &event->hw; |
875 | s64 left = atomic64_read(&hwc->period_left); | 887 | s64 left = atomic64_read(&hwc->period_left); |
876 | s64 period = hwc->sample_period; | 888 | s64 period = hwc->sample_period; |
877 | int err, ret = 0, idx = hwc->idx; | 889 | int ret = 0, idx = hwc->idx; |
878 | 890 | ||
879 | if (idx == X86_PMC_IDX_FIXED_BTS) | 891 | if (idx == X86_PMC_IDX_FIXED_BTS) |
880 | return 0; | 892 | return 0; |
@@ -912,8 +924,8 @@ x86_perf_event_set_period(struct perf_event *event) | |||
912 | */ | 924 | */ |
913 | atomic64_set(&hwc->prev_count, (u64)-left); | 925 | atomic64_set(&hwc->prev_count, (u64)-left); |
914 | 926 | ||
915 | err = checking_wrmsrl(hwc->event_base + idx, | 927 | wrmsrl(hwc->event_base + idx, |
916 | (u64)(-left) & x86_pmu.event_mask); | 928 | (u64)(-left) & x86_pmu.cntval_mask); |
917 | 929 | ||
918 | perf_event_update_userpage(event); | 930 | perf_event_update_userpage(event); |
919 | 931 | ||
@@ -924,7 +936,8 @@ static void x86_pmu_enable_event(struct perf_event *event) | |||
924 | { | 936 | { |
925 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 937 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
926 | if (cpuc->enabled) | 938 | if (cpuc->enabled) |
927 | __x86_pmu_enable_event(&event->hw); | 939 | __x86_pmu_enable_event(&event->hw, |
940 | ARCH_PERFMON_EVENTSEL_ENABLE); | ||
928 | } | 941 | } |
929 | 942 | ||
930 | /* | 943 | /* |
@@ -950,7 +963,15 @@ static int x86_pmu_enable(struct perf_event *event) | |||
950 | if (n < 0) | 963 | if (n < 0) |
951 | return n; | 964 | return n; |
952 | 965 | ||
953 | ret = x86_schedule_events(cpuc, n, assign); | 966 | /* |
967 | * If group events scheduling transaction was started, | ||
968 | * skip the schedulability test here, it will be peformed | ||
969 | * at commit time(->commit_txn) as a whole | ||
970 | */ | ||
971 | if (cpuc->group_flag & PERF_EVENT_TXN_STARTED) | ||
972 | goto out; | ||
973 | |||
974 | ret = x86_pmu.schedule_events(cpuc, n, assign); | ||
954 | if (ret) | 975 | if (ret) |
955 | return ret; | 976 | return ret; |
956 | /* | 977 | /* |
@@ -959,6 +980,7 @@ static int x86_pmu_enable(struct perf_event *event) | |||
959 | */ | 980 | */ |
960 | memcpy(cpuc->assign, assign, n*sizeof(int)); | 981 | memcpy(cpuc->assign, assign, n*sizeof(int)); |
961 | 982 | ||
983 | out: | ||
962 | cpuc->n_events = n; | 984 | cpuc->n_events = n; |
963 | cpuc->n_added += n - n0; | 985 | cpuc->n_added += n - n0; |
964 | 986 | ||
@@ -991,11 +1013,12 @@ static void x86_pmu_unthrottle(struct perf_event *event) | |||
991 | void perf_event_print_debug(void) | 1013 | void perf_event_print_debug(void) |
992 | { | 1014 | { |
993 | u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; | 1015 | u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; |
1016 | u64 pebs; | ||
994 | struct cpu_hw_events *cpuc; | 1017 | struct cpu_hw_events *cpuc; |
995 | unsigned long flags; | 1018 | unsigned long flags; |
996 | int cpu, idx; | 1019 | int cpu, idx; |
997 | 1020 | ||
998 | if (!x86_pmu.num_events) | 1021 | if (!x86_pmu.num_counters) |
999 | return; | 1022 | return; |
1000 | 1023 | ||
1001 | local_irq_save(flags); | 1024 | local_irq_save(flags); |
@@ -1008,16 +1031,18 @@ void perf_event_print_debug(void) | |||
1008 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | 1031 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); |
1009 | rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); | 1032 | rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); |
1010 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); | 1033 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); |
1034 | rdmsrl(MSR_IA32_PEBS_ENABLE, pebs); | ||
1011 | 1035 | ||
1012 | pr_info("\n"); | 1036 | pr_info("\n"); |
1013 | pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); | 1037 | pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); |
1014 | pr_info("CPU#%d: status: %016llx\n", cpu, status); | 1038 | pr_info("CPU#%d: status: %016llx\n", cpu, status); |
1015 | pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); | 1039 | pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); |
1016 | pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); | 1040 | pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); |
1041 | pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs); | ||
1017 | } | 1042 | } |
1018 | pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); | 1043 | pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); |
1019 | 1044 | ||
1020 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 1045 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
1021 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); | 1046 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); |
1022 | rdmsrl(x86_pmu.perfctr + idx, pmc_count); | 1047 | rdmsrl(x86_pmu.perfctr + idx, pmc_count); |
1023 | 1048 | ||
@@ -1030,7 +1055,7 @@ void perf_event_print_debug(void) | |||
1030 | pr_info("CPU#%d: gen-PMC%d left: %016llx\n", | 1055 | pr_info("CPU#%d: gen-PMC%d left: %016llx\n", |
1031 | cpu, idx, prev_left); | 1056 | cpu, idx, prev_left); |
1032 | } | 1057 | } |
1033 | for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { | 1058 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { |
1034 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); | 1059 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); |
1035 | 1060 | ||
1036 | pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", | 1061 | pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", |
@@ -1095,7 +1120,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
1095 | 1120 | ||
1096 | cpuc = &__get_cpu_var(cpu_hw_events); | 1121 | cpuc = &__get_cpu_var(cpu_hw_events); |
1097 | 1122 | ||
1098 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 1123 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
1099 | if (!test_bit(idx, cpuc->active_mask)) | 1124 | if (!test_bit(idx, cpuc->active_mask)) |
1100 | continue; | 1125 | continue; |
1101 | 1126 | ||
@@ -1103,7 +1128,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) | |||
1103 | hwc = &event->hw; | 1128 | hwc = &event->hw; |
1104 | 1129 | ||
1105 | val = x86_perf_event_update(event); | 1130 | val = x86_perf_event_update(event); |
1106 | if (val & (1ULL << (x86_pmu.event_bits - 1))) | 1131 | if (val & (1ULL << (x86_pmu.cntval_bits - 1))) |
1107 | continue; | 1132 | continue; |
1108 | 1133 | ||
1109 | /* | 1134 | /* |
@@ -1146,7 +1171,6 @@ void set_perf_event_pending(void) | |||
1146 | 1171 | ||
1147 | void perf_events_lapic_init(void) | 1172 | void perf_events_lapic_init(void) |
1148 | { | 1173 | { |
1149 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1150 | if (!x86_pmu.apic || !x86_pmu_initialized()) | 1174 | if (!x86_pmu.apic || !x86_pmu_initialized()) |
1151 | return; | 1175 | return; |
1152 | 1176 | ||
@@ -1154,7 +1178,6 @@ void perf_events_lapic_init(void) | |||
1154 | * Always use NMI for PMU | 1178 | * Always use NMI for PMU |
1155 | */ | 1179 | */ |
1156 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1180 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
1157 | #endif | ||
1158 | } | 1181 | } |
1159 | 1182 | ||
1160 | static int __kprobes | 1183 | static int __kprobes |
@@ -1178,9 +1201,7 @@ perf_event_nmi_handler(struct notifier_block *self, | |||
1178 | 1201 | ||
1179 | regs = args->regs; | 1202 | regs = args->regs; |
1180 | 1203 | ||
1181 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1182 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1204 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
1183 | #endif | ||
1184 | /* | 1205 | /* |
1185 | * Can't rely on the handled return value to say it was our NMI, two | 1206 | * Can't rely on the handled return value to say it was our NMI, two |
1186 | * events could trigger 'simultaneously' raising two back-to-back NMIs. | 1207 | * events could trigger 'simultaneously' raising two back-to-back NMIs. |
@@ -1217,118 +1238,11 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | |||
1217 | return &unconstrained; | 1238 | return &unconstrained; |
1218 | } | 1239 | } |
1219 | 1240 | ||
1220 | static int x86_event_sched_in(struct perf_event *event, | ||
1221 | struct perf_cpu_context *cpuctx) | ||
1222 | { | ||
1223 | int ret = 0; | ||
1224 | |||
1225 | event->state = PERF_EVENT_STATE_ACTIVE; | ||
1226 | event->oncpu = smp_processor_id(); | ||
1227 | event->tstamp_running += event->ctx->time - event->tstamp_stopped; | ||
1228 | |||
1229 | if (!is_x86_event(event)) | ||
1230 | ret = event->pmu->enable(event); | ||
1231 | |||
1232 | if (!ret && !is_software_event(event)) | ||
1233 | cpuctx->active_oncpu++; | ||
1234 | |||
1235 | if (!ret && event->attr.exclusive) | ||
1236 | cpuctx->exclusive = 1; | ||
1237 | |||
1238 | return ret; | ||
1239 | } | ||
1240 | |||
1241 | static void x86_event_sched_out(struct perf_event *event, | ||
1242 | struct perf_cpu_context *cpuctx) | ||
1243 | { | ||
1244 | event->state = PERF_EVENT_STATE_INACTIVE; | ||
1245 | event->oncpu = -1; | ||
1246 | |||
1247 | if (!is_x86_event(event)) | ||
1248 | event->pmu->disable(event); | ||
1249 | |||
1250 | event->tstamp_running -= event->ctx->time - event->tstamp_stopped; | ||
1251 | |||
1252 | if (!is_software_event(event)) | ||
1253 | cpuctx->active_oncpu--; | ||
1254 | |||
1255 | if (event->attr.exclusive || !cpuctx->active_oncpu) | ||
1256 | cpuctx->exclusive = 0; | ||
1257 | } | ||
1258 | |||
1259 | /* | ||
1260 | * Called to enable a whole group of events. | ||
1261 | * Returns 1 if the group was enabled, or -EAGAIN if it could not be. | ||
1262 | * Assumes the caller has disabled interrupts and has | ||
1263 | * frozen the PMU with hw_perf_save_disable. | ||
1264 | * | ||
1265 | * called with PMU disabled. If successful and return value 1, | ||
1266 | * then guaranteed to call perf_enable() and hw_perf_enable() | ||
1267 | */ | ||
1268 | int hw_perf_group_sched_in(struct perf_event *leader, | ||
1269 | struct perf_cpu_context *cpuctx, | ||
1270 | struct perf_event_context *ctx) | ||
1271 | { | ||
1272 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1273 | struct perf_event *sub; | ||
1274 | int assign[X86_PMC_IDX_MAX]; | ||
1275 | int n0, n1, ret; | ||
1276 | |||
1277 | /* n0 = total number of events */ | ||
1278 | n0 = collect_events(cpuc, leader, true); | ||
1279 | if (n0 < 0) | ||
1280 | return n0; | ||
1281 | |||
1282 | ret = x86_schedule_events(cpuc, n0, assign); | ||
1283 | if (ret) | ||
1284 | return ret; | ||
1285 | |||
1286 | ret = x86_event_sched_in(leader, cpuctx); | ||
1287 | if (ret) | ||
1288 | return ret; | ||
1289 | |||
1290 | n1 = 1; | ||
1291 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { | ||
1292 | if (sub->state > PERF_EVENT_STATE_OFF) { | ||
1293 | ret = x86_event_sched_in(sub, cpuctx); | ||
1294 | if (ret) | ||
1295 | goto undo; | ||
1296 | ++n1; | ||
1297 | } | ||
1298 | } | ||
1299 | /* | ||
1300 | * copy new assignment, now we know it is possible | ||
1301 | * will be used by hw_perf_enable() | ||
1302 | */ | ||
1303 | memcpy(cpuc->assign, assign, n0*sizeof(int)); | ||
1304 | |||
1305 | cpuc->n_events = n0; | ||
1306 | cpuc->n_added += n1; | ||
1307 | ctx->nr_active += n1; | ||
1308 | |||
1309 | /* | ||
1310 | * 1 means successful and events are active | ||
1311 | * This is not quite true because we defer | ||
1312 | * actual activation until hw_perf_enable() but | ||
1313 | * this way we* ensure caller won't try to enable | ||
1314 | * individual events | ||
1315 | */ | ||
1316 | return 1; | ||
1317 | undo: | ||
1318 | x86_event_sched_out(leader, cpuctx); | ||
1319 | n0 = 1; | ||
1320 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { | ||
1321 | if (sub->state == PERF_EVENT_STATE_ACTIVE) { | ||
1322 | x86_event_sched_out(sub, cpuctx); | ||
1323 | if (++n0 == n1) | ||
1324 | break; | ||
1325 | } | ||
1326 | } | ||
1327 | return ret; | ||
1328 | } | ||
1329 | |||
1330 | #include "perf_event_amd.c" | 1241 | #include "perf_event_amd.c" |
1331 | #include "perf_event_p6.c" | 1242 | #include "perf_event_p6.c" |
1243 | #include "perf_event_p4.c" | ||
1244 | #include "perf_event_intel_lbr.c" | ||
1245 | #include "perf_event_intel_ds.c" | ||
1332 | #include "perf_event_intel.c" | 1246 | #include "perf_event_intel.c" |
1333 | 1247 | ||
1334 | static int __cpuinit | 1248 | static int __cpuinit |
@@ -1402,48 +1316,50 @@ void __init init_hw_perf_events(void) | |||
1402 | 1316 | ||
1403 | pr_cont("%s PMU driver.\n", x86_pmu.name); | 1317 | pr_cont("%s PMU driver.\n", x86_pmu.name); |
1404 | 1318 | ||
1405 | if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { | 1319 | if (x86_pmu.quirks) |
1320 | x86_pmu.quirks(); | ||
1321 | |||
1322 | if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { | ||
1406 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", | 1323 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", |
1407 | x86_pmu.num_events, X86_PMC_MAX_GENERIC); | 1324 | x86_pmu.num_counters, X86_PMC_MAX_GENERIC); |
1408 | x86_pmu.num_events = X86_PMC_MAX_GENERIC; | 1325 | x86_pmu.num_counters = X86_PMC_MAX_GENERIC; |
1409 | } | 1326 | } |
1410 | perf_event_mask = (1 << x86_pmu.num_events) - 1; | 1327 | x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; |
1411 | perf_max_events = x86_pmu.num_events; | 1328 | perf_max_events = x86_pmu.num_counters; |
1412 | 1329 | ||
1413 | if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) { | 1330 | if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { |
1414 | WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", | 1331 | WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", |
1415 | x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED); | 1332 | x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); |
1416 | x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED; | 1333 | x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; |
1417 | } | 1334 | } |
1418 | 1335 | ||
1419 | perf_event_mask |= | 1336 | x86_pmu.intel_ctrl |= |
1420 | ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED; | 1337 | ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; |
1421 | x86_pmu.intel_ctrl = perf_event_mask; | ||
1422 | 1338 | ||
1423 | perf_events_lapic_init(); | 1339 | perf_events_lapic_init(); |
1424 | register_die_notifier(&perf_event_nmi_notifier); | 1340 | register_die_notifier(&perf_event_nmi_notifier); |
1425 | 1341 | ||
1426 | unconstrained = (struct event_constraint) | 1342 | unconstrained = (struct event_constraint) |
1427 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, | 1343 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, |
1428 | 0, x86_pmu.num_events); | 1344 | 0, x86_pmu.num_counters); |
1429 | 1345 | ||
1430 | if (x86_pmu.event_constraints) { | 1346 | if (x86_pmu.event_constraints) { |
1431 | for_each_event_constraint(c, x86_pmu.event_constraints) { | 1347 | for_each_event_constraint(c, x86_pmu.event_constraints) { |
1432 | if (c->cmask != INTEL_ARCH_FIXED_MASK) | 1348 | if (c->cmask != X86_RAW_EVENT_MASK) |
1433 | continue; | 1349 | continue; |
1434 | 1350 | ||
1435 | c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1; | 1351 | c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; |
1436 | c->weight += x86_pmu.num_events; | 1352 | c->weight += x86_pmu.num_counters; |
1437 | } | 1353 | } |
1438 | } | 1354 | } |
1439 | 1355 | ||
1440 | pr_info("... version: %d\n", x86_pmu.version); | 1356 | pr_info("... version: %d\n", x86_pmu.version); |
1441 | pr_info("... bit width: %d\n", x86_pmu.event_bits); | 1357 | pr_info("... bit width: %d\n", x86_pmu.cntval_bits); |
1442 | pr_info("... generic registers: %d\n", x86_pmu.num_events); | 1358 | pr_info("... generic registers: %d\n", x86_pmu.num_counters); |
1443 | pr_info("... value mask: %016Lx\n", x86_pmu.event_mask); | 1359 | pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask); |
1444 | pr_info("... max period: %016Lx\n", x86_pmu.max_period); | 1360 | pr_info("... max period: %016Lx\n", x86_pmu.max_period); |
1445 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); | 1361 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); |
1446 | pr_info("... event mask: %016Lx\n", perf_event_mask); | 1362 | pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); |
1447 | 1363 | ||
1448 | perf_cpu_notifier(x86_pmu_notifier); | 1364 | perf_cpu_notifier(x86_pmu_notifier); |
1449 | } | 1365 | } |
@@ -1453,6 +1369,59 @@ static inline void x86_pmu_read(struct perf_event *event) | |||
1453 | x86_perf_event_update(event); | 1369 | x86_perf_event_update(event); |
1454 | } | 1370 | } |
1455 | 1371 | ||
1372 | /* | ||
1373 | * Start group events scheduling transaction | ||
1374 | * Set the flag to make pmu::enable() not perform the | ||
1375 | * schedulability test, it will be performed at commit time | ||
1376 | */ | ||
1377 | static void x86_pmu_start_txn(const struct pmu *pmu) | ||
1378 | { | ||
1379 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1380 | |||
1381 | cpuc->group_flag |= PERF_EVENT_TXN_STARTED; | ||
1382 | } | ||
1383 | |||
1384 | /* | ||
1385 | * Stop group events scheduling transaction | ||
1386 | * Clear the flag and pmu::enable() will perform the | ||
1387 | * schedulability test. | ||
1388 | */ | ||
1389 | static void x86_pmu_cancel_txn(const struct pmu *pmu) | ||
1390 | { | ||
1391 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1392 | |||
1393 | cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED; | ||
1394 | } | ||
1395 | |||
1396 | /* | ||
1397 | * Commit group events scheduling transaction | ||
1398 | * Perform the group schedulability test as a whole | ||
1399 | * Return 0 if success | ||
1400 | */ | ||
1401 | static int x86_pmu_commit_txn(const struct pmu *pmu) | ||
1402 | { | ||
1403 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1404 | int assign[X86_PMC_IDX_MAX]; | ||
1405 | int n, ret; | ||
1406 | |||
1407 | n = cpuc->n_events; | ||
1408 | |||
1409 | if (!x86_pmu_initialized()) | ||
1410 | return -EAGAIN; | ||
1411 | |||
1412 | ret = x86_pmu.schedule_events(cpuc, n, assign); | ||
1413 | if (ret) | ||
1414 | return ret; | ||
1415 | |||
1416 | /* | ||
1417 | * copy new assignment, now we know it is possible | ||
1418 | * will be used by hw_perf_enable() | ||
1419 | */ | ||
1420 | memcpy(cpuc->assign, assign, n*sizeof(int)); | ||
1421 | |||
1422 | return 0; | ||
1423 | } | ||
1424 | |||
1456 | static const struct pmu pmu = { | 1425 | static const struct pmu pmu = { |
1457 | .enable = x86_pmu_enable, | 1426 | .enable = x86_pmu_enable, |
1458 | .disable = x86_pmu_disable, | 1427 | .disable = x86_pmu_disable, |
@@ -1460,9 +1429,38 @@ static const struct pmu pmu = { | |||
1460 | .stop = x86_pmu_stop, | 1429 | .stop = x86_pmu_stop, |
1461 | .read = x86_pmu_read, | 1430 | .read = x86_pmu_read, |
1462 | .unthrottle = x86_pmu_unthrottle, | 1431 | .unthrottle = x86_pmu_unthrottle, |
1432 | .start_txn = x86_pmu_start_txn, | ||
1433 | .cancel_txn = x86_pmu_cancel_txn, | ||
1434 | .commit_txn = x86_pmu_commit_txn, | ||
1463 | }; | 1435 | }; |
1464 | 1436 | ||
1465 | /* | 1437 | /* |
1438 | * validate that we can schedule this event | ||
1439 | */ | ||
1440 | static int validate_event(struct perf_event *event) | ||
1441 | { | ||
1442 | struct cpu_hw_events *fake_cpuc; | ||
1443 | struct event_constraint *c; | ||
1444 | int ret = 0; | ||
1445 | |||
1446 | fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); | ||
1447 | if (!fake_cpuc) | ||
1448 | return -ENOMEM; | ||
1449 | |||
1450 | c = x86_pmu.get_event_constraints(fake_cpuc, event); | ||
1451 | |||
1452 | if (!c || !c->weight) | ||
1453 | ret = -ENOSPC; | ||
1454 | |||
1455 | if (x86_pmu.put_event_constraints) | ||
1456 | x86_pmu.put_event_constraints(fake_cpuc, event); | ||
1457 | |||
1458 | kfree(fake_cpuc); | ||
1459 | |||
1460 | return ret; | ||
1461 | } | ||
1462 | |||
1463 | /* | ||
1466 | * validate a single event group | 1464 | * validate a single event group |
1467 | * | 1465 | * |
1468 | * validation include: | 1466 | * validation include: |
@@ -1502,7 +1500,7 @@ static int validate_group(struct perf_event *event) | |||
1502 | 1500 | ||
1503 | fake_cpuc->n_events = n; | 1501 | fake_cpuc->n_events = n; |
1504 | 1502 | ||
1505 | ret = x86_schedule_events(fake_cpuc, n, NULL); | 1503 | ret = x86_pmu.schedule_events(fake_cpuc, n, NULL); |
1506 | 1504 | ||
1507 | out_free: | 1505 | out_free: |
1508 | kfree(fake_cpuc); | 1506 | kfree(fake_cpuc); |
@@ -1527,6 +1525,8 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) | |||
1527 | 1525 | ||
1528 | if (event->group_leader != event) | 1526 | if (event->group_leader != event) |
1529 | err = validate_group(event); | 1527 | err = validate_group(event); |
1528 | else | ||
1529 | err = validate_event(event); | ||
1530 | 1530 | ||
1531 | event->pmu = tmp; | 1531 | event->pmu = tmp; |
1532 | } | 1532 | } |
@@ -1574,8 +1574,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) | |||
1574 | { | 1574 | { |
1575 | struct perf_callchain_entry *entry = data; | 1575 | struct perf_callchain_entry *entry = data; |
1576 | 1576 | ||
1577 | if (reliable) | 1577 | callchain_store(entry, addr); |
1578 | callchain_store(entry, addr); | ||
1579 | } | 1578 | } |
1580 | 1579 | ||
1581 | static const struct stacktrace_ops backtrace_ops = { | 1580 | static const struct stacktrace_ops backtrace_ops = { |
@@ -1597,41 +1596,6 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
1597 | dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); | 1596 | dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); |
1598 | } | 1597 | } |
1599 | 1598 | ||
1600 | /* | ||
1601 | * best effort, GUP based copy_from_user() that assumes IRQ or NMI context | ||
1602 | */ | ||
1603 | static unsigned long | ||
1604 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | ||
1605 | { | ||
1606 | unsigned long offset, addr = (unsigned long)from; | ||
1607 | int type = in_nmi() ? KM_NMI : KM_IRQ0; | ||
1608 | unsigned long size, len = 0; | ||
1609 | struct page *page; | ||
1610 | void *map; | ||
1611 | int ret; | ||
1612 | |||
1613 | do { | ||
1614 | ret = __get_user_pages_fast(addr, 1, 0, &page); | ||
1615 | if (!ret) | ||
1616 | break; | ||
1617 | |||
1618 | offset = addr & (PAGE_SIZE - 1); | ||
1619 | size = min(PAGE_SIZE - offset, n - len); | ||
1620 | |||
1621 | map = kmap_atomic(page, type); | ||
1622 | memcpy(to, map+offset, size); | ||
1623 | kunmap_atomic(map, type); | ||
1624 | put_page(page); | ||
1625 | |||
1626 | len += size; | ||
1627 | to += size; | ||
1628 | addr += size; | ||
1629 | |||
1630 | } while (len < n); | ||
1631 | |||
1632 | return len; | ||
1633 | } | ||
1634 | |||
1635 | #ifdef CONFIG_COMPAT | 1599 | #ifdef CONFIG_COMPAT |
1636 | static inline int | 1600 | static inline int |
1637 | perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) | 1601 | perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) |
@@ -1727,6 +1691,11 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |||
1727 | { | 1691 | { |
1728 | struct perf_callchain_entry *entry; | 1692 | struct perf_callchain_entry *entry; |
1729 | 1693 | ||
1694 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { | ||
1695 | /* TODO: We don't support guest os callchain now */ | ||
1696 | return NULL; | ||
1697 | } | ||
1698 | |||
1730 | if (in_nmi()) | 1699 | if (in_nmi()) |
1731 | entry = &__get_cpu_var(pmc_nmi_entry); | 1700 | entry = &__get_cpu_var(pmc_nmi_entry); |
1732 | else | 1701 | else |
@@ -1750,3 +1719,37 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski | |||
1750 | regs->cs = __KERNEL_CS; | 1719 | regs->cs = __KERNEL_CS; |
1751 | local_save_flags(regs->flags); | 1720 | local_save_flags(regs->flags); |
1752 | } | 1721 | } |
1722 | |||
1723 | unsigned long perf_instruction_pointer(struct pt_regs *regs) | ||
1724 | { | ||
1725 | unsigned long ip; | ||
1726 | |||
1727 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) | ||
1728 | ip = perf_guest_cbs->get_guest_ip(); | ||
1729 | else | ||
1730 | ip = instruction_pointer(regs); | ||
1731 | |||
1732 | return ip; | ||
1733 | } | ||
1734 | |||
1735 | unsigned long perf_misc_flags(struct pt_regs *regs) | ||
1736 | { | ||
1737 | int misc = 0; | ||
1738 | |||
1739 | if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { | ||
1740 | if (perf_guest_cbs->is_user_mode()) | ||
1741 | misc |= PERF_RECORD_MISC_GUEST_USER; | ||
1742 | else | ||
1743 | misc |= PERF_RECORD_MISC_GUEST_KERNEL; | ||
1744 | } else { | ||
1745 | if (user_mode(regs)) | ||
1746 | misc |= PERF_RECORD_MISC_USER; | ||
1747 | else | ||
1748 | misc |= PERF_RECORD_MISC_KERNEL; | ||
1749 | } | ||
1750 | |||
1751 | if (regs->flags & PERF_EFLAGS_EXACT) | ||
1752 | misc |= PERF_RECORD_MISC_EXACT_IP; | ||
1753 | |||
1754 | return misc; | ||
1755 | } | ||
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index db6f7d4056e1..611df11ba15e 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -2,7 +2,7 @@ | |||
2 | 2 | ||
3 | static DEFINE_RAW_SPINLOCK(amd_nb_lock); | 3 | static DEFINE_RAW_SPINLOCK(amd_nb_lock); |
4 | 4 | ||
5 | static __initconst u64 amd_hw_cache_event_ids | 5 | static __initconst const u64 amd_hw_cache_event_ids |
6 | [PERF_COUNT_HW_CACHE_MAX] | 6 | [PERF_COUNT_HW_CACHE_MAX] |
7 | [PERF_COUNT_HW_CACHE_OP_MAX] | 7 | [PERF_COUNT_HW_CACHE_OP_MAX] |
8 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 8 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -111,22 +111,19 @@ static u64 amd_pmu_event_map(int hw_event) | |||
111 | return amd_perfmon_event_map[hw_event]; | 111 | return amd_perfmon_event_map[hw_event]; |
112 | } | 112 | } |
113 | 113 | ||
114 | static u64 amd_pmu_raw_event(u64 hw_event) | 114 | static int amd_pmu_hw_config(struct perf_event *event) |
115 | { | 115 | { |
116 | #define K7_EVNTSEL_EVENT_MASK 0xF000000FFULL | 116 | int ret = x86_pmu_hw_config(event); |
117 | #define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL | 117 | |
118 | #define K7_EVNTSEL_EDGE_MASK 0x000040000ULL | 118 | if (ret) |
119 | #define K7_EVNTSEL_INV_MASK 0x000800000ULL | 119 | return ret; |
120 | #define K7_EVNTSEL_REG_MASK 0x0FF000000ULL | 120 | |
121 | 121 | if (event->attr.type != PERF_TYPE_RAW) | |
122 | #define K7_EVNTSEL_MASK \ | 122 | return 0; |
123 | (K7_EVNTSEL_EVENT_MASK | \ | 123 | |
124 | K7_EVNTSEL_UNIT_MASK | \ | 124 | event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; |
125 | K7_EVNTSEL_EDGE_MASK | \ | 125 | |
126 | K7_EVNTSEL_INV_MASK | \ | 126 | return 0; |
127 | K7_EVNTSEL_REG_MASK) | ||
128 | |||
129 | return hw_event & K7_EVNTSEL_MASK; | ||
130 | } | 127 | } |
131 | 128 | ||
132 | /* | 129 | /* |
@@ -165,7 +162,7 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc, | |||
165 | * be removed on one CPU at a time AND PMU is disabled | 162 | * be removed on one CPU at a time AND PMU is disabled |
166 | * when we come here | 163 | * when we come here |
167 | */ | 164 | */ |
168 | for (i = 0; i < x86_pmu.num_events; i++) { | 165 | for (i = 0; i < x86_pmu.num_counters; i++) { |
169 | if (nb->owners[i] == event) { | 166 | if (nb->owners[i] == event) { |
170 | cmpxchg(nb->owners+i, event, NULL); | 167 | cmpxchg(nb->owners+i, event, NULL); |
171 | break; | 168 | break; |
@@ -215,7 +212,7 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | |||
215 | struct hw_perf_event *hwc = &event->hw; | 212 | struct hw_perf_event *hwc = &event->hw; |
216 | struct amd_nb *nb = cpuc->amd_nb; | 213 | struct amd_nb *nb = cpuc->amd_nb; |
217 | struct perf_event *old = NULL; | 214 | struct perf_event *old = NULL; |
218 | int max = x86_pmu.num_events; | 215 | int max = x86_pmu.num_counters; |
219 | int i, j, k = -1; | 216 | int i, j, k = -1; |
220 | 217 | ||
221 | /* | 218 | /* |
@@ -293,7 +290,7 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) | |||
293 | /* | 290 | /* |
294 | * initialize all possible NB constraints | 291 | * initialize all possible NB constraints |
295 | */ | 292 | */ |
296 | for (i = 0; i < x86_pmu.num_events; i++) { | 293 | for (i = 0; i < x86_pmu.num_counters; i++) { |
297 | __set_bit(i, nb->event_constraints[i].idxmsk); | 294 | __set_bit(i, nb->event_constraints[i].idxmsk); |
298 | nb->event_constraints[i].weight = 1; | 295 | nb->event_constraints[i].weight = 1; |
299 | } | 296 | } |
@@ -371,21 +368,22 @@ static void amd_pmu_cpu_dead(int cpu) | |||
371 | raw_spin_unlock(&amd_nb_lock); | 368 | raw_spin_unlock(&amd_nb_lock); |
372 | } | 369 | } |
373 | 370 | ||
374 | static __initconst struct x86_pmu amd_pmu = { | 371 | static __initconst const struct x86_pmu amd_pmu = { |
375 | .name = "AMD", | 372 | .name = "AMD", |
376 | .handle_irq = x86_pmu_handle_irq, | 373 | .handle_irq = x86_pmu_handle_irq, |
377 | .disable_all = x86_pmu_disable_all, | 374 | .disable_all = x86_pmu_disable_all, |
378 | .enable_all = x86_pmu_enable_all, | 375 | .enable_all = x86_pmu_enable_all, |
379 | .enable = x86_pmu_enable_event, | 376 | .enable = x86_pmu_enable_event, |
380 | .disable = x86_pmu_disable_event, | 377 | .disable = x86_pmu_disable_event, |
378 | .hw_config = amd_pmu_hw_config, | ||
379 | .schedule_events = x86_schedule_events, | ||
381 | .eventsel = MSR_K7_EVNTSEL0, | 380 | .eventsel = MSR_K7_EVNTSEL0, |
382 | .perfctr = MSR_K7_PERFCTR0, | 381 | .perfctr = MSR_K7_PERFCTR0, |
383 | .event_map = amd_pmu_event_map, | 382 | .event_map = amd_pmu_event_map, |
384 | .raw_event = amd_pmu_raw_event, | ||
385 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | 383 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), |
386 | .num_events = 4, | 384 | .num_counters = 4, |
387 | .event_bits = 48, | 385 | .cntval_bits = 48, |
388 | .event_mask = (1ULL << 48) - 1, | 386 | .cntval_mask = (1ULL << 48) - 1, |
389 | .apic = 1, | 387 | .apic = 1, |
390 | /* use highest bit to detect overflow */ | 388 | /* use highest bit to detect overflow */ |
391 | .max_period = (1ULL << 47) - 1, | 389 | .max_period = (1ULL << 47) - 1, |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 9c794ac87837..fdbc652d3feb 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -88,7 +88,7 @@ static u64 intel_pmu_event_map(int hw_event) | |||
88 | return intel_perfmon_event_map[hw_event]; | 88 | return intel_perfmon_event_map[hw_event]; |
89 | } | 89 | } |
90 | 90 | ||
91 | static __initconst u64 westmere_hw_cache_event_ids | 91 | static __initconst const u64 westmere_hw_cache_event_ids |
92 | [PERF_COUNT_HW_CACHE_MAX] | 92 | [PERF_COUNT_HW_CACHE_MAX] |
93 | [PERF_COUNT_HW_CACHE_OP_MAX] | 93 | [PERF_COUNT_HW_CACHE_OP_MAX] |
94 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 94 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -179,7 +179,7 @@ static __initconst u64 westmere_hw_cache_event_ids | |||
179 | }, | 179 | }, |
180 | }; | 180 | }; |
181 | 181 | ||
182 | static __initconst u64 nehalem_hw_cache_event_ids | 182 | static __initconst const u64 nehalem_hw_cache_event_ids |
183 | [PERF_COUNT_HW_CACHE_MAX] | 183 | [PERF_COUNT_HW_CACHE_MAX] |
184 | [PERF_COUNT_HW_CACHE_OP_MAX] | 184 | [PERF_COUNT_HW_CACHE_OP_MAX] |
185 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 185 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -270,7 +270,7 @@ static __initconst u64 nehalem_hw_cache_event_ids | |||
270 | }, | 270 | }, |
271 | }; | 271 | }; |
272 | 272 | ||
273 | static __initconst u64 core2_hw_cache_event_ids | 273 | static __initconst const u64 core2_hw_cache_event_ids |
274 | [PERF_COUNT_HW_CACHE_MAX] | 274 | [PERF_COUNT_HW_CACHE_MAX] |
275 | [PERF_COUNT_HW_CACHE_OP_MAX] | 275 | [PERF_COUNT_HW_CACHE_OP_MAX] |
276 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 276 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -361,7 +361,7 @@ static __initconst u64 core2_hw_cache_event_ids | |||
361 | }, | 361 | }, |
362 | }; | 362 | }; |
363 | 363 | ||
364 | static __initconst u64 atom_hw_cache_event_ids | 364 | static __initconst const u64 atom_hw_cache_event_ids |
365 | [PERF_COUNT_HW_CACHE_MAX] | 365 | [PERF_COUNT_HW_CACHE_MAX] |
366 | [PERF_COUNT_HW_CACHE_OP_MAX] | 366 | [PERF_COUNT_HW_CACHE_OP_MAX] |
367 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 367 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
@@ -452,60 +452,6 @@ static __initconst u64 atom_hw_cache_event_ids | |||
452 | }, | 452 | }, |
453 | }; | 453 | }; |
454 | 454 | ||
455 | static u64 intel_pmu_raw_event(u64 hw_event) | ||
456 | { | ||
457 | #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
458 | #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
459 | #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
460 | #define CORE_EVNTSEL_INV_MASK 0x00800000ULL | ||
461 | #define CORE_EVNTSEL_REG_MASK 0xFF000000ULL | ||
462 | |||
463 | #define CORE_EVNTSEL_MASK \ | ||
464 | (INTEL_ARCH_EVTSEL_MASK | \ | ||
465 | INTEL_ARCH_UNIT_MASK | \ | ||
466 | INTEL_ARCH_EDGE_MASK | \ | ||
467 | INTEL_ARCH_INV_MASK | \ | ||
468 | INTEL_ARCH_CNT_MASK) | ||
469 | |||
470 | return hw_event & CORE_EVNTSEL_MASK; | ||
471 | } | ||
472 | |||
473 | static void intel_pmu_enable_bts(u64 config) | ||
474 | { | ||
475 | unsigned long debugctlmsr; | ||
476 | |||
477 | debugctlmsr = get_debugctlmsr(); | ||
478 | |||
479 | debugctlmsr |= X86_DEBUGCTL_TR; | ||
480 | debugctlmsr |= X86_DEBUGCTL_BTS; | ||
481 | debugctlmsr |= X86_DEBUGCTL_BTINT; | ||
482 | |||
483 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
484 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; | ||
485 | |||
486 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
487 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; | ||
488 | |||
489 | update_debugctlmsr(debugctlmsr); | ||
490 | } | ||
491 | |||
492 | static void intel_pmu_disable_bts(void) | ||
493 | { | ||
494 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
495 | unsigned long debugctlmsr; | ||
496 | |||
497 | if (!cpuc->ds) | ||
498 | return; | ||
499 | |||
500 | debugctlmsr = get_debugctlmsr(); | ||
501 | |||
502 | debugctlmsr &= | ||
503 | ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | | ||
504 | X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); | ||
505 | |||
506 | update_debugctlmsr(debugctlmsr); | ||
507 | } | ||
508 | |||
509 | static void intel_pmu_disable_all(void) | 455 | static void intel_pmu_disable_all(void) |
510 | { | 456 | { |
511 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 457 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
@@ -514,12 +460,17 @@ static void intel_pmu_disable_all(void) | |||
514 | 460 | ||
515 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) | 461 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) |
516 | intel_pmu_disable_bts(); | 462 | intel_pmu_disable_bts(); |
463 | |||
464 | intel_pmu_pebs_disable_all(); | ||
465 | intel_pmu_lbr_disable_all(); | ||
517 | } | 466 | } |
518 | 467 | ||
519 | static void intel_pmu_enable_all(void) | 468 | static void intel_pmu_enable_all(int added) |
520 | { | 469 | { |
521 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 470 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
522 | 471 | ||
472 | intel_pmu_pebs_enable_all(); | ||
473 | intel_pmu_lbr_enable_all(); | ||
523 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | 474 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); |
524 | 475 | ||
525 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { | 476 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { |
@@ -533,6 +484,42 @@ static void intel_pmu_enable_all(void) | |||
533 | } | 484 | } |
534 | } | 485 | } |
535 | 486 | ||
487 | /* | ||
488 | * Workaround for: | ||
489 | * Intel Errata AAK100 (model 26) | ||
490 | * Intel Errata AAP53 (model 30) | ||
491 | * Intel Errata BD53 (model 44) | ||
492 | * | ||
493 | * These chips need to be 'reset' when adding counters by programming | ||
494 | * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5 | ||
495 | * either in sequence on the same PMC or on different PMCs. | ||
496 | */ | ||
497 | static void intel_pmu_nhm_enable_all(int added) | ||
498 | { | ||
499 | if (added) { | ||
500 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
501 | int i; | ||
502 | |||
503 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2); | ||
504 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1); | ||
505 | wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5); | ||
506 | |||
507 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3); | ||
508 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); | ||
509 | |||
510 | for (i = 0; i < 3; i++) { | ||
511 | struct perf_event *event = cpuc->events[i]; | ||
512 | |||
513 | if (!event) | ||
514 | continue; | ||
515 | |||
516 | __x86_pmu_enable_event(&event->hw, | ||
517 | ARCH_PERFMON_EVENTSEL_ENABLE); | ||
518 | } | ||
519 | } | ||
520 | intel_pmu_enable_all(added); | ||
521 | } | ||
522 | |||
536 | static inline u64 intel_pmu_get_status(void) | 523 | static inline u64 intel_pmu_get_status(void) |
537 | { | 524 | { |
538 | u64 status; | 525 | u64 status; |
@@ -547,8 +534,7 @@ static inline void intel_pmu_ack_status(u64 ack) | |||
547 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); | 534 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); |
548 | } | 535 | } |
549 | 536 | ||
550 | static inline void | 537 | static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) |
551 | intel_pmu_disable_fixed(struct hw_perf_event *hwc) | ||
552 | { | 538 | { |
553 | int idx = hwc->idx - X86_PMC_IDX_FIXED; | 539 | int idx = hwc->idx - X86_PMC_IDX_FIXED; |
554 | u64 ctrl_val, mask; | 540 | u64 ctrl_val, mask; |
@@ -557,71 +543,10 @@ intel_pmu_disable_fixed(struct hw_perf_event *hwc) | |||
557 | 543 | ||
558 | rdmsrl(hwc->config_base, ctrl_val); | 544 | rdmsrl(hwc->config_base, ctrl_val); |
559 | ctrl_val &= ~mask; | 545 | ctrl_val &= ~mask; |
560 | (void)checking_wrmsrl(hwc->config_base, ctrl_val); | 546 | wrmsrl(hwc->config_base, ctrl_val); |
561 | } | ||
562 | |||
563 | static void intel_pmu_drain_bts_buffer(void) | ||
564 | { | ||
565 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
566 | struct debug_store *ds = cpuc->ds; | ||
567 | struct bts_record { | ||
568 | u64 from; | ||
569 | u64 to; | ||
570 | u64 flags; | ||
571 | }; | ||
572 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
573 | struct bts_record *at, *top; | ||
574 | struct perf_output_handle handle; | ||
575 | struct perf_event_header header; | ||
576 | struct perf_sample_data data; | ||
577 | struct pt_regs regs; | ||
578 | |||
579 | if (!event) | ||
580 | return; | ||
581 | |||
582 | if (!ds) | ||
583 | return; | ||
584 | |||
585 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
586 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
587 | |||
588 | if (top <= at) | ||
589 | return; | ||
590 | |||
591 | ds->bts_index = ds->bts_buffer_base; | ||
592 | |||
593 | perf_sample_data_init(&data, 0); | ||
594 | |||
595 | data.period = event->hw.last_period; | ||
596 | regs.ip = 0; | ||
597 | |||
598 | /* | ||
599 | * Prepare a generic sample, i.e. fill in the invariant fields. | ||
600 | * We will overwrite the from and to address before we output | ||
601 | * the sample. | ||
602 | */ | ||
603 | perf_prepare_sample(&header, &data, event, ®s); | ||
604 | |||
605 | if (perf_output_begin(&handle, event, | ||
606 | header.size * (top - at), 1, 1)) | ||
607 | return; | ||
608 | |||
609 | for (; at < top; at++) { | ||
610 | data.ip = at->from; | ||
611 | data.addr = at->to; | ||
612 | |||
613 | perf_output_sample(&handle, &header, &data, event); | ||
614 | } | ||
615 | |||
616 | perf_output_end(&handle); | ||
617 | |||
618 | /* There's new data available. */ | ||
619 | event->hw.interrupts++; | ||
620 | event->pending_kill = POLL_IN; | ||
621 | } | 547 | } |
622 | 548 | ||
623 | static inline void | 549 | static void intel_pmu_disable_event(struct perf_event *event) |
624 | intel_pmu_disable_event(struct perf_event *event) | ||
625 | { | 550 | { |
626 | struct hw_perf_event *hwc = &event->hw; | 551 | struct hw_perf_event *hwc = &event->hw; |
627 | 552 | ||
@@ -637,14 +562,15 @@ intel_pmu_disable_event(struct perf_event *event) | |||
637 | } | 562 | } |
638 | 563 | ||
639 | x86_pmu_disable_event(event); | 564 | x86_pmu_disable_event(event); |
565 | |||
566 | if (unlikely(event->attr.precise_ip)) | ||
567 | intel_pmu_pebs_disable(event); | ||
640 | } | 568 | } |
641 | 569 | ||
642 | static inline void | 570 | static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) |
643 | intel_pmu_enable_fixed(struct hw_perf_event *hwc) | ||
644 | { | 571 | { |
645 | int idx = hwc->idx - X86_PMC_IDX_FIXED; | 572 | int idx = hwc->idx - X86_PMC_IDX_FIXED; |
646 | u64 ctrl_val, bits, mask; | 573 | u64 ctrl_val, bits, mask; |
647 | int err; | ||
648 | 574 | ||
649 | /* | 575 | /* |
650 | * Enable IRQ generation (0x8), | 576 | * Enable IRQ generation (0x8), |
@@ -669,7 +595,7 @@ intel_pmu_enable_fixed(struct hw_perf_event *hwc) | |||
669 | rdmsrl(hwc->config_base, ctrl_val); | 595 | rdmsrl(hwc->config_base, ctrl_val); |
670 | ctrl_val &= ~mask; | 596 | ctrl_val &= ~mask; |
671 | ctrl_val |= bits; | 597 | ctrl_val |= bits; |
672 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | 598 | wrmsrl(hwc->config_base, ctrl_val); |
673 | } | 599 | } |
674 | 600 | ||
675 | static void intel_pmu_enable_event(struct perf_event *event) | 601 | static void intel_pmu_enable_event(struct perf_event *event) |
@@ -689,7 +615,10 @@ static void intel_pmu_enable_event(struct perf_event *event) | |||
689 | return; | 615 | return; |
690 | } | 616 | } |
691 | 617 | ||
692 | __x86_pmu_enable_event(hwc); | 618 | if (unlikely(event->attr.precise_ip)) |
619 | intel_pmu_pebs_enable(event); | ||
620 | |||
621 | __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); | ||
693 | } | 622 | } |
694 | 623 | ||
695 | /* | 624 | /* |
@@ -708,20 +637,20 @@ static void intel_pmu_reset(void) | |||
708 | unsigned long flags; | 637 | unsigned long flags; |
709 | int idx; | 638 | int idx; |
710 | 639 | ||
711 | if (!x86_pmu.num_events) | 640 | if (!x86_pmu.num_counters) |
712 | return; | 641 | return; |
713 | 642 | ||
714 | local_irq_save(flags); | 643 | local_irq_save(flags); |
715 | 644 | ||
716 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); | 645 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); |
717 | 646 | ||
718 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 647 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
719 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); | 648 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); |
720 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); | 649 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); |
721 | } | 650 | } |
722 | for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { | 651 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) |
723 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); | 652 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); |
724 | } | 653 | |
725 | if (ds) | 654 | if (ds) |
726 | ds->bts_index = ds->bts_buffer_base; | 655 | ds->bts_index = ds->bts_buffer_base; |
727 | 656 | ||
@@ -747,7 +676,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) | |||
747 | intel_pmu_drain_bts_buffer(); | 676 | intel_pmu_drain_bts_buffer(); |
748 | status = intel_pmu_get_status(); | 677 | status = intel_pmu_get_status(); |
749 | if (!status) { | 678 | if (!status) { |
750 | intel_pmu_enable_all(); | 679 | intel_pmu_enable_all(0); |
751 | return 0; | 680 | return 0; |
752 | } | 681 | } |
753 | 682 | ||
@@ -762,6 +691,15 @@ again: | |||
762 | 691 | ||
763 | inc_irq_stat(apic_perf_irqs); | 692 | inc_irq_stat(apic_perf_irqs); |
764 | ack = status; | 693 | ack = status; |
694 | |||
695 | intel_pmu_lbr_read(); | ||
696 | |||
697 | /* | ||
698 | * PEBS overflow sets bit 62 in the global status register | ||
699 | */ | ||
700 | if (__test_and_clear_bit(62, (unsigned long *)&status)) | ||
701 | x86_pmu.drain_pebs(regs); | ||
702 | |||
765 | for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { | 703 | for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { |
766 | struct perf_event *event = cpuc->events[bit]; | 704 | struct perf_event *event = cpuc->events[bit]; |
767 | 705 | ||
@@ -787,26 +725,22 @@ again: | |||
787 | goto again; | 725 | goto again; |
788 | 726 | ||
789 | done: | 727 | done: |
790 | intel_pmu_enable_all(); | 728 | intel_pmu_enable_all(0); |
791 | return 1; | 729 | return 1; |
792 | } | 730 | } |
793 | 731 | ||
794 | static struct event_constraint bts_constraint = | ||
795 | EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); | ||
796 | |||
797 | static struct event_constraint * | 732 | static struct event_constraint * |
798 | intel_special_constraints(struct perf_event *event) | 733 | intel_bts_constraints(struct perf_event *event) |
799 | { | 734 | { |
800 | unsigned int hw_event; | 735 | struct hw_perf_event *hwc = &event->hw; |
801 | 736 | unsigned int hw_event, bts_event; | |
802 | hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK; | ||
803 | 737 | ||
804 | if (unlikely((hw_event == | 738 | hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; |
805 | x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && | 739 | bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); |
806 | (event->hw.sample_period == 1))) { | ||
807 | 740 | ||
741 | if (unlikely(hw_event == bts_event && hwc->sample_period == 1)) | ||
808 | return &bts_constraint; | 742 | return &bts_constraint; |
809 | } | 743 | |
810 | return NULL; | 744 | return NULL; |
811 | } | 745 | } |
812 | 746 | ||
@@ -815,24 +749,53 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event | |||
815 | { | 749 | { |
816 | struct event_constraint *c; | 750 | struct event_constraint *c; |
817 | 751 | ||
818 | c = intel_special_constraints(event); | 752 | c = intel_bts_constraints(event); |
753 | if (c) | ||
754 | return c; | ||
755 | |||
756 | c = intel_pebs_constraints(event); | ||
819 | if (c) | 757 | if (c) |
820 | return c; | 758 | return c; |
821 | 759 | ||
822 | return x86_get_event_constraints(cpuc, event); | 760 | return x86_get_event_constraints(cpuc, event); |
823 | } | 761 | } |
824 | 762 | ||
825 | static __initconst struct x86_pmu core_pmu = { | 763 | static int intel_pmu_hw_config(struct perf_event *event) |
764 | { | ||
765 | int ret = x86_pmu_hw_config(event); | ||
766 | |||
767 | if (ret) | ||
768 | return ret; | ||
769 | |||
770 | if (event->attr.type != PERF_TYPE_RAW) | ||
771 | return 0; | ||
772 | |||
773 | if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY)) | ||
774 | return 0; | ||
775 | |||
776 | if (x86_pmu.version < 3) | ||
777 | return -EINVAL; | ||
778 | |||
779 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | ||
780 | return -EACCES; | ||
781 | |||
782 | event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY; | ||
783 | |||
784 | return 0; | ||
785 | } | ||
786 | |||
787 | static __initconst const struct x86_pmu core_pmu = { | ||
826 | .name = "core", | 788 | .name = "core", |
827 | .handle_irq = x86_pmu_handle_irq, | 789 | .handle_irq = x86_pmu_handle_irq, |
828 | .disable_all = x86_pmu_disable_all, | 790 | .disable_all = x86_pmu_disable_all, |
829 | .enable_all = x86_pmu_enable_all, | 791 | .enable_all = x86_pmu_enable_all, |
830 | .enable = x86_pmu_enable_event, | 792 | .enable = x86_pmu_enable_event, |
831 | .disable = x86_pmu_disable_event, | 793 | .disable = x86_pmu_disable_event, |
794 | .hw_config = x86_pmu_hw_config, | ||
795 | .schedule_events = x86_schedule_events, | ||
832 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | 796 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, |
833 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | 797 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, |
834 | .event_map = intel_pmu_event_map, | 798 | .event_map = intel_pmu_event_map, |
835 | .raw_event = intel_pmu_raw_event, | ||
836 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | 799 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), |
837 | .apic = 1, | 800 | .apic = 1, |
838 | /* | 801 | /* |
@@ -845,17 +808,32 @@ static __initconst struct x86_pmu core_pmu = { | |||
845 | .event_constraints = intel_core_event_constraints, | 808 | .event_constraints = intel_core_event_constraints, |
846 | }; | 809 | }; |
847 | 810 | ||
848 | static __initconst struct x86_pmu intel_pmu = { | 811 | static void intel_pmu_cpu_starting(int cpu) |
812 | { | ||
813 | init_debug_store_on_cpu(cpu); | ||
814 | /* | ||
815 | * Deal with CPUs that don't clear their LBRs on power-up. | ||
816 | */ | ||
817 | intel_pmu_lbr_reset(); | ||
818 | } | ||
819 | |||
820 | static void intel_pmu_cpu_dying(int cpu) | ||
821 | { | ||
822 | fini_debug_store_on_cpu(cpu); | ||
823 | } | ||
824 | |||
825 | static __initconst const struct x86_pmu intel_pmu = { | ||
849 | .name = "Intel", | 826 | .name = "Intel", |
850 | .handle_irq = intel_pmu_handle_irq, | 827 | .handle_irq = intel_pmu_handle_irq, |
851 | .disable_all = intel_pmu_disable_all, | 828 | .disable_all = intel_pmu_disable_all, |
852 | .enable_all = intel_pmu_enable_all, | 829 | .enable_all = intel_pmu_enable_all, |
853 | .enable = intel_pmu_enable_event, | 830 | .enable = intel_pmu_enable_event, |
854 | .disable = intel_pmu_disable_event, | 831 | .disable = intel_pmu_disable_event, |
832 | .hw_config = intel_pmu_hw_config, | ||
833 | .schedule_events = x86_schedule_events, | ||
855 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | 834 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, |
856 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | 835 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, |
857 | .event_map = intel_pmu_event_map, | 836 | .event_map = intel_pmu_event_map, |
858 | .raw_event = intel_pmu_raw_event, | ||
859 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | 837 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), |
860 | .apic = 1, | 838 | .apic = 1, |
861 | /* | 839 | /* |
@@ -864,14 +842,38 @@ static __initconst struct x86_pmu intel_pmu = { | |||
864 | * the generic event period: | 842 | * the generic event period: |
865 | */ | 843 | */ |
866 | .max_period = (1ULL << 31) - 1, | 844 | .max_period = (1ULL << 31) - 1, |
867 | .enable_bts = intel_pmu_enable_bts, | ||
868 | .disable_bts = intel_pmu_disable_bts, | ||
869 | .get_event_constraints = intel_get_event_constraints, | 845 | .get_event_constraints = intel_get_event_constraints, |
870 | 846 | ||
871 | .cpu_starting = init_debug_store_on_cpu, | 847 | .cpu_starting = intel_pmu_cpu_starting, |
872 | .cpu_dying = fini_debug_store_on_cpu, | 848 | .cpu_dying = intel_pmu_cpu_dying, |
873 | }; | 849 | }; |
874 | 850 | ||
851 | static void intel_clovertown_quirks(void) | ||
852 | { | ||
853 | /* | ||
854 | * PEBS is unreliable due to: | ||
855 | * | ||
856 | * AJ67 - PEBS may experience CPL leaks | ||
857 | * AJ68 - PEBS PMI may be delayed by one event | ||
858 | * AJ69 - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12] | ||
859 | * AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS | ||
860 | * | ||
861 | * AJ67 could be worked around by restricting the OS/USR flags. | ||
862 | * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI. | ||
863 | * | ||
864 | * AJ106 could possibly be worked around by not allowing LBR | ||
865 | * usage from PEBS, including the fixup. | ||
866 | * AJ68 could possibly be worked around by always programming | ||
867 | * a pebs_event_reset[0] value and coping with the lost events. | ||
868 | * | ||
869 | * But taken together it might just make sense to not enable PEBS on | ||
870 | * these chips. | ||
871 | */ | ||
872 | printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); | ||
873 | x86_pmu.pebs = 0; | ||
874 | x86_pmu.pebs_constraints = NULL; | ||
875 | } | ||
876 | |||
875 | static __init int intel_pmu_init(void) | 877 | static __init int intel_pmu_init(void) |
876 | { | 878 | { |
877 | union cpuid10_edx edx; | 879 | union cpuid10_edx edx; |
@@ -881,12 +883,13 @@ static __init int intel_pmu_init(void) | |||
881 | int version; | 883 | int version; |
882 | 884 | ||
883 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | 885 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { |
884 | /* check for P6 processor family */ | 886 | switch (boot_cpu_data.x86) { |
885 | if (boot_cpu_data.x86 == 6) { | 887 | case 0x6: |
886 | return p6_pmu_init(); | 888 | return p6_pmu_init(); |
887 | } else { | 889 | case 0xf: |
890 | return p4_pmu_init(); | ||
891 | } | ||
888 | return -ENODEV; | 892 | return -ENODEV; |
889 | } | ||
890 | } | 893 | } |
891 | 894 | ||
892 | /* | 895 | /* |
@@ -904,16 +907,28 @@ static __init int intel_pmu_init(void) | |||
904 | x86_pmu = intel_pmu; | 907 | x86_pmu = intel_pmu; |
905 | 908 | ||
906 | x86_pmu.version = version; | 909 | x86_pmu.version = version; |
907 | x86_pmu.num_events = eax.split.num_events; | 910 | x86_pmu.num_counters = eax.split.num_counters; |
908 | x86_pmu.event_bits = eax.split.bit_width; | 911 | x86_pmu.cntval_bits = eax.split.bit_width; |
909 | x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1; | 912 | x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; |
910 | 913 | ||
911 | /* | 914 | /* |
912 | * Quirk: v2 perfmon does not report fixed-purpose events, so | 915 | * Quirk: v2 perfmon does not report fixed-purpose events, so |
913 | * assume at least 3 events: | 916 | * assume at least 3 events: |
914 | */ | 917 | */ |
915 | if (version > 1) | 918 | if (version > 1) |
916 | x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); | 919 | x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); |
920 | |||
921 | /* | ||
922 | * v2 and above have a perf capabilities MSR | ||
923 | */ | ||
924 | if (version > 1) { | ||
925 | u64 capabilities; | ||
926 | |||
927 | rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); | ||
928 | x86_pmu.intel_cap.capabilities = capabilities; | ||
929 | } | ||
930 | |||
931 | intel_ds_init(); | ||
917 | 932 | ||
918 | /* | 933 | /* |
919 | * Install the hw-cache-events table: | 934 | * Install the hw-cache-events table: |
@@ -924,12 +939,15 @@ static __init int intel_pmu_init(void) | |||
924 | break; | 939 | break; |
925 | 940 | ||
926 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ | 941 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ |
942 | x86_pmu.quirks = intel_clovertown_quirks; | ||
927 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ | 943 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ |
928 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ | 944 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ |
929 | case 29: /* six-core 45 nm xeon "Dunnington" */ | 945 | case 29: /* six-core 45 nm xeon "Dunnington" */ |
930 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, | 946 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, |
931 | sizeof(hw_cache_event_ids)); | 947 | sizeof(hw_cache_event_ids)); |
932 | 948 | ||
949 | intel_pmu_lbr_init_core(); | ||
950 | |||
933 | x86_pmu.event_constraints = intel_core2_event_constraints; | 951 | x86_pmu.event_constraints = intel_core2_event_constraints; |
934 | pr_cont("Core2 events, "); | 952 | pr_cont("Core2 events, "); |
935 | break; | 953 | break; |
@@ -940,13 +958,19 @@ static __init int intel_pmu_init(void) | |||
940 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, | 958 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, |
941 | sizeof(hw_cache_event_ids)); | 959 | sizeof(hw_cache_event_ids)); |
942 | 960 | ||
961 | intel_pmu_lbr_init_nhm(); | ||
962 | |||
943 | x86_pmu.event_constraints = intel_nehalem_event_constraints; | 963 | x86_pmu.event_constraints = intel_nehalem_event_constraints; |
944 | pr_cont("Nehalem/Corei7 events, "); | 964 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; |
965 | pr_cont("Nehalem events, "); | ||
945 | break; | 966 | break; |
967 | |||
946 | case 28: /* Atom */ | 968 | case 28: /* Atom */ |
947 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, | 969 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, |
948 | sizeof(hw_cache_event_ids)); | 970 | sizeof(hw_cache_event_ids)); |
949 | 971 | ||
972 | intel_pmu_lbr_init_atom(); | ||
973 | |||
950 | x86_pmu.event_constraints = intel_gen_event_constraints; | 974 | x86_pmu.event_constraints = intel_gen_event_constraints; |
951 | pr_cont("Atom events, "); | 975 | pr_cont("Atom events, "); |
952 | break; | 976 | break; |
@@ -956,7 +980,10 @@ static __init int intel_pmu_init(void) | |||
956 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, | 980 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, |
957 | sizeof(hw_cache_event_ids)); | 981 | sizeof(hw_cache_event_ids)); |
958 | 982 | ||
983 | intel_pmu_lbr_init_nhm(); | ||
984 | |||
959 | x86_pmu.event_constraints = intel_westmere_event_constraints; | 985 | x86_pmu.event_constraints = intel_westmere_event_constraints; |
986 | x86_pmu.enable_all = intel_pmu_nhm_enable_all; | ||
960 | pr_cont("Westmere events, "); | 987 | pr_cont("Westmere events, "); |
961 | break; | 988 | break; |
962 | 989 | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c new file mode 100644 index 000000000000..18018d1311cd --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c | |||
@@ -0,0 +1,641 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_INTEL | ||
2 | |||
3 | /* The maximal number of PEBS events: */ | ||
4 | #define MAX_PEBS_EVENTS 4 | ||
5 | |||
6 | /* The size of a BTS record in bytes: */ | ||
7 | #define BTS_RECORD_SIZE 24 | ||
8 | |||
9 | #define BTS_BUFFER_SIZE (PAGE_SIZE << 4) | ||
10 | #define PEBS_BUFFER_SIZE PAGE_SIZE | ||
11 | |||
12 | /* | ||
13 | * pebs_record_32 for p4 and core not supported | ||
14 | |||
15 | struct pebs_record_32 { | ||
16 | u32 flags, ip; | ||
17 | u32 ax, bc, cx, dx; | ||
18 | u32 si, di, bp, sp; | ||
19 | }; | ||
20 | |||
21 | */ | ||
22 | |||
23 | struct pebs_record_core { | ||
24 | u64 flags, ip; | ||
25 | u64 ax, bx, cx, dx; | ||
26 | u64 si, di, bp, sp; | ||
27 | u64 r8, r9, r10, r11; | ||
28 | u64 r12, r13, r14, r15; | ||
29 | }; | ||
30 | |||
31 | struct pebs_record_nhm { | ||
32 | u64 flags, ip; | ||
33 | u64 ax, bx, cx, dx; | ||
34 | u64 si, di, bp, sp; | ||
35 | u64 r8, r9, r10, r11; | ||
36 | u64 r12, r13, r14, r15; | ||
37 | u64 status, dla, dse, lat; | ||
38 | }; | ||
39 | |||
40 | /* | ||
41 | * A debug store configuration. | ||
42 | * | ||
43 | * We only support architectures that use 64bit fields. | ||
44 | */ | ||
45 | struct debug_store { | ||
46 | u64 bts_buffer_base; | ||
47 | u64 bts_index; | ||
48 | u64 bts_absolute_maximum; | ||
49 | u64 bts_interrupt_threshold; | ||
50 | u64 pebs_buffer_base; | ||
51 | u64 pebs_index; | ||
52 | u64 pebs_absolute_maximum; | ||
53 | u64 pebs_interrupt_threshold; | ||
54 | u64 pebs_event_reset[MAX_PEBS_EVENTS]; | ||
55 | }; | ||
56 | |||
57 | static void init_debug_store_on_cpu(int cpu) | ||
58 | { | ||
59 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
60 | |||
61 | if (!ds) | ||
62 | return; | ||
63 | |||
64 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, | ||
65 | (u32)((u64)(unsigned long)ds), | ||
66 | (u32)((u64)(unsigned long)ds >> 32)); | ||
67 | } | ||
68 | |||
69 | static void fini_debug_store_on_cpu(int cpu) | ||
70 | { | ||
71 | if (!per_cpu(cpu_hw_events, cpu).ds) | ||
72 | return; | ||
73 | |||
74 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); | ||
75 | } | ||
76 | |||
77 | static void release_ds_buffers(void) | ||
78 | { | ||
79 | int cpu; | ||
80 | |||
81 | if (!x86_pmu.bts && !x86_pmu.pebs) | ||
82 | return; | ||
83 | |||
84 | get_online_cpus(); | ||
85 | |||
86 | for_each_online_cpu(cpu) | ||
87 | fini_debug_store_on_cpu(cpu); | ||
88 | |||
89 | for_each_possible_cpu(cpu) { | ||
90 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
91 | |||
92 | if (!ds) | ||
93 | continue; | ||
94 | |||
95 | per_cpu(cpu_hw_events, cpu).ds = NULL; | ||
96 | |||
97 | kfree((void *)(unsigned long)ds->pebs_buffer_base); | ||
98 | kfree((void *)(unsigned long)ds->bts_buffer_base); | ||
99 | kfree(ds); | ||
100 | } | ||
101 | |||
102 | put_online_cpus(); | ||
103 | } | ||
104 | |||
105 | static int reserve_ds_buffers(void) | ||
106 | { | ||
107 | int cpu, err = 0; | ||
108 | |||
109 | if (!x86_pmu.bts && !x86_pmu.pebs) | ||
110 | return 0; | ||
111 | |||
112 | get_online_cpus(); | ||
113 | |||
114 | for_each_possible_cpu(cpu) { | ||
115 | struct debug_store *ds; | ||
116 | void *buffer; | ||
117 | int max, thresh; | ||
118 | |||
119 | err = -ENOMEM; | ||
120 | ds = kzalloc(sizeof(*ds), GFP_KERNEL); | ||
121 | if (unlikely(!ds)) | ||
122 | break; | ||
123 | per_cpu(cpu_hw_events, cpu).ds = ds; | ||
124 | |||
125 | if (x86_pmu.bts) { | ||
126 | buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); | ||
127 | if (unlikely(!buffer)) | ||
128 | break; | ||
129 | |||
130 | max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; | ||
131 | thresh = max / 16; | ||
132 | |||
133 | ds->bts_buffer_base = (u64)(unsigned long)buffer; | ||
134 | ds->bts_index = ds->bts_buffer_base; | ||
135 | ds->bts_absolute_maximum = ds->bts_buffer_base + | ||
136 | max * BTS_RECORD_SIZE; | ||
137 | ds->bts_interrupt_threshold = ds->bts_absolute_maximum - | ||
138 | thresh * BTS_RECORD_SIZE; | ||
139 | } | ||
140 | |||
141 | if (x86_pmu.pebs) { | ||
142 | buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL); | ||
143 | if (unlikely(!buffer)) | ||
144 | break; | ||
145 | |||
146 | max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; | ||
147 | |||
148 | ds->pebs_buffer_base = (u64)(unsigned long)buffer; | ||
149 | ds->pebs_index = ds->pebs_buffer_base; | ||
150 | ds->pebs_absolute_maximum = ds->pebs_buffer_base + | ||
151 | max * x86_pmu.pebs_record_size; | ||
152 | /* | ||
153 | * Always use single record PEBS | ||
154 | */ | ||
155 | ds->pebs_interrupt_threshold = ds->pebs_buffer_base + | ||
156 | x86_pmu.pebs_record_size; | ||
157 | } | ||
158 | |||
159 | err = 0; | ||
160 | } | ||
161 | |||
162 | if (err) | ||
163 | release_ds_buffers(); | ||
164 | else { | ||
165 | for_each_online_cpu(cpu) | ||
166 | init_debug_store_on_cpu(cpu); | ||
167 | } | ||
168 | |||
169 | put_online_cpus(); | ||
170 | |||
171 | return err; | ||
172 | } | ||
173 | |||
174 | /* | ||
175 | * BTS | ||
176 | */ | ||
177 | |||
178 | static struct event_constraint bts_constraint = | ||
179 | EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); | ||
180 | |||
181 | static void intel_pmu_enable_bts(u64 config) | ||
182 | { | ||
183 | unsigned long debugctlmsr; | ||
184 | |||
185 | debugctlmsr = get_debugctlmsr(); | ||
186 | |||
187 | debugctlmsr |= DEBUGCTLMSR_TR; | ||
188 | debugctlmsr |= DEBUGCTLMSR_BTS; | ||
189 | debugctlmsr |= DEBUGCTLMSR_BTINT; | ||
190 | |||
191 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
192 | debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS; | ||
193 | |||
194 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
195 | debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR; | ||
196 | |||
197 | update_debugctlmsr(debugctlmsr); | ||
198 | } | ||
199 | |||
200 | static void intel_pmu_disable_bts(void) | ||
201 | { | ||
202 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
203 | unsigned long debugctlmsr; | ||
204 | |||
205 | if (!cpuc->ds) | ||
206 | return; | ||
207 | |||
208 | debugctlmsr = get_debugctlmsr(); | ||
209 | |||
210 | debugctlmsr &= | ||
211 | ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT | | ||
212 | DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR); | ||
213 | |||
214 | update_debugctlmsr(debugctlmsr); | ||
215 | } | ||
216 | |||
217 | static void intel_pmu_drain_bts_buffer(void) | ||
218 | { | ||
219 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
220 | struct debug_store *ds = cpuc->ds; | ||
221 | struct bts_record { | ||
222 | u64 from; | ||
223 | u64 to; | ||
224 | u64 flags; | ||
225 | }; | ||
226 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
227 | struct bts_record *at, *top; | ||
228 | struct perf_output_handle handle; | ||
229 | struct perf_event_header header; | ||
230 | struct perf_sample_data data; | ||
231 | struct pt_regs regs; | ||
232 | |||
233 | if (!event) | ||
234 | return; | ||
235 | |||
236 | if (!ds) | ||
237 | return; | ||
238 | |||
239 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
240 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
241 | |||
242 | if (top <= at) | ||
243 | return; | ||
244 | |||
245 | ds->bts_index = ds->bts_buffer_base; | ||
246 | |||
247 | perf_sample_data_init(&data, 0); | ||
248 | data.period = event->hw.last_period; | ||
249 | regs.ip = 0; | ||
250 | |||
251 | /* | ||
252 | * Prepare a generic sample, i.e. fill in the invariant fields. | ||
253 | * We will overwrite the from and to address before we output | ||
254 | * the sample. | ||
255 | */ | ||
256 | perf_prepare_sample(&header, &data, event, ®s); | ||
257 | |||
258 | if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) | ||
259 | return; | ||
260 | |||
261 | for (; at < top; at++) { | ||
262 | data.ip = at->from; | ||
263 | data.addr = at->to; | ||
264 | |||
265 | perf_output_sample(&handle, &header, &data, event); | ||
266 | } | ||
267 | |||
268 | perf_output_end(&handle); | ||
269 | |||
270 | /* There's new data available. */ | ||
271 | event->hw.interrupts++; | ||
272 | event->pending_kill = POLL_IN; | ||
273 | } | ||
274 | |||
275 | /* | ||
276 | * PEBS | ||
277 | */ | ||
278 | |||
279 | static struct event_constraint intel_core_pebs_events[] = { | ||
280 | PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */ | ||
281 | PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ | ||
282 | PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ | ||
283 | PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */ | ||
284 | PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */ | ||
285 | PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ | ||
286 | PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */ | ||
287 | PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ | ||
288 | PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */ | ||
289 | EVENT_CONSTRAINT_END | ||
290 | }; | ||
291 | |||
292 | static struct event_constraint intel_nehalem_pebs_events[] = { | ||
293 | PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */ | ||
294 | PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */ | ||
295 | PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */ | ||
296 | PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETURED.ANY */ | ||
297 | PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */ | ||
298 | PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ | ||
299 | PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */ | ||
300 | PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ | ||
301 | PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */ | ||
302 | EVENT_CONSTRAINT_END | ||
303 | }; | ||
304 | |||
305 | static struct event_constraint * | ||
306 | intel_pebs_constraints(struct perf_event *event) | ||
307 | { | ||
308 | struct event_constraint *c; | ||
309 | |||
310 | if (!event->attr.precise_ip) | ||
311 | return NULL; | ||
312 | |||
313 | if (x86_pmu.pebs_constraints) { | ||
314 | for_each_event_constraint(c, x86_pmu.pebs_constraints) { | ||
315 | if ((event->hw.config & c->cmask) == c->code) | ||
316 | return c; | ||
317 | } | ||
318 | } | ||
319 | |||
320 | return &emptyconstraint; | ||
321 | } | ||
322 | |||
323 | static void intel_pmu_pebs_enable(struct perf_event *event) | ||
324 | { | ||
325 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
326 | struct hw_perf_event *hwc = &event->hw; | ||
327 | |||
328 | hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; | ||
329 | |||
330 | cpuc->pebs_enabled |= 1ULL << hwc->idx; | ||
331 | WARN_ON_ONCE(cpuc->enabled); | ||
332 | |||
333 | if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) | ||
334 | intel_pmu_lbr_enable(event); | ||
335 | } | ||
336 | |||
337 | static void intel_pmu_pebs_disable(struct perf_event *event) | ||
338 | { | ||
339 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
340 | struct hw_perf_event *hwc = &event->hw; | ||
341 | |||
342 | cpuc->pebs_enabled &= ~(1ULL << hwc->idx); | ||
343 | if (cpuc->enabled) | ||
344 | wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); | ||
345 | |||
346 | hwc->config |= ARCH_PERFMON_EVENTSEL_INT; | ||
347 | |||
348 | if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) | ||
349 | intel_pmu_lbr_disable(event); | ||
350 | } | ||
351 | |||
352 | static void intel_pmu_pebs_enable_all(void) | ||
353 | { | ||
354 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
355 | |||
356 | if (cpuc->pebs_enabled) | ||
357 | wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); | ||
358 | } | ||
359 | |||
360 | static void intel_pmu_pebs_disable_all(void) | ||
361 | { | ||
362 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
363 | |||
364 | if (cpuc->pebs_enabled) | ||
365 | wrmsrl(MSR_IA32_PEBS_ENABLE, 0); | ||
366 | } | ||
367 | |||
368 | #include <asm/insn.h> | ||
369 | |||
370 | static inline bool kernel_ip(unsigned long ip) | ||
371 | { | ||
372 | #ifdef CONFIG_X86_32 | ||
373 | return ip > PAGE_OFFSET; | ||
374 | #else | ||
375 | return (long)ip < 0; | ||
376 | #endif | ||
377 | } | ||
378 | |||
379 | static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) | ||
380 | { | ||
381 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
382 | unsigned long from = cpuc->lbr_entries[0].from; | ||
383 | unsigned long old_to, to = cpuc->lbr_entries[0].to; | ||
384 | unsigned long ip = regs->ip; | ||
385 | |||
386 | /* | ||
387 | * We don't need to fixup if the PEBS assist is fault like | ||
388 | */ | ||
389 | if (!x86_pmu.intel_cap.pebs_trap) | ||
390 | return 1; | ||
391 | |||
392 | /* | ||
393 | * No LBR entry, no basic block, no rewinding | ||
394 | */ | ||
395 | if (!cpuc->lbr_stack.nr || !from || !to) | ||
396 | return 0; | ||
397 | |||
398 | /* | ||
399 | * Basic blocks should never cross user/kernel boundaries | ||
400 | */ | ||
401 | if (kernel_ip(ip) != kernel_ip(to)) | ||
402 | return 0; | ||
403 | |||
404 | /* | ||
405 | * unsigned math, either ip is before the start (impossible) or | ||
406 | * the basic block is larger than 1 page (sanity) | ||
407 | */ | ||
408 | if ((ip - to) > PAGE_SIZE) | ||
409 | return 0; | ||
410 | |||
411 | /* | ||
412 | * We sampled a branch insn, rewind using the LBR stack | ||
413 | */ | ||
414 | if (ip == to) { | ||
415 | regs->ip = from; | ||
416 | return 1; | ||
417 | } | ||
418 | |||
419 | do { | ||
420 | struct insn insn; | ||
421 | u8 buf[MAX_INSN_SIZE]; | ||
422 | void *kaddr; | ||
423 | |||
424 | old_to = to; | ||
425 | if (!kernel_ip(ip)) { | ||
426 | int bytes, size = MAX_INSN_SIZE; | ||
427 | |||
428 | bytes = copy_from_user_nmi(buf, (void __user *)to, size); | ||
429 | if (bytes != size) | ||
430 | return 0; | ||
431 | |||
432 | kaddr = buf; | ||
433 | } else | ||
434 | kaddr = (void *)to; | ||
435 | |||
436 | kernel_insn_init(&insn, kaddr); | ||
437 | insn_get_length(&insn); | ||
438 | to += insn.length; | ||
439 | } while (to < ip); | ||
440 | |||
441 | if (to == ip) { | ||
442 | regs->ip = old_to; | ||
443 | return 1; | ||
444 | } | ||
445 | |||
446 | /* | ||
447 | * Even though we decoded the basic block, the instruction stream | ||
448 | * never matched the given IP, either the TO or the IP got corrupted. | ||
449 | */ | ||
450 | return 0; | ||
451 | } | ||
452 | |||
453 | static int intel_pmu_save_and_restart(struct perf_event *event); | ||
454 | |||
455 | static void __intel_pmu_pebs_event(struct perf_event *event, | ||
456 | struct pt_regs *iregs, void *__pebs) | ||
457 | { | ||
458 | /* | ||
459 | * We cast to pebs_record_core since that is a subset of | ||
460 | * both formats and we don't use the other fields in this | ||
461 | * routine. | ||
462 | */ | ||
463 | struct pebs_record_core *pebs = __pebs; | ||
464 | struct perf_sample_data data; | ||
465 | struct pt_regs regs; | ||
466 | |||
467 | if (!intel_pmu_save_and_restart(event)) | ||
468 | return; | ||
469 | |||
470 | perf_sample_data_init(&data, 0); | ||
471 | data.period = event->hw.last_period; | ||
472 | |||
473 | /* | ||
474 | * We use the interrupt regs as a base because the PEBS record | ||
475 | * does not contain a full regs set, specifically it seems to | ||
476 | * lack segment descriptors, which get used by things like | ||
477 | * user_mode(). | ||
478 | * | ||
479 | * In the simple case fix up only the IP and BP,SP regs, for | ||
480 | * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly. | ||
481 | * A possible PERF_SAMPLE_REGS will have to transfer all regs. | ||
482 | */ | ||
483 | regs = *iregs; | ||
484 | regs.ip = pebs->ip; | ||
485 | regs.bp = pebs->bp; | ||
486 | regs.sp = pebs->sp; | ||
487 | |||
488 | if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(®s)) | ||
489 | regs.flags |= PERF_EFLAGS_EXACT; | ||
490 | else | ||
491 | regs.flags &= ~PERF_EFLAGS_EXACT; | ||
492 | |||
493 | if (perf_event_overflow(event, 1, &data, ®s)) | ||
494 | x86_pmu_stop(event); | ||
495 | } | ||
496 | |||
497 | static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) | ||
498 | { | ||
499 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
500 | struct debug_store *ds = cpuc->ds; | ||
501 | struct perf_event *event = cpuc->events[0]; /* PMC0 only */ | ||
502 | struct pebs_record_core *at, *top; | ||
503 | int n; | ||
504 | |||
505 | if (!ds || !x86_pmu.pebs) | ||
506 | return; | ||
507 | |||
508 | at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; | ||
509 | top = (struct pebs_record_core *)(unsigned long)ds->pebs_index; | ||
510 | |||
511 | /* | ||
512 | * Whatever else happens, drain the thing | ||
513 | */ | ||
514 | ds->pebs_index = ds->pebs_buffer_base; | ||
515 | |||
516 | if (!test_bit(0, cpuc->active_mask)) | ||
517 | return; | ||
518 | |||
519 | WARN_ON_ONCE(!event); | ||
520 | |||
521 | if (!event->attr.precise_ip) | ||
522 | return; | ||
523 | |||
524 | n = top - at; | ||
525 | if (n <= 0) | ||
526 | return; | ||
527 | |||
528 | /* | ||
529 | * Should not happen, we program the threshold at 1 and do not | ||
530 | * set a reset value. | ||
531 | */ | ||
532 | WARN_ON_ONCE(n > 1); | ||
533 | at += n - 1; | ||
534 | |||
535 | __intel_pmu_pebs_event(event, iregs, at); | ||
536 | } | ||
537 | |||
538 | static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) | ||
539 | { | ||
540 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
541 | struct debug_store *ds = cpuc->ds; | ||
542 | struct pebs_record_nhm *at, *top; | ||
543 | struct perf_event *event = NULL; | ||
544 | u64 status = 0; | ||
545 | int bit, n; | ||
546 | |||
547 | if (!ds || !x86_pmu.pebs) | ||
548 | return; | ||
549 | |||
550 | at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; | ||
551 | top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; | ||
552 | |||
553 | ds->pebs_index = ds->pebs_buffer_base; | ||
554 | |||
555 | n = top - at; | ||
556 | if (n <= 0) | ||
557 | return; | ||
558 | |||
559 | /* | ||
560 | * Should not happen, we program the threshold at 1 and do not | ||
561 | * set a reset value. | ||
562 | */ | ||
563 | WARN_ON_ONCE(n > MAX_PEBS_EVENTS); | ||
564 | |||
565 | for ( ; at < top; at++) { | ||
566 | for_each_set_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) { | ||
567 | event = cpuc->events[bit]; | ||
568 | if (!test_bit(bit, cpuc->active_mask)) | ||
569 | continue; | ||
570 | |||
571 | WARN_ON_ONCE(!event); | ||
572 | |||
573 | if (!event->attr.precise_ip) | ||
574 | continue; | ||
575 | |||
576 | if (__test_and_set_bit(bit, (unsigned long *)&status)) | ||
577 | continue; | ||
578 | |||
579 | break; | ||
580 | } | ||
581 | |||
582 | if (!event || bit >= MAX_PEBS_EVENTS) | ||
583 | continue; | ||
584 | |||
585 | __intel_pmu_pebs_event(event, iregs, at); | ||
586 | } | ||
587 | } | ||
588 | |||
589 | /* | ||
590 | * BTS, PEBS probe and setup | ||
591 | */ | ||
592 | |||
593 | static void intel_ds_init(void) | ||
594 | { | ||
595 | /* | ||
596 | * No support for 32bit formats | ||
597 | */ | ||
598 | if (!boot_cpu_has(X86_FEATURE_DTES64)) | ||
599 | return; | ||
600 | |||
601 | x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); | ||
602 | x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); | ||
603 | if (x86_pmu.pebs) { | ||
604 | char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; | ||
605 | int format = x86_pmu.intel_cap.pebs_format; | ||
606 | |||
607 | switch (format) { | ||
608 | case 0: | ||
609 | printk(KERN_CONT "PEBS fmt0%c, ", pebs_type); | ||
610 | x86_pmu.pebs_record_size = sizeof(struct pebs_record_core); | ||
611 | x86_pmu.drain_pebs = intel_pmu_drain_pebs_core; | ||
612 | x86_pmu.pebs_constraints = intel_core_pebs_events; | ||
613 | break; | ||
614 | |||
615 | case 1: | ||
616 | printk(KERN_CONT "PEBS fmt1%c, ", pebs_type); | ||
617 | x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm); | ||
618 | x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; | ||
619 | x86_pmu.pebs_constraints = intel_nehalem_pebs_events; | ||
620 | break; | ||
621 | |||
622 | default: | ||
623 | printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type); | ||
624 | x86_pmu.pebs = 0; | ||
625 | break; | ||
626 | } | ||
627 | } | ||
628 | } | ||
629 | |||
630 | #else /* CONFIG_CPU_SUP_INTEL */ | ||
631 | |||
632 | static int reserve_ds_buffers(void) | ||
633 | { | ||
634 | return 0; | ||
635 | } | ||
636 | |||
637 | static void release_ds_buffers(void) | ||
638 | { | ||
639 | } | ||
640 | |||
641 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c new file mode 100644 index 000000000000..d202c1bece1a --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c | |||
@@ -0,0 +1,218 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_INTEL | ||
2 | |||
3 | enum { | ||
4 | LBR_FORMAT_32 = 0x00, | ||
5 | LBR_FORMAT_LIP = 0x01, | ||
6 | LBR_FORMAT_EIP = 0x02, | ||
7 | LBR_FORMAT_EIP_FLAGS = 0x03, | ||
8 | }; | ||
9 | |||
10 | /* | ||
11 | * We only support LBR implementations that have FREEZE_LBRS_ON_PMI | ||
12 | * otherwise it becomes near impossible to get a reliable stack. | ||
13 | */ | ||
14 | |||
15 | static void __intel_pmu_lbr_enable(void) | ||
16 | { | ||
17 | u64 debugctl; | ||
18 | |||
19 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
20 | debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); | ||
21 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
22 | } | ||
23 | |||
24 | static void __intel_pmu_lbr_disable(void) | ||
25 | { | ||
26 | u64 debugctl; | ||
27 | |||
28 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
29 | debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); | ||
30 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); | ||
31 | } | ||
32 | |||
33 | static void intel_pmu_lbr_reset_32(void) | ||
34 | { | ||
35 | int i; | ||
36 | |||
37 | for (i = 0; i < x86_pmu.lbr_nr; i++) | ||
38 | wrmsrl(x86_pmu.lbr_from + i, 0); | ||
39 | } | ||
40 | |||
41 | static void intel_pmu_lbr_reset_64(void) | ||
42 | { | ||
43 | int i; | ||
44 | |||
45 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | ||
46 | wrmsrl(x86_pmu.lbr_from + i, 0); | ||
47 | wrmsrl(x86_pmu.lbr_to + i, 0); | ||
48 | } | ||
49 | } | ||
50 | |||
51 | static void intel_pmu_lbr_reset(void) | ||
52 | { | ||
53 | if (!x86_pmu.lbr_nr) | ||
54 | return; | ||
55 | |||
56 | if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) | ||
57 | intel_pmu_lbr_reset_32(); | ||
58 | else | ||
59 | intel_pmu_lbr_reset_64(); | ||
60 | } | ||
61 | |||
62 | static void intel_pmu_lbr_enable(struct perf_event *event) | ||
63 | { | ||
64 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
65 | |||
66 | if (!x86_pmu.lbr_nr) | ||
67 | return; | ||
68 | |||
69 | WARN_ON_ONCE(cpuc->enabled); | ||
70 | |||
71 | /* | ||
72 | * Reset the LBR stack if we changed task context to | ||
73 | * avoid data leaks. | ||
74 | */ | ||
75 | |||
76 | if (event->ctx->task && cpuc->lbr_context != event->ctx) { | ||
77 | intel_pmu_lbr_reset(); | ||
78 | cpuc->lbr_context = event->ctx; | ||
79 | } | ||
80 | |||
81 | cpuc->lbr_users++; | ||
82 | } | ||
83 | |||
84 | static void intel_pmu_lbr_disable(struct perf_event *event) | ||
85 | { | ||
86 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
87 | |||
88 | if (!x86_pmu.lbr_nr) | ||
89 | return; | ||
90 | |||
91 | cpuc->lbr_users--; | ||
92 | WARN_ON_ONCE(cpuc->lbr_users < 0); | ||
93 | |||
94 | if (cpuc->enabled && !cpuc->lbr_users) | ||
95 | __intel_pmu_lbr_disable(); | ||
96 | } | ||
97 | |||
98 | static void intel_pmu_lbr_enable_all(void) | ||
99 | { | ||
100 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
101 | |||
102 | if (cpuc->lbr_users) | ||
103 | __intel_pmu_lbr_enable(); | ||
104 | } | ||
105 | |||
106 | static void intel_pmu_lbr_disable_all(void) | ||
107 | { | ||
108 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
109 | |||
110 | if (cpuc->lbr_users) | ||
111 | __intel_pmu_lbr_disable(); | ||
112 | } | ||
113 | |||
114 | static inline u64 intel_pmu_lbr_tos(void) | ||
115 | { | ||
116 | u64 tos; | ||
117 | |||
118 | rdmsrl(x86_pmu.lbr_tos, tos); | ||
119 | |||
120 | return tos; | ||
121 | } | ||
122 | |||
123 | static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) | ||
124 | { | ||
125 | unsigned long mask = x86_pmu.lbr_nr - 1; | ||
126 | u64 tos = intel_pmu_lbr_tos(); | ||
127 | int i; | ||
128 | |||
129 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | ||
130 | unsigned long lbr_idx = (tos - i) & mask; | ||
131 | union { | ||
132 | struct { | ||
133 | u32 from; | ||
134 | u32 to; | ||
135 | }; | ||
136 | u64 lbr; | ||
137 | } msr_lastbranch; | ||
138 | |||
139 | rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); | ||
140 | |||
141 | cpuc->lbr_entries[i].from = msr_lastbranch.from; | ||
142 | cpuc->lbr_entries[i].to = msr_lastbranch.to; | ||
143 | cpuc->lbr_entries[i].flags = 0; | ||
144 | } | ||
145 | cpuc->lbr_stack.nr = i; | ||
146 | } | ||
147 | |||
148 | #define LBR_FROM_FLAG_MISPRED (1ULL << 63) | ||
149 | |||
150 | /* | ||
151 | * Due to lack of segmentation in Linux the effective address (offset) | ||
152 | * is the same as the linear address, allowing us to merge the LIP and EIP | ||
153 | * LBR formats. | ||
154 | */ | ||
155 | static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) | ||
156 | { | ||
157 | unsigned long mask = x86_pmu.lbr_nr - 1; | ||
158 | int lbr_format = x86_pmu.intel_cap.lbr_format; | ||
159 | u64 tos = intel_pmu_lbr_tos(); | ||
160 | int i; | ||
161 | |||
162 | for (i = 0; i < x86_pmu.lbr_nr; i++) { | ||
163 | unsigned long lbr_idx = (tos - i) & mask; | ||
164 | u64 from, to, flags = 0; | ||
165 | |||
166 | rdmsrl(x86_pmu.lbr_from + lbr_idx, from); | ||
167 | rdmsrl(x86_pmu.lbr_to + lbr_idx, to); | ||
168 | |||
169 | if (lbr_format == LBR_FORMAT_EIP_FLAGS) { | ||
170 | flags = !!(from & LBR_FROM_FLAG_MISPRED); | ||
171 | from = (u64)((((s64)from) << 1) >> 1); | ||
172 | } | ||
173 | |||
174 | cpuc->lbr_entries[i].from = from; | ||
175 | cpuc->lbr_entries[i].to = to; | ||
176 | cpuc->lbr_entries[i].flags = flags; | ||
177 | } | ||
178 | cpuc->lbr_stack.nr = i; | ||
179 | } | ||
180 | |||
181 | static void intel_pmu_lbr_read(void) | ||
182 | { | ||
183 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
184 | |||
185 | if (!cpuc->lbr_users) | ||
186 | return; | ||
187 | |||
188 | if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) | ||
189 | intel_pmu_lbr_read_32(cpuc); | ||
190 | else | ||
191 | intel_pmu_lbr_read_64(cpuc); | ||
192 | } | ||
193 | |||
194 | static void intel_pmu_lbr_init_core(void) | ||
195 | { | ||
196 | x86_pmu.lbr_nr = 4; | ||
197 | x86_pmu.lbr_tos = 0x01c9; | ||
198 | x86_pmu.lbr_from = 0x40; | ||
199 | x86_pmu.lbr_to = 0x60; | ||
200 | } | ||
201 | |||
202 | static void intel_pmu_lbr_init_nhm(void) | ||
203 | { | ||
204 | x86_pmu.lbr_nr = 16; | ||
205 | x86_pmu.lbr_tos = 0x01c9; | ||
206 | x86_pmu.lbr_from = 0x680; | ||
207 | x86_pmu.lbr_to = 0x6c0; | ||
208 | } | ||
209 | |||
210 | static void intel_pmu_lbr_init_atom(void) | ||
211 | { | ||
212 | x86_pmu.lbr_nr = 8; | ||
213 | x86_pmu.lbr_tos = 0x01c9; | ||
214 | x86_pmu.lbr_from = 0x40; | ||
215 | x86_pmu.lbr_to = 0x60; | ||
216 | } | ||
217 | |||
218 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c new file mode 100644 index 000000000000..424fc8de68e4 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
@@ -0,0 +1,857 @@ | |||
1 | /* | ||
2 | * Netburst Perfomance Events (P4, old Xeon) | ||
3 | * | ||
4 | * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org> | ||
5 | * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com> | ||
6 | * | ||
7 | * For licencing details see kernel-base/COPYING | ||
8 | */ | ||
9 | |||
10 | #ifdef CONFIG_CPU_SUP_INTEL | ||
11 | |||
12 | #include <asm/perf_event_p4.h> | ||
13 | |||
14 | #define P4_CNTR_LIMIT 3 | ||
15 | /* | ||
16 | * array indices: 0,1 - HT threads, used with HT enabled cpu | ||
17 | */ | ||
18 | struct p4_event_bind { | ||
19 | unsigned int opcode; /* Event code and ESCR selector */ | ||
20 | unsigned int escr_msr[2]; /* ESCR MSR for this event */ | ||
21 | char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */ | ||
22 | }; | ||
23 | |||
24 | struct p4_cache_event_bind { | ||
25 | unsigned int metric_pebs; | ||
26 | unsigned int metric_vert; | ||
27 | }; | ||
28 | |||
29 | #define P4_GEN_CACHE_EVENT_BIND(name) \ | ||
30 | [P4_CACHE__##name] = { \ | ||
31 | .metric_pebs = P4_PEBS__##name, \ | ||
32 | .metric_vert = P4_VERT__##name, \ | ||
33 | } | ||
34 | |||
35 | static struct p4_cache_event_bind p4_cache_event_bind_map[] = { | ||
36 | P4_GEN_CACHE_EVENT_BIND(1stl_cache_load_miss_retired), | ||
37 | P4_GEN_CACHE_EVENT_BIND(2ndl_cache_load_miss_retired), | ||
38 | P4_GEN_CACHE_EVENT_BIND(dtlb_load_miss_retired), | ||
39 | P4_GEN_CACHE_EVENT_BIND(dtlb_store_miss_retired), | ||
40 | }; | ||
41 | |||
42 | /* | ||
43 | * Note that we don't use CCCR1 here, there is an | ||
44 | * exception for P4_BSQ_ALLOCATION but we just have | ||
45 | * no workaround | ||
46 | * | ||
47 | * consider this binding as resources which particular | ||
48 | * event may borrow, it doesn't contain EventMask, | ||
49 | * Tags and friends -- they are left to a caller | ||
50 | */ | ||
51 | static struct p4_event_bind p4_event_bind_map[] = { | ||
52 | [P4_EVENT_TC_DELIVER_MODE] = { | ||
53 | .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE), | ||
54 | .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, | ||
55 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
56 | }, | ||
57 | [P4_EVENT_BPU_FETCH_REQUEST] = { | ||
58 | .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST), | ||
59 | .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 }, | ||
60 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
61 | }, | ||
62 | [P4_EVENT_ITLB_REFERENCE] = { | ||
63 | .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE), | ||
64 | .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, | ||
65 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
66 | }, | ||
67 | [P4_EVENT_MEMORY_CANCEL] = { | ||
68 | .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL), | ||
69 | .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, | ||
70 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
71 | }, | ||
72 | [P4_EVENT_MEMORY_COMPLETE] = { | ||
73 | .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE), | ||
74 | .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, | ||
75 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
76 | }, | ||
77 | [P4_EVENT_LOAD_PORT_REPLAY] = { | ||
78 | .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY), | ||
79 | .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, | ||
80 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
81 | }, | ||
82 | [P4_EVENT_STORE_PORT_REPLAY] = { | ||
83 | .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY), | ||
84 | .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, | ||
85 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
86 | }, | ||
87 | [P4_EVENT_MOB_LOAD_REPLAY] = { | ||
88 | .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY), | ||
89 | .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 }, | ||
90 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
91 | }, | ||
92 | [P4_EVENT_PAGE_WALK_TYPE] = { | ||
93 | .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE), | ||
94 | .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 }, | ||
95 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
96 | }, | ||
97 | [P4_EVENT_BSQ_CACHE_REFERENCE] = { | ||
98 | .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE), | ||
99 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, | ||
100 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
101 | }, | ||
102 | [P4_EVENT_IOQ_ALLOCATION] = { | ||
103 | .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION), | ||
104 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
105 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
106 | }, | ||
107 | [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */ | ||
108 | .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES), | ||
109 | .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 }, | ||
110 | .cntr = { {2, -1, -1}, {3, -1, -1} }, | ||
111 | }, | ||
112 | [P4_EVENT_FSB_DATA_ACTIVITY] = { | ||
113 | .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY), | ||
114 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
115 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
116 | }, | ||
117 | [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */ | ||
118 | .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION), | ||
119 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 }, | ||
120 | .cntr = { {0, -1, -1}, {1, -1, -1} }, | ||
121 | }, | ||
122 | [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */ | ||
123 | .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES), | ||
124 | .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 }, | ||
125 | .cntr = { {2, -1, -1}, {3, -1, -1} }, | ||
126 | }, | ||
127 | [P4_EVENT_SSE_INPUT_ASSIST] = { | ||
128 | .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST), | ||
129 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
130 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
131 | }, | ||
132 | [P4_EVENT_PACKED_SP_UOP] = { | ||
133 | .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP), | ||
134 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
135 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
136 | }, | ||
137 | [P4_EVENT_PACKED_DP_UOP] = { | ||
138 | .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP), | ||
139 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
140 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
141 | }, | ||
142 | [P4_EVENT_SCALAR_SP_UOP] = { | ||
143 | .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP), | ||
144 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
145 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
146 | }, | ||
147 | [P4_EVENT_SCALAR_DP_UOP] = { | ||
148 | .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP), | ||
149 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
150 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
151 | }, | ||
152 | [P4_EVENT_64BIT_MMX_UOP] = { | ||
153 | .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP), | ||
154 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
155 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
156 | }, | ||
157 | [P4_EVENT_128BIT_MMX_UOP] = { | ||
158 | .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP), | ||
159 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
160 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
161 | }, | ||
162 | [P4_EVENT_X87_FP_UOP] = { | ||
163 | .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP), | ||
164 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, | ||
165 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
166 | }, | ||
167 | [P4_EVENT_TC_MISC] = { | ||
168 | .opcode = P4_OPCODE(P4_EVENT_TC_MISC), | ||
169 | .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, | ||
170 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
171 | }, | ||
172 | [P4_EVENT_GLOBAL_POWER_EVENTS] = { | ||
173 | .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS), | ||
174 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
175 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
176 | }, | ||
177 | [P4_EVENT_TC_MS_XFER] = { | ||
178 | .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER), | ||
179 | .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, | ||
180 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
181 | }, | ||
182 | [P4_EVENT_UOP_QUEUE_WRITES] = { | ||
183 | .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES), | ||
184 | .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, | ||
185 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
186 | }, | ||
187 | [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = { | ||
188 | .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE), | ||
189 | .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 }, | ||
190 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
191 | }, | ||
192 | [P4_EVENT_RETIRED_BRANCH_TYPE] = { | ||
193 | .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE), | ||
194 | .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 }, | ||
195 | .cntr = { {4, 5, -1}, {6, 7, -1} }, | ||
196 | }, | ||
197 | [P4_EVENT_RESOURCE_STALL] = { | ||
198 | .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL), | ||
199 | .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 }, | ||
200 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
201 | }, | ||
202 | [P4_EVENT_WC_BUFFER] = { | ||
203 | .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER), | ||
204 | .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, | ||
205 | .cntr = { {8, 9, -1}, {10, 11, -1} }, | ||
206 | }, | ||
207 | [P4_EVENT_B2B_CYCLES] = { | ||
208 | .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES), | ||
209 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
210 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
211 | }, | ||
212 | [P4_EVENT_BNR] = { | ||
213 | .opcode = P4_OPCODE(P4_EVENT_BNR), | ||
214 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
215 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
216 | }, | ||
217 | [P4_EVENT_SNOOP] = { | ||
218 | .opcode = P4_OPCODE(P4_EVENT_SNOOP), | ||
219 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
220 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
221 | }, | ||
222 | [P4_EVENT_RESPONSE] = { | ||
223 | .opcode = P4_OPCODE(P4_EVENT_RESPONSE), | ||
224 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, | ||
225 | .cntr = { {0, -1, -1}, {2, -1, -1} }, | ||
226 | }, | ||
227 | [P4_EVENT_FRONT_END_EVENT] = { | ||
228 | .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT), | ||
229 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
230 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
231 | }, | ||
232 | [P4_EVENT_EXECUTION_EVENT] = { | ||
233 | .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT), | ||
234 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
235 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
236 | }, | ||
237 | [P4_EVENT_REPLAY_EVENT] = { | ||
238 | .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT), | ||
239 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
240 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
241 | }, | ||
242 | [P4_EVENT_INSTR_RETIRED] = { | ||
243 | .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED), | ||
244 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
245 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
246 | }, | ||
247 | [P4_EVENT_UOPS_RETIRED] = { | ||
248 | .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED), | ||
249 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
250 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
251 | }, | ||
252 | [P4_EVENT_UOP_TYPE] = { | ||
253 | .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE), | ||
254 | .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, | ||
255 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
256 | }, | ||
257 | [P4_EVENT_BRANCH_RETIRED] = { | ||
258 | .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED), | ||
259 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
260 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
261 | }, | ||
262 | [P4_EVENT_MISPRED_BRANCH_RETIRED] = { | ||
263 | .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), | ||
264 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
265 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
266 | }, | ||
267 | [P4_EVENT_X87_ASSIST] = { | ||
268 | .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST), | ||
269 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
270 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
271 | }, | ||
272 | [P4_EVENT_MACHINE_CLEAR] = { | ||
273 | .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR), | ||
274 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, | ||
275 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
276 | }, | ||
277 | [P4_EVENT_INSTR_COMPLETED] = { | ||
278 | .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED), | ||
279 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, | ||
280 | .cntr = { {12, 13, 16}, {14, 15, 17} }, | ||
281 | }, | ||
282 | }; | ||
283 | |||
284 | #define P4_GEN_CACHE_EVENT(event, bit, cache_event) \ | ||
285 | p4_config_pack_escr(P4_ESCR_EVENT(event) | \ | ||
286 | P4_ESCR_EMASK_BIT(event, bit)) | \ | ||
287 | p4_config_pack_cccr(cache_event | \ | ||
288 | P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))) | ||
289 | |||
290 | static __initconst const u64 p4_hw_cache_event_ids | ||
291 | [PERF_COUNT_HW_CACHE_MAX] | ||
292 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
293 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
294 | { | ||
295 | [ C(L1D ) ] = { | ||
296 | [ C(OP_READ) ] = { | ||
297 | [ C(RESULT_ACCESS) ] = 0x0, | ||
298 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | ||
299 | P4_CACHE__1stl_cache_load_miss_retired), | ||
300 | }, | ||
301 | }, | ||
302 | [ C(LL ) ] = { | ||
303 | [ C(OP_READ) ] = { | ||
304 | [ C(RESULT_ACCESS) ] = 0x0, | ||
305 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | ||
306 | P4_CACHE__2ndl_cache_load_miss_retired), | ||
307 | }, | ||
308 | }, | ||
309 | [ C(DTLB) ] = { | ||
310 | [ C(OP_READ) ] = { | ||
311 | [ C(RESULT_ACCESS) ] = 0x0, | ||
312 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | ||
313 | P4_CACHE__dtlb_load_miss_retired), | ||
314 | }, | ||
315 | [ C(OP_WRITE) ] = { | ||
316 | [ C(RESULT_ACCESS) ] = 0x0, | ||
317 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, | ||
318 | P4_CACHE__dtlb_store_miss_retired), | ||
319 | }, | ||
320 | }, | ||
321 | [ C(ITLB) ] = { | ||
322 | [ C(OP_READ) ] = { | ||
323 | [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT, | ||
324 | P4_CACHE__itlb_reference_hit), | ||
325 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS, | ||
326 | P4_CACHE__itlb_reference_miss), | ||
327 | }, | ||
328 | [ C(OP_WRITE) ] = { | ||
329 | [ C(RESULT_ACCESS) ] = -1, | ||
330 | [ C(RESULT_MISS) ] = -1, | ||
331 | }, | ||
332 | [ C(OP_PREFETCH) ] = { | ||
333 | [ C(RESULT_ACCESS) ] = -1, | ||
334 | [ C(RESULT_MISS) ] = -1, | ||
335 | }, | ||
336 | }, | ||
337 | }; | ||
338 | |||
339 | static u64 p4_general_events[PERF_COUNT_HW_MAX] = { | ||
340 | /* non-halted CPU clocks */ | ||
341 | [PERF_COUNT_HW_CPU_CYCLES] = | ||
342 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) | | ||
343 | P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)), | ||
344 | |||
345 | /* | ||
346 | * retired instructions | ||
347 | * in a sake of simplicity we don't use the FSB tagging | ||
348 | */ | ||
349 | [PERF_COUNT_HW_INSTRUCTIONS] = | ||
350 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED) | | ||
351 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) | | ||
352 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)), | ||
353 | |||
354 | /* cache hits */ | ||
355 | [PERF_COUNT_HW_CACHE_REFERENCES] = | ||
356 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) | | ||
357 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | | ||
358 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | | ||
359 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) | | ||
360 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) | | ||
361 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) | | ||
362 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)), | ||
363 | |||
364 | /* cache misses */ | ||
365 | [PERF_COUNT_HW_CACHE_MISSES] = | ||
366 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) | | ||
367 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | | ||
368 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | | ||
369 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)), | ||
370 | |||
371 | /* branch instructions retired */ | ||
372 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = | ||
373 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE) | | ||
374 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) | | ||
375 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) | | ||
376 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) | | ||
377 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)), | ||
378 | |||
379 | /* mispredicted branches retired */ | ||
380 | [PERF_COUNT_HW_BRANCH_MISSES] = | ||
381 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED) | | ||
382 | P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)), | ||
383 | |||
384 | /* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN): */ | ||
385 | [PERF_COUNT_HW_BUS_CYCLES] = | ||
386 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY) | | ||
387 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) | | ||
388 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)) | | ||
389 | p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE), | ||
390 | }; | ||
391 | |||
392 | static struct p4_event_bind *p4_config_get_bind(u64 config) | ||
393 | { | ||
394 | unsigned int evnt = p4_config_unpack_event(config); | ||
395 | struct p4_event_bind *bind = NULL; | ||
396 | |||
397 | if (evnt < ARRAY_SIZE(p4_event_bind_map)) | ||
398 | bind = &p4_event_bind_map[evnt]; | ||
399 | |||
400 | return bind; | ||
401 | } | ||
402 | |||
403 | static u64 p4_pmu_event_map(int hw_event) | ||
404 | { | ||
405 | struct p4_event_bind *bind; | ||
406 | unsigned int esel; | ||
407 | u64 config; | ||
408 | |||
409 | config = p4_general_events[hw_event]; | ||
410 | bind = p4_config_get_bind(config); | ||
411 | esel = P4_OPCODE_ESEL(bind->opcode); | ||
412 | config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel)); | ||
413 | |||
414 | return config; | ||
415 | } | ||
416 | |||
417 | static int p4_hw_config(struct perf_event *event) | ||
418 | { | ||
419 | int cpu = get_cpu(); | ||
420 | int rc = 0; | ||
421 | unsigned int evnt; | ||
422 | u32 escr, cccr; | ||
423 | |||
424 | /* | ||
425 | * the reason we use cpu that early is that: if we get scheduled | ||
426 | * first time on the same cpu -- we will not need swap thread | ||
427 | * specific flags in config (and will save some cpu cycles) | ||
428 | */ | ||
429 | |||
430 | cccr = p4_default_cccr_conf(cpu); | ||
431 | escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel, | ||
432 | event->attr.exclude_user); | ||
433 | event->hw.config = p4_config_pack_escr(escr) | | ||
434 | p4_config_pack_cccr(cccr); | ||
435 | |||
436 | if (p4_ht_active() && p4_ht_thread(cpu)) | ||
437 | event->hw.config = p4_set_ht_bit(event->hw.config); | ||
438 | |||
439 | if (event->attr.type == PERF_TYPE_RAW) { | ||
440 | |||
441 | /* user data may have out-of-bound event index */ | ||
442 | evnt = p4_config_unpack_event(event->attr.config); | ||
443 | if (evnt >= ARRAY_SIZE(p4_event_bind_map)) { | ||
444 | rc = -EINVAL; | ||
445 | goto out; | ||
446 | } | ||
447 | |||
448 | /* | ||
449 | * We don't control raw events so it's up to the caller | ||
450 | * to pass sane values (and we don't count the thread number | ||
451 | * on HT machine but allow HT-compatible specifics to be | ||
452 | * passed on) | ||
453 | * | ||
454 | * XXX: HT wide things should check perf_paranoid_cpu() && | ||
455 | * CAP_SYS_ADMIN | ||
456 | */ | ||
457 | event->hw.config |= event->attr.config & | ||
458 | (p4_config_pack_escr(P4_ESCR_MASK_HT) | | ||
459 | p4_config_pack_cccr(P4_CCCR_MASK_HT)); | ||
460 | } | ||
461 | |||
462 | rc = x86_setup_perfctr(event); | ||
463 | out: | ||
464 | put_cpu(); | ||
465 | return rc; | ||
466 | } | ||
467 | |||
468 | static inline void p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) | ||
469 | { | ||
470 | unsigned long dummy; | ||
471 | |||
472 | rdmsrl(hwc->config_base + hwc->idx, dummy); | ||
473 | if (dummy & P4_CCCR_OVF) { | ||
474 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | ||
475 | ((u64)dummy) & ~P4_CCCR_OVF); | ||
476 | } | ||
477 | } | ||
478 | |||
479 | static inline void p4_pmu_disable_event(struct perf_event *event) | ||
480 | { | ||
481 | struct hw_perf_event *hwc = &event->hw; | ||
482 | |||
483 | /* | ||
484 | * If event gets disabled while counter is in overflowed | ||
485 | * state we need to clear P4_CCCR_OVF, otherwise interrupt get | ||
486 | * asserted again and again | ||
487 | */ | ||
488 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | ||
489 | (u64)(p4_config_unpack_cccr(hwc->config)) & | ||
490 | ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); | ||
491 | } | ||
492 | |||
493 | static void p4_pmu_disable_all(void) | ||
494 | { | ||
495 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
496 | int idx; | ||
497 | |||
498 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
499 | struct perf_event *event = cpuc->events[idx]; | ||
500 | if (!test_bit(idx, cpuc->active_mask)) | ||
501 | continue; | ||
502 | p4_pmu_disable_event(event); | ||
503 | } | ||
504 | } | ||
505 | |||
506 | static void p4_pmu_enable_event(struct perf_event *event) | ||
507 | { | ||
508 | struct hw_perf_event *hwc = &event->hw; | ||
509 | int thread = p4_ht_config_thread(hwc->config); | ||
510 | u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); | ||
511 | unsigned int idx = p4_config_unpack_event(hwc->config); | ||
512 | unsigned int idx_cache = p4_config_unpack_cache_event(hwc->config); | ||
513 | struct p4_event_bind *bind; | ||
514 | struct p4_cache_event_bind *bind_cache; | ||
515 | u64 escr_addr, cccr; | ||
516 | |||
517 | bind = &p4_event_bind_map[idx]; | ||
518 | escr_addr = (u64)bind->escr_msr[thread]; | ||
519 | |||
520 | /* | ||
521 | * - we dont support cascaded counters yet | ||
522 | * - and counter 1 is broken (erratum) | ||
523 | */ | ||
524 | WARN_ON_ONCE(p4_is_event_cascaded(hwc->config)); | ||
525 | WARN_ON_ONCE(hwc->idx == 1); | ||
526 | |||
527 | /* we need a real Event value */ | ||
528 | escr_conf &= ~P4_ESCR_EVENT_MASK; | ||
529 | escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode)); | ||
530 | |||
531 | cccr = p4_config_unpack_cccr(hwc->config); | ||
532 | |||
533 | /* | ||
534 | * it could be Cache event so that we need to | ||
535 | * set metrics into additional MSRs | ||
536 | */ | ||
537 | BUILD_BUG_ON(P4_CACHE__MAX > P4_CCCR_CACHE_OPS_MASK); | ||
538 | if (idx_cache > P4_CACHE__NONE && | ||
539 | idx_cache < ARRAY_SIZE(p4_cache_event_bind_map)) { | ||
540 | bind_cache = &p4_cache_event_bind_map[idx_cache]; | ||
541 | (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind_cache->metric_pebs); | ||
542 | (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind_cache->metric_vert); | ||
543 | } | ||
544 | |||
545 | (void)checking_wrmsrl(escr_addr, escr_conf); | ||
546 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, | ||
547 | (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); | ||
548 | } | ||
549 | |||
550 | static void p4_pmu_enable_all(int added) | ||
551 | { | ||
552 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
553 | int idx; | ||
554 | |||
555 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
556 | struct perf_event *event = cpuc->events[idx]; | ||
557 | if (!test_bit(idx, cpuc->active_mask)) | ||
558 | continue; | ||
559 | p4_pmu_enable_event(event); | ||
560 | } | ||
561 | } | ||
562 | |||
563 | static int p4_pmu_handle_irq(struct pt_regs *regs) | ||
564 | { | ||
565 | struct perf_sample_data data; | ||
566 | struct cpu_hw_events *cpuc; | ||
567 | struct perf_event *event; | ||
568 | struct hw_perf_event *hwc; | ||
569 | int idx, handled = 0; | ||
570 | u64 val; | ||
571 | |||
572 | data.addr = 0; | ||
573 | data.raw = NULL; | ||
574 | |||
575 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
576 | |||
577 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
578 | |||
579 | if (!test_bit(idx, cpuc->active_mask)) | ||
580 | continue; | ||
581 | |||
582 | event = cpuc->events[idx]; | ||
583 | hwc = &event->hw; | ||
584 | |||
585 | WARN_ON_ONCE(hwc->idx != idx); | ||
586 | |||
587 | /* | ||
588 | * FIXME: Redundant call, actually not needed | ||
589 | * but just to check if we're screwed | ||
590 | */ | ||
591 | p4_pmu_clear_cccr_ovf(hwc); | ||
592 | |||
593 | val = x86_perf_event_update(event); | ||
594 | if (val & (1ULL << (x86_pmu.cntval_bits - 1))) | ||
595 | continue; | ||
596 | |||
597 | /* | ||
598 | * event overflow | ||
599 | */ | ||
600 | handled = 1; | ||
601 | data.period = event->hw.last_period; | ||
602 | |||
603 | if (!x86_perf_event_set_period(event)) | ||
604 | continue; | ||
605 | if (perf_event_overflow(event, 1, &data, regs)) | ||
606 | p4_pmu_disable_event(event); | ||
607 | } | ||
608 | |||
609 | if (handled) { | ||
610 | /* p4 quirk: unmask it again */ | ||
611 | apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); | ||
612 | inc_irq_stat(apic_perf_irqs); | ||
613 | } | ||
614 | |||
615 | return handled; | ||
616 | } | ||
617 | |||
618 | /* | ||
619 | * swap thread specific fields according to a thread | ||
620 | * we are going to run on | ||
621 | */ | ||
622 | static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu) | ||
623 | { | ||
624 | u32 escr, cccr; | ||
625 | |||
626 | /* | ||
627 | * we either lucky and continue on same cpu or no HT support | ||
628 | */ | ||
629 | if (!p4_should_swap_ts(hwc->config, cpu)) | ||
630 | return; | ||
631 | |||
632 | /* | ||
633 | * the event is migrated from an another logical | ||
634 | * cpu, so we need to swap thread specific flags | ||
635 | */ | ||
636 | |||
637 | escr = p4_config_unpack_escr(hwc->config); | ||
638 | cccr = p4_config_unpack_cccr(hwc->config); | ||
639 | |||
640 | if (p4_ht_thread(cpu)) { | ||
641 | cccr &= ~P4_CCCR_OVF_PMI_T0; | ||
642 | cccr |= P4_CCCR_OVF_PMI_T1; | ||
643 | if (escr & P4_ESCR_T0_OS) { | ||
644 | escr &= ~P4_ESCR_T0_OS; | ||
645 | escr |= P4_ESCR_T1_OS; | ||
646 | } | ||
647 | if (escr & P4_ESCR_T0_USR) { | ||
648 | escr &= ~P4_ESCR_T0_USR; | ||
649 | escr |= P4_ESCR_T1_USR; | ||
650 | } | ||
651 | hwc->config = p4_config_pack_escr(escr); | ||
652 | hwc->config |= p4_config_pack_cccr(cccr); | ||
653 | hwc->config |= P4_CONFIG_HT; | ||
654 | } else { | ||
655 | cccr &= ~P4_CCCR_OVF_PMI_T1; | ||
656 | cccr |= P4_CCCR_OVF_PMI_T0; | ||
657 | if (escr & P4_ESCR_T1_OS) { | ||
658 | escr &= ~P4_ESCR_T1_OS; | ||
659 | escr |= P4_ESCR_T0_OS; | ||
660 | } | ||
661 | if (escr & P4_ESCR_T1_USR) { | ||
662 | escr &= ~P4_ESCR_T1_USR; | ||
663 | escr |= P4_ESCR_T0_USR; | ||
664 | } | ||
665 | hwc->config = p4_config_pack_escr(escr); | ||
666 | hwc->config |= p4_config_pack_cccr(cccr); | ||
667 | hwc->config &= ~P4_CONFIG_HT; | ||
668 | } | ||
669 | } | ||
670 | |||
671 | /* | ||
672 | * ESCR address hashing is tricky, ESCRs are not sequential | ||
673 | * in memory but all starts from MSR_P4_BSU_ESCR0 (0x03e0) and | ||
674 | * the metric between any ESCRs is laid in range [0xa0,0xe1] | ||
675 | * | ||
676 | * so we make ~70% filled hashtable | ||
677 | */ | ||
678 | |||
679 | #define P4_ESCR_MSR_BASE 0x000003a0 | ||
680 | #define P4_ESCR_MSR_MAX 0x000003e1 | ||
681 | #define P4_ESCR_MSR_TABLE_SIZE (P4_ESCR_MSR_MAX - P4_ESCR_MSR_BASE + 1) | ||
682 | #define P4_ESCR_MSR_IDX(msr) (msr - P4_ESCR_MSR_BASE) | ||
683 | #define P4_ESCR_MSR_TABLE_ENTRY(msr) [P4_ESCR_MSR_IDX(msr)] = msr | ||
684 | |||
685 | static const unsigned int p4_escr_table[P4_ESCR_MSR_TABLE_SIZE] = { | ||
686 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR0), | ||
687 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR1), | ||
688 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR0), | ||
689 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR1), | ||
690 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR0), | ||
691 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR1), | ||
692 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR0), | ||
693 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR1), | ||
694 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR2), | ||
695 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR3), | ||
696 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR4), | ||
697 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR5), | ||
698 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR0), | ||
699 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR1), | ||
700 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR0), | ||
701 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR1), | ||
702 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR0), | ||
703 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR1), | ||
704 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR0), | ||
705 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR1), | ||
706 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR0), | ||
707 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR1), | ||
708 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR0), | ||
709 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR1), | ||
710 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR0), | ||
711 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR1), | ||
712 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR0), | ||
713 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR1), | ||
714 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR0), | ||
715 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR1), | ||
716 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR0), | ||
717 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR1), | ||
718 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR0), | ||
719 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR1), | ||
720 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR0), | ||
721 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR1), | ||
722 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR0), | ||
723 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR1), | ||
724 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR0), | ||
725 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR1), | ||
726 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR0), | ||
727 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR1), | ||
728 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR0), | ||
729 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR1), | ||
730 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR0), | ||
731 | P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR1), | ||
732 | }; | ||
733 | |||
734 | static int p4_get_escr_idx(unsigned int addr) | ||
735 | { | ||
736 | unsigned int idx = P4_ESCR_MSR_IDX(addr); | ||
737 | |||
738 | if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE || | ||
739 | !p4_escr_table[idx])) { | ||
740 | WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n", addr); | ||
741 | return -1; | ||
742 | } | ||
743 | |||
744 | return idx; | ||
745 | } | ||
746 | |||
747 | static int p4_next_cntr(int thread, unsigned long *used_mask, | ||
748 | struct p4_event_bind *bind) | ||
749 | { | ||
750 | int i, j; | ||
751 | |||
752 | for (i = 0; i < P4_CNTR_LIMIT; i++) { | ||
753 | j = bind->cntr[thread][i]; | ||
754 | if (j != -1 && !test_bit(j, used_mask)) | ||
755 | return j; | ||
756 | } | ||
757 | |||
758 | return -1; | ||
759 | } | ||
760 | |||
761 | static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) | ||
762 | { | ||
763 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
764 | unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)]; | ||
765 | int cpu = raw_smp_processor_id(); | ||
766 | struct hw_perf_event *hwc; | ||
767 | struct p4_event_bind *bind; | ||
768 | unsigned int i, thread, num; | ||
769 | int cntr_idx, escr_idx; | ||
770 | |||
771 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); | ||
772 | bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE); | ||
773 | |||
774 | for (i = 0, num = n; i < n; i++, num--) { | ||
775 | |||
776 | hwc = &cpuc->event_list[i]->hw; | ||
777 | thread = p4_ht_thread(cpu); | ||
778 | bind = p4_config_get_bind(hwc->config); | ||
779 | escr_idx = p4_get_escr_idx(bind->escr_msr[thread]); | ||
780 | if (unlikely(escr_idx == -1)) | ||
781 | goto done; | ||
782 | |||
783 | if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) { | ||
784 | cntr_idx = hwc->idx; | ||
785 | if (assign) | ||
786 | assign[i] = hwc->idx; | ||
787 | goto reserve; | ||
788 | } | ||
789 | |||
790 | cntr_idx = p4_next_cntr(thread, used_mask, bind); | ||
791 | if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) | ||
792 | goto done; | ||
793 | |||
794 | p4_pmu_swap_config_ts(hwc, cpu); | ||
795 | if (assign) | ||
796 | assign[i] = cntr_idx; | ||
797 | reserve: | ||
798 | set_bit(cntr_idx, used_mask); | ||
799 | set_bit(escr_idx, escr_mask); | ||
800 | } | ||
801 | |||
802 | done: | ||
803 | return num ? -ENOSPC : 0; | ||
804 | } | ||
805 | |||
806 | static __initconst const struct x86_pmu p4_pmu = { | ||
807 | .name = "Netburst P4/Xeon", | ||
808 | .handle_irq = p4_pmu_handle_irq, | ||
809 | .disable_all = p4_pmu_disable_all, | ||
810 | .enable_all = p4_pmu_enable_all, | ||
811 | .enable = p4_pmu_enable_event, | ||
812 | .disable = p4_pmu_disable_event, | ||
813 | .eventsel = MSR_P4_BPU_CCCR0, | ||
814 | .perfctr = MSR_P4_BPU_PERFCTR0, | ||
815 | .event_map = p4_pmu_event_map, | ||
816 | .max_events = ARRAY_SIZE(p4_general_events), | ||
817 | .get_event_constraints = x86_get_event_constraints, | ||
818 | /* | ||
819 | * IF HT disabled we may need to use all | ||
820 | * ARCH_P4_MAX_CCCR counters simulaneously | ||
821 | * though leave it restricted at moment assuming | ||
822 | * HT is on | ||
823 | */ | ||
824 | .num_counters = ARCH_P4_MAX_CCCR, | ||
825 | .apic = 1, | ||
826 | .cntval_bits = 40, | ||
827 | .cntval_mask = (1ULL << 40) - 1, | ||
828 | .max_period = (1ULL << 39) - 1, | ||
829 | .hw_config = p4_hw_config, | ||
830 | .schedule_events = p4_pmu_schedule_events, | ||
831 | }; | ||
832 | |||
833 | static __init int p4_pmu_init(void) | ||
834 | { | ||
835 | unsigned int low, high; | ||
836 | |||
837 | /* If we get stripped -- indexig fails */ | ||
838 | BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC); | ||
839 | |||
840 | rdmsr(MSR_IA32_MISC_ENABLE, low, high); | ||
841 | if (!(low & (1 << 7))) { | ||
842 | pr_cont("unsupported Netburst CPU model %d ", | ||
843 | boot_cpu_data.x86_model); | ||
844 | return -ENODEV; | ||
845 | } | ||
846 | |||
847 | memcpy(hw_cache_event_ids, p4_hw_cache_event_ids, | ||
848 | sizeof(hw_cache_event_ids)); | ||
849 | |||
850 | pr_cont("Netburst events, "); | ||
851 | |||
852 | x86_pmu = p4_pmu; | ||
853 | |||
854 | return 0; | ||
855 | } | ||
856 | |||
857 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index a330485d14da..34ba07be2cda 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c | |||
@@ -27,24 +27,6 @@ static u64 p6_pmu_event_map(int hw_event) | |||
27 | */ | 27 | */ |
28 | #define P6_NOP_EVENT 0x0000002EULL | 28 | #define P6_NOP_EVENT 0x0000002EULL |
29 | 29 | ||
30 | static u64 p6_pmu_raw_event(u64 hw_event) | ||
31 | { | ||
32 | #define P6_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
33 | #define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
34 | #define P6_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
35 | #define P6_EVNTSEL_INV_MASK 0x00800000ULL | ||
36 | #define P6_EVNTSEL_REG_MASK 0xFF000000ULL | ||
37 | |||
38 | #define P6_EVNTSEL_MASK \ | ||
39 | (P6_EVNTSEL_EVENT_MASK | \ | ||
40 | P6_EVNTSEL_UNIT_MASK | \ | ||
41 | P6_EVNTSEL_EDGE_MASK | \ | ||
42 | P6_EVNTSEL_INV_MASK | \ | ||
43 | P6_EVNTSEL_REG_MASK) | ||
44 | |||
45 | return hw_event & P6_EVNTSEL_MASK; | ||
46 | } | ||
47 | |||
48 | static struct event_constraint p6_event_constraints[] = | 30 | static struct event_constraint p6_event_constraints[] = |
49 | { | 31 | { |
50 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ | 32 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ |
@@ -66,7 +48,7 @@ static void p6_pmu_disable_all(void) | |||
66 | wrmsrl(MSR_P6_EVNTSEL0, val); | 48 | wrmsrl(MSR_P6_EVNTSEL0, val); |
67 | } | 49 | } |
68 | 50 | ||
69 | static void p6_pmu_enable_all(void) | 51 | static void p6_pmu_enable_all(int added) |
70 | { | 52 | { |
71 | unsigned long val; | 53 | unsigned long val; |
72 | 54 | ||
@@ -102,22 +84,23 @@ static void p6_pmu_enable_event(struct perf_event *event) | |||
102 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); | 84 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); |
103 | } | 85 | } |
104 | 86 | ||
105 | static __initconst struct x86_pmu p6_pmu = { | 87 | static __initconst const struct x86_pmu p6_pmu = { |
106 | .name = "p6", | 88 | .name = "p6", |
107 | .handle_irq = x86_pmu_handle_irq, | 89 | .handle_irq = x86_pmu_handle_irq, |
108 | .disable_all = p6_pmu_disable_all, | 90 | .disable_all = p6_pmu_disable_all, |
109 | .enable_all = p6_pmu_enable_all, | 91 | .enable_all = p6_pmu_enable_all, |
110 | .enable = p6_pmu_enable_event, | 92 | .enable = p6_pmu_enable_event, |
111 | .disable = p6_pmu_disable_event, | 93 | .disable = p6_pmu_disable_event, |
94 | .hw_config = x86_pmu_hw_config, | ||
95 | .schedule_events = x86_schedule_events, | ||
112 | .eventsel = MSR_P6_EVNTSEL0, | 96 | .eventsel = MSR_P6_EVNTSEL0, |
113 | .perfctr = MSR_P6_PERFCTR0, | 97 | .perfctr = MSR_P6_PERFCTR0, |
114 | .event_map = p6_pmu_event_map, | 98 | .event_map = p6_pmu_event_map, |
115 | .raw_event = p6_pmu_raw_event, | ||
116 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), | 99 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), |
117 | .apic = 1, | 100 | .apic = 1, |
118 | .max_period = (1ULL << 31) - 1, | 101 | .max_period = (1ULL << 31) - 1, |
119 | .version = 0, | 102 | .version = 0, |
120 | .num_events = 2, | 103 | .num_counters = 2, |
121 | /* | 104 | /* |
122 | * Events have 40 bits implemented. However they are designed such | 105 | * Events have 40 bits implemented. However they are designed such |
123 | * that bits [32-39] are sign extensions of bit 31. As such the | 106 | * that bits [32-39] are sign extensions of bit 31. As such the |
@@ -125,8 +108,8 @@ static __initconst struct x86_pmu p6_pmu = { | |||
125 | * | 108 | * |
126 | * See IA-32 Intel Architecture Software developer manual Vol 3B | 109 | * See IA-32 Intel Architecture Software developer manual Vol 3B |
127 | */ | 110 | */ |
128 | .event_bits = 32, | 111 | .cntval_bits = 32, |
129 | .event_mask = (1ULL << 32) - 1, | 112 | .cntval_mask = (1ULL << 32) - 1, |
130 | .get_event_constraints = x86_get_event_constraints, | 113 | .get_event_constraints = x86_get_event_constraints, |
131 | .event_constraints = p6_event_constraints, | 114 | .event_constraints = p6_event_constraints, |
132 | }; | 115 | }; |
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index dfdb4dba2320..b9d1ff588445 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c | |||
@@ -24,8 +24,8 @@ | |||
24 | #include <linux/dmi.h> | 24 | #include <linux/dmi.h> |
25 | #include <linux/module.h> | 25 | #include <linux/module.h> |
26 | #include <asm/div64.h> | 26 | #include <asm/div64.h> |
27 | #include <asm/vmware.h> | ||
28 | #include <asm/x86_init.h> | 27 | #include <asm/x86_init.h> |
28 | #include <asm/hypervisor.h> | ||
29 | 29 | ||
30 | #define CPUID_VMWARE_INFO_LEAF 0x40000000 | 30 | #define CPUID_VMWARE_INFO_LEAF 0x40000000 |
31 | #define VMWARE_HYPERVISOR_MAGIC 0x564D5868 | 31 | #define VMWARE_HYPERVISOR_MAGIC 0x564D5868 |
@@ -65,7 +65,7 @@ static unsigned long vmware_get_tsc_khz(void) | |||
65 | return tsc_hz; | 65 | return tsc_hz; |
66 | } | 66 | } |
67 | 67 | ||
68 | void __init vmware_platform_setup(void) | 68 | static void __init vmware_platform_setup(void) |
69 | { | 69 | { |
70 | uint32_t eax, ebx, ecx, edx; | 70 | uint32_t eax, ebx, ecx, edx; |
71 | 71 | ||
@@ -83,26 +83,22 @@ void __init vmware_platform_setup(void) | |||
83 | * serial key should be enough, as this will always have a VMware | 83 | * serial key should be enough, as this will always have a VMware |
84 | * specific string when running under VMware hypervisor. | 84 | * specific string when running under VMware hypervisor. |
85 | */ | 85 | */ |
86 | int vmware_platform(void) | 86 | static bool __init vmware_platform(void) |
87 | { | 87 | { |
88 | if (cpu_has_hypervisor) { | 88 | if (cpu_has_hypervisor) { |
89 | unsigned int eax, ebx, ecx, edx; | 89 | unsigned int eax; |
90 | char hyper_vendor_id[13]; | 90 | unsigned int hyper_vendor_id[3]; |
91 | 91 | ||
92 | cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &ebx, &ecx, &edx); | 92 | cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &hyper_vendor_id[0], |
93 | memcpy(hyper_vendor_id + 0, &ebx, 4); | 93 | &hyper_vendor_id[1], &hyper_vendor_id[2]); |
94 | memcpy(hyper_vendor_id + 4, &ecx, 4); | 94 | if (!memcmp(hyper_vendor_id, "VMwareVMware", 12)) |
95 | memcpy(hyper_vendor_id + 8, &edx, 4); | 95 | return true; |
96 | hyper_vendor_id[12] = '\0'; | ||
97 | if (!strcmp(hyper_vendor_id, "VMwareVMware")) | ||
98 | return 1; | ||
99 | } else if (dmi_available && dmi_name_in_serial("VMware") && | 96 | } else if (dmi_available && dmi_name_in_serial("VMware") && |
100 | __vmware_platform()) | 97 | __vmware_platform()) |
101 | return 1; | 98 | return true; |
102 | 99 | ||
103 | return 0; | 100 | return false; |
104 | } | 101 | } |
105 | EXPORT_SYMBOL(vmware_platform); | ||
106 | 102 | ||
107 | /* | 103 | /* |
108 | * VMware hypervisor takes care of exporting a reliable TSC to the guest. | 104 | * VMware hypervisor takes care of exporting a reliable TSC to the guest. |
@@ -116,8 +112,16 @@ EXPORT_SYMBOL(vmware_platform); | |||
116 | * so that the kernel could just trust the hypervisor with providing a | 112 | * so that the kernel could just trust the hypervisor with providing a |
117 | * reliable virtual TSC that is suitable for timekeeping. | 113 | * reliable virtual TSC that is suitable for timekeeping. |
118 | */ | 114 | */ |
119 | void __cpuinit vmware_set_feature_bits(struct cpuinfo_x86 *c) | 115 | static void __cpuinit vmware_set_cpu_features(struct cpuinfo_x86 *c) |
120 | { | 116 | { |
121 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 117 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
122 | set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); | 118 | set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); |
123 | } | 119 | } |
120 | |||
121 | const __refconst struct hypervisor_x86 x86_hyper_vmware = { | ||
122 | .name = "VMware", | ||
123 | .detect = vmware_platform, | ||
124 | .set_cpu_features = vmware_set_cpu_features, | ||
125 | .init_platform = vmware_platform_setup, | ||
126 | }; | ||
127 | EXPORT_SYMBOL(x86_hyper_vmware); | ||
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c deleted file mode 100644 index 1c47390dd0e5..000000000000 --- a/arch/x86/kernel/ds.c +++ /dev/null | |||
@@ -1,1437 +0,0 @@ | |||
1 | /* | ||
2 | * Debug Store support | ||
3 | * | ||
4 | * This provides a low-level interface to the hardware's Debug Store | ||
5 | * feature that is used for branch trace store (BTS) and | ||
6 | * precise-event based sampling (PEBS). | ||
7 | * | ||
8 | * It manages: | ||
9 | * - DS and BTS hardware configuration | ||
10 | * - buffer overflow handling (to be done) | ||
11 | * - buffer access | ||
12 | * | ||
13 | * It does not do: | ||
14 | * - security checking (is the caller allowed to trace the task) | ||
15 | * - buffer allocation (memory accounting) | ||
16 | * | ||
17 | * | ||
18 | * Copyright (C) 2007-2009 Intel Corporation. | ||
19 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009 | ||
20 | */ | ||
21 | |||
22 | #include <linux/kernel.h> | ||
23 | #include <linux/string.h> | ||
24 | #include <linux/errno.h> | ||
25 | #include <linux/sched.h> | ||
26 | #include <linux/slab.h> | ||
27 | #include <linux/mm.h> | ||
28 | #include <linux/trace_clock.h> | ||
29 | |||
30 | #include <asm/ds.h> | ||
31 | |||
32 | #include "ds_selftest.h" | ||
33 | |||
34 | /* | ||
35 | * The configuration for a particular DS hardware implementation: | ||
36 | */ | ||
37 | struct ds_configuration { | ||
38 | /* The name of the configuration: */ | ||
39 | const char *name; | ||
40 | |||
41 | /* The size of pointer-typed fields in DS, BTS, and PEBS: */ | ||
42 | unsigned char sizeof_ptr_field; | ||
43 | |||
44 | /* The size of a BTS/PEBS record in bytes: */ | ||
45 | unsigned char sizeof_rec[2]; | ||
46 | |||
47 | /* The number of pebs counter reset values in the DS structure. */ | ||
48 | unsigned char nr_counter_reset; | ||
49 | |||
50 | /* Control bit-masks indexed by enum ds_feature: */ | ||
51 | unsigned long ctl[dsf_ctl_max]; | ||
52 | }; | ||
53 | static struct ds_configuration ds_cfg __read_mostly; | ||
54 | |||
55 | |||
56 | /* Maximal size of a DS configuration: */ | ||
57 | #define MAX_SIZEOF_DS 0x80 | ||
58 | |||
59 | /* Maximal size of a BTS record: */ | ||
60 | #define MAX_SIZEOF_BTS (3 * 8) | ||
61 | |||
62 | /* BTS and PEBS buffer alignment: */ | ||
63 | #define DS_ALIGNMENT (1 << 3) | ||
64 | |||
65 | /* Number of buffer pointers in DS: */ | ||
66 | #define NUM_DS_PTR_FIELDS 8 | ||
67 | |||
68 | /* Size of a pebs reset value in DS: */ | ||
69 | #define PEBS_RESET_FIELD_SIZE 8 | ||
70 | |||
71 | /* Mask of control bits in the DS MSR register: */ | ||
72 | #define BTS_CONTROL \ | ||
73 | ( ds_cfg.ctl[dsf_bts] | \ | ||
74 | ds_cfg.ctl[dsf_bts_kernel] | \ | ||
75 | ds_cfg.ctl[dsf_bts_user] | \ | ||
76 | ds_cfg.ctl[dsf_bts_overflow] ) | ||
77 | |||
78 | /* | ||
79 | * A BTS or PEBS tracer. | ||
80 | * | ||
81 | * This holds the configuration of the tracer and serves as a handle | ||
82 | * to identify tracers. | ||
83 | */ | ||
84 | struct ds_tracer { | ||
85 | /* The DS context (partially) owned by this tracer. */ | ||
86 | struct ds_context *context; | ||
87 | /* The buffer provided on ds_request() and its size in bytes. */ | ||
88 | void *buffer; | ||
89 | size_t size; | ||
90 | }; | ||
91 | |||
92 | struct bts_tracer { | ||
93 | /* The common DS part: */ | ||
94 | struct ds_tracer ds; | ||
95 | |||
96 | /* The trace including the DS configuration: */ | ||
97 | struct bts_trace trace; | ||
98 | |||
99 | /* Buffer overflow notification function: */ | ||
100 | bts_ovfl_callback_t ovfl; | ||
101 | |||
102 | /* Active flags affecting trace collection. */ | ||
103 | unsigned int flags; | ||
104 | }; | ||
105 | |||
106 | struct pebs_tracer { | ||
107 | /* The common DS part: */ | ||
108 | struct ds_tracer ds; | ||
109 | |||
110 | /* The trace including the DS configuration: */ | ||
111 | struct pebs_trace trace; | ||
112 | |||
113 | /* Buffer overflow notification function: */ | ||
114 | pebs_ovfl_callback_t ovfl; | ||
115 | }; | ||
116 | |||
117 | /* | ||
118 | * Debug Store (DS) save area configuration (see Intel64 and IA32 | ||
119 | * Architectures Software Developer's Manual, section 18.5) | ||
120 | * | ||
121 | * The DS configuration consists of the following fields; different | ||
122 | * architetures vary in the size of those fields. | ||
123 | * | ||
124 | * - double-word aligned base linear address of the BTS buffer | ||
125 | * - write pointer into the BTS buffer | ||
126 | * - end linear address of the BTS buffer (one byte beyond the end of | ||
127 | * the buffer) | ||
128 | * - interrupt pointer into BTS buffer | ||
129 | * (interrupt occurs when write pointer passes interrupt pointer) | ||
130 | * - double-word aligned base linear address of the PEBS buffer | ||
131 | * - write pointer into the PEBS buffer | ||
132 | * - end linear address of the PEBS buffer (one byte beyond the end of | ||
133 | * the buffer) | ||
134 | * - interrupt pointer into PEBS buffer | ||
135 | * (interrupt occurs when write pointer passes interrupt pointer) | ||
136 | * - value to which counter is reset following counter overflow | ||
137 | * | ||
138 | * Later architectures use 64bit pointers throughout, whereas earlier | ||
139 | * architectures use 32bit pointers in 32bit mode. | ||
140 | * | ||
141 | * | ||
142 | * We compute the base address for the first 8 fields based on: | ||
143 | * - the field size stored in the DS configuration | ||
144 | * - the relative field position | ||
145 | * - an offset giving the start of the respective region | ||
146 | * | ||
147 | * This offset is further used to index various arrays holding | ||
148 | * information for BTS and PEBS at the respective index. | ||
149 | * | ||
150 | * On later 32bit processors, we only access the lower 32bit of the | ||
151 | * 64bit pointer fields. The upper halves will be zeroed out. | ||
152 | */ | ||
153 | |||
154 | enum ds_field { | ||
155 | ds_buffer_base = 0, | ||
156 | ds_index, | ||
157 | ds_absolute_maximum, | ||
158 | ds_interrupt_threshold, | ||
159 | }; | ||
160 | |||
161 | enum ds_qualifier { | ||
162 | ds_bts = 0, | ||
163 | ds_pebs | ||
164 | }; | ||
165 | |||
166 | static inline unsigned long | ||
167 | ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field) | ||
168 | { | ||
169 | base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual))); | ||
170 | return *(unsigned long *)base; | ||
171 | } | ||
172 | |||
173 | static inline void | ||
174 | ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field, | ||
175 | unsigned long value) | ||
176 | { | ||
177 | base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual))); | ||
178 | (*(unsigned long *)base) = value; | ||
179 | } | ||
180 | |||
181 | |||
182 | /* | ||
183 | * Locking is done only for allocating BTS or PEBS resources. | ||
184 | */ | ||
185 | static DEFINE_SPINLOCK(ds_lock); | ||
186 | |||
187 | /* | ||
188 | * We either support (system-wide) per-cpu or per-thread allocation. | ||
189 | * We distinguish the two based on the task_struct pointer, where a | ||
190 | * NULL pointer indicates per-cpu allocation for the current cpu. | ||
191 | * | ||
192 | * Allocations are use-counted. As soon as resources are allocated, | ||
193 | * further allocations must be of the same type (per-cpu or | ||
194 | * per-thread). We model this by counting allocations (i.e. the number | ||
195 | * of tracers of a certain type) for one type negatively: | ||
196 | * =0 no tracers | ||
197 | * >0 number of per-thread tracers | ||
198 | * <0 number of per-cpu tracers | ||
199 | * | ||
200 | * Tracers essentially gives the number of ds contexts for a certain | ||
201 | * type of allocation. | ||
202 | */ | ||
203 | static atomic_t tracers = ATOMIC_INIT(0); | ||
204 | |||
205 | static inline int get_tracer(struct task_struct *task) | ||
206 | { | ||
207 | int error; | ||
208 | |||
209 | spin_lock_irq(&ds_lock); | ||
210 | |||
211 | if (task) { | ||
212 | error = -EPERM; | ||
213 | if (atomic_read(&tracers) < 0) | ||
214 | goto out; | ||
215 | atomic_inc(&tracers); | ||
216 | } else { | ||
217 | error = -EPERM; | ||
218 | if (atomic_read(&tracers) > 0) | ||
219 | goto out; | ||
220 | atomic_dec(&tracers); | ||
221 | } | ||
222 | |||
223 | error = 0; | ||
224 | out: | ||
225 | spin_unlock_irq(&ds_lock); | ||
226 | return error; | ||
227 | } | ||
228 | |||
229 | static inline void put_tracer(struct task_struct *task) | ||
230 | { | ||
231 | if (task) | ||
232 | atomic_dec(&tracers); | ||
233 | else | ||
234 | atomic_inc(&tracers); | ||
235 | } | ||
236 | |||
237 | /* | ||
238 | * The DS context is either attached to a thread or to a cpu: | ||
239 | * - in the former case, the thread_struct contains a pointer to the | ||
240 | * attached context. | ||
241 | * - in the latter case, we use a static array of per-cpu context | ||
242 | * pointers. | ||
243 | * | ||
244 | * Contexts are use-counted. They are allocated on first access and | ||
245 | * deallocated when the last user puts the context. | ||
246 | */ | ||
247 | struct ds_context { | ||
248 | /* The DS configuration; goes into MSR_IA32_DS_AREA: */ | ||
249 | unsigned char ds[MAX_SIZEOF_DS]; | ||
250 | |||
251 | /* The owner of the BTS and PEBS configuration, respectively: */ | ||
252 | struct bts_tracer *bts_master; | ||
253 | struct pebs_tracer *pebs_master; | ||
254 | |||
255 | /* Use count: */ | ||
256 | unsigned long count; | ||
257 | |||
258 | /* Pointer to the context pointer field: */ | ||
259 | struct ds_context **this; | ||
260 | |||
261 | /* The traced task; NULL for cpu tracing: */ | ||
262 | struct task_struct *task; | ||
263 | |||
264 | /* The traced cpu; only valid if task is NULL: */ | ||
265 | int cpu; | ||
266 | }; | ||
267 | |||
268 | static DEFINE_PER_CPU(struct ds_context *, cpu_ds_context); | ||
269 | |||
270 | |||
271 | static struct ds_context *ds_get_context(struct task_struct *task, int cpu) | ||
272 | { | ||
273 | struct ds_context **p_context = | ||
274 | (task ? &task->thread.ds_ctx : &per_cpu(cpu_ds_context, cpu)); | ||
275 | struct ds_context *context = NULL; | ||
276 | struct ds_context *new_context = NULL; | ||
277 | |||
278 | /* Chances are small that we already have a context. */ | ||
279 | new_context = kzalloc(sizeof(*new_context), GFP_KERNEL); | ||
280 | if (!new_context) | ||
281 | return NULL; | ||
282 | |||
283 | spin_lock_irq(&ds_lock); | ||
284 | |||
285 | context = *p_context; | ||
286 | if (likely(!context)) { | ||
287 | context = new_context; | ||
288 | |||
289 | context->this = p_context; | ||
290 | context->task = task; | ||
291 | context->cpu = cpu; | ||
292 | context->count = 0; | ||
293 | |||
294 | *p_context = context; | ||
295 | } | ||
296 | |||
297 | context->count++; | ||
298 | |||
299 | spin_unlock_irq(&ds_lock); | ||
300 | |||
301 | if (context != new_context) | ||
302 | kfree(new_context); | ||
303 | |||
304 | return context; | ||
305 | } | ||
306 | |||
307 | static void ds_put_context(struct ds_context *context) | ||
308 | { | ||
309 | struct task_struct *task; | ||
310 | unsigned long irq; | ||
311 | |||
312 | if (!context) | ||
313 | return; | ||
314 | |||
315 | spin_lock_irqsave(&ds_lock, irq); | ||
316 | |||
317 | if (--context->count) { | ||
318 | spin_unlock_irqrestore(&ds_lock, irq); | ||
319 | return; | ||
320 | } | ||
321 | |||
322 | *(context->this) = NULL; | ||
323 | |||
324 | task = context->task; | ||
325 | |||
326 | if (task) | ||
327 | clear_tsk_thread_flag(task, TIF_DS_AREA_MSR); | ||
328 | |||
329 | /* | ||
330 | * We leave the (now dangling) pointer to the DS configuration in | ||
331 | * the DS_AREA msr. This is as good or as bad as replacing it with | ||
332 | * NULL - the hardware would crash if we enabled tracing. | ||
333 | * | ||
334 | * This saves us some problems with having to write an msr on a | ||
335 | * different cpu while preventing others from doing the same for the | ||
336 | * next context for that same cpu. | ||
337 | */ | ||
338 | |||
339 | spin_unlock_irqrestore(&ds_lock, irq); | ||
340 | |||
341 | /* The context might still be in use for context switching. */ | ||
342 | if (task && (task != current)) | ||
343 | wait_task_context_switch(task); | ||
344 | |||
345 | kfree(context); | ||
346 | } | ||
347 | |||
348 | static void ds_install_ds_area(struct ds_context *context) | ||
349 | { | ||
350 | unsigned long ds; | ||
351 | |||
352 | ds = (unsigned long)context->ds; | ||
353 | |||
354 | /* | ||
355 | * There is a race between the bts master and the pebs master. | ||
356 | * | ||
357 | * The thread/cpu access is synchronized via get/put_cpu() for | ||
358 | * task tracing and via wrmsr_on_cpu for cpu tracing. | ||
359 | * | ||
360 | * If bts and pebs are collected for the same task or same cpu, | ||
361 | * the same confiuration is written twice. | ||
362 | */ | ||
363 | if (context->task) { | ||
364 | get_cpu(); | ||
365 | if (context->task == current) | ||
366 | wrmsrl(MSR_IA32_DS_AREA, ds); | ||
367 | set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); | ||
368 | put_cpu(); | ||
369 | } else | ||
370 | wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA, | ||
371 | (u32)((u64)ds), (u32)((u64)ds >> 32)); | ||
372 | } | ||
373 | |||
374 | /* | ||
375 | * Call the tracer's callback on a buffer overflow. | ||
376 | * | ||
377 | * context: the ds context | ||
378 | * qual: the buffer type | ||
379 | */ | ||
380 | static void ds_overflow(struct ds_context *context, enum ds_qualifier qual) | ||
381 | { | ||
382 | switch (qual) { | ||
383 | case ds_bts: | ||
384 | if (context->bts_master && | ||
385 | context->bts_master->ovfl) | ||
386 | context->bts_master->ovfl(context->bts_master); | ||
387 | break; | ||
388 | case ds_pebs: | ||
389 | if (context->pebs_master && | ||
390 | context->pebs_master->ovfl) | ||
391 | context->pebs_master->ovfl(context->pebs_master); | ||
392 | break; | ||
393 | } | ||
394 | } | ||
395 | |||
396 | |||
397 | /* | ||
398 | * Write raw data into the BTS or PEBS buffer. | ||
399 | * | ||
400 | * The remainder of any partially written record is zeroed out. | ||
401 | * | ||
402 | * context: the DS context | ||
403 | * qual: the buffer type | ||
404 | * record: the data to write | ||
405 | * size: the size of the data | ||
406 | */ | ||
407 | static int ds_write(struct ds_context *context, enum ds_qualifier qual, | ||
408 | const void *record, size_t size) | ||
409 | { | ||
410 | int bytes_written = 0; | ||
411 | |||
412 | if (!record) | ||
413 | return -EINVAL; | ||
414 | |||
415 | while (size) { | ||
416 | unsigned long base, index, end, write_end, int_th; | ||
417 | unsigned long write_size, adj_write_size; | ||
418 | |||
419 | /* | ||
420 | * Write as much as possible without producing an | ||
421 | * overflow interrupt. | ||
422 | * | ||
423 | * Interrupt_threshold must either be | ||
424 | * - bigger than absolute_maximum or | ||
425 | * - point to a record between buffer_base and absolute_maximum | ||
426 | * | ||
427 | * Index points to a valid record. | ||
428 | */ | ||
429 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
430 | index = ds_get(context->ds, qual, ds_index); | ||
431 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
432 | int_th = ds_get(context->ds, qual, ds_interrupt_threshold); | ||
433 | |||
434 | write_end = min(end, int_th); | ||
435 | |||
436 | /* | ||
437 | * If we are already beyond the interrupt threshold, | ||
438 | * we fill the entire buffer. | ||
439 | */ | ||
440 | if (write_end <= index) | ||
441 | write_end = end; | ||
442 | |||
443 | if (write_end <= index) | ||
444 | break; | ||
445 | |||
446 | write_size = min((unsigned long) size, write_end - index); | ||
447 | memcpy((void *)index, record, write_size); | ||
448 | |||
449 | record = (const char *)record + write_size; | ||
450 | size -= write_size; | ||
451 | bytes_written += write_size; | ||
452 | |||
453 | adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; | ||
454 | adj_write_size *= ds_cfg.sizeof_rec[qual]; | ||
455 | |||
456 | /* Zero out trailing bytes. */ | ||
457 | memset((char *)index + write_size, 0, | ||
458 | adj_write_size - write_size); | ||
459 | index += adj_write_size; | ||
460 | |||
461 | if (index >= end) | ||
462 | index = base; | ||
463 | ds_set(context->ds, qual, ds_index, index); | ||
464 | |||
465 | if (index >= int_th) | ||
466 | ds_overflow(context, qual); | ||
467 | } | ||
468 | |||
469 | return bytes_written; | ||
470 | } | ||
471 | |||
472 | |||
473 | /* | ||
474 | * Branch Trace Store (BTS) uses the following format. Different | ||
475 | * architectures vary in the size of those fields. | ||
476 | * - source linear address | ||
477 | * - destination linear address | ||
478 | * - flags | ||
479 | * | ||
480 | * Later architectures use 64bit pointers throughout, whereas earlier | ||
481 | * architectures use 32bit pointers in 32bit mode. | ||
482 | * | ||
483 | * We compute the base address for the fields based on: | ||
484 | * - the field size stored in the DS configuration | ||
485 | * - the relative field position | ||
486 | * | ||
487 | * In order to store additional information in the BTS buffer, we use | ||
488 | * a special source address to indicate that the record requires | ||
489 | * special interpretation. | ||
490 | * | ||
491 | * Netburst indicated via a bit in the flags field whether the branch | ||
492 | * was predicted; this is ignored. | ||
493 | * | ||
494 | * We use two levels of abstraction: | ||
495 | * - the raw data level defined here | ||
496 | * - an arch-independent level defined in ds.h | ||
497 | */ | ||
498 | |||
499 | enum bts_field { | ||
500 | bts_from, | ||
501 | bts_to, | ||
502 | bts_flags, | ||
503 | |||
504 | bts_qual = bts_from, | ||
505 | bts_clock = bts_to, | ||
506 | bts_pid = bts_flags, | ||
507 | |||
508 | bts_qual_mask = (bts_qual_max - 1), | ||
509 | bts_escape = ((unsigned long)-1 & ~bts_qual_mask) | ||
510 | }; | ||
511 | |||
512 | static inline unsigned long bts_get(const char *base, unsigned long field) | ||
513 | { | ||
514 | base += (ds_cfg.sizeof_ptr_field * field); | ||
515 | return *(unsigned long *)base; | ||
516 | } | ||
517 | |||
518 | static inline void bts_set(char *base, unsigned long field, unsigned long val) | ||
519 | { | ||
520 | base += (ds_cfg.sizeof_ptr_field * field); | ||
521 | (*(unsigned long *)base) = val; | ||
522 | } | ||
523 | |||
524 | |||
525 | /* | ||
526 | * The raw BTS data is architecture dependent. | ||
527 | * | ||
528 | * For higher-level users, we give an arch-independent view. | ||
529 | * - ds.h defines struct bts_struct | ||
530 | * - bts_read translates one raw bts record into a bts_struct | ||
531 | * - bts_write translates one bts_struct into the raw format and | ||
532 | * writes it into the top of the parameter tracer's buffer. | ||
533 | * | ||
534 | * return: bytes read/written on success; -Eerrno, otherwise | ||
535 | */ | ||
536 | static int | ||
537 | bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out) | ||
538 | { | ||
539 | if (!tracer) | ||
540 | return -EINVAL; | ||
541 | |||
542 | if (at < tracer->trace.ds.begin) | ||
543 | return -EINVAL; | ||
544 | |||
545 | if (tracer->trace.ds.end < (at + tracer->trace.ds.size)) | ||
546 | return -EINVAL; | ||
547 | |||
548 | memset(out, 0, sizeof(*out)); | ||
549 | if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) { | ||
550 | out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask); | ||
551 | out->variant.event.clock = bts_get(at, bts_clock); | ||
552 | out->variant.event.pid = bts_get(at, bts_pid); | ||
553 | } else { | ||
554 | out->qualifier = bts_branch; | ||
555 | out->variant.lbr.from = bts_get(at, bts_from); | ||
556 | out->variant.lbr.to = bts_get(at, bts_to); | ||
557 | |||
558 | if (!out->variant.lbr.from && !out->variant.lbr.to) | ||
559 | out->qualifier = bts_invalid; | ||
560 | } | ||
561 | |||
562 | return ds_cfg.sizeof_rec[ds_bts]; | ||
563 | } | ||
564 | |||
565 | static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in) | ||
566 | { | ||
567 | unsigned char raw[MAX_SIZEOF_BTS]; | ||
568 | |||
569 | if (!tracer) | ||
570 | return -EINVAL; | ||
571 | |||
572 | if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts]) | ||
573 | return -EOVERFLOW; | ||
574 | |||
575 | switch (in->qualifier) { | ||
576 | case bts_invalid: | ||
577 | bts_set(raw, bts_from, 0); | ||
578 | bts_set(raw, bts_to, 0); | ||
579 | bts_set(raw, bts_flags, 0); | ||
580 | break; | ||
581 | case bts_branch: | ||
582 | bts_set(raw, bts_from, in->variant.lbr.from); | ||
583 | bts_set(raw, bts_to, in->variant.lbr.to); | ||
584 | bts_set(raw, bts_flags, 0); | ||
585 | break; | ||
586 | case bts_task_arrives: | ||
587 | case bts_task_departs: | ||
588 | bts_set(raw, bts_qual, (bts_escape | in->qualifier)); | ||
589 | bts_set(raw, bts_clock, in->variant.event.clock); | ||
590 | bts_set(raw, bts_pid, in->variant.event.pid); | ||
591 | break; | ||
592 | default: | ||
593 | return -EINVAL; | ||
594 | } | ||
595 | |||
596 | return ds_write(tracer->ds.context, ds_bts, raw, | ||
597 | ds_cfg.sizeof_rec[ds_bts]); | ||
598 | } | ||
599 | |||
600 | |||
601 | static void ds_write_config(struct ds_context *context, | ||
602 | struct ds_trace *cfg, enum ds_qualifier qual) | ||
603 | { | ||
604 | unsigned char *ds = context->ds; | ||
605 | |||
606 | ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin); | ||
607 | ds_set(ds, qual, ds_index, (unsigned long)cfg->top); | ||
608 | ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end); | ||
609 | ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith); | ||
610 | } | ||
611 | |||
612 | static void ds_read_config(struct ds_context *context, | ||
613 | struct ds_trace *cfg, enum ds_qualifier qual) | ||
614 | { | ||
615 | unsigned char *ds = context->ds; | ||
616 | |||
617 | cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base); | ||
618 | cfg->top = (void *)ds_get(ds, qual, ds_index); | ||
619 | cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum); | ||
620 | cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold); | ||
621 | } | ||
622 | |||
623 | static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, | ||
624 | void *base, size_t size, size_t ith, | ||
625 | unsigned int flags) { | ||
626 | unsigned long buffer, adj; | ||
627 | |||
628 | /* | ||
629 | * Adjust the buffer address and size to meet alignment | ||
630 | * constraints: | ||
631 | * - buffer is double-word aligned | ||
632 | * - size is multiple of record size | ||
633 | * | ||
634 | * We checked the size at the very beginning; we have enough | ||
635 | * space to do the adjustment. | ||
636 | */ | ||
637 | buffer = (unsigned long)base; | ||
638 | |||
639 | adj = ALIGN(buffer, DS_ALIGNMENT) - buffer; | ||
640 | buffer += adj; | ||
641 | size -= adj; | ||
642 | |||
643 | trace->n = size / ds_cfg.sizeof_rec[qual]; | ||
644 | trace->size = ds_cfg.sizeof_rec[qual]; | ||
645 | |||
646 | size = (trace->n * trace->size); | ||
647 | |||
648 | trace->begin = (void *)buffer; | ||
649 | trace->top = trace->begin; | ||
650 | trace->end = (void *)(buffer + size); | ||
651 | /* | ||
652 | * The value for 'no threshold' is -1, which will set the | ||
653 | * threshold outside of the buffer, just like we want it. | ||
654 | */ | ||
655 | ith *= ds_cfg.sizeof_rec[qual]; | ||
656 | trace->ith = (void *)(buffer + size - ith); | ||
657 | |||
658 | trace->flags = flags; | ||
659 | } | ||
660 | |||
661 | |||
662 | static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, | ||
663 | enum ds_qualifier qual, struct task_struct *task, | ||
664 | int cpu, void *base, size_t size, size_t th) | ||
665 | { | ||
666 | struct ds_context *context; | ||
667 | int error; | ||
668 | size_t req_size; | ||
669 | |||
670 | error = -EOPNOTSUPP; | ||
671 | if (!ds_cfg.sizeof_rec[qual]) | ||
672 | goto out; | ||
673 | |||
674 | error = -EINVAL; | ||
675 | if (!base) | ||
676 | goto out; | ||
677 | |||
678 | req_size = ds_cfg.sizeof_rec[qual]; | ||
679 | /* We might need space for alignment adjustments. */ | ||
680 | if (!IS_ALIGNED((unsigned long)base, DS_ALIGNMENT)) | ||
681 | req_size += DS_ALIGNMENT; | ||
682 | |||
683 | error = -EINVAL; | ||
684 | if (size < req_size) | ||
685 | goto out; | ||
686 | |||
687 | if (th != (size_t)-1) { | ||
688 | th *= ds_cfg.sizeof_rec[qual]; | ||
689 | |||
690 | error = -EINVAL; | ||
691 | if (size <= th) | ||
692 | goto out; | ||
693 | } | ||
694 | |||
695 | tracer->buffer = base; | ||
696 | tracer->size = size; | ||
697 | |||
698 | error = -ENOMEM; | ||
699 | context = ds_get_context(task, cpu); | ||
700 | if (!context) | ||
701 | goto out; | ||
702 | tracer->context = context; | ||
703 | |||
704 | /* | ||
705 | * Defer any tracer-specific initialization work for the context until | ||
706 | * context ownership has been clarified. | ||
707 | */ | ||
708 | |||
709 | error = 0; | ||
710 | out: | ||
711 | return error; | ||
712 | } | ||
713 | |||
714 | static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu, | ||
715 | void *base, size_t size, | ||
716 | bts_ovfl_callback_t ovfl, size_t th, | ||
717 | unsigned int flags) | ||
718 | { | ||
719 | struct bts_tracer *tracer; | ||
720 | int error; | ||
721 | |||
722 | /* Buffer overflow notification is not yet implemented. */ | ||
723 | error = -EOPNOTSUPP; | ||
724 | if (ovfl) | ||
725 | goto out; | ||
726 | |||
727 | error = get_tracer(task); | ||
728 | if (error < 0) | ||
729 | goto out; | ||
730 | |||
731 | error = -ENOMEM; | ||
732 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); | ||
733 | if (!tracer) | ||
734 | goto out_put_tracer; | ||
735 | tracer->ovfl = ovfl; | ||
736 | |||
737 | /* Do some more error checking and acquire a tracing context. */ | ||
738 | error = ds_request(&tracer->ds, &tracer->trace.ds, | ||
739 | ds_bts, task, cpu, base, size, th); | ||
740 | if (error < 0) | ||
741 | goto out_tracer; | ||
742 | |||
743 | /* Claim the bts part of the tracing context we acquired above. */ | ||
744 | spin_lock_irq(&ds_lock); | ||
745 | |||
746 | error = -EPERM; | ||
747 | if (tracer->ds.context->bts_master) | ||
748 | goto out_unlock; | ||
749 | tracer->ds.context->bts_master = tracer; | ||
750 | |||
751 | spin_unlock_irq(&ds_lock); | ||
752 | |||
753 | /* | ||
754 | * Now that we own the bts part of the context, let's complete the | ||
755 | * initialization for that part. | ||
756 | */ | ||
757 | ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags); | ||
758 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); | ||
759 | ds_install_ds_area(tracer->ds.context); | ||
760 | |||
761 | tracer->trace.read = bts_read; | ||
762 | tracer->trace.write = bts_write; | ||
763 | |||
764 | /* Start tracing. */ | ||
765 | ds_resume_bts(tracer); | ||
766 | |||
767 | return tracer; | ||
768 | |||
769 | out_unlock: | ||
770 | spin_unlock_irq(&ds_lock); | ||
771 | ds_put_context(tracer->ds.context); | ||
772 | out_tracer: | ||
773 | kfree(tracer); | ||
774 | out_put_tracer: | ||
775 | put_tracer(task); | ||
776 | out: | ||
777 | return ERR_PTR(error); | ||
778 | } | ||
779 | |||
780 | struct bts_tracer *ds_request_bts_task(struct task_struct *task, | ||
781 | void *base, size_t size, | ||
782 | bts_ovfl_callback_t ovfl, | ||
783 | size_t th, unsigned int flags) | ||
784 | { | ||
785 | return ds_request_bts(task, 0, base, size, ovfl, th, flags); | ||
786 | } | ||
787 | |||
788 | struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size, | ||
789 | bts_ovfl_callback_t ovfl, | ||
790 | size_t th, unsigned int flags) | ||
791 | { | ||
792 | return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags); | ||
793 | } | ||
794 | |||
795 | static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu, | ||
796 | void *base, size_t size, | ||
797 | pebs_ovfl_callback_t ovfl, size_t th, | ||
798 | unsigned int flags) | ||
799 | { | ||
800 | struct pebs_tracer *tracer; | ||
801 | int error; | ||
802 | |||
803 | /* Buffer overflow notification is not yet implemented. */ | ||
804 | error = -EOPNOTSUPP; | ||
805 | if (ovfl) | ||
806 | goto out; | ||
807 | |||
808 | error = get_tracer(task); | ||
809 | if (error < 0) | ||
810 | goto out; | ||
811 | |||
812 | error = -ENOMEM; | ||
813 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); | ||
814 | if (!tracer) | ||
815 | goto out_put_tracer; | ||
816 | tracer->ovfl = ovfl; | ||
817 | |||
818 | /* Do some more error checking and acquire a tracing context. */ | ||
819 | error = ds_request(&tracer->ds, &tracer->trace.ds, | ||
820 | ds_pebs, task, cpu, base, size, th); | ||
821 | if (error < 0) | ||
822 | goto out_tracer; | ||
823 | |||
824 | /* Claim the pebs part of the tracing context we acquired above. */ | ||
825 | spin_lock_irq(&ds_lock); | ||
826 | |||
827 | error = -EPERM; | ||
828 | if (tracer->ds.context->pebs_master) | ||
829 | goto out_unlock; | ||
830 | tracer->ds.context->pebs_master = tracer; | ||
831 | |||
832 | spin_unlock_irq(&ds_lock); | ||
833 | |||
834 | /* | ||
835 | * Now that we own the pebs part of the context, let's complete the | ||
836 | * initialization for that part. | ||
837 | */ | ||
838 | ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags); | ||
839 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); | ||
840 | ds_install_ds_area(tracer->ds.context); | ||
841 | |||
842 | /* Start tracing. */ | ||
843 | ds_resume_pebs(tracer); | ||
844 | |||
845 | return tracer; | ||
846 | |||
847 | out_unlock: | ||
848 | spin_unlock_irq(&ds_lock); | ||
849 | ds_put_context(tracer->ds.context); | ||
850 | out_tracer: | ||
851 | kfree(tracer); | ||
852 | out_put_tracer: | ||
853 | put_tracer(task); | ||
854 | out: | ||
855 | return ERR_PTR(error); | ||
856 | } | ||
857 | |||
858 | struct pebs_tracer *ds_request_pebs_task(struct task_struct *task, | ||
859 | void *base, size_t size, | ||
860 | pebs_ovfl_callback_t ovfl, | ||
861 | size_t th, unsigned int flags) | ||
862 | { | ||
863 | return ds_request_pebs(task, 0, base, size, ovfl, th, flags); | ||
864 | } | ||
865 | |||
866 | struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size, | ||
867 | pebs_ovfl_callback_t ovfl, | ||
868 | size_t th, unsigned int flags) | ||
869 | { | ||
870 | return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags); | ||
871 | } | ||
872 | |||
873 | static void ds_free_bts(struct bts_tracer *tracer) | ||
874 | { | ||
875 | struct task_struct *task; | ||
876 | |||
877 | task = tracer->ds.context->task; | ||
878 | |||
879 | WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); | ||
880 | tracer->ds.context->bts_master = NULL; | ||
881 | |||
882 | /* Make sure tracing stopped and the tracer is not in use. */ | ||
883 | if (task && (task != current)) | ||
884 | wait_task_context_switch(task); | ||
885 | |||
886 | ds_put_context(tracer->ds.context); | ||
887 | put_tracer(task); | ||
888 | |||
889 | kfree(tracer); | ||
890 | } | ||
891 | |||
892 | void ds_release_bts(struct bts_tracer *tracer) | ||
893 | { | ||
894 | might_sleep(); | ||
895 | |||
896 | if (!tracer) | ||
897 | return; | ||
898 | |||
899 | ds_suspend_bts(tracer); | ||
900 | ds_free_bts(tracer); | ||
901 | } | ||
902 | |||
903 | int ds_release_bts_noirq(struct bts_tracer *tracer) | ||
904 | { | ||
905 | struct task_struct *task; | ||
906 | unsigned long irq; | ||
907 | int error; | ||
908 | |||
909 | if (!tracer) | ||
910 | return 0; | ||
911 | |||
912 | task = tracer->ds.context->task; | ||
913 | |||
914 | local_irq_save(irq); | ||
915 | |||
916 | error = -EPERM; | ||
917 | if (!task && | ||
918 | (tracer->ds.context->cpu != smp_processor_id())) | ||
919 | goto out; | ||
920 | |||
921 | error = -EPERM; | ||
922 | if (task && (task != current)) | ||
923 | goto out; | ||
924 | |||
925 | ds_suspend_bts_noirq(tracer); | ||
926 | ds_free_bts(tracer); | ||
927 | |||
928 | error = 0; | ||
929 | out: | ||
930 | local_irq_restore(irq); | ||
931 | return error; | ||
932 | } | ||
933 | |||
934 | static void update_task_debugctlmsr(struct task_struct *task, | ||
935 | unsigned long debugctlmsr) | ||
936 | { | ||
937 | task->thread.debugctlmsr = debugctlmsr; | ||
938 | |||
939 | get_cpu(); | ||
940 | if (task == current) | ||
941 | update_debugctlmsr(debugctlmsr); | ||
942 | put_cpu(); | ||
943 | } | ||
944 | |||
945 | void ds_suspend_bts(struct bts_tracer *tracer) | ||
946 | { | ||
947 | struct task_struct *task; | ||
948 | unsigned long debugctlmsr; | ||
949 | int cpu; | ||
950 | |||
951 | if (!tracer) | ||
952 | return; | ||
953 | |||
954 | tracer->flags = 0; | ||
955 | |||
956 | task = tracer->ds.context->task; | ||
957 | cpu = tracer->ds.context->cpu; | ||
958 | |||
959 | WARN_ON(!task && irqs_disabled()); | ||
960 | |||
961 | debugctlmsr = (task ? | ||
962 | task->thread.debugctlmsr : | ||
963 | get_debugctlmsr_on_cpu(cpu)); | ||
964 | debugctlmsr &= ~BTS_CONTROL; | ||
965 | |||
966 | if (task) | ||
967 | update_task_debugctlmsr(task, debugctlmsr); | ||
968 | else | ||
969 | update_debugctlmsr_on_cpu(cpu, debugctlmsr); | ||
970 | } | ||
971 | |||
972 | int ds_suspend_bts_noirq(struct bts_tracer *tracer) | ||
973 | { | ||
974 | struct task_struct *task; | ||
975 | unsigned long debugctlmsr, irq; | ||
976 | int cpu, error = 0; | ||
977 | |||
978 | if (!tracer) | ||
979 | return 0; | ||
980 | |||
981 | tracer->flags = 0; | ||
982 | |||
983 | task = tracer->ds.context->task; | ||
984 | cpu = tracer->ds.context->cpu; | ||
985 | |||
986 | local_irq_save(irq); | ||
987 | |||
988 | error = -EPERM; | ||
989 | if (!task && (cpu != smp_processor_id())) | ||
990 | goto out; | ||
991 | |||
992 | debugctlmsr = (task ? | ||
993 | task->thread.debugctlmsr : | ||
994 | get_debugctlmsr()); | ||
995 | debugctlmsr &= ~BTS_CONTROL; | ||
996 | |||
997 | if (task) | ||
998 | update_task_debugctlmsr(task, debugctlmsr); | ||
999 | else | ||
1000 | update_debugctlmsr(debugctlmsr); | ||
1001 | |||
1002 | error = 0; | ||
1003 | out: | ||
1004 | local_irq_restore(irq); | ||
1005 | return error; | ||
1006 | } | ||
1007 | |||
1008 | static unsigned long ds_bts_control(struct bts_tracer *tracer) | ||
1009 | { | ||
1010 | unsigned long control; | ||
1011 | |||
1012 | control = ds_cfg.ctl[dsf_bts]; | ||
1013 | if (!(tracer->trace.ds.flags & BTS_KERNEL)) | ||
1014 | control |= ds_cfg.ctl[dsf_bts_kernel]; | ||
1015 | if (!(tracer->trace.ds.flags & BTS_USER)) | ||
1016 | control |= ds_cfg.ctl[dsf_bts_user]; | ||
1017 | |||
1018 | return control; | ||
1019 | } | ||
1020 | |||
1021 | void ds_resume_bts(struct bts_tracer *tracer) | ||
1022 | { | ||
1023 | struct task_struct *task; | ||
1024 | unsigned long debugctlmsr; | ||
1025 | int cpu; | ||
1026 | |||
1027 | if (!tracer) | ||
1028 | return; | ||
1029 | |||
1030 | tracer->flags = tracer->trace.ds.flags; | ||
1031 | |||
1032 | task = tracer->ds.context->task; | ||
1033 | cpu = tracer->ds.context->cpu; | ||
1034 | |||
1035 | WARN_ON(!task && irqs_disabled()); | ||
1036 | |||
1037 | debugctlmsr = (task ? | ||
1038 | task->thread.debugctlmsr : | ||
1039 | get_debugctlmsr_on_cpu(cpu)); | ||
1040 | debugctlmsr |= ds_bts_control(tracer); | ||
1041 | |||
1042 | if (task) | ||
1043 | update_task_debugctlmsr(task, debugctlmsr); | ||
1044 | else | ||
1045 | update_debugctlmsr_on_cpu(cpu, debugctlmsr); | ||
1046 | } | ||
1047 | |||
1048 | int ds_resume_bts_noirq(struct bts_tracer *tracer) | ||
1049 | { | ||
1050 | struct task_struct *task; | ||
1051 | unsigned long debugctlmsr, irq; | ||
1052 | int cpu, error = 0; | ||
1053 | |||
1054 | if (!tracer) | ||
1055 | return 0; | ||
1056 | |||
1057 | tracer->flags = tracer->trace.ds.flags; | ||
1058 | |||
1059 | task = tracer->ds.context->task; | ||
1060 | cpu = tracer->ds.context->cpu; | ||
1061 | |||
1062 | local_irq_save(irq); | ||
1063 | |||
1064 | error = -EPERM; | ||
1065 | if (!task && (cpu != smp_processor_id())) | ||
1066 | goto out; | ||
1067 | |||
1068 | debugctlmsr = (task ? | ||
1069 | task->thread.debugctlmsr : | ||
1070 | get_debugctlmsr()); | ||
1071 | debugctlmsr |= ds_bts_control(tracer); | ||
1072 | |||
1073 | if (task) | ||
1074 | update_task_debugctlmsr(task, debugctlmsr); | ||
1075 | else | ||
1076 | update_debugctlmsr(debugctlmsr); | ||
1077 | |||
1078 | error = 0; | ||
1079 | out: | ||
1080 | local_irq_restore(irq); | ||
1081 | return error; | ||
1082 | } | ||
1083 | |||
1084 | static void ds_free_pebs(struct pebs_tracer *tracer) | ||
1085 | { | ||
1086 | struct task_struct *task; | ||
1087 | |||
1088 | task = tracer->ds.context->task; | ||
1089 | |||
1090 | WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); | ||
1091 | tracer->ds.context->pebs_master = NULL; | ||
1092 | |||
1093 | ds_put_context(tracer->ds.context); | ||
1094 | put_tracer(task); | ||
1095 | |||
1096 | kfree(tracer); | ||
1097 | } | ||
1098 | |||
1099 | void ds_release_pebs(struct pebs_tracer *tracer) | ||
1100 | { | ||
1101 | might_sleep(); | ||
1102 | |||
1103 | if (!tracer) | ||
1104 | return; | ||
1105 | |||
1106 | ds_suspend_pebs(tracer); | ||
1107 | ds_free_pebs(tracer); | ||
1108 | } | ||
1109 | |||
1110 | int ds_release_pebs_noirq(struct pebs_tracer *tracer) | ||
1111 | { | ||
1112 | struct task_struct *task; | ||
1113 | unsigned long irq; | ||
1114 | int error; | ||
1115 | |||
1116 | if (!tracer) | ||
1117 | return 0; | ||
1118 | |||
1119 | task = tracer->ds.context->task; | ||
1120 | |||
1121 | local_irq_save(irq); | ||
1122 | |||
1123 | error = -EPERM; | ||
1124 | if (!task && | ||
1125 | (tracer->ds.context->cpu != smp_processor_id())) | ||
1126 | goto out; | ||
1127 | |||
1128 | error = -EPERM; | ||
1129 | if (task && (task != current)) | ||
1130 | goto out; | ||
1131 | |||
1132 | ds_suspend_pebs_noirq(tracer); | ||
1133 | ds_free_pebs(tracer); | ||
1134 | |||
1135 | error = 0; | ||
1136 | out: | ||
1137 | local_irq_restore(irq); | ||
1138 | return error; | ||
1139 | } | ||
1140 | |||
1141 | void ds_suspend_pebs(struct pebs_tracer *tracer) | ||
1142 | { | ||
1143 | |||
1144 | } | ||
1145 | |||
1146 | int ds_suspend_pebs_noirq(struct pebs_tracer *tracer) | ||
1147 | { | ||
1148 | return 0; | ||
1149 | } | ||
1150 | |||
1151 | void ds_resume_pebs(struct pebs_tracer *tracer) | ||
1152 | { | ||
1153 | |||
1154 | } | ||
1155 | |||
1156 | int ds_resume_pebs_noirq(struct pebs_tracer *tracer) | ||
1157 | { | ||
1158 | return 0; | ||
1159 | } | ||
1160 | |||
1161 | const struct bts_trace *ds_read_bts(struct bts_tracer *tracer) | ||
1162 | { | ||
1163 | if (!tracer) | ||
1164 | return NULL; | ||
1165 | |||
1166 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts); | ||
1167 | return &tracer->trace; | ||
1168 | } | ||
1169 | |||
1170 | const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer) | ||
1171 | { | ||
1172 | if (!tracer) | ||
1173 | return NULL; | ||
1174 | |||
1175 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); | ||
1176 | |||
1177 | tracer->trace.counters = ds_cfg.nr_counter_reset; | ||
1178 | memcpy(tracer->trace.counter_reset, | ||
1179 | tracer->ds.context->ds + | ||
1180 | (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field), | ||
1181 | ds_cfg.nr_counter_reset * PEBS_RESET_FIELD_SIZE); | ||
1182 | |||
1183 | return &tracer->trace; | ||
1184 | } | ||
1185 | |||
1186 | int ds_reset_bts(struct bts_tracer *tracer) | ||
1187 | { | ||
1188 | if (!tracer) | ||
1189 | return -EINVAL; | ||
1190 | |||
1191 | tracer->trace.ds.top = tracer->trace.ds.begin; | ||
1192 | |||
1193 | ds_set(tracer->ds.context->ds, ds_bts, ds_index, | ||
1194 | (unsigned long)tracer->trace.ds.top); | ||
1195 | |||
1196 | return 0; | ||
1197 | } | ||
1198 | |||
1199 | int ds_reset_pebs(struct pebs_tracer *tracer) | ||
1200 | { | ||
1201 | if (!tracer) | ||
1202 | return -EINVAL; | ||
1203 | |||
1204 | tracer->trace.ds.top = tracer->trace.ds.begin; | ||
1205 | |||
1206 | ds_set(tracer->ds.context->ds, ds_pebs, ds_index, | ||
1207 | (unsigned long)tracer->trace.ds.top); | ||
1208 | |||
1209 | return 0; | ||
1210 | } | ||
1211 | |||
1212 | int ds_set_pebs_reset(struct pebs_tracer *tracer, | ||
1213 | unsigned int counter, u64 value) | ||
1214 | { | ||
1215 | if (!tracer) | ||
1216 | return -EINVAL; | ||
1217 | |||
1218 | if (ds_cfg.nr_counter_reset < counter) | ||
1219 | return -EINVAL; | ||
1220 | |||
1221 | *(u64 *)(tracer->ds.context->ds + | ||
1222 | (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field) + | ||
1223 | (counter * PEBS_RESET_FIELD_SIZE)) = value; | ||
1224 | |||
1225 | return 0; | ||
1226 | } | ||
1227 | |||
1228 | static const struct ds_configuration ds_cfg_netburst = { | ||
1229 | .name = "Netburst", | ||
1230 | .ctl[dsf_bts] = (1 << 2) | (1 << 3), | ||
1231 | .ctl[dsf_bts_kernel] = (1 << 5), | ||
1232 | .ctl[dsf_bts_user] = (1 << 6), | ||
1233 | .nr_counter_reset = 1, | ||
1234 | }; | ||
1235 | static const struct ds_configuration ds_cfg_pentium_m = { | ||
1236 | .name = "Pentium M", | ||
1237 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | ||
1238 | .nr_counter_reset = 1, | ||
1239 | }; | ||
1240 | static const struct ds_configuration ds_cfg_core2_atom = { | ||
1241 | .name = "Core 2/Atom", | ||
1242 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | ||
1243 | .ctl[dsf_bts_kernel] = (1 << 9), | ||
1244 | .ctl[dsf_bts_user] = (1 << 10), | ||
1245 | .nr_counter_reset = 1, | ||
1246 | }; | ||
1247 | static const struct ds_configuration ds_cfg_core_i7 = { | ||
1248 | .name = "Core i7", | ||
1249 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | ||
1250 | .ctl[dsf_bts_kernel] = (1 << 9), | ||
1251 | .ctl[dsf_bts_user] = (1 << 10), | ||
1252 | .nr_counter_reset = 4, | ||
1253 | }; | ||
1254 | |||
1255 | static void | ||
1256 | ds_configure(const struct ds_configuration *cfg, | ||
1257 | struct cpuinfo_x86 *cpu) | ||
1258 | { | ||
1259 | unsigned long nr_pebs_fields = 0; | ||
1260 | |||
1261 | printk(KERN_INFO "[ds] using %s configuration\n", cfg->name); | ||
1262 | |||
1263 | #ifdef __i386__ | ||
1264 | nr_pebs_fields = 10; | ||
1265 | #else | ||
1266 | nr_pebs_fields = 18; | ||
1267 | #endif | ||
1268 | |||
1269 | /* | ||
1270 | * Starting with version 2, architectural performance | ||
1271 | * monitoring supports a format specifier. | ||
1272 | */ | ||
1273 | if ((cpuid_eax(0xa) & 0xff) > 1) { | ||
1274 | unsigned long perf_capabilities, format; | ||
1275 | |||
1276 | rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_capabilities); | ||
1277 | |||
1278 | format = (perf_capabilities >> 8) & 0xf; | ||
1279 | |||
1280 | switch (format) { | ||
1281 | case 0: | ||
1282 | nr_pebs_fields = 18; | ||
1283 | break; | ||
1284 | case 1: | ||
1285 | nr_pebs_fields = 22; | ||
1286 | break; | ||
1287 | default: | ||
1288 | printk(KERN_INFO | ||
1289 | "[ds] unknown PEBS format: %lu\n", format); | ||
1290 | nr_pebs_fields = 0; | ||
1291 | break; | ||
1292 | } | ||
1293 | } | ||
1294 | |||
1295 | memset(&ds_cfg, 0, sizeof(ds_cfg)); | ||
1296 | ds_cfg = *cfg; | ||
1297 | |||
1298 | ds_cfg.sizeof_ptr_field = | ||
1299 | (cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4); | ||
1300 | |||
1301 | ds_cfg.sizeof_rec[ds_bts] = ds_cfg.sizeof_ptr_field * 3; | ||
1302 | ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields; | ||
1303 | |||
1304 | if (!cpu_has(cpu, X86_FEATURE_BTS)) { | ||
1305 | ds_cfg.sizeof_rec[ds_bts] = 0; | ||
1306 | printk(KERN_INFO "[ds] bts not available\n"); | ||
1307 | } | ||
1308 | if (!cpu_has(cpu, X86_FEATURE_PEBS)) { | ||
1309 | ds_cfg.sizeof_rec[ds_pebs] = 0; | ||
1310 | printk(KERN_INFO "[ds] pebs not available\n"); | ||
1311 | } | ||
1312 | |||
1313 | printk(KERN_INFO "[ds] sizes: address: %u bit, ", | ||
1314 | 8 * ds_cfg.sizeof_ptr_field); | ||
1315 | printk("bts/pebs record: %u/%u bytes\n", | ||
1316 | ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]); | ||
1317 | |||
1318 | WARN_ON_ONCE(MAX_PEBS_COUNTERS < ds_cfg.nr_counter_reset); | ||
1319 | } | ||
1320 | |||
1321 | void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | ||
1322 | { | ||
1323 | /* Only configure the first cpu. Others are identical. */ | ||
1324 | if (ds_cfg.name) | ||
1325 | return; | ||
1326 | |||
1327 | switch (c->x86) { | ||
1328 | case 0x6: | ||
1329 | switch (c->x86_model) { | ||
1330 | case 0x9: | ||
1331 | case 0xd: /* Pentium M */ | ||
1332 | ds_configure(&ds_cfg_pentium_m, c); | ||
1333 | break; | ||
1334 | case 0xf: | ||
1335 | case 0x17: /* Core2 */ | ||
1336 | case 0x1c: /* Atom */ | ||
1337 | ds_configure(&ds_cfg_core2_atom, c); | ||
1338 | break; | ||
1339 | case 0x1a: /* Core i7 */ | ||
1340 | ds_configure(&ds_cfg_core_i7, c); | ||
1341 | break; | ||
1342 | default: | ||
1343 | /* Sorry, don't know about them. */ | ||
1344 | break; | ||
1345 | } | ||
1346 | break; | ||
1347 | case 0xf: | ||
1348 | switch (c->x86_model) { | ||
1349 | case 0x0: | ||
1350 | case 0x1: | ||
1351 | case 0x2: /* Netburst */ | ||
1352 | ds_configure(&ds_cfg_netburst, c); | ||
1353 | break; | ||
1354 | default: | ||
1355 | /* Sorry, don't know about them. */ | ||
1356 | break; | ||
1357 | } | ||
1358 | break; | ||
1359 | default: | ||
1360 | /* Sorry, don't know about them. */ | ||
1361 | break; | ||
1362 | } | ||
1363 | } | ||
1364 | |||
1365 | static inline void ds_take_timestamp(struct ds_context *context, | ||
1366 | enum bts_qualifier qualifier, | ||
1367 | struct task_struct *task) | ||
1368 | { | ||
1369 | struct bts_tracer *tracer = context->bts_master; | ||
1370 | struct bts_struct ts; | ||
1371 | |||
1372 | /* Prevent compilers from reading the tracer pointer twice. */ | ||
1373 | barrier(); | ||
1374 | |||
1375 | if (!tracer || !(tracer->flags & BTS_TIMESTAMPS)) | ||
1376 | return; | ||
1377 | |||
1378 | memset(&ts, 0, sizeof(ts)); | ||
1379 | ts.qualifier = qualifier; | ||
1380 | ts.variant.event.clock = trace_clock_global(); | ||
1381 | ts.variant.event.pid = task->pid; | ||
1382 | |||
1383 | bts_write(tracer, &ts); | ||
1384 | } | ||
1385 | |||
1386 | /* | ||
1387 | * Change the DS configuration from tracing prev to tracing next. | ||
1388 | */ | ||
1389 | void ds_switch_to(struct task_struct *prev, struct task_struct *next) | ||
1390 | { | ||
1391 | struct ds_context *prev_ctx = prev->thread.ds_ctx; | ||
1392 | struct ds_context *next_ctx = next->thread.ds_ctx; | ||
1393 | unsigned long debugctlmsr = next->thread.debugctlmsr; | ||
1394 | |||
1395 | /* Make sure all data is read before we start. */ | ||
1396 | barrier(); | ||
1397 | |||
1398 | if (prev_ctx) { | ||
1399 | update_debugctlmsr(0); | ||
1400 | |||
1401 | ds_take_timestamp(prev_ctx, bts_task_departs, prev); | ||
1402 | } | ||
1403 | |||
1404 | if (next_ctx) { | ||
1405 | ds_take_timestamp(next_ctx, bts_task_arrives, next); | ||
1406 | |||
1407 | wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds); | ||
1408 | } | ||
1409 | |||
1410 | update_debugctlmsr(debugctlmsr); | ||
1411 | } | ||
1412 | |||
1413 | static __init int ds_selftest(void) | ||
1414 | { | ||
1415 | if (ds_cfg.sizeof_rec[ds_bts]) { | ||
1416 | int error; | ||
1417 | |||
1418 | error = ds_selftest_bts(); | ||
1419 | if (error) { | ||
1420 | WARN(1, "[ds] selftest failed. disabling bts.\n"); | ||
1421 | ds_cfg.sizeof_rec[ds_bts] = 0; | ||
1422 | } | ||
1423 | } | ||
1424 | |||
1425 | if (ds_cfg.sizeof_rec[ds_pebs]) { | ||
1426 | int error; | ||
1427 | |||
1428 | error = ds_selftest_pebs(); | ||
1429 | if (error) { | ||
1430 | WARN(1, "[ds] selftest failed. disabling pebs.\n"); | ||
1431 | ds_cfg.sizeof_rec[ds_pebs] = 0; | ||
1432 | } | ||
1433 | } | ||
1434 | |||
1435 | return 0; | ||
1436 | } | ||
1437 | device_initcall(ds_selftest); | ||
diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c deleted file mode 100644 index 6bc7c199ab99..000000000000 --- a/arch/x86/kernel/ds_selftest.c +++ /dev/null | |||
@@ -1,408 +0,0 @@ | |||
1 | /* | ||
2 | * Debug Store support - selftest | ||
3 | * | ||
4 | * | ||
5 | * Copyright (C) 2009 Intel Corporation. | ||
6 | * Markus Metzger <markus.t.metzger@intel.com>, 2009 | ||
7 | */ | ||
8 | |||
9 | #include "ds_selftest.h" | ||
10 | |||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/string.h> | ||
13 | #include <linux/smp.h> | ||
14 | #include <linux/cpu.h> | ||
15 | |||
16 | #include <asm/ds.h> | ||
17 | |||
18 | |||
19 | #define BUFFER_SIZE 521 /* Intentionally chose an odd size. */ | ||
20 | #define SMALL_BUFFER_SIZE 24 /* A single bts entry. */ | ||
21 | |||
22 | struct ds_selftest_bts_conf { | ||
23 | struct bts_tracer *tracer; | ||
24 | int error; | ||
25 | int (*suspend)(struct bts_tracer *); | ||
26 | int (*resume)(struct bts_tracer *); | ||
27 | }; | ||
28 | |||
29 | static int ds_selftest_bts_consistency(const struct bts_trace *trace) | ||
30 | { | ||
31 | int error = 0; | ||
32 | |||
33 | if (!trace) { | ||
34 | printk(KERN_CONT "failed to access trace..."); | ||
35 | /* Bail out. Other tests are pointless. */ | ||
36 | return -1; | ||
37 | } | ||
38 | |||
39 | if (!trace->read) { | ||
40 | printk(KERN_CONT "bts read not available..."); | ||
41 | error = -1; | ||
42 | } | ||
43 | |||
44 | /* Do some sanity checks on the trace configuration. */ | ||
45 | if (!trace->ds.n) { | ||
46 | printk(KERN_CONT "empty bts buffer..."); | ||
47 | error = -1; | ||
48 | } | ||
49 | if (!trace->ds.size) { | ||
50 | printk(KERN_CONT "bad bts trace setup..."); | ||
51 | error = -1; | ||
52 | } | ||
53 | if (trace->ds.end != | ||
54 | (char *)trace->ds.begin + (trace->ds.n * trace->ds.size)) { | ||
55 | printk(KERN_CONT "bad bts buffer setup..."); | ||
56 | error = -1; | ||
57 | } | ||
58 | /* | ||
59 | * We allow top in [begin; end], since its not clear when the | ||
60 | * overflow adjustment happens: after the increment or before the | ||
61 | * write. | ||
62 | */ | ||
63 | if ((trace->ds.top < trace->ds.begin) || | ||
64 | (trace->ds.end < trace->ds.top)) { | ||
65 | printk(KERN_CONT "bts top out of bounds..."); | ||
66 | error = -1; | ||
67 | } | ||
68 | |||
69 | return error; | ||
70 | } | ||
71 | |||
72 | static int ds_selftest_bts_read(struct bts_tracer *tracer, | ||
73 | const struct bts_trace *trace, | ||
74 | const void *from, const void *to) | ||
75 | { | ||
76 | const unsigned char *at; | ||
77 | |||
78 | /* | ||
79 | * Check a few things which do not belong to this test. | ||
80 | * They should be covered by other tests. | ||
81 | */ | ||
82 | if (!trace) | ||
83 | return -1; | ||
84 | |||
85 | if (!trace->read) | ||
86 | return -1; | ||
87 | |||
88 | if (to < from) | ||
89 | return -1; | ||
90 | |||
91 | if (from < trace->ds.begin) | ||
92 | return -1; | ||
93 | |||
94 | if (trace->ds.end < to) | ||
95 | return -1; | ||
96 | |||
97 | if (!trace->ds.size) | ||
98 | return -1; | ||
99 | |||
100 | /* Now to the test itself. */ | ||
101 | for (at = from; (void *)at < to; at += trace->ds.size) { | ||
102 | struct bts_struct bts; | ||
103 | unsigned long index; | ||
104 | int error; | ||
105 | |||
106 | if (((void *)at - trace->ds.begin) % trace->ds.size) { | ||
107 | printk(KERN_CONT | ||
108 | "read from non-integer index..."); | ||
109 | return -1; | ||
110 | } | ||
111 | index = ((void *)at - trace->ds.begin) / trace->ds.size; | ||
112 | |||
113 | memset(&bts, 0, sizeof(bts)); | ||
114 | error = trace->read(tracer, at, &bts); | ||
115 | if (error < 0) { | ||
116 | printk(KERN_CONT | ||
117 | "error reading bts trace at [%lu] (0x%p)...", | ||
118 | index, at); | ||
119 | return error; | ||
120 | } | ||
121 | |||
122 | switch (bts.qualifier) { | ||
123 | case BTS_BRANCH: | ||
124 | break; | ||
125 | default: | ||
126 | printk(KERN_CONT | ||
127 | "unexpected bts entry %llu at [%lu] (0x%p)...", | ||
128 | bts.qualifier, index, at); | ||
129 | return -1; | ||
130 | } | ||
131 | } | ||
132 | |||
133 | return 0; | ||
134 | } | ||
135 | |||
136 | static void ds_selftest_bts_cpu(void *arg) | ||
137 | { | ||
138 | struct ds_selftest_bts_conf *conf = arg; | ||
139 | const struct bts_trace *trace; | ||
140 | void *top; | ||
141 | |||
142 | if (IS_ERR(conf->tracer)) { | ||
143 | conf->error = PTR_ERR(conf->tracer); | ||
144 | conf->tracer = NULL; | ||
145 | |||
146 | printk(KERN_CONT | ||
147 | "initialization failed (err: %d)...", conf->error); | ||
148 | return; | ||
149 | } | ||
150 | |||
151 | /* We should meanwhile have enough trace. */ | ||
152 | conf->error = conf->suspend(conf->tracer); | ||
153 | if (conf->error < 0) | ||
154 | return; | ||
155 | |||
156 | /* Let's see if we can access the trace. */ | ||
157 | trace = ds_read_bts(conf->tracer); | ||
158 | |||
159 | conf->error = ds_selftest_bts_consistency(trace); | ||
160 | if (conf->error < 0) | ||
161 | return; | ||
162 | |||
163 | /* If everything went well, we should have a few trace entries. */ | ||
164 | if (trace->ds.top == trace->ds.begin) { | ||
165 | /* | ||
166 | * It is possible but highly unlikely that we got a | ||
167 | * buffer overflow and end up at exactly the same | ||
168 | * position we started from. | ||
169 | * Let's issue a warning, but continue. | ||
170 | */ | ||
171 | printk(KERN_CONT "no trace/overflow..."); | ||
172 | } | ||
173 | |||
174 | /* Let's try to read the trace we collected. */ | ||
175 | conf->error = | ||
176 | ds_selftest_bts_read(conf->tracer, trace, | ||
177 | trace->ds.begin, trace->ds.top); | ||
178 | if (conf->error < 0) | ||
179 | return; | ||
180 | |||
181 | /* | ||
182 | * Let's read the trace again. | ||
183 | * Since we suspended tracing, we should get the same result. | ||
184 | */ | ||
185 | top = trace->ds.top; | ||
186 | |||
187 | trace = ds_read_bts(conf->tracer); | ||
188 | conf->error = ds_selftest_bts_consistency(trace); | ||
189 | if (conf->error < 0) | ||
190 | return; | ||
191 | |||
192 | if (top != trace->ds.top) { | ||
193 | printk(KERN_CONT "suspend not working..."); | ||
194 | conf->error = -1; | ||
195 | return; | ||
196 | } | ||
197 | |||
198 | /* Let's collect some more trace - see if resume is working. */ | ||
199 | conf->error = conf->resume(conf->tracer); | ||
200 | if (conf->error < 0) | ||
201 | return; | ||
202 | |||
203 | conf->error = conf->suspend(conf->tracer); | ||
204 | if (conf->error < 0) | ||
205 | return; | ||
206 | |||
207 | trace = ds_read_bts(conf->tracer); | ||
208 | |||
209 | conf->error = ds_selftest_bts_consistency(trace); | ||
210 | if (conf->error < 0) | ||
211 | return; | ||
212 | |||
213 | if (trace->ds.top == top) { | ||
214 | /* | ||
215 | * It is possible but highly unlikely that we got a | ||
216 | * buffer overflow and end up at exactly the same | ||
217 | * position we started from. | ||
218 | * Let's issue a warning and check the full trace. | ||
219 | */ | ||
220 | printk(KERN_CONT | ||
221 | "no resume progress/overflow..."); | ||
222 | |||
223 | conf->error = | ||
224 | ds_selftest_bts_read(conf->tracer, trace, | ||
225 | trace->ds.begin, trace->ds.end); | ||
226 | } else if (trace->ds.top < top) { | ||
227 | /* | ||
228 | * We had a buffer overflow - the entire buffer should | ||
229 | * contain trace records. | ||
230 | */ | ||
231 | conf->error = | ||
232 | ds_selftest_bts_read(conf->tracer, trace, | ||
233 | trace->ds.begin, trace->ds.end); | ||
234 | } else { | ||
235 | /* | ||
236 | * It is quite likely that the buffer did not overflow. | ||
237 | * Let's just check the delta trace. | ||
238 | */ | ||
239 | conf->error = | ||
240 | ds_selftest_bts_read(conf->tracer, trace, top, | ||
241 | trace->ds.top); | ||
242 | } | ||
243 | if (conf->error < 0) | ||
244 | return; | ||
245 | |||
246 | conf->error = 0; | ||
247 | } | ||
248 | |||
249 | static int ds_suspend_bts_wrap(struct bts_tracer *tracer) | ||
250 | { | ||
251 | ds_suspend_bts(tracer); | ||
252 | return 0; | ||
253 | } | ||
254 | |||
255 | static int ds_resume_bts_wrap(struct bts_tracer *tracer) | ||
256 | { | ||
257 | ds_resume_bts(tracer); | ||
258 | return 0; | ||
259 | } | ||
260 | |||
261 | static void ds_release_bts_noirq_wrap(void *tracer) | ||
262 | { | ||
263 | (void)ds_release_bts_noirq(tracer); | ||
264 | } | ||
265 | |||
266 | static int ds_selftest_bts_bad_release_noirq(int cpu, | ||
267 | struct bts_tracer *tracer) | ||
268 | { | ||
269 | int error = -EPERM; | ||
270 | |||
271 | /* Try to release the tracer on the wrong cpu. */ | ||
272 | get_cpu(); | ||
273 | if (cpu != smp_processor_id()) { | ||
274 | error = ds_release_bts_noirq(tracer); | ||
275 | if (error != -EPERM) | ||
276 | printk(KERN_CONT "release on wrong cpu..."); | ||
277 | } | ||
278 | put_cpu(); | ||
279 | |||
280 | return error ? 0 : -1; | ||
281 | } | ||
282 | |||
283 | static int ds_selftest_bts_bad_request_cpu(int cpu, void *buffer) | ||
284 | { | ||
285 | struct bts_tracer *tracer; | ||
286 | int error; | ||
287 | |||
288 | /* Try to request cpu tracing while task tracing is active. */ | ||
289 | tracer = ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, NULL, | ||
290 | (size_t)-1, BTS_KERNEL); | ||
291 | error = PTR_ERR(tracer); | ||
292 | if (!IS_ERR(tracer)) { | ||
293 | ds_release_bts(tracer); | ||
294 | error = 0; | ||
295 | } | ||
296 | |||
297 | if (error != -EPERM) | ||
298 | printk(KERN_CONT "cpu/task tracing overlap..."); | ||
299 | |||
300 | return error ? 0 : -1; | ||
301 | } | ||
302 | |||
303 | static int ds_selftest_bts_bad_request_task(void *buffer) | ||
304 | { | ||
305 | struct bts_tracer *tracer; | ||
306 | int error; | ||
307 | |||
308 | /* Try to request cpu tracing while task tracing is active. */ | ||
309 | tracer = ds_request_bts_task(current, buffer, BUFFER_SIZE, NULL, | ||
310 | (size_t)-1, BTS_KERNEL); | ||
311 | error = PTR_ERR(tracer); | ||
312 | if (!IS_ERR(tracer)) { | ||
313 | error = 0; | ||
314 | ds_release_bts(tracer); | ||
315 | } | ||
316 | |||
317 | if (error != -EPERM) | ||
318 | printk(KERN_CONT "task/cpu tracing overlap..."); | ||
319 | |||
320 | return error ? 0 : -1; | ||
321 | } | ||
322 | |||
323 | int ds_selftest_bts(void) | ||
324 | { | ||
325 | struct ds_selftest_bts_conf conf; | ||
326 | unsigned char buffer[BUFFER_SIZE], *small_buffer; | ||
327 | unsigned long irq; | ||
328 | int cpu; | ||
329 | |||
330 | printk(KERN_INFO "[ds] bts selftest..."); | ||
331 | conf.error = 0; | ||
332 | |||
333 | small_buffer = (unsigned char *)ALIGN((unsigned long)buffer, 8) + 8; | ||
334 | |||
335 | get_online_cpus(); | ||
336 | for_each_online_cpu(cpu) { | ||
337 | conf.suspend = ds_suspend_bts_wrap; | ||
338 | conf.resume = ds_resume_bts_wrap; | ||
339 | conf.tracer = | ||
340 | ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, | ||
341 | NULL, (size_t)-1, BTS_KERNEL); | ||
342 | ds_selftest_bts_cpu(&conf); | ||
343 | if (conf.error >= 0) | ||
344 | conf.error = ds_selftest_bts_bad_request_task(buffer); | ||
345 | ds_release_bts(conf.tracer); | ||
346 | if (conf.error < 0) | ||
347 | goto out; | ||
348 | |||
349 | conf.suspend = ds_suspend_bts_noirq; | ||
350 | conf.resume = ds_resume_bts_noirq; | ||
351 | conf.tracer = | ||
352 | ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, | ||
353 | NULL, (size_t)-1, BTS_KERNEL); | ||
354 | smp_call_function_single(cpu, ds_selftest_bts_cpu, &conf, 1); | ||
355 | if (conf.error >= 0) { | ||
356 | conf.error = | ||
357 | ds_selftest_bts_bad_release_noirq(cpu, | ||
358 | conf.tracer); | ||
359 | /* We must not release the tracer twice. */ | ||
360 | if (conf.error < 0) | ||
361 | conf.tracer = NULL; | ||
362 | } | ||
363 | if (conf.error >= 0) | ||
364 | conf.error = ds_selftest_bts_bad_request_task(buffer); | ||
365 | smp_call_function_single(cpu, ds_release_bts_noirq_wrap, | ||
366 | conf.tracer, 1); | ||
367 | if (conf.error < 0) | ||
368 | goto out; | ||
369 | } | ||
370 | |||
371 | conf.suspend = ds_suspend_bts_wrap; | ||
372 | conf.resume = ds_resume_bts_wrap; | ||
373 | conf.tracer = | ||
374 | ds_request_bts_task(current, buffer, BUFFER_SIZE, | ||
375 | NULL, (size_t)-1, BTS_KERNEL); | ||
376 | ds_selftest_bts_cpu(&conf); | ||
377 | if (conf.error >= 0) | ||
378 | conf.error = ds_selftest_bts_bad_request_cpu(0, buffer); | ||
379 | ds_release_bts(conf.tracer); | ||
380 | if (conf.error < 0) | ||
381 | goto out; | ||
382 | |||
383 | conf.suspend = ds_suspend_bts_noirq; | ||
384 | conf.resume = ds_resume_bts_noirq; | ||
385 | conf.tracer = | ||
386 | ds_request_bts_task(current, small_buffer, SMALL_BUFFER_SIZE, | ||
387 | NULL, (size_t)-1, BTS_KERNEL); | ||
388 | local_irq_save(irq); | ||
389 | ds_selftest_bts_cpu(&conf); | ||
390 | if (conf.error >= 0) | ||
391 | conf.error = ds_selftest_bts_bad_request_cpu(0, buffer); | ||
392 | ds_release_bts_noirq(conf.tracer); | ||
393 | local_irq_restore(irq); | ||
394 | if (conf.error < 0) | ||
395 | goto out; | ||
396 | |||
397 | conf.error = 0; | ||
398 | out: | ||
399 | put_online_cpus(); | ||
400 | printk(KERN_CONT "%s.\n", (conf.error ? "failed" : "passed")); | ||
401 | |||
402 | return conf.error; | ||
403 | } | ||
404 | |||
405 | int ds_selftest_pebs(void) | ||
406 | { | ||
407 | return 0; | ||
408 | } | ||
diff --git a/arch/x86/kernel/ds_selftest.h b/arch/x86/kernel/ds_selftest.h deleted file mode 100644 index 2ba8745c6663..000000000000 --- a/arch/x86/kernel/ds_selftest.h +++ /dev/null | |||
@@ -1,15 +0,0 @@ | |||
1 | /* | ||
2 | * Debug Store support - selftest | ||
3 | * | ||
4 | * | ||
5 | * Copyright (C) 2009 Intel Corporation. | ||
6 | * Markus Metzger <markus.t.metzger@intel.com>, 2009 | ||
7 | */ | ||
8 | |||
9 | #ifdef CONFIG_X86_DS_SELFTEST | ||
10 | extern int ds_selftest_bts(void); | ||
11 | extern int ds_selftest_pebs(void); | ||
12 | #else | ||
13 | static inline int ds_selftest_bts(void) { return 0; } | ||
14 | static inline int ds_selftest_pebs(void) { return 0; } | ||
15 | #endif | ||
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 6d817554780a..c89a386930b7 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -224,11 +224,6 @@ unsigned __kprobes long oops_begin(void) | |||
224 | int cpu; | 224 | int cpu; |
225 | unsigned long flags; | 225 | unsigned long flags; |
226 | 226 | ||
227 | /* notify the hw-branch tracer so it may disable tracing and | ||
228 | add the last trace to the trace buffer - | ||
229 | the earlier this happens, the more useful the trace. */ | ||
230 | trace_hw_branch_oops(); | ||
231 | |||
232 | oops_enter(); | 227 | oops_enter(); |
233 | 228 | ||
234 | /* racy, but better than risking deadlock. */ | 229 | /* racy, but better than risking deadlock. */ |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 44a8e0dc6737..cd49141cf153 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -53,6 +53,7 @@ | |||
53 | #include <asm/processor-flags.h> | 53 | #include <asm/processor-flags.h> |
54 | #include <asm/ftrace.h> | 54 | #include <asm/ftrace.h> |
55 | #include <asm/irq_vectors.h> | 55 | #include <asm/irq_vectors.h> |
56 | #include <asm/cpufeature.h> | ||
56 | 57 | ||
57 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | 58 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ |
58 | #include <linux/elf-em.h> | 59 | #include <linux/elf-em.h> |
@@ -905,7 +906,25 @@ ENTRY(simd_coprocessor_error) | |||
905 | RING0_INT_FRAME | 906 | RING0_INT_FRAME |
906 | pushl $0 | 907 | pushl $0 |
907 | CFI_ADJUST_CFA_OFFSET 4 | 908 | CFI_ADJUST_CFA_OFFSET 4 |
909 | #ifdef CONFIG_X86_INVD_BUG | ||
910 | /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ | ||
911 | 661: pushl $do_general_protection | ||
912 | 662: | ||
913 | .section .altinstructions,"a" | ||
914 | .balign 4 | ||
915 | .long 661b | ||
916 | .long 663f | ||
917 | .byte X86_FEATURE_XMM | ||
918 | .byte 662b-661b | ||
919 | .byte 664f-663f | ||
920 | .previous | ||
921 | .section .altinstr_replacement,"ax" | ||
922 | 663: pushl $do_simd_coprocessor_error | ||
923 | 664: | ||
924 | .previous | ||
925 | #else | ||
908 | pushl $do_simd_coprocessor_error | 926 | pushl $do_simd_coprocessor_error |
927 | #endif | ||
909 | CFI_ADJUST_CFA_OFFSET 4 | 928 | CFI_ADJUST_CFA_OFFSET 4 |
910 | jmp error_code | 929 | jmp error_code |
911 | CFI_ENDPROC | 930 | CFI_ENDPROC |
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index d6cc065f519f..a8f1b803d2fd 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c | |||
@@ -189,25 +189,16 @@ static int get_hbp_len(u8 hbp_len) | |||
189 | } | 189 | } |
190 | 190 | ||
191 | /* | 191 | /* |
192 | * Check for virtual address in user space. | ||
193 | */ | ||
194 | int arch_check_va_in_userspace(unsigned long va, u8 hbp_len) | ||
195 | { | ||
196 | unsigned int len; | ||
197 | |||
198 | len = get_hbp_len(hbp_len); | ||
199 | |||
200 | return (va <= TASK_SIZE - len); | ||
201 | } | ||
202 | |||
203 | /* | ||
204 | * Check for virtual address in kernel space. | 192 | * Check for virtual address in kernel space. |
205 | */ | 193 | */ |
206 | static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) | 194 | int arch_check_bp_in_kernelspace(struct perf_event *bp) |
207 | { | 195 | { |
208 | unsigned int len; | 196 | unsigned int len; |
197 | unsigned long va; | ||
198 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
209 | 199 | ||
210 | len = get_hbp_len(hbp_len); | 200 | va = info->address; |
201 | len = get_hbp_len(info->len); | ||
211 | 202 | ||
212 | return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); | 203 | return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); |
213 | } | 204 | } |
@@ -300,8 +291,7 @@ static int arch_build_bp_info(struct perf_event *bp) | |||
300 | /* | 291 | /* |
301 | * Validate the arch-specific HW Breakpoint register settings | 292 | * Validate the arch-specific HW Breakpoint register settings |
302 | */ | 293 | */ |
303 | int arch_validate_hwbkpt_settings(struct perf_event *bp, | 294 | int arch_validate_hwbkpt_settings(struct perf_event *bp) |
304 | struct task_struct *tsk) | ||
305 | { | 295 | { |
306 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | 296 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); |
307 | unsigned int align; | 297 | unsigned int align; |
@@ -314,16 +304,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp, | |||
314 | 304 | ||
315 | ret = -EINVAL; | 305 | ret = -EINVAL; |
316 | 306 | ||
317 | if (info->type == X86_BREAKPOINT_EXECUTE) | ||
318 | /* | ||
319 | * Ptrace-refactoring code | ||
320 | * For now, we'll allow instruction breakpoint only for user-space | ||
321 | * addresses | ||
322 | */ | ||
323 | if ((!arch_check_va_in_userspace(info->address, info->len)) && | ||
324 | info->len != X86_BREAKPOINT_EXECUTE) | ||
325 | return ret; | ||
326 | |||
327 | switch (info->len) { | 307 | switch (info->len) { |
328 | case X86_BREAKPOINT_LEN_1: | 308 | case X86_BREAKPOINT_LEN_1: |
329 | align = 0; | 309 | align = 0; |
@@ -350,15 +330,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp, | |||
350 | if (info->address & align) | 330 | if (info->address & align) |
351 | return -EINVAL; | 331 | return -EINVAL; |
352 | 332 | ||
353 | /* Check that the virtual address is in the proper range */ | ||
354 | if (tsk) { | ||
355 | if (!arch_check_va_in_userspace(info->address, info->len)) | ||
356 | return -EFAULT; | ||
357 | } else { | ||
358 | if (!arch_check_va_in_kernelspace(info->address, info->len)) | ||
359 | return -EFAULT; | ||
360 | } | ||
361 | |||
362 | return 0; | 333 | return 0; |
363 | } | 334 | } |
364 | 335 | ||
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 54c31c285488..86cef6b32253 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -102,65 +102,62 @@ void __cpuinit fpu_init(void) | |||
102 | 102 | ||
103 | mxcsr_feature_mask_init(); | 103 | mxcsr_feature_mask_init(); |
104 | /* clean state in init */ | 104 | /* clean state in init */ |
105 | if (cpu_has_xsave) | 105 | current_thread_info()->status = 0; |
106 | current_thread_info()->status = TS_XSAVE; | ||
107 | else | ||
108 | current_thread_info()->status = 0; | ||
109 | clear_used_math(); | 106 | clear_used_math(); |
110 | } | 107 | } |
111 | #endif /* CONFIG_X86_64 */ | 108 | #endif /* CONFIG_X86_64 */ |
112 | 109 | ||
113 | /* | 110 | static void fpu_finit(struct fpu *fpu) |
114 | * The _current_ task is using the FPU for the first time | ||
115 | * so initialize it and set the mxcsr to its default | ||
116 | * value at reset if we support XMM instructions and then | ||
117 | * remeber the current task has used the FPU. | ||
118 | */ | ||
119 | int init_fpu(struct task_struct *tsk) | ||
120 | { | 111 | { |
121 | if (tsk_used_math(tsk)) { | ||
122 | if (HAVE_HWFP && tsk == current) | ||
123 | unlazy_fpu(tsk); | ||
124 | return 0; | ||
125 | } | ||
126 | |||
127 | /* | ||
128 | * Memory allocation at the first usage of the FPU and other state. | ||
129 | */ | ||
130 | if (!tsk->thread.xstate) { | ||
131 | tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep, | ||
132 | GFP_KERNEL); | ||
133 | if (!tsk->thread.xstate) | ||
134 | return -ENOMEM; | ||
135 | } | ||
136 | |||
137 | #ifdef CONFIG_X86_32 | 112 | #ifdef CONFIG_X86_32 |
138 | if (!HAVE_HWFP) { | 113 | if (!HAVE_HWFP) { |
139 | memset(tsk->thread.xstate, 0, xstate_size); | 114 | finit_soft_fpu(&fpu->state->soft); |
140 | finit_task(tsk); | 115 | return; |
141 | set_stopped_child_used_math(tsk); | ||
142 | return 0; | ||
143 | } | 116 | } |
144 | #endif | 117 | #endif |
145 | 118 | ||
146 | if (cpu_has_fxsr) { | 119 | if (cpu_has_fxsr) { |
147 | struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; | 120 | struct i387_fxsave_struct *fx = &fpu->state->fxsave; |
148 | 121 | ||
149 | memset(fx, 0, xstate_size); | 122 | memset(fx, 0, xstate_size); |
150 | fx->cwd = 0x37f; | 123 | fx->cwd = 0x37f; |
151 | if (cpu_has_xmm) | 124 | if (cpu_has_xmm) |
152 | fx->mxcsr = MXCSR_DEFAULT; | 125 | fx->mxcsr = MXCSR_DEFAULT; |
153 | } else { | 126 | } else { |
154 | struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; | 127 | struct i387_fsave_struct *fp = &fpu->state->fsave; |
155 | memset(fp, 0, xstate_size); | 128 | memset(fp, 0, xstate_size); |
156 | fp->cwd = 0xffff037fu; | 129 | fp->cwd = 0xffff037fu; |
157 | fp->swd = 0xffff0000u; | 130 | fp->swd = 0xffff0000u; |
158 | fp->twd = 0xffffffffu; | 131 | fp->twd = 0xffffffffu; |
159 | fp->fos = 0xffff0000u; | 132 | fp->fos = 0xffff0000u; |
160 | } | 133 | } |
134 | } | ||
135 | |||
136 | /* | ||
137 | * The _current_ task is using the FPU for the first time | ||
138 | * so initialize it and set the mxcsr to its default | ||
139 | * value at reset if we support XMM instructions and then | ||
140 | * remeber the current task has used the FPU. | ||
141 | */ | ||
142 | int init_fpu(struct task_struct *tsk) | ||
143 | { | ||
144 | int ret; | ||
145 | |||
146 | if (tsk_used_math(tsk)) { | ||
147 | if (HAVE_HWFP && tsk == current) | ||
148 | unlazy_fpu(tsk); | ||
149 | return 0; | ||
150 | } | ||
151 | |||
161 | /* | 152 | /* |
162 | * Only the device not available exception or ptrace can call init_fpu. | 153 | * Memory allocation at the first usage of the FPU and other state. |
163 | */ | 154 | */ |
155 | ret = fpu_alloc(&tsk->thread.fpu); | ||
156 | if (ret) | ||
157 | return ret; | ||
158 | |||
159 | fpu_finit(&tsk->thread.fpu); | ||
160 | |||
164 | set_stopped_child_used_math(tsk); | 161 | set_stopped_child_used_math(tsk); |
165 | return 0; | 162 | return 0; |
166 | } | 163 | } |
@@ -194,7 +191,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, | |||
194 | return ret; | 191 | return ret; |
195 | 192 | ||
196 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, | 193 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, |
197 | &target->thread.xstate->fxsave, 0, -1); | 194 | &target->thread.fpu.state->fxsave, 0, -1); |
198 | } | 195 | } |
199 | 196 | ||
200 | int xfpregs_set(struct task_struct *target, const struct user_regset *regset, | 197 | int xfpregs_set(struct task_struct *target, const struct user_regset *regset, |
@@ -211,19 +208,19 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
211 | return ret; | 208 | return ret; |
212 | 209 | ||
213 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, | 210 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, |
214 | &target->thread.xstate->fxsave, 0, -1); | 211 | &target->thread.fpu.state->fxsave, 0, -1); |
215 | 212 | ||
216 | /* | 213 | /* |
217 | * mxcsr reserved bits must be masked to zero for security reasons. | 214 | * mxcsr reserved bits must be masked to zero for security reasons. |
218 | */ | 215 | */ |
219 | target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; | 216 | target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask; |
220 | 217 | ||
221 | /* | 218 | /* |
222 | * update the header bits in the xsave header, indicating the | 219 | * update the header bits in the xsave header, indicating the |
223 | * presence of FP and SSE state. | 220 | * presence of FP and SSE state. |
224 | */ | 221 | */ |
225 | if (cpu_has_xsave) | 222 | if (cpu_has_xsave) |
226 | target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; | 223 | target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; |
227 | 224 | ||
228 | return ret; | 225 | return ret; |
229 | } | 226 | } |
@@ -246,14 +243,14 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, | |||
246 | * memory layout in the thread struct, so that we can copy the entire | 243 | * memory layout in the thread struct, so that we can copy the entire |
247 | * xstateregs to the user using one user_regset_copyout(). | 244 | * xstateregs to the user using one user_regset_copyout(). |
248 | */ | 245 | */ |
249 | memcpy(&target->thread.xstate->fxsave.sw_reserved, | 246 | memcpy(&target->thread.fpu.state->fxsave.sw_reserved, |
250 | xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); | 247 | xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); |
251 | 248 | ||
252 | /* | 249 | /* |
253 | * Copy the xstate memory layout. | 250 | * Copy the xstate memory layout. |
254 | */ | 251 | */ |
255 | ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, | 252 | ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, |
256 | &target->thread.xstate->xsave, 0, -1); | 253 | &target->thread.fpu.state->xsave, 0, -1); |
257 | return ret; | 254 | return ret; |
258 | } | 255 | } |
259 | 256 | ||
@@ -272,14 +269,14 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, | |||
272 | return ret; | 269 | return ret; |
273 | 270 | ||
274 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, | 271 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, |
275 | &target->thread.xstate->xsave, 0, -1); | 272 | &target->thread.fpu.state->xsave, 0, -1); |
276 | 273 | ||
277 | /* | 274 | /* |
278 | * mxcsr reserved bits must be masked to zero for security reasons. | 275 | * mxcsr reserved bits must be masked to zero for security reasons. |
279 | */ | 276 | */ |
280 | target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; | 277 | target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask; |
281 | 278 | ||
282 | xsave_hdr = &target->thread.xstate->xsave.xsave_hdr; | 279 | xsave_hdr = &target->thread.fpu.state->xsave.xsave_hdr; |
283 | 280 | ||
284 | xsave_hdr->xstate_bv &= pcntxt_mask; | 281 | xsave_hdr->xstate_bv &= pcntxt_mask; |
285 | /* | 282 | /* |
@@ -365,7 +362,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) | |||
365 | static void | 362 | static void |
366 | convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) | 363 | convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) |
367 | { | 364 | { |
368 | struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave; | 365 | struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; |
369 | struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; | 366 | struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; |
370 | struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; | 367 | struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; |
371 | int i; | 368 | int i; |
@@ -405,7 +402,7 @@ static void convert_to_fxsr(struct task_struct *tsk, | |||
405 | const struct user_i387_ia32_struct *env) | 402 | const struct user_i387_ia32_struct *env) |
406 | 403 | ||
407 | { | 404 | { |
408 | struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave; | 405 | struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; |
409 | struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; | 406 | struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; |
410 | struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; | 407 | struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; |
411 | int i; | 408 | int i; |
@@ -445,7 +442,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, | |||
445 | 442 | ||
446 | if (!cpu_has_fxsr) { | 443 | if (!cpu_has_fxsr) { |
447 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, | 444 | return user_regset_copyout(&pos, &count, &kbuf, &ubuf, |
448 | &target->thread.xstate->fsave, 0, | 445 | &target->thread.fpu.state->fsave, 0, |
449 | -1); | 446 | -1); |
450 | } | 447 | } |
451 | 448 | ||
@@ -475,7 +472,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
475 | 472 | ||
476 | if (!cpu_has_fxsr) { | 473 | if (!cpu_has_fxsr) { |
477 | return user_regset_copyin(&pos, &count, &kbuf, &ubuf, | 474 | return user_regset_copyin(&pos, &count, &kbuf, &ubuf, |
478 | &target->thread.xstate->fsave, 0, -1); | 475 | &target->thread.fpu.state->fsave, 0, -1); |
479 | } | 476 | } |
480 | 477 | ||
481 | if (pos > 0 || count < sizeof(env)) | 478 | if (pos > 0 || count < sizeof(env)) |
@@ -490,7 +487,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
490 | * presence of FP. | 487 | * presence of FP. |
491 | */ | 488 | */ |
492 | if (cpu_has_xsave) | 489 | if (cpu_has_xsave) |
493 | target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FP; | 490 | target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FP; |
494 | return ret; | 491 | return ret; |
495 | } | 492 | } |
496 | 493 | ||
@@ -501,7 +498,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
501 | static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) | 498 | static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) |
502 | { | 499 | { |
503 | struct task_struct *tsk = current; | 500 | struct task_struct *tsk = current; |
504 | struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; | 501 | struct i387_fsave_struct *fp = &tsk->thread.fpu.state->fsave; |
505 | 502 | ||
506 | fp->status = fp->swd; | 503 | fp->status = fp->swd; |
507 | if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) | 504 | if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) |
@@ -512,7 +509,7 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) | |||
512 | static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) | 509 | static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) |
513 | { | 510 | { |
514 | struct task_struct *tsk = current; | 511 | struct task_struct *tsk = current; |
515 | struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; | 512 | struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; |
516 | struct user_i387_ia32_struct env; | 513 | struct user_i387_ia32_struct env; |
517 | int err = 0; | 514 | int err = 0; |
518 | 515 | ||
@@ -547,7 +544,7 @@ static int save_i387_xsave(void __user *buf) | |||
547 | * header as well as change any contents in the memory layout. | 544 | * header as well as change any contents in the memory layout. |
548 | * xrestore as part of sigreturn will capture all the changes. | 545 | * xrestore as part of sigreturn will capture all the changes. |
549 | */ | 546 | */ |
550 | tsk->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; | 547 | tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; |
551 | 548 | ||
552 | if (save_i387_fxsave(fx) < 0) | 549 | if (save_i387_fxsave(fx) < 0) |
553 | return -1; | 550 | return -1; |
@@ -599,7 +596,7 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) | |||
599 | { | 596 | { |
600 | struct task_struct *tsk = current; | 597 | struct task_struct *tsk = current; |
601 | 598 | ||
602 | return __copy_from_user(&tsk->thread.xstate->fsave, buf, | 599 | return __copy_from_user(&tsk->thread.fpu.state->fsave, buf, |
603 | sizeof(struct i387_fsave_struct)); | 600 | sizeof(struct i387_fsave_struct)); |
604 | } | 601 | } |
605 | 602 | ||
@@ -610,10 +607,10 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf, | |||
610 | struct user_i387_ia32_struct env; | 607 | struct user_i387_ia32_struct env; |
611 | int err; | 608 | int err; |
612 | 609 | ||
613 | err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0], | 610 | err = __copy_from_user(&tsk->thread.fpu.state->fxsave, &buf->_fxsr_env[0], |
614 | size); | 611 | size); |
615 | /* mxcsr reserved bits must be masked to zero for security reasons */ | 612 | /* mxcsr reserved bits must be masked to zero for security reasons */ |
616 | tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; | 613 | tsk->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask; |
617 | if (err || __copy_from_user(&env, buf, sizeof(env))) | 614 | if (err || __copy_from_user(&env, buf, sizeof(env))) |
618 | return 1; | 615 | return 1; |
619 | convert_to_fxsr(tsk, &env); | 616 | convert_to_fxsr(tsk, &env); |
@@ -629,7 +626,7 @@ static int restore_i387_xsave(void __user *buf) | |||
629 | struct i387_fxsave_struct __user *fx = | 626 | struct i387_fxsave_struct __user *fx = |
630 | (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0]; | 627 | (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0]; |
631 | struct xsave_hdr_struct *xsave_hdr = | 628 | struct xsave_hdr_struct *xsave_hdr = |
632 | ¤t->thread.xstate->xsave.xsave_hdr; | 629 | ¤t->thread.fpu.state->xsave.xsave_hdr; |
633 | u64 mask; | 630 | u64 mask; |
634 | int err; | 631 | int err; |
635 | 632 | ||
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 23c167925a5c..2dfd31597443 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c | |||
@@ -16,7 +16,7 @@ | |||
16 | #include <asm/hpet.h> | 16 | #include <asm/hpet.h> |
17 | #include <asm/smp.h> | 17 | #include <asm/smp.h> |
18 | 18 | ||
19 | DEFINE_SPINLOCK(i8253_lock); | 19 | DEFINE_RAW_SPINLOCK(i8253_lock); |
20 | EXPORT_SYMBOL(i8253_lock); | 20 | EXPORT_SYMBOL(i8253_lock); |
21 | 21 | ||
22 | /* | 22 | /* |
@@ -33,7 +33,7 @@ struct clock_event_device *global_clock_event; | |||
33 | static void init_pit_timer(enum clock_event_mode mode, | 33 | static void init_pit_timer(enum clock_event_mode mode, |
34 | struct clock_event_device *evt) | 34 | struct clock_event_device *evt) |
35 | { | 35 | { |
36 | spin_lock(&i8253_lock); | 36 | raw_spin_lock(&i8253_lock); |
37 | 37 | ||
38 | switch (mode) { | 38 | switch (mode) { |
39 | case CLOCK_EVT_MODE_PERIODIC: | 39 | case CLOCK_EVT_MODE_PERIODIC: |
@@ -62,7 +62,7 @@ static void init_pit_timer(enum clock_event_mode mode, | |||
62 | /* Nothing to do here */ | 62 | /* Nothing to do here */ |
63 | break; | 63 | break; |
64 | } | 64 | } |
65 | spin_unlock(&i8253_lock); | 65 | raw_spin_unlock(&i8253_lock); |
66 | } | 66 | } |
67 | 67 | ||
68 | /* | 68 | /* |
@@ -72,10 +72,10 @@ static void init_pit_timer(enum clock_event_mode mode, | |||
72 | */ | 72 | */ |
73 | static int pit_next_event(unsigned long delta, struct clock_event_device *evt) | 73 | static int pit_next_event(unsigned long delta, struct clock_event_device *evt) |
74 | { | 74 | { |
75 | spin_lock(&i8253_lock); | 75 | raw_spin_lock(&i8253_lock); |
76 | outb_pit(delta & 0xff , PIT_CH0); /* LSB */ | 76 | outb_pit(delta & 0xff , PIT_CH0); /* LSB */ |
77 | outb_pit(delta >> 8 , PIT_CH0); /* MSB */ | 77 | outb_pit(delta >> 8 , PIT_CH0); /* MSB */ |
78 | spin_unlock(&i8253_lock); | 78 | raw_spin_unlock(&i8253_lock); |
79 | 79 | ||
80 | return 0; | 80 | return 0; |
81 | } | 81 | } |
@@ -130,7 +130,7 @@ static cycle_t pit_read(struct clocksource *cs) | |||
130 | int count; | 130 | int count; |
131 | u32 jifs; | 131 | u32 jifs; |
132 | 132 | ||
133 | spin_lock_irqsave(&i8253_lock, flags); | 133 | raw_spin_lock_irqsave(&i8253_lock, flags); |
134 | /* | 134 | /* |
135 | * Although our caller may have the read side of xtime_lock, | 135 | * Although our caller may have the read side of xtime_lock, |
136 | * this is now a seqlock, and we are cheating in this routine | 136 | * this is now a seqlock, and we are cheating in this routine |
@@ -176,7 +176,7 @@ static cycle_t pit_read(struct clocksource *cs) | |||
176 | old_count = count; | 176 | old_count = count; |
177 | old_jifs = jifs; | 177 | old_jifs = jifs; |
178 | 178 | ||
179 | spin_unlock_irqrestore(&i8253_lock, flags); | 179 | raw_spin_unlock_irqrestore(&i8253_lock, flags); |
180 | 180 | ||
181 | count = (LATCH - 1) - count; | 181 | count = (LATCH - 1) - count; |
182 | 182 | ||
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 0ed2d300cd46..990ae7cfc578 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -60,7 +60,7 @@ static irqreturn_t math_error_irq(int cpl, void *dev_id) | |||
60 | outb(0, 0xF0); | 60 | outb(0, 0xF0); |
61 | if (ignore_fpu_irq || !boot_cpu_data.hard_math) | 61 | if (ignore_fpu_irq || !boot_cpu_data.hard_math) |
62 | return IRQ_NONE; | 62 | return IRQ_NONE; |
63 | math_error((void __user *)get_irq_regs()->ip); | 63 | math_error(get_irq_regs(), 0, 16); |
64 | return IRQ_HANDLED; | 64 | return IRQ_HANDLED; |
65 | } | 65 | } |
66 | 66 | ||
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 1658efdfb4e5..345a4b1fe144 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -422,14 +422,22 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, | |||
422 | 422 | ||
423 | static void __kprobes clear_btf(void) | 423 | static void __kprobes clear_btf(void) |
424 | { | 424 | { |
425 | if (test_thread_flag(TIF_DEBUGCTLMSR)) | 425 | if (test_thread_flag(TIF_BLOCKSTEP)) { |
426 | update_debugctlmsr(0); | 426 | unsigned long debugctl = get_debugctlmsr(); |
427 | |||
428 | debugctl &= ~DEBUGCTLMSR_BTF; | ||
429 | update_debugctlmsr(debugctl); | ||
430 | } | ||
427 | } | 431 | } |
428 | 432 | ||
429 | static void __kprobes restore_btf(void) | 433 | static void __kprobes restore_btf(void) |
430 | { | 434 | { |
431 | if (test_thread_flag(TIF_DEBUGCTLMSR)) | 435 | if (test_thread_flag(TIF_BLOCKSTEP)) { |
432 | update_debugctlmsr(current->thread.debugctlmsr); | 436 | unsigned long debugctl = get_debugctlmsr(); |
437 | |||
438 | debugctl |= DEBUGCTLMSR_BTF; | ||
439 | update_debugctlmsr(debugctl); | ||
440 | } | ||
433 | } | 441 | } |
434 | 442 | ||
435 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, | 443 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, |
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index cceb5bc3c3c2..2cd8c544e41a 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
@@ -201,9 +201,9 @@ static int do_microcode_update(const void __user *buf, size_t size) | |||
201 | return error; | 201 | return error; |
202 | } | 202 | } |
203 | 203 | ||
204 | static int microcode_open(struct inode *unused1, struct file *unused2) | 204 | static int microcode_open(struct inode *inode, struct file *file) |
205 | { | 205 | { |
206 | return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; | 206 | return capable(CAP_SYS_RAWIO) ? nonseekable_open(inode, file) : -EPERM; |
207 | } | 207 | } |
208 | 208 | ||
209 | static ssize_t microcode_write(struct file *file, const char __user *buf, | 209 | static ssize_t microcode_write(struct file *file, const char __user *buf, |
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 85a343e28937..356170262a93 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c | |||
@@ -343,10 +343,11 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, | |||
343 | int (*get_ucode_data)(void *, const void *, size_t)) | 343 | int (*get_ucode_data)(void *, const void *, size_t)) |
344 | { | 344 | { |
345 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 345 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
346 | u8 *ucode_ptr = data, *new_mc = NULL, *mc; | 346 | u8 *ucode_ptr = data, *new_mc = NULL, *mc = NULL; |
347 | int new_rev = uci->cpu_sig.rev; | 347 | int new_rev = uci->cpu_sig.rev; |
348 | unsigned int leftover = size; | 348 | unsigned int leftover = size; |
349 | enum ucode_state state = UCODE_OK; | 349 | enum ucode_state state = UCODE_OK; |
350 | unsigned int curr_mc_size = 0; | ||
350 | 351 | ||
351 | while (leftover) { | 352 | while (leftover) { |
352 | struct microcode_header_intel mc_header; | 353 | struct microcode_header_intel mc_header; |
@@ -361,9 +362,15 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, | |||
361 | break; | 362 | break; |
362 | } | 363 | } |
363 | 364 | ||
364 | mc = vmalloc(mc_size); | 365 | /* For performance reasons, reuse mc area when possible */ |
365 | if (!mc) | 366 | if (!mc || mc_size > curr_mc_size) { |
366 | break; | 367 | if (mc) |
368 | vfree(mc); | ||
369 | mc = vmalloc(mc_size); | ||
370 | if (!mc) | ||
371 | break; | ||
372 | curr_mc_size = mc_size; | ||
373 | } | ||
367 | 374 | ||
368 | if (get_ucode_data(mc, ucode_ptr, mc_size) || | 375 | if (get_ucode_data(mc, ucode_ptr, mc_size) || |
369 | microcode_sanity_check(mc) < 0) { | 376 | microcode_sanity_check(mc) < 0) { |
@@ -376,13 +383,16 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, | |||
376 | vfree(new_mc); | 383 | vfree(new_mc); |
377 | new_rev = mc_header.rev; | 384 | new_rev = mc_header.rev; |
378 | new_mc = mc; | 385 | new_mc = mc; |
379 | } else | 386 | mc = NULL; /* trigger new vmalloc */ |
380 | vfree(mc); | 387 | } |
381 | 388 | ||
382 | ucode_ptr += mc_size; | 389 | ucode_ptr += mc_size; |
383 | leftover -= mc_size; | 390 | leftover -= mc_size; |
384 | } | 391 | } |
385 | 392 | ||
393 | if (mc) | ||
394 | vfree(mc); | ||
395 | |||
386 | if (leftover) { | 396 | if (leftover) { |
387 | if (new_mc) | 397 | if (new_mc) |
388 | vfree(new_mc); | 398 | vfree(new_mc); |
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index e81030f71a8f..5ae5d2426edf 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -115,21 +115,6 @@ static void __init MP_bus_info(struct mpc_bus *m) | |||
115 | printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); | 115 | printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); |
116 | } | 116 | } |
117 | 117 | ||
118 | static int bad_ioapic(unsigned long address) | ||
119 | { | ||
120 | if (nr_ioapics >= MAX_IO_APICS) { | ||
121 | printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " | ||
122 | "(found %d)\n", MAX_IO_APICS, nr_ioapics); | ||
123 | panic("Recompile kernel with bigger MAX_IO_APICS!\n"); | ||
124 | } | ||
125 | if (!address) { | ||
126 | printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" | ||
127 | " found in table, skipping!\n"); | ||
128 | return 1; | ||
129 | } | ||
130 | return 0; | ||
131 | } | ||
132 | |||
133 | static void __init MP_ioapic_info(struct mpc_ioapic *m) | 118 | static void __init MP_ioapic_info(struct mpc_ioapic *m) |
134 | { | 119 | { |
135 | if (!(m->flags & MPC_APIC_USABLE)) | 120 | if (!(m->flags & MPC_APIC_USABLE)) |
@@ -138,15 +123,7 @@ static void __init MP_ioapic_info(struct mpc_ioapic *m) | |||
138 | printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n", | 123 | printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n", |
139 | m->apicid, m->apicver, m->apicaddr); | 124 | m->apicid, m->apicver, m->apicaddr); |
140 | 125 | ||
141 | if (bad_ioapic(m->apicaddr)) | 126 | mp_register_ioapic(m->apicid, m->apicaddr, gsi_end + 1); |
142 | return; | ||
143 | |||
144 | mp_ioapics[nr_ioapics].apicaddr = m->apicaddr; | ||
145 | mp_ioapics[nr_ioapics].apicid = m->apicid; | ||
146 | mp_ioapics[nr_ioapics].type = m->type; | ||
147 | mp_ioapics[nr_ioapics].apicver = m->apicver; | ||
148 | mp_ioapics[nr_ioapics].flags = m->flags; | ||
149 | nr_ioapics++; | ||
150 | } | 127 | } |
151 | 128 | ||
152 | static void print_MP_intsrc_info(struct mpc_intsrc *m) | 129 | static void print_MP_intsrc_info(struct mpc_intsrc *m) |
diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c index 0aad8670858e..e796448f0eb5 100644 --- a/arch/x86/kernel/mrst.c +++ b/arch/x86/kernel/mrst.c | |||
@@ -237,4 +237,9 @@ void __init x86_mrst_early_setup(void) | |||
237 | x86_init.pci.fixup_irqs = x86_init_noop; | 237 | x86_init.pci.fixup_irqs = x86_init_noop; |
238 | 238 | ||
239 | legacy_pic = &null_legacy_pic; | 239 | legacy_pic = &null_legacy_pic; |
240 | |||
241 | /* Avoid searching for BIOS MP tables */ | ||
242 | x86_init.mpparse.find_smp_config = x86_init_noop; | ||
243 | x86_init.mpparse.get_smp_config = x86_init_uint_noop; | ||
244 | |||
240 | } | 245 | } |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 28ad9f4d8b94..e7e35219b32f 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include <asm/idle.h> | 20 | #include <asm/idle.h> |
21 | #include <asm/uaccess.h> | 21 | #include <asm/uaccess.h> |
22 | #include <asm/i387.h> | 22 | #include <asm/i387.h> |
23 | #include <asm/ds.h> | ||
24 | #include <asm/debugreg.h> | 23 | #include <asm/debugreg.h> |
25 | 24 | ||
26 | unsigned long idle_halt; | 25 | unsigned long idle_halt; |
@@ -32,26 +31,22 @@ struct kmem_cache *task_xstate_cachep; | |||
32 | 31 | ||
33 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) | 32 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) |
34 | { | 33 | { |
34 | int ret; | ||
35 | |||
35 | *dst = *src; | 36 | *dst = *src; |
36 | if (src->thread.xstate) { | 37 | if (fpu_allocated(&src->thread.fpu)) { |
37 | dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, | 38 | memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu)); |
38 | GFP_KERNEL); | 39 | ret = fpu_alloc(&dst->thread.fpu); |
39 | if (!dst->thread.xstate) | 40 | if (ret) |
40 | return -ENOMEM; | 41 | return ret; |
41 | WARN_ON((unsigned long)dst->thread.xstate & 15); | 42 | fpu_copy(&dst->thread.fpu, &src->thread.fpu); |
42 | memcpy(dst->thread.xstate, src->thread.xstate, xstate_size); | ||
43 | } | 43 | } |
44 | return 0; | 44 | return 0; |
45 | } | 45 | } |
46 | 46 | ||
47 | void free_thread_xstate(struct task_struct *tsk) | 47 | void free_thread_xstate(struct task_struct *tsk) |
48 | { | 48 | { |
49 | if (tsk->thread.xstate) { | 49 | fpu_free(&tsk->thread.fpu); |
50 | kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); | ||
51 | tsk->thread.xstate = NULL; | ||
52 | } | ||
53 | |||
54 | WARN(tsk->thread.ds_ctx, "leaking DS context\n"); | ||
55 | } | 50 | } |
56 | 51 | ||
57 | void free_thread_info(struct thread_info *ti) | 52 | void free_thread_info(struct thread_info *ti) |
@@ -198,11 +193,16 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
198 | prev = &prev_p->thread; | 193 | prev = &prev_p->thread; |
199 | next = &next_p->thread; | 194 | next = &next_p->thread; |
200 | 195 | ||
201 | if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || | 196 | if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^ |
202 | test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) | 197 | test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) { |
203 | ds_switch_to(prev_p, next_p); | 198 | unsigned long debugctl = get_debugctlmsr(); |
204 | else if (next->debugctlmsr != prev->debugctlmsr) | 199 | |
205 | update_debugctlmsr(next->debugctlmsr); | 200 | debugctl &= ~DEBUGCTLMSR_BTF; |
201 | if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) | ||
202 | debugctl |= DEBUGCTLMSR_BTF; | ||
203 | |||
204 | update_debugctlmsr(debugctl); | ||
205 | } | ||
206 | 206 | ||
207 | if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ | 207 | if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ |
208 | test_tsk_thread_flag(next_p, TIF_NOTSC)) { | 208 | test_tsk_thread_flag(next_p, TIF_NOTSC)) { |
@@ -546,11 +546,13 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) | |||
546 | * check OSVW bit for CPUs that are not affected | 546 | * check OSVW bit for CPUs that are not affected |
547 | * by erratum #400 | 547 | * by erratum #400 |
548 | */ | 548 | */ |
549 | rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, val); | 549 | if (cpu_has(c, X86_FEATURE_OSVW)) { |
550 | if (val >= 2) { | 550 | rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, val); |
551 | rdmsrl(MSR_AMD64_OSVW_STATUS, val); | 551 | if (val >= 2) { |
552 | if (!(val & BIT(1))) | 552 | rdmsrl(MSR_AMD64_OSVW_STATUS, val); |
553 | goto no_c1e_idle; | 553 | if (!(val & BIT(1))) |
554 | goto no_c1e_idle; | ||
555 | } | ||
554 | } | 556 | } |
555 | return 1; | 557 | return 1; |
556 | } | 558 | } |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index f6c62667e30c..8d128783af47 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -55,7 +55,6 @@ | |||
55 | #include <asm/cpu.h> | 55 | #include <asm/cpu.h> |
56 | #include <asm/idle.h> | 56 | #include <asm/idle.h> |
57 | #include <asm/syscalls.h> | 57 | #include <asm/syscalls.h> |
58 | #include <asm/ds.h> | ||
59 | #include <asm/debugreg.h> | 58 | #include <asm/debugreg.h> |
60 | 59 | ||
61 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 60 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
@@ -238,13 +237,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
238 | kfree(p->thread.io_bitmap_ptr); | 237 | kfree(p->thread.io_bitmap_ptr); |
239 | p->thread.io_bitmap_max = 0; | 238 | p->thread.io_bitmap_max = 0; |
240 | } | 239 | } |
241 | |||
242 | clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); | ||
243 | p->thread.ds_ctx = NULL; | ||
244 | |||
245 | clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); | ||
246 | p->thread.debugctlmsr = 0; | ||
247 | |||
248 | return err; | 240 | return err; |
249 | } | 241 | } |
250 | 242 | ||
@@ -317,7 +309,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
317 | 309 | ||
318 | /* we're going to use this soon, after a few expensive things */ | 310 | /* we're going to use this soon, after a few expensive things */ |
319 | if (preload_fpu) | 311 | if (preload_fpu) |
320 | prefetch(next->xstate); | 312 | prefetch(next->fpu.state); |
321 | 313 | ||
322 | /* | 314 | /* |
323 | * Reload esp0. | 315 | * Reload esp0. |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 17cb3295cbf7..3c2422a99f1f 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -49,7 +49,6 @@ | |||
49 | #include <asm/ia32.h> | 49 | #include <asm/ia32.h> |
50 | #include <asm/idle.h> | 50 | #include <asm/idle.h> |
51 | #include <asm/syscalls.h> | 51 | #include <asm/syscalls.h> |
52 | #include <asm/ds.h> | ||
53 | #include <asm/debugreg.h> | 52 | #include <asm/debugreg.h> |
54 | 53 | ||
55 | asmlinkage extern void ret_from_fork(void); | 54 | asmlinkage extern void ret_from_fork(void); |
@@ -313,13 +312,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
313 | if (err) | 312 | if (err) |
314 | goto out; | 313 | goto out; |
315 | } | 314 | } |
316 | |||
317 | clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); | ||
318 | p->thread.ds_ctx = NULL; | ||
319 | |||
320 | clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); | ||
321 | p->thread.debugctlmsr = 0; | ||
322 | |||
323 | err = 0; | 315 | err = 0; |
324 | out: | 316 | out: |
325 | if (err && p->thread.io_bitmap_ptr) { | 317 | if (err && p->thread.io_bitmap_ptr) { |
@@ -396,7 +388,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
396 | 388 | ||
397 | /* we're going to use this soon, after a few expensive things */ | 389 | /* we're going to use this soon, after a few expensive things */ |
398 | if (preload_fpu) | 390 | if (preload_fpu) |
399 | prefetch(next->xstate); | 391 | prefetch(next->fpu.state); |
400 | 392 | ||
401 | /* | 393 | /* |
402 | * Reload esp0, LDT and the page table pointer: | 394 | * Reload esp0, LDT and the page table pointer: |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 2e9b55027b7e..70c4872cd8aa 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -2,9 +2,6 @@ | |||
2 | /* | 2 | /* |
3 | * Pentium III FXSR, SSE support | 3 | * Pentium III FXSR, SSE support |
4 | * Gareth Hughes <gareth@valinux.com>, May 2000 | 4 | * Gareth Hughes <gareth@valinux.com>, May 2000 |
5 | * | ||
6 | * BTS tracing | ||
7 | * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007 | ||
8 | */ | 5 | */ |
9 | 6 | ||
10 | #include <linux/kernel.h> | 7 | #include <linux/kernel.h> |
@@ -22,7 +19,6 @@ | |||
22 | #include <linux/audit.h> | 19 | #include <linux/audit.h> |
23 | #include <linux/seccomp.h> | 20 | #include <linux/seccomp.h> |
24 | #include <linux/signal.h> | 21 | #include <linux/signal.h> |
25 | #include <linux/workqueue.h> | ||
26 | #include <linux/perf_event.h> | 22 | #include <linux/perf_event.h> |
27 | #include <linux/hw_breakpoint.h> | 23 | #include <linux/hw_breakpoint.h> |
28 | 24 | ||
@@ -36,7 +32,6 @@ | |||
36 | #include <asm/desc.h> | 32 | #include <asm/desc.h> |
37 | #include <asm/prctl.h> | 33 | #include <asm/prctl.h> |
38 | #include <asm/proto.h> | 34 | #include <asm/proto.h> |
39 | #include <asm/ds.h> | ||
40 | #include <asm/hw_breakpoint.h> | 35 | #include <asm/hw_breakpoint.h> |
41 | 36 | ||
42 | #include "tls.h" | 37 | #include "tls.h" |
@@ -693,7 +688,7 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, | |||
693 | struct perf_event_attr attr; | 688 | struct perf_event_attr attr; |
694 | 689 | ||
695 | if (!t->ptrace_bps[nr]) { | 690 | if (!t->ptrace_bps[nr]) { |
696 | hw_breakpoint_init(&attr); | 691 | ptrace_breakpoint_init(&attr); |
697 | /* | 692 | /* |
698 | * Put stub len and type to register (reserve) an inactive but | 693 | * Put stub len and type to register (reserve) an inactive but |
699 | * correct bp | 694 | * correct bp |
@@ -789,342 +784,6 @@ static int ioperm_get(struct task_struct *target, | |||
789 | 0, IO_BITMAP_BYTES); | 784 | 0, IO_BITMAP_BYTES); |
790 | } | 785 | } |
791 | 786 | ||
792 | #ifdef CONFIG_X86_PTRACE_BTS | ||
793 | /* | ||
794 | * A branch trace store context. | ||
795 | * | ||
796 | * Contexts may only be installed by ptrace_bts_config() and only for | ||
797 | * ptraced tasks. | ||
798 | * | ||
799 | * Contexts are destroyed when the tracee is detached from the tracer. | ||
800 | * The actual destruction work requires interrupts enabled, so the | ||
801 | * work is deferred and will be scheduled during __ptrace_unlink(). | ||
802 | * | ||
803 | * Contexts hold an additional task_struct reference on the traced | ||
804 | * task, as well as a reference on the tracer's mm. | ||
805 | * | ||
806 | * Ptrace already holds a task_struct for the duration of ptrace operations, | ||
807 | * but since destruction is deferred, it may be executed after both | ||
808 | * tracer and tracee exited. | ||
809 | */ | ||
810 | struct bts_context { | ||
811 | /* The branch trace handle. */ | ||
812 | struct bts_tracer *tracer; | ||
813 | |||
814 | /* The buffer used to store the branch trace and its size. */ | ||
815 | void *buffer; | ||
816 | unsigned int size; | ||
817 | |||
818 | /* The mm that paid for the above buffer. */ | ||
819 | struct mm_struct *mm; | ||
820 | |||
821 | /* The task this context belongs to. */ | ||
822 | struct task_struct *task; | ||
823 | |||
824 | /* The signal to send on a bts buffer overflow. */ | ||
825 | unsigned int bts_ovfl_signal; | ||
826 | |||
827 | /* The work struct to destroy a context. */ | ||
828 | struct work_struct work; | ||
829 | }; | ||
830 | |||
831 | static int alloc_bts_buffer(struct bts_context *context, unsigned int size) | ||
832 | { | ||
833 | void *buffer = NULL; | ||
834 | int err = -ENOMEM; | ||
835 | |||
836 | err = account_locked_memory(current->mm, current->signal->rlim, size); | ||
837 | if (err < 0) | ||
838 | return err; | ||
839 | |||
840 | buffer = kzalloc(size, GFP_KERNEL); | ||
841 | if (!buffer) | ||
842 | goto out_refund; | ||
843 | |||
844 | context->buffer = buffer; | ||
845 | context->size = size; | ||
846 | context->mm = get_task_mm(current); | ||
847 | |||
848 | return 0; | ||
849 | |||
850 | out_refund: | ||
851 | refund_locked_memory(current->mm, size); | ||
852 | return err; | ||
853 | } | ||
854 | |||
855 | static inline void free_bts_buffer(struct bts_context *context) | ||
856 | { | ||
857 | if (!context->buffer) | ||
858 | return; | ||
859 | |||
860 | kfree(context->buffer); | ||
861 | context->buffer = NULL; | ||
862 | |||
863 | refund_locked_memory(context->mm, context->size); | ||
864 | context->size = 0; | ||
865 | |||
866 | mmput(context->mm); | ||
867 | context->mm = NULL; | ||
868 | } | ||
869 | |||
870 | static void free_bts_context_work(struct work_struct *w) | ||
871 | { | ||
872 | struct bts_context *context; | ||
873 | |||
874 | context = container_of(w, struct bts_context, work); | ||
875 | |||
876 | ds_release_bts(context->tracer); | ||
877 | put_task_struct(context->task); | ||
878 | free_bts_buffer(context); | ||
879 | kfree(context); | ||
880 | } | ||
881 | |||
882 | static inline void free_bts_context(struct bts_context *context) | ||
883 | { | ||
884 | INIT_WORK(&context->work, free_bts_context_work); | ||
885 | schedule_work(&context->work); | ||
886 | } | ||
887 | |||
888 | static inline struct bts_context *alloc_bts_context(struct task_struct *task) | ||
889 | { | ||
890 | struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL); | ||
891 | if (context) { | ||
892 | context->task = task; | ||
893 | task->bts = context; | ||
894 | |||
895 | get_task_struct(task); | ||
896 | } | ||
897 | |||
898 | return context; | ||
899 | } | ||
900 | |||
901 | static int ptrace_bts_read_record(struct task_struct *child, size_t index, | ||
902 | struct bts_struct __user *out) | ||
903 | { | ||
904 | struct bts_context *context; | ||
905 | const struct bts_trace *trace; | ||
906 | struct bts_struct bts; | ||
907 | const unsigned char *at; | ||
908 | int error; | ||
909 | |||
910 | context = child->bts; | ||
911 | if (!context) | ||
912 | return -ESRCH; | ||
913 | |||
914 | trace = ds_read_bts(context->tracer); | ||
915 | if (!trace) | ||
916 | return -ESRCH; | ||
917 | |||
918 | at = trace->ds.top - ((index + 1) * trace->ds.size); | ||
919 | if ((void *)at < trace->ds.begin) | ||
920 | at += (trace->ds.n * trace->ds.size); | ||
921 | |||
922 | if (!trace->read) | ||
923 | return -EOPNOTSUPP; | ||
924 | |||
925 | error = trace->read(context->tracer, at, &bts); | ||
926 | if (error < 0) | ||
927 | return error; | ||
928 | |||
929 | if (copy_to_user(out, &bts, sizeof(bts))) | ||
930 | return -EFAULT; | ||
931 | |||
932 | return sizeof(bts); | ||
933 | } | ||
934 | |||
935 | static int ptrace_bts_drain(struct task_struct *child, | ||
936 | long size, | ||
937 | struct bts_struct __user *out) | ||
938 | { | ||
939 | struct bts_context *context; | ||
940 | const struct bts_trace *trace; | ||
941 | const unsigned char *at; | ||
942 | int error, drained = 0; | ||
943 | |||
944 | context = child->bts; | ||
945 | if (!context) | ||
946 | return -ESRCH; | ||
947 | |||
948 | trace = ds_read_bts(context->tracer); | ||
949 | if (!trace) | ||
950 | return -ESRCH; | ||
951 | |||
952 | if (!trace->read) | ||
953 | return -EOPNOTSUPP; | ||
954 | |||
955 | if (size < (trace->ds.top - trace->ds.begin)) | ||
956 | return -EIO; | ||
957 | |||
958 | for (at = trace->ds.begin; (void *)at < trace->ds.top; | ||
959 | out++, drained++, at += trace->ds.size) { | ||
960 | struct bts_struct bts; | ||
961 | |||
962 | error = trace->read(context->tracer, at, &bts); | ||
963 | if (error < 0) | ||
964 | return error; | ||
965 | |||
966 | if (copy_to_user(out, &bts, sizeof(bts))) | ||
967 | return -EFAULT; | ||
968 | } | ||
969 | |||
970 | memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); | ||
971 | |||
972 | error = ds_reset_bts(context->tracer); | ||
973 | if (error < 0) | ||
974 | return error; | ||
975 | |||
976 | return drained; | ||
977 | } | ||
978 | |||
979 | static int ptrace_bts_config(struct task_struct *child, | ||
980 | long cfg_size, | ||
981 | const struct ptrace_bts_config __user *ucfg) | ||
982 | { | ||
983 | struct bts_context *context; | ||
984 | struct ptrace_bts_config cfg; | ||
985 | unsigned int flags = 0; | ||
986 | |||
987 | if (cfg_size < sizeof(cfg)) | ||
988 | return -EIO; | ||
989 | |||
990 | if (copy_from_user(&cfg, ucfg, sizeof(cfg))) | ||
991 | return -EFAULT; | ||
992 | |||
993 | context = child->bts; | ||
994 | if (!context) | ||
995 | context = alloc_bts_context(child); | ||
996 | if (!context) | ||
997 | return -ENOMEM; | ||
998 | |||
999 | if (cfg.flags & PTRACE_BTS_O_SIGNAL) { | ||
1000 | if (!cfg.signal) | ||
1001 | return -EINVAL; | ||
1002 | |||
1003 | return -EOPNOTSUPP; | ||
1004 | context->bts_ovfl_signal = cfg.signal; | ||
1005 | } | ||
1006 | |||
1007 | ds_release_bts(context->tracer); | ||
1008 | context->tracer = NULL; | ||
1009 | |||
1010 | if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) { | ||
1011 | int err; | ||
1012 | |||
1013 | free_bts_buffer(context); | ||
1014 | if (!cfg.size) | ||
1015 | return 0; | ||
1016 | |||
1017 | err = alloc_bts_buffer(context, cfg.size); | ||
1018 | if (err < 0) | ||
1019 | return err; | ||
1020 | } | ||
1021 | |||
1022 | if (cfg.flags & PTRACE_BTS_O_TRACE) | ||
1023 | flags |= BTS_USER; | ||
1024 | |||
1025 | if (cfg.flags & PTRACE_BTS_O_SCHED) | ||
1026 | flags |= BTS_TIMESTAMPS; | ||
1027 | |||
1028 | context->tracer = | ||
1029 | ds_request_bts_task(child, context->buffer, context->size, | ||
1030 | NULL, (size_t)-1, flags); | ||
1031 | if (unlikely(IS_ERR(context->tracer))) { | ||
1032 | int error = PTR_ERR(context->tracer); | ||
1033 | |||
1034 | free_bts_buffer(context); | ||
1035 | context->tracer = NULL; | ||
1036 | return error; | ||
1037 | } | ||
1038 | |||
1039 | return sizeof(cfg); | ||
1040 | } | ||
1041 | |||
1042 | static int ptrace_bts_status(struct task_struct *child, | ||
1043 | long cfg_size, | ||
1044 | struct ptrace_bts_config __user *ucfg) | ||
1045 | { | ||
1046 | struct bts_context *context; | ||
1047 | const struct bts_trace *trace; | ||
1048 | struct ptrace_bts_config cfg; | ||
1049 | |||
1050 | context = child->bts; | ||
1051 | if (!context) | ||
1052 | return -ESRCH; | ||
1053 | |||
1054 | if (cfg_size < sizeof(cfg)) | ||
1055 | return -EIO; | ||
1056 | |||
1057 | trace = ds_read_bts(context->tracer); | ||
1058 | if (!trace) | ||
1059 | return -ESRCH; | ||
1060 | |||
1061 | memset(&cfg, 0, sizeof(cfg)); | ||
1062 | cfg.size = trace->ds.end - trace->ds.begin; | ||
1063 | cfg.signal = context->bts_ovfl_signal; | ||
1064 | cfg.bts_size = sizeof(struct bts_struct); | ||
1065 | |||
1066 | if (cfg.signal) | ||
1067 | cfg.flags |= PTRACE_BTS_O_SIGNAL; | ||
1068 | |||
1069 | if (trace->ds.flags & BTS_USER) | ||
1070 | cfg.flags |= PTRACE_BTS_O_TRACE; | ||
1071 | |||
1072 | if (trace->ds.flags & BTS_TIMESTAMPS) | ||
1073 | cfg.flags |= PTRACE_BTS_O_SCHED; | ||
1074 | |||
1075 | if (copy_to_user(ucfg, &cfg, sizeof(cfg))) | ||
1076 | return -EFAULT; | ||
1077 | |||
1078 | return sizeof(cfg); | ||
1079 | } | ||
1080 | |||
1081 | static int ptrace_bts_clear(struct task_struct *child) | ||
1082 | { | ||
1083 | struct bts_context *context; | ||
1084 | const struct bts_trace *trace; | ||
1085 | |||
1086 | context = child->bts; | ||
1087 | if (!context) | ||
1088 | return -ESRCH; | ||
1089 | |||
1090 | trace = ds_read_bts(context->tracer); | ||
1091 | if (!trace) | ||
1092 | return -ESRCH; | ||
1093 | |||
1094 | memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); | ||
1095 | |||
1096 | return ds_reset_bts(context->tracer); | ||
1097 | } | ||
1098 | |||
1099 | static int ptrace_bts_size(struct task_struct *child) | ||
1100 | { | ||
1101 | struct bts_context *context; | ||
1102 | const struct bts_trace *trace; | ||
1103 | |||
1104 | context = child->bts; | ||
1105 | if (!context) | ||
1106 | return -ESRCH; | ||
1107 | |||
1108 | trace = ds_read_bts(context->tracer); | ||
1109 | if (!trace) | ||
1110 | return -ESRCH; | ||
1111 | |||
1112 | return (trace->ds.top - trace->ds.begin) / trace->ds.size; | ||
1113 | } | ||
1114 | |||
1115 | /* | ||
1116 | * Called from __ptrace_unlink() after the child has been moved back | ||
1117 | * to its original parent. | ||
1118 | */ | ||
1119 | void ptrace_bts_untrace(struct task_struct *child) | ||
1120 | { | ||
1121 | if (unlikely(child->bts)) { | ||
1122 | free_bts_context(child->bts); | ||
1123 | child->bts = NULL; | ||
1124 | } | ||
1125 | } | ||
1126 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
1127 | |||
1128 | /* | 787 | /* |
1129 | * Called by kernel/ptrace.c when detaching.. | 788 | * Called by kernel/ptrace.c when detaching.. |
1130 | * | 789 | * |
@@ -1252,39 +911,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
1252 | break; | 911 | break; |
1253 | #endif | 912 | #endif |
1254 | 913 | ||
1255 | /* | ||
1256 | * These bits need more cooking - not enabled yet: | ||
1257 | */ | ||
1258 | #ifdef CONFIG_X86_PTRACE_BTS | ||
1259 | case PTRACE_BTS_CONFIG: | ||
1260 | ret = ptrace_bts_config | ||
1261 | (child, data, (struct ptrace_bts_config __user *)addr); | ||
1262 | break; | ||
1263 | |||
1264 | case PTRACE_BTS_STATUS: | ||
1265 | ret = ptrace_bts_status | ||
1266 | (child, data, (struct ptrace_bts_config __user *)addr); | ||
1267 | break; | ||
1268 | |||
1269 | case PTRACE_BTS_SIZE: | ||
1270 | ret = ptrace_bts_size(child); | ||
1271 | break; | ||
1272 | |||
1273 | case PTRACE_BTS_GET: | ||
1274 | ret = ptrace_bts_read_record | ||
1275 | (child, data, (struct bts_struct __user *) addr); | ||
1276 | break; | ||
1277 | |||
1278 | case PTRACE_BTS_CLEAR: | ||
1279 | ret = ptrace_bts_clear(child); | ||
1280 | break; | ||
1281 | |||
1282 | case PTRACE_BTS_DRAIN: | ||
1283 | ret = ptrace_bts_drain | ||
1284 | (child, data, (struct bts_struct __user *) addr); | ||
1285 | break; | ||
1286 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
1287 | |||
1288 | default: | 914 | default: |
1289 | ret = ptrace_request(child, request, addr, data); | 915 | ret = ptrace_request(child, request, addr, data); |
1290 | break; | 916 | break; |
@@ -1544,14 +1170,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, | |||
1544 | 1170 | ||
1545 | case PTRACE_GET_THREAD_AREA: | 1171 | case PTRACE_GET_THREAD_AREA: |
1546 | case PTRACE_SET_THREAD_AREA: | 1172 | case PTRACE_SET_THREAD_AREA: |
1547 | #ifdef CONFIG_X86_PTRACE_BTS | ||
1548 | case PTRACE_BTS_CONFIG: | ||
1549 | case PTRACE_BTS_STATUS: | ||
1550 | case PTRACE_BTS_SIZE: | ||
1551 | case PTRACE_BTS_GET: | ||
1552 | case PTRACE_BTS_CLEAR: | ||
1553 | case PTRACE_BTS_DRAIN: | ||
1554 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
1555 | return arch_ptrace(child, request, addr, data); | 1173 | return arch_ptrace(child, request, addr, data); |
1556 | 1174 | ||
1557 | default: | 1175 | default: |
diff --git a/arch/x86/kernel/sfi.c b/arch/x86/kernel/sfi.c index 34e099382651..7ded57896c0a 100644 --- a/arch/x86/kernel/sfi.c +++ b/arch/x86/kernel/sfi.c | |||
@@ -81,7 +81,6 @@ static int __init sfi_parse_cpus(struct sfi_table_header *table) | |||
81 | #endif /* CONFIG_X86_LOCAL_APIC */ | 81 | #endif /* CONFIG_X86_LOCAL_APIC */ |
82 | 82 | ||
83 | #ifdef CONFIG_X86_IO_APIC | 83 | #ifdef CONFIG_X86_IO_APIC |
84 | static u32 gsi_base; | ||
85 | 84 | ||
86 | static int __init sfi_parse_ioapic(struct sfi_table_header *table) | 85 | static int __init sfi_parse_ioapic(struct sfi_table_header *table) |
87 | { | 86 | { |
@@ -94,8 +93,7 @@ static int __init sfi_parse_ioapic(struct sfi_table_header *table) | |||
94 | pentry = (struct sfi_apic_table_entry *)sb->pentry; | 93 | pentry = (struct sfi_apic_table_entry *)sb->pentry; |
95 | 94 | ||
96 | for (i = 0; i < num; i++) { | 95 | for (i = 0; i < num; i++) { |
97 | mp_register_ioapic(i, pentry->phys_addr, gsi_base); | 96 | mp_register_ioapic(i, pentry->phys_addr, gsi_end + 1); |
98 | gsi_base += io_apic_get_redir_entries(i); | ||
99 | pentry++; | 97 | pentry++; |
100 | } | 98 | } |
101 | 99 | ||
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 3149032ff107..58de45ee08b6 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c | |||
@@ -158,22 +158,6 @@ static int enable_single_step(struct task_struct *child) | |||
158 | } | 158 | } |
159 | 159 | ||
160 | /* | 160 | /* |
161 | * Install this value in MSR_IA32_DEBUGCTLMSR whenever child is running. | ||
162 | */ | ||
163 | static void write_debugctlmsr(struct task_struct *child, unsigned long val) | ||
164 | { | ||
165 | if (child->thread.debugctlmsr == val) | ||
166 | return; | ||
167 | |||
168 | child->thread.debugctlmsr = val; | ||
169 | |||
170 | if (child != current) | ||
171 | return; | ||
172 | |||
173 | update_debugctlmsr(val); | ||
174 | } | ||
175 | |||
176 | /* | ||
177 | * Enable single or block step. | 161 | * Enable single or block step. |
178 | */ | 162 | */ |
179 | static void enable_step(struct task_struct *child, bool block) | 163 | static void enable_step(struct task_struct *child, bool block) |
@@ -186,15 +170,17 @@ static void enable_step(struct task_struct *child, bool block) | |||
186 | * that uses user-mode single stepping itself. | 170 | * that uses user-mode single stepping itself. |
187 | */ | 171 | */ |
188 | if (enable_single_step(child) && block) { | 172 | if (enable_single_step(child) && block) { |
189 | set_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | 173 | unsigned long debugctl = get_debugctlmsr(); |
190 | write_debugctlmsr(child, | 174 | |
191 | child->thread.debugctlmsr | DEBUGCTLMSR_BTF); | 175 | debugctl |= DEBUGCTLMSR_BTF; |
192 | } else { | 176 | update_debugctlmsr(debugctl); |
193 | write_debugctlmsr(child, | 177 | set_tsk_thread_flag(child, TIF_BLOCKSTEP); |
194 | child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF); | 178 | } else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { |
195 | 179 | unsigned long debugctl = get_debugctlmsr(); | |
196 | if (!child->thread.debugctlmsr) | 180 | |
197 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | 181 | debugctl &= ~DEBUGCTLMSR_BTF; |
182 | update_debugctlmsr(debugctl); | ||
183 | clear_tsk_thread_flag(child, TIF_BLOCKSTEP); | ||
198 | } | 184 | } |
199 | } | 185 | } |
200 | 186 | ||
@@ -213,11 +199,13 @@ void user_disable_single_step(struct task_struct *child) | |||
213 | /* | 199 | /* |
214 | * Make sure block stepping (BTF) is disabled. | 200 | * Make sure block stepping (BTF) is disabled. |
215 | */ | 201 | */ |
216 | write_debugctlmsr(child, | 202 | if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { |
217 | child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF); | 203 | unsigned long debugctl = get_debugctlmsr(); |
218 | 204 | ||
219 | if (!child->thread.debugctlmsr) | 205 | debugctl &= ~DEBUGCTLMSR_BTF; |
220 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | 206 | update_debugctlmsr(debugctl); |
207 | clear_tsk_thread_flag(child, TIF_BLOCKSTEP); | ||
208 | } | ||
221 | 209 | ||
222 | /* Always clear TIF_SINGLESTEP... */ | 210 | /* Always clear TIF_SINGLESTEP... */ |
223 | clear_tsk_thread_flag(child, TIF_SINGLESTEP); | 211 | clear_tsk_thread_flag(child, TIF_SINGLESTEP); |
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 86c9f91b48ae..cc2c60474fd0 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c | |||
@@ -175,6 +175,9 @@ static void add_mac_region(phys_addr_t start, unsigned long size) | |||
175 | struct tboot_mac_region *mr; | 175 | struct tboot_mac_region *mr; |
176 | phys_addr_t end = start + size; | 176 | phys_addr_t end = start + size; |
177 | 177 | ||
178 | if (tboot->num_mac_regions >= MAX_TB_MAC_REGIONS) | ||
179 | panic("tboot: Too many MAC regions\n"); | ||
180 | |||
178 | if (start && size) { | 181 | if (start && size) { |
179 | mr = &tboot->mac_regions[tboot->num_mac_regions++]; | 182 | mr = &tboot->mac_regions[tboot->num_mac_regions++]; |
180 | mr->start = round_down(start, PAGE_SIZE); | 183 | mr->start = round_down(start, PAGE_SIZE); |
@@ -184,18 +187,17 @@ static void add_mac_region(phys_addr_t start, unsigned long size) | |||
184 | 187 | ||
185 | static int tboot_setup_sleep(void) | 188 | static int tboot_setup_sleep(void) |
186 | { | 189 | { |
190 | int i; | ||
191 | |||
187 | tboot->num_mac_regions = 0; | 192 | tboot->num_mac_regions = 0; |
188 | 193 | ||
189 | /* S3 resume code */ | 194 | for (i = 0; i < e820.nr_map; i++) { |
190 | add_mac_region(acpi_wakeup_address, WAKEUP_SIZE); | 195 | if ((e820.map[i].type != E820_RAM) |
196 | && (e820.map[i].type != E820_RESERVED_KERN)) | ||
197 | continue; | ||
191 | 198 | ||
192 | #ifdef CONFIG_X86_TRAMPOLINE | 199 | add_mac_region(e820.map[i].addr, e820.map[i].size); |
193 | /* AP trampoline code */ | 200 | } |
194 | add_mac_region(virt_to_phys(trampoline_base), TRAMPOLINE_SIZE); | ||
195 | #endif | ||
196 | |||
197 | /* kernel code + data + bss */ | ||
198 | add_mac_region(virt_to_phys(_text), _end - _text); | ||
199 | 201 | ||
200 | tboot->acpi_sinfo.kernel_s3_resume_vector = acpi_wakeup_address; | 202 | tboot->acpi_sinfo.kernel_s3_resume_vector = acpi_wakeup_address; |
201 | 203 | ||
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 17b03dd3a6b5..7fea555929e2 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * SGI UltraViolet TLB flush routines. | 2 | * SGI UltraViolet TLB flush routines. |
3 | * | 3 | * |
4 | * (c) 2008 Cliff Wickman <cpw@sgi.com>, SGI. | 4 | * (c) 2008-2010 Cliff Wickman <cpw@sgi.com>, SGI. |
5 | * | 5 | * |
6 | * This code is released under the GNU General Public License version 2 or | 6 | * This code is released under the GNU General Public License version 2 or |
7 | * later. | 7 | * later. |
@@ -20,42 +20,67 @@ | |||
20 | #include <asm/idle.h> | 20 | #include <asm/idle.h> |
21 | #include <asm/tsc.h> | 21 | #include <asm/tsc.h> |
22 | #include <asm/irq_vectors.h> | 22 | #include <asm/irq_vectors.h> |
23 | #include <asm/timer.h> | ||
23 | 24 | ||
24 | static struct bau_control **uv_bau_table_bases __read_mostly; | 25 | struct msg_desc { |
25 | static int uv_bau_retry_limit __read_mostly; | 26 | struct bau_payload_queue_entry *msg; |
27 | int msg_slot; | ||
28 | int sw_ack_slot; | ||
29 | struct bau_payload_queue_entry *va_queue_first; | ||
30 | struct bau_payload_queue_entry *va_queue_last; | ||
31 | }; | ||
26 | 32 | ||
27 | /* base pnode in this partition */ | 33 | #define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 0x000000000bUL |
28 | static int uv_partition_base_pnode __read_mostly; | 34 | |
35 | static int uv_bau_max_concurrent __read_mostly; | ||
36 | |||
37 | static int nobau; | ||
38 | static int __init setup_nobau(char *arg) | ||
39 | { | ||
40 | nobau = 1; | ||
41 | return 0; | ||
42 | } | ||
43 | early_param("nobau", setup_nobau); | ||
29 | 44 | ||
30 | static unsigned long uv_mmask __read_mostly; | 45 | /* base pnode in this partition */ |
46 | static int uv_partition_base_pnode __read_mostly; | ||
47 | /* position of pnode (which is nasid>>1): */ | ||
48 | static int uv_nshift __read_mostly; | ||
49 | static unsigned long uv_mmask __read_mostly; | ||
31 | 50 | ||
32 | static DEFINE_PER_CPU(struct ptc_stats, ptcstats); | 51 | static DEFINE_PER_CPU(struct ptc_stats, ptcstats); |
33 | static DEFINE_PER_CPU(struct bau_control, bau_control); | 52 | static DEFINE_PER_CPU(struct bau_control, bau_control); |
53 | static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); | ||
54 | |||
55 | struct reset_args { | ||
56 | int sender; | ||
57 | }; | ||
34 | 58 | ||
35 | /* | 59 | /* |
36 | * Determine the first node on a blade. | 60 | * Determine the first node on a uvhub. 'Nodes' are used for kernel |
61 | * memory allocation. | ||
37 | */ | 62 | */ |
38 | static int __init blade_to_first_node(int blade) | 63 | static int __init uvhub_to_first_node(int uvhub) |
39 | { | 64 | { |
40 | int node, b; | 65 | int node, b; |
41 | 66 | ||
42 | for_each_online_node(node) { | 67 | for_each_online_node(node) { |
43 | b = uv_node_to_blade_id(node); | 68 | b = uv_node_to_blade_id(node); |
44 | if (blade == b) | 69 | if (uvhub == b) |
45 | return node; | 70 | return node; |
46 | } | 71 | } |
47 | return -1; /* shouldn't happen */ | 72 | return -1; |
48 | } | 73 | } |
49 | 74 | ||
50 | /* | 75 | /* |
51 | * Determine the apicid of the first cpu on a blade. | 76 | * Determine the apicid of the first cpu on a uvhub. |
52 | */ | 77 | */ |
53 | static int __init blade_to_first_apicid(int blade) | 78 | static int __init uvhub_to_first_apicid(int uvhub) |
54 | { | 79 | { |
55 | int cpu; | 80 | int cpu; |
56 | 81 | ||
57 | for_each_present_cpu(cpu) | 82 | for_each_present_cpu(cpu) |
58 | if (blade == uv_cpu_to_blade_id(cpu)) | 83 | if (uvhub == uv_cpu_to_blade_id(cpu)) |
59 | return per_cpu(x86_cpu_to_apicid, cpu); | 84 | return per_cpu(x86_cpu_to_apicid, cpu); |
60 | return -1; | 85 | return -1; |
61 | } | 86 | } |
@@ -68,195 +93,459 @@ static int __init blade_to_first_apicid(int blade) | |||
68 | * clear of the Timeout bit (as well) will free the resource. No reply will | 93 | * clear of the Timeout bit (as well) will free the resource. No reply will |
69 | * be sent (the hardware will only do one reply per message). | 94 | * be sent (the hardware will only do one reply per message). |
70 | */ | 95 | */ |
71 | static void uv_reply_to_message(int resource, | 96 | static inline void uv_reply_to_message(struct msg_desc *mdp, |
72 | struct bau_payload_queue_entry *msg, | 97 | struct bau_control *bcp) |
73 | struct bau_msg_status *msp) | ||
74 | { | 98 | { |
75 | unsigned long dw; | 99 | unsigned long dw; |
100 | struct bau_payload_queue_entry *msg; | ||
76 | 101 | ||
77 | dw = (1 << (resource + UV_SW_ACK_NPENDING)) | (1 << resource); | 102 | msg = mdp->msg; |
103 | if (!msg->canceled) { | ||
104 | dw = (msg->sw_ack_vector << UV_SW_ACK_NPENDING) | | ||
105 | msg->sw_ack_vector; | ||
106 | uv_write_local_mmr( | ||
107 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw); | ||
108 | } | ||
78 | msg->replied_to = 1; | 109 | msg->replied_to = 1; |
79 | msg->sw_ack_vector = 0; | 110 | msg->sw_ack_vector = 0; |
80 | if (msp) | ||
81 | msp->seen_by.bits = 0; | ||
82 | uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw); | ||
83 | } | 111 | } |
84 | 112 | ||
85 | /* | 113 | /* |
86 | * Do all the things a cpu should do for a TLB shootdown message. | 114 | * Process the receipt of a RETRY message |
87 | * Other cpu's may come here at the same time for this message. | ||
88 | */ | 115 | */ |
89 | static void uv_bau_process_message(struct bau_payload_queue_entry *msg, | 116 | static inline void uv_bau_process_retry_msg(struct msg_desc *mdp, |
90 | int msg_slot, int sw_ack_slot) | 117 | struct bau_control *bcp) |
91 | { | 118 | { |
92 | unsigned long this_cpu_mask; | 119 | int i; |
93 | struct bau_msg_status *msp; | 120 | int cancel_count = 0; |
94 | int cpu; | 121 | int slot2; |
122 | unsigned long msg_res; | ||
123 | unsigned long mmr = 0; | ||
124 | struct bau_payload_queue_entry *msg; | ||
125 | struct bau_payload_queue_entry *msg2; | ||
126 | struct ptc_stats *stat; | ||
95 | 127 | ||
96 | msp = __get_cpu_var(bau_control).msg_statuses + msg_slot; | 128 | msg = mdp->msg; |
97 | cpu = uv_blade_processor_id(); | 129 | stat = &per_cpu(ptcstats, bcp->cpu); |
98 | msg->number_of_cpus = | 130 | stat->d_retries++; |
99 | uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id())); | 131 | /* |
100 | this_cpu_mask = 1UL << cpu; | 132 | * cancel any message from msg+1 to the retry itself |
101 | if (msp->seen_by.bits & this_cpu_mask) | 133 | */ |
102 | return; | 134 | for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) { |
103 | atomic_or_long(&msp->seen_by.bits, this_cpu_mask); | 135 | if (msg2 > mdp->va_queue_last) |
136 | msg2 = mdp->va_queue_first; | ||
137 | if (msg2 == msg) | ||
138 | break; | ||
139 | |||
140 | /* same conditions for cancellation as uv_do_reset */ | ||
141 | if ((msg2->replied_to == 0) && (msg2->canceled == 0) && | ||
142 | (msg2->sw_ack_vector) && ((msg2->sw_ack_vector & | ||
143 | msg->sw_ack_vector) == 0) && | ||
144 | (msg2->sending_cpu == msg->sending_cpu) && | ||
145 | (msg2->msg_type != MSG_NOOP)) { | ||
146 | slot2 = msg2 - mdp->va_queue_first; | ||
147 | mmr = uv_read_local_mmr | ||
148 | (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); | ||
149 | msg_res = ((msg2->sw_ack_vector << 8) | | ||
150 | msg2->sw_ack_vector); | ||
151 | /* | ||
152 | * This is a message retry; clear the resources held | ||
153 | * by the previous message only if they timed out. | ||
154 | * If it has not timed out we have an unexpected | ||
155 | * situation to report. | ||
156 | */ | ||
157 | if (mmr & (msg_res << 8)) { | ||
158 | /* | ||
159 | * is the resource timed out? | ||
160 | * make everyone ignore the cancelled message. | ||
161 | */ | ||
162 | msg2->canceled = 1; | ||
163 | stat->d_canceled++; | ||
164 | cancel_count++; | ||
165 | uv_write_local_mmr( | ||
166 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, | ||
167 | (msg_res << 8) | msg_res); | ||
168 | } else | ||
169 | printk(KERN_INFO "note bau retry: no effect\n"); | ||
170 | } | ||
171 | } | ||
172 | if (!cancel_count) | ||
173 | stat->d_nocanceled++; | ||
174 | } | ||
104 | 175 | ||
105 | if (msg->replied_to == 1) | 176 | /* |
106 | return; | 177 | * Do all the things a cpu should do for a TLB shootdown message. |
178 | * Other cpu's may come here at the same time for this message. | ||
179 | */ | ||
180 | static void uv_bau_process_message(struct msg_desc *mdp, | ||
181 | struct bau_control *bcp) | ||
182 | { | ||
183 | int msg_ack_count; | ||
184 | short socket_ack_count = 0; | ||
185 | struct ptc_stats *stat; | ||
186 | struct bau_payload_queue_entry *msg; | ||
187 | struct bau_control *smaster = bcp->socket_master; | ||
107 | 188 | ||
189 | /* | ||
190 | * This must be a normal message, or retry of a normal message | ||
191 | */ | ||
192 | msg = mdp->msg; | ||
193 | stat = &per_cpu(ptcstats, bcp->cpu); | ||
108 | if (msg->address == TLB_FLUSH_ALL) { | 194 | if (msg->address == TLB_FLUSH_ALL) { |
109 | local_flush_tlb(); | 195 | local_flush_tlb(); |
110 | __get_cpu_var(ptcstats).alltlb++; | 196 | stat->d_alltlb++; |
111 | } else { | 197 | } else { |
112 | __flush_tlb_one(msg->address); | 198 | __flush_tlb_one(msg->address); |
113 | __get_cpu_var(ptcstats).onetlb++; | 199 | stat->d_onetlb++; |
114 | } | 200 | } |
201 | stat->d_requestee++; | ||
202 | |||
203 | /* | ||
204 | * One cpu on each uvhub has the additional job on a RETRY | ||
205 | * of releasing the resource held by the message that is | ||
206 | * being retried. That message is identified by sending | ||
207 | * cpu number. | ||
208 | */ | ||
209 | if (msg->msg_type == MSG_RETRY && bcp == bcp->uvhub_master) | ||
210 | uv_bau_process_retry_msg(mdp, bcp); | ||
115 | 211 | ||
116 | __get_cpu_var(ptcstats).requestee++; | 212 | /* |
213 | * This is a sw_ack message, so we have to reply to it. | ||
214 | * Count each responding cpu on the socket. This avoids | ||
215 | * pinging the count's cache line back and forth between | ||
216 | * the sockets. | ||
217 | */ | ||
218 | socket_ack_count = atomic_add_short_return(1, (struct atomic_short *) | ||
219 | &smaster->socket_acknowledge_count[mdp->msg_slot]); | ||
220 | if (socket_ack_count == bcp->cpus_in_socket) { | ||
221 | /* | ||
222 | * Both sockets dump their completed count total into | ||
223 | * the message's count. | ||
224 | */ | ||
225 | smaster->socket_acknowledge_count[mdp->msg_slot] = 0; | ||
226 | msg_ack_count = atomic_add_short_return(socket_ack_count, | ||
227 | (struct atomic_short *)&msg->acknowledge_count); | ||
228 | |||
229 | if (msg_ack_count == bcp->cpus_in_uvhub) { | ||
230 | /* | ||
231 | * All cpus in uvhub saw it; reply | ||
232 | */ | ||
233 | uv_reply_to_message(mdp, bcp); | ||
234 | } | ||
235 | } | ||
117 | 236 | ||
118 | atomic_inc_short(&msg->acknowledge_count); | 237 | return; |
119 | if (msg->number_of_cpus == msg->acknowledge_count) | ||
120 | uv_reply_to_message(sw_ack_slot, msg, msp); | ||
121 | } | 238 | } |
122 | 239 | ||
123 | /* | 240 | /* |
124 | * Examine the payload queue on one distribution node to see | 241 | * Determine the first cpu on a uvhub. |
125 | * which messages have not been seen, and which cpu(s) have not seen them. | 242 | */ |
243 | static int uvhub_to_first_cpu(int uvhub) | ||
244 | { | ||
245 | int cpu; | ||
246 | for_each_present_cpu(cpu) | ||
247 | if (uvhub == uv_cpu_to_blade_id(cpu)) | ||
248 | return cpu; | ||
249 | return -1; | ||
250 | } | ||
251 | |||
252 | /* | ||
253 | * Last resort when we get a large number of destination timeouts is | ||
254 | * to clear resources held by a given cpu. | ||
255 | * Do this with IPI so that all messages in the BAU message queue | ||
256 | * can be identified by their nonzero sw_ack_vector field. | ||
126 | * | 257 | * |
127 | * Returns the number of cpu's that have not responded. | 258 | * This is entered for a single cpu on the uvhub. |
259 | * The sender want's this uvhub to free a specific message's | ||
260 | * sw_ack resources. | ||
128 | */ | 261 | */ |
129 | static int uv_examine_destination(struct bau_control *bau_tablesp, int sender) | 262 | static void |
263 | uv_do_reset(void *ptr) | ||
130 | { | 264 | { |
131 | struct bau_payload_queue_entry *msg; | ||
132 | struct bau_msg_status *msp; | ||
133 | int count = 0; | ||
134 | int i; | 265 | int i; |
135 | int j; | 266 | int slot; |
267 | int count = 0; | ||
268 | unsigned long mmr; | ||
269 | unsigned long msg_res; | ||
270 | struct bau_control *bcp; | ||
271 | struct reset_args *rap; | ||
272 | struct bau_payload_queue_entry *msg; | ||
273 | struct ptc_stats *stat; | ||
136 | 274 | ||
137 | for (msg = bau_tablesp->va_queue_first, i = 0; i < DEST_Q_SIZE; | 275 | bcp = &per_cpu(bau_control, smp_processor_id()); |
138 | msg++, i++) { | 276 | rap = (struct reset_args *)ptr; |
139 | if ((msg->sending_cpu == sender) && (!msg->replied_to)) { | 277 | stat = &per_cpu(ptcstats, bcp->cpu); |
140 | msp = bau_tablesp->msg_statuses + i; | 278 | stat->d_resets++; |
141 | printk(KERN_DEBUG | 279 | |
142 | "blade %d: address:%#lx %d of %d, not cpu(s): ", | 280 | /* |
143 | i, msg->address, msg->acknowledge_count, | 281 | * We're looking for the given sender, and |
144 | msg->number_of_cpus); | 282 | * will free its sw_ack resource. |
145 | for (j = 0; j < msg->number_of_cpus; j++) { | 283 | * If all cpu's finally responded after the timeout, its |
146 | if (!((1L << j) & msp->seen_by.bits)) { | 284 | * message 'replied_to' was set. |
147 | count++; | 285 | */ |
148 | printk("%d ", j); | 286 | for (msg = bcp->va_queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) { |
149 | } | 287 | /* uv_do_reset: same conditions for cancellation as |
288 | uv_bau_process_retry_msg() */ | ||
289 | if ((msg->replied_to == 0) && | ||
290 | (msg->canceled == 0) && | ||
291 | (msg->sending_cpu == rap->sender) && | ||
292 | (msg->sw_ack_vector) && | ||
293 | (msg->msg_type != MSG_NOOP)) { | ||
294 | /* | ||
295 | * make everyone else ignore this message | ||
296 | */ | ||
297 | msg->canceled = 1; | ||
298 | slot = msg - bcp->va_queue_first; | ||
299 | count++; | ||
300 | /* | ||
301 | * only reset the resource if it is still pending | ||
302 | */ | ||
303 | mmr = uv_read_local_mmr | ||
304 | (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); | ||
305 | msg_res = ((msg->sw_ack_vector << 8) | | ||
306 | msg->sw_ack_vector); | ||
307 | if (mmr & msg_res) { | ||
308 | stat->d_rcanceled++; | ||
309 | uv_write_local_mmr( | ||
310 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, | ||
311 | msg_res); | ||
150 | } | 312 | } |
151 | printk("\n"); | ||
152 | } | 313 | } |
153 | } | 314 | } |
154 | return count; | 315 | return; |
155 | } | 316 | } |
156 | 317 | ||
157 | /* | 318 | /* |
158 | * Examine the payload queue on all the distribution nodes to see | 319 | * Use IPI to get all target uvhubs to release resources held by |
159 | * which messages have not been seen, and which cpu(s) have not seen them. | 320 | * a given sending cpu number. |
160 | * | ||
161 | * Returns the number of cpu's that have not responded. | ||
162 | */ | 321 | */ |
163 | static int uv_examine_destinations(struct bau_target_nodemask *distribution) | 322 | static void uv_reset_with_ipi(struct bau_target_uvhubmask *distribution, |
323 | int sender) | ||
164 | { | 324 | { |
165 | int sender; | 325 | int uvhub; |
166 | int i; | 326 | int cpu; |
167 | int count = 0; | 327 | cpumask_t mask; |
328 | struct reset_args reset_args; | ||
329 | |||
330 | reset_args.sender = sender; | ||
168 | 331 | ||
169 | sender = smp_processor_id(); | 332 | cpus_clear(mask); |
170 | for (i = 0; i < sizeof(struct bau_target_nodemask) * BITSPERBYTE; i++) { | 333 | /* find a single cpu for each uvhub in this distribution mask */ |
171 | if (!bau_node_isset(i, distribution)) | 334 | for (uvhub = 0; |
335 | uvhub < sizeof(struct bau_target_uvhubmask) * BITSPERBYTE; | ||
336 | uvhub++) { | ||
337 | if (!bau_uvhub_isset(uvhub, distribution)) | ||
172 | continue; | 338 | continue; |
173 | count += uv_examine_destination(uv_bau_table_bases[i], sender); | 339 | /* find a cpu for this uvhub */ |
340 | cpu = uvhub_to_first_cpu(uvhub); | ||
341 | cpu_set(cpu, mask); | ||
174 | } | 342 | } |
175 | return count; | 343 | /* IPI all cpus; Preemption is already disabled */ |
344 | smp_call_function_many(&mask, uv_do_reset, (void *)&reset_args, 1); | ||
345 | return; | ||
346 | } | ||
347 | |||
348 | static inline unsigned long | ||
349 | cycles_2_us(unsigned long long cyc) | ||
350 | { | ||
351 | unsigned long long ns; | ||
352 | unsigned long us; | ||
353 | ns = (cyc * per_cpu(cyc2ns, smp_processor_id())) | ||
354 | >> CYC2NS_SCALE_FACTOR; | ||
355 | us = ns / 1000; | ||
356 | return us; | ||
176 | } | 357 | } |
177 | 358 | ||
178 | /* | 359 | /* |
179 | * wait for completion of a broadcast message | 360 | * wait for all cpus on this hub to finish their sends and go quiet |
180 | * | 361 | * leaves uvhub_quiesce set so that no new broadcasts are started by |
181 | * return COMPLETE, RETRY or GIVEUP | 362 | * bau_flush_send_and_wait() |
363 | */ | ||
364 | static inline void | ||
365 | quiesce_local_uvhub(struct bau_control *hmaster) | ||
366 | { | ||
367 | atomic_add_short_return(1, (struct atomic_short *) | ||
368 | &hmaster->uvhub_quiesce); | ||
369 | } | ||
370 | |||
371 | /* | ||
372 | * mark this quiet-requestor as done | ||
373 | */ | ||
374 | static inline void | ||
375 | end_uvhub_quiesce(struct bau_control *hmaster) | ||
376 | { | ||
377 | atomic_add_short_return(-1, (struct atomic_short *) | ||
378 | &hmaster->uvhub_quiesce); | ||
379 | } | ||
380 | |||
381 | /* | ||
382 | * Wait for completion of a broadcast software ack message | ||
383 | * return COMPLETE, RETRY(PLUGGED or TIMEOUT) or GIVEUP | ||
182 | */ | 384 | */ |
183 | static int uv_wait_completion(struct bau_desc *bau_desc, | 385 | static int uv_wait_completion(struct bau_desc *bau_desc, |
184 | unsigned long mmr_offset, int right_shift) | 386 | unsigned long mmr_offset, int right_shift, int this_cpu, |
387 | struct bau_control *bcp, struct bau_control *smaster, long try) | ||
185 | { | 388 | { |
186 | int exams = 0; | 389 | int relaxes = 0; |
187 | long destination_timeouts = 0; | ||
188 | long source_timeouts = 0; | ||
189 | unsigned long descriptor_status; | 390 | unsigned long descriptor_status; |
391 | unsigned long mmr; | ||
392 | unsigned long mask; | ||
393 | cycles_t ttime; | ||
394 | cycles_t timeout_time; | ||
395 | struct ptc_stats *stat = &per_cpu(ptcstats, this_cpu); | ||
396 | struct bau_control *hmaster; | ||
397 | |||
398 | hmaster = bcp->uvhub_master; | ||
399 | timeout_time = get_cycles() + bcp->timeout_interval; | ||
190 | 400 | ||
401 | /* spin on the status MMR, waiting for it to go idle */ | ||
191 | while ((descriptor_status = (((unsigned long) | 402 | while ((descriptor_status = (((unsigned long) |
192 | uv_read_local_mmr(mmr_offset) >> | 403 | uv_read_local_mmr(mmr_offset) >> |
193 | right_shift) & UV_ACT_STATUS_MASK)) != | 404 | right_shift) & UV_ACT_STATUS_MASK)) != |
194 | DESC_STATUS_IDLE) { | 405 | DESC_STATUS_IDLE) { |
195 | if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) { | ||
196 | source_timeouts++; | ||
197 | if (source_timeouts > SOURCE_TIMEOUT_LIMIT) | ||
198 | source_timeouts = 0; | ||
199 | __get_cpu_var(ptcstats).s_retry++; | ||
200 | return FLUSH_RETRY; | ||
201 | } | ||
202 | /* | 406 | /* |
203 | * spin here looking for progress at the destinations | 407 | * Our software ack messages may be blocked because there are |
408 | * no swack resources available. As long as none of them | ||
409 | * has timed out hardware will NACK our message and its | ||
410 | * state will stay IDLE. | ||
204 | */ | 411 | */ |
205 | if (descriptor_status == DESC_STATUS_DESTINATION_TIMEOUT) { | 412 | if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) { |
206 | destination_timeouts++; | 413 | stat->s_stimeout++; |
207 | if (destination_timeouts > DESTINATION_TIMEOUT_LIMIT) { | 414 | return FLUSH_GIVEUP; |
208 | /* | 415 | } else if (descriptor_status == |
209 | * returns number of cpus not responding | 416 | DESC_STATUS_DESTINATION_TIMEOUT) { |
210 | */ | 417 | stat->s_dtimeout++; |
211 | if (uv_examine_destinations | 418 | ttime = get_cycles(); |
212 | (&bau_desc->distribution) == 0) { | 419 | |
213 | __get_cpu_var(ptcstats).d_retry++; | 420 | /* |
214 | return FLUSH_RETRY; | 421 | * Our retries may be blocked by all destination |
215 | } | 422 | * swack resources being consumed, and a timeout |
216 | exams++; | 423 | * pending. In that case hardware returns the |
217 | if (exams >= uv_bau_retry_limit) { | 424 | * ERROR that looks like a destination timeout. |
218 | printk(KERN_DEBUG | 425 | */ |
219 | "uv_flush_tlb_others"); | 426 | if (cycles_2_us(ttime - bcp->send_message) < BIOS_TO) { |
220 | printk("giving up on cpu %d\n", | 427 | bcp->conseccompletes = 0; |
221 | smp_processor_id()); | 428 | return FLUSH_RETRY_PLUGGED; |
429 | } | ||
430 | |||
431 | bcp->conseccompletes = 0; | ||
432 | return FLUSH_RETRY_TIMEOUT; | ||
433 | } else { | ||
434 | /* | ||
435 | * descriptor_status is still BUSY | ||
436 | */ | ||
437 | cpu_relax(); | ||
438 | relaxes++; | ||
439 | if (relaxes >= 10000) { | ||
440 | relaxes = 0; | ||
441 | if (get_cycles() > timeout_time) { | ||
442 | quiesce_local_uvhub(hmaster); | ||
443 | |||
444 | /* single-thread the register change */ | ||
445 | spin_lock(&hmaster->masks_lock); | ||
446 | mmr = uv_read_local_mmr(mmr_offset); | ||
447 | mask = 0UL; | ||
448 | mask |= (3UL < right_shift); | ||
449 | mask = ~mask; | ||
450 | mmr &= mask; | ||
451 | uv_write_local_mmr(mmr_offset, mmr); | ||
452 | spin_unlock(&hmaster->masks_lock); | ||
453 | end_uvhub_quiesce(hmaster); | ||
454 | stat->s_busy++; | ||
222 | return FLUSH_GIVEUP; | 455 | return FLUSH_GIVEUP; |
223 | } | 456 | } |
224 | /* | ||
225 | * delays can hang the simulator | ||
226 | udelay(1000); | ||
227 | */ | ||
228 | destination_timeouts = 0; | ||
229 | } | 457 | } |
230 | } | 458 | } |
231 | cpu_relax(); | ||
232 | } | 459 | } |
460 | bcp->conseccompletes++; | ||
233 | return FLUSH_COMPLETE; | 461 | return FLUSH_COMPLETE; |
234 | } | 462 | } |
235 | 463 | ||
464 | static inline cycles_t | ||
465 | sec_2_cycles(unsigned long sec) | ||
466 | { | ||
467 | unsigned long ns; | ||
468 | cycles_t cyc; | ||
469 | |||
470 | ns = sec * 1000000000; | ||
471 | cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); | ||
472 | return cyc; | ||
473 | } | ||
474 | |||
475 | /* | ||
476 | * conditionally add 1 to *v, unless *v is >= u | ||
477 | * return 0 if we cannot add 1 to *v because it is >= u | ||
478 | * return 1 if we can add 1 to *v because it is < u | ||
479 | * the add is atomic | ||
480 | * | ||
481 | * This is close to atomic_add_unless(), but this allows the 'u' value | ||
482 | * to be lowered below the current 'v'. atomic_add_unless can only stop | ||
483 | * on equal. | ||
484 | */ | ||
485 | static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u) | ||
486 | { | ||
487 | spin_lock(lock); | ||
488 | if (atomic_read(v) >= u) { | ||
489 | spin_unlock(lock); | ||
490 | return 0; | ||
491 | } | ||
492 | atomic_inc(v); | ||
493 | spin_unlock(lock); | ||
494 | return 1; | ||
495 | } | ||
496 | |||
236 | /** | 497 | /** |
237 | * uv_flush_send_and_wait | 498 | * uv_flush_send_and_wait |
238 | * | 499 | * |
239 | * Send a broadcast and wait for a broadcast message to complete. | 500 | * Send a broadcast and wait for it to complete. |
240 | * | 501 | * |
241 | * The flush_mask contains the cpus the broadcast was sent to. | 502 | * The flush_mask contains the cpus the broadcast is to be sent to, plus |
503 | * cpus that are on the local uvhub. | ||
242 | * | 504 | * |
243 | * Returns NULL if all remote flushing was done. The mask is zeroed. | 505 | * Returns NULL if all flushing represented in the mask was done. The mask |
506 | * is zeroed. | ||
244 | * Returns @flush_mask if some remote flushing remains to be done. The | 507 | * Returns @flush_mask if some remote flushing remains to be done. The |
245 | * mask will have some bits still set. | 508 | * mask will have some bits still set, representing any cpus on the local |
509 | * uvhub (not current cpu) and any on remote uvhubs if the broadcast failed. | ||
246 | */ | 510 | */ |
247 | const struct cpumask *uv_flush_send_and_wait(int cpu, int this_pnode, | 511 | const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc, |
248 | struct bau_desc *bau_desc, | 512 | struct cpumask *flush_mask, |
249 | struct cpumask *flush_mask) | 513 | struct bau_control *bcp) |
250 | { | 514 | { |
251 | int completion_status = 0; | ||
252 | int right_shift; | 515 | int right_shift; |
253 | int tries = 0; | 516 | int uvhub; |
254 | int pnode; | ||
255 | int bit; | 517 | int bit; |
518 | int completion_status = 0; | ||
519 | int seq_number = 0; | ||
520 | long try = 0; | ||
521 | int cpu = bcp->uvhub_cpu; | ||
522 | int this_cpu = bcp->cpu; | ||
523 | int this_uvhub = bcp->uvhub; | ||
256 | unsigned long mmr_offset; | 524 | unsigned long mmr_offset; |
257 | unsigned long index; | 525 | unsigned long index; |
258 | cycles_t time1; | 526 | cycles_t time1; |
259 | cycles_t time2; | 527 | cycles_t time2; |
528 | struct ptc_stats *stat = &per_cpu(ptcstats, bcp->cpu); | ||
529 | struct bau_control *smaster = bcp->socket_master; | ||
530 | struct bau_control *hmaster = bcp->uvhub_master; | ||
531 | |||
532 | /* | ||
533 | * Spin here while there are hmaster->max_concurrent or more active | ||
534 | * descriptors. This is the per-uvhub 'throttle'. | ||
535 | */ | ||
536 | if (!atomic_inc_unless_ge(&hmaster->uvhub_lock, | ||
537 | &hmaster->active_descriptor_count, | ||
538 | hmaster->max_concurrent)) { | ||
539 | stat->s_throttles++; | ||
540 | do { | ||
541 | cpu_relax(); | ||
542 | } while (!atomic_inc_unless_ge(&hmaster->uvhub_lock, | ||
543 | &hmaster->active_descriptor_count, | ||
544 | hmaster->max_concurrent)); | ||
545 | } | ||
546 | |||
547 | while (hmaster->uvhub_quiesce) | ||
548 | cpu_relax(); | ||
260 | 549 | ||
261 | if (cpu < UV_CPUS_PER_ACT_STATUS) { | 550 | if (cpu < UV_CPUS_PER_ACT_STATUS) { |
262 | mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; | 551 | mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; |
@@ -268,24 +557,108 @@ const struct cpumask *uv_flush_send_and_wait(int cpu, int this_pnode, | |||
268 | } | 557 | } |
269 | time1 = get_cycles(); | 558 | time1 = get_cycles(); |
270 | do { | 559 | do { |
271 | tries++; | 560 | /* |
561 | * Every message from any given cpu gets a unique message | ||
562 | * sequence number. But retries use that same number. | ||
563 | * Our message may have timed out at the destination because | ||
564 | * all sw-ack resources are in use and there is a timeout | ||
565 | * pending there. In that case, our last send never got | ||
566 | * placed into the queue and we need to persist until it | ||
567 | * does. | ||
568 | * | ||
569 | * Make any retry a type MSG_RETRY so that the destination will | ||
570 | * free any resource held by a previous message from this cpu. | ||
571 | */ | ||
572 | if (try == 0) { | ||
573 | /* use message type set by the caller the first time */ | ||
574 | seq_number = bcp->message_number++; | ||
575 | } else { | ||
576 | /* use RETRY type on all the rest; same sequence */ | ||
577 | bau_desc->header.msg_type = MSG_RETRY; | ||
578 | stat->s_retry_messages++; | ||
579 | } | ||
580 | bau_desc->header.sequence = seq_number; | ||
272 | index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | | 581 | index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | |
273 | cpu; | 582 | bcp->uvhub_cpu; |
583 | bcp->send_message = get_cycles(); | ||
584 | |||
274 | uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); | 585 | uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); |
586 | |||
587 | try++; | ||
275 | completion_status = uv_wait_completion(bau_desc, mmr_offset, | 588 | completion_status = uv_wait_completion(bau_desc, mmr_offset, |
276 | right_shift); | 589 | right_shift, this_cpu, bcp, smaster, try); |
277 | } while (completion_status == FLUSH_RETRY); | 590 | |
591 | if (completion_status == FLUSH_RETRY_PLUGGED) { | ||
592 | /* | ||
593 | * Our retries may be blocked by all destination swack | ||
594 | * resources being consumed, and a timeout pending. In | ||
595 | * that case hardware immediately returns the ERROR | ||
596 | * that looks like a destination timeout. | ||
597 | */ | ||
598 | udelay(TIMEOUT_DELAY); | ||
599 | bcp->plugged_tries++; | ||
600 | if (bcp->plugged_tries >= PLUGSB4RESET) { | ||
601 | bcp->plugged_tries = 0; | ||
602 | quiesce_local_uvhub(hmaster); | ||
603 | spin_lock(&hmaster->queue_lock); | ||
604 | uv_reset_with_ipi(&bau_desc->distribution, | ||
605 | this_cpu); | ||
606 | spin_unlock(&hmaster->queue_lock); | ||
607 | end_uvhub_quiesce(hmaster); | ||
608 | bcp->ipi_attempts++; | ||
609 | stat->s_resets_plug++; | ||
610 | } | ||
611 | } else if (completion_status == FLUSH_RETRY_TIMEOUT) { | ||
612 | hmaster->max_concurrent = 1; | ||
613 | bcp->timeout_tries++; | ||
614 | udelay(TIMEOUT_DELAY); | ||
615 | if (bcp->timeout_tries >= TIMEOUTSB4RESET) { | ||
616 | bcp->timeout_tries = 0; | ||
617 | quiesce_local_uvhub(hmaster); | ||
618 | spin_lock(&hmaster->queue_lock); | ||
619 | uv_reset_with_ipi(&bau_desc->distribution, | ||
620 | this_cpu); | ||
621 | spin_unlock(&hmaster->queue_lock); | ||
622 | end_uvhub_quiesce(hmaster); | ||
623 | bcp->ipi_attempts++; | ||
624 | stat->s_resets_timeout++; | ||
625 | } | ||
626 | } | ||
627 | if (bcp->ipi_attempts >= 3) { | ||
628 | bcp->ipi_attempts = 0; | ||
629 | completion_status = FLUSH_GIVEUP; | ||
630 | break; | ||
631 | } | ||
632 | cpu_relax(); | ||
633 | } while ((completion_status == FLUSH_RETRY_PLUGGED) || | ||
634 | (completion_status == FLUSH_RETRY_TIMEOUT)); | ||
278 | time2 = get_cycles(); | 635 | time2 = get_cycles(); |
279 | __get_cpu_var(ptcstats).sflush += (time2 - time1); | ||
280 | if (tries > 1) | ||
281 | __get_cpu_var(ptcstats).retriesok++; | ||
282 | 636 | ||
283 | if (completion_status == FLUSH_GIVEUP) { | 637 | if ((completion_status == FLUSH_COMPLETE) && (bcp->conseccompletes > 5) |
638 | && (hmaster->max_concurrent < hmaster->max_concurrent_constant)) | ||
639 | hmaster->max_concurrent++; | ||
640 | |||
641 | /* | ||
642 | * hold any cpu not timing out here; no other cpu currently held by | ||
643 | * the 'throttle' should enter the activation code | ||
644 | */ | ||
645 | while (hmaster->uvhub_quiesce) | ||
646 | cpu_relax(); | ||
647 | atomic_dec(&hmaster->active_descriptor_count); | ||
648 | |||
649 | /* guard against cycles wrap */ | ||
650 | if (time2 > time1) | ||
651 | stat->s_time += (time2 - time1); | ||
652 | else | ||
653 | stat->s_requestor--; /* don't count this one */ | ||
654 | if (completion_status == FLUSH_COMPLETE && try > 1) | ||
655 | stat->s_retriesok++; | ||
656 | else if (completion_status == FLUSH_GIVEUP) { | ||
284 | /* | 657 | /* |
285 | * Cause the caller to do an IPI-style TLB shootdown on | 658 | * Cause the caller to do an IPI-style TLB shootdown on |
286 | * the cpu's, all of which are still in the mask. | 659 | * the target cpu's, all of which are still in the mask. |
287 | */ | 660 | */ |
288 | __get_cpu_var(ptcstats).ptc_i++; | 661 | stat->s_giveup++; |
289 | return flush_mask; | 662 | return flush_mask; |
290 | } | 663 | } |
291 | 664 | ||
@@ -294,18 +667,17 @@ const struct cpumask *uv_flush_send_and_wait(int cpu, int this_pnode, | |||
294 | * use the IPI method of shootdown on them. | 667 | * use the IPI method of shootdown on them. |
295 | */ | 668 | */ |
296 | for_each_cpu(bit, flush_mask) { | 669 | for_each_cpu(bit, flush_mask) { |
297 | pnode = uv_cpu_to_pnode(bit); | 670 | uvhub = uv_cpu_to_blade_id(bit); |
298 | if (pnode == this_pnode) | 671 | if (uvhub == this_uvhub) |
299 | continue; | 672 | continue; |
300 | cpumask_clear_cpu(bit, flush_mask); | 673 | cpumask_clear_cpu(bit, flush_mask); |
301 | } | 674 | } |
302 | if (!cpumask_empty(flush_mask)) | 675 | if (!cpumask_empty(flush_mask)) |
303 | return flush_mask; | 676 | return flush_mask; |
677 | |||
304 | return NULL; | 678 | return NULL; |
305 | } | 679 | } |
306 | 680 | ||
307 | static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); | ||
308 | |||
309 | /** | 681 | /** |
310 | * uv_flush_tlb_others - globally purge translation cache of a virtual | 682 | * uv_flush_tlb_others - globally purge translation cache of a virtual |
311 | * address or all TLB's | 683 | * address or all TLB's |
@@ -322,8 +694,8 @@ static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); | |||
322 | * The caller has derived the cpumask from the mm_struct. This function | 694 | * The caller has derived the cpumask from the mm_struct. This function |
323 | * is called only if there are bits set in the mask. (e.g. flush_tlb_page()) | 695 | * is called only if there are bits set in the mask. (e.g. flush_tlb_page()) |
324 | * | 696 | * |
325 | * The cpumask is converted into a nodemask of the nodes containing | 697 | * The cpumask is converted into a uvhubmask of the uvhubs containing |
326 | * the cpus. | 698 | * those cpus. |
327 | * | 699 | * |
328 | * Note that this function should be called with preemption disabled. | 700 | * Note that this function should be called with preemption disabled. |
329 | * | 701 | * |
@@ -335,52 +707,82 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | |||
335 | struct mm_struct *mm, | 707 | struct mm_struct *mm, |
336 | unsigned long va, unsigned int cpu) | 708 | unsigned long va, unsigned int cpu) |
337 | { | 709 | { |
338 | struct cpumask *flush_mask = __get_cpu_var(uv_flush_tlb_mask); | 710 | int remotes; |
339 | int i; | 711 | int tcpu; |
340 | int bit; | 712 | int uvhub; |
341 | int pnode; | ||
342 | int uv_cpu; | ||
343 | int this_pnode; | ||
344 | int locals = 0; | 713 | int locals = 0; |
345 | struct bau_desc *bau_desc; | 714 | struct bau_desc *bau_desc; |
715 | struct cpumask *flush_mask; | ||
716 | struct ptc_stats *stat; | ||
717 | struct bau_control *bcp; | ||
346 | 718 | ||
347 | cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); | 719 | if (nobau) |
720 | return cpumask; | ||
348 | 721 | ||
349 | uv_cpu = uv_blade_processor_id(); | 722 | bcp = &per_cpu(bau_control, cpu); |
350 | this_pnode = uv_hub_info->pnode; | 723 | /* |
351 | bau_desc = __get_cpu_var(bau_control).descriptor_base; | 724 | * Each sending cpu has a per-cpu mask which it fills from the caller's |
352 | bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu; | 725 | * cpu mask. Only remote cpus are converted to uvhubs and copied. |
726 | */ | ||
727 | flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu); | ||
728 | /* | ||
729 | * copy cpumask to flush_mask, removing current cpu | ||
730 | * (current cpu should already have been flushed by the caller and | ||
731 | * should never be returned if we return flush_mask) | ||
732 | */ | ||
733 | cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); | ||
734 | if (cpu_isset(cpu, *cpumask)) | ||
735 | locals++; /* current cpu was targeted */ | ||
353 | 736 | ||
354 | bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); | 737 | bau_desc = bcp->descriptor_base; |
738 | bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu; | ||
355 | 739 | ||
356 | i = 0; | 740 | bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); |
357 | for_each_cpu(bit, flush_mask) { | 741 | remotes = 0; |
358 | pnode = uv_cpu_to_pnode(bit); | 742 | for_each_cpu(tcpu, flush_mask) { |
359 | BUG_ON(pnode > (UV_DISTRIBUTION_SIZE - 1)); | 743 | uvhub = uv_cpu_to_blade_id(tcpu); |
360 | if (pnode == this_pnode) { | 744 | if (uvhub == bcp->uvhub) { |
361 | locals++; | 745 | locals++; |
362 | continue; | 746 | continue; |
363 | } | 747 | } |
364 | bau_node_set(pnode - uv_partition_base_pnode, | 748 | bau_uvhub_set(uvhub, &bau_desc->distribution); |
365 | &bau_desc->distribution); | 749 | remotes++; |
366 | i++; | ||
367 | } | 750 | } |
368 | if (i == 0) { | 751 | if (remotes == 0) { |
369 | /* | 752 | /* |
370 | * no off_node flushing; return status for local node | 753 | * No off_hub flushing; return status for local hub. |
754 | * Return the caller's mask if all were local (the current | ||
755 | * cpu may be in that mask). | ||
371 | */ | 756 | */ |
372 | if (locals) | 757 | if (locals) |
373 | return flush_mask; | 758 | return cpumask; |
374 | else | 759 | else |
375 | return NULL; | 760 | return NULL; |
376 | } | 761 | } |
377 | __get_cpu_var(ptcstats).requestor++; | 762 | stat = &per_cpu(ptcstats, cpu); |
378 | __get_cpu_var(ptcstats).ntargeted += i; | 763 | stat->s_requestor++; |
764 | stat->s_ntargcpu += remotes; | ||
765 | remotes = bau_uvhub_weight(&bau_desc->distribution); | ||
766 | stat->s_ntarguvhub += remotes; | ||
767 | if (remotes >= 16) | ||
768 | stat->s_ntarguvhub16++; | ||
769 | else if (remotes >= 8) | ||
770 | stat->s_ntarguvhub8++; | ||
771 | else if (remotes >= 4) | ||
772 | stat->s_ntarguvhub4++; | ||
773 | else if (remotes >= 2) | ||
774 | stat->s_ntarguvhub2++; | ||
775 | else | ||
776 | stat->s_ntarguvhub1++; | ||
379 | 777 | ||
380 | bau_desc->payload.address = va; | 778 | bau_desc->payload.address = va; |
381 | bau_desc->payload.sending_cpu = cpu; | 779 | bau_desc->payload.sending_cpu = cpu; |
382 | 780 | ||
383 | return uv_flush_send_and_wait(uv_cpu, this_pnode, bau_desc, flush_mask); | 781 | /* |
782 | * uv_flush_send_and_wait returns null if all cpu's were messaged, or | ||
783 | * the adjusted flush_mask if any cpu's were not messaged. | ||
784 | */ | ||
785 | return uv_flush_send_and_wait(bau_desc, flush_mask, bcp); | ||
384 | } | 786 | } |
385 | 787 | ||
386 | /* | 788 | /* |
@@ -389,87 +791,70 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | |||
389 | * | 791 | * |
390 | * We received a broadcast assist message. | 792 | * We received a broadcast assist message. |
391 | * | 793 | * |
392 | * Interrupts may have been disabled; this interrupt could represent | 794 | * Interrupts are disabled; this interrupt could represent |
393 | * the receipt of several messages. | 795 | * the receipt of several messages. |
394 | * | 796 | * |
395 | * All cores/threads on this node get this interrupt. | 797 | * All cores/threads on this hub get this interrupt. |
396 | * The last one to see it does the s/w ack. | 798 | * The last one to see it does the software ack. |
397 | * (the resource will not be freed until noninterruptable cpus see this | 799 | * (the resource will not be freed until noninterruptable cpus see this |
398 | * interrupt; hardware will timeout the s/w ack and reply ERROR) | 800 | * interrupt; hardware may timeout the s/w ack and reply ERROR) |
399 | */ | 801 | */ |
400 | void uv_bau_message_interrupt(struct pt_regs *regs) | 802 | void uv_bau_message_interrupt(struct pt_regs *regs) |
401 | { | 803 | { |
402 | struct bau_payload_queue_entry *va_queue_first; | ||
403 | struct bau_payload_queue_entry *va_queue_last; | ||
404 | struct bau_payload_queue_entry *msg; | ||
405 | struct pt_regs *old_regs = set_irq_regs(regs); | ||
406 | cycles_t time1; | ||
407 | cycles_t time2; | ||
408 | int msg_slot; | ||
409 | int sw_ack_slot; | ||
410 | int fw; | ||
411 | int count = 0; | 804 | int count = 0; |
412 | unsigned long local_pnode; | 805 | cycles_t time_start; |
413 | 806 | struct bau_payload_queue_entry *msg; | |
414 | ack_APIC_irq(); | 807 | struct bau_control *bcp; |
415 | exit_idle(); | 808 | struct ptc_stats *stat; |
416 | irq_enter(); | 809 | struct msg_desc msgdesc; |
417 | 810 | ||
418 | time1 = get_cycles(); | 811 | time_start = get_cycles(); |
419 | 812 | bcp = &per_cpu(bau_control, smp_processor_id()); | |
420 | local_pnode = uv_blade_to_pnode(uv_numa_blade_id()); | 813 | stat = &per_cpu(ptcstats, smp_processor_id()); |
421 | 814 | msgdesc.va_queue_first = bcp->va_queue_first; | |
422 | va_queue_first = __get_cpu_var(bau_control).va_queue_first; | 815 | msgdesc.va_queue_last = bcp->va_queue_last; |
423 | va_queue_last = __get_cpu_var(bau_control).va_queue_last; | 816 | msg = bcp->bau_msg_head; |
424 | |||
425 | msg = __get_cpu_var(bau_control).bau_msg_head; | ||
426 | while (msg->sw_ack_vector) { | 817 | while (msg->sw_ack_vector) { |
427 | count++; | 818 | count++; |
428 | fw = msg->sw_ack_vector; | 819 | msgdesc.msg_slot = msg - msgdesc.va_queue_first; |
429 | msg_slot = msg - va_queue_first; | 820 | msgdesc.sw_ack_slot = ffs(msg->sw_ack_vector) - 1; |
430 | sw_ack_slot = ffs(fw) - 1; | 821 | msgdesc.msg = msg; |
431 | 822 | uv_bau_process_message(&msgdesc, bcp); | |
432 | uv_bau_process_message(msg, msg_slot, sw_ack_slot); | ||
433 | |||
434 | msg++; | 823 | msg++; |
435 | if (msg > va_queue_last) | 824 | if (msg > msgdesc.va_queue_last) |
436 | msg = va_queue_first; | 825 | msg = msgdesc.va_queue_first; |
437 | __get_cpu_var(bau_control).bau_msg_head = msg; | 826 | bcp->bau_msg_head = msg; |
438 | } | 827 | } |
828 | stat->d_time += (get_cycles() - time_start); | ||
439 | if (!count) | 829 | if (!count) |
440 | __get_cpu_var(ptcstats).nomsg++; | 830 | stat->d_nomsg++; |
441 | else if (count > 1) | 831 | else if (count > 1) |
442 | __get_cpu_var(ptcstats).multmsg++; | 832 | stat->d_multmsg++; |
443 | 833 | ack_APIC_irq(); | |
444 | time2 = get_cycles(); | ||
445 | __get_cpu_var(ptcstats).dflush += (time2 - time1); | ||
446 | |||
447 | irq_exit(); | ||
448 | set_irq_regs(old_regs); | ||
449 | } | 834 | } |
450 | 835 | ||
451 | /* | 836 | /* |
452 | * uv_enable_timeouts | 837 | * uv_enable_timeouts |
453 | * | 838 | * |
454 | * Each target blade (i.e. blades that have cpu's) needs to have | 839 | * Each target uvhub (i.e. a uvhub that has no cpu's) needs to have |
455 | * shootdown message timeouts enabled. The timeout does not cause | 840 | * shootdown message timeouts enabled. The timeout does not cause |
456 | * an interrupt, but causes an error message to be returned to | 841 | * an interrupt, but causes an error message to be returned to |
457 | * the sender. | 842 | * the sender. |
458 | */ | 843 | */ |
459 | static void uv_enable_timeouts(void) | 844 | static void uv_enable_timeouts(void) |
460 | { | 845 | { |
461 | int blade; | 846 | int uvhub; |
462 | int nblades; | 847 | int nuvhubs; |
463 | int pnode; | 848 | int pnode; |
464 | unsigned long mmr_image; | 849 | unsigned long mmr_image; |
465 | 850 | ||
466 | nblades = uv_num_possible_blades(); | 851 | nuvhubs = uv_num_possible_blades(); |
467 | 852 | ||
468 | for (blade = 0; blade < nblades; blade++) { | 853 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) { |
469 | if (!uv_blade_nr_possible_cpus(blade)) | 854 | if (!uv_blade_nr_possible_cpus(uvhub)) |
470 | continue; | 855 | continue; |
471 | 856 | ||
472 | pnode = uv_blade_to_pnode(blade); | 857 | pnode = uv_blade_to_pnode(uvhub); |
473 | mmr_image = | 858 | mmr_image = |
474 | uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL); | 859 | uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL); |
475 | /* | 860 | /* |
@@ -479,16 +864,16 @@ static void uv_enable_timeouts(void) | |||
479 | * To program the period, the SOFT_ACK_MODE must be off. | 864 | * To program the period, the SOFT_ACK_MODE must be off. |
480 | */ | 865 | */ |
481 | mmr_image &= ~((unsigned long)1 << | 866 | mmr_image &= ~((unsigned long)1 << |
482 | UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT); | 867 | UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT); |
483 | uv_write_global_mmr64 | 868 | uv_write_global_mmr64 |
484 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); | 869 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); |
485 | /* | 870 | /* |
486 | * Set the 4-bit period. | 871 | * Set the 4-bit period. |
487 | */ | 872 | */ |
488 | mmr_image &= ~((unsigned long)0xf << | 873 | mmr_image &= ~((unsigned long)0xf << |
489 | UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT); | 874 | UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT); |
490 | mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD << | 875 | mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD << |
491 | UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT); | 876 | UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT); |
492 | uv_write_global_mmr64 | 877 | uv_write_global_mmr64 |
493 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); | 878 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); |
494 | /* | 879 | /* |
@@ -497,7 +882,7 @@ static void uv_enable_timeouts(void) | |||
497 | * indicated in bits 2:0 (7 causes all of them to timeout). | 882 | * indicated in bits 2:0 (7 causes all of them to timeout). |
498 | */ | 883 | */ |
499 | mmr_image |= ((unsigned long)1 << | 884 | mmr_image |= ((unsigned long)1 << |
500 | UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT); | 885 | UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT); |
501 | uv_write_global_mmr64 | 886 | uv_write_global_mmr64 |
502 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); | 887 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); |
503 | } | 888 | } |
@@ -522,9 +907,20 @@ static void uv_ptc_seq_stop(struct seq_file *file, void *data) | |||
522 | { | 907 | { |
523 | } | 908 | } |
524 | 909 | ||
910 | static inline unsigned long long | ||
911 | millisec_2_cycles(unsigned long millisec) | ||
912 | { | ||
913 | unsigned long ns; | ||
914 | unsigned long long cyc; | ||
915 | |||
916 | ns = millisec * 1000; | ||
917 | cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); | ||
918 | return cyc; | ||
919 | } | ||
920 | |||
525 | /* | 921 | /* |
526 | * Display the statistics thru /proc | 922 | * Display the statistics thru /proc. |
527 | * data points to the cpu number | 923 | * 'data' points to the cpu number |
528 | */ | 924 | */ |
529 | static int uv_ptc_seq_show(struct seq_file *file, void *data) | 925 | static int uv_ptc_seq_show(struct seq_file *file, void *data) |
530 | { | 926 | { |
@@ -535,78 +931,155 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data) | |||
535 | 931 | ||
536 | if (!cpu) { | 932 | if (!cpu) { |
537 | seq_printf(file, | 933 | seq_printf(file, |
538 | "# cpu requestor requestee one all sretry dretry ptc_i "); | 934 | "# cpu sent stime numuvhubs numuvhubs16 numuvhubs8 "); |
539 | seq_printf(file, | 935 | seq_printf(file, |
540 | "sw_ack sflush dflush sok dnomsg dmult starget\n"); | 936 | "numuvhubs4 numuvhubs2 numuvhubs1 numcpus dto "); |
937 | seq_printf(file, | ||
938 | "retries rok resetp resett giveup sto bz throt "); | ||
939 | seq_printf(file, | ||
940 | "sw_ack recv rtime all "); | ||
941 | seq_printf(file, | ||
942 | "one mult none retry canc nocan reset rcan\n"); | ||
541 | } | 943 | } |
542 | if (cpu < num_possible_cpus() && cpu_online(cpu)) { | 944 | if (cpu < num_possible_cpus() && cpu_online(cpu)) { |
543 | stat = &per_cpu(ptcstats, cpu); | 945 | stat = &per_cpu(ptcstats, cpu); |
544 | seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld ", | 946 | /* source side statistics */ |
545 | cpu, stat->requestor, | 947 | seq_printf(file, |
546 | stat->requestee, stat->onetlb, stat->alltlb, | 948 | "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", |
547 | stat->s_retry, stat->d_retry, stat->ptc_i); | 949 | cpu, stat->s_requestor, cycles_2_us(stat->s_time), |
548 | seq_printf(file, "%lx %ld %ld %ld %ld %ld %ld\n", | 950 | stat->s_ntarguvhub, stat->s_ntarguvhub16, |
951 | stat->s_ntarguvhub8, stat->s_ntarguvhub4, | ||
952 | stat->s_ntarguvhub2, stat->s_ntarguvhub1, | ||
953 | stat->s_ntargcpu, stat->s_dtimeout); | ||
954 | seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ", | ||
955 | stat->s_retry_messages, stat->s_retriesok, | ||
956 | stat->s_resets_plug, stat->s_resets_timeout, | ||
957 | stat->s_giveup, stat->s_stimeout, | ||
958 | stat->s_busy, stat->s_throttles); | ||
959 | /* destination side statistics */ | ||
960 | seq_printf(file, | ||
961 | "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n", | ||
549 | uv_read_global_mmr64(uv_cpu_to_pnode(cpu), | 962 | uv_read_global_mmr64(uv_cpu_to_pnode(cpu), |
550 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE), | 963 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE), |
551 | stat->sflush, stat->dflush, | 964 | stat->d_requestee, cycles_2_us(stat->d_time), |
552 | stat->retriesok, stat->nomsg, | 965 | stat->d_alltlb, stat->d_onetlb, stat->d_multmsg, |
553 | stat->multmsg, stat->ntargeted); | 966 | stat->d_nomsg, stat->d_retries, stat->d_canceled, |
967 | stat->d_nocanceled, stat->d_resets, | ||
968 | stat->d_rcanceled); | ||
554 | } | 969 | } |
555 | 970 | ||
556 | return 0; | 971 | return 0; |
557 | } | 972 | } |
558 | 973 | ||
559 | /* | 974 | /* |
975 | * -1: resetf the statistics | ||
560 | * 0: display meaning of the statistics | 976 | * 0: display meaning of the statistics |
561 | * >0: retry limit | 977 | * >0: maximum concurrent active descriptors per uvhub (throttle) |
562 | */ | 978 | */ |
563 | static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user, | 979 | static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user, |
564 | size_t count, loff_t *data) | 980 | size_t count, loff_t *data) |
565 | { | 981 | { |
566 | long newmode; | 982 | int cpu; |
983 | long input_arg; | ||
567 | char optstr[64]; | 984 | char optstr[64]; |
985 | struct ptc_stats *stat; | ||
986 | struct bau_control *bcp; | ||
568 | 987 | ||
569 | if (count == 0 || count > sizeof(optstr)) | 988 | if (count == 0 || count > sizeof(optstr)) |
570 | return -EINVAL; | 989 | return -EINVAL; |
571 | if (copy_from_user(optstr, user, count)) | 990 | if (copy_from_user(optstr, user, count)) |
572 | return -EFAULT; | 991 | return -EFAULT; |
573 | optstr[count - 1] = '\0'; | 992 | optstr[count - 1] = '\0'; |
574 | if (strict_strtoul(optstr, 10, &newmode) < 0) { | 993 | if (strict_strtol(optstr, 10, &input_arg) < 0) { |
575 | printk(KERN_DEBUG "%s is invalid\n", optstr); | 994 | printk(KERN_DEBUG "%s is invalid\n", optstr); |
576 | return -EINVAL; | 995 | return -EINVAL; |
577 | } | 996 | } |
578 | 997 | ||
579 | if (newmode == 0) { | 998 | if (input_arg == 0) { |
580 | printk(KERN_DEBUG "# cpu: cpu number\n"); | 999 | printk(KERN_DEBUG "# cpu: cpu number\n"); |
1000 | printk(KERN_DEBUG "Sender statistics:\n"); | ||
1001 | printk(KERN_DEBUG | ||
1002 | "sent: number of shootdown messages sent\n"); | ||
1003 | printk(KERN_DEBUG | ||
1004 | "stime: time spent sending messages\n"); | ||
1005 | printk(KERN_DEBUG | ||
1006 | "numuvhubs: number of hubs targeted with shootdown\n"); | ||
1007 | printk(KERN_DEBUG | ||
1008 | "numuvhubs16: number times 16 or more hubs targeted\n"); | ||
1009 | printk(KERN_DEBUG | ||
1010 | "numuvhubs8: number times 8 or more hubs targeted\n"); | ||
1011 | printk(KERN_DEBUG | ||
1012 | "numuvhubs4: number times 4 or more hubs targeted\n"); | ||
1013 | printk(KERN_DEBUG | ||
1014 | "numuvhubs2: number times 2 or more hubs targeted\n"); | ||
1015 | printk(KERN_DEBUG | ||
1016 | "numuvhubs1: number times 1 hub targeted\n"); | ||
1017 | printk(KERN_DEBUG | ||
1018 | "numcpus: number of cpus targeted with shootdown\n"); | ||
1019 | printk(KERN_DEBUG | ||
1020 | "dto: number of destination timeouts\n"); | ||
1021 | printk(KERN_DEBUG | ||
1022 | "retries: destination timeout retries sent\n"); | ||
1023 | printk(KERN_DEBUG | ||
1024 | "rok: : destination timeouts successfully retried\n"); | ||
1025 | printk(KERN_DEBUG | ||
1026 | "resetp: ipi-style resource resets for plugs\n"); | ||
1027 | printk(KERN_DEBUG | ||
1028 | "resett: ipi-style resource resets for timeouts\n"); | ||
1029 | printk(KERN_DEBUG | ||
1030 | "giveup: fall-backs to ipi-style shootdowns\n"); | ||
1031 | printk(KERN_DEBUG | ||
1032 | "sto: number of source timeouts\n"); | ||
1033 | printk(KERN_DEBUG | ||
1034 | "bz: number of stay-busy's\n"); | ||
1035 | printk(KERN_DEBUG | ||
1036 | "throt: number times spun in throttle\n"); | ||
1037 | printk(KERN_DEBUG "Destination side statistics:\n"); | ||
581 | printk(KERN_DEBUG | 1038 | printk(KERN_DEBUG |
582 | "requestor: times this cpu was the flush requestor\n"); | 1039 | "sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n"); |
583 | printk(KERN_DEBUG | 1040 | printk(KERN_DEBUG |
584 | "requestee: times this cpu was requested to flush its TLBs\n"); | 1041 | "recv: shootdown messages received\n"); |
585 | printk(KERN_DEBUG | 1042 | printk(KERN_DEBUG |
586 | "one: times requested to flush a single address\n"); | 1043 | "rtime: time spent processing messages\n"); |
587 | printk(KERN_DEBUG | 1044 | printk(KERN_DEBUG |
588 | "all: times requested to flush all TLB's\n"); | 1045 | "all: shootdown all-tlb messages\n"); |
589 | printk(KERN_DEBUG | 1046 | printk(KERN_DEBUG |
590 | "sretry: number of retries of source-side timeouts\n"); | 1047 | "one: shootdown one-tlb messages\n"); |
591 | printk(KERN_DEBUG | 1048 | printk(KERN_DEBUG |
592 | "dretry: number of retries of destination-side timeouts\n"); | 1049 | "mult: interrupts that found multiple messages\n"); |
593 | printk(KERN_DEBUG | 1050 | printk(KERN_DEBUG |
594 | "ptc_i: times UV fell through to IPI-style flushes\n"); | 1051 | "none: interrupts that found no messages\n"); |
595 | printk(KERN_DEBUG | 1052 | printk(KERN_DEBUG |
596 | "sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n"); | 1053 | "retry: number of retry messages processed\n"); |
597 | printk(KERN_DEBUG | 1054 | printk(KERN_DEBUG |
598 | "sflush_us: cycles spent in uv_flush_tlb_others()\n"); | 1055 | "canc: number messages canceled by retries\n"); |
599 | printk(KERN_DEBUG | 1056 | printk(KERN_DEBUG |
600 | "dflush_us: cycles spent in handling flush requests\n"); | 1057 | "nocan: number retries that found nothing to cancel\n"); |
601 | printk(KERN_DEBUG "sok: successes on retry\n"); | ||
602 | printk(KERN_DEBUG "dnomsg: interrupts with no message\n"); | ||
603 | printk(KERN_DEBUG | 1058 | printk(KERN_DEBUG |
604 | "dmult: interrupts with multiple messages\n"); | 1059 | "reset: number of ipi-style reset requests processed\n"); |
605 | printk(KERN_DEBUG "starget: nodes targeted\n"); | 1060 | printk(KERN_DEBUG |
1061 | "rcan: number messages canceled by reset requests\n"); | ||
1062 | } else if (input_arg == -1) { | ||
1063 | for_each_present_cpu(cpu) { | ||
1064 | stat = &per_cpu(ptcstats, cpu); | ||
1065 | memset(stat, 0, sizeof(struct ptc_stats)); | ||
1066 | } | ||
606 | } else { | 1067 | } else { |
607 | uv_bau_retry_limit = newmode; | 1068 | uv_bau_max_concurrent = input_arg; |
608 | printk(KERN_DEBUG "timeout retry limit:%d\n", | 1069 | bcp = &per_cpu(bau_control, smp_processor_id()); |
609 | uv_bau_retry_limit); | 1070 | if (uv_bau_max_concurrent < 1 || |
1071 | uv_bau_max_concurrent > bcp->cpus_in_uvhub) { | ||
1072 | printk(KERN_DEBUG | ||
1073 | "Error: BAU max concurrent %d; %d is invalid\n", | ||
1074 | bcp->max_concurrent, uv_bau_max_concurrent); | ||
1075 | return -EINVAL; | ||
1076 | } | ||
1077 | printk(KERN_DEBUG "Set BAU max concurrent:%d\n", | ||
1078 | uv_bau_max_concurrent); | ||
1079 | for_each_present_cpu(cpu) { | ||
1080 | bcp = &per_cpu(bau_control, cpu); | ||
1081 | bcp->max_concurrent = uv_bau_max_concurrent; | ||
1082 | } | ||
610 | } | 1083 | } |
611 | 1084 | ||
612 | return count; | 1085 | return count; |
@@ -650,79 +1123,30 @@ static int __init uv_ptc_init(void) | |||
650 | } | 1123 | } |
651 | 1124 | ||
652 | /* | 1125 | /* |
653 | * begin the initialization of the per-blade control structures | ||
654 | */ | ||
655 | static struct bau_control * __init uv_table_bases_init(int blade, int node) | ||
656 | { | ||
657 | int i; | ||
658 | struct bau_msg_status *msp; | ||
659 | struct bau_control *bau_tabp; | ||
660 | |||
661 | bau_tabp = | ||
662 | kmalloc_node(sizeof(struct bau_control), GFP_KERNEL, node); | ||
663 | BUG_ON(!bau_tabp); | ||
664 | |||
665 | bau_tabp->msg_statuses = | ||
666 | kmalloc_node(sizeof(struct bau_msg_status) * | ||
667 | DEST_Q_SIZE, GFP_KERNEL, node); | ||
668 | BUG_ON(!bau_tabp->msg_statuses); | ||
669 | |||
670 | for (i = 0, msp = bau_tabp->msg_statuses; i < DEST_Q_SIZE; i++, msp++) | ||
671 | bau_cpubits_clear(&msp->seen_by, (int) | ||
672 | uv_blade_nr_possible_cpus(blade)); | ||
673 | |||
674 | uv_bau_table_bases[blade] = bau_tabp; | ||
675 | |||
676 | return bau_tabp; | ||
677 | } | ||
678 | |||
679 | /* | ||
680 | * finish the initialization of the per-blade control structures | ||
681 | */ | ||
682 | static void __init | ||
683 | uv_table_bases_finish(int blade, | ||
684 | struct bau_control *bau_tablesp, | ||
685 | struct bau_desc *adp) | ||
686 | { | ||
687 | struct bau_control *bcp; | ||
688 | int cpu; | ||
689 | |||
690 | for_each_present_cpu(cpu) { | ||
691 | if (blade != uv_cpu_to_blade_id(cpu)) | ||
692 | continue; | ||
693 | |||
694 | bcp = (struct bau_control *)&per_cpu(bau_control, cpu); | ||
695 | bcp->bau_msg_head = bau_tablesp->va_queue_first; | ||
696 | bcp->va_queue_first = bau_tablesp->va_queue_first; | ||
697 | bcp->va_queue_last = bau_tablesp->va_queue_last; | ||
698 | bcp->msg_statuses = bau_tablesp->msg_statuses; | ||
699 | bcp->descriptor_base = adp; | ||
700 | } | ||
701 | } | ||
702 | |||
703 | /* | ||
704 | * initialize the sending side's sending buffers | 1126 | * initialize the sending side's sending buffers |
705 | */ | 1127 | */ |
706 | static struct bau_desc * __init | 1128 | static void |
707 | uv_activation_descriptor_init(int node, int pnode) | 1129 | uv_activation_descriptor_init(int node, int pnode) |
708 | { | 1130 | { |
709 | int i; | 1131 | int i; |
1132 | int cpu; | ||
710 | unsigned long pa; | 1133 | unsigned long pa; |
711 | unsigned long m; | 1134 | unsigned long m; |
712 | unsigned long n; | 1135 | unsigned long n; |
713 | struct bau_desc *adp; | 1136 | struct bau_desc *bau_desc; |
714 | struct bau_desc *ad2; | 1137 | struct bau_desc *bd2; |
1138 | struct bau_control *bcp; | ||
715 | 1139 | ||
716 | /* | 1140 | /* |
717 | * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR) | 1141 | * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR) |
718 | * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per blade | 1142 | * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per uvhub |
719 | */ | 1143 | */ |
720 | adp = (struct bau_desc *)kmalloc_node(sizeof(struct bau_desc)* | 1144 | bau_desc = (struct bau_desc *)kmalloc_node(sizeof(struct bau_desc)* |
721 | UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node); | 1145 | UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node); |
722 | BUG_ON(!adp); | 1146 | BUG_ON(!bau_desc); |
723 | 1147 | ||
724 | pa = uv_gpa(adp); /* need the real nasid*/ | 1148 | pa = uv_gpa(bau_desc); /* need the real nasid*/ |
725 | n = uv_gpa_to_pnode(pa); | 1149 | n = pa >> uv_nshift; |
726 | m = pa & uv_mmask; | 1150 | m = pa & uv_mmask; |
727 | 1151 | ||
728 | uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, | 1152 | uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, |
@@ -731,96 +1155,188 @@ uv_activation_descriptor_init(int node, int pnode) | |||
731 | /* | 1155 | /* |
732 | * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each | 1156 | * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each |
733 | * cpu even though we only use the first one; one descriptor can | 1157 | * cpu even though we only use the first one; one descriptor can |
734 | * describe a broadcast to 256 nodes. | 1158 | * describe a broadcast to 256 uv hubs. |
735 | */ | 1159 | */ |
736 | for (i = 0, ad2 = adp; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR); | 1160 | for (i = 0, bd2 = bau_desc; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR); |
737 | i++, ad2++) { | 1161 | i++, bd2++) { |
738 | memset(ad2, 0, sizeof(struct bau_desc)); | 1162 | memset(bd2, 0, sizeof(struct bau_desc)); |
739 | ad2->header.sw_ack_flag = 1; | 1163 | bd2->header.sw_ack_flag = 1; |
740 | /* | 1164 | /* |
741 | * base_dest_nodeid is the first node in the partition, so | 1165 | * base_dest_nodeid is the nasid (pnode<<1) of the first uvhub |
742 | * the bit map will indicate partition-relative node numbers. | 1166 | * in the partition. The bit map will indicate uvhub numbers, |
743 | * note that base_dest_nodeid is actually a nasid. | 1167 | * which are 0-N in a partition. Pnodes are unique system-wide. |
744 | */ | 1168 | */ |
745 | ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1; | 1169 | bd2->header.base_dest_nodeid = uv_partition_base_pnode << 1; |
746 | ad2->header.dest_subnodeid = 0x10; /* the LB */ | 1170 | bd2->header.dest_subnodeid = 0x10; /* the LB */ |
747 | ad2->header.command = UV_NET_ENDPOINT_INTD; | 1171 | bd2->header.command = UV_NET_ENDPOINT_INTD; |
748 | ad2->header.int_both = 1; | 1172 | bd2->header.int_both = 1; |
749 | /* | 1173 | /* |
750 | * all others need to be set to zero: | 1174 | * all others need to be set to zero: |
751 | * fairness chaining multilevel count replied_to | 1175 | * fairness chaining multilevel count replied_to |
752 | */ | 1176 | */ |
753 | } | 1177 | } |
754 | return adp; | 1178 | for_each_present_cpu(cpu) { |
1179 | if (pnode != uv_blade_to_pnode(uv_cpu_to_blade_id(cpu))) | ||
1180 | continue; | ||
1181 | bcp = &per_cpu(bau_control, cpu); | ||
1182 | bcp->descriptor_base = bau_desc; | ||
1183 | } | ||
755 | } | 1184 | } |
756 | 1185 | ||
757 | /* | 1186 | /* |
758 | * initialize the destination side's receiving buffers | 1187 | * initialize the destination side's receiving buffers |
1188 | * entered for each uvhub in the partition | ||
1189 | * - node is first node (kernel memory notion) on the uvhub | ||
1190 | * - pnode is the uvhub's physical identifier | ||
759 | */ | 1191 | */ |
760 | static struct bau_payload_queue_entry * __init | 1192 | static void |
761 | uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp) | 1193 | uv_payload_queue_init(int node, int pnode) |
762 | { | 1194 | { |
763 | struct bau_payload_queue_entry *pqp; | ||
764 | unsigned long pa; | ||
765 | int pn; | 1195 | int pn; |
1196 | int cpu; | ||
766 | char *cp; | 1197 | char *cp; |
1198 | unsigned long pa; | ||
1199 | struct bau_payload_queue_entry *pqp; | ||
1200 | struct bau_payload_queue_entry *pqp_malloc; | ||
1201 | struct bau_control *bcp; | ||
767 | 1202 | ||
768 | pqp = (struct bau_payload_queue_entry *) kmalloc_node( | 1203 | pqp = (struct bau_payload_queue_entry *) kmalloc_node( |
769 | (DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry), | 1204 | (DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry), |
770 | GFP_KERNEL, node); | 1205 | GFP_KERNEL, node); |
771 | BUG_ON(!pqp); | 1206 | BUG_ON(!pqp); |
1207 | pqp_malloc = pqp; | ||
772 | 1208 | ||
773 | cp = (char *)pqp + 31; | 1209 | cp = (char *)pqp + 31; |
774 | pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5); | 1210 | pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5); |
775 | bau_tablesp->va_queue_first = pqp; | 1211 | |
1212 | for_each_present_cpu(cpu) { | ||
1213 | if (pnode != uv_cpu_to_pnode(cpu)) | ||
1214 | continue; | ||
1215 | /* for every cpu on this pnode: */ | ||
1216 | bcp = &per_cpu(bau_control, cpu); | ||
1217 | bcp->va_queue_first = pqp; | ||
1218 | bcp->bau_msg_head = pqp; | ||
1219 | bcp->va_queue_last = pqp + (DEST_Q_SIZE - 1); | ||
1220 | } | ||
776 | /* | 1221 | /* |
777 | * need the pnode of where the memory was really allocated | 1222 | * need the pnode of where the memory was really allocated |
778 | */ | 1223 | */ |
779 | pa = uv_gpa(pqp); | 1224 | pa = uv_gpa(pqp); |
780 | pn = uv_gpa_to_pnode(pa); | 1225 | pn = pa >> uv_nshift; |
781 | uv_write_global_mmr64(pnode, | 1226 | uv_write_global_mmr64(pnode, |
782 | UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, | 1227 | UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, |
783 | ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | | 1228 | ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | |
784 | uv_physnodeaddr(pqp)); | 1229 | uv_physnodeaddr(pqp)); |
785 | uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, | 1230 | uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, |
786 | uv_physnodeaddr(pqp)); | 1231 | uv_physnodeaddr(pqp)); |
787 | bau_tablesp->va_queue_last = pqp + (DEST_Q_SIZE - 1); | ||
788 | uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, | 1232 | uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, |
789 | (unsigned long) | 1233 | (unsigned long) |
790 | uv_physnodeaddr(bau_tablesp->va_queue_last)); | 1234 | uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1))); |
1235 | /* in effect, all msg_type's are set to MSG_NOOP */ | ||
791 | memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE); | 1236 | memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE); |
792 | |||
793 | return pqp; | ||
794 | } | 1237 | } |
795 | 1238 | ||
796 | /* | 1239 | /* |
797 | * Initialization of each UV blade's structures | 1240 | * Initialization of each UV hub's structures |
798 | */ | 1241 | */ |
799 | static int __init uv_init_blade(int blade) | 1242 | static void __init uv_init_uvhub(int uvhub, int vector) |
800 | { | 1243 | { |
801 | int node; | 1244 | int node; |
802 | int pnode; | 1245 | int pnode; |
803 | unsigned long pa; | ||
804 | unsigned long apicid; | 1246 | unsigned long apicid; |
805 | struct bau_desc *adp; | 1247 | |
806 | struct bau_payload_queue_entry *pqp; | 1248 | node = uvhub_to_first_node(uvhub); |
807 | struct bau_control *bau_tablesp; | 1249 | pnode = uv_blade_to_pnode(uvhub); |
808 | 1250 | uv_activation_descriptor_init(node, pnode); | |
809 | node = blade_to_first_node(blade); | 1251 | uv_payload_queue_init(node, pnode); |
810 | bau_tablesp = uv_table_bases_init(blade, node); | ||
811 | pnode = uv_blade_to_pnode(blade); | ||
812 | adp = uv_activation_descriptor_init(node, pnode); | ||
813 | pqp = uv_payload_queue_init(node, pnode, bau_tablesp); | ||
814 | uv_table_bases_finish(blade, bau_tablesp, adp); | ||
815 | /* | 1252 | /* |
816 | * the below initialization can't be in firmware because the | 1253 | * the below initialization can't be in firmware because the |
817 | * messaging IRQ will be determined by the OS | 1254 | * messaging IRQ will be determined by the OS |
818 | */ | 1255 | */ |
819 | apicid = blade_to_first_apicid(blade); | 1256 | apicid = uvhub_to_first_apicid(uvhub); |
820 | pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG); | ||
821 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, | 1257 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, |
822 | ((apicid << 32) | UV_BAU_MESSAGE)); | 1258 | ((apicid << 32) | vector)); |
823 | return 0; | 1259 | } |
1260 | |||
1261 | /* | ||
1262 | * initialize the bau_control structure for each cpu | ||
1263 | */ | ||
1264 | static void uv_init_per_cpu(int nuvhubs) | ||
1265 | { | ||
1266 | int i, j, k; | ||
1267 | int cpu; | ||
1268 | int pnode; | ||
1269 | int uvhub; | ||
1270 | short socket = 0; | ||
1271 | struct bau_control *bcp; | ||
1272 | struct uvhub_desc *bdp; | ||
1273 | struct socket_desc *sdp; | ||
1274 | struct bau_control *hmaster = NULL; | ||
1275 | struct bau_control *smaster = NULL; | ||
1276 | struct socket_desc { | ||
1277 | short num_cpus; | ||
1278 | short cpu_number[16]; | ||
1279 | }; | ||
1280 | struct uvhub_desc { | ||
1281 | short num_sockets; | ||
1282 | short num_cpus; | ||
1283 | short uvhub; | ||
1284 | short pnode; | ||
1285 | struct socket_desc socket[2]; | ||
1286 | }; | ||
1287 | struct uvhub_desc *uvhub_descs; | ||
1288 | |||
1289 | uvhub_descs = (struct uvhub_desc *) | ||
1290 | kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL); | ||
1291 | memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc)); | ||
1292 | for_each_present_cpu(cpu) { | ||
1293 | bcp = &per_cpu(bau_control, cpu); | ||
1294 | memset(bcp, 0, sizeof(struct bau_control)); | ||
1295 | spin_lock_init(&bcp->masks_lock); | ||
1296 | bcp->max_concurrent = uv_bau_max_concurrent; | ||
1297 | pnode = uv_cpu_hub_info(cpu)->pnode; | ||
1298 | uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; | ||
1299 | bdp = &uvhub_descs[uvhub]; | ||
1300 | bdp->num_cpus++; | ||
1301 | bdp->uvhub = uvhub; | ||
1302 | bdp->pnode = pnode; | ||
1303 | /* time interval to catch a hardware stay-busy bug */ | ||
1304 | bcp->timeout_interval = millisec_2_cycles(3); | ||
1305 | /* kludge: assume uv_hub.h is constant */ | ||
1306 | socket = (cpu_physical_id(cpu)>>5)&1; | ||
1307 | if (socket >= bdp->num_sockets) | ||
1308 | bdp->num_sockets = socket+1; | ||
1309 | sdp = &bdp->socket[socket]; | ||
1310 | sdp->cpu_number[sdp->num_cpus] = cpu; | ||
1311 | sdp->num_cpus++; | ||
1312 | } | ||
1313 | socket = 0; | ||
1314 | for_each_possible_blade(uvhub) { | ||
1315 | bdp = &uvhub_descs[uvhub]; | ||
1316 | for (i = 0; i < bdp->num_sockets; i++) { | ||
1317 | sdp = &bdp->socket[i]; | ||
1318 | for (j = 0; j < sdp->num_cpus; j++) { | ||
1319 | cpu = sdp->cpu_number[j]; | ||
1320 | bcp = &per_cpu(bau_control, cpu); | ||
1321 | bcp->cpu = cpu; | ||
1322 | if (j == 0) { | ||
1323 | smaster = bcp; | ||
1324 | if (i == 0) | ||
1325 | hmaster = bcp; | ||
1326 | } | ||
1327 | bcp->cpus_in_uvhub = bdp->num_cpus; | ||
1328 | bcp->cpus_in_socket = sdp->num_cpus; | ||
1329 | bcp->socket_master = smaster; | ||
1330 | bcp->uvhub_master = hmaster; | ||
1331 | for (k = 0; k < DEST_Q_SIZE; k++) | ||
1332 | bcp->socket_acknowledge_count[k] = 0; | ||
1333 | bcp->uvhub_cpu = | ||
1334 | uv_cpu_hub_info(cpu)->blade_processor_id; | ||
1335 | } | ||
1336 | socket++; | ||
1337 | } | ||
1338 | } | ||
1339 | kfree(uvhub_descs); | ||
824 | } | 1340 | } |
825 | 1341 | ||
826 | /* | 1342 | /* |
@@ -828,38 +1344,54 @@ static int __init uv_init_blade(int blade) | |||
828 | */ | 1344 | */ |
829 | static int __init uv_bau_init(void) | 1345 | static int __init uv_bau_init(void) |
830 | { | 1346 | { |
831 | int blade; | 1347 | int uvhub; |
832 | int nblades; | 1348 | int pnode; |
1349 | int nuvhubs; | ||
833 | int cur_cpu; | 1350 | int cur_cpu; |
1351 | int vector; | ||
1352 | unsigned long mmr; | ||
834 | 1353 | ||
835 | if (!is_uv_system()) | 1354 | if (!is_uv_system()) |
836 | return 0; | 1355 | return 0; |
837 | 1356 | ||
1357 | if (nobau) | ||
1358 | return 0; | ||
1359 | |||
838 | for_each_possible_cpu(cur_cpu) | 1360 | for_each_possible_cpu(cur_cpu) |
839 | zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), | 1361 | zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), |
840 | GFP_KERNEL, cpu_to_node(cur_cpu)); | 1362 | GFP_KERNEL, cpu_to_node(cur_cpu)); |
841 | 1363 | ||
842 | uv_bau_retry_limit = 1; | 1364 | uv_bau_max_concurrent = MAX_BAU_CONCURRENT; |
1365 | uv_nshift = uv_hub_info->m_val; | ||
843 | uv_mmask = (1UL << uv_hub_info->m_val) - 1; | 1366 | uv_mmask = (1UL << uv_hub_info->m_val) - 1; |
844 | nblades = uv_num_possible_blades(); | 1367 | nuvhubs = uv_num_possible_blades(); |
845 | 1368 | ||
846 | uv_bau_table_bases = (struct bau_control **) | 1369 | uv_init_per_cpu(nuvhubs); |
847 | kmalloc(nblades * sizeof(struct bau_control *), GFP_KERNEL); | ||
848 | BUG_ON(!uv_bau_table_bases); | ||
849 | 1370 | ||
850 | uv_partition_base_pnode = 0x7fffffff; | 1371 | uv_partition_base_pnode = 0x7fffffff; |
851 | for (blade = 0; blade < nblades; blade++) | 1372 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) |
852 | if (uv_blade_nr_possible_cpus(blade) && | 1373 | if (uv_blade_nr_possible_cpus(uvhub) && |
853 | (uv_blade_to_pnode(blade) < uv_partition_base_pnode)) | 1374 | (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode)) |
854 | uv_partition_base_pnode = uv_blade_to_pnode(blade); | 1375 | uv_partition_base_pnode = uv_blade_to_pnode(uvhub); |
855 | for (blade = 0; blade < nblades; blade++) | 1376 | |
856 | if (uv_blade_nr_possible_cpus(blade)) | 1377 | vector = UV_BAU_MESSAGE; |
857 | uv_init_blade(blade); | 1378 | for_each_possible_blade(uvhub) |
858 | 1379 | if (uv_blade_nr_possible_cpus(uvhub)) | |
859 | alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1); | 1380 | uv_init_uvhub(uvhub, vector); |
1381 | |||
860 | uv_enable_timeouts(); | 1382 | uv_enable_timeouts(); |
1383 | alloc_intr_gate(vector, uv_bau_message_intr1); | ||
1384 | |||
1385 | for_each_possible_blade(uvhub) { | ||
1386 | pnode = uv_blade_to_pnode(uvhub); | ||
1387 | /* INIT the bau */ | ||
1388 | uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_ACTIVATION_CONTROL, | ||
1389 | ((unsigned long)1 << 63)); | ||
1390 | mmr = 1; /* should be 1 to broadcast to both sockets */ | ||
1391 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_BROADCAST, mmr); | ||
1392 | } | ||
861 | 1393 | ||
862 | return 0; | 1394 | return 0; |
863 | } | 1395 | } |
864 | __initcall(uv_bau_init); | 1396 | core_initcall(uv_bau_init); |
865 | __initcall(uv_ptc_init); | 1397 | core_initcall(uv_ptc_init); |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 1168e4454188..02cfb9b8f5b1 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -108,15 +108,6 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) | |||
108 | dec_preempt_count(); | 108 | dec_preempt_count(); |
109 | } | 109 | } |
110 | 110 | ||
111 | #ifdef CONFIG_X86_32 | ||
112 | static inline void | ||
113 | die_if_kernel(const char *str, struct pt_regs *regs, long err) | ||
114 | { | ||
115 | if (!user_mode_vm(regs)) | ||
116 | die(str, regs, err); | ||
117 | } | ||
118 | #endif | ||
119 | |||
120 | static void __kprobes | 111 | static void __kprobes |
121 | do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, | 112 | do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, |
122 | long error_code, siginfo_t *info) | 113 | long error_code, siginfo_t *info) |
@@ -543,11 +534,11 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
543 | 534 | ||
544 | /* DR6 may or may not be cleared by the CPU */ | 535 | /* DR6 may or may not be cleared by the CPU */ |
545 | set_debugreg(0, 6); | 536 | set_debugreg(0, 6); |
537 | |||
546 | /* | 538 | /* |
547 | * The processor cleared BTF, so don't mark that we need it set. | 539 | * The processor cleared BTF, so don't mark that we need it set. |
548 | */ | 540 | */ |
549 | clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); | 541 | clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP); |
550 | tsk->thread.debugctlmsr = 0; | ||
551 | 542 | ||
552 | /* Store the virtualized DR6 value */ | 543 | /* Store the virtualized DR6 value */ |
553 | tsk->thread.debugreg6 = dr6; | 544 | tsk->thread.debugreg6 = dr6; |
@@ -585,55 +576,67 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
585 | return; | 576 | return; |
586 | } | 577 | } |
587 | 578 | ||
588 | #ifdef CONFIG_X86_64 | ||
589 | static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) | ||
590 | { | ||
591 | if (fixup_exception(regs)) | ||
592 | return 1; | ||
593 | |||
594 | notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE); | ||
595 | /* Illegal floating point operation in the kernel */ | ||
596 | current->thread.trap_no = trapnr; | ||
597 | die(str, regs, 0); | ||
598 | return 0; | ||
599 | } | ||
600 | #endif | ||
601 | |||
602 | /* | 579 | /* |
603 | * Note that we play around with the 'TS' bit in an attempt to get | 580 | * Note that we play around with the 'TS' bit in an attempt to get |
604 | * the correct behaviour even in the presence of the asynchronous | 581 | * the correct behaviour even in the presence of the asynchronous |
605 | * IRQ13 behaviour | 582 | * IRQ13 behaviour |
606 | */ | 583 | */ |
607 | void math_error(void __user *ip) | 584 | void math_error(struct pt_regs *regs, int error_code, int trapnr) |
608 | { | 585 | { |
609 | struct task_struct *task; | 586 | struct task_struct *task = current; |
610 | siginfo_t info; | 587 | siginfo_t info; |
611 | unsigned short cwd, swd, err; | 588 | unsigned short err; |
589 | char *str = (trapnr == 16) ? "fpu exception" : "simd exception"; | ||
590 | |||
591 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP) | ||
592 | return; | ||
593 | conditional_sti(regs); | ||
594 | |||
595 | if (!user_mode_vm(regs)) | ||
596 | { | ||
597 | if (!fixup_exception(regs)) { | ||
598 | task->thread.error_code = error_code; | ||
599 | task->thread.trap_no = trapnr; | ||
600 | die(str, regs, error_code); | ||
601 | } | ||
602 | return; | ||
603 | } | ||
612 | 604 | ||
613 | /* | 605 | /* |
614 | * Save the info for the exception handler and clear the error. | 606 | * Save the info for the exception handler and clear the error. |
615 | */ | 607 | */ |
616 | task = current; | ||
617 | save_init_fpu(task); | 608 | save_init_fpu(task); |
618 | task->thread.trap_no = 16; | 609 | task->thread.trap_no = trapnr; |
619 | task->thread.error_code = 0; | 610 | task->thread.error_code = error_code; |
620 | info.si_signo = SIGFPE; | 611 | info.si_signo = SIGFPE; |
621 | info.si_errno = 0; | 612 | info.si_errno = 0; |
622 | info.si_addr = ip; | 613 | info.si_addr = (void __user *)regs->ip; |
623 | /* | 614 | if (trapnr == 16) { |
624 | * (~cwd & swd) will mask out exceptions that are not set to unmasked | 615 | unsigned short cwd, swd; |
625 | * status. 0x3f is the exception bits in these regs, 0x200 is the | 616 | /* |
626 | * C1 reg you need in case of a stack fault, 0x040 is the stack | 617 | * (~cwd & swd) will mask out exceptions that are not set to unmasked |
627 | * fault bit. We should only be taking one exception at a time, | 618 | * status. 0x3f is the exception bits in these regs, 0x200 is the |
628 | * so if this combination doesn't produce any single exception, | 619 | * C1 reg you need in case of a stack fault, 0x040 is the stack |
629 | * then we have a bad program that isn't synchronizing its FPU usage | 620 | * fault bit. We should only be taking one exception at a time, |
630 | * and it will suffer the consequences since we won't be able to | 621 | * so if this combination doesn't produce any single exception, |
631 | * fully reproduce the context of the exception | 622 | * then we have a bad program that isn't synchronizing its FPU usage |
632 | */ | 623 | * and it will suffer the consequences since we won't be able to |
633 | cwd = get_fpu_cwd(task); | 624 | * fully reproduce the context of the exception |
634 | swd = get_fpu_swd(task); | 625 | */ |
626 | cwd = get_fpu_cwd(task); | ||
627 | swd = get_fpu_swd(task); | ||
635 | 628 | ||
636 | err = swd & ~cwd; | 629 | err = swd & ~cwd; |
630 | } else { | ||
631 | /* | ||
632 | * The SIMD FPU exceptions are handled a little differently, as there | ||
633 | * is only a single status/control register. Thus, to determine which | ||
634 | * unmasked exception was caught we must mask the exception mask bits | ||
635 | * at 0x1f80, and then use these to mask the exception bits at 0x3f. | ||
636 | */ | ||
637 | unsigned short mxcsr = get_fpu_mxcsr(task); | ||
638 | err = ~(mxcsr >> 7) & mxcsr; | ||
639 | } | ||
637 | 640 | ||
638 | if (err & 0x001) { /* Invalid op */ | 641 | if (err & 0x001) { /* Invalid op */ |
639 | /* | 642 | /* |
@@ -662,97 +665,17 @@ void math_error(void __user *ip) | |||
662 | 665 | ||
663 | dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) | 666 | dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) |
664 | { | 667 | { |
665 | conditional_sti(regs); | ||
666 | |||
667 | #ifdef CONFIG_X86_32 | 668 | #ifdef CONFIG_X86_32 |
668 | ignore_fpu_irq = 1; | 669 | ignore_fpu_irq = 1; |
669 | #else | ||
670 | if (!user_mode(regs) && | ||
671 | kernel_math_error(regs, "kernel x87 math error", 16)) | ||
672 | return; | ||
673 | #endif | 670 | #endif |
674 | 671 | ||
675 | math_error((void __user *)regs->ip); | 672 | math_error(regs, error_code, 16); |
676 | } | ||
677 | |||
678 | static void simd_math_error(void __user *ip) | ||
679 | { | ||
680 | struct task_struct *task; | ||
681 | siginfo_t info; | ||
682 | unsigned short mxcsr; | ||
683 | |||
684 | /* | ||
685 | * Save the info for the exception handler and clear the error. | ||
686 | */ | ||
687 | task = current; | ||
688 | save_init_fpu(task); | ||
689 | task->thread.trap_no = 19; | ||
690 | task->thread.error_code = 0; | ||
691 | info.si_signo = SIGFPE; | ||
692 | info.si_errno = 0; | ||
693 | info.si_code = __SI_FAULT; | ||
694 | info.si_addr = ip; | ||
695 | /* | ||
696 | * The SIMD FPU exceptions are handled a little differently, as there | ||
697 | * is only a single status/control register. Thus, to determine which | ||
698 | * unmasked exception was caught we must mask the exception mask bits | ||
699 | * at 0x1f80, and then use these to mask the exception bits at 0x3f. | ||
700 | */ | ||
701 | mxcsr = get_fpu_mxcsr(task); | ||
702 | switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { | ||
703 | case 0x000: | ||
704 | default: | ||
705 | break; | ||
706 | case 0x001: /* Invalid Op */ | ||
707 | info.si_code = FPE_FLTINV; | ||
708 | break; | ||
709 | case 0x002: /* Denormalize */ | ||
710 | case 0x010: /* Underflow */ | ||
711 | info.si_code = FPE_FLTUND; | ||
712 | break; | ||
713 | case 0x004: /* Zero Divide */ | ||
714 | info.si_code = FPE_FLTDIV; | ||
715 | break; | ||
716 | case 0x008: /* Overflow */ | ||
717 | info.si_code = FPE_FLTOVF; | ||
718 | break; | ||
719 | case 0x020: /* Precision */ | ||
720 | info.si_code = FPE_FLTRES; | ||
721 | break; | ||
722 | } | ||
723 | force_sig_info(SIGFPE, &info, task); | ||
724 | } | 673 | } |
725 | 674 | ||
726 | dotraplinkage void | 675 | dotraplinkage void |
727 | do_simd_coprocessor_error(struct pt_regs *regs, long error_code) | 676 | do_simd_coprocessor_error(struct pt_regs *regs, long error_code) |
728 | { | 677 | { |
729 | conditional_sti(regs); | 678 | math_error(regs, error_code, 19); |
730 | |||
731 | #ifdef CONFIG_X86_32 | ||
732 | if (cpu_has_xmm) { | ||
733 | /* Handle SIMD FPU exceptions on PIII+ processors. */ | ||
734 | ignore_fpu_irq = 1; | ||
735 | simd_math_error((void __user *)regs->ip); | ||
736 | return; | ||
737 | } | ||
738 | /* | ||
739 | * Handle strange cache flush from user space exception | ||
740 | * in all other cases. This is undocumented behaviour. | ||
741 | */ | ||
742 | if (regs->flags & X86_VM_MASK) { | ||
743 | handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code); | ||
744 | return; | ||
745 | } | ||
746 | current->thread.trap_no = 19; | ||
747 | current->thread.error_code = error_code; | ||
748 | die_if_kernel("cache flush denied", regs, error_code); | ||
749 | force_sig(SIGSEGV, current); | ||
750 | #else | ||
751 | if (!user_mode(regs) && | ||
752 | kernel_math_error(regs, "kernel simd math error", 19)) | ||
753 | return; | ||
754 | simd_math_error((void __user *)regs->ip); | ||
755 | #endif | ||
756 | } | 679 | } |
757 | 680 | ||
758 | dotraplinkage void | 681 | dotraplinkage void |
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c index 1d40336b030a..1132129db792 100644 --- a/arch/x86/kernel/uv_irq.c +++ b/arch/x86/kernel/uv_irq.c | |||
@@ -44,7 +44,7 @@ static void uv_ack_apic(unsigned int irq) | |||
44 | ack_APIC_irq(); | 44 | ack_APIC_irq(); |
45 | } | 45 | } |
46 | 46 | ||
47 | struct irq_chip uv_irq_chip = { | 47 | static struct irq_chip uv_irq_chip = { |
48 | .name = "UV-CORE", | 48 | .name = "UV-CORE", |
49 | .startup = uv_noop_ret, | 49 | .startup = uv_noop_ret, |
50 | .shutdown = uv_noop, | 50 | .shutdown = uv_noop, |
@@ -141,7 +141,7 @@ int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode) | |||
141 | */ | 141 | */ |
142 | static int | 142 | static int |
143 | arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | 143 | arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, |
144 | unsigned long mmr_offset, int restrict) | 144 | unsigned long mmr_offset, int limit) |
145 | { | 145 | { |
146 | const struct cpumask *eligible_cpu = cpumask_of(cpu); | 146 | const struct cpumask *eligible_cpu = cpumask_of(cpu); |
147 | struct irq_desc *desc = irq_to_desc(irq); | 147 | struct irq_desc *desc = irq_to_desc(irq); |
@@ -160,7 +160,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | |||
160 | if (err != 0) | 160 | if (err != 0) |
161 | return err; | 161 | return err; |
162 | 162 | ||
163 | if (restrict == UV_AFFINITY_CPU) | 163 | if (limit == UV_AFFINITY_CPU) |
164 | desc->status |= IRQ_NO_BALANCING; | 164 | desc->status |= IRQ_NO_BALANCING; |
165 | else | 165 | else |
166 | desc->status |= IRQ_MOVE_PCNTXT; | 166 | desc->status |= IRQ_MOVE_PCNTXT; |
@@ -214,7 +214,7 @@ static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) | |||
214 | unsigned long mmr_value; | 214 | unsigned long mmr_value; |
215 | struct uv_IO_APIC_route_entry *entry; | 215 | struct uv_IO_APIC_route_entry *entry; |
216 | unsigned long mmr_offset; | 216 | unsigned long mmr_offset; |
217 | unsigned mmr_pnode; | 217 | int mmr_pnode; |
218 | 218 | ||
219 | if (set_desc_affinity(desc, mask, &dest)) | 219 | if (set_desc_affinity(desc, mask, &dest)) |
220 | return -1; | 220 | return -1; |
@@ -248,7 +248,7 @@ static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) | |||
248 | * interrupt is raised. | 248 | * interrupt is raised. |
249 | */ | 249 | */ |
250 | int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, | 250 | int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, |
251 | unsigned long mmr_offset, int restrict) | 251 | unsigned long mmr_offset, int limit) |
252 | { | 252 | { |
253 | int irq, ret; | 253 | int irq, ret; |
254 | 254 | ||
@@ -258,7 +258,7 @@ int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, | |||
258 | return -EBUSY; | 258 | return -EBUSY; |
259 | 259 | ||
260 | ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset, | 260 | ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset, |
261 | restrict); | 261 | limit); |
262 | if (ret == irq) | 262 | if (ret == irq) |
263 | uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade); | 263 | uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade); |
264 | else | 264 | else |
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 693920b22496..1b950d151e58 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c | |||
@@ -54,7 +54,6 @@ EXPORT_SYMBOL(memcpy); | |||
54 | EXPORT_SYMBOL(__memcpy); | 54 | EXPORT_SYMBOL(__memcpy); |
55 | 55 | ||
56 | EXPORT_SYMBOL(empty_zero_page); | 56 | EXPORT_SYMBOL(empty_zero_page); |
57 | EXPORT_SYMBOL(init_level4_pgt); | ||
58 | #ifndef CONFIG_PARAVIRT | 57 | #ifndef CONFIG_PARAVIRT |
59 | EXPORT_SYMBOL(native_load_gs_index); | 58 | EXPORT_SYMBOL(native_load_gs_index); |
60 | #endif | 59 | #endif |
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 782c3a362ec6..37e68fc5e24a 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c | |||
@@ -99,7 +99,7 @@ int save_i387_xstate(void __user *buf) | |||
99 | if (err) | 99 | if (err) |
100 | return err; | 100 | return err; |
101 | 101 | ||
102 | if (task_thread_info(tsk)->status & TS_XSAVE) | 102 | if (use_xsave()) |
103 | err = xsave_user(buf); | 103 | err = xsave_user(buf); |
104 | else | 104 | else |
105 | err = fxsave_user(buf); | 105 | err = fxsave_user(buf); |
@@ -109,14 +109,14 @@ int save_i387_xstate(void __user *buf) | |||
109 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | 109 | task_thread_info(tsk)->status &= ~TS_USEDFPU; |
110 | stts(); | 110 | stts(); |
111 | } else { | 111 | } else { |
112 | if (__copy_to_user(buf, &tsk->thread.xstate->fxsave, | 112 | if (__copy_to_user(buf, &tsk->thread.fpu.state->fxsave, |
113 | xstate_size)) | 113 | xstate_size)) |
114 | return -1; | 114 | return -1; |
115 | } | 115 | } |
116 | 116 | ||
117 | clear_used_math(); /* trigger finit */ | 117 | clear_used_math(); /* trigger finit */ |
118 | 118 | ||
119 | if (task_thread_info(tsk)->status & TS_XSAVE) { | 119 | if (use_xsave()) { |
120 | struct _fpstate __user *fx = buf; | 120 | struct _fpstate __user *fx = buf; |
121 | struct _xstate __user *x = buf; | 121 | struct _xstate __user *x = buf; |
122 | u64 xstate_bv; | 122 | u64 xstate_bv; |
@@ -225,7 +225,7 @@ int restore_i387_xstate(void __user *buf) | |||
225 | clts(); | 225 | clts(); |
226 | task_thread_info(current)->status |= TS_USEDFPU; | 226 | task_thread_info(current)->status |= TS_USEDFPU; |
227 | } | 227 | } |
228 | if (task_thread_info(tsk)->status & TS_XSAVE) | 228 | if (use_xsave()) |
229 | err = restore_user_xstate(buf); | 229 | err = restore_user_xstate(buf); |
230 | else | 230 | else |
231 | err = fxrstor_checking((__force struct i387_fxsave_struct *) | 231 | err = fxrstor_checking((__force struct i387_fxsave_struct *) |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 2ba58206812a..737361fcd503 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -2067,7 +2067,7 @@ static int cpuid_interception(struct vcpu_svm *svm) | |||
2067 | static int iret_interception(struct vcpu_svm *svm) | 2067 | static int iret_interception(struct vcpu_svm *svm) |
2068 | { | 2068 | { |
2069 | ++svm->vcpu.stat.nmi_window_exits; | 2069 | ++svm->vcpu.stat.nmi_window_exits; |
2070 | svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); | 2070 | svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_IRET); |
2071 | svm->vcpu.arch.hflags |= HF_IRET_MASK; | 2071 | svm->vcpu.arch.hflags |= HF_IRET_MASK; |
2072 | return 1; | 2072 | return 1; |
2073 | } | 2073 | } |
@@ -2479,7 +2479,7 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu) | |||
2479 | 2479 | ||
2480 | svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; | 2480 | svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; |
2481 | vcpu->arch.hflags |= HF_NMI_MASK; | 2481 | vcpu->arch.hflags |= HF_NMI_MASK; |
2482 | svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET); | 2482 | svm->vmcb->control.intercept |= (1ULL << INTERCEPT_IRET); |
2483 | ++vcpu->stat.nmi_injections; | 2483 | ++vcpu->stat.nmi_injections; |
2484 | } | 2484 | } |
2485 | 2485 | ||
@@ -2539,10 +2539,10 @@ static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | |||
2539 | 2539 | ||
2540 | if (masked) { | 2540 | if (masked) { |
2541 | svm->vcpu.arch.hflags |= HF_NMI_MASK; | 2541 | svm->vcpu.arch.hflags |= HF_NMI_MASK; |
2542 | svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET); | 2542 | svm->vmcb->control.intercept |= (1ULL << INTERCEPT_IRET); |
2543 | } else { | 2543 | } else { |
2544 | svm->vcpu.arch.hflags &= ~HF_NMI_MASK; | 2544 | svm->vcpu.arch.hflags &= ~HF_NMI_MASK; |
2545 | svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); | 2545 | svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_IRET); |
2546 | } | 2546 | } |
2547 | } | 2547 | } |
2548 | 2548 | ||
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bc933cfb4e66..edca080407a5 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -2703,8 +2703,7 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) | |||
2703 | return 0; | 2703 | return 0; |
2704 | 2704 | ||
2705 | return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | 2705 | return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & |
2706 | (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS | | 2706 | (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_NMI)); |
2707 | GUEST_INTR_STATE_NMI)); | ||
2708 | } | 2707 | } |
2709 | 2708 | ||
2710 | static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) | 2709 | static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) |
@@ -3660,8 +3659,11 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) | |||
3660 | 3659 | ||
3661 | /* We need to handle NMIs before interrupts are enabled */ | 3660 | /* We need to handle NMIs before interrupts are enabled */ |
3662 | if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && | 3661 | if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && |
3663 | (exit_intr_info & INTR_INFO_VALID_MASK)) | 3662 | (exit_intr_info & INTR_INFO_VALID_MASK)) { |
3663 | kvm_before_handle_nmi(&vmx->vcpu); | ||
3664 | asm("int $2"); | 3664 | asm("int $2"); |
3665 | kvm_after_handle_nmi(&vmx->vcpu); | ||
3666 | } | ||
3665 | 3667 | ||
3666 | idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; | 3668 | idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; |
3667 | 3669 | ||
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3c4ca98ad27f..dd9bc8fb81ab 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/user-return-notifier.h> | 40 | #include <linux/user-return-notifier.h> |
41 | #include <linux/srcu.h> | 41 | #include <linux/srcu.h> |
42 | #include <linux/slab.h> | 42 | #include <linux/slab.h> |
43 | #include <linux/perf_event.h> | ||
43 | #include <trace/events/kvm.h> | 44 | #include <trace/events/kvm.h> |
44 | #undef TRACE_INCLUDE_FILE | 45 | #undef TRACE_INCLUDE_FILE |
45 | #define CREATE_TRACE_POINTS | 46 | #define CREATE_TRACE_POINTS |
@@ -1712,6 +1713,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | |||
1712 | if (copy_from_user(cpuid_entries, entries, | 1713 | if (copy_from_user(cpuid_entries, entries, |
1713 | cpuid->nent * sizeof(struct kvm_cpuid_entry))) | 1714 | cpuid->nent * sizeof(struct kvm_cpuid_entry))) |
1714 | goto out_free; | 1715 | goto out_free; |
1716 | vcpu_load(vcpu); | ||
1715 | for (i = 0; i < cpuid->nent; i++) { | 1717 | for (i = 0; i < cpuid->nent; i++) { |
1716 | vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; | 1718 | vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function; |
1717 | vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; | 1719 | vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax; |
@@ -1729,6 +1731,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, | |||
1729 | r = 0; | 1731 | r = 0; |
1730 | kvm_apic_set_version(vcpu); | 1732 | kvm_apic_set_version(vcpu); |
1731 | kvm_x86_ops->cpuid_update(vcpu); | 1733 | kvm_x86_ops->cpuid_update(vcpu); |
1734 | vcpu_put(vcpu); | ||
1732 | 1735 | ||
1733 | out_free: | 1736 | out_free: |
1734 | vfree(cpuid_entries); | 1737 | vfree(cpuid_entries); |
@@ -1749,9 +1752,11 @@ static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu, | |||
1749 | if (copy_from_user(&vcpu->arch.cpuid_entries, entries, | 1752 | if (copy_from_user(&vcpu->arch.cpuid_entries, entries, |
1750 | cpuid->nent * sizeof(struct kvm_cpuid_entry2))) | 1753 | cpuid->nent * sizeof(struct kvm_cpuid_entry2))) |
1751 | goto out; | 1754 | goto out; |
1755 | vcpu_load(vcpu); | ||
1752 | vcpu->arch.cpuid_nent = cpuid->nent; | 1756 | vcpu->arch.cpuid_nent = cpuid->nent; |
1753 | kvm_apic_set_version(vcpu); | 1757 | kvm_apic_set_version(vcpu); |
1754 | kvm_x86_ops->cpuid_update(vcpu); | 1758 | kvm_x86_ops->cpuid_update(vcpu); |
1759 | vcpu_put(vcpu); | ||
1755 | return 0; | 1760 | return 0; |
1756 | 1761 | ||
1757 | out: | 1762 | out: |
@@ -3743,6 +3748,51 @@ static void kvm_timer_init(void) | |||
3743 | } | 3748 | } |
3744 | } | 3749 | } |
3745 | 3750 | ||
3751 | static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu); | ||
3752 | |||
3753 | static int kvm_is_in_guest(void) | ||
3754 | { | ||
3755 | return percpu_read(current_vcpu) != NULL; | ||
3756 | } | ||
3757 | |||
3758 | static int kvm_is_user_mode(void) | ||
3759 | { | ||
3760 | int user_mode = 3; | ||
3761 | |||
3762 | if (percpu_read(current_vcpu)) | ||
3763 | user_mode = kvm_x86_ops->get_cpl(percpu_read(current_vcpu)); | ||
3764 | |||
3765 | return user_mode != 0; | ||
3766 | } | ||
3767 | |||
3768 | static unsigned long kvm_get_guest_ip(void) | ||
3769 | { | ||
3770 | unsigned long ip = 0; | ||
3771 | |||
3772 | if (percpu_read(current_vcpu)) | ||
3773 | ip = kvm_rip_read(percpu_read(current_vcpu)); | ||
3774 | |||
3775 | return ip; | ||
3776 | } | ||
3777 | |||
3778 | static struct perf_guest_info_callbacks kvm_guest_cbs = { | ||
3779 | .is_in_guest = kvm_is_in_guest, | ||
3780 | .is_user_mode = kvm_is_user_mode, | ||
3781 | .get_guest_ip = kvm_get_guest_ip, | ||
3782 | }; | ||
3783 | |||
3784 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu) | ||
3785 | { | ||
3786 | percpu_write(current_vcpu, vcpu); | ||
3787 | } | ||
3788 | EXPORT_SYMBOL_GPL(kvm_before_handle_nmi); | ||
3789 | |||
3790 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu) | ||
3791 | { | ||
3792 | percpu_write(current_vcpu, NULL); | ||
3793 | } | ||
3794 | EXPORT_SYMBOL_GPL(kvm_after_handle_nmi); | ||
3795 | |||
3746 | int kvm_arch_init(void *opaque) | 3796 | int kvm_arch_init(void *opaque) |
3747 | { | 3797 | { |
3748 | int r; | 3798 | int r; |
@@ -3779,6 +3829,8 @@ int kvm_arch_init(void *opaque) | |||
3779 | 3829 | ||
3780 | kvm_timer_init(); | 3830 | kvm_timer_init(); |
3781 | 3831 | ||
3832 | perf_register_guest_info_callbacks(&kvm_guest_cbs); | ||
3833 | |||
3782 | return 0; | 3834 | return 0; |
3783 | 3835 | ||
3784 | out: | 3836 | out: |
@@ -3787,6 +3839,8 @@ out: | |||
3787 | 3839 | ||
3788 | void kvm_arch_exit(void) | 3840 | void kvm_arch_exit(void) |
3789 | { | 3841 | { |
3842 | perf_unregister_guest_info_callbacks(&kvm_guest_cbs); | ||
3843 | |||
3790 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | 3844 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) |
3791 | cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block, | 3845 | cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block, |
3792 | CPUFREQ_TRANSITION_NOTIFIER); | 3846 | CPUFREQ_TRANSITION_NOTIFIER); |
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 2d101639bd8d..b7a404722d2b 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -65,4 +65,7 @@ static inline int is_paging(struct kvm_vcpu *vcpu) | |||
65 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); | 65 | return kvm_read_cr0_bits(vcpu, X86_CR0_PG); |
66 | } | 66 | } |
67 | 67 | ||
68 | void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); | ||
69 | void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); | ||
70 | |||
68 | #endif | 71 | #endif |
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 419386c24b82..f871e04b6965 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
@@ -20,17 +20,18 @@ lib-y := delay.o | |||
20 | lib-y += thunk_$(BITS).o | 20 | lib-y += thunk_$(BITS).o |
21 | lib-y += usercopy_$(BITS).o getuser.o putuser.o | 21 | lib-y += usercopy_$(BITS).o getuser.o putuser.o |
22 | lib-y += memcpy_$(BITS).o | 22 | lib-y += memcpy_$(BITS).o |
23 | lib-$(CONFIG_KPROBES) += insn.o inat.o | 23 | lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o |
24 | 24 | ||
25 | obj-y += msr.o msr-reg.o msr-reg-export.o | 25 | obj-y += msr.o msr-reg.o msr-reg-export.o |
26 | 26 | ||
27 | ifeq ($(CONFIG_X86_32),y) | 27 | ifeq ($(CONFIG_X86_32),y) |
28 | obj-y += atomic64_32.o | 28 | obj-y += atomic64_32.o |
29 | lib-y += atomic64_cx8_32.o | ||
29 | lib-y += checksum_32.o | 30 | lib-y += checksum_32.o |
30 | lib-y += strstr_32.o | 31 | lib-y += strstr_32.o |
31 | lib-y += semaphore_32.o string_32.o | 32 | lib-y += semaphore_32.o string_32.o |
32 | ifneq ($(CONFIG_X86_CMPXCHG64),y) | 33 | ifneq ($(CONFIG_X86_CMPXCHG64),y) |
33 | lib-y += cmpxchg8b_emu.o | 34 | lib-y += cmpxchg8b_emu.o atomic64_386_32.o |
34 | endif | 35 | endif |
35 | lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o | 36 | lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o |
36 | else | 37 | else |
diff --git a/arch/x86/lib/atomic64_32.c b/arch/x86/lib/atomic64_32.c index 824fa0be55a3..540179e8e9fa 100644 --- a/arch/x86/lib/atomic64_32.c +++ b/arch/x86/lib/atomic64_32.c | |||
@@ -6,225 +6,54 @@ | |||
6 | #include <asm/cmpxchg.h> | 6 | #include <asm/cmpxchg.h> |
7 | #include <asm/atomic.h> | 7 | #include <asm/atomic.h> |
8 | 8 | ||
9 | static noinline u64 cmpxchg8b(u64 *ptr, u64 old, u64 new) | 9 | long long atomic64_read_cx8(long long, const atomic64_t *v); |
10 | { | 10 | EXPORT_SYMBOL(atomic64_read_cx8); |
11 | u32 low = new; | 11 | long long atomic64_set_cx8(long long, const atomic64_t *v); |
12 | u32 high = new >> 32; | 12 | EXPORT_SYMBOL(atomic64_set_cx8); |
13 | 13 | long long atomic64_xchg_cx8(long long, unsigned high); | |
14 | asm volatile( | 14 | EXPORT_SYMBOL(atomic64_xchg_cx8); |
15 | LOCK_PREFIX "cmpxchg8b %1\n" | 15 | long long atomic64_add_return_cx8(long long a, atomic64_t *v); |
16 | : "+A" (old), "+m" (*ptr) | 16 | EXPORT_SYMBOL(atomic64_add_return_cx8); |
17 | : "b" (low), "c" (high) | 17 | long long atomic64_sub_return_cx8(long long a, atomic64_t *v); |
18 | ); | 18 | EXPORT_SYMBOL(atomic64_sub_return_cx8); |
19 | return old; | 19 | long long atomic64_inc_return_cx8(long long a, atomic64_t *v); |
20 | } | 20 | EXPORT_SYMBOL(atomic64_inc_return_cx8); |
21 | 21 | long long atomic64_dec_return_cx8(long long a, atomic64_t *v); | |
22 | u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val) | 22 | EXPORT_SYMBOL(atomic64_dec_return_cx8); |
23 | { | 23 | long long atomic64_dec_if_positive_cx8(atomic64_t *v); |
24 | return cmpxchg8b(&ptr->counter, old_val, new_val); | 24 | EXPORT_SYMBOL(atomic64_dec_if_positive_cx8); |
25 | } | 25 | int atomic64_inc_not_zero_cx8(atomic64_t *v); |
26 | EXPORT_SYMBOL(atomic64_cmpxchg); | 26 | EXPORT_SYMBOL(atomic64_inc_not_zero_cx8); |
27 | 27 | int atomic64_add_unless_cx8(atomic64_t *v, long long a, long long u); | |
28 | /** | 28 | EXPORT_SYMBOL(atomic64_add_unless_cx8); |
29 | * atomic64_xchg - xchg atomic64 variable | 29 | |
30 | * @ptr: pointer to type atomic64_t | 30 | #ifndef CONFIG_X86_CMPXCHG64 |
31 | * @new_val: value to assign | 31 | long long atomic64_read_386(long long, const atomic64_t *v); |
32 | * | 32 | EXPORT_SYMBOL(atomic64_read_386); |
33 | * Atomically xchgs the value of @ptr to @new_val and returns | 33 | long long atomic64_set_386(long long, const atomic64_t *v); |
34 | * the old value. | 34 | EXPORT_SYMBOL(atomic64_set_386); |
35 | */ | 35 | long long atomic64_xchg_386(long long, unsigned high); |
36 | u64 atomic64_xchg(atomic64_t *ptr, u64 new_val) | 36 | EXPORT_SYMBOL(atomic64_xchg_386); |
37 | { | 37 | long long atomic64_add_return_386(long long a, atomic64_t *v); |
38 | /* | 38 | EXPORT_SYMBOL(atomic64_add_return_386); |
39 | * Try first with a (possibly incorrect) assumption about | 39 | long long atomic64_sub_return_386(long long a, atomic64_t *v); |
40 | * what we have there. We'll do two loops most likely, | 40 | EXPORT_SYMBOL(atomic64_sub_return_386); |
41 | * but we'll get an ownership MESI transaction straight away | 41 | long long atomic64_inc_return_386(long long a, atomic64_t *v); |
42 | * instead of a read transaction followed by a | 42 | EXPORT_SYMBOL(atomic64_inc_return_386); |
43 | * flush-for-ownership transaction: | 43 | long long atomic64_dec_return_386(long long a, atomic64_t *v); |
44 | */ | 44 | EXPORT_SYMBOL(atomic64_dec_return_386); |
45 | u64 old_val, real_val = 0; | 45 | long long atomic64_add_386(long long a, atomic64_t *v); |
46 | 46 | EXPORT_SYMBOL(atomic64_add_386); | |
47 | do { | 47 | long long atomic64_sub_386(long long a, atomic64_t *v); |
48 | old_val = real_val; | 48 | EXPORT_SYMBOL(atomic64_sub_386); |
49 | 49 | long long atomic64_inc_386(long long a, atomic64_t *v); | |
50 | real_val = atomic64_cmpxchg(ptr, old_val, new_val); | 50 | EXPORT_SYMBOL(atomic64_inc_386); |
51 | 51 | long long atomic64_dec_386(long long a, atomic64_t *v); | |
52 | } while (real_val != old_val); | 52 | EXPORT_SYMBOL(atomic64_dec_386); |
53 | 53 | long long atomic64_dec_if_positive_386(atomic64_t *v); | |
54 | return old_val; | 54 | EXPORT_SYMBOL(atomic64_dec_if_positive_386); |
55 | } | 55 | int atomic64_inc_not_zero_386(atomic64_t *v); |
56 | EXPORT_SYMBOL(atomic64_xchg); | 56 | EXPORT_SYMBOL(atomic64_inc_not_zero_386); |
57 | 57 | int atomic64_add_unless_386(atomic64_t *v, long long a, long long u); | |
58 | /** | 58 | EXPORT_SYMBOL(atomic64_add_unless_386); |
59 | * atomic64_set - set atomic64 variable | 59 | #endif |
60 | * @ptr: pointer to type atomic64_t | ||
61 | * @new_val: value to assign | ||
62 | * | ||
63 | * Atomically sets the value of @ptr to @new_val. | ||
64 | */ | ||
65 | void atomic64_set(atomic64_t *ptr, u64 new_val) | ||
66 | { | ||
67 | atomic64_xchg(ptr, new_val); | ||
68 | } | ||
69 | EXPORT_SYMBOL(atomic64_set); | ||
70 | |||
71 | /** | ||
72 | EXPORT_SYMBOL(atomic64_read); | ||
73 | * atomic64_add_return - add and return | ||
74 | * @delta: integer value to add | ||
75 | * @ptr: pointer to type atomic64_t | ||
76 | * | ||
77 | * Atomically adds @delta to @ptr and returns @delta + *@ptr | ||
78 | */ | ||
79 | noinline u64 atomic64_add_return(u64 delta, atomic64_t *ptr) | ||
80 | { | ||
81 | /* | ||
82 | * Try first with a (possibly incorrect) assumption about | ||
83 | * what we have there. We'll do two loops most likely, | ||
84 | * but we'll get an ownership MESI transaction straight away | ||
85 | * instead of a read transaction followed by a | ||
86 | * flush-for-ownership transaction: | ||
87 | */ | ||
88 | u64 old_val, new_val, real_val = 0; | ||
89 | |||
90 | do { | ||
91 | old_val = real_val; | ||
92 | new_val = old_val + delta; | ||
93 | |||
94 | real_val = atomic64_cmpxchg(ptr, old_val, new_val); | ||
95 | |||
96 | } while (real_val != old_val); | ||
97 | |||
98 | return new_val; | ||
99 | } | ||
100 | EXPORT_SYMBOL(atomic64_add_return); | ||
101 | |||
102 | u64 atomic64_sub_return(u64 delta, atomic64_t *ptr) | ||
103 | { | ||
104 | return atomic64_add_return(-delta, ptr); | ||
105 | } | ||
106 | EXPORT_SYMBOL(atomic64_sub_return); | ||
107 | |||
108 | u64 atomic64_inc_return(atomic64_t *ptr) | ||
109 | { | ||
110 | return atomic64_add_return(1, ptr); | ||
111 | } | ||
112 | EXPORT_SYMBOL(atomic64_inc_return); | ||
113 | |||
114 | u64 atomic64_dec_return(atomic64_t *ptr) | ||
115 | { | ||
116 | return atomic64_sub_return(1, ptr); | ||
117 | } | ||
118 | EXPORT_SYMBOL(atomic64_dec_return); | ||
119 | |||
120 | /** | ||
121 | * atomic64_add - add integer to atomic64 variable | ||
122 | * @delta: integer value to add | ||
123 | * @ptr: pointer to type atomic64_t | ||
124 | * | ||
125 | * Atomically adds @delta to @ptr. | ||
126 | */ | ||
127 | void atomic64_add(u64 delta, atomic64_t *ptr) | ||
128 | { | ||
129 | atomic64_add_return(delta, ptr); | ||
130 | } | ||
131 | EXPORT_SYMBOL(atomic64_add); | ||
132 | |||
133 | /** | ||
134 | * atomic64_sub - subtract the atomic64 variable | ||
135 | * @delta: integer value to subtract | ||
136 | * @ptr: pointer to type atomic64_t | ||
137 | * | ||
138 | * Atomically subtracts @delta from @ptr. | ||
139 | */ | ||
140 | void atomic64_sub(u64 delta, atomic64_t *ptr) | ||
141 | { | ||
142 | atomic64_add(-delta, ptr); | ||
143 | } | ||
144 | EXPORT_SYMBOL(atomic64_sub); | ||
145 | |||
146 | /** | ||
147 | * atomic64_sub_and_test - subtract value from variable and test result | ||
148 | * @delta: integer value to subtract | ||
149 | * @ptr: pointer to type atomic64_t | ||
150 | * | ||
151 | * Atomically subtracts @delta from @ptr and returns | ||
152 | * true if the result is zero, or false for all | ||
153 | * other cases. | ||
154 | */ | ||
155 | int atomic64_sub_and_test(u64 delta, atomic64_t *ptr) | ||
156 | { | ||
157 | u64 new_val = atomic64_sub_return(delta, ptr); | ||
158 | |||
159 | return new_val == 0; | ||
160 | } | ||
161 | EXPORT_SYMBOL(atomic64_sub_and_test); | ||
162 | |||
163 | /** | ||
164 | * atomic64_inc - increment atomic64 variable | ||
165 | * @ptr: pointer to type atomic64_t | ||
166 | * | ||
167 | * Atomically increments @ptr by 1. | ||
168 | */ | ||
169 | void atomic64_inc(atomic64_t *ptr) | ||
170 | { | ||
171 | atomic64_add(1, ptr); | ||
172 | } | ||
173 | EXPORT_SYMBOL(atomic64_inc); | ||
174 | |||
175 | /** | ||
176 | * atomic64_dec - decrement atomic64 variable | ||
177 | * @ptr: pointer to type atomic64_t | ||
178 | * | ||
179 | * Atomically decrements @ptr by 1. | ||
180 | */ | ||
181 | void atomic64_dec(atomic64_t *ptr) | ||
182 | { | ||
183 | atomic64_sub(1, ptr); | ||
184 | } | ||
185 | EXPORT_SYMBOL(atomic64_dec); | ||
186 | |||
187 | /** | ||
188 | * atomic64_dec_and_test - decrement and test | ||
189 | * @ptr: pointer to type atomic64_t | ||
190 | * | ||
191 | * Atomically decrements @ptr by 1 and | ||
192 | * returns true if the result is 0, or false for all other | ||
193 | * cases. | ||
194 | */ | ||
195 | int atomic64_dec_and_test(atomic64_t *ptr) | ||
196 | { | ||
197 | return atomic64_sub_and_test(1, ptr); | ||
198 | } | ||
199 | EXPORT_SYMBOL(atomic64_dec_and_test); | ||
200 | |||
201 | /** | ||
202 | * atomic64_inc_and_test - increment and test | ||
203 | * @ptr: pointer to type atomic64_t | ||
204 | * | ||
205 | * Atomically increments @ptr by 1 | ||
206 | * and returns true if the result is zero, or false for all | ||
207 | * other cases. | ||
208 | */ | ||
209 | int atomic64_inc_and_test(atomic64_t *ptr) | ||
210 | { | ||
211 | return atomic64_sub_and_test(-1, ptr); | ||
212 | } | ||
213 | EXPORT_SYMBOL(atomic64_inc_and_test); | ||
214 | |||
215 | /** | ||
216 | * atomic64_add_negative - add and test if negative | ||
217 | * @delta: integer value to add | ||
218 | * @ptr: pointer to type atomic64_t | ||
219 | * | ||
220 | * Atomically adds @delta to @ptr and returns true | ||
221 | * if the result is negative, or false when | ||
222 | * result is greater than or equal to zero. | ||
223 | */ | ||
224 | int atomic64_add_negative(u64 delta, atomic64_t *ptr) | ||
225 | { | ||
226 | s64 new_val = atomic64_add_return(delta, ptr); | ||
227 | |||
228 | return new_val < 0; | ||
229 | } | ||
230 | EXPORT_SYMBOL(atomic64_add_negative); | ||
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S new file mode 100644 index 000000000000..4a5979aa6883 --- /dev/null +++ b/arch/x86/lib/atomic64_386_32.S | |||
@@ -0,0 +1,174 @@ | |||
1 | /* | ||
2 | * atomic64_t for 386/486 | ||
3 | * | ||
4 | * Copyright © 2010 Luca Barbieri | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/linkage.h> | ||
13 | #include <asm/alternative-asm.h> | ||
14 | #include <asm/dwarf2.h> | ||
15 | |||
16 | /* if you want SMP support, implement these with real spinlocks */ | ||
17 | .macro LOCK reg | ||
18 | pushfl | ||
19 | CFI_ADJUST_CFA_OFFSET 4 | ||
20 | cli | ||
21 | .endm | ||
22 | |||
23 | .macro UNLOCK reg | ||
24 | popfl | ||
25 | CFI_ADJUST_CFA_OFFSET -4 | ||
26 | .endm | ||
27 | |||
28 | .macro BEGIN func reg | ||
29 | $v = \reg | ||
30 | |||
31 | ENTRY(atomic64_\func\()_386) | ||
32 | CFI_STARTPROC | ||
33 | LOCK $v | ||
34 | |||
35 | .macro RETURN | ||
36 | UNLOCK $v | ||
37 | ret | ||
38 | .endm | ||
39 | |||
40 | .macro END_ | ||
41 | CFI_ENDPROC | ||
42 | ENDPROC(atomic64_\func\()_386) | ||
43 | .purgem RETURN | ||
44 | .purgem END_ | ||
45 | .purgem END | ||
46 | .endm | ||
47 | |||
48 | .macro END | ||
49 | RETURN | ||
50 | END_ | ||
51 | .endm | ||
52 | .endm | ||
53 | |||
54 | BEGIN read %ecx | ||
55 | movl ($v), %eax | ||
56 | movl 4($v), %edx | ||
57 | END | ||
58 | |||
59 | BEGIN set %esi | ||
60 | movl %ebx, ($v) | ||
61 | movl %ecx, 4($v) | ||
62 | END | ||
63 | |||
64 | BEGIN xchg %esi | ||
65 | movl ($v), %eax | ||
66 | movl 4($v), %edx | ||
67 | movl %ebx, ($v) | ||
68 | movl %ecx, 4($v) | ||
69 | END | ||
70 | |||
71 | BEGIN add %ecx | ||
72 | addl %eax, ($v) | ||
73 | adcl %edx, 4($v) | ||
74 | END | ||
75 | |||
76 | BEGIN add_return %ecx | ||
77 | addl ($v), %eax | ||
78 | adcl 4($v), %edx | ||
79 | movl %eax, ($v) | ||
80 | movl %edx, 4($v) | ||
81 | END | ||
82 | |||
83 | BEGIN sub %ecx | ||
84 | subl %eax, ($v) | ||
85 | sbbl %edx, 4($v) | ||
86 | END | ||
87 | |||
88 | BEGIN sub_return %ecx | ||
89 | negl %edx | ||
90 | negl %eax | ||
91 | sbbl $0, %edx | ||
92 | addl ($v), %eax | ||
93 | adcl 4($v), %edx | ||
94 | movl %eax, ($v) | ||
95 | movl %edx, 4($v) | ||
96 | END | ||
97 | |||
98 | BEGIN inc %esi | ||
99 | addl $1, ($v) | ||
100 | adcl $0, 4($v) | ||
101 | END | ||
102 | |||
103 | BEGIN inc_return %esi | ||
104 | movl ($v), %eax | ||
105 | movl 4($v), %edx | ||
106 | addl $1, %eax | ||
107 | adcl $0, %edx | ||
108 | movl %eax, ($v) | ||
109 | movl %edx, 4($v) | ||
110 | END | ||
111 | |||
112 | BEGIN dec %esi | ||
113 | subl $1, ($v) | ||
114 | sbbl $0, 4($v) | ||
115 | END | ||
116 | |||
117 | BEGIN dec_return %esi | ||
118 | movl ($v), %eax | ||
119 | movl 4($v), %edx | ||
120 | subl $1, %eax | ||
121 | sbbl $0, %edx | ||
122 | movl %eax, ($v) | ||
123 | movl %edx, 4($v) | ||
124 | END | ||
125 | |||
126 | BEGIN add_unless %ecx | ||
127 | addl %eax, %esi | ||
128 | adcl %edx, %edi | ||
129 | addl ($v), %eax | ||
130 | adcl 4($v), %edx | ||
131 | cmpl %eax, %esi | ||
132 | je 3f | ||
133 | 1: | ||
134 | movl %eax, ($v) | ||
135 | movl %edx, 4($v) | ||
136 | movl $1, %eax | ||
137 | 2: | ||
138 | RETURN | ||
139 | 3: | ||
140 | cmpl %edx, %edi | ||
141 | jne 1b | ||
142 | xorl %eax, %eax | ||
143 | jmp 2b | ||
144 | END_ | ||
145 | |||
146 | BEGIN inc_not_zero %esi | ||
147 | movl ($v), %eax | ||
148 | movl 4($v), %edx | ||
149 | testl %eax, %eax | ||
150 | je 3f | ||
151 | 1: | ||
152 | addl $1, %eax | ||
153 | adcl $0, %edx | ||
154 | movl %eax, ($v) | ||
155 | movl %edx, 4($v) | ||
156 | movl $1, %eax | ||
157 | 2: | ||
158 | RETURN | ||
159 | 3: | ||
160 | testl %edx, %edx | ||
161 | jne 1b | ||
162 | jmp 2b | ||
163 | END_ | ||
164 | |||
165 | BEGIN dec_if_positive %esi | ||
166 | movl ($v), %eax | ||
167 | movl 4($v), %edx | ||
168 | subl $1, %eax | ||
169 | sbbl $0, %edx | ||
170 | js 1f | ||
171 | movl %eax, ($v) | ||
172 | movl %edx, 4($v) | ||
173 | 1: | ||
174 | END | ||
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S new file mode 100644 index 000000000000..71e080de3352 --- /dev/null +++ b/arch/x86/lib/atomic64_cx8_32.S | |||
@@ -0,0 +1,224 @@ | |||
1 | /* | ||
2 | * atomic64_t for 586+ | ||
3 | * | ||
4 | * Copyright © 2010 Luca Barbieri | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <linux/linkage.h> | ||
13 | #include <asm/alternative-asm.h> | ||
14 | #include <asm/dwarf2.h> | ||
15 | |||
16 | .macro SAVE reg | ||
17 | pushl %\reg | ||
18 | CFI_ADJUST_CFA_OFFSET 4 | ||
19 | CFI_REL_OFFSET \reg, 0 | ||
20 | .endm | ||
21 | |||
22 | .macro RESTORE reg | ||
23 | popl %\reg | ||
24 | CFI_ADJUST_CFA_OFFSET -4 | ||
25 | CFI_RESTORE \reg | ||
26 | .endm | ||
27 | |||
28 | .macro read64 reg | ||
29 | movl %ebx, %eax | ||
30 | movl %ecx, %edx | ||
31 | /* we need LOCK_PREFIX since otherwise cmpxchg8b always does the write */ | ||
32 | LOCK_PREFIX | ||
33 | cmpxchg8b (\reg) | ||
34 | .endm | ||
35 | |||
36 | ENTRY(atomic64_read_cx8) | ||
37 | CFI_STARTPROC | ||
38 | |||
39 | read64 %ecx | ||
40 | ret | ||
41 | CFI_ENDPROC | ||
42 | ENDPROC(atomic64_read_cx8) | ||
43 | |||
44 | ENTRY(atomic64_set_cx8) | ||
45 | CFI_STARTPROC | ||
46 | |||
47 | 1: | ||
48 | /* we don't need LOCK_PREFIX since aligned 64-bit writes | ||
49 | * are atomic on 586 and newer */ | ||
50 | cmpxchg8b (%esi) | ||
51 | jne 1b | ||
52 | |||
53 | ret | ||
54 | CFI_ENDPROC | ||
55 | ENDPROC(atomic64_set_cx8) | ||
56 | |||
57 | ENTRY(atomic64_xchg_cx8) | ||
58 | CFI_STARTPROC | ||
59 | |||
60 | movl %ebx, %eax | ||
61 | movl %ecx, %edx | ||
62 | 1: | ||
63 | LOCK_PREFIX | ||
64 | cmpxchg8b (%esi) | ||
65 | jne 1b | ||
66 | |||
67 | ret | ||
68 | CFI_ENDPROC | ||
69 | ENDPROC(atomic64_xchg_cx8) | ||
70 | |||
71 | .macro addsub_return func ins insc | ||
72 | ENTRY(atomic64_\func\()_return_cx8) | ||
73 | CFI_STARTPROC | ||
74 | SAVE ebp | ||
75 | SAVE ebx | ||
76 | SAVE esi | ||
77 | SAVE edi | ||
78 | |||
79 | movl %eax, %esi | ||
80 | movl %edx, %edi | ||
81 | movl %ecx, %ebp | ||
82 | |||
83 | read64 %ebp | ||
84 | 1: | ||
85 | movl %eax, %ebx | ||
86 | movl %edx, %ecx | ||
87 | \ins\()l %esi, %ebx | ||
88 | \insc\()l %edi, %ecx | ||
89 | LOCK_PREFIX | ||
90 | cmpxchg8b (%ebp) | ||
91 | jne 1b | ||
92 | |||
93 | 10: | ||
94 | movl %ebx, %eax | ||
95 | movl %ecx, %edx | ||
96 | RESTORE edi | ||
97 | RESTORE esi | ||
98 | RESTORE ebx | ||
99 | RESTORE ebp | ||
100 | ret | ||
101 | CFI_ENDPROC | ||
102 | ENDPROC(atomic64_\func\()_return_cx8) | ||
103 | .endm | ||
104 | |||
105 | addsub_return add add adc | ||
106 | addsub_return sub sub sbb | ||
107 | |||
108 | .macro incdec_return func ins insc | ||
109 | ENTRY(atomic64_\func\()_return_cx8) | ||
110 | CFI_STARTPROC | ||
111 | SAVE ebx | ||
112 | |||
113 | read64 %esi | ||
114 | 1: | ||
115 | movl %eax, %ebx | ||
116 | movl %edx, %ecx | ||
117 | \ins\()l $1, %ebx | ||
118 | \insc\()l $0, %ecx | ||
119 | LOCK_PREFIX | ||
120 | cmpxchg8b (%esi) | ||
121 | jne 1b | ||
122 | |||
123 | 10: | ||
124 | movl %ebx, %eax | ||
125 | movl %ecx, %edx | ||
126 | RESTORE ebx | ||
127 | ret | ||
128 | CFI_ENDPROC | ||
129 | ENDPROC(atomic64_\func\()_return_cx8) | ||
130 | .endm | ||
131 | |||
132 | incdec_return inc add adc | ||
133 | incdec_return dec sub sbb | ||
134 | |||
135 | ENTRY(atomic64_dec_if_positive_cx8) | ||
136 | CFI_STARTPROC | ||
137 | SAVE ebx | ||
138 | |||
139 | read64 %esi | ||
140 | 1: | ||
141 | movl %eax, %ebx | ||
142 | movl %edx, %ecx | ||
143 | subl $1, %ebx | ||
144 | sbb $0, %ecx | ||
145 | js 2f | ||
146 | LOCK_PREFIX | ||
147 | cmpxchg8b (%esi) | ||
148 | jne 1b | ||
149 | |||
150 | 2: | ||
151 | movl %ebx, %eax | ||
152 | movl %ecx, %edx | ||
153 | RESTORE ebx | ||
154 | ret | ||
155 | CFI_ENDPROC | ||
156 | ENDPROC(atomic64_dec_if_positive_cx8) | ||
157 | |||
158 | ENTRY(atomic64_add_unless_cx8) | ||
159 | CFI_STARTPROC | ||
160 | SAVE ebp | ||
161 | SAVE ebx | ||
162 | /* these just push these two parameters on the stack */ | ||
163 | SAVE edi | ||
164 | SAVE esi | ||
165 | |||
166 | movl %ecx, %ebp | ||
167 | movl %eax, %esi | ||
168 | movl %edx, %edi | ||
169 | |||
170 | read64 %ebp | ||
171 | 1: | ||
172 | cmpl %eax, 0(%esp) | ||
173 | je 4f | ||
174 | 2: | ||
175 | movl %eax, %ebx | ||
176 | movl %edx, %ecx | ||
177 | addl %esi, %ebx | ||
178 | adcl %edi, %ecx | ||
179 | LOCK_PREFIX | ||
180 | cmpxchg8b (%ebp) | ||
181 | jne 1b | ||
182 | |||
183 | movl $1, %eax | ||
184 | 3: | ||
185 | addl $8, %esp | ||
186 | CFI_ADJUST_CFA_OFFSET -8 | ||
187 | RESTORE ebx | ||
188 | RESTORE ebp | ||
189 | ret | ||
190 | 4: | ||
191 | cmpl %edx, 4(%esp) | ||
192 | jne 2b | ||
193 | xorl %eax, %eax | ||
194 | jmp 3b | ||
195 | CFI_ENDPROC | ||
196 | ENDPROC(atomic64_add_unless_cx8) | ||
197 | |||
198 | ENTRY(atomic64_inc_not_zero_cx8) | ||
199 | CFI_STARTPROC | ||
200 | SAVE ebx | ||
201 | |||
202 | read64 %esi | ||
203 | 1: | ||
204 | testl %eax, %eax | ||
205 | je 4f | ||
206 | 2: | ||
207 | movl %eax, %ebx | ||
208 | movl %edx, %ecx | ||
209 | addl $1, %ebx | ||
210 | adcl $0, %ecx | ||
211 | LOCK_PREFIX | ||
212 | cmpxchg8b (%esi) | ||
213 | jne 1b | ||
214 | |||
215 | movl $1, %eax | ||
216 | 3: | ||
217 | RESTORE ebx | ||
218 | ret | ||
219 | 4: | ||
220 | testl %edx, %edx | ||
221 | jne 2b | ||
222 | jmp 3b | ||
223 | CFI_ENDPROC | ||
224 | ENDPROC(atomic64_inc_not_zero_cx8) | ||
diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c index aa0987088774..dc8adad10a2f 100644 --- a/arch/x86/math-emu/fpu_aux.c +++ b/arch/x86/math-emu/fpu_aux.c | |||
@@ -30,10 +30,10 @@ static void fclex(void) | |||
30 | } | 30 | } |
31 | 31 | ||
32 | /* Needs to be externally visible */ | 32 | /* Needs to be externally visible */ |
33 | void finit_task(struct task_struct *tsk) | 33 | void finit_soft_fpu(struct i387_soft_struct *soft) |
34 | { | 34 | { |
35 | struct i387_soft_struct *soft = &tsk->thread.xstate->soft; | ||
36 | struct address *oaddr, *iaddr; | 35 | struct address *oaddr, *iaddr; |
36 | memset(soft, 0, sizeof(*soft)); | ||
37 | soft->cwd = 0x037f; | 37 | soft->cwd = 0x037f; |
38 | soft->swd = 0; | 38 | soft->swd = 0; |
39 | soft->ftop = 0; /* We don't keep top in the status word internally. */ | 39 | soft->ftop = 0; /* We don't keep top in the status word internally. */ |
@@ -52,7 +52,7 @@ void finit_task(struct task_struct *tsk) | |||
52 | 52 | ||
53 | void finit(void) | 53 | void finit(void) |
54 | { | 54 | { |
55 | finit_task(current); | 55 | finit_soft_fpu(¤t->thread.fpu.state->soft); |
56 | } | 56 | } |
57 | 57 | ||
58 | /* | 58 | /* |
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 5d87f586f8d7..7718541541d4 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c | |||
@@ -681,7 +681,7 @@ int fpregs_soft_set(struct task_struct *target, | |||
681 | unsigned int pos, unsigned int count, | 681 | unsigned int pos, unsigned int count, |
682 | const void *kbuf, const void __user *ubuf) | 682 | const void *kbuf, const void __user *ubuf) |
683 | { | 683 | { |
684 | struct i387_soft_struct *s387 = &target->thread.xstate->soft; | 684 | struct i387_soft_struct *s387 = &target->thread.fpu.state->soft; |
685 | void *space = s387->st_space; | 685 | void *space = s387->st_space; |
686 | int ret; | 686 | int ret; |
687 | int offset, other, i, tags, regnr, tag, newtop; | 687 | int offset, other, i, tags, regnr, tag, newtop; |
@@ -733,7 +733,7 @@ int fpregs_soft_get(struct task_struct *target, | |||
733 | unsigned int pos, unsigned int count, | 733 | unsigned int pos, unsigned int count, |
734 | void *kbuf, void __user *ubuf) | 734 | void *kbuf, void __user *ubuf) |
735 | { | 735 | { |
736 | struct i387_soft_struct *s387 = &target->thread.xstate->soft; | 736 | struct i387_soft_struct *s387 = &target->thread.fpu.state->soft; |
737 | const void *space = s387->st_space; | 737 | const void *space = s387->st_space; |
738 | int ret; | 738 | int ret; |
739 | int offset = (S387->ftop & 7) * 10, other = 80 - offset; | 739 | int offset = (S387->ftop & 7) * 10, other = 80 - offset; |
diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h index 50fa0ec2c8a5..2c614410a5f3 100644 --- a/arch/x86/math-emu/fpu_system.h +++ b/arch/x86/math-emu/fpu_system.h | |||
@@ -31,7 +31,7 @@ | |||
31 | #define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \ | 31 | #define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \ |
32 | == (1 << 10)) | 32 | == (1 << 10)) |
33 | 33 | ||
34 | #define I387 (current->thread.xstate) | 34 | #define I387 (current->thread.fpu.state) |
35 | #define FPU_info (I387->soft.info) | 35 | #define FPU_info (I387->soft.info) |
36 | 36 | ||
37 | #define FPU_CS (*(unsigned short *) &(FPU_info->regs->cs)) | 37 | #define FPU_CS (*(unsigned short *) &(FPU_info->regs->cs)) |
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 06630d26e56d..a4c768397baa 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
@@ -6,6 +6,7 @@ nostackp := $(call cc-option, -fno-stack-protector) | |||
6 | CFLAGS_physaddr.o := $(nostackp) | 6 | CFLAGS_physaddr.o := $(nostackp) |
7 | CFLAGS_setup_nx.o := $(nostackp) | 7 | CFLAGS_setup_nx.o := $(nostackp) |
8 | 8 | ||
9 | obj-$(CONFIG_X86_PAT) += pat_rbtree.o | ||
9 | obj-$(CONFIG_SMP) += tlb.o | 10 | obj-$(CONFIG_SMP) += tlb.o |
10 | 11 | ||
11 | obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o | 12 | obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o |
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index edc8b95afc1a..bbe5502ee1cb 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
@@ -30,6 +30,8 @@ | |||
30 | #include <asm/pat.h> | 30 | #include <asm/pat.h> |
31 | #include <asm/io.h> | 31 | #include <asm/io.h> |
32 | 32 | ||
33 | #include "pat_internal.h" | ||
34 | |||
33 | #ifdef CONFIG_X86_PAT | 35 | #ifdef CONFIG_X86_PAT |
34 | int __read_mostly pat_enabled = 1; | 36 | int __read_mostly pat_enabled = 1; |
35 | 37 | ||
@@ -53,19 +55,15 @@ static inline void pat_disable(const char *reason) | |||
53 | #endif | 55 | #endif |
54 | 56 | ||
55 | 57 | ||
56 | static int debug_enable; | 58 | int pat_debug_enable; |
57 | 59 | ||
58 | static int __init pat_debug_setup(char *str) | 60 | static int __init pat_debug_setup(char *str) |
59 | { | 61 | { |
60 | debug_enable = 1; | 62 | pat_debug_enable = 1; |
61 | return 0; | 63 | return 0; |
62 | } | 64 | } |
63 | __setup("debugpat", pat_debug_setup); | 65 | __setup("debugpat", pat_debug_setup); |
64 | 66 | ||
65 | #define dprintk(fmt, arg...) \ | ||
66 | do { if (debug_enable) printk(KERN_INFO fmt, ##arg); } while (0) | ||
67 | |||
68 | |||
69 | static u64 __read_mostly boot_pat_state; | 67 | static u64 __read_mostly boot_pat_state; |
70 | 68 | ||
71 | enum { | 69 | enum { |
@@ -132,84 +130,7 @@ void pat_init(void) | |||
132 | 130 | ||
133 | #undef PAT | 131 | #undef PAT |
134 | 132 | ||
135 | static char *cattr_name(unsigned long flags) | 133 | static DEFINE_SPINLOCK(memtype_lock); /* protects memtype accesses */ |
136 | { | ||
137 | switch (flags & _PAGE_CACHE_MASK) { | ||
138 | case _PAGE_CACHE_UC: return "uncached"; | ||
139 | case _PAGE_CACHE_UC_MINUS: return "uncached-minus"; | ||
140 | case _PAGE_CACHE_WB: return "write-back"; | ||
141 | case _PAGE_CACHE_WC: return "write-combining"; | ||
142 | default: return "broken"; | ||
143 | } | ||
144 | } | ||
145 | |||
146 | /* | ||
147 | * The global memtype list keeps track of memory type for specific | ||
148 | * physical memory areas. Conflicting memory types in different | ||
149 | * mappings can cause CPU cache corruption. To avoid this we keep track. | ||
150 | * | ||
151 | * The list is sorted based on starting address and can contain multiple | ||
152 | * entries for each address (this allows reference counting for overlapping | ||
153 | * areas). All the aliases have the same cache attributes of course. | ||
154 | * Zero attributes are represented as holes. | ||
155 | * | ||
156 | * The data structure is a list that is also organized as an rbtree | ||
157 | * sorted on the start address of memtype range. | ||
158 | * | ||
159 | * memtype_lock protects both the linear list and rbtree. | ||
160 | */ | ||
161 | |||
162 | struct memtype { | ||
163 | u64 start; | ||
164 | u64 end; | ||
165 | unsigned long type; | ||
166 | struct list_head nd; | ||
167 | struct rb_node rb; | ||
168 | }; | ||
169 | |||
170 | static struct rb_root memtype_rbroot = RB_ROOT; | ||
171 | static LIST_HEAD(memtype_list); | ||
172 | static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ | ||
173 | |||
174 | static struct memtype *memtype_rb_search(struct rb_root *root, u64 start) | ||
175 | { | ||
176 | struct rb_node *node = root->rb_node; | ||
177 | struct memtype *last_lower = NULL; | ||
178 | |||
179 | while (node) { | ||
180 | struct memtype *data = container_of(node, struct memtype, rb); | ||
181 | |||
182 | if (data->start < start) { | ||
183 | last_lower = data; | ||
184 | node = node->rb_right; | ||
185 | } else if (data->start > start) { | ||
186 | node = node->rb_left; | ||
187 | } else | ||
188 | return data; | ||
189 | } | ||
190 | |||
191 | /* Will return NULL if there is no entry with its start <= start */ | ||
192 | return last_lower; | ||
193 | } | ||
194 | |||
195 | static void memtype_rb_insert(struct rb_root *root, struct memtype *data) | ||
196 | { | ||
197 | struct rb_node **new = &(root->rb_node); | ||
198 | struct rb_node *parent = NULL; | ||
199 | |||
200 | while (*new) { | ||
201 | struct memtype *this = container_of(*new, struct memtype, rb); | ||
202 | |||
203 | parent = *new; | ||
204 | if (data->start <= this->start) | ||
205 | new = &((*new)->rb_left); | ||
206 | else if (data->start > this->start) | ||
207 | new = &((*new)->rb_right); | ||
208 | } | ||
209 | |||
210 | rb_link_node(&data->rb, parent, new); | ||
211 | rb_insert_color(&data->rb, root); | ||
212 | } | ||
213 | 134 | ||
214 | /* | 135 | /* |
215 | * Does intersection of PAT memory type and MTRR memory type and returns | 136 | * Does intersection of PAT memory type and MTRR memory type and returns |
@@ -237,33 +158,6 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type) | |||
237 | return req_type; | 158 | return req_type; |
238 | } | 159 | } |
239 | 160 | ||
240 | static int | ||
241 | chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type) | ||
242 | { | ||
243 | if (new->type != entry->type) { | ||
244 | if (type) { | ||
245 | new->type = entry->type; | ||
246 | *type = entry->type; | ||
247 | } else | ||
248 | goto conflict; | ||
249 | } | ||
250 | |||
251 | /* check overlaps with more than one entry in the list */ | ||
252 | list_for_each_entry_continue(entry, &memtype_list, nd) { | ||
253 | if (new->end <= entry->start) | ||
254 | break; | ||
255 | else if (new->type != entry->type) | ||
256 | goto conflict; | ||
257 | } | ||
258 | return 0; | ||
259 | |||
260 | conflict: | ||
261 | printk(KERN_INFO "%s:%d conflicting memory types " | ||
262 | "%Lx-%Lx %s<->%s\n", current->comm, current->pid, new->start, | ||
263 | new->end, cattr_name(new->type), cattr_name(entry->type)); | ||
264 | return -EBUSY; | ||
265 | } | ||
266 | |||
267 | static int pat_pagerange_is_ram(unsigned long start, unsigned long end) | 161 | static int pat_pagerange_is_ram(unsigned long start, unsigned long end) |
268 | { | 162 | { |
269 | int ram_page = 0, not_rampage = 0; | 163 | int ram_page = 0, not_rampage = 0; |
@@ -296,8 +190,6 @@ static int pat_pagerange_is_ram(unsigned long start, unsigned long end) | |||
296 | * Here we do two pass: | 190 | * Here we do two pass: |
297 | * - Find the memtype of all the pages in the range, look for any conflicts | 191 | * - Find the memtype of all the pages in the range, look for any conflicts |
298 | * - In case of no conflicts, set the new memtype for pages in the range | 192 | * - In case of no conflicts, set the new memtype for pages in the range |
299 | * | ||
300 | * Caller must hold memtype_lock for atomicity. | ||
301 | */ | 193 | */ |
302 | static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type, | 194 | static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type, |
303 | unsigned long *new_type) | 195 | unsigned long *new_type) |
@@ -364,9 +256,8 @@ static int free_ram_pages_type(u64 start, u64 end) | |||
364 | int reserve_memtype(u64 start, u64 end, unsigned long req_type, | 256 | int reserve_memtype(u64 start, u64 end, unsigned long req_type, |
365 | unsigned long *new_type) | 257 | unsigned long *new_type) |
366 | { | 258 | { |
367 | struct memtype *new, *entry; | 259 | struct memtype *new; |
368 | unsigned long actual_type; | 260 | unsigned long actual_type; |
369 | struct list_head *where; | ||
370 | int is_range_ram; | 261 | int is_range_ram; |
371 | int err = 0; | 262 | int err = 0; |
372 | 263 | ||
@@ -404,9 +295,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
404 | is_range_ram = pat_pagerange_is_ram(start, end); | 295 | is_range_ram = pat_pagerange_is_ram(start, end); |
405 | if (is_range_ram == 1) { | 296 | if (is_range_ram == 1) { |
406 | 297 | ||
407 | spin_lock(&memtype_lock); | ||
408 | err = reserve_ram_pages_type(start, end, req_type, new_type); | 298 | err = reserve_ram_pages_type(start, end, req_type, new_type); |
409 | spin_unlock(&memtype_lock); | ||
410 | 299 | ||
411 | return err; | 300 | return err; |
412 | } else if (is_range_ram < 0) { | 301 | } else if (is_range_ram < 0) { |
@@ -423,42 +312,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
423 | 312 | ||
424 | spin_lock(&memtype_lock); | 313 | spin_lock(&memtype_lock); |
425 | 314 | ||
426 | /* Search for existing mapping that overlaps the current range */ | 315 | err = rbt_memtype_check_insert(new, new_type); |
427 | where = NULL; | ||
428 | list_for_each_entry(entry, &memtype_list, nd) { | ||
429 | if (end <= entry->start) { | ||
430 | where = entry->nd.prev; | ||
431 | break; | ||
432 | } else if (start <= entry->start) { /* end > entry->start */ | ||
433 | err = chk_conflict(new, entry, new_type); | ||
434 | if (!err) { | ||
435 | dprintk("Overlap at 0x%Lx-0x%Lx\n", | ||
436 | entry->start, entry->end); | ||
437 | where = entry->nd.prev; | ||
438 | } | ||
439 | break; | ||
440 | } else if (start < entry->end) { /* start > entry->start */ | ||
441 | err = chk_conflict(new, entry, new_type); | ||
442 | if (!err) { | ||
443 | dprintk("Overlap at 0x%Lx-0x%Lx\n", | ||
444 | entry->start, entry->end); | ||
445 | |||
446 | /* | ||
447 | * Move to right position in the linked | ||
448 | * list to add this new entry | ||
449 | */ | ||
450 | list_for_each_entry_continue(entry, | ||
451 | &memtype_list, nd) { | ||
452 | if (start <= entry->start) { | ||
453 | where = entry->nd.prev; | ||
454 | break; | ||
455 | } | ||
456 | } | ||
457 | } | ||
458 | break; | ||
459 | } | ||
460 | } | ||
461 | |||
462 | if (err) { | 316 | if (err) { |
463 | printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, " | 317 | printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, " |
464 | "track %s, req %s\n", | 318 | "track %s, req %s\n", |
@@ -469,13 +323,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
469 | return err; | 323 | return err; |
470 | } | 324 | } |
471 | 325 | ||
472 | if (where) | ||
473 | list_add(&new->nd, where); | ||
474 | else | ||
475 | list_add_tail(&new->nd, &memtype_list); | ||
476 | |||
477 | memtype_rb_insert(&memtype_rbroot, new); | ||
478 | |||
479 | spin_unlock(&memtype_lock); | 326 | spin_unlock(&memtype_lock); |
480 | 327 | ||
481 | dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", | 328 | dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", |
@@ -487,7 +334,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
487 | 334 | ||
488 | int free_memtype(u64 start, u64 end) | 335 | int free_memtype(u64 start, u64 end) |
489 | { | 336 | { |
490 | struct memtype *entry, *saved_entry; | ||
491 | int err = -EINVAL; | 337 | int err = -EINVAL; |
492 | int is_range_ram; | 338 | int is_range_ram; |
493 | 339 | ||
@@ -501,9 +347,7 @@ int free_memtype(u64 start, u64 end) | |||
501 | is_range_ram = pat_pagerange_is_ram(start, end); | 347 | is_range_ram = pat_pagerange_is_ram(start, end); |
502 | if (is_range_ram == 1) { | 348 | if (is_range_ram == 1) { |
503 | 349 | ||
504 | spin_lock(&memtype_lock); | ||
505 | err = free_ram_pages_type(start, end); | 350 | err = free_ram_pages_type(start, end); |
506 | spin_unlock(&memtype_lock); | ||
507 | 351 | ||
508 | return err; | 352 | return err; |
509 | } else if (is_range_ram < 0) { | 353 | } else if (is_range_ram < 0) { |
@@ -511,46 +355,7 @@ int free_memtype(u64 start, u64 end) | |||
511 | } | 355 | } |
512 | 356 | ||
513 | spin_lock(&memtype_lock); | 357 | spin_lock(&memtype_lock); |
514 | 358 | err = rbt_memtype_erase(start, end); | |
515 | entry = memtype_rb_search(&memtype_rbroot, start); | ||
516 | if (unlikely(entry == NULL)) | ||
517 | goto unlock_ret; | ||
518 | |||
519 | /* | ||
520 | * Saved entry points to an entry with start same or less than what | ||
521 | * we searched for. Now go through the list in both directions to look | ||
522 | * for the entry that matches with both start and end, with list stored | ||
523 | * in sorted start address | ||
524 | */ | ||
525 | saved_entry = entry; | ||
526 | list_for_each_entry_from(entry, &memtype_list, nd) { | ||
527 | if (entry->start == start && entry->end == end) { | ||
528 | rb_erase(&entry->rb, &memtype_rbroot); | ||
529 | list_del(&entry->nd); | ||
530 | kfree(entry); | ||
531 | err = 0; | ||
532 | break; | ||
533 | } else if (entry->start > start) { | ||
534 | break; | ||
535 | } | ||
536 | } | ||
537 | |||
538 | if (!err) | ||
539 | goto unlock_ret; | ||
540 | |||
541 | entry = saved_entry; | ||
542 | list_for_each_entry_reverse(entry, &memtype_list, nd) { | ||
543 | if (entry->start == start && entry->end == end) { | ||
544 | rb_erase(&entry->rb, &memtype_rbroot); | ||
545 | list_del(&entry->nd); | ||
546 | kfree(entry); | ||
547 | err = 0; | ||
548 | break; | ||
549 | } else if (entry->start < start) { | ||
550 | break; | ||
551 | } | ||
552 | } | ||
553 | unlock_ret: | ||
554 | spin_unlock(&memtype_lock); | 359 | spin_unlock(&memtype_lock); |
555 | 360 | ||
556 | if (err) { | 361 | if (err) { |
@@ -583,10 +388,8 @@ static unsigned long lookup_memtype(u64 paddr) | |||
583 | 388 | ||
584 | if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { | 389 | if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { |
585 | struct page *page; | 390 | struct page *page; |
586 | spin_lock(&memtype_lock); | ||
587 | page = pfn_to_page(paddr >> PAGE_SHIFT); | 391 | page = pfn_to_page(paddr >> PAGE_SHIFT); |
588 | rettype = get_page_memtype(page); | 392 | rettype = get_page_memtype(page); |
589 | spin_unlock(&memtype_lock); | ||
590 | /* | 393 | /* |
591 | * -1 from get_page_memtype() implies RAM page is in its | 394 | * -1 from get_page_memtype() implies RAM page is in its |
592 | * default state and not reserved, and hence of type WB | 395 | * default state and not reserved, and hence of type WB |
@@ -599,7 +402,7 @@ static unsigned long lookup_memtype(u64 paddr) | |||
599 | 402 | ||
600 | spin_lock(&memtype_lock); | 403 | spin_lock(&memtype_lock); |
601 | 404 | ||
602 | entry = memtype_rb_search(&memtype_rbroot, paddr); | 405 | entry = rbt_memtype_lookup(paddr); |
603 | if (entry != NULL) | 406 | if (entry != NULL) |
604 | rettype = entry->type; | 407 | rettype = entry->type; |
605 | else | 408 | else |
@@ -936,29 +739,25 @@ EXPORT_SYMBOL_GPL(pgprot_writecombine); | |||
936 | 739 | ||
937 | #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) | 740 | #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) |
938 | 741 | ||
939 | /* get Nth element of the linked list */ | ||
940 | static struct memtype *memtype_get_idx(loff_t pos) | 742 | static struct memtype *memtype_get_idx(loff_t pos) |
941 | { | 743 | { |
942 | struct memtype *list_node, *print_entry; | 744 | struct memtype *print_entry; |
943 | int i = 1; | 745 | int ret; |
944 | 746 | ||
945 | print_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL); | 747 | print_entry = kzalloc(sizeof(struct memtype), GFP_KERNEL); |
946 | if (!print_entry) | 748 | if (!print_entry) |
947 | return NULL; | 749 | return NULL; |
948 | 750 | ||
949 | spin_lock(&memtype_lock); | 751 | spin_lock(&memtype_lock); |
950 | list_for_each_entry(list_node, &memtype_list, nd) { | 752 | ret = rbt_memtype_copy_nth_element(print_entry, pos); |
951 | if (pos == i) { | ||
952 | *print_entry = *list_node; | ||
953 | spin_unlock(&memtype_lock); | ||
954 | return print_entry; | ||
955 | } | ||
956 | ++i; | ||
957 | } | ||
958 | spin_unlock(&memtype_lock); | 753 | spin_unlock(&memtype_lock); |
959 | kfree(print_entry); | ||
960 | 754 | ||
961 | return NULL; | 755 | if (!ret) { |
756 | return print_entry; | ||
757 | } else { | ||
758 | kfree(print_entry); | ||
759 | return NULL; | ||
760 | } | ||
962 | } | 761 | } |
963 | 762 | ||
964 | static void *memtype_seq_start(struct seq_file *seq, loff_t *pos) | 763 | static void *memtype_seq_start(struct seq_file *seq, loff_t *pos) |
diff --git a/arch/x86/mm/pat_internal.h b/arch/x86/mm/pat_internal.h new file mode 100644 index 000000000000..4f39eefa3e61 --- /dev/null +++ b/arch/x86/mm/pat_internal.h | |||
@@ -0,0 +1,46 @@ | |||
1 | #ifndef __PAT_INTERNAL_H_ | ||
2 | #define __PAT_INTERNAL_H_ | ||
3 | |||
4 | extern int pat_debug_enable; | ||
5 | |||
6 | #define dprintk(fmt, arg...) \ | ||
7 | do { if (pat_debug_enable) printk(KERN_INFO fmt, ##arg); } while (0) | ||
8 | |||
9 | struct memtype { | ||
10 | u64 start; | ||
11 | u64 end; | ||
12 | u64 subtree_max_end; | ||
13 | unsigned long type; | ||
14 | struct rb_node rb; | ||
15 | }; | ||
16 | |||
17 | static inline char *cattr_name(unsigned long flags) | ||
18 | { | ||
19 | switch (flags & _PAGE_CACHE_MASK) { | ||
20 | case _PAGE_CACHE_UC: return "uncached"; | ||
21 | case _PAGE_CACHE_UC_MINUS: return "uncached-minus"; | ||
22 | case _PAGE_CACHE_WB: return "write-back"; | ||
23 | case _PAGE_CACHE_WC: return "write-combining"; | ||
24 | default: return "broken"; | ||
25 | } | ||
26 | } | ||
27 | |||
28 | #ifdef CONFIG_X86_PAT | ||
29 | extern int rbt_memtype_check_insert(struct memtype *new, | ||
30 | unsigned long *new_type); | ||
31 | extern int rbt_memtype_erase(u64 start, u64 end); | ||
32 | extern struct memtype *rbt_memtype_lookup(u64 addr); | ||
33 | extern int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos); | ||
34 | #else | ||
35 | static inline int rbt_memtype_check_insert(struct memtype *new, | ||
36 | unsigned long *new_type) | ||
37 | { return 0; } | ||
38 | static inline int rbt_memtype_erase(u64 start, u64 end) | ||
39 | { return 0; } | ||
40 | static inline struct memtype *rbt_memtype_lookup(u64 addr) | ||
41 | { return NULL; } | ||
42 | static inline int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos) | ||
43 | { return 0; } | ||
44 | #endif | ||
45 | |||
46 | #endif /* __PAT_INTERNAL_H_ */ | ||
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c new file mode 100644 index 000000000000..07de4cb8cc30 --- /dev/null +++ b/arch/x86/mm/pat_rbtree.c | |||
@@ -0,0 +1,273 @@ | |||
1 | /* | ||
2 | * Handle caching attributes in page tables (PAT) | ||
3 | * | ||
4 | * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> | ||
5 | * Suresh B Siddha <suresh.b.siddha@intel.com> | ||
6 | * | ||
7 | * Interval tree (augmented rbtree) used to store the PAT memory type | ||
8 | * reservations. | ||
9 | */ | ||
10 | |||
11 | #include <linux/seq_file.h> | ||
12 | #include <linux/debugfs.h> | ||
13 | #include <linux/kernel.h> | ||
14 | #include <linux/module.h> | ||
15 | #include <linux/rbtree.h> | ||
16 | #include <linux/sched.h> | ||
17 | #include <linux/gfp.h> | ||
18 | |||
19 | #include <asm/pgtable.h> | ||
20 | #include <asm/pat.h> | ||
21 | |||
22 | #include "pat_internal.h" | ||
23 | |||
24 | /* | ||
25 | * The memtype tree keeps track of memory type for specific | ||
26 | * physical memory areas. Without proper tracking, conflicting memory | ||
27 | * types in different mappings can cause CPU cache corruption. | ||
28 | * | ||
29 | * The tree is an interval tree (augmented rbtree) with tree ordered | ||
30 | * on starting address. Tree can contain multiple entries for | ||
31 | * different regions which overlap. All the aliases have the same | ||
32 | * cache attributes of course. | ||
33 | * | ||
34 | * memtype_lock protects the rbtree. | ||
35 | */ | ||
36 | |||
37 | static void memtype_rb_augment_cb(struct rb_node *node); | ||
38 | static struct rb_root memtype_rbroot = RB_AUGMENT_ROOT(&memtype_rb_augment_cb); | ||
39 | |||
40 | static int is_node_overlap(struct memtype *node, u64 start, u64 end) | ||
41 | { | ||
42 | if (node->start >= end || node->end <= start) | ||
43 | return 0; | ||
44 | |||
45 | return 1; | ||
46 | } | ||
47 | |||
48 | static u64 get_subtree_max_end(struct rb_node *node) | ||
49 | { | ||
50 | u64 ret = 0; | ||
51 | if (node) { | ||
52 | struct memtype *data = container_of(node, struct memtype, rb); | ||
53 | ret = data->subtree_max_end; | ||
54 | } | ||
55 | return ret; | ||
56 | } | ||
57 | |||
58 | /* Update 'subtree_max_end' for a node, based on node and its children */ | ||
59 | static void update_node_max_end(struct rb_node *node) | ||
60 | { | ||
61 | struct memtype *data; | ||
62 | u64 max_end, child_max_end; | ||
63 | |||
64 | if (!node) | ||
65 | return; | ||
66 | |||
67 | data = container_of(node, struct memtype, rb); | ||
68 | max_end = data->end; | ||
69 | |||
70 | child_max_end = get_subtree_max_end(node->rb_right); | ||
71 | if (child_max_end > max_end) | ||
72 | max_end = child_max_end; | ||
73 | |||
74 | child_max_end = get_subtree_max_end(node->rb_left); | ||
75 | if (child_max_end > max_end) | ||
76 | max_end = child_max_end; | ||
77 | |||
78 | data->subtree_max_end = max_end; | ||
79 | } | ||
80 | |||
81 | /* Update 'subtree_max_end' for a node and all its ancestors */ | ||
82 | static void update_path_max_end(struct rb_node *node) | ||
83 | { | ||
84 | u64 old_max_end, new_max_end; | ||
85 | |||
86 | while (node) { | ||
87 | struct memtype *data = container_of(node, struct memtype, rb); | ||
88 | |||
89 | old_max_end = data->subtree_max_end; | ||
90 | update_node_max_end(node); | ||
91 | new_max_end = data->subtree_max_end; | ||
92 | |||
93 | if (new_max_end == old_max_end) | ||
94 | break; | ||
95 | |||
96 | node = rb_parent(node); | ||
97 | } | ||
98 | } | ||
99 | |||
100 | /* Find the first (lowest start addr) overlapping range from rb tree */ | ||
101 | static struct memtype *memtype_rb_lowest_match(struct rb_root *root, | ||
102 | u64 start, u64 end) | ||
103 | { | ||
104 | struct rb_node *node = root->rb_node; | ||
105 | struct memtype *last_lower = NULL; | ||
106 | |||
107 | while (node) { | ||
108 | struct memtype *data = container_of(node, struct memtype, rb); | ||
109 | |||
110 | if (get_subtree_max_end(node->rb_left) > start) { | ||
111 | /* Lowest overlap if any must be on left side */ | ||
112 | node = node->rb_left; | ||
113 | } else if (is_node_overlap(data, start, end)) { | ||
114 | last_lower = data; | ||
115 | break; | ||
116 | } else if (start >= data->start) { | ||
117 | /* Lowest overlap if any must be on right side */ | ||
118 | node = node->rb_right; | ||
119 | } else { | ||
120 | break; | ||
121 | } | ||
122 | } | ||
123 | return last_lower; /* Returns NULL if there is no overlap */ | ||
124 | } | ||
125 | |||
126 | static struct memtype *memtype_rb_exact_match(struct rb_root *root, | ||
127 | u64 start, u64 end) | ||
128 | { | ||
129 | struct memtype *match; | ||
130 | |||
131 | match = memtype_rb_lowest_match(root, start, end); | ||
132 | while (match != NULL && match->start < end) { | ||
133 | struct rb_node *node; | ||
134 | |||
135 | if (match->start == start && match->end == end) | ||
136 | return match; | ||
137 | |||
138 | node = rb_next(&match->rb); | ||
139 | if (node) | ||
140 | match = container_of(node, struct memtype, rb); | ||
141 | else | ||
142 | match = NULL; | ||
143 | } | ||
144 | |||
145 | return NULL; /* Returns NULL if there is no exact match */ | ||
146 | } | ||
147 | |||
148 | static int memtype_rb_check_conflict(struct rb_root *root, | ||
149 | u64 start, u64 end, | ||
150 | unsigned long reqtype, unsigned long *newtype) | ||
151 | { | ||
152 | struct rb_node *node; | ||
153 | struct memtype *match; | ||
154 | int found_type = reqtype; | ||
155 | |||
156 | match = memtype_rb_lowest_match(&memtype_rbroot, start, end); | ||
157 | if (match == NULL) | ||
158 | goto success; | ||
159 | |||
160 | if (match->type != found_type && newtype == NULL) | ||
161 | goto failure; | ||
162 | |||
163 | dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end); | ||
164 | found_type = match->type; | ||
165 | |||
166 | node = rb_next(&match->rb); | ||
167 | while (node) { | ||
168 | match = container_of(node, struct memtype, rb); | ||
169 | |||
170 | if (match->start >= end) /* Checked all possible matches */ | ||
171 | goto success; | ||
172 | |||
173 | if (is_node_overlap(match, start, end) && | ||
174 | match->type != found_type) { | ||
175 | goto failure; | ||
176 | } | ||
177 | |||
178 | node = rb_next(&match->rb); | ||
179 | } | ||
180 | success: | ||
181 | if (newtype) | ||
182 | *newtype = found_type; | ||
183 | |||
184 | return 0; | ||
185 | |||
186 | failure: | ||
187 | printk(KERN_INFO "%s:%d conflicting memory types " | ||
188 | "%Lx-%Lx %s<->%s\n", current->comm, current->pid, start, | ||
189 | end, cattr_name(found_type), cattr_name(match->type)); | ||
190 | return -EBUSY; | ||
191 | } | ||
192 | |||
193 | static void memtype_rb_augment_cb(struct rb_node *node) | ||
194 | { | ||
195 | if (node) | ||
196 | update_path_max_end(node); | ||
197 | } | ||
198 | |||
199 | static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata) | ||
200 | { | ||
201 | struct rb_node **node = &(root->rb_node); | ||
202 | struct rb_node *parent = NULL; | ||
203 | |||
204 | while (*node) { | ||
205 | struct memtype *data = container_of(*node, struct memtype, rb); | ||
206 | |||
207 | parent = *node; | ||
208 | if (newdata->start <= data->start) | ||
209 | node = &((*node)->rb_left); | ||
210 | else if (newdata->start > data->start) | ||
211 | node = &((*node)->rb_right); | ||
212 | } | ||
213 | |||
214 | rb_link_node(&newdata->rb, parent, node); | ||
215 | rb_insert_color(&newdata->rb, root); | ||
216 | } | ||
217 | |||
218 | int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type) | ||
219 | { | ||
220 | int err = 0; | ||
221 | |||
222 | err = memtype_rb_check_conflict(&memtype_rbroot, new->start, new->end, | ||
223 | new->type, ret_type); | ||
224 | |||
225 | if (!err) { | ||
226 | if (ret_type) | ||
227 | new->type = *ret_type; | ||
228 | |||
229 | memtype_rb_insert(&memtype_rbroot, new); | ||
230 | } | ||
231 | return err; | ||
232 | } | ||
233 | |||
234 | int rbt_memtype_erase(u64 start, u64 end) | ||
235 | { | ||
236 | struct memtype *data; | ||
237 | |||
238 | data = memtype_rb_exact_match(&memtype_rbroot, start, end); | ||
239 | if (!data) | ||
240 | return -EINVAL; | ||
241 | |||
242 | rb_erase(&data->rb, &memtype_rbroot); | ||
243 | return 0; | ||
244 | } | ||
245 | |||
246 | struct memtype *rbt_memtype_lookup(u64 addr) | ||
247 | { | ||
248 | struct memtype *data; | ||
249 | data = memtype_rb_lowest_match(&memtype_rbroot, addr, addr + PAGE_SIZE); | ||
250 | return data; | ||
251 | } | ||
252 | |||
253 | #if defined(CONFIG_DEBUG_FS) | ||
254 | int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos) | ||
255 | { | ||
256 | struct rb_node *node; | ||
257 | int i = 1; | ||
258 | |||
259 | node = rb_first(&memtype_rbroot); | ||
260 | while (node && pos != i) { | ||
261 | node = rb_next(node); | ||
262 | i++; | ||
263 | } | ||
264 | |||
265 | if (node) { /* pos == i */ | ||
266 | struct memtype *this = container_of(node, struct memtype, rb); | ||
267 | *out = *this; | ||
268 | return 0; | ||
269 | } else { | ||
270 | return 1; | ||
271 | } | ||
272 | } | ||
273 | #endif | ||
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 28c68762648f..f9897f7a9ef1 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -363,6 +363,54 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
363 | for (i = 0; i < MAX_NUMNODES; i++) | 363 | for (i = 0; i < MAX_NUMNODES; i++) |
364 | cutoff_node(i, start, end); | 364 | cutoff_node(i, start, end); |
365 | 365 | ||
366 | /* | ||
367 | * Join together blocks on the same node, holes between | ||
368 | * which don't overlap with memory on other nodes. | ||
369 | */ | ||
370 | for (i = 0; i < num_node_memblks; ++i) { | ||
371 | int j, k; | ||
372 | |||
373 | for (j = i + 1; j < num_node_memblks; ++j) { | ||
374 | unsigned long start, end; | ||
375 | |||
376 | if (memblk_nodeid[i] != memblk_nodeid[j]) | ||
377 | continue; | ||
378 | start = min(node_memblk_range[i].end, | ||
379 | node_memblk_range[j].end); | ||
380 | end = max(node_memblk_range[i].start, | ||
381 | node_memblk_range[j].start); | ||
382 | for (k = 0; k < num_node_memblks; ++k) { | ||
383 | if (memblk_nodeid[i] == memblk_nodeid[k]) | ||
384 | continue; | ||
385 | if (start < node_memblk_range[k].end && | ||
386 | end > node_memblk_range[k].start) | ||
387 | break; | ||
388 | } | ||
389 | if (k < num_node_memblks) | ||
390 | continue; | ||
391 | start = min(node_memblk_range[i].start, | ||
392 | node_memblk_range[j].start); | ||
393 | end = max(node_memblk_range[i].end, | ||
394 | node_memblk_range[j].end); | ||
395 | printk(KERN_INFO "SRAT: Node %d " | ||
396 | "[%Lx,%Lx) + [%Lx,%Lx) -> [%lx,%lx)\n", | ||
397 | memblk_nodeid[i], | ||
398 | node_memblk_range[i].start, | ||
399 | node_memblk_range[i].end, | ||
400 | node_memblk_range[j].start, | ||
401 | node_memblk_range[j].end, | ||
402 | start, end); | ||
403 | node_memblk_range[i].start = start; | ||
404 | node_memblk_range[i].end = end; | ||
405 | k = --num_node_memblks - j; | ||
406 | memmove(memblk_nodeid + j, memblk_nodeid + j+1, | ||
407 | k * sizeof(*memblk_nodeid)); | ||
408 | memmove(node_memblk_range + j, node_memblk_range + j+1, | ||
409 | k * sizeof(*node_memblk_range)); | ||
410 | --j; | ||
411 | } | ||
412 | } | ||
413 | |||
366 | memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, | 414 | memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, |
367 | memblk_nodeid); | 415 | memblk_nodeid); |
368 | if (memnode_shift < 0) { | 416 | if (memnode_shift < 0) { |
@@ -461,7 +509,8 @@ void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes) | |||
461 | * node, it must now point to the fake node ID. | 509 | * node, it must now point to the fake node ID. |
462 | */ | 510 | */ |
463 | for (j = 0; j < MAX_LOCAL_APIC; j++) | 511 | for (j = 0; j < MAX_LOCAL_APIC; j++) |
464 | if (apicid_to_node[j] == nid) | 512 | if (apicid_to_node[j] == nid && |
513 | fake_apicid_to_node[j] == NUMA_NO_NODE) | ||
465 | fake_apicid_to_node[j] = i; | 514 | fake_apicid_to_node[j] = i; |
466 | } | 515 | } |
467 | for (i = 0; i < num_nodes; i++) | 516 | for (i = 0; i < num_nodes; i++) |
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 2c505ee71014..b28d2f1253bb 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
@@ -31,8 +31,9 @@ static struct op_x86_model_spec *model; | |||
31 | static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); | 31 | static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); |
32 | static DEFINE_PER_CPU(unsigned long, saved_lvtpc); | 32 | static DEFINE_PER_CPU(unsigned long, saved_lvtpc); |
33 | 33 | ||
34 | /* 0 == registered but off, 1 == registered and on */ | 34 | /* must be protected with get_online_cpus()/put_online_cpus(): */ |
35 | static int nmi_enabled = 0; | 35 | static int nmi_enabled; |
36 | static int ctr_running; | ||
36 | 37 | ||
37 | struct op_counter_config counter_config[OP_MAX_COUNTER]; | 38 | struct op_counter_config counter_config[OP_MAX_COUNTER]; |
38 | 39 | ||
@@ -61,12 +62,16 @@ static int profile_exceptions_notify(struct notifier_block *self, | |||
61 | { | 62 | { |
62 | struct die_args *args = (struct die_args *)data; | 63 | struct die_args *args = (struct die_args *)data; |
63 | int ret = NOTIFY_DONE; | 64 | int ret = NOTIFY_DONE; |
64 | int cpu = smp_processor_id(); | ||
65 | 65 | ||
66 | switch (val) { | 66 | switch (val) { |
67 | case DIE_NMI: | 67 | case DIE_NMI: |
68 | case DIE_NMI_IPI: | 68 | case DIE_NMI_IPI: |
69 | model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)); | 69 | if (ctr_running) |
70 | model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs)); | ||
71 | else if (!nmi_enabled) | ||
72 | break; | ||
73 | else | ||
74 | model->stop(&__get_cpu_var(cpu_msrs)); | ||
70 | ret = NOTIFY_STOP; | 75 | ret = NOTIFY_STOP; |
71 | break; | 76 | break; |
72 | default: | 77 | default: |
@@ -95,24 +100,36 @@ static void nmi_cpu_save_registers(struct op_msrs *msrs) | |||
95 | static void nmi_cpu_start(void *dummy) | 100 | static void nmi_cpu_start(void *dummy) |
96 | { | 101 | { |
97 | struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); | 102 | struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); |
98 | model->start(msrs); | 103 | if (!msrs->controls) |
104 | WARN_ON_ONCE(1); | ||
105 | else | ||
106 | model->start(msrs); | ||
99 | } | 107 | } |
100 | 108 | ||
101 | static int nmi_start(void) | 109 | static int nmi_start(void) |
102 | { | 110 | { |
111 | get_online_cpus(); | ||
103 | on_each_cpu(nmi_cpu_start, NULL, 1); | 112 | on_each_cpu(nmi_cpu_start, NULL, 1); |
113 | ctr_running = 1; | ||
114 | put_online_cpus(); | ||
104 | return 0; | 115 | return 0; |
105 | } | 116 | } |
106 | 117 | ||
107 | static void nmi_cpu_stop(void *dummy) | 118 | static void nmi_cpu_stop(void *dummy) |
108 | { | 119 | { |
109 | struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); | 120 | struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); |
110 | model->stop(msrs); | 121 | if (!msrs->controls) |
122 | WARN_ON_ONCE(1); | ||
123 | else | ||
124 | model->stop(msrs); | ||
111 | } | 125 | } |
112 | 126 | ||
113 | static void nmi_stop(void) | 127 | static void nmi_stop(void) |
114 | { | 128 | { |
129 | get_online_cpus(); | ||
115 | on_each_cpu(nmi_cpu_stop, NULL, 1); | 130 | on_each_cpu(nmi_cpu_stop, NULL, 1); |
131 | ctr_running = 0; | ||
132 | put_online_cpus(); | ||
116 | } | 133 | } |
117 | 134 | ||
118 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | 135 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX |
@@ -252,7 +269,10 @@ static int nmi_switch_event(void) | |||
252 | if (nmi_multiplex_on() < 0) | 269 | if (nmi_multiplex_on() < 0) |
253 | return -EINVAL; /* not necessary */ | 270 | return -EINVAL; /* not necessary */ |
254 | 271 | ||
255 | on_each_cpu(nmi_cpu_switch, NULL, 1); | 272 | get_online_cpus(); |
273 | if (ctr_running) | ||
274 | on_each_cpu(nmi_cpu_switch, NULL, 1); | ||
275 | put_online_cpus(); | ||
256 | 276 | ||
257 | return 0; | 277 | return 0; |
258 | } | 278 | } |
@@ -295,6 +315,7 @@ static void free_msrs(void) | |||
295 | kfree(per_cpu(cpu_msrs, i).controls); | 315 | kfree(per_cpu(cpu_msrs, i).controls); |
296 | per_cpu(cpu_msrs, i).controls = NULL; | 316 | per_cpu(cpu_msrs, i).controls = NULL; |
297 | } | 317 | } |
318 | nmi_shutdown_mux(); | ||
298 | } | 319 | } |
299 | 320 | ||
300 | static int allocate_msrs(void) | 321 | static int allocate_msrs(void) |
@@ -307,14 +328,21 @@ static int allocate_msrs(void) | |||
307 | per_cpu(cpu_msrs, i).counters = kzalloc(counters_size, | 328 | per_cpu(cpu_msrs, i).counters = kzalloc(counters_size, |
308 | GFP_KERNEL); | 329 | GFP_KERNEL); |
309 | if (!per_cpu(cpu_msrs, i).counters) | 330 | if (!per_cpu(cpu_msrs, i).counters) |
310 | return 0; | 331 | goto fail; |
311 | per_cpu(cpu_msrs, i).controls = kzalloc(controls_size, | 332 | per_cpu(cpu_msrs, i).controls = kzalloc(controls_size, |
312 | GFP_KERNEL); | 333 | GFP_KERNEL); |
313 | if (!per_cpu(cpu_msrs, i).controls) | 334 | if (!per_cpu(cpu_msrs, i).controls) |
314 | return 0; | 335 | goto fail; |
315 | } | 336 | } |
316 | 337 | ||
338 | if (!nmi_setup_mux()) | ||
339 | goto fail; | ||
340 | |||
317 | return 1; | 341 | return 1; |
342 | |||
343 | fail: | ||
344 | free_msrs(); | ||
345 | return 0; | ||
318 | } | 346 | } |
319 | 347 | ||
320 | static void nmi_cpu_setup(void *dummy) | 348 | static void nmi_cpu_setup(void *dummy) |
@@ -336,49 +364,6 @@ static struct notifier_block profile_exceptions_nb = { | |||
336 | .priority = 2 | 364 | .priority = 2 |
337 | }; | 365 | }; |
338 | 366 | ||
339 | static int nmi_setup(void) | ||
340 | { | ||
341 | int err = 0; | ||
342 | int cpu; | ||
343 | |||
344 | if (!allocate_msrs()) | ||
345 | err = -ENOMEM; | ||
346 | else if (!nmi_setup_mux()) | ||
347 | err = -ENOMEM; | ||
348 | else | ||
349 | err = register_die_notifier(&profile_exceptions_nb); | ||
350 | |||
351 | if (err) { | ||
352 | free_msrs(); | ||
353 | nmi_shutdown_mux(); | ||
354 | return err; | ||
355 | } | ||
356 | |||
357 | /* We need to serialize save and setup for HT because the subset | ||
358 | * of msrs are distinct for save and setup operations | ||
359 | */ | ||
360 | |||
361 | /* Assume saved/restored counters are the same on all CPUs */ | ||
362 | model->fill_in_addresses(&per_cpu(cpu_msrs, 0)); | ||
363 | for_each_possible_cpu(cpu) { | ||
364 | if (!cpu) | ||
365 | continue; | ||
366 | |||
367 | memcpy(per_cpu(cpu_msrs, cpu).counters, | ||
368 | per_cpu(cpu_msrs, 0).counters, | ||
369 | sizeof(struct op_msr) * model->num_counters); | ||
370 | |||
371 | memcpy(per_cpu(cpu_msrs, cpu).controls, | ||
372 | per_cpu(cpu_msrs, 0).controls, | ||
373 | sizeof(struct op_msr) * model->num_controls); | ||
374 | |||
375 | mux_clone(cpu); | ||
376 | } | ||
377 | on_each_cpu(nmi_cpu_setup, NULL, 1); | ||
378 | nmi_enabled = 1; | ||
379 | return 0; | ||
380 | } | ||
381 | |||
382 | static void nmi_cpu_restore_registers(struct op_msrs *msrs) | 367 | static void nmi_cpu_restore_registers(struct op_msrs *msrs) |
383 | { | 368 | { |
384 | struct op_msr *counters = msrs->counters; | 369 | struct op_msr *counters = msrs->counters; |
@@ -412,20 +397,24 @@ static void nmi_cpu_shutdown(void *dummy) | |||
412 | apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); | 397 | apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); |
413 | apic_write(APIC_LVTERR, v); | 398 | apic_write(APIC_LVTERR, v); |
414 | nmi_cpu_restore_registers(msrs); | 399 | nmi_cpu_restore_registers(msrs); |
400 | if (model->cpu_down) | ||
401 | model->cpu_down(); | ||
415 | } | 402 | } |
416 | 403 | ||
417 | static void nmi_shutdown(void) | 404 | static void nmi_cpu_up(void *dummy) |
418 | { | 405 | { |
419 | struct op_msrs *msrs; | 406 | if (nmi_enabled) |
407 | nmi_cpu_setup(dummy); | ||
408 | if (ctr_running) | ||
409 | nmi_cpu_start(dummy); | ||
410 | } | ||
420 | 411 | ||
421 | nmi_enabled = 0; | 412 | static void nmi_cpu_down(void *dummy) |
422 | on_each_cpu(nmi_cpu_shutdown, NULL, 1); | 413 | { |
423 | unregister_die_notifier(&profile_exceptions_nb); | 414 | if (ctr_running) |
424 | nmi_shutdown_mux(); | 415 | nmi_cpu_stop(dummy); |
425 | msrs = &get_cpu_var(cpu_msrs); | 416 | if (nmi_enabled) |
426 | model->shutdown(msrs); | 417 | nmi_cpu_shutdown(dummy); |
427 | free_msrs(); | ||
428 | put_cpu_var(cpu_msrs); | ||
429 | } | 418 | } |
430 | 419 | ||
431 | static int nmi_create_files(struct super_block *sb, struct dentry *root) | 420 | static int nmi_create_files(struct super_block *sb, struct dentry *root) |
@@ -457,7 +446,6 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) | |||
457 | return 0; | 446 | return 0; |
458 | } | 447 | } |
459 | 448 | ||
460 | #ifdef CONFIG_SMP | ||
461 | static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, | 449 | static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, |
462 | void *data) | 450 | void *data) |
463 | { | 451 | { |
@@ -465,10 +453,10 @@ static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, | |||
465 | switch (action) { | 453 | switch (action) { |
466 | case CPU_DOWN_FAILED: | 454 | case CPU_DOWN_FAILED: |
467 | case CPU_ONLINE: | 455 | case CPU_ONLINE: |
468 | smp_call_function_single(cpu, nmi_cpu_start, NULL, 0); | 456 | smp_call_function_single(cpu, nmi_cpu_up, NULL, 0); |
469 | break; | 457 | break; |
470 | case CPU_DOWN_PREPARE: | 458 | case CPU_DOWN_PREPARE: |
471 | smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1); | 459 | smp_call_function_single(cpu, nmi_cpu_down, NULL, 1); |
472 | break; | 460 | break; |
473 | } | 461 | } |
474 | return NOTIFY_DONE; | 462 | return NOTIFY_DONE; |
@@ -477,7 +465,75 @@ static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, | |||
477 | static struct notifier_block oprofile_cpu_nb = { | 465 | static struct notifier_block oprofile_cpu_nb = { |
478 | .notifier_call = oprofile_cpu_notifier | 466 | .notifier_call = oprofile_cpu_notifier |
479 | }; | 467 | }; |
480 | #endif | 468 | |
469 | static int nmi_setup(void) | ||
470 | { | ||
471 | int err = 0; | ||
472 | int cpu; | ||
473 | |||
474 | if (!allocate_msrs()) | ||
475 | return -ENOMEM; | ||
476 | |||
477 | /* We need to serialize save and setup for HT because the subset | ||
478 | * of msrs are distinct for save and setup operations | ||
479 | */ | ||
480 | |||
481 | /* Assume saved/restored counters are the same on all CPUs */ | ||
482 | err = model->fill_in_addresses(&per_cpu(cpu_msrs, 0)); | ||
483 | if (err) | ||
484 | goto fail; | ||
485 | |||
486 | for_each_possible_cpu(cpu) { | ||
487 | if (!cpu) | ||
488 | continue; | ||
489 | |||
490 | memcpy(per_cpu(cpu_msrs, cpu).counters, | ||
491 | per_cpu(cpu_msrs, 0).counters, | ||
492 | sizeof(struct op_msr) * model->num_counters); | ||
493 | |||
494 | memcpy(per_cpu(cpu_msrs, cpu).controls, | ||
495 | per_cpu(cpu_msrs, 0).controls, | ||
496 | sizeof(struct op_msr) * model->num_controls); | ||
497 | |||
498 | mux_clone(cpu); | ||
499 | } | ||
500 | |||
501 | nmi_enabled = 0; | ||
502 | ctr_running = 0; | ||
503 | barrier(); | ||
504 | err = register_die_notifier(&profile_exceptions_nb); | ||
505 | if (err) | ||
506 | goto fail; | ||
507 | |||
508 | get_online_cpus(); | ||
509 | register_cpu_notifier(&oprofile_cpu_nb); | ||
510 | on_each_cpu(nmi_cpu_setup, NULL, 1); | ||
511 | nmi_enabled = 1; | ||
512 | put_online_cpus(); | ||
513 | |||
514 | return 0; | ||
515 | fail: | ||
516 | free_msrs(); | ||
517 | return err; | ||
518 | } | ||
519 | |||
520 | static void nmi_shutdown(void) | ||
521 | { | ||
522 | struct op_msrs *msrs; | ||
523 | |||
524 | get_online_cpus(); | ||
525 | unregister_cpu_notifier(&oprofile_cpu_nb); | ||
526 | on_each_cpu(nmi_cpu_shutdown, NULL, 1); | ||
527 | nmi_enabled = 0; | ||
528 | ctr_running = 0; | ||
529 | put_online_cpus(); | ||
530 | barrier(); | ||
531 | unregister_die_notifier(&profile_exceptions_nb); | ||
532 | msrs = &get_cpu_var(cpu_msrs); | ||
533 | model->shutdown(msrs); | ||
534 | free_msrs(); | ||
535 | put_cpu_var(cpu_msrs); | ||
536 | } | ||
481 | 537 | ||
482 | #ifdef CONFIG_PM | 538 | #ifdef CONFIG_PM |
483 | 539 | ||
@@ -687,9 +743,6 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
687 | return -ENODEV; | 743 | return -ENODEV; |
688 | } | 744 | } |
689 | 745 | ||
690 | #ifdef CONFIG_SMP | ||
691 | register_cpu_notifier(&oprofile_cpu_nb); | ||
692 | #endif | ||
693 | /* default values, can be overwritten by model */ | 746 | /* default values, can be overwritten by model */ |
694 | ops->create_files = nmi_create_files; | 747 | ops->create_files = nmi_create_files; |
695 | ops->setup = nmi_setup; | 748 | ops->setup = nmi_setup; |
@@ -716,12 +769,6 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
716 | 769 | ||
717 | void op_nmi_exit(void) | 770 | void op_nmi_exit(void) |
718 | { | 771 | { |
719 | if (using_nmi) { | 772 | if (using_nmi) |
720 | exit_sysfs(); | 773 | exit_sysfs(); |
721 | #ifdef CONFIG_SMP | ||
722 | unregister_cpu_notifier(&oprofile_cpu_nb); | ||
723 | #endif | ||
724 | } | ||
725 | if (model->exit) | ||
726 | model->exit(); | ||
727 | } | 774 | } |
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 090cbbec7dbd..b67a6b5aa8d4 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c | |||
@@ -30,13 +30,10 @@ | |||
30 | #include "op_counter.h" | 30 | #include "op_counter.h" |
31 | 31 | ||
32 | #define NUM_COUNTERS 4 | 32 | #define NUM_COUNTERS 4 |
33 | #define NUM_CONTROLS 4 | ||
34 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | 33 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX |
35 | #define NUM_VIRT_COUNTERS 32 | 34 | #define NUM_VIRT_COUNTERS 32 |
36 | #define NUM_VIRT_CONTROLS 32 | ||
37 | #else | 35 | #else |
38 | #define NUM_VIRT_COUNTERS NUM_COUNTERS | 36 | #define NUM_VIRT_COUNTERS NUM_COUNTERS |
39 | #define NUM_VIRT_CONTROLS NUM_CONTROLS | ||
40 | #endif | 37 | #endif |
41 | 38 | ||
42 | #define OP_EVENT_MASK 0x0FFF | 39 | #define OP_EVENT_MASK 0x0FFF |
@@ -105,102 +102,6 @@ static u32 get_ibs_caps(void) | |||
105 | return ibs_caps; | 102 | return ibs_caps; |
106 | } | 103 | } |
107 | 104 | ||
108 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
109 | |||
110 | static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, | ||
111 | struct op_msrs const * const msrs) | ||
112 | { | ||
113 | u64 val; | ||
114 | int i; | ||
115 | |||
116 | /* enable active counters */ | ||
117 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
118 | int virt = op_x86_phys_to_virt(i); | ||
119 | if (!reset_value[virt]) | ||
120 | continue; | ||
121 | rdmsrl(msrs->controls[i].addr, val); | ||
122 | val &= model->reserved; | ||
123 | val |= op_x86_get_ctrl(model, &counter_config[virt]); | ||
124 | wrmsrl(msrs->controls[i].addr, val); | ||
125 | } | ||
126 | } | ||
127 | |||
128 | #endif | ||
129 | |||
130 | /* functions for op_amd_spec */ | ||
131 | |||
132 | static void op_amd_fill_in_addresses(struct op_msrs * const msrs) | ||
133 | { | ||
134 | int i; | ||
135 | |||
136 | for (i = 0; i < NUM_COUNTERS; i++) { | ||
137 | if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) | ||
138 | msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; | ||
139 | } | ||
140 | |||
141 | for (i = 0; i < NUM_CONTROLS; i++) { | ||
142 | if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) | ||
143 | msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; | ||
144 | } | ||
145 | } | ||
146 | |||
147 | static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, | ||
148 | struct op_msrs const * const msrs) | ||
149 | { | ||
150 | u64 val; | ||
151 | int i; | ||
152 | |||
153 | /* setup reset_value */ | ||
154 | for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { | ||
155 | if (counter_config[i].enabled | ||
156 | && msrs->counters[op_x86_virt_to_phys(i)].addr) | ||
157 | reset_value[i] = counter_config[i].count; | ||
158 | else | ||
159 | reset_value[i] = 0; | ||
160 | } | ||
161 | |||
162 | /* clear all counters */ | ||
163 | for (i = 0; i < NUM_CONTROLS; ++i) { | ||
164 | if (unlikely(!msrs->controls[i].addr)) { | ||
165 | if (counter_config[i].enabled && !smp_processor_id()) | ||
166 | /* | ||
167 | * counter is reserved, this is on all | ||
168 | * cpus, so report only for cpu #0 | ||
169 | */ | ||
170 | op_x86_warn_reserved(i); | ||
171 | continue; | ||
172 | } | ||
173 | rdmsrl(msrs->controls[i].addr, val); | ||
174 | if (val & ARCH_PERFMON_EVENTSEL_ENABLE) | ||
175 | op_x86_warn_in_use(i); | ||
176 | val &= model->reserved; | ||
177 | wrmsrl(msrs->controls[i].addr, val); | ||
178 | } | ||
179 | |||
180 | /* avoid a false detection of ctr overflows in NMI handler */ | ||
181 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
182 | if (unlikely(!msrs->counters[i].addr)) | ||
183 | continue; | ||
184 | wrmsrl(msrs->counters[i].addr, -1LL); | ||
185 | } | ||
186 | |||
187 | /* enable active counters */ | ||
188 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
189 | int virt = op_x86_phys_to_virt(i); | ||
190 | if (!reset_value[virt]) | ||
191 | continue; | ||
192 | |||
193 | /* setup counter registers */ | ||
194 | wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]); | ||
195 | |||
196 | /* setup control registers */ | ||
197 | rdmsrl(msrs->controls[i].addr, val); | ||
198 | val &= model->reserved; | ||
199 | val |= op_x86_get_ctrl(model, &counter_config[virt]); | ||
200 | wrmsrl(msrs->controls[i].addr, val); | ||
201 | } | ||
202 | } | ||
203 | |||
204 | /* | 105 | /* |
205 | * 16-bit Linear Feedback Shift Register (LFSR) | 106 | * 16-bit Linear Feedback Shift Register (LFSR) |
206 | * | 107 | * |
@@ -365,6 +266,125 @@ static void op_amd_stop_ibs(void) | |||
365 | wrmsrl(MSR_AMD64_IBSOPCTL, 0); | 266 | wrmsrl(MSR_AMD64_IBSOPCTL, 0); |
366 | } | 267 | } |
367 | 268 | ||
269 | #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX | ||
270 | |||
271 | static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, | ||
272 | struct op_msrs const * const msrs) | ||
273 | { | ||
274 | u64 val; | ||
275 | int i; | ||
276 | |||
277 | /* enable active counters */ | ||
278 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
279 | int virt = op_x86_phys_to_virt(i); | ||
280 | if (!reset_value[virt]) | ||
281 | continue; | ||
282 | rdmsrl(msrs->controls[i].addr, val); | ||
283 | val &= model->reserved; | ||
284 | val |= op_x86_get_ctrl(model, &counter_config[virt]); | ||
285 | wrmsrl(msrs->controls[i].addr, val); | ||
286 | } | ||
287 | } | ||
288 | |||
289 | #endif | ||
290 | |||
291 | /* functions for op_amd_spec */ | ||
292 | |||
293 | static void op_amd_shutdown(struct op_msrs const * const msrs) | ||
294 | { | ||
295 | int i; | ||
296 | |||
297 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
298 | if (!msrs->counters[i].addr) | ||
299 | continue; | ||
300 | release_perfctr_nmi(MSR_K7_PERFCTR0 + i); | ||
301 | release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); | ||
302 | } | ||
303 | } | ||
304 | |||
305 | static int op_amd_fill_in_addresses(struct op_msrs * const msrs) | ||
306 | { | ||
307 | int i; | ||
308 | |||
309 | for (i = 0; i < NUM_COUNTERS; i++) { | ||
310 | if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) | ||
311 | goto fail; | ||
312 | if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) { | ||
313 | release_perfctr_nmi(MSR_K7_PERFCTR0 + i); | ||
314 | goto fail; | ||
315 | } | ||
316 | /* both registers must be reserved */ | ||
317 | msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; | ||
318 | msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; | ||
319 | continue; | ||
320 | fail: | ||
321 | if (!counter_config[i].enabled) | ||
322 | continue; | ||
323 | op_x86_warn_reserved(i); | ||
324 | op_amd_shutdown(msrs); | ||
325 | return -EBUSY; | ||
326 | } | ||
327 | |||
328 | return 0; | ||
329 | } | ||
330 | |||
331 | static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, | ||
332 | struct op_msrs const * const msrs) | ||
333 | { | ||
334 | u64 val; | ||
335 | int i; | ||
336 | |||
337 | /* setup reset_value */ | ||
338 | for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { | ||
339 | if (counter_config[i].enabled | ||
340 | && msrs->counters[op_x86_virt_to_phys(i)].addr) | ||
341 | reset_value[i] = counter_config[i].count; | ||
342 | else | ||
343 | reset_value[i] = 0; | ||
344 | } | ||
345 | |||
346 | /* clear all counters */ | ||
347 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
348 | if (!msrs->controls[i].addr) | ||
349 | continue; | ||
350 | rdmsrl(msrs->controls[i].addr, val); | ||
351 | if (val & ARCH_PERFMON_EVENTSEL_ENABLE) | ||
352 | op_x86_warn_in_use(i); | ||
353 | val &= model->reserved; | ||
354 | wrmsrl(msrs->controls[i].addr, val); | ||
355 | /* | ||
356 | * avoid a false detection of ctr overflows in NMI | ||
357 | * handler | ||
358 | */ | ||
359 | wrmsrl(msrs->counters[i].addr, -1LL); | ||
360 | } | ||
361 | |||
362 | /* enable active counters */ | ||
363 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
364 | int virt = op_x86_phys_to_virt(i); | ||
365 | if (!reset_value[virt]) | ||
366 | continue; | ||
367 | |||
368 | /* setup counter registers */ | ||
369 | wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]); | ||
370 | |||
371 | /* setup control registers */ | ||
372 | rdmsrl(msrs->controls[i].addr, val); | ||
373 | val &= model->reserved; | ||
374 | val |= op_x86_get_ctrl(model, &counter_config[virt]); | ||
375 | wrmsrl(msrs->controls[i].addr, val); | ||
376 | } | ||
377 | |||
378 | if (ibs_caps) | ||
379 | setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_NMI, 0); | ||
380 | } | ||
381 | |||
382 | static void op_amd_cpu_shutdown(void) | ||
383 | { | ||
384 | if (ibs_caps) | ||
385 | setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1); | ||
386 | } | ||
387 | |||
368 | static int op_amd_check_ctrs(struct pt_regs * const regs, | 388 | static int op_amd_check_ctrs(struct pt_regs * const regs, |
369 | struct op_msrs const * const msrs) | 389 | struct op_msrs const * const msrs) |
370 | { | 390 | { |
@@ -425,42 +445,16 @@ static void op_amd_stop(struct op_msrs const * const msrs) | |||
425 | op_amd_stop_ibs(); | 445 | op_amd_stop_ibs(); |
426 | } | 446 | } |
427 | 447 | ||
428 | static void op_amd_shutdown(struct op_msrs const * const msrs) | 448 | static int __init_ibs_nmi(void) |
429 | { | ||
430 | int i; | ||
431 | |||
432 | for (i = 0; i < NUM_COUNTERS; ++i) { | ||
433 | if (msrs->counters[i].addr) | ||
434 | release_perfctr_nmi(MSR_K7_PERFCTR0 + i); | ||
435 | } | ||
436 | for (i = 0; i < NUM_CONTROLS; ++i) { | ||
437 | if (msrs->controls[i].addr) | ||
438 | release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); | ||
439 | } | ||
440 | } | ||
441 | |||
442 | static u8 ibs_eilvt_off; | ||
443 | |||
444 | static inline void apic_init_ibs_nmi_per_cpu(void *arg) | ||
445 | { | ||
446 | ibs_eilvt_off = setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_NMI, 0); | ||
447 | } | ||
448 | |||
449 | static inline void apic_clear_ibs_nmi_per_cpu(void *arg) | ||
450 | { | ||
451 | setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1); | ||
452 | } | ||
453 | |||
454 | static int init_ibs_nmi(void) | ||
455 | { | 449 | { |
456 | #define IBSCTL_LVTOFFSETVAL (1 << 8) | 450 | #define IBSCTL_LVTOFFSETVAL (1 << 8) |
457 | #define IBSCTL 0x1cc | 451 | #define IBSCTL 0x1cc |
458 | struct pci_dev *cpu_cfg; | 452 | struct pci_dev *cpu_cfg; |
459 | int nodes; | 453 | int nodes; |
460 | u32 value = 0; | 454 | u32 value = 0; |
455 | u8 ibs_eilvt_off; | ||
461 | 456 | ||
462 | /* per CPU setup */ | 457 | ibs_eilvt_off = setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1); |
463 | on_each_cpu(apic_init_ibs_nmi_per_cpu, NULL, 1); | ||
464 | 458 | ||
465 | nodes = 0; | 459 | nodes = 0; |
466 | cpu_cfg = NULL; | 460 | cpu_cfg = NULL; |
@@ -490,22 +484,15 @@ static int init_ibs_nmi(void) | |||
490 | return 0; | 484 | return 0; |
491 | } | 485 | } |
492 | 486 | ||
493 | /* uninitialize the APIC for the IBS interrupts if needed */ | ||
494 | static void clear_ibs_nmi(void) | ||
495 | { | ||
496 | if (ibs_caps) | ||
497 | on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); | ||
498 | } | ||
499 | |||
500 | /* initialize the APIC for the IBS interrupts if available */ | 487 | /* initialize the APIC for the IBS interrupts if available */ |
501 | static void ibs_init(void) | 488 | static void init_ibs(void) |
502 | { | 489 | { |
503 | ibs_caps = get_ibs_caps(); | 490 | ibs_caps = get_ibs_caps(); |
504 | 491 | ||
505 | if (!ibs_caps) | 492 | if (!ibs_caps) |
506 | return; | 493 | return; |
507 | 494 | ||
508 | if (init_ibs_nmi()) { | 495 | if (__init_ibs_nmi()) { |
509 | ibs_caps = 0; | 496 | ibs_caps = 0; |
510 | return; | 497 | return; |
511 | } | 498 | } |
@@ -514,14 +501,6 @@ static void ibs_init(void) | |||
514 | (unsigned)ibs_caps); | 501 | (unsigned)ibs_caps); |
515 | } | 502 | } |
516 | 503 | ||
517 | static void ibs_exit(void) | ||
518 | { | ||
519 | if (!ibs_caps) | ||
520 | return; | ||
521 | |||
522 | clear_ibs_nmi(); | ||
523 | } | ||
524 | |||
525 | static int (*create_arch_files)(struct super_block *sb, struct dentry *root); | 504 | static int (*create_arch_files)(struct super_block *sb, struct dentry *root); |
526 | 505 | ||
527 | static int setup_ibs_files(struct super_block *sb, struct dentry *root) | 506 | static int setup_ibs_files(struct super_block *sb, struct dentry *root) |
@@ -570,27 +549,22 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) | |||
570 | 549 | ||
571 | static int op_amd_init(struct oprofile_operations *ops) | 550 | static int op_amd_init(struct oprofile_operations *ops) |
572 | { | 551 | { |
573 | ibs_init(); | 552 | init_ibs(); |
574 | create_arch_files = ops->create_files; | 553 | create_arch_files = ops->create_files; |
575 | ops->create_files = setup_ibs_files; | 554 | ops->create_files = setup_ibs_files; |
576 | return 0; | 555 | return 0; |
577 | } | 556 | } |
578 | 557 | ||
579 | static void op_amd_exit(void) | ||
580 | { | ||
581 | ibs_exit(); | ||
582 | } | ||
583 | |||
584 | struct op_x86_model_spec op_amd_spec = { | 558 | struct op_x86_model_spec op_amd_spec = { |
585 | .num_counters = NUM_COUNTERS, | 559 | .num_counters = NUM_COUNTERS, |
586 | .num_controls = NUM_CONTROLS, | 560 | .num_controls = NUM_COUNTERS, |
587 | .num_virt_counters = NUM_VIRT_COUNTERS, | 561 | .num_virt_counters = NUM_VIRT_COUNTERS, |
588 | .reserved = MSR_AMD_EVENTSEL_RESERVED, | 562 | .reserved = MSR_AMD_EVENTSEL_RESERVED, |
589 | .event_mask = OP_EVENT_MASK, | 563 | .event_mask = OP_EVENT_MASK, |
590 | .init = op_amd_init, | 564 | .init = op_amd_init, |
591 | .exit = op_amd_exit, | ||
592 | .fill_in_addresses = &op_amd_fill_in_addresses, | 565 | .fill_in_addresses = &op_amd_fill_in_addresses, |
593 | .setup_ctrs = &op_amd_setup_ctrs, | 566 | .setup_ctrs = &op_amd_setup_ctrs, |
567 | .cpu_down = &op_amd_cpu_shutdown, | ||
594 | .check_ctrs = &op_amd_check_ctrs, | 568 | .check_ctrs = &op_amd_check_ctrs, |
595 | .start = &op_amd_start, | 569 | .start = &op_amd_start, |
596 | .stop = &op_amd_stop, | 570 | .stop = &op_amd_stop, |
diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index e6a160a4684a..182558dd5515 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c | |||
@@ -385,8 +385,26 @@ static unsigned int get_stagger(void) | |||
385 | 385 | ||
386 | static unsigned long reset_value[NUM_COUNTERS_NON_HT]; | 386 | static unsigned long reset_value[NUM_COUNTERS_NON_HT]; |
387 | 387 | ||
388 | static void p4_shutdown(struct op_msrs const * const msrs) | ||
389 | { | ||
390 | int i; | ||
388 | 391 | ||
389 | static void p4_fill_in_addresses(struct op_msrs * const msrs) | 392 | for (i = 0; i < num_counters; ++i) { |
393 | if (msrs->counters[i].addr) | ||
394 | release_perfctr_nmi(msrs->counters[i].addr); | ||
395 | } | ||
396 | /* | ||
397 | * some of the control registers are specially reserved in | ||
398 | * conjunction with the counter registers (hence the starting offset). | ||
399 | * This saves a few bits. | ||
400 | */ | ||
401 | for (i = num_counters; i < num_controls; ++i) { | ||
402 | if (msrs->controls[i].addr) | ||
403 | release_evntsel_nmi(msrs->controls[i].addr); | ||
404 | } | ||
405 | } | ||
406 | |||
407 | static int p4_fill_in_addresses(struct op_msrs * const msrs) | ||
390 | { | 408 | { |
391 | unsigned int i; | 409 | unsigned int i; |
392 | unsigned int addr, cccraddr, stag; | 410 | unsigned int addr, cccraddr, stag; |
@@ -468,6 +486,18 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs) | |||
468 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; | 486 | msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; |
469 | } | 487 | } |
470 | } | 488 | } |
489 | |||
490 | for (i = 0; i < num_counters; ++i) { | ||
491 | if (!counter_config[i].enabled) | ||
492 | continue; | ||
493 | if (msrs->controls[i].addr) | ||
494 | continue; | ||
495 | op_x86_warn_reserved(i); | ||
496 | p4_shutdown(msrs); | ||
497 | return -EBUSY; | ||
498 | } | ||
499 | |||
500 | return 0; | ||
471 | } | 501 | } |
472 | 502 | ||
473 | 503 | ||
@@ -668,26 +698,6 @@ static void p4_stop(struct op_msrs const * const msrs) | |||
668 | } | 698 | } |
669 | } | 699 | } |
670 | 700 | ||
671 | static void p4_shutdown(struct op_msrs const * const msrs) | ||
672 | { | ||
673 | int i; | ||
674 | |||
675 | for (i = 0; i < num_counters; ++i) { | ||
676 | if (msrs->counters[i].addr) | ||
677 | release_perfctr_nmi(msrs->counters[i].addr); | ||
678 | } | ||
679 | /* | ||
680 | * some of the control registers are specially reserved in | ||
681 | * conjunction with the counter registers (hence the starting offset). | ||
682 | * This saves a few bits. | ||
683 | */ | ||
684 | for (i = num_counters; i < num_controls; ++i) { | ||
685 | if (msrs->controls[i].addr) | ||
686 | release_evntsel_nmi(msrs->controls[i].addr); | ||
687 | } | ||
688 | } | ||
689 | |||
690 | |||
691 | #ifdef CONFIG_SMP | 701 | #ifdef CONFIG_SMP |
692 | struct op_x86_model_spec op_p4_ht2_spec = { | 702 | struct op_x86_model_spec op_p4_ht2_spec = { |
693 | .num_counters = NUM_COUNTERS_HT2, | 703 | .num_counters = NUM_COUNTERS_HT2, |
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 2bf90fafa7b5..d769cda54082 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c | |||
@@ -30,19 +30,46 @@ static int counter_width = 32; | |||
30 | 30 | ||
31 | static u64 *reset_value; | 31 | static u64 *reset_value; |
32 | 32 | ||
33 | static void ppro_fill_in_addresses(struct op_msrs * const msrs) | 33 | static void ppro_shutdown(struct op_msrs const * const msrs) |
34 | { | 34 | { |
35 | int i; | 35 | int i; |
36 | 36 | ||
37 | for (i = 0; i < num_counters; i++) { | 37 | for (i = 0; i < num_counters; ++i) { |
38 | if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) | 38 | if (!msrs->counters[i].addr) |
39 | msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; | 39 | continue; |
40 | release_perfctr_nmi(MSR_P6_PERFCTR0 + i); | ||
41 | release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); | ||
42 | } | ||
43 | if (reset_value) { | ||
44 | kfree(reset_value); | ||
45 | reset_value = NULL; | ||
40 | } | 46 | } |
47 | } | ||
48 | |||
49 | static int ppro_fill_in_addresses(struct op_msrs * const msrs) | ||
50 | { | ||
51 | int i; | ||
41 | 52 | ||
42 | for (i = 0; i < num_counters; i++) { | 53 | for (i = 0; i < num_counters; i++) { |
43 | if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) | 54 | if (!reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) |
44 | msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; | 55 | goto fail; |
56 | if (!reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) { | ||
57 | release_perfctr_nmi(MSR_P6_PERFCTR0 + i); | ||
58 | goto fail; | ||
59 | } | ||
60 | /* both registers must be reserved */ | ||
61 | msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; | ||
62 | msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; | ||
63 | continue; | ||
64 | fail: | ||
65 | if (!counter_config[i].enabled) | ||
66 | continue; | ||
67 | op_x86_warn_reserved(i); | ||
68 | ppro_shutdown(msrs); | ||
69 | return -EBUSY; | ||
45 | } | 70 | } |
71 | |||
72 | return 0; | ||
46 | } | 73 | } |
47 | 74 | ||
48 | 75 | ||
@@ -78,26 +105,17 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model, | |||
78 | 105 | ||
79 | /* clear all counters */ | 106 | /* clear all counters */ |
80 | for (i = 0; i < num_counters; ++i) { | 107 | for (i = 0; i < num_counters; ++i) { |
81 | if (unlikely(!msrs->controls[i].addr)) { | 108 | if (!msrs->controls[i].addr) |
82 | if (counter_config[i].enabled && !smp_processor_id()) | ||
83 | /* | ||
84 | * counter is reserved, this is on all | ||
85 | * cpus, so report only for cpu #0 | ||
86 | */ | ||
87 | op_x86_warn_reserved(i); | ||
88 | continue; | 109 | continue; |
89 | } | ||
90 | rdmsrl(msrs->controls[i].addr, val); | 110 | rdmsrl(msrs->controls[i].addr, val); |
91 | if (val & ARCH_PERFMON_EVENTSEL_ENABLE) | 111 | if (val & ARCH_PERFMON_EVENTSEL_ENABLE) |
92 | op_x86_warn_in_use(i); | 112 | op_x86_warn_in_use(i); |
93 | val &= model->reserved; | 113 | val &= model->reserved; |
94 | wrmsrl(msrs->controls[i].addr, val); | 114 | wrmsrl(msrs->controls[i].addr, val); |
95 | } | 115 | /* |
96 | 116 | * avoid a false detection of ctr overflows in NMI * | |
97 | /* avoid a false detection of ctr overflows in NMI handler */ | 117 | * handler |
98 | for (i = 0; i < num_counters; ++i) { | 118 | */ |
99 | if (unlikely(!msrs->counters[i].addr)) | ||
100 | continue; | ||
101 | wrmsrl(msrs->counters[i].addr, -1LL); | 119 | wrmsrl(msrs->counters[i].addr, -1LL); |
102 | } | 120 | } |
103 | 121 | ||
@@ -189,25 +207,6 @@ static void ppro_stop(struct op_msrs const * const msrs) | |||
189 | } | 207 | } |
190 | } | 208 | } |
191 | 209 | ||
192 | static void ppro_shutdown(struct op_msrs const * const msrs) | ||
193 | { | ||
194 | int i; | ||
195 | |||
196 | for (i = 0; i < num_counters; ++i) { | ||
197 | if (msrs->counters[i].addr) | ||
198 | release_perfctr_nmi(MSR_P6_PERFCTR0 + i); | ||
199 | } | ||
200 | for (i = 0; i < num_counters; ++i) { | ||
201 | if (msrs->controls[i].addr) | ||
202 | release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); | ||
203 | } | ||
204 | if (reset_value) { | ||
205 | kfree(reset_value); | ||
206 | reset_value = NULL; | ||
207 | } | ||
208 | } | ||
209 | |||
210 | |||
211 | struct op_x86_model_spec op_ppro_spec = { | 210 | struct op_x86_model_spec op_ppro_spec = { |
212 | .num_counters = 2, | 211 | .num_counters = 2, |
213 | .num_controls = 2, | 212 | .num_controls = 2, |
@@ -239,11 +238,11 @@ static void arch_perfmon_setup_counters(void) | |||
239 | if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && | 238 | if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && |
240 | current_cpu_data.x86_model == 15) { | 239 | current_cpu_data.x86_model == 15) { |
241 | eax.split.version_id = 2; | 240 | eax.split.version_id = 2; |
242 | eax.split.num_events = 2; | 241 | eax.split.num_counters = 2; |
243 | eax.split.bit_width = 40; | 242 | eax.split.bit_width = 40; |
244 | } | 243 | } |
245 | 244 | ||
246 | num_counters = eax.split.num_events; | 245 | num_counters = eax.split.num_counters; |
247 | 246 | ||
248 | op_arch_perfmon_spec.num_counters = num_counters; | 247 | op_arch_perfmon_spec.num_counters = num_counters; |
249 | op_arch_perfmon_spec.num_controls = num_counters; | 248 | op_arch_perfmon_spec.num_controls = num_counters; |
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index ff82a755edd4..89017fa1fd63 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h | |||
@@ -40,10 +40,10 @@ struct op_x86_model_spec { | |||
40 | u64 reserved; | 40 | u64 reserved; |
41 | u16 event_mask; | 41 | u16 event_mask; |
42 | int (*init)(struct oprofile_operations *ops); | 42 | int (*init)(struct oprofile_operations *ops); |
43 | void (*exit)(void); | 43 | int (*fill_in_addresses)(struct op_msrs * const msrs); |
44 | void (*fill_in_addresses)(struct op_msrs * const msrs); | ||
45 | void (*setup_ctrs)(struct op_x86_model_spec const *model, | 44 | void (*setup_ctrs)(struct op_x86_model_spec const *model, |
46 | struct op_msrs const * const msrs); | 45 | struct op_msrs const * const msrs); |
46 | void (*cpu_down)(void); | ||
47 | int (*check_ctrs)(struct pt_regs * const regs, | 47 | int (*check_ctrs)(struct pt_regs * const regs, |
48 | struct op_msrs const * const msrs); | 48 | struct op_msrs const * const msrs); |
49 | void (*start)(struct op_msrs const * const msrs); | 49 | void (*start)(struct op_msrs const * const msrs); |
diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c index 8bf2fcb88d04..7ef3a2735df3 100644 --- a/arch/x86/pci/mrst.c +++ b/arch/x86/pci/mrst.c | |||
@@ -109,7 +109,7 @@ static int pci_device_update_fixed(struct pci_bus *bus, unsigned int devfn, | |||
109 | decode++; | 109 | decode++; |
110 | decode = ~(decode - 1); | 110 | decode = ~(decode - 1); |
111 | } else { | 111 | } else { |
112 | decode = ~0; | 112 | decode = 0; |
113 | } | 113 | } |
114 | 114 | ||
115 | /* | 115 | /* |
@@ -247,6 +247,10 @@ static void __devinit pci_fixed_bar_fixup(struct pci_dev *dev) | |||
247 | u32 size; | 247 | u32 size; |
248 | int i; | 248 | int i; |
249 | 249 | ||
250 | /* Must have extended configuration space */ | ||
251 | if (dev->cfg_size < PCIE_CAP_OFFSET + 4) | ||
252 | return; | ||
253 | |||
250 | /* Fixup the BAR sizes for fixed BAR devices and make them unmoveable */ | 254 | /* Fixup the BAR sizes for fixed BAR devices and make them unmoveable */ |
251 | offset = fixed_bar_cap(dev->bus, dev->devfn); | 255 | offset = fixed_bar_cap(dev->bus, dev->devfn); |
252 | if (!offset || PCI_DEVFN(2, 0) == dev->devfn || | 256 | if (!offset || PCI_DEVFN(2, 0) == dev->devfn || |
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h index 22d6dde42619..a96a0619d0b7 100644 --- a/arch/xtensa/include/asm/atomic.h +++ b/arch/xtensa/include/asm/atomic.h | |||
@@ -46,7 +46,7 @@ | |||
46 | * | 46 | * |
47 | * Atomically reads the value of @v. | 47 | * Atomically reads the value of @v. |
48 | */ | 48 | */ |
49 | #define atomic_read(v) ((v)->counter) | 49 | #define atomic_read(v) (*(volatile int *)&(v)->counter) |
50 | 50 | ||
51 | /** | 51 | /** |
52 | * atomic_set - set atomic variable | 52 | * atomic_set - set atomic variable |