diff options
author | Peter Zijlstra <peterz@infradead.org> | 2019-02-27 03:48:51 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2019-06-17 06:43:41 -0400 |
commit | 9ed7d75b2f09d836e71d597cd5879abb1a44e7a9 (patch) | |
tree | bd126f30b32c914a19acf7afceaf288952a07c05 /include/linux/smp.h | |
parent | 0b9ccc0a9b146b49e83bf1e32f70d2396a694bfb (diff) |
x86/percpu: Relax smp_processor_id()
Nadav reported that since this_cpu_read() became asm-volatile, many
smp_processor_id() users generated worse code due to the extra
constraints.
However, since smp_processor_id() is reading a stable value, we can use
__this_cpu_read().
While this does reduce text size somewhat, this mostly results in code
movement to .text.unlikely as a result of more/larger .cold.
subfunctions. Less text on the hotpath is good for I$.
$ ./compare.sh defconfig-build1 defconfig-build2 vmlinux.o
setup_APIC_ibs 90 98 -12,+20
force_ibs_eilvt_setup 400 413 -57,+70
pci_serr_error 109 104 -54,+49
pci_serr_error 109 104 -54,+49
unknown_nmi_error 125 120 -76,+71
unknown_nmi_error 125 120 -76,+71
io_check_error 125 132 -97,+104
intel_thermal_interrupt 730 822 +92,+0
intel_init_thermal 951 945 -6,+0
generic_get_mtrr 301 294 -7,+0
generic_get_mtrr 301 294 -7,+0
generic_set_all 749 754 -44,+49
get_fixed_ranges 352 360 -41,+49
x86_acpi_suspend_lowlevel 369 363 -6,+0
check_tsc_sync_source 412 412 -71,+71
irq_migrate_all_off_this_cpu 662 674 -14,+26
clocksource_watchdog 748 748 -113,+113
__perf_event_account_interrupt 204 197 -7,+0
attempt_merge 1748 1741 -7,+0
intel_guc_send_ct 1424 1409 -15,+0
__fini_doorbell 235 231 -4,+0
bdw_set_cdclk 928 923 -5,+0
gen11_dsi_disable 1571 1556 -15,+0
gmbus_wait 493 488 -5,+0
md_make_request 376 369 -7,+0
__split_and_process_bio 543 536 -7,+0
delay_tsc 96 89 -7,+0
hsw_disable_pc8 696 691 -5,+0
tsc_verify_tsc_adjust 215 228 -22,+35
cpuidle_driver_unref 56 49 -7,+0
blk_account_io_completion 159 148 -11,+0
mtrr_wrmsr 95 99 -29,+33
__intel_wait_for_register_fw 401 419 +18,+0
cpuidle_driver_ref 43 36 -7,+0
cpuidle_get_driver 15 8 -7,+0
blk_account_io_done 535 528 -7,+0
irq_migrate_all_off_this_cpu 662 674 -14,+26
check_tsc_sync_source 412 412 -71,+71
irq_wait_for_poll 170 163 -7,+0
generic_end_io_acct 329 322 -7,+0
x86_acpi_suspend_lowlevel 369 363 -6,+0
nohz_balance_enter_idle 198 191 -7,+0
generic_start_io_acct 254 247 -7,+0
blk_account_io_start 341 334 -7,+0
perf_event_task_tick 682 675 -7,+0
intel_init_thermal 951 945 -6,+0
amd_e400_c1e_apic_setup 47 51 -28,+32
setup_APIC_eilvt 350 328 -22,+0
hsw_enable_pc8 1611 1605 -6,+0
total 12985947 12985892 -994,+939
Reported-by: Nadav Amit <nadav.amit@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'include/linux/smp.h')
-rw-r--r-- | include/linux/smp.h | 45 |
1 files changed, 31 insertions, 14 deletions
diff --git a/include/linux/smp.h b/include/linux/smp.h index a56f08ff3097..aa9e5e82d8c3 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h | |||
@@ -181,29 +181,46 @@ static inline int get_boot_cpu_id(void) | |||
181 | 181 | ||
182 | #endif /* !SMP */ | 182 | #endif /* !SMP */ |
183 | 183 | ||
184 | /* | 184 | /** |
185 | * smp_processor_id(): get the current CPU ID. | 185 | * raw_smp_processor_id() - get the current (unstable) CPU id |
186 | * | ||
187 | * For when you know what you are doing and need an unstable | |
188 | * CPU id. | ||
189 | */ | ||
190 | |||
191 | /** | ||
192 | * smp_processor_id() - get the current (stable) CPU id | ||
193 | * | ||
194 | * This is the normal accessor to the CPU id and should be used | ||
195 | * whenever possible. | ||
196 | * | ||
197 | * The CPU id is stable when: | ||
186 | * | 198 | * |
187 | * if DEBUG_PREEMPT is enabled then we check whether it is | 199 | * - IRQs are disabled; |
188 | * used in a preemption-safe way. (smp_processor_id() is safe | 200 | * - preemption is disabled; |
189 | * if it's used in a preemption-off critical section, or in | 201 | * - the task is CPU affine. |
190 | * a thread that is bound to the current CPU.) | ||
191 | * | 202 | * |
192 | * NOTE: raw_smp_processor_id() is for internal use only | 203 | * When CONFIG_DEBUG_PREEMPT is set, we verify these assumptions and WARN |
193 | * (smp_processor_id() is the preferred variant), but in rare | 204 | * when smp_processor_id() is used when the CPU id is not stable. |
194 | * instances it might also be used to turn off false positives | ||
195 | * (i.e. smp_processor_id() use that the debugging code reports but | ||
196 | * which use for some reason is legal). Don't use this to hack around | ||
197 | * the warning message, as your code might not work under PREEMPT. | ||
198 | */ | 205 | */ |
206 | |||
207 | /* | ||
208 | * Allow the architecture to differentiate between a stable and unstable read. | ||
209 | * For example, x86 uses an IRQ-safe asm-volatile read for the unstable but a | ||
210 | * regular asm read for the stable. | ||
211 | */ | ||
212 | #ifndef __smp_processor_id | ||
213 | #define __smp_processor_id(x) raw_smp_processor_id(x) | ||
214 | #endif | ||
215 | |||
199 | #ifdef CONFIG_DEBUG_PREEMPT | 216 | #ifdef CONFIG_DEBUG_PREEMPT |
200 | extern unsigned int debug_smp_processor_id(void); | 217 | extern unsigned int debug_smp_processor_id(void); |
201 | # define smp_processor_id() debug_smp_processor_id() | 218 | # define smp_processor_id() debug_smp_processor_id() |
202 | #else | 219 | #else |
203 | # define smp_processor_id() raw_smp_processor_id() | 220 | # define smp_processor_id() __smp_processor_id() |
204 | #endif | 221 | #endif |
205 | 222 | ||
206 | #define get_cpu() ({ preempt_disable(); smp_processor_id(); }) | 223 | #define get_cpu() ({ preempt_disable(); __smp_processor_id(); }) |
207 | #define put_cpu() preempt_enable() | 224 | #define put_cpu() preempt_enable() |
208 | 225 | ||
209 | /* | 226 | /* |