summary | refs | log | tree | commit | diff | stats
path: root/include/linux/smp.h
diff options
context:
space:
mode:
author: Peter Zijlstra <peterz@infradead.org> 2019-02-27 03:48:51 -0500
committer: Ingo Molnar <mingo@kernel.org> 2019-06-17 06:43:41 -0400
commit: 9ed7d75b2f09d836e71d597cd5879abb1a44e7a9 (patch)
tree: bd126f30b32c914a19acf7afceaf288952a07c05 /include/linux/smp.h
parent: 0b9ccc0a9b146b49e83bf1e32f70d2396a694bfb (diff)
x86/percpu: Relax smp_processor_id()
Nadav reported that since this_cpu_read() became asm-volatile, many
smp_processor_id() users generated worse code due to the extra
constraints.

However since smp_processor_id() is reading a stable value, we can use
__this_cpu_read().

While this does reduce text size somewhat, this mostly results in code
movement to .text.unlikely as a result of more/larger .cold.
subfunctions. Less text on the hotpath is good for I$.

  $ ./compare.sh defconfig-build1 defconfig-build2 vmlinux.o
  setup_APIC_ibs                          90         98   -12,+20
  force_ibs_eilvt_setup                  400        413   -57,+70
  pci_serr_error                         109        104   -54,+49
  pci_serr_error                         109        104   -54,+49
  unknown_nmi_error                      125        120   -76,+71
  unknown_nmi_error                      125        120   -76,+71
  io_check_error                         125        132   -97,+104
  intel_thermal_interrupt                730        822   +92,+0
  intel_init_thermal                     951        945   -6,+0
  generic_get_mtrr                       301        294   -7,+0
  generic_get_mtrr                       301        294   -7,+0
  generic_set_all                        749        754   -44,+49
  get_fixed_ranges                       352        360   -41,+49
  x86_acpi_suspend_lowlevel              369        363   -6,+0
  check_tsc_sync_source                  412        412   -71,+71
  irq_migrate_all_off_this_cpu           662        674   -14,+26
  clocksource_watchdog                   748        748   -113,+113
  __perf_event_account_interrupt         204        197   -7,+0
  attempt_merge                         1748       1741   -7,+0
  intel_guc_send_ct                     1424       1409   -15,+0
  __fini_doorbell                        235        231   -4,+0
  bdw_set_cdclk                          928        923   -5,+0
  gen11_dsi_disable                     1571       1556   -15,+0
  gmbus_wait                             493        488   -5,+0
  md_make_request                        376        369   -7,+0
  __split_and_process_bio                543        536   -7,+0
  delay_tsc                               96         89   -7,+0
  hsw_disable_pc8                        696        691   -5,+0
  tsc_verify_tsc_adjust                  215        228   -22,+35
  cpuidle_driver_unref                    56         49   -7,+0
  blk_account_io_completion              159        148   -11,+0
  mtrr_wrmsr                              95         99   -29,+33
  __intel_wait_for_register_fw           401        419   +18,+0
  cpuidle_driver_ref                      43         36   -7,+0
  cpuidle_get_driver                      15          8   -7,+0
  blk_account_io_done                    535        528   -7,+0
  irq_migrate_all_off_this_cpu           662        674   -14,+26
  check_tsc_sync_source                  412        412   -71,+71
  irq_wait_for_poll                      170        163   -7,+0
  generic_end_io_acct                    329        322   -7,+0
  x86_acpi_suspend_lowlevel              369        363   -6,+0
  nohz_balance_enter_idle                198        191   -7,+0
  generic_start_io_acct                  254        247   -7,+0
  blk_account_io_start                   341        334   -7,+0
  perf_event_task_tick                   682        675   -7,+0
  intel_init_thermal                     951        945   -6,+0
  amd_e400_c1e_apic_setup                 47         51   -28,+32
  setup_APIC_eilvt                       350        328   -22,+0
  hsw_enable_pc8                        1611       1605   -6,+0
  total                             12985947   12985892   -994,+939

Reported-by: Nadav Amit <nadav.amit@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'include/linux/smp.h')
-rw-r--r-- include/linux/smp.h 45
1 files changed, 31 insertions, 14 deletions
diff --git a/include/linux/smp.h b/include/linux/smp.h
index a56f08ff3097..aa9e5e82d8c3 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -181,29 +181,46 @@ static inline int get_boot_cpu_id(void)
181 181
182#endif /* !SMP */ 182#endif /* !SMP */
183 183
184/* 184/**
185 * smp_processor_id(): get the current CPU ID. 185 * raw_smp_processor_id() - get the current (unstable) CPU id
186 *
187 * For when you know what you are doing and need an unstable
188 * CPU id.
189 */
190
191/**
192 * smp_processor_id() - get the current (stable) CPU id
193 *
194 * This is the normal accessor to the CPU id and should be used
195 * whenever possible.
196 *
197 * The CPU id is stable when:
186 * 198 *
187 * if DEBUG_PREEMPT is enabled then we check whether it is 199 * - IRQs are disabled;
188 * used in a preemption-safe way. (smp_processor_id() is safe 200 * - preemption is disabled;
189 * if it's used in a preemption-off critical section, or in 201 * - the task is CPU affine.
190 * a thread that is bound to the current CPU.)
191 * 202 *
192 * NOTE: raw_smp_processor_id() is for internal use only 203 * When CONFIG_DEBUG_PREEMPT is enabled, we verify these assumptions and WARN
193 * (smp_processor_id() is the preferred variant), but in rare 204 * when smp_processor_id() is used when the CPU id is not stable.
194 * instances it might also be used to turn off false positives
195 * (i.e. smp_processor_id() use that the debugging code reports but
196 * which use for some reason is legal). Don't use this to hack around
197 * the warning message, as your code might not work under PREEMPT.
198 */ 205 */
206
207/*
208 * Allow the architecture to differentiate between a stable and unstable read.
209 * For example, x86 uses an IRQ-safe asm-volatile read for the unstable but a
210 * regular asm read for the stable.
211 */
212#ifndef __smp_processor_id
213#define __smp_processor_id(x) raw_smp_processor_id(x)
214#endif
215
199#ifdef CONFIG_DEBUG_PREEMPT 216#ifdef CONFIG_DEBUG_PREEMPT
200 extern unsigned int debug_smp_processor_id(void); 217 extern unsigned int debug_smp_processor_id(void);
201# define smp_processor_id() debug_smp_processor_id() 218# define smp_processor_id() debug_smp_processor_id()
202#else 219#else
203# define smp_processor_id() raw_smp_processor_id() 220# define smp_processor_id() __smp_processor_id()
204#endif 221#endif
205 222
206#define get_cpu() ({ preempt_disable(); smp_processor_id(); }) 223#define get_cpu() ({ preempt_disable(); __smp_processor_id(); })
207#define put_cpu() preempt_enable() 224#define put_cpu() preempt_enable()
208 225
209/* 226/*