aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/time
diff options
context:
space:
mode:
author: Thomas Gleixner <tglx@linutronix.de> 2015-05-26 18:50:33 -0400
committer: Thomas Gleixner <tglx@linutronix.de> 2015-06-19 09:18:28 -0400
commit: bc7a34b8b9ebfb0f4b8a35a72a0b134fd6c5ef50 (patch)
tree: f6324a2a7742e56740e9cc08d9636865ee72ec89 /kernel/time
parent: c74441a17eb975b604e339ca6c11b9ab9aaca11f (diff)
timer: Reduce timer migration overhead if disabled
Eric reported that the timer_migration sysctl is not really nice performance-wise, as it needs to check at every timer insertion whether the feature is enabled or not. Further, the check does not live in the timer code, so we have an extra function call which checks an extra cache line to figure out that it is disabled.

We can do better and store that information in the per-CPU (hr)timer bases. I pondered using a static key, but that's a nightmare to update from the nohz code, and the timer base cache line is hot anyway when we select a timer base.

The old logic enabled the timer migration unconditionally if CONFIG_NO_HZ was set, even if nohz was disabled on the kernel command line.

With this modification, we start off with migration disabled. The user-visible sysctl is still set to enabled. If the kernel switches to NOHZ, migration is enabled, provided the user did not disable it via the sysctl prior to the switch. If nohz=off is on the kernel command line, migration stays disabled no matter what.

Before:
  47.76%  hog       [.] main
  14.84%  [kernel]  [k] _raw_spin_lock_irqsave
   9.55%  [kernel]  [k] _raw_spin_unlock_irqrestore
   6.71%  [kernel]  [k] mod_timer
   6.24%  [kernel]  [k] lock_timer_base.isra.38
   3.76%  [kernel]  [k] detach_if_pending
   3.71%  [kernel]  [k] del_timer
   2.50%  [kernel]  [k] internal_add_timer
   1.51%  [kernel]  [k] get_nohz_timer_target
   1.28%  [kernel]  [k] __internal_add_timer
   0.78%  [kernel]  [k] timerfn
   0.48%  [kernel]  [k] wake_up_nohz_cpu

After:
  48.10%  hog       [.] main
  15.25%  [kernel]  [k] _raw_spin_lock_irqsave
   9.76%  [kernel]  [k] _raw_spin_unlock_irqrestore
   6.50%  [kernel]  [k] mod_timer
   6.44%  [kernel]  [k] lock_timer_base.isra.38
   3.87%  [kernel]  [k] detach_if_pending
   3.80%  [kernel]  [k] del_timer
   2.67%  [kernel]  [k] internal_add_timer
   1.33%  [kernel]  [k] __internal_add_timer
   0.73%  [kernel]  [k] timerfn
   0.54%  [kernel]  [k] wake_up_nohz_cpu

Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Joonwoo Park <joonwoop@codeaurora.org>
Cc: Wenbo Wang <wenbo.wang@memblaze.com>
Link: http://lkml.kernel.org/r/20150526224512.127050787@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'kernel/time')
-rw-r--r-- kernel/time/hrtimer.c 35
-rw-r--r-- kernel/time/tick-internal.h 14
-rw-r--r-- kernel/time/tick-sched.c 25
-rw-r--r-- kernel/time/timer.c 59
-rw-r--r-- kernel/time/timer_list.c 2
5 files changed, 109 insertions, 26 deletions
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index f026413de4d6..6115f4df119b 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -177,6 +177,24 @@ hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
177#endif 177#endif
178} 178}
179 179
180#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
181static inline
182struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
183 int pinned)
184{
185 if (pinned || !base->migration_enabled)
186 return this_cpu_ptr(&hrtimer_bases);
187 return &per_cpu(hrtimer_bases, get_nohz_timer_target());
188}
189#else
190static inline
191struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
192 int pinned)
193{
194 return this_cpu_ptr(&hrtimer_bases);
195}
196#endif
197
180/* 198/*
181 * Switch the timer base to the current CPU when possible. 199 * Switch the timer base to the current CPU when possible.
182 */ 200 */
@@ -184,14 +202,13 @@ static inline struct hrtimer_clock_base *
184switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, 202switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
185 int pinned) 203 int pinned)
186{ 204{
205 struct hrtimer_cpu_base *new_cpu_base, *this_base;
187 struct hrtimer_clock_base *new_base; 206 struct hrtimer_clock_base *new_base;
188 struct hrtimer_cpu_base *new_cpu_base;
189 int this_cpu = smp_processor_id();
190 int cpu = get_nohz_timer_target(pinned);
191 int basenum = base->index; 207 int basenum = base->index;
192 208
209 this_base = this_cpu_ptr(&hrtimer_bases);
210 new_cpu_base = get_target_base(this_base, pinned);
193again: 211again:
194 new_cpu_base = &per_cpu(hrtimer_bases, cpu);
195 new_base = &new_cpu_base->clock_base[basenum]; 212 new_base = &new_cpu_base->clock_base[basenum];
196 213
197 if (base != new_base) { 214 if (base != new_base) {
@@ -212,17 +229,19 @@ again:
212 raw_spin_unlock(&base->cpu_base->lock); 229 raw_spin_unlock(&base->cpu_base->lock);
213 raw_spin_lock(&new_base->cpu_base->lock); 230 raw_spin_lock(&new_base->cpu_base->lock);
214 231
215 if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) { 232 if (new_cpu_base != this_base &&
216 cpu = this_cpu; 233 hrtimer_check_target(timer, new_base)) {
217 raw_spin_unlock(&new_base->cpu_base->lock); 234 raw_spin_unlock(&new_base->cpu_base->lock);
218 raw_spin_lock(&base->cpu_base->lock); 235 raw_spin_lock(&base->cpu_base->lock);
236 new_cpu_base = this_base;
219 timer->base = base; 237 timer->base = base;
220 goto again; 238 goto again;
221 } 239 }
222 timer->base = new_base; 240 timer->base = new_base;
223 } else { 241 } else {
224 if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) { 242 if (new_cpu_base != this_base &&
225 cpu = this_cpu; 243 hrtimer_check_target(timer, new_base)) {
244 new_cpu_base = this_base;
226 goto again; 245 goto again;
227 } 246 }
228 } 247 }
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index ec2208aabdd1..2edde84744df 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -149,4 +149,18 @@ extern void tick_nohz_init(void);
149static inline void tick_nohz_init(void) { } 149static inline void tick_nohz_init(void) { }
150#endif 150#endif
151 151
152#ifdef CONFIG_NO_HZ_COMMON
153extern unsigned long tick_nohz_active;
154#else
155#define tick_nohz_active (0)
156#endif
157
158#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
159extern void timers_update_migration(void);
160#else
161static inline void timers_update_migration(void) { }
162#endif
163
164DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
165
152extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem); 166extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 812f7a3b9898..b1cb01699355 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -399,7 +399,7 @@ void __init tick_nohz_init(void)
399 * NO HZ enabled ? 399 * NO HZ enabled ?
400 */ 400 */
401static int tick_nohz_enabled __read_mostly = 1; 401static int tick_nohz_enabled __read_mostly = 1;
402int tick_nohz_active __read_mostly; 402unsigned long tick_nohz_active __read_mostly;
403/* 403/*
404 * Enable / Disable tickless mode 404 * Enable / Disable tickless mode
405 */ 405 */
@@ -956,6 +956,16 @@ static void tick_nohz_handler(struct clock_event_device *dev)
956 tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); 956 tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
957} 957}
958 958
959static inline void tick_nohz_activate(struct tick_sched *ts, int mode)
960{
961 if (!tick_nohz_enabled)
962 return;
963 ts->nohz_mode = mode;
964 /* One update is enough */
965 if (!test_and_set_bit(0, &tick_nohz_active))
966 timers_update_migration();
967}
968
959/** 969/**
960 * tick_nohz_switch_to_nohz - switch to nohz mode 970 * tick_nohz_switch_to_nohz - switch to nohz mode
961 */ 971 */
@@ -970,9 +980,6 @@ static void tick_nohz_switch_to_nohz(void)
970 if (tick_switch_to_oneshot(tick_nohz_handler)) 980 if (tick_switch_to_oneshot(tick_nohz_handler))
971 return; 981 return;
972 982
973 tick_nohz_active = 1;
974 ts->nohz_mode = NOHZ_MODE_LOWRES;
975
976 /* 983 /*
977 * Recycle the hrtimer in ts, so we can share the 984 * Recycle the hrtimer in ts, so we can share the
978 * hrtimer_forward with the highres code. 985 * hrtimer_forward with the highres code.
@@ -984,6 +991,7 @@ static void tick_nohz_switch_to_nohz(void)
984 hrtimer_forward_now(&ts->sched_timer, tick_period); 991 hrtimer_forward_now(&ts->sched_timer, tick_period);
985 hrtimer_set_expires(&ts->sched_timer, next); 992 hrtimer_set_expires(&ts->sched_timer, next);
986 tick_program_event(next, 1); 993 tick_program_event(next, 1);
994 tick_nohz_activate(ts, NOHZ_MODE_LOWRES);
987} 995}
988 996
989/* 997/*
@@ -1035,6 +1043,7 @@ static inline void tick_nohz_irq_enter(void)
1035 1043
1036static inline void tick_nohz_switch_to_nohz(void) { } 1044static inline void tick_nohz_switch_to_nohz(void) { }
1037static inline void tick_nohz_irq_enter(void) { } 1045static inline void tick_nohz_irq_enter(void) { }
1046static inline void tick_nohz_activate(struct tick_sched *ts, int mode) { }
1038 1047
1039#endif /* CONFIG_NO_HZ_COMMON */ 1048#endif /* CONFIG_NO_HZ_COMMON */
1040 1049
@@ -1117,13 +1126,7 @@ void tick_setup_sched_timer(void)
1117 1126
1118 hrtimer_forward(&ts->sched_timer, now, tick_period); 1127 hrtimer_forward(&ts->sched_timer, now, tick_period);
1119 hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED); 1128 hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
1120 1129 tick_nohz_activate(ts, NOHZ_MODE_HIGHRES);
1121#ifdef CONFIG_NO_HZ_COMMON
1122 if (tick_nohz_enabled) {
1123 ts->nohz_mode = NOHZ_MODE_HIGHRES;
1124 tick_nohz_active = 1;
1125 }
1126#endif
1127} 1130}
1128#endif /* HIGH_RES_TIMERS */ 1131#endif /* HIGH_RES_TIMERS */
1129 1132
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 3398d93c74a7..343142ed996a 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -85,6 +85,7 @@ struct tvec_base {
85 unsigned long active_timers; 85 unsigned long active_timers;
86 unsigned long all_timers; 86 unsigned long all_timers;
87 int cpu; 87 int cpu;
88 bool migration_enabled;
88 struct tvec_root tv1; 89 struct tvec_root tv1;
89 struct tvec tv2; 90 struct tvec tv2;
90 struct tvec tv3; 91 struct tvec tv3;
@@ -95,6 +96,54 @@ struct tvec_base {
95 96
96static DEFINE_PER_CPU(struct tvec_base, tvec_bases); 97static DEFINE_PER_CPU(struct tvec_base, tvec_bases);
97 98
99#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
100unsigned int sysctl_timer_migration = 1;
101
102void timers_update_migration(void)
103{
104 bool on = sysctl_timer_migration && tick_nohz_active;
105 unsigned int cpu;
106
107 /* Avoid the loop, if nothing to update */
108 if (this_cpu_read(tvec_bases.migration_enabled) == on)
109 return;
110
111 for_each_possible_cpu(cpu) {
112 per_cpu(tvec_bases.migration_enabled, cpu) = on;
113 per_cpu(hrtimer_bases.migration_enabled, cpu) = on;
114 }
115}
116
117int timer_migration_handler(struct ctl_table *table, int write,
118 void __user *buffer, size_t *lenp,
119 loff_t *ppos)
120{
121 static DEFINE_MUTEX(mutex);
122 int ret;
123
124 mutex_lock(&mutex);
125 ret = proc_dointvec(table, write, buffer, lenp, ppos);
126 if (!ret && write)
127 timers_update_migration();
128 mutex_unlock(&mutex);
129 return ret;
130}
131
132static inline struct tvec_base *get_target_base(struct tvec_base *base,
133 int pinned)
134{
135 if (pinned || !base->migration_enabled)
136 return this_cpu_ptr(&tvec_bases);
137 return per_cpu_ptr(&tvec_bases, get_nohz_timer_target());
138}
139#else
140static inline struct tvec_base *get_target_base(struct tvec_base *base,
141 int pinned)
142{
143 return this_cpu_ptr(&tvec_bases);
144}
145#endif
146
98static unsigned long round_jiffies_common(unsigned long j, int cpu, 147static unsigned long round_jiffies_common(unsigned long j, int cpu,
99 bool force_up) 148 bool force_up)
100{ 149{
@@ -716,11 +765,11 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
716 765
717static inline int 766static inline int
718__mod_timer(struct timer_list *timer, unsigned long expires, 767__mod_timer(struct timer_list *timer, unsigned long expires,
719 bool pending_only, int pinned) 768 bool pending_only, int pinned)
720{ 769{
721 struct tvec_base *base, *new_base; 770 struct tvec_base *base, *new_base;
722 unsigned long flags; 771 unsigned long flags;
723 int ret = 0 , cpu; 772 int ret = 0;
724 773
725 timer_stats_timer_set_start_info(timer); 774 timer_stats_timer_set_start_info(timer);
726 BUG_ON(!timer->function); 775 BUG_ON(!timer->function);
@@ -733,8 +782,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
733 782
734 debug_activate(timer, expires); 783 debug_activate(timer, expires);
735 784
736 cpu = get_nohz_timer_target(pinned); 785 new_base = get_target_base(base, pinned);
737 new_base = per_cpu_ptr(&tvec_bases, cpu);
738 786
739 if (base != new_base) { 787 if (base != new_base) {
740 /* 788 /*
@@ -751,7 +799,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
751 spin_unlock(&base->lock); 799 spin_unlock(&base->lock);
752 base = new_base; 800 base = new_base;
753 spin_lock(&base->lock); 801 spin_lock(&base->lock);
754 timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu; 802 timer->flags &= ~TIMER_BASEMASK;
803 timer->flags |= base->cpu;
755 } 804 }
756 } 805 }
757 806
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 1327004429be..a4536e1e3e2a 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -29,8 +29,6 @@ struct timer_list_iter {
29 29
30typedef void (*print_fn_t)(struct seq_file *m, unsigned int *classes); 30typedef void (*print_fn_t)(struct seq_file *m, unsigned int *classes);
31 31
32DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
33
34/* 32/*
35 * This allows printing both to /proc/timer_list and 33 * This allows printing both to /proc/timer_list and
36 * to the console (on SysRq-Q): 34 * to the console (on SysRq-Q):