summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- include/linux/hrtimer.h 2
-rw-r--r-- include/linux/sched.h 6
-rw-r--r-- include/linux/sched/sysctl.h 12
-rw-r--r-- include/linux/timer.h 9
-rw-r--r-- kernel/rcu/tree_plugin.h 2
-rw-r--r-- kernel/sched/core.c 9
-rw-r--r-- kernel/sysctl.c 18
-rw-r--r-- kernel/time/hrtimer.c 35
-rw-r--r-- kernel/time/tick-internal.h 14
-rw-r--r-- kernel/time/tick-sched.c 25
-rw-r--r-- kernel/time/timer.c 59
-rw-r--r-- kernel/time/timer_list.c 2
12 files changed, 133 insertions, 60 deletions
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 5db055821ef3..69551020bb97 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -163,6 +163,7 @@ enum hrtimer_base_type {
163 * @cpu: cpu number 163 * @cpu: cpu number
164 * @active_bases: Bitfield to mark bases with active timers 164 * @active_bases: Bitfield to mark bases with active timers
165 * @clock_was_set_seq: Sequence counter of clock was set events 165 * @clock_was_set_seq: Sequence counter of clock was set events
166 * @migration_enabled: The migration of hrtimers to other cpus is enabled
166 * @expires_next: absolute time of the next event which was scheduled 167 * @expires_next: absolute time of the next event which was scheduled
167 * via clock_set_next_event() 168 * via clock_set_next_event()
168 * @next_timer: Pointer to the first expiring timer 169 * @next_timer: Pointer to the first expiring timer
@@ -186,6 +187,7 @@ struct hrtimer_cpu_base {
186 unsigned int cpu; 187 unsigned int cpu;
187 unsigned int active_bases; 188 unsigned int active_bases;
188 unsigned int clock_was_set_seq; 189 unsigned int clock_was_set_seq;
190 bool migration_enabled;
189#ifdef CONFIG_HIGH_RES_TIMERS 191#ifdef CONFIG_HIGH_RES_TIMERS
190 unsigned int in_hrtirq : 1, 192 unsigned int in_hrtirq : 1,
191 hres_active : 1, 193 hres_active : 1,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 26a2e6122734..d7151460b0cf 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -335,14 +335,10 @@ extern int runqueue_is_locked(int cpu);
335#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) 335#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
336extern void nohz_balance_enter_idle(int cpu); 336extern void nohz_balance_enter_idle(int cpu);
337extern void set_cpu_sd_state_idle(void); 337extern void set_cpu_sd_state_idle(void);
338extern int get_nohz_timer_target(int pinned); 338extern int get_nohz_timer_target(void);
339#else 339#else
340static inline void nohz_balance_enter_idle(int cpu) { } 340static inline void nohz_balance_enter_idle(int cpu) { }
341static inline void set_cpu_sd_state_idle(void) { } 341static inline void set_cpu_sd_state_idle(void) { }
342static inline int get_nohz_timer_target(int pinned)
343{
344 return smp_processor_id();
345}
346#endif 342#endif
347 343
348/* 344/*
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 596a0e007c62..c9e4731cf10b 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -57,24 +57,12 @@ extern unsigned int sysctl_numa_balancing_scan_size;
57extern unsigned int sysctl_sched_migration_cost; 57extern unsigned int sysctl_sched_migration_cost;
58extern unsigned int sysctl_sched_nr_migrate; 58extern unsigned int sysctl_sched_nr_migrate;
59extern unsigned int sysctl_sched_time_avg; 59extern unsigned int sysctl_sched_time_avg;
60extern unsigned int sysctl_timer_migration;
61extern unsigned int sysctl_sched_shares_window; 60extern unsigned int sysctl_sched_shares_window;
62 61
63int sched_proc_update_handler(struct ctl_table *table, int write, 62int sched_proc_update_handler(struct ctl_table *table, int write,
64 void __user *buffer, size_t *length, 63 void __user *buffer, size_t *length,
65 loff_t *ppos); 64 loff_t *ppos);
66#endif 65#endif
67#ifdef CONFIG_SCHED_DEBUG
68static inline unsigned int get_sysctl_timer_migration(void)
69{
70 return sysctl_timer_migration;
71}
72#else
73static inline unsigned int get_sysctl_timer_migration(void)
74{
75 return 1;
76}
77#endif
78 66
79/* 67/*
80 * control realtime throttling: 68 * control realtime throttling:
diff --git a/include/linux/timer.h b/include/linux/timer.h
index ff0689b6e297..61aa61dc410c 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -238,6 +238,15 @@ extern void run_local_timers(void);
238struct hrtimer; 238struct hrtimer;
239extern enum hrtimer_restart it_real_fn(struct hrtimer *); 239extern enum hrtimer_restart it_real_fn(struct hrtimer *);
240 240
241#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
242#include <linux/sysctl.h>
243
244extern unsigned int sysctl_timer_migration;
245int timer_migration_handler(struct ctl_table *table, int write,
246 void __user *buffer, size_t *lenp,
247 loff_t *ppos);
248#endif
249
241unsigned long __round_jiffies(unsigned long j, int cpu); 250unsigned long __round_jiffies(unsigned long j, int cpu);
242unsigned long __round_jiffies_relative(unsigned long j, int cpu); 251unsigned long __round_jiffies_relative(unsigned long j, int cpu);
243unsigned long round_jiffies(unsigned long j); 252unsigned long round_jiffies(unsigned long j);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 0ef80a0bbabb..d72fa24f2312 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1432,8 +1432,6 @@ module_param(rcu_idle_gp_delay, int, 0644);
1432static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY; 1432static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY;
1433module_param(rcu_idle_lazy_gp_delay, int, 0644); 1433module_param(rcu_idle_lazy_gp_delay, int, 0644);
1434 1434
1435extern int tick_nohz_active;
1436
1437/* 1435/*
1438 * Try to advance callbacks for all flavors of RCU on the current CPU, but 1436 * Try to advance callbacks for all flavors of RCU on the current CPU, but
1439 * only if it has been awhile since the last time we did so. Afterwards, 1437 * only if it has been awhile since the last time we did so. Afterwards,
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ecb7c4216350..e9f25ce70c77 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -572,13 +572,12 @@ void resched_cpu(int cpu)
572 * selecting an idle cpu will add more delays to the timers than intended 572 * selecting an idle cpu will add more delays to the timers than intended
573 * (as that cpu's timer base may not be uptodate wrt jiffies etc). 573 * (as that cpu's timer base may not be uptodate wrt jiffies etc).
574 */ 574 */
575int get_nohz_timer_target(int pinned) 575int get_nohz_timer_target(void)
576{ 576{
577 int cpu = smp_processor_id(); 577 int i, cpu = smp_processor_id();
578 int i;
579 struct sched_domain *sd; 578 struct sched_domain *sd;
580 579
581 if (pinned || !get_sysctl_timer_migration() || !idle_cpu(cpu)) 580 if (!idle_cpu(cpu))
582 return cpu; 581 return cpu;
583 582
584 rcu_read_lock(); 583 rcu_read_lock();
@@ -7050,8 +7049,6 @@ void __init sched_init_smp(void)
7050} 7049}
7051#endif /* CONFIG_SMP */ 7050#endif /* CONFIG_SMP */
7052 7051
7053const_debug unsigned int sysctl_timer_migration = 1;
7054
7055int in_sched_functions(unsigned long addr) 7052int in_sched_functions(unsigned long addr)
7056{ 7053{
7057 return in_lock_functions(addr) || 7054 return in_lock_functions(addr) ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 2082b1a88fb9..b13e9d2de302 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -349,15 +349,6 @@ static struct ctl_table kern_table[] = {
349 .mode = 0644, 349 .mode = 0644,
350 .proc_handler = proc_dointvec, 350 .proc_handler = proc_dointvec,
351 }, 351 },
352 {
353 .procname = "timer_migration",
354 .data = &sysctl_timer_migration,
355 .maxlen = sizeof(unsigned int),
356 .mode = 0644,
357 .proc_handler = proc_dointvec_minmax,
358 .extra1 = &zero,
359 .extra2 = &one,
360 },
361#endif /* CONFIG_SMP */ 352#endif /* CONFIG_SMP */
362#ifdef CONFIG_NUMA_BALANCING 353#ifdef CONFIG_NUMA_BALANCING
363 { 354 {
@@ -1132,6 +1123,15 @@ static struct ctl_table kern_table[] = {
1132 .extra1 = &zero, 1123 .extra1 = &zero,
1133 .extra2 = &one, 1124 .extra2 = &one,
1134 }, 1125 },
1126#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
1127 {
1128 .procname = "timer_migration",
1129 .data = &sysctl_timer_migration,
1130 .maxlen = sizeof(unsigned int),
1131 .mode = 0644,
1132 .proc_handler = timer_migration_handler,
1133 },
1134#endif
1135 { } 1135 { }
1136}; 1136};
1137 1137
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index f026413de4d6..6115f4df119b 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -177,6 +177,24 @@ hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
177#endif 177#endif
178} 178}
179 179
180#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
181static inline
182struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
183 int pinned)
184{
185 if (pinned || !base->migration_enabled)
186 return this_cpu_ptr(&hrtimer_bases);
187 return &per_cpu(hrtimer_bases, get_nohz_timer_target());
188}
189#else
190static inline
191struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
192 int pinned)
193{
194 return this_cpu_ptr(&hrtimer_bases);
195}
196#endif
197
180/* 198/*
181 * Switch the timer base to the current CPU when possible. 199 * Switch the timer base to the current CPU when possible.
182 */ 200 */
@@ -184,14 +202,13 @@ static inline struct hrtimer_clock_base *
184switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, 202switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
185 int pinned) 203 int pinned)
186{ 204{
205 struct hrtimer_cpu_base *new_cpu_base, *this_base;
187 struct hrtimer_clock_base *new_base; 206 struct hrtimer_clock_base *new_base;
188 struct hrtimer_cpu_base *new_cpu_base;
189 int this_cpu = smp_processor_id();
190 int cpu = get_nohz_timer_target(pinned);
191 int basenum = base->index; 207 int basenum = base->index;
192 208
209 this_base = this_cpu_ptr(&hrtimer_bases);
210 new_cpu_base = get_target_base(this_base, pinned);
193again: 211again:
194 new_cpu_base = &per_cpu(hrtimer_bases, cpu);
195 new_base = &new_cpu_base->clock_base[basenum]; 212 new_base = &new_cpu_base->clock_base[basenum];
196 213
197 if (base != new_base) { 214 if (base != new_base) {
@@ -212,17 +229,19 @@ again:
212 raw_spin_unlock(&base->cpu_base->lock); 229 raw_spin_unlock(&base->cpu_base->lock);
213 raw_spin_lock(&new_base->cpu_base->lock); 230 raw_spin_lock(&new_base->cpu_base->lock);
214 231
215 if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) { 232 if (new_cpu_base != this_base &&
216 cpu = this_cpu; 233 hrtimer_check_target(timer, new_base)) {
217 raw_spin_unlock(&new_base->cpu_base->lock); 234 raw_spin_unlock(&new_base->cpu_base->lock);
218 raw_spin_lock(&base->cpu_base->lock); 235 raw_spin_lock(&base->cpu_base->lock);
236 new_cpu_base = this_base;
219 timer->base = base; 237 timer->base = base;
220 goto again; 238 goto again;
221 } 239 }
222 timer->base = new_base; 240 timer->base = new_base;
223 } else { 241 } else {
224 if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) { 242 if (new_cpu_base != this_base &&
225 cpu = this_cpu; 243 hrtimer_check_target(timer, new_base)) {
244 new_cpu_base = this_base;
226 goto again; 245 goto again;
227 } 246 }
228 } 247 }
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index ec2208aabdd1..2edde84744df 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -149,4 +149,18 @@ extern void tick_nohz_init(void);
149static inline void tick_nohz_init(void) { } 149static inline void tick_nohz_init(void) { }
150#endif 150#endif
151 151
152#ifdef CONFIG_NO_HZ_COMMON
153extern unsigned long tick_nohz_active;
154#else
155#define tick_nohz_active (0)
156#endif
157
158#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
159extern void timers_update_migration(void);
160#else
161static inline void timers_update_migration(void) { }
162#endif
163
164DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
165
152extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem); 166extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 812f7a3b9898..b1cb01699355 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -399,7 +399,7 @@ void __init tick_nohz_init(void)
399 * NO HZ enabled ? 399 * NO HZ enabled ?
400 */ 400 */
401static int tick_nohz_enabled __read_mostly = 1; 401static int tick_nohz_enabled __read_mostly = 1;
402int tick_nohz_active __read_mostly; 402unsigned long tick_nohz_active __read_mostly;
403/* 403/*
404 * Enable / Disable tickless mode 404 * Enable / Disable tickless mode
405 */ 405 */
@@ -956,6 +956,16 @@ static void tick_nohz_handler(struct clock_event_device *dev)
956 tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); 956 tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
957} 957}
958 958
959static inline void tick_nohz_activate(struct tick_sched *ts, int mode)
960{
961 if (!tick_nohz_enabled)
962 return;
963 ts->nohz_mode = mode;
964 /* One update is enough */
965 if (!test_and_set_bit(0, &tick_nohz_active))
966 timers_update_migration();
967}
968
959/** 969/**
960 * tick_nohz_switch_to_nohz - switch to nohz mode 970 * tick_nohz_switch_to_nohz - switch to nohz mode
961 */ 971 */
@@ -970,9 +980,6 @@ static void tick_nohz_switch_to_nohz(void)
970 if (tick_switch_to_oneshot(tick_nohz_handler)) 980 if (tick_switch_to_oneshot(tick_nohz_handler))
971 return; 981 return;
972 982
973 tick_nohz_active = 1;
974 ts->nohz_mode = NOHZ_MODE_LOWRES;
975
976 /* 983 /*
977 * Recycle the hrtimer in ts, so we can share the 984 * Recycle the hrtimer in ts, so we can share the
978 * hrtimer_forward with the highres code. 985 * hrtimer_forward with the highres code.
@@ -984,6 +991,7 @@ static void tick_nohz_switch_to_nohz(void)
984 hrtimer_forward_now(&ts->sched_timer, tick_period); 991 hrtimer_forward_now(&ts->sched_timer, tick_period);
985 hrtimer_set_expires(&ts->sched_timer, next); 992 hrtimer_set_expires(&ts->sched_timer, next);
986 tick_program_event(next, 1); 993 tick_program_event(next, 1);
994 tick_nohz_activate(ts, NOHZ_MODE_LOWRES);
987} 995}
988 996
989/* 997/*
@@ -1035,6 +1043,7 @@ static inline void tick_nohz_irq_enter(void)
1035 1043
1036static inline void tick_nohz_switch_to_nohz(void) { } 1044static inline void tick_nohz_switch_to_nohz(void) { }
1037static inline void tick_nohz_irq_enter(void) { } 1045static inline void tick_nohz_irq_enter(void) { }
1046static inline void tick_nohz_activate(struct tick_sched *ts, int mode) { }
1038 1047
1039#endif /* CONFIG_NO_HZ_COMMON */ 1048#endif /* CONFIG_NO_HZ_COMMON */
1040 1049
@@ -1117,13 +1126,7 @@ void tick_setup_sched_timer(void)
1117 1126
1118 hrtimer_forward(&ts->sched_timer, now, tick_period); 1127 hrtimer_forward(&ts->sched_timer, now, tick_period);
1119 hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED); 1128 hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
1120 1129 tick_nohz_activate(ts, NOHZ_MODE_HIGHRES);
1121#ifdef CONFIG_NO_HZ_COMMON
1122 if (tick_nohz_enabled) {
1123 ts->nohz_mode = NOHZ_MODE_HIGHRES;
1124 tick_nohz_active = 1;
1125 }
1126#endif
1127} 1130}
1128#endif /* HIGH_RES_TIMERS */ 1131#endif /* HIGH_RES_TIMERS */
1129 1132
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 3398d93c74a7..343142ed996a 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -85,6 +85,7 @@ struct tvec_base {
85 unsigned long active_timers; 85 unsigned long active_timers;
86 unsigned long all_timers; 86 unsigned long all_timers;
87 int cpu; 87 int cpu;
88 bool migration_enabled;
88 struct tvec_root tv1; 89 struct tvec_root tv1;
89 struct tvec tv2; 90 struct tvec tv2;
90 struct tvec tv3; 91 struct tvec tv3;
@@ -95,6 +96,54 @@ struct tvec_base {
95 96
96static DEFINE_PER_CPU(struct tvec_base, tvec_bases); 97static DEFINE_PER_CPU(struct tvec_base, tvec_bases);
97 98
99#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
100unsigned int sysctl_timer_migration = 1;
101
102void timers_update_migration(void)
103{
104 bool on = sysctl_timer_migration && tick_nohz_active;
105 unsigned int cpu;
106
107 /* Avoid the loop, if nothing to update */
108 if (this_cpu_read(tvec_bases.migration_enabled) == on)
109 return;
110
111 for_each_possible_cpu(cpu) {
112 per_cpu(tvec_bases.migration_enabled, cpu) = on;
113 per_cpu(hrtimer_bases.migration_enabled, cpu) = on;
114 }
115}
116
117int timer_migration_handler(struct ctl_table *table, int write,
118 void __user *buffer, size_t *lenp,
119 loff_t *ppos)
120{
121 static DEFINE_MUTEX(mutex);
122 int ret;
123
124 mutex_lock(&mutex);
125 ret = proc_dointvec(table, write, buffer, lenp, ppos);
126 if (!ret && write)
127 timers_update_migration();
128 mutex_unlock(&mutex);
129 return ret;
130}
131
132static inline struct tvec_base *get_target_base(struct tvec_base *base,
133 int pinned)
134{
135 if (pinned || !base->migration_enabled)
136 return this_cpu_ptr(&tvec_bases);
137 return per_cpu_ptr(&tvec_bases, get_nohz_timer_target());
138}
139#else
140static inline struct tvec_base *get_target_base(struct tvec_base *base,
141 int pinned)
142{
143 return this_cpu_ptr(&tvec_bases);
144}
145#endif
146
98static unsigned long round_jiffies_common(unsigned long j, int cpu, 147static unsigned long round_jiffies_common(unsigned long j, int cpu,
99 bool force_up) 148 bool force_up)
100{ 149{
@@ -716,11 +765,11 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
716 765
717static inline int 766static inline int
718__mod_timer(struct timer_list *timer, unsigned long expires, 767__mod_timer(struct timer_list *timer, unsigned long expires,
719 bool pending_only, int pinned) 768 bool pending_only, int pinned)
720{ 769{
721 struct tvec_base *base, *new_base; 770 struct tvec_base *base, *new_base;
722 unsigned long flags; 771 unsigned long flags;
723 int ret = 0 , cpu; 772 int ret = 0;
724 773
725 timer_stats_timer_set_start_info(timer); 774 timer_stats_timer_set_start_info(timer);
726 BUG_ON(!timer->function); 775 BUG_ON(!timer->function);
@@ -733,8 +782,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
733 782
734 debug_activate(timer, expires); 783 debug_activate(timer, expires);
735 784
736 cpu = get_nohz_timer_target(pinned); 785 new_base = get_target_base(base, pinned);
737 new_base = per_cpu_ptr(&tvec_bases, cpu);
738 786
739 if (base != new_base) { 787 if (base != new_base) {
740 /* 788 /*
@@ -751,7 +799,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
751 spin_unlock(&base->lock); 799 spin_unlock(&base->lock);
752 base = new_base; 800 base = new_base;
753 spin_lock(&base->lock); 801 spin_lock(&base->lock);
754 timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu; 802 timer->flags &= ~TIMER_BASEMASK;
803 timer->flags |= base->cpu;
755 } 804 }
756 } 805 }
757 806
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 1327004429be..a4536e1e3e2a 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -29,8 +29,6 @@ struct timer_list_iter {
29 29
30typedef void (*print_fn_t)(struct seq_file *m, unsigned int *classes); 30typedef void (*print_fn_t)(struct seq_file *m, unsigned int *classes);
31 31
32DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
33
34/* 32/*
35 * This allows printing both to /proc/timer_list and 33 * This allows printing both to /proc/timer_list and
36 * to the console (on SysRq-Q): 34 * to the console (on SysRq-Q):