Diffstat (limited to 'kernel')
-rw-r--r--   kernel/hrtimer.c              |  58
-rw-r--r--   kernel/module.c               |   2
-rw-r--r--   kernel/printk.c               |  33
-rw-r--r--   kernel/sched.c                |  11
-rw-r--r--   kernel/sysctl.c               |   8
-rw-r--r--   kernel/time/clockevents.c     |  14
-rw-r--r--   kernel/time/clocksource.c     |  20
-rw-r--r--   kernel/time/tick-broadcast.c  |   2
-rw-r--r--   kernel/time/tick-oneshot.c    |  17
-rw-r--r--   kernel/time/tick-sched.c      |   7
-rw-r--r--   kernel/timer.c                |  51
-rw-r--r--   kernel/trace/Kconfig          |  10
-rw-r--r--   kernel/trace/trace.c          |  23
-rw-r--r--   kernel/trace/trace_sysprof.c  |   3
-rw-r--r--   kernel/user.c                 |  67
15 files changed, 251 insertions, 75 deletions
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index cb8a15c19583..b675a67c9ac3 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -43,6 +43,8 @@
 #include <linux/seq_file.h>
 #include <linux/err.h>
 #include <linux/debugobjects.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
 
 #include <asm/uaccess.h>
 
@@ -193,12 +195,24 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
  * Switch the timer base to the current CPU when possible.
  */
 static inline struct hrtimer_clock_base *
-switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base)
+switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
+		    int pinned)
 {
 	struct hrtimer_clock_base *new_base;
 	struct hrtimer_cpu_base *new_cpu_base;
+	int cpu, preferred_cpu = -1;
+
+	cpu = smp_processor_id();
+#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
+	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
+		preferred_cpu = get_nohz_load_balancer();
+		if (preferred_cpu >= 0)
+			cpu = preferred_cpu;
+	}
+#endif
 
-	new_cpu_base = &__get_cpu_var(hrtimer_bases);
+again:
+	new_cpu_base = &per_cpu(hrtimer_bases, cpu);
 	new_base = &new_cpu_base->clock_base[base->index];
 
 	if (base != new_base) {
@@ -218,6 +232,40 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base)
 		timer->base = NULL;
 		spin_unlock(&base->cpu_base->lock);
 		spin_lock(&new_base->cpu_base->lock);
+
+		/* Optimized away for NOHZ=n SMP=n */
+		if (cpu == preferred_cpu) {
+			/* Calculate clock monotonic expiry time */
+#ifdef CONFIG_HIGH_RES_TIMERS
+			ktime_t expires = ktime_sub(hrtimer_get_expires(timer),
+						    new_base->offset);
+#else
+			ktime_t expires = hrtimer_get_expires(timer);
+#endif
+
+			/*
+			 * Get the next event on target cpu from the
+			 * clock events layer.
+			 * This covers the highres=off nohz=on case as well.
+			 */
+			ktime_t next = clockevents_get_next_event(cpu);
+
+			ktime_t delta = ktime_sub(expires, next);
+
+			/*
+			 * We do not migrate the timer when it is expiring
+			 * before the next event on the target cpu because
+			 * we cannot reprogram the target cpu hardware and
+			 * we would cause it to fire late.
+			 */
+			if (delta.tv64 < 0) {
+				cpu = smp_processor_id();
+				spin_unlock(&new_base->cpu_base->lock);
+				spin_lock(&base->cpu_base->lock);
+				timer->base = base;
+				goto again;
+			}
+		}
 		timer->base = new_base;
 	}
 	return new_base;
@@ -235,7 +283,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 	return base;
 }
 
-# define switch_hrtimer_base(t, b)	(b)
+# define switch_hrtimer_base(t, b, p)	(b)
 
 #endif /* !CONFIG_SMP */
 
@@ -907,9 +955,9 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 	ret = remove_hrtimer(timer, base);
 
 	/* Switch the timer base, if necessary: */
-	new_base = switch_hrtimer_base(timer, base);
+	new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
 
-	if (mode == HRTIMER_MODE_REL) {
+	if (mode & HRTIMER_MODE_REL) {
 		tim = ktime_add_safe(tim, new_base->get_time());
 		/*
 		 * CONFIG_TIME_LOW_RES is a temporary way for architectures
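
The hrtimer changes above make pinning an explicit property: a timer started without HRTIMER_MODE_PINNED on an idle CPU may be handed to the nohz idle load balancer's CPU, while a pinned timer always stays local. A minimal caller-side sketch, assuming a hypothetical per-CPU watchdog that must not migrate (the timer name, callback and 10 ms period are illustrative, not part of this patch):

#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct hrtimer my_watchdog;

/* Hypothetical callback: re-arms itself every 10 ms on the same CPU. */
static enum hrtimer_restart my_watchdog_fn(struct hrtimer *t)
{
	hrtimer_forward_now(t, ktime_set(0, 10 * NSEC_PER_MSEC));
	return HRTIMER_RESTART;
}

static void my_watchdog_start(void)
{
	hrtimer_init(&my_watchdog, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	my_watchdog.function = my_watchdog_fn;
	/* The _PINNED mode ORs in HRTIMER_MODE_PINNED, so
	 * switch_hrtimer_base() sees pinned != 0 and skips migration. */
	hrtimer_start(&my_watchdog, ktime_set(0, 10 * NSEC_PER_MSEC),
		      HRTIMER_MODE_REL_PINNED);
}
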
diff --git a/kernel/module.c b/kernel/module.c
index e4ab36ce7672..215aaab09e91 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2899,7 +2899,7 @@ void print_modules(void)
 	struct module *mod;
 	char buf[8];
 
-	printk("Modules linked in:");
+	printk(KERN_DEFAULT "Modules linked in:");
 	/* Most callers should already have preempt disabled, but make sure */
 	preempt_disable();
 	list_for_each_entry_rcu(mod, &modules, list)
diff --git a/kernel/printk.c b/kernel/printk.c
index 5052b5497c67..b4d97b54c1ec 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -687,20 +687,35 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 				  sizeof(printk_buf) - printed_len, fmt, args);
 
 
+	p = printk_buf;
+
+	/* Do we have a loglevel in the string? */
+	if (p[0] == '<') {
+		unsigned char c = p[1];
+		if (c && p[2] == '>') {
+			switch (c) {
+			case '0' ... '7': /* loglevel */
+				current_log_level = c - '0';
+				/* Fallthrough - make sure we're on a new line */
+			case 'd': /* KERN_DEFAULT */
+				if (!new_text_line) {
+					emit_log_char('\n');
+					new_text_line = 1;
+				}
+				/* Fallthrough - skip the loglevel */
+			case 'c': /* KERN_CONT */
+				p += 3;
+				break;
+			}
+		}
+	}
+
 	/*
 	 * Copy the output into log_buf. If the caller didn't provide
 	 * appropriate log level tags, we insert them here
 	 */
-	for (p = printk_buf; *p; p++) {
+	for ( ; *p; p++) {
 		if (new_text_line) {
-			/* If a token, set current_log_level and skip over */
-			if (p[0] == '<' && p[1] >= '0' && p[1] <= '7' &&
-			    p[2] == '>') {
-				current_log_level = p[1] - '0';
-				p += 3;
-				printed_len -= 3;
-			}
-
 			/* Always output the token */
 			emit_log_char('<');
 			emit_log_char(current_log_level + '0');
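
The rewritten vprintk() now decodes the level token once, up front, and understands two non-numeric tokens besides '<0>'..'<7>': '<d>' (KERN_DEFAULT, force a fresh line at the default level) and '<c>' (KERN_CONT, continue the previous line). A short sketch of the caller side, assuming a hypothetical "frobnicator" driver; the tokens named in the comments are what the KERN_* macros expand to in this kernel generation:

#include <linux/kernel.h>

static void frobnicator_report(int val)
{
	printk(KERN_INFO "frobnicator: value = ");	/* "<6>..." sets level 6 */
	printk(KERN_CONT "%d\n", val);			/* "<c>..." appends, no new header */
	printk("untagged line\n");			/* no token: default_message_loglevel */
}
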
diff --git a/kernel/sched.c b/kernel/sched.c
index 8ec9d13140be..8fb88a906aaa 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -240,7 +240,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
 		hard = hrtimer_get_expires(&rt_b->rt_period_timer);
 		delta = ktime_to_ns(ktime_sub(hard, soft));
 		__hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta,
-				HRTIMER_MODE_ABS, 0);
+				HRTIMER_MODE_ABS_PINNED, 0);
 	}
 	spin_unlock(&rt_b->rt_runtime_lock);
 }
@@ -1155,7 +1155,7 @@ static __init void init_hrtick(void)
 static void hrtick_start(struct rq *rq, u64 delay)
 {
 	__hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0,
-			HRTIMER_MODE_REL, 0);
+			HRTIMER_MODE_REL_PINNED, 0);
 }
 
 static inline void init_hrtick(void)
@@ -4397,6 +4397,11 @@ static struct {
 	.load_balancer = ATOMIC_INIT(-1),
 };
 
+int get_nohz_load_balancer(void)
+{
+	return atomic_read(&nohz.load_balancer);
+}
+
 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
 /**
  * lowest_flag_domain - Return lowest sched_domain containing flag.
@@ -9029,6 +9034,8 @@ void __init sched_init_smp(void)
 }
 #endif /* CONFIG_SMP */
 
+const_debug unsigned int sysctl_timer_migration = 1;
+
 int in_sched_functions(unsigned long addr)
 {
 	return in_lock_functions(addr) ||
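
sched.c contributes the two inputs the timer code consumes: get_nohz_load_balancer() exposes the CPU currently doing idle load balancing, and sysctl_timer_migration is the on/off switch. Reduced to one helper, a sketch mirroring the logic now duplicated in __mod_timer() and switch_hrtimer_base() (the helper name is illustrative, not a new kernel function):

#include <linux/sched.h>

static int my_timer_target_cpu(int this_cpu, int pinned)
{
	int preferred = -1;

#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
	/* Only unpinned timers on an idle CPU are candidates to move */
	if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu))
		preferred = get_nohz_load_balancer();
#endif
	return preferred >= 0 ? preferred : this_cpu;
}
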
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9ef80bba3509..f5c76b6cd616 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -329,6 +329,14 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "timer_migration",
+		.data		= &sysctl_timer_migration,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 #endif
 	{
 		.ctl_name	= CTL_UNNUMBERED,
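
The new table entry surfaces the switch as /proc/sys/kernel/timer_migration (mode 0644, plain integer via proc_dointvec). A userspace sketch that disables migration at run time, equivalent to "echo 0 > /proc/sys/kernel/timer_migration":

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/timer_migration", "w");

	if (!f) {
		perror("timer_migration");
		return 1;
	}
	fputs("0\n", f);
	return fclose(f) == 0 ? 0 : 1;
}
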
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index d13be216a790..1ad6dd461119 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -18,6 +18,7 @@
 #include <linux/notifier.h>
 #include <linux/smp.h>
 #include <linux/sysdev.h>
+#include <linux/tick.h>
 
 /* The registered clock event devices */
 static LIST_HEAD(clockevent_devices);
@@ -54,6 +55,7 @@ unsigned long clockevent_delta2ns(unsigned long latch,
 
 	return (unsigned long) clc;
 }
+EXPORT_SYMBOL_GPL(clockevent_delta2ns);
 
 /**
  * clockevents_set_mode - set the operating mode of a clock event device
@@ -187,6 +189,7 @@ void clockevents_register_device(struct clock_event_device *dev)
 
 	spin_unlock(&clockevents_lock);
 }
+EXPORT_SYMBOL_GPL(clockevents_register_device);
 
 /*
  * Noop handler when we shut down an event device
@@ -251,4 +254,15 @@ void clockevents_notify(unsigned long reason, void *arg)
 	spin_unlock(&clockevents_lock);
 }
 EXPORT_SYMBOL_GPL(clockevents_notify);
+
+ktime_t clockevents_get_next_event(int cpu)
+{
+	struct tick_device *td;
+	struct clock_event_device *dev;
+
+	td = &per_cpu(tick_cpu_device, cpu);
+	dev = td->evtdev;
+
+	return dev->next_event;
+}
 #endif
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 80189f6f1c5a..592bf584d1d2 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -509,6 +509,18 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
 		}
 	}
 
+	/*
+	 * Check to make sure we don't switch to a non-highres capable
+	 * clocksource if the tick code is in oneshot mode (highres or nohz)
+	 */
+	if (tick_oneshot_mode_active() &&
+	    !(ovr->flags & CLOCK_SOURCE_VALID_FOR_HRES)) {
+		printk(KERN_WARNING "%s clocksource is not HRT compatible. "
+			"Cannot switch while in HRT/NOHZ mode\n", ovr->name);
+		ovr = NULL;
+		override_name[0] = 0;
+	}
+
 	/* Reselect, when the override name has changed */
 	if (ovr != clocksource_override) {
 		clocksource_override = ovr;
@@ -537,7 +549,13 @@ sysfs_show_available_clocksources(struct sys_device *dev,
 
 	spin_lock_irq(&clocksource_lock);
 	list_for_each_entry(src, &clocksource_list, list) {
-		count += snprintf(buf + count,
+		/*
+		 * Don't show non-HRES clocksource if the tick code is
+		 * in one shot mode (highres=on or nohz=on)
+		 */
+		if (!tick_oneshot_mode_active() ||
+		    (src->flags & CLOCK_SOURCE_VALID_FOR_HRES))
+			count += snprintf(buf + count,
 				  max((ssize_t)PAGE_SIZE - count, (ssize_t)0),
 				  "%s ", src->name);
 	}
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 118a3b3b3f9a..877dbedc3118 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -27,7 +27,7 @@
  * timer stops in C3 state.
  */
 
-struct tick_device tick_broadcast_device;
+static struct tick_device tick_broadcast_device;
 /* FIXME: Use cpumask_var_t. */
 static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS);
 static DECLARE_BITMAP(tmpmask, NR_CPUS);
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index 2e8de678e767..a96c0e2b89cf 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -128,6 +128,23 @@ int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *))
 	return 0;
 }
 
+/**
+ * tick_oneshot_mode_active - check whether the system is in oneshot mode
+ *
+ * returns 1 when either nohz or highres are enabled. otherwise 0.
+ */
+int tick_oneshot_mode_active(void)
+{
+	unsigned long flags;
+	int ret;
+
+	local_irq_save(flags);
+	ret = __get_cpu_var(tick_cpu_device).mode == TICKDEV_MODE_ONESHOT;
+	local_irq_restore(flags);
+
+	return ret;
+}
+
 #ifdef CONFIG_HIGH_RES_TIMERS
 /**
  * tick_init_highres - switch to high resolution mode
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d3f1ef4d5cbe..2aff39c6f10c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -349,7 +349,7 @@ void tick_nohz_stop_sched_tick(int inidle)
 
 		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
 			hrtimer_start(&ts->sched_timer, expires,
-				      HRTIMER_MODE_ABS);
+				      HRTIMER_MODE_ABS_PINNED);
 			/* Check, if the timer was already in the past */
 			if (hrtimer_active(&ts->sched_timer))
 				goto out;
@@ -395,7 +395,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
 
 		if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
 			hrtimer_start_expires(&ts->sched_timer,
-					      HRTIMER_MODE_ABS);
+					      HRTIMER_MODE_ABS_PINNED);
 			/* Check, if the timer was already in the past */
 			if (hrtimer_active(&ts->sched_timer))
 				break;
@@ -698,7 +698,8 @@ void tick_setup_sched_timer(void)
 
 	for (;;) {
 		hrtimer_forward(&ts->sched_timer, now, tick_period);
-		hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS);
+		hrtimer_start_expires(&ts->sched_timer,
+				      HRTIMER_MODE_ABS_PINNED);
 		/* Check, if the timer was already in the past */
 		if (hrtimer_active(&ts->sched_timer))
 			break;
diff --git a/kernel/timer.c b/kernel/timer.c
index faf2db897de4..54d3912f8cad 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -38,6 +38,7 @@
 #include <linux/tick.h>
 #include <linux/kallsyms.h>
 #include <linux/perf_counter.h>
+#include <linux/sched.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -605,13 +606,12 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
 }
 
 static inline int
-__mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
+__mod_timer(struct timer_list *timer, unsigned long expires,
+	    bool pending_only, int pinned)
 {
 	struct tvec_base *base, *new_base;
 	unsigned long flags;
-	int ret;
-
-	ret = 0;
+	int ret = 0, cpu;
 
 	timer_stats_timer_set_start_info(timer);
 	BUG_ON(!timer->function);
@@ -630,6 +630,18 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 
 	new_base = __get_cpu_var(tvec_bases);
 
+	cpu = smp_processor_id();
+
+#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
+	if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
+		int preferred_cpu = get_nohz_load_balancer();
+
+		if (preferred_cpu >= 0)
+			cpu = preferred_cpu;
+	}
+#endif
+	new_base = per_cpu(tvec_bases, cpu);
+
 	if (base != new_base) {
 		/*
 		 * We are trying to schedule the timer on the local CPU.
@@ -669,7 +681,7 @@ out_unlock:
  */
 int mod_timer_pending(struct timer_list *timer, unsigned long expires)
 {
-	return __mod_timer(timer, expires, true);
+	return __mod_timer(timer, expires, true, TIMER_NOT_PINNED);
 }
 EXPORT_SYMBOL(mod_timer_pending);
 
@@ -703,11 +715,33 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
 	if (timer->expires == expires && timer_pending(timer))
 		return 1;
 
-	return __mod_timer(timer, expires, false);
+	return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
 }
 EXPORT_SYMBOL(mod_timer);
 
 /**
+ * mod_timer_pinned - modify a timer's timeout
+ * @timer: the timer to be modified
+ * @expires: new timeout in jiffies
+ *
+ * mod_timer_pinned() is a way to update the expire field of an
+ * active timer (if the timer is inactive it will be activated)
+ * and not allow the timer to be migrated to a different CPU.
+ *
+ * mod_timer_pinned(timer, expires) is equivalent to:
+ *
+ *     del_timer(timer); timer->expires = expires; add_timer(timer);
+ */
+int mod_timer_pinned(struct timer_list *timer, unsigned long expires)
+{
+	if (timer->expires == expires && timer_pending(timer))
+		return 1;
+
+	return __mod_timer(timer, expires, false, TIMER_PINNED);
+}
+EXPORT_SYMBOL(mod_timer_pinned);
+
+/**
  * add_timer - start a timer
  * @timer: the timer to be added
 *
@@ -1017,6 +1051,9 @@ cascade:
 	index = slot = timer_jiffies & TVN_MASK;
 	do {
 		list_for_each_entry(nte, varp->vec + slot, entry) {
+			if (tbase_get_deferrable(nte->base))
+				continue;
+
 			found = 1;
 			if (time_before(nte->expires, expires))
 				expires = nte->expires;
@@ -1307,7 +1344,7 @@ signed long __sched schedule_timeout(signed long timeout)
 	expire = timeout + jiffies;
 
 	setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
-	__mod_timer(&timer, expire, false);
+	__mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
 	schedule();
 	del_singleshot_timer_sync(&timer);
 
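
For the timer wheel the pinned/unpinned split is carried by the new __mod_timer() parameter: mod_timer() and mod_timer_pending() pass TIMER_NOT_PINNED, and the new mod_timer_pinned() is the caller-visible way to keep a timer on the current CPU. A sketch of a driver that polls per-CPU state (everything except mod_timer_pinned() and setup_timer() is illustrative):

#include <linux/timer.h>
#include <linux/jiffies.h>

static struct timer_list my_poll_timer;

static void my_poll_fn(unsigned long data)
{
	/* ... sample per-CPU state here ... */

	/* Re-arm on this CPU; __mod_timer() gets TIMER_PINNED and
	 * never consults the idle load balancer. */
	mod_timer_pinned(&my_poll_timer, jiffies + HZ / 10);
}

static void my_poll_start(void)
{
	setup_timer(&my_poll_timer, my_poll_fn, 0);
	mod_timer_pinned(&my_poll_timer, jiffies + HZ / 10);
}
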
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 4a13e5a01ce3..61071fecc82e 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -147,7 +147,7 @@ config IRQSOFF_TRACER
	  disabled by default and can be runtime (re-)started
	  via:
 
-	      echo 0 > /debugfs/tracing/tracing_max_latency
+	      echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
 
	  (Note that kernel size and overhead increases with this option
	  enabled. This option and the preempt-off timing option can be
@@ -168,7 +168,7 @@ config PREEMPT_TRACER
	  disabled by default and can be runtime (re-)started
	  via:
 
-	      echo 0 > /debugfs/tracing/tracing_max_latency
+	      echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
 
	  (Note that kernel size and overhead increases with this option
	  enabled. This option and the irqs-off timing option can be
@@ -261,7 +261,7 @@ config PROFILE_ANNOTATED_BRANCHES
	  This tracer profiles all the likely and unlikely macros
	  in the kernel. It will display the results in:
 
-	  /debugfs/tracing/profile_annotated_branch
+	  /sys/kernel/debug/tracing/profile_annotated_branch
 
	  Note: this will add a significant overhead, only turn this
	  on if you need to profile the system's use of these macros.
@@ -274,7 +274,7 @@ config PROFILE_ALL_BRANCHES
	  taken in the kernel is recorded whether it hit or miss.
	  The results will be displayed in:
 
-	  /debugfs/tracing/profile_branch
+	  /sys/kernel/debug/tracing/profile_branch
 
	  This option also enables the likely/unlikely profiler.
 
@@ -323,7 +323,7 @@ config STACK_TRACER
	select KALLSYMS
	help
	  This special tracer records the maximum stack footprint of the
-	  kernel and displays it in debugfs/tracing/stack_trace.
+	  kernel and displays it in /sys/kernel/debug/tracing/stack_trace.
 
	  This tracer works by hooking into every function call that the
	  kernel executes, and keeping a maximum stack depth value and
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8acd9b81a5d7..c1878bfb2e1e 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -344,7 +344,7 @@ static raw_spinlock_t ftrace_max_lock =
 /*
  * Copy the new maximum trace into the separate maximum-trace
  * structure. (this way the maximum trace is permanently saved,
- * for later retrieval via /debugfs/tracing/latency_trace)
+ * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
  */
 static void
 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
@@ -2414,21 +2414,20 @@ static const struct file_operations tracing_iter_fops = {
 
 static const char readme_msg[] =
 	"tracing mini-HOWTO:\n\n"
-	"# mkdir /debug\n"
-	"# mount -t debugfs nodev /debug\n\n"
-	"# cat /debug/tracing/available_tracers\n"
+	"# mount -t debugfs nodev /sys/kernel/debug\n\n"
+	"# cat /sys/kernel/debug/tracing/available_tracers\n"
	"wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n"
-	"# cat /debug/tracing/current_tracer\n"
+	"# cat /sys/kernel/debug/tracing/current_tracer\n"
	"nop\n"
-	"# echo sched_switch > /debug/tracing/current_tracer\n"
-	"# cat /debug/tracing/current_tracer\n"
+	"# echo sched_switch > /sys/kernel/debug/tracing/current_tracer\n"
+	"# cat /sys/kernel/debug/tracing/current_tracer\n"
	"sched_switch\n"
-	"# cat /debug/tracing/trace_options\n"
+	"# cat /sys/kernel/debug/tracing/trace_options\n"
	"noprint-parent nosym-offset nosym-addr noverbose\n"
-	"# echo print-parent > /debug/tracing/trace_options\n"
-	"# echo 1 > /debug/tracing/tracing_enabled\n"
-	"# cat /debug/tracing/trace > /tmp/trace.txt\n"
-	"# echo 0 > /debug/tracing/tracing_enabled\n"
+	"# echo print-parent > /sys/kernel/debug/tracing/trace_options\n"
+	"# echo 1 > /sys/kernel/debug/tracing/tracing_enabled\n"
+	"# cat /sys/kernel/debug/tracing/trace > /tmp/trace.txt\n"
+	"# echo 0 > /sys/kernel/debug/tracing/tracing_enabled\n"
 ;
 
 static ssize_t
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index e04b76cc238a..f6693969287d 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -203,7 +203,8 @@ static void start_stack_timer(void *unused)
 	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hrtimer->function = stack_trace_timer_fn;
 
-	hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL);
+	hrtimer_start(hrtimer, ns_to_ktime(sample_period),
+		      HRTIMER_MODE_REL_PINNED);
 }
 
 static void start_stack_timers(void)
diff --git a/kernel/user.c b/kernel/user.c
index 850e0ba41c1e..2c000e7132ac 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -75,21 +75,6 @@ static void uid_hash_remove(struct user_struct *up)
 	put_user_ns(up->user_ns);
 }
 
-static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
-{
-	struct user_struct *user;
-	struct hlist_node *h;
-
-	hlist_for_each_entry(user, h, hashent, uidhash_node) {
-		if (user->uid == uid) {
-			atomic_inc(&user->__count);
-			return user;
-		}
-	}
-
-	return NULL;
-}
-
 #ifdef CONFIG_USER_SCHED
 
 static void sched_destroy_user(struct user_struct *up)
@@ -119,6 +104,23 @@ static int sched_create_user(struct user_struct *up) { return 0; }
 
 #if defined(CONFIG_USER_SCHED) && defined(CONFIG_SYSFS)
 
+static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
+{
+	struct user_struct *user;
+	struct hlist_node *h;
+
+	hlist_for_each_entry(user, h, hashent, uidhash_node) {
+		if (user->uid == uid) {
+			/* possibly resurrect an "almost deleted" object */
+			if (atomic_inc_return(&user->__count) == 1)
+				cancel_delayed_work(&user->work);
+			return user;
+		}
+	}
+
+	return NULL;
+}
+
 static struct kset *uids_kset; /* represents the /sys/kernel/uids/ directory */
 static DEFINE_MUTEX(uids_mutex);
 
@@ -283,12 +285,12 @@ int __init uids_sysfs_init(void)
 	return uids_user_create(&root_user);
 }
 
-/* work function to remove sysfs directory for a user and free up
+/* delayed work function to remove sysfs directory for a user and free up
  * corresponding structures.
  */
 static void cleanup_user_struct(struct work_struct *w)
 {
-	struct user_struct *up = container_of(w, struct user_struct, work);
+	struct user_struct *up = container_of(w, struct user_struct, work.work);
 	unsigned long flags;
 	int remove_user = 0;
 
@@ -297,15 +299,12 @@ static void cleanup_user_struct(struct work_struct *w)
 	 */
 	uids_mutex_lock();
 
-	local_irq_save(flags);
-
-	if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) {
+	spin_lock_irqsave(&uidhash_lock, flags);
+	if (atomic_read(&up->__count) == 0) {
 		uid_hash_remove(up);
 		remove_user = 1;
-		spin_unlock_irqrestore(&uidhash_lock, flags);
-	} else {
-		local_irq_restore(flags);
 	}
+	spin_unlock_irqrestore(&uidhash_lock, flags);
 
 	if (!remove_user)
 		goto done;
@@ -331,16 +330,28 @@ done:
  */
 static void free_user(struct user_struct *up, unsigned long flags)
 {
-	/* restore back the count */
-	atomic_inc(&up->__count);
 	spin_unlock_irqrestore(&uidhash_lock, flags);
-
-	INIT_WORK(&up->work, cleanup_user_struct);
-	schedule_work(&up->work);
+	INIT_DELAYED_WORK(&up->work, cleanup_user_struct);
+	schedule_delayed_work(&up->work, msecs_to_jiffies(1000));
 }
 
 #else	/* CONFIG_USER_SCHED && CONFIG_SYSFS */
 
+static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
+{
+	struct user_struct *user;
+	struct hlist_node *h;
+
+	hlist_for_each_entry(user, h, hashent, uidhash_node) {
+		if (user->uid == uid) {
+			atomic_inc(&user->__count);
+			return user;
+		}
+	}
+
+	return NULL;
+}
+
 int uids_sysfs_init(void) { return 0; }
 static inline int uids_user_create(struct user_struct *up) { return 0; }
 static inline void uids_mutex_lock(void) { }
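
The user.c changes close a teardown race by deferring destruction: free_user() only schedules delayed cleanup, and a concurrent uid_hash_find() that bumps the refcount from 0 back to 1 cancels that work and resurrects the object. The pattern, reduced to its core (the types and names below are illustrative, not kernel API):

#include <linux/workqueue.h>
#include <asm/atomic.h>

struct my_obj {
	atomic_t count;
	struct delayed_work work;	/* runs the real destructor later */
};

static struct my_obj *my_obj_get(struct my_obj *o)
{
	/* A 0 -> 1 transition means destruction is already queued:
	 * cancel it and hand the "almost deleted" object back out. */
	if (atomic_inc_return(&o->count) == 1)
		cancel_delayed_work(&o->work);
	return o;
}
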