Diffstat (limited to 'kernel')
-rw-r--r--  kernel/hrtimer.c              58
-rw-r--r--  kernel/module.c                2
-rw-r--r--  kernel/printk.c               33
-rw-r--r--  kernel/sched.c                11
-rw-r--r--  kernel/sysctl.c                8
-rw-r--r--  kernel/time/clockevents.c     14
-rw-r--r--  kernel/time/clocksource.c     20
-rw-r--r--  kernel/time/tick-broadcast.c   2
-rw-r--r--  kernel/time/tick-oneshot.c    17
-rw-r--r--  kernel/time/tick-sched.c       7
-rw-r--r--  kernel/timer.c                51
-rw-r--r--  kernel/trace/Kconfig          10
-rw-r--r--  kernel/trace/trace.c          23
-rw-r--r--  kernel/trace/trace_sysprof.c   3
-rw-r--r--  kernel/user.c                 67
15 files changed, 251 insertions, 75 deletions
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index cb8a15c19583..b675a67c9ac3 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -43,6 +43,8 @@
 #include <linux/seq_file.h>
 #include <linux/err.h>
 #include <linux/debugobjects.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
 
 #include <asm/uaccess.h>
 
@@ -193,12 +195,24 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
  * Switch the timer base to the current CPU when possible.
  */
 static inline struct hrtimer_clock_base *
-switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base)
+switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
+                    int pinned)
 {
         struct hrtimer_clock_base *new_base;
         struct hrtimer_cpu_base *new_cpu_base;
+        int cpu, preferred_cpu = -1;
+
+        cpu = smp_processor_id();
+#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
+        if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
+                preferred_cpu = get_nohz_load_balancer();
+                if (preferred_cpu >= 0)
+                        cpu = preferred_cpu;
+        }
+#endif
 
-        new_cpu_base = &__get_cpu_var(hrtimer_bases);
+again:
+        new_cpu_base = &per_cpu(hrtimer_bases, cpu);
         new_base = &new_cpu_base->clock_base[base->index];
 
         if (base != new_base) {
@@ -218,6 +232,40 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base)
                 timer->base = NULL;
                 spin_unlock(&base->cpu_base->lock);
                 spin_lock(&new_base->cpu_base->lock);
+
+                /* Optimized away for NOHZ=n SMP=n */
+                if (cpu == preferred_cpu) {
+                        /* Calculate clock monotonic expiry time */
+#ifdef CONFIG_HIGH_RES_TIMERS
+                        ktime_t expires = ktime_sub(hrtimer_get_expires(timer),
+                                                    new_base->offset);
+#else
+                        ktime_t expires = hrtimer_get_expires(timer);
+#endif
+
+                        /*
+                         * Get the next event on target cpu from the
+                         * clock events layer.
+                         * This covers the highres=off nohz=on case as well.
+                         */
+                        ktime_t next = clockevents_get_next_event(cpu);
+
+                        ktime_t delta = ktime_sub(expires, next);
+
+                        /*
+                         * We do not migrate the timer when it is expiring
+                         * before the next event on the target cpu because
+                         * we cannot reprogram the target cpu hardware and
+                         * we would cause it to fire late.
+                         */
+                        if (delta.tv64 < 0) {
+                                cpu = smp_processor_id();
+                                spin_unlock(&new_base->cpu_base->lock);
+                                spin_lock(&base->cpu_base->lock);
+                                timer->base = base;
+                                goto again;
+                        }
+                }
                 timer->base = new_base;
         }
         return new_base;
@@ -235,7 +283,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
         return base;
 }
 
-# define switch_hrtimer_base(t, b)      (b)
+# define switch_hrtimer_base(t, b, p)   (b)
 
 #endif /* !CONFIG_SMP */
 
@@ -907,9 +955,9 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
         ret = remove_hrtimer(timer, base);
 
         /* Switch the timer base, if necessary: */
-        new_base = switch_hrtimer_base(timer, base);
+        new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
 
-        if (mode == HRTIMER_MODE_REL) {
+        if (mode & HRTIMER_MODE_REL) {
                 tim = ktime_add_safe(tim, new_base->get_time());
                 /*
                  * CONFIG_TIME_LOW_RES is a temporary way for architectures
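
Usage sketch (illustrative only, not part of the patch; the example_* names are made up): the hunks above thread a "pinned" flag through switch_hrtimer_base(), so timers started with one of the *_PINNED modes always stay on the arming CPU, while unpinned timers may be moved to the nohz load-balancer CPU when the current CPU is idle and timer_migration is enabled. A minimal caller would look like:

static struct hrtimer example_timer;            /* hypothetical timer */

static enum hrtimer_restart example_fn(struct hrtimer *t)
{
        return HRTIMER_NORESTART;
}

static void arm_example_timer(int pinned)
{
        hrtimer_init(&example_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        example_timer.function = example_fn;

        /*
         * HRTIMER_MODE_REL may be migrated off an idle CPU when
         * timer_migration is enabled; HRTIMER_MODE_REL_PINNED never is.
         */
        hrtimer_start(&example_timer, ktime_set(1, 0),
                      pinned ? HRTIMER_MODE_REL_PINNED : HRTIMER_MODE_REL);
}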
diff --git a/kernel/module.c b/kernel/module.c
index e4ab36ce7672..215aaab09e91 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2899,7 +2899,7 @@ void print_modules(void)
         struct module *mod;
         char buf[8];
 
-        printk("Modules linked in:");
+        printk(KERN_DEFAULT "Modules linked in:");
         /* Most callers should already have preempt disabled, but make sure */
         preempt_disable();
         list_for_each_entry_rcu(mod, &modules, list)
diff --git a/kernel/printk.c b/kernel/printk.c
index 5052b5497c67..b4d97b54c1ec 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -687,20 +687,35 @@ asmlinkage int vprintk(const char *fmt, va_list args)
                                 sizeof(printk_buf) - printed_len, fmt, args);
 
 
+        p = printk_buf;
+
+        /* Do we have a loglevel in the string? */
+        if (p[0] == '<') {
+                unsigned char c = p[1];
+                if (c && p[2] == '>') {
+                        switch (c) {
+                        case '0' ... '7': /* loglevel */
+                                current_log_level = c - '0';
+                                /* Fallthrough - make sure we're on a new line */
+                        case 'd': /* KERN_DEFAULT */
+                                if (!new_text_line) {
+                                        emit_log_char('\n');
+                                        new_text_line = 1;
+                                }
+                                /* Fallthrough - skip the loglevel */
+                        case 'c': /* KERN_CONT */
+                                p += 3;
+                                break;
+                        }
+                }
+        }
+
         /*
          * Copy the output into log_buf. If the caller didn't provide
          * appropriate log level tags, we insert them here
          */
-        for (p = printk_buf; *p; p++) {
+        for ( ; *p; p++) {
                 if (new_text_line) {
-                        /* If a token, set current_log_level and skip over */
-                        if (p[0] == '<' && p[1] >= '0' && p[1] <= '7' &&
-                            p[2] == '>') {
-                                current_log_level = p[1] - '0';
-                                p += 3;
-                                printed_len -= 3;
-                        }
-
                         /* Always output the token */
                         emit_log_char('<');
                         emit_log_char(current_log_level + '0');
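
Usage sketch (illustrative only, not part of the patch): the rewritten parser above recognizes three kinds of prefix tokens at the start of a printk() format string - "<0>".."<7>" select a loglevel, "<d>" (KERN_DEFAULT) keeps the default level but makes sure output starts on a new line, and "<c>" (KERN_CONT) continues the previous line:

static void example_log(void)
{
        printk(KERN_WARNING "example: resetting\n"); /* "<4>" selects loglevel 4 */
        printk(KERN_DEFAULT "Modules linked in:");   /* "<d>" keeps the default loglevel */
        printk(KERN_CONT " fuse");                   /* "<c>" appends to the previous line */
}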
diff --git a/kernel/sched.c b/kernel/sched.c
index 8ec9d13140be..8fb88a906aaa 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -240,7 +240,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
                 hard = hrtimer_get_expires(&rt_b->rt_period_timer);
                 delta = ktime_to_ns(ktime_sub(hard, soft));
                 __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta,
-                                         HRTIMER_MODE_ABS, 0);
+                                         HRTIMER_MODE_ABS_PINNED, 0);
         }
         spin_unlock(&rt_b->rt_runtime_lock);
 }
@@ -1155,7 +1155,7 @@ static __init void init_hrtick(void)
 static void hrtick_start(struct rq *rq, u64 delay)
 {
         __hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0,
-                                 HRTIMER_MODE_REL, 0);
+                                 HRTIMER_MODE_REL_PINNED, 0);
 }
 
 static inline void init_hrtick(void)
@@ -4397,6 +4397,11 @@ static struct {
         .load_balancer = ATOMIC_INIT(-1),
 };
 
+int get_nohz_load_balancer(void)
+{
+        return atomic_read(&nohz.load_balancer);
+}
+
 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
 /**
  * lowest_flag_domain - Return lowest sched_domain containing flag.
@@ -9029,6 +9034,8 @@ void __init sched_init_smp(void)
 }
 #endif /* CONFIG_SMP */
 
+const_debug unsigned int sysctl_timer_migration = 1;
+
 int in_sched_functions(unsigned long addr)
 {
         return in_lock_functions(addr) ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 9ef80bba3509..f5c76b6cd616 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -329,6 +329,14 @@ static struct ctl_table kern_table[] = {
                 .mode           = 0644,
                 .proc_handler   = &proc_dointvec,
         },
+        {
+                .ctl_name       = CTL_UNNUMBERED,
+                .procname       = "timer_migration",
+                .data           = &sysctl_timer_migration,
+                .maxlen         = sizeof(unsigned int),
+                .mode           = 0644,
+                .proc_handler   = &proc_dointvec,
+        },
 #endif
         {
                 .ctl_name       = CTL_UNNUMBERED,
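
Usage note (not part of the patch): the entry above exposes the sysctl_timer_migration knob defined in kernel/sched.c as /proc/sys/kernel/timer_migration (default 1). Disabling it, e.g.

    echo 0 > /proc/sys/kernel/timer_migration

keeps unpinned timers on the CPU that armed them even when that CPU is idle.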
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index d13be216a790..1ad6dd461119 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -18,6 +18,7 @@
 #include <linux/notifier.h>
 #include <linux/smp.h>
 #include <linux/sysdev.h>
+#include <linux/tick.h>
 
 /* The registered clock event devices */
 static LIST_HEAD(clockevent_devices);
@@ -54,6 +55,7 @@ unsigned long clockevent_delta2ns(unsigned long latch,
 
         return (unsigned long) clc;
 }
+EXPORT_SYMBOL_GPL(clockevent_delta2ns);
 
 /**
  * clockevents_set_mode - set the operating mode of a clock event device
@@ -187,6 +189,7 @@ void clockevents_register_device(struct clock_event_device *dev)
 
         spin_unlock(&clockevents_lock);
 }
+EXPORT_SYMBOL_GPL(clockevents_register_device);
 
 /*
  * Noop handler when we shut down an event device
@@ -251,4 +254,15 @@ void clockevents_notify(unsigned long reason, void *arg)
         spin_unlock(&clockevents_lock);
 }
 EXPORT_SYMBOL_GPL(clockevents_notify);
+
+ktime_t clockevents_get_next_event(int cpu)
+{
+        struct tick_device *td;
+        struct clock_event_device *dev;
+
+        td = &per_cpu(tick_cpu_device, cpu);
+        dev = td->evtdev;
+
+        return dev->next_event;
+}
 #endif
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 80189f6f1c5a..592bf584d1d2 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -509,6 +509,18 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
                 }
         }
 
+        /*
+         * Check to make sure we don't switch to a non-highres capable
+         * clocksource if the tick code is in oneshot mode (highres or nohz)
+         */
+        if (tick_oneshot_mode_active() &&
+            !(ovr->flags & CLOCK_SOURCE_VALID_FOR_HRES)) {
+                printk(KERN_WARNING "%s clocksource is not HRT compatible. "
+                        "Cannot switch while in HRT/NOHZ mode\n", ovr->name);
+                ovr = NULL;
+                override_name[0] = 0;
+        }
+
         /* Reselect, when the override name has changed */
         if (ovr != clocksource_override) {
                 clocksource_override = ovr;
@@ -537,7 +549,13 @@ sysfs_show_available_clocksources(struct sys_device *dev,
 
         spin_lock_irq(&clocksource_lock);
         list_for_each_entry(src, &clocksource_list, list) {
-                count += snprintf(buf + count,
+                /*
+                 * Don't show non-HRES clocksource if the tick code is
+                 * in one shot mode (highres=on or nohz=on)
+                 */
+                if (!tick_oneshot_mode_active() ||
+                    (src->flags & CLOCK_SOURCE_VALID_FOR_HRES))
+                        count += snprintf(buf + count,
                                   max((ssize_t)PAGE_SIZE - count, (ssize_t)0),
                                   "%s ", src->name);
         }
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 118a3b3b3f9a..877dbedc3118 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -27,7 +27,7 @@
  * timer stops in C3 state.
  */
 
-struct tick_device tick_broadcast_device;
+static struct tick_device tick_broadcast_device;
 /* FIXME: Use cpumask_var_t. */
 static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS);
 static DECLARE_BITMAP(tmpmask, NR_CPUS);
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index 2e8de678e767..a96c0e2b89cf 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -128,6 +128,23 @@ int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *))
         return 0;
 }
 
+/**
+ * tick_check_oneshot_mode - check whether the system is in oneshot mode
+ *
+ * returns 1 when either nohz or highres are enabled. otherwise 0.
+ */
+int tick_oneshot_mode_active(void)
+{
+        unsigned long flags;
+        int ret;
+
+        local_irq_save(flags);
+        ret = __get_cpu_var(tick_cpu_device).mode == TICKDEV_MODE_ONESHOT;
+        local_irq_restore(flags);
+
+        return ret;
+}
+
 #ifdef CONFIG_HIGH_RES_TIMERS
 /**
  * tick_init_highres - switch to high resolution mode
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d3f1ef4d5cbe..2aff39c6f10c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -349,7 +349,7 @@ void tick_nohz_stop_sched_tick(int inidle)
 
                 if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
                         hrtimer_start(&ts->sched_timer, expires,
-                                      HRTIMER_MODE_ABS);
+                                      HRTIMER_MODE_ABS_PINNED);
                         /* Check, if the timer was already in the past */
                         if (hrtimer_active(&ts->sched_timer))
                                 goto out;
@@ -395,7 +395,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
 
         if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
                 hrtimer_start_expires(&ts->sched_timer,
-                                      HRTIMER_MODE_ABS);
+                                      HRTIMER_MODE_ABS_PINNED);
                 /* Check, if the timer was already in the past */
                 if (hrtimer_active(&ts->sched_timer))
                         break;
@@ -698,7 +698,8 @@ void tick_setup_sched_timer(void)
 
         for (;;) {
                 hrtimer_forward(&ts->sched_timer, now, tick_period);
-                hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS);
+                hrtimer_start_expires(&ts->sched_timer,
+                                      HRTIMER_MODE_ABS_PINNED);
                 /* Check, if the timer was already in the past */
                 if (hrtimer_active(&ts->sched_timer))
                         break;
diff --git a/kernel/timer.c b/kernel/timer.c
index faf2db897de4..54d3912f8cad 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -38,6 +38,7 @@
 #include <linux/tick.h>
 #include <linux/kallsyms.h>
 #include <linux/perf_counter.h>
+#include <linux/sched.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -605,13 +606,12 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
 }
 
 static inline int
-__mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
+__mod_timer(struct timer_list *timer, unsigned long expires,
+            bool pending_only, int pinned)
 {
         struct tvec_base *base, *new_base;
         unsigned long flags;
-        int ret;
-
-        ret = 0;
+        int ret = 0 , cpu;
 
         timer_stats_timer_set_start_info(timer);
         BUG_ON(!timer->function);
@@ -630,6 +630,18 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
 
         new_base = __get_cpu_var(tvec_bases);
 
+        cpu = smp_processor_id();
+
+#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
+        if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
+                int preferred_cpu = get_nohz_load_balancer();
+
+                if (preferred_cpu >= 0)
+                        cpu = preferred_cpu;
+        }
+#endif
+        new_base = per_cpu(tvec_bases, cpu);
+
         if (base != new_base) {
                 /*
                  * We are trying to schedule the timer on the local CPU.
@@ -669,7 +681,7 @@ out_unlock:
  */
 int mod_timer_pending(struct timer_list *timer, unsigned long expires)
 {
-        return __mod_timer(timer, expires, true);
+        return __mod_timer(timer, expires, true, TIMER_NOT_PINNED);
 }
 EXPORT_SYMBOL(mod_timer_pending);
 
@@ -703,11 +715,33 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
         if (timer->expires == expires && timer_pending(timer))
                 return 1;
 
-        return __mod_timer(timer, expires, false);
+        return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
 }
 EXPORT_SYMBOL(mod_timer);
 
 /**
+ * mod_timer_pinned - modify a timer's timeout
+ * @timer: the timer to be modified
+ * @expires: new timeout in jiffies
+ *
+ * mod_timer_pinned() is a way to update the expire field of an
+ * active timer (if the timer is inactive it will be activated)
+ * and not allow the timer to be migrated to a different CPU.
+ *
+ * mod_timer_pinned(timer, expires) is equivalent to:
+ *
+ *     del_timer(timer); timer->expires = expires; add_timer(timer);
+ */
+int mod_timer_pinned(struct timer_list *timer, unsigned long expires)
+{
+        if (timer->expires == expires && timer_pending(timer))
+                return 1;
+
+        return __mod_timer(timer, expires, false, TIMER_PINNED);
+}
+EXPORT_SYMBOL(mod_timer_pinned);
+
+/**
  * add_timer - start a timer
  * @timer: the timer to be added
  *
@@ -1017,6 +1051,9 @@ cascade:
                 index = slot = timer_jiffies & TVN_MASK;
                 do {
                         list_for_each_entry(nte, varp->vec + slot, entry) {
+                                if (tbase_get_deferrable(nte->base))
+                                        continue;
+
                                 found = 1;
                                 if (time_before(nte->expires, expires))
                                         expires = nte->expires;
@@ -1307,7 +1344,7 @@ signed long __sched schedule_timeout(signed long timeout)
         expire = timeout + jiffies;
 
         setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
-        __mod_timer(&timer, expire, false);
+        __mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
         schedule();
         del_singleshot_timer_sync(&timer);
 
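
Usage sketch (illustrative only, not part of the patch; the example_* names are made up): a driver that must keep its timer on the local CPU uses the new mod_timer_pinned() instead of mod_timer():

static struct timer_list example_timer;         /* hypothetical timer */

static void example_timeout(unsigned long data)
{
        /* timer callback */
}

static void example_arm(void)
{
        setup_timer(&example_timer, example_timeout, 0);

        /* may be migrated off an idle CPU when timer_migration is enabled */
        mod_timer(&example_timer, jiffies + HZ);

        /* stays on the CPU that armed it */
        mod_timer_pinned(&example_timer, jiffies + HZ);
}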
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 4a13e5a01ce3..61071fecc82e 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -147,7 +147,7 @@ config IRQSOFF_TRACER
           disabled by default and can be runtime (re-)started
           via:
 
-              echo 0 > /debugfs/tracing/tracing_max_latency
+              echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
 
           (Note that kernel size and overhead increases with this option
           enabled. This option and the preempt-off timing option can be
@@ -168,7 +168,7 @@ config PREEMPT_TRACER
           disabled by default and can be runtime (re-)started
           via:
 
-              echo 0 > /debugfs/tracing/tracing_max_latency
+              echo 0 > /sys/kernel/debug/tracing/tracing_max_latency
 
           (Note that kernel size and overhead increases with this option
           enabled. This option and the irqs-off timing option can be
@@ -261,7 +261,7 @@ config PROFILE_ANNOTATED_BRANCHES
           This tracer profiles all the the likely and unlikely macros
           in the kernel. It will display the results in:
 
-          /debugfs/tracing/profile_annotated_branch
+          /sys/kernel/debug/tracing/profile_annotated_branch
 
           Note: this will add a significant overhead, only turn this
           on if you need to profile the system's use of these macros.
@@ -274,7 +274,7 @@ config PROFILE_ALL_BRANCHES
           taken in the kernel is recorded whether it hit or miss.
           The results will be displayed in:
 
-          /debugfs/tracing/profile_branch
+          /sys/kernel/debug/tracing/profile_branch
 
           This option also enables the likely/unlikely profiler.
 
@@ -323,7 +323,7 @@ config STACK_TRACER
         select KALLSYMS
         help
           This special tracer records the maximum stack footprint of the
-          kernel and displays it in debugfs/tracing/stack_trace.
+          kernel and displays it in /sys/kernel/debug/tracing/stack_trace.
 
           This tracer works by hooking into every function call that the
           kernel executes, and keeping a maximum stack depth value and
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8acd9b81a5d7..c1878bfb2e1e 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -344,7 +344,7 @@ static raw_spinlock_t ftrace_max_lock =
 /*
  * Copy the new maximum trace into the separate maximum-trace
  * structure. (this way the maximum trace is permanently saved,
- * for later retrieval via /debugfs/tracing/latency_trace)
+ * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
  */
 static void
 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
@@ -2414,21 +2414,20 @@ static const struct file_operations tracing_iter_fops = {
 
 static const char readme_msg[] =
         "tracing mini-HOWTO:\n\n"
-        "# mkdir /debug\n"
-        "# mount -t debugfs nodev /debug\n\n"
-        "# cat /debug/tracing/available_tracers\n"
+        "# mount -t debugfs nodev /sys/kernel/debug\n\n"
+        "# cat /sys/kernel/debug/tracing/available_tracers\n"
        "wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n"
-        "# cat /debug/tracing/current_tracer\n"
+        "# cat /sys/kernel/debug/tracing/current_tracer\n"
         "nop\n"
-        "# echo sched_switch > /debug/tracing/current_tracer\n"
-        "# cat /debug/tracing/current_tracer\n"
+        "# echo sched_switch > /sys/kernel/debug/tracing/current_tracer\n"
+        "# cat /sys/kernel/debug/tracing/current_tracer\n"
         "sched_switch\n"
-        "# cat /debug/tracing/trace_options\n"
+        "# cat /sys/kernel/debug/tracing/trace_options\n"
         "noprint-parent nosym-offset nosym-addr noverbose\n"
-        "# echo print-parent > /debug/tracing/trace_options\n"
-        "# echo 1 > /debug/tracing/tracing_enabled\n"
-        "# cat /debug/tracing/trace > /tmp/trace.txt\n"
-        "# echo 0 > /debug/tracing/tracing_enabled\n"
+        "# echo print-parent > /sys/kernel/debug/tracing/trace_options\n"
+        "# echo 1 > /sys/kernel/debug/tracing/tracing_enabled\n"
+        "# cat /sys/kernel/debug/tracing/trace > /tmp/trace.txt\n"
+        "# echo 0 > /sys/kernel/debug/tracing/tracing_enabled\n"
 ;
 
 static ssize_t
diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c
index e04b76cc238a..f6693969287d 100644
--- a/kernel/trace/trace_sysprof.c
+++ b/kernel/trace/trace_sysprof.c
@@ -203,7 +203,8 @@ static void start_stack_timer(void *unused)
         hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
         hrtimer->function = stack_trace_timer_fn;
 
-        hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL);
+        hrtimer_start(hrtimer, ns_to_ktime(sample_period),
+                      HRTIMER_MODE_REL_PINNED);
 }
 
 static void start_stack_timers(void)
diff --git a/kernel/user.c b/kernel/user.c
index 850e0ba41c1e..2c000e7132ac 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -75,21 +75,6 @@ static void uid_hash_remove(struct user_struct *up)
         put_user_ns(up->user_ns);
 }
 
-static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
-{
-        struct user_struct *user;
-        struct hlist_node *h;
-
-        hlist_for_each_entry(user, h, hashent, uidhash_node) {
-                if (user->uid == uid) {
-                        atomic_inc(&user->__count);
-                        return user;
-                }
-        }
-
-        return NULL;
-}
-
 #ifdef CONFIG_USER_SCHED
 
 static void sched_destroy_user(struct user_struct *up)
@@ -119,6 +104,23 @@ static int sched_create_user(struct user_struct *up) { return 0; }
 
 #if defined(CONFIG_USER_SCHED) && defined(CONFIG_SYSFS)
 
+static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
+{
+        struct user_struct *user;
+        struct hlist_node *h;
+
+        hlist_for_each_entry(user, h, hashent, uidhash_node) {
+                if (user->uid == uid) {
+                        /* possibly resurrect an "almost deleted" object */
+                        if (atomic_inc_return(&user->__count) == 1)
+                                cancel_delayed_work(&user->work);
+                        return user;
+                }
+        }
+
+        return NULL;
+}
+
 static struct kset *uids_kset; /* represents the /sys/kernel/uids/ directory */
 static DEFINE_MUTEX(uids_mutex);
 
@@ -283,12 +285,12 @@ int __init uids_sysfs_init(void)
         return uids_user_create(&root_user);
 }
 
-/* work function to remove sysfs directory for a user and free up
+/* delayed work function to remove sysfs directory for a user and free up
  * corresponding structures.
  */
 static void cleanup_user_struct(struct work_struct *w)
 {
-        struct user_struct *up = container_of(w, struct user_struct, work);
+        struct user_struct *up = container_of(w, struct user_struct, work.work);
         unsigned long flags;
         int remove_user = 0;
 
@@ -297,15 +299,12 @@ static void cleanup_user_struct(struct work_struct *w)
          */
         uids_mutex_lock();
 
-        local_irq_save(flags);
-
-        if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) {
+        spin_lock_irqsave(&uidhash_lock, flags);
+        if (atomic_read(&up->__count) == 0) {
                 uid_hash_remove(up);
                 remove_user = 1;
-                spin_unlock_irqrestore(&uidhash_lock, flags);
-        } else {
-                local_irq_restore(flags);
         }
+        spin_unlock_irqrestore(&uidhash_lock, flags);
 
         if (!remove_user)
                 goto done;
@@ -331,16 +330,28 @@ done:
  */
 static void free_user(struct user_struct *up, unsigned long flags)
 {
-        /* restore back the count */
-        atomic_inc(&up->__count);
         spin_unlock_irqrestore(&uidhash_lock, flags);
-
-        INIT_WORK(&up->work, cleanup_user_struct);
-        schedule_work(&up->work);
+        INIT_DELAYED_WORK(&up->work, cleanup_user_struct);
+        schedule_delayed_work(&up->work, msecs_to_jiffies(1000));
 }
 
 #else /* CONFIG_USER_SCHED && CONFIG_SYSFS */
 
+static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
+{
+        struct user_struct *user;
+        struct hlist_node *h;
+
+        hlist_for_each_entry(user, h, hashent, uidhash_node) {
+                if (user->uid == uid) {
+                        atomic_inc(&user->__count);
+                        return user;
+                }
+        }
+
+        return NULL;
+}
+
 int uids_sysfs_init(void) { return 0; }
 static inline int uids_user_create(struct user_struct *up) { return 0; }
 static inline void uids_mutex_lock(void) { }
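
The user.c hunks above replace immediate freeing with delayed cleanup: the last put schedules a delayed work item, and a lookup that bumps the refcount from 0 back to 1 cancels the pending cleanup, "resurrecting" the object. A generic sketch of the same pattern (illustrative only; struct cached_obj and its helpers are made up, and the real code re-checks the count under uidhash_lock):

/* assumes <linux/workqueue.h>, <linux/slab.h> and atomic.h */
struct cached_obj {                             /* hypothetical object */
        atomic_t count;
        struct delayed_work work;
};

static void cached_obj_cleanup(struct work_struct *w)
{
        struct cached_obj *obj = container_of(w, struct cached_obj, work.work);

        if (atomic_read(&obj->count) == 0)      /* nobody resurrected it */
                kfree(obj);
}

static struct cached_obj *cached_obj_get(struct cached_obj *obj)
{
        /* possibly resurrect an "almost deleted" object */
        if (atomic_inc_return(&obj->count) == 1)
                cancel_delayed_work(&obj->work);
        return obj;
}

static void cached_obj_put(struct cached_obj *obj)
{
        if (atomic_dec_and_test(&obj->count)) {
                INIT_DELAYED_WORK(&obj->work, cached_obj_cleanup);
                schedule_delayed_work(&obj->work, msecs_to_jiffies(1000));
        }
}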