about | summary | refs | log | tree | commit | diff | stats
path: root/kernel
diff options
context:
space:
mode:
author: Ingo Molnar <mingo@elte.hu> 2009-04-07 06:05:21 -0400
committer: Ingo Molnar <mingo@elte.hu> 2009-04-07 06:05:25 -0400
commit: 6c009ecef8cca28c7c09eb16d0802e37915a76e1 (patch)
tree: 11c773f780186fdb9fbc9c80a73fb7c8426b1fba /kernel
parent: 98c2aaf8be5baf7193be37fb28bce8e7327158bc (diff)
parent: d508afb437daee7cf07da085b635c44a4ebf9b38 (diff)
Merge branch 'linus' into perfcounters/core
Merge reason: need the upstream facility added by:

  7f1e2ca: hrtimer: fix rq->lock inversion (again)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/exit.c 3
-rw-r--r-- kernel/hrtimer.c 55
-rw-r--r-- kernel/lockdep.c 5
-rw-r--r-- kernel/rcuclassic.c 23
-rw-r--r-- kernel/rcupreempt.c 48
-rw-r--r-- kernel/rcutree.c 20
-rw-r--r-- kernel/rcutree.h 10
-rw-r--r-- kernel/rcutree_trace.c 2
-rw-r--r-- kernel/sched.c 14
-rw-r--r-- kernel/softirq.c 2
-rw-r--r-- kernel/sysctl.c 4
-rw-r--r-- kernel/trace/kmemtrace.c 319
-rw-r--r-- kernel/trace/trace.h 6
13 files changed, 379 insertions, 132 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 7a14a2b504f5..fbb5d94c8bbc 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -840,8 +840,7 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
840 */ 840 */
841 if (tsk->exit_signal != SIGCHLD && !task_detached(tsk) && 841 if (tsk->exit_signal != SIGCHLD && !task_detached(tsk) &&
842 (tsk->parent_exec_id != tsk->real_parent->self_exec_id || 842 (tsk->parent_exec_id != tsk->real_parent->self_exec_id ||
843 tsk->self_exec_id != tsk->parent_exec_id) && 843 tsk->self_exec_id != tsk->parent_exec_id))
844 !capable(CAP_KILL))
845 tsk->exit_signal = SIGCHLD; 844 tsk->exit_signal = SIGCHLD;
846 845
847 signal = tracehook_notify_death(tsk, &cookie, group_dead); 846 signal = tracehook_notify_death(tsk, &cookie, group_dead);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index f394d2a42ca3..cb8a15c19583 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -651,14 +651,20 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer)
651 * and expiry check is done in the hrtimer_interrupt or in the softirq. 651 * and expiry check is done in the hrtimer_interrupt or in the softirq.
652 */ 652 */
653static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, 653static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
654 struct hrtimer_clock_base *base) 654 struct hrtimer_clock_base *base,
655 int wakeup)
655{ 656{
656 if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) { 657 if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) {
657 spin_unlock(&base->cpu_base->lock); 658 if (wakeup) {
658 raise_softirq_irqoff(HRTIMER_SOFTIRQ); 659 spin_unlock(&base->cpu_base->lock);
659 spin_lock(&base->cpu_base->lock); 660 raise_softirq_irqoff(HRTIMER_SOFTIRQ);
661 spin_lock(&base->cpu_base->lock);
662 } else
663 __raise_softirq_irqoff(HRTIMER_SOFTIRQ);
664
660 return 1; 665 return 1;
661 } 666 }
667
662 return 0; 668 return 0;
663} 669}
664 670
@@ -703,7 +709,8 @@ static inline int hrtimer_is_hres_enabled(void) { return 0; }
703static inline int hrtimer_switch_to_hres(void) { return 0; } 709static inline int hrtimer_switch_to_hres(void) { return 0; }
704static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base) { } 710static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base) { }
705static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, 711static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
706 struct hrtimer_clock_base *base) 712 struct hrtimer_clock_base *base,
713 int wakeup)
707{ 714{
708 return 0; 715 return 0;
709} 716}
@@ -886,20 +893,9 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base)
886 return 0; 893 return 0;
887} 894}
888 895
889/** 896int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
890 * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU 897 unsigned long delta_ns, const enum hrtimer_mode mode,
891 * @timer: the timer to be added 898 int wakeup)
892 * @tim: expiry time
893 * @delta_ns: "slack" range for the timer
894 * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
895 *
896 * Returns:
897 * 0 on success
898 * 1 when the timer was active
899 */
900int
901hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_ns,
902 const enum hrtimer_mode mode)
903{ 899{
904 struct hrtimer_clock_base *base, *new_base; 900 struct hrtimer_clock_base *base, *new_base;
905 unsigned long flags; 901 unsigned long flags;
@@ -940,12 +936,29 @@ hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_n
940 * XXX send_remote_softirq() ? 936 * XXX send_remote_softirq() ?
941 */ 937 */
942 if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases)) 938 if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases))
943 hrtimer_enqueue_reprogram(timer, new_base); 939 hrtimer_enqueue_reprogram(timer, new_base, wakeup);
944 940
945 unlock_hrtimer_base(timer, &flags); 941 unlock_hrtimer_base(timer, &flags);
946 942
947 return ret; 943 return ret;
948} 944}
945
946/**
947 * hrtimer_start_range_ns - (re)start an hrtimer on the current CPU
948 * @timer: the timer to be added
949 * @tim: expiry time
950 * @delta_ns: "slack" range for the timer
951 * @mode: expiry mode: absolute (HRTIMER_ABS) or relative (HRTIMER_REL)
952 *
953 * Returns:
954 * 0 on success
955 * 1 when the timer was active
956 */
957int hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
958 unsigned long delta_ns, const enum hrtimer_mode mode)
959{
960 return __hrtimer_start_range_ns(timer, tim, delta_ns, mode, 1);
961}
949EXPORT_SYMBOL_GPL(hrtimer_start_range_ns); 962EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
950 963
951/** 964/**
@@ -961,7 +974,7 @@ EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
961int 974int
962hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) 975hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
963{ 976{
964 return hrtimer_start_range_ns(timer, tim, 0, mode); 977 return __hrtimer_start_range_ns(timer, tim, 0, mode, 1);
965} 978}
966EXPORT_SYMBOL_GPL(hrtimer_start); 979EXPORT_SYMBOL_GPL(hrtimer_start);
967 980
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 81b5f33970b8..b0f011866969 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -793,6 +793,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
793 793
794 printk("BUG: MAX_LOCKDEP_KEYS too low!\n"); 794 printk("BUG: MAX_LOCKDEP_KEYS too low!\n");
795 printk("turning off the locking correctness validator.\n"); 795 printk("turning off the locking correctness validator.\n");
796 dump_stack();
796 return NULL; 797 return NULL;
797 } 798 }
798 class = lock_classes + nr_lock_classes++; 799 class = lock_classes + nr_lock_classes++;
@@ -856,6 +857,7 @@ static struct lock_list *alloc_list_entry(void)
856 857
857 printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n"); 858 printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n");
858 printk("turning off the locking correctness validator.\n"); 859 printk("turning off the locking correctness validator.\n");
860 dump_stack();
859 return NULL; 861 return NULL;
860 } 862 }
861 return list_entries + nr_list_entries++; 863 return list_entries + nr_list_entries++;
@@ -1682,6 +1684,7 @@ cache_hit:
1682 1684
1683 printk("BUG: MAX_LOCKDEP_CHAINS too low!\n"); 1685 printk("BUG: MAX_LOCKDEP_CHAINS too low!\n");
1684 printk("turning off the locking correctness validator.\n"); 1686 printk("turning off the locking correctness validator.\n");
1687 dump_stack();
1685 return 0; 1688 return 0;
1686 } 1689 }
1687 chain = lock_chains + nr_lock_chains++; 1690 chain = lock_chains + nr_lock_chains++;
@@ -2541,6 +2544,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2541 debug_locks_off(); 2544 debug_locks_off();
2542 printk("BUG: MAX_LOCKDEP_SUBCLASSES too low!\n"); 2545 printk("BUG: MAX_LOCKDEP_SUBCLASSES too low!\n");
2543 printk("turning off the locking correctness validator.\n"); 2546 printk("turning off the locking correctness validator.\n");
2547 dump_stack();
2544 return 0; 2548 return 0;
2545 } 2549 }
2546 2550
@@ -2637,6 +2641,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
2637 debug_locks_off(); 2641 debug_locks_off();
2638 printk("BUG: MAX_LOCK_DEPTH too low!\n"); 2642 printk("BUG: MAX_LOCK_DEPTH too low!\n");
2639 printk("turning off the locking correctness validator.\n"); 2643 printk("turning off the locking correctness validator.\n");
2644 dump_stack();
2640 return 0; 2645 return 0;
2641 } 2646 }
2642 2647
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index 654c640a6b9c..0f2b0b311304 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -65,6 +65,7 @@ static struct rcu_ctrlblk rcu_ctrlblk = {
65 .lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock), 65 .lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
66 .cpumask = CPU_BITS_NONE, 66 .cpumask = CPU_BITS_NONE,
67}; 67};
68
68static struct rcu_ctrlblk rcu_bh_ctrlblk = { 69static struct rcu_ctrlblk rcu_bh_ctrlblk = {
69 .cur = -300, 70 .cur = -300,
70 .completed = -300, 71 .completed = -300,
@@ -73,8 +74,26 @@ static struct rcu_ctrlblk rcu_bh_ctrlblk = {
73 .cpumask = CPU_BITS_NONE, 74 .cpumask = CPU_BITS_NONE,
74}; 75};
75 76
76DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L }; 77static DEFINE_PER_CPU(struct rcu_data, rcu_data);
77DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L }; 78static DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
79
80/*
81 * Increment the quiescent state counter.
82 * The counter is a bit degenerated: We do not need to know
83 * how many quiescent states passed, just if there was at least
84 * one since the start of the grace period. Thus just a flag.
85 */
86void rcu_qsctr_inc(int cpu)
87{
88 struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
89 rdp->passed_quiesc = 1;
90}
91
92void rcu_bh_qsctr_inc(int cpu)
93{
94 struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
95 rdp->passed_quiesc = 1;
96}
78 97
79static int blimit = 10; 98static int blimit = 10;
80static int qhimark = 10000; 99static int qhimark = 10000;
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index 5d59e850fb71..ce97a4df64d3 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -147,7 +147,51 @@ struct rcu_ctrlblk {
147 wait_queue_head_t sched_wq; /* Place for rcu_sched to sleep. */ 147 wait_queue_head_t sched_wq; /* Place for rcu_sched to sleep. */
148}; 148};
149 149
150struct rcu_dyntick_sched {
151 int dynticks;
152 int dynticks_snap;
153 int sched_qs;
154 int sched_qs_snap;
155 int sched_dynticks_snap;
156};
157
158static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched) = {
159 .dynticks = 1,
160};
161
162void rcu_qsctr_inc(int cpu)
163{
164 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
165
166 rdssp->sched_qs++;
167}
168
169#ifdef CONFIG_NO_HZ
170
171void rcu_enter_nohz(void)
172{
173 static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1);
174
175 smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
176 __get_cpu_var(rcu_dyntick_sched).dynticks++;
177 WARN_ON_RATELIMIT(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1, &rs);
178}
179
180void rcu_exit_nohz(void)
181{
182 static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1);
183
184 __get_cpu_var(rcu_dyntick_sched).dynticks++;
185 smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
186 WARN_ON_RATELIMIT(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1),
187 &rs);
188}
189
190#endif /* CONFIG_NO_HZ */
191
192
150static DEFINE_PER_CPU(struct rcu_data, rcu_data); 193static DEFINE_PER_CPU(struct rcu_data, rcu_data);
194
151static struct rcu_ctrlblk rcu_ctrlblk = { 195static struct rcu_ctrlblk rcu_ctrlblk = {
152 .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock), 196 .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock),
153 .completed = 0, 197 .completed = 0,
@@ -427,10 +471,6 @@ static void __rcu_advance_callbacks(struct rcu_data *rdp)
427 } 471 }
428} 472}
429 473
430DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched) = {
431 .dynticks = 1,
432};
433
434#ifdef CONFIG_NO_HZ 474#ifdef CONFIG_NO_HZ
435static DEFINE_PER_CPU(int, rcu_update_flag); 475static DEFINE_PER_CPU(int, rcu_update_flag);
436 476
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 97ce31579ec0..7f3266922572 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -78,6 +78,26 @@ DEFINE_PER_CPU(struct rcu_data, rcu_data);
78struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); 78struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
79DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); 79DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
80 80
81/*
82 * Increment the quiescent state counter.
83 * The counter is a bit degenerated: We do not need to know
84 * how many quiescent states passed, just if there was at least
85 * one since the start of the grace period. Thus just a flag.
86 */
87void rcu_qsctr_inc(int cpu)
88{
89 struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
90 rdp->passed_quiesc = 1;
91 rdp->passed_quiesc_completed = rdp->completed;
92}
93
94void rcu_bh_qsctr_inc(int cpu)
95{
96 struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
97 rdp->passed_quiesc = 1;
98 rdp->passed_quiesc_completed = rdp->completed;
99}
100
81#ifdef CONFIG_NO_HZ 101#ifdef CONFIG_NO_HZ
82DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { 102DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
83 .dynticks_nesting = 1, 103 .dynticks_nesting = 1,
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
new file mode 100644
index 000000000000..5e872bbf07f5
--- /dev/null
+++ b/kernel/rcutree.h
@@ -0,0 +1,10 @@
1
2/*
3 * RCU implementation internal declarations:
4 */
5extern struct rcu_state rcu_state;
6DECLARE_PER_CPU(struct rcu_data, rcu_data);
7
8extern struct rcu_state rcu_bh_state;
9DECLARE_PER_CPU(struct rcu_data, rcu_bh_data);
10
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index d6db3e837826..4ee954f6a8d5 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -43,6 +43,8 @@
43#include <linux/debugfs.h> 43#include <linux/debugfs.h>
44#include <linux/seq_file.h> 44#include <linux/seq_file.h>
45 45
46#include "rcutree.h"
47
46static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) 48static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
47{ 49{
48 if (!rdp->beenonline) 50 if (!rdp->beenonline)
diff --git a/kernel/sched.c b/kernel/sched.c
index 0de2f814fb18..b66a08c2480e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -231,13 +231,20 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
231 231
232 spin_lock(&rt_b->rt_runtime_lock); 232 spin_lock(&rt_b->rt_runtime_lock);
233 for (;;) { 233 for (;;) {
234 unsigned long delta;
235 ktime_t soft, hard;
236
234 if (hrtimer_active(&rt_b->rt_period_timer)) 237 if (hrtimer_active(&rt_b->rt_period_timer))
235 break; 238 break;
236 239
237 now = hrtimer_cb_get_time(&rt_b->rt_period_timer); 240 now = hrtimer_cb_get_time(&rt_b->rt_period_timer);
238 hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period); 241 hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period);
239 hrtimer_start_expires(&rt_b->rt_period_timer, 242
240 HRTIMER_MODE_ABS); 243 soft = hrtimer_get_softexpires(&rt_b->rt_period_timer);
244 hard = hrtimer_get_expires(&rt_b->rt_period_timer);
245 delta = ktime_to_ns(ktime_sub(hard, soft));
246 __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta,
247 HRTIMER_MODE_ABS, 0);
241 } 248 }
242 spin_unlock(&rt_b->rt_runtime_lock); 249 spin_unlock(&rt_b->rt_runtime_lock);
243} 250}
@@ -1147,7 +1154,8 @@ static __init void init_hrtick(void)
1147 */ 1154 */
1148static void hrtick_start(struct rq *rq, u64 delay) 1155static void hrtick_start(struct rq *rq, u64 delay)
1149{ 1156{
1150 hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL); 1157 __hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0,
1158 HRTIMER_MODE_REL, 0);
1151} 1159}
1152 1160
1153static inline void init_hrtick(void) 1161static inline void init_hrtick(void)
diff --git a/kernel/softirq.c b/kernel/softirq.c
index d105a82543d0..2fecefacdc5b 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -65,7 +65,7 @@ char *softirq_to_name[NR_SOFTIRQS] = {
65 * to the pending events, so lets the scheduler to balance 65 * to the pending events, so lets the scheduler to balance
66 * the softirq load for us. 66 * the softirq load for us.
67 */ 67 */
68static inline void wakeup_softirqd(void) 68void wakeup_softirqd(void)
69{ 69{
70 /* Interrupts are disabled: no need to stop preemption */ 70 /* Interrupts are disabled: no need to stop preemption */
71 struct task_struct *tsk = __get_cpu_var(ksoftirqd); 71 struct task_struct *tsk = __get_cpu_var(ksoftirqd);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 82350f8f04f6..b125e3387568 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -97,8 +97,8 @@ static int neg_one = -1;
97#endif 97#endif
98 98
99static int zero; 99static int zero;
100static int one = 1; 100static int __maybe_unused one = 1;
101static int two = 2; 101static int __maybe_unused two = 2;
102static unsigned long one_ul = 1; 102static unsigned long one_ul = 1;
103static int one_hundred = 100; 103static int one_hundred = 100;
104 104
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
index ae201b3eda89..5011f4d91e37 100644
--- a/kernel/trace/kmemtrace.c
+++ b/kernel/trace/kmemtrace.c
@@ -6,14 +6,16 @@
6 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com> 6 * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
7 */ 7 */
8 8
9#include <linux/dcache.h> 9#include <linux/tracepoint.h>
10#include <linux/seq_file.h>
10#include <linux/debugfs.h> 11#include <linux/debugfs.h>
12#include <linux/dcache.h>
11#include <linux/fs.h> 13#include <linux/fs.h>
12#include <linux/seq_file.h> 14
13#include <trace/kmemtrace.h> 15#include <trace/kmemtrace.h>
14 16
15#include "trace.h"
16#include "trace_output.h" 17#include "trace_output.h"
18#include "trace.h"
17 19
18/* Select an alternative, minimalistic output than the original one */ 20/* Select an alternative, minimalistic output than the original one */
19#define TRACE_KMEM_OPT_MINIMAL 0x1 21#define TRACE_KMEM_OPT_MINIMAL 0x1
@@ -25,14 +27,156 @@ static struct tracer_opt kmem_opts[] = {
25}; 27};
26 28
27static struct tracer_flags kmem_tracer_flags = { 29static struct tracer_flags kmem_tracer_flags = {
28 .val = 0, 30 .val = 0,
29 .opts = kmem_opts 31 .opts = kmem_opts
30}; 32};
31 33
32
33static bool kmem_tracing_enabled __read_mostly;
34static struct trace_array *kmemtrace_array; 34static struct trace_array *kmemtrace_array;
35 35
36/* Trace allocations */
37static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
38 unsigned long call_site,
39 const void *ptr,
40 size_t bytes_req,
41 size_t bytes_alloc,
42 gfp_t gfp_flags,
43 int node)
44{
45 struct trace_array *tr = kmemtrace_array;
46 struct kmemtrace_alloc_entry *entry;
47 struct ring_buffer_event *event;
48
49 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
50 if (!event)
51 return;
52
53 entry = ring_buffer_event_data(event);
54 tracing_generic_entry_update(&entry->ent, 0, 0);
55
56 entry->ent.type = TRACE_KMEM_ALLOC;
57 entry->type_id = type_id;
58 entry->call_site = call_site;
59 entry->ptr = ptr;
60 entry->bytes_req = bytes_req;
61 entry->bytes_alloc = bytes_alloc;
62 entry->gfp_flags = gfp_flags;
63 entry->node = node;
64
65 ring_buffer_unlock_commit(tr->buffer, event);
66
67 trace_wake_up();
68}
69
70static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
71 unsigned long call_site,
72 const void *ptr)
73{
74 struct trace_array *tr = kmemtrace_array;
75 struct kmemtrace_free_entry *entry;
76 struct ring_buffer_event *event;
77
78 event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
79 if (!event)
80 return;
81 entry = ring_buffer_event_data(event);
82 tracing_generic_entry_update(&entry->ent, 0, 0);
83
84 entry->ent.type = TRACE_KMEM_FREE;
85 entry->type_id = type_id;
86 entry->call_site = call_site;
87 entry->ptr = ptr;
88
89 ring_buffer_unlock_commit(tr->buffer, event);
90
91 trace_wake_up();
92}
93
94static void kmemtrace_kmalloc(unsigned long call_site,
95 const void *ptr,
96 size_t bytes_req,
97 size_t bytes_alloc,
98 gfp_t gfp_flags)
99{
100 kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
101 bytes_req, bytes_alloc, gfp_flags, -1);
102}
103
104static void kmemtrace_kmem_cache_alloc(unsigned long call_site,
105 const void *ptr,
106 size_t bytes_req,
107 size_t bytes_alloc,
108 gfp_t gfp_flags)
109{
110 kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
111 bytes_req, bytes_alloc, gfp_flags, -1);
112}
113
114static void kmemtrace_kmalloc_node(unsigned long call_site,
115 const void *ptr,
116 size_t bytes_req,
117 size_t bytes_alloc,
118 gfp_t gfp_flags,
119 int node)
120{
121 kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
122 bytes_req, bytes_alloc, gfp_flags, node);
123}
124
125static void kmemtrace_kmem_cache_alloc_node(unsigned long call_site,
126 const void *ptr,
127 size_t bytes_req,
128 size_t bytes_alloc,
129 gfp_t gfp_flags,
130 int node)
131{
132 kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
133 bytes_req, bytes_alloc, gfp_flags, node);
134}
135
136static void kmemtrace_kfree(unsigned long call_site, const void *ptr)
137{
138 kmemtrace_free(KMEMTRACE_TYPE_KMALLOC, call_site, ptr);
139}
140
141static void kmemtrace_kmem_cache_free(unsigned long call_site, const void *ptr)
142{
143 kmemtrace_free(KMEMTRACE_TYPE_CACHE, call_site, ptr);
144}
145
146static int kmemtrace_start_probes(void)
147{
148 int err;
149
150 err = register_trace_kmalloc(kmemtrace_kmalloc);
151 if (err)
152 return err;
153 err = register_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc);
154 if (err)
155 return err;
156 err = register_trace_kmalloc_node(kmemtrace_kmalloc_node);
157 if (err)
158 return err;
159 err = register_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node);
160 if (err)
161 return err;
162 err = register_trace_kfree(kmemtrace_kfree);
163 if (err)
164 return err;
165 err = register_trace_kmem_cache_free(kmemtrace_kmem_cache_free);
166
167 return err;
168}
169
170static void kmemtrace_stop_probes(void)
171{
172 unregister_trace_kmalloc(kmemtrace_kmalloc);
173 unregister_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc);
174 unregister_trace_kmalloc_node(kmemtrace_kmalloc_node);
175 unregister_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node);
176 unregister_trace_kfree(kmemtrace_kfree);
177 unregister_trace_kmem_cache_free(kmemtrace_kmem_cache_free);
178}
179
36static int kmem_trace_init(struct trace_array *tr) 180static int kmem_trace_init(struct trace_array *tr)
37{ 181{
38 int cpu; 182 int cpu;
@@ -41,14 +185,14 @@ static int kmem_trace_init(struct trace_array *tr)
41 for_each_cpu_mask(cpu, cpu_possible_map) 185 for_each_cpu_mask(cpu, cpu_possible_map)
42 tracing_reset(tr, cpu); 186 tracing_reset(tr, cpu);
43 187
44 kmem_tracing_enabled = true; 188 kmemtrace_start_probes();
45 189
46 return 0; 190 return 0;
47} 191}
48 192
49static void kmem_trace_reset(struct trace_array *tr) 193static void kmem_trace_reset(struct trace_array *tr)
50{ 194{
51 kmem_tracing_enabled = false; 195 kmemtrace_stop_probes();
52} 196}
53 197
54static void kmemtrace_headers(struct seq_file *s) 198static void kmemtrace_headers(struct seq_file *s)
@@ -66,47 +210,84 @@ static void kmemtrace_headers(struct seq_file *s)
66} 210}
67 211
68/* 212/*
69 * The two following functions give the original output from kmemtrace, 213 * The following functions give the original output from kmemtrace,
70 * or something close to....perhaps they need some missing things 214 * plus the origin CPU, since reordering occurs in-kernel now.
71 */ 215 */
216
217#define KMEMTRACE_USER_ALLOC 0
218#define KMEMTRACE_USER_FREE 1
219
220struct kmemtrace_user_event {
221 u8 event_id;
222 u8 type_id;
223 u16 event_size;
224 u32 cpu;
225 u64 timestamp;
226 unsigned long call_site;
227 unsigned long ptr;
228};
229
230struct kmemtrace_user_event_alloc {
231 size_t bytes_req;
232 size_t bytes_alloc;
233 unsigned gfp_flags;
234 int node;
235};
236
72static enum print_line_t 237static enum print_line_t
73kmemtrace_print_alloc_original(struct trace_iterator *iter, 238kmemtrace_print_alloc_user(struct trace_iterator *iter,
74 struct kmemtrace_alloc_entry *entry) 239 struct kmemtrace_alloc_entry *entry)
75{ 240{
241 struct kmemtrace_user_event_alloc *ev_alloc;
76 struct trace_seq *s = &iter->seq; 242 struct trace_seq *s = &iter->seq;
77 int ret; 243 struct kmemtrace_user_event *ev;
244
245 ev = trace_seq_reserve(s, sizeof(*ev));
246 if (!ev)
247 return TRACE_TYPE_PARTIAL_LINE;
78 248
79 /* Taken from the old linux/kmemtrace.h */ 249 ev->event_id = KMEMTRACE_USER_ALLOC;
80 ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu " 250 ev->type_id = entry->type_id;
81 "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n", 251 ev->event_size = sizeof(*ev) + sizeof(*ev_alloc);
82 entry->type_id, entry->call_site, (unsigned long) entry->ptr, 252 ev->cpu = iter->cpu;
83 (unsigned long) entry->bytes_req, (unsigned long) entry->bytes_alloc, 253 ev->timestamp = iter->ts;
84 (unsigned long) entry->gfp_flags, entry->node); 254 ev->call_site = entry->call_site;
255 ev->ptr = (unsigned long)entry->ptr;
85 256
86 if (!ret) 257 ev_alloc = trace_seq_reserve(s, sizeof(*ev_alloc));
258 if (!ev_alloc)
87 return TRACE_TYPE_PARTIAL_LINE; 259 return TRACE_TYPE_PARTIAL_LINE;
88 260
261 ev_alloc->bytes_req = entry->bytes_req;
262 ev_alloc->bytes_alloc = entry->bytes_alloc;
263 ev_alloc->gfp_flags = entry->gfp_flags;
264 ev_alloc->node = entry->node;
265
89 return TRACE_TYPE_HANDLED; 266 return TRACE_TYPE_HANDLED;
90} 267}
91 268
92static enum print_line_t 269static enum print_line_t
93kmemtrace_print_free_original(struct trace_iterator *iter, 270kmemtrace_print_free_user(struct trace_iterator *iter,
94 struct kmemtrace_free_entry *entry) 271 struct kmemtrace_free_entry *entry)
95{ 272{
96 struct trace_seq *s = &iter->seq; 273 struct trace_seq *s = &iter->seq;
97 int ret; 274 struct kmemtrace_user_event *ev;
98 275
99 /* Taken from the old linux/kmemtrace.h */ 276 ev = trace_seq_reserve(s, sizeof(*ev));
100 ret = trace_seq_printf(s, "type_id %d call_site %lu ptr %lu\n", 277 if (!ev)
101 entry->type_id, entry->call_site, (unsigned long) entry->ptr);
102
103 if (!ret)
104 return TRACE_TYPE_PARTIAL_LINE; 278 return TRACE_TYPE_PARTIAL_LINE;
105 279
280 ev->event_id = KMEMTRACE_USER_FREE;
281 ev->type_id = entry->type_id;
282 ev->event_size = sizeof(*ev);
283 ev->cpu = iter->cpu;
284 ev->timestamp = iter->ts;
285 ev->call_site = entry->call_site;
286 ev->ptr = (unsigned long)entry->ptr;
287
106 return TRACE_TYPE_HANDLED; 288 return TRACE_TYPE_HANDLED;
107} 289}
108 290
109
110/* The two other following provide a more minimalistic output */ 291/* The two other following provide a more minimalistic output */
111static enum print_line_t 292static enum print_line_t
112kmemtrace_print_alloc_compress(struct trace_iterator *iter, 293kmemtrace_print_alloc_compress(struct trace_iterator *iter,
@@ -178,7 +359,7 @@ kmemtrace_print_alloc_compress(struct trace_iterator *iter,
178 359
179static enum print_line_t 360static enum print_line_t
180kmemtrace_print_free_compress(struct trace_iterator *iter, 361kmemtrace_print_free_compress(struct trace_iterator *iter,
181 struct kmemtrace_free_entry *entry) 362 struct kmemtrace_free_entry *entry)
182{ 363{
183 struct trace_seq *s = &iter->seq; 364 struct trace_seq *s = &iter->seq;
184 int ret; 365 int ret;
@@ -239,20 +420,22 @@ static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
239 switch (entry->type) { 420 switch (entry->type) {
240 case TRACE_KMEM_ALLOC: { 421 case TRACE_KMEM_ALLOC: {
241 struct kmemtrace_alloc_entry *field; 422 struct kmemtrace_alloc_entry *field;
423
242 trace_assign_type(field, entry); 424 trace_assign_type(field, entry);
243 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL) 425 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
244 return kmemtrace_print_alloc_compress(iter, field); 426 return kmemtrace_print_alloc_compress(iter, field);
245 else 427 else
246 return kmemtrace_print_alloc_original(iter, field); 428 return kmemtrace_print_alloc_user(iter, field);
247 } 429 }
248 430
249 case TRACE_KMEM_FREE: { 431 case TRACE_KMEM_FREE: {
250 struct kmemtrace_free_entry *field; 432 struct kmemtrace_free_entry *field;
433
251 trace_assign_type(field, entry); 434 trace_assign_type(field, entry);
252 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL) 435 if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)
253 return kmemtrace_print_free_compress(iter, field); 436 return kmemtrace_print_free_compress(iter, field);
254 else 437 else
255 return kmemtrace_print_free_original(iter, field); 438 return kmemtrace_print_free_user(iter, field);
256 } 439 }
257 440
258 default: 441 default:
@@ -260,70 +443,13 @@ static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
260 } 443 }
261} 444}
262 445
263/* Trace allocations */
264void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
265 unsigned long call_site,
266 const void *ptr,
267 size_t bytes_req,
268 size_t bytes_alloc,
269 gfp_t gfp_flags,
270 int node)
271{
272 struct ring_buffer_event *event;
273 struct kmemtrace_alloc_entry *entry;
274 struct trace_array *tr = kmemtrace_array;
275
276 if (!kmem_tracing_enabled)
277 return;
278
279 event = trace_buffer_lock_reserve(tr, TRACE_KMEM_ALLOC,
280 sizeof(*entry), 0, 0);
281 if (!event)
282 return;
283 entry = ring_buffer_event_data(event);
284
285 entry->call_site = call_site;
286 entry->ptr = ptr;
287 entry->bytes_req = bytes_req;
288 entry->bytes_alloc = bytes_alloc;
289 entry->gfp_flags = gfp_flags;
290 entry->node = node;
291
292 trace_buffer_unlock_commit(tr, event, 0, 0);
293}
294EXPORT_SYMBOL(kmemtrace_mark_alloc_node);
295
296void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
297 unsigned long call_site,
298 const void *ptr)
299{
300 struct ring_buffer_event *event;
301 struct kmemtrace_free_entry *entry;
302 struct trace_array *tr = kmemtrace_array;
303
304 if (!kmem_tracing_enabled)
305 return;
306
307 event = trace_buffer_lock_reserve(tr, TRACE_KMEM_FREE,
308 sizeof(*entry), 0, 0);
309 if (!event)
310 return;
311 entry = ring_buffer_event_data(event);
312 entry->type_id = type_id;
313 entry->call_site = call_site;
314 entry->ptr = ptr;
315
316 trace_buffer_unlock_commit(tr, event, 0, 0);
317}
318EXPORT_SYMBOL(kmemtrace_mark_free);
319
320static struct tracer kmem_tracer __read_mostly = { 446static struct tracer kmem_tracer __read_mostly = {
321 .name = "kmemtrace", 447 .name = "kmemtrace",
322 .init = kmem_trace_init, 448 .init = kmem_trace_init,
323 .reset = kmem_trace_reset, 449 .reset = kmem_trace_reset,
324 .print_line = kmemtrace_print_line, 450 .print_line = kmemtrace_print_line,
325 .print_header = kmemtrace_headers, 451 .print_header = kmemtrace_headers,
326 .flags = &kmem_tracer_flags 452 .flags = &kmem_tracer_flags
327}; 453};
328 454
329void kmemtrace_init(void) 455void kmemtrace_init(void)
@@ -335,5 +461,4 @@ static int __init init_kmem_tracer(void)
335{ 461{
336 return register_tracer(&kmem_tracer); 462 return register_tracer(&kmem_tracer);
337} 463}
338
339device_initcall(init_kmem_tracer); 464device_initcall(init_kmem_tracer);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index cb0ce3fc36d3..cbc168f1e43d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -182,6 +182,12 @@ struct trace_power {
182 struct power_trace state_data; 182 struct power_trace state_data;
183}; 183};
184 184
185enum kmemtrace_type_id {
186 KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */
187 KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */
188 KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */
189};
190
185struct kmemtrace_alloc_entry { 191struct kmemtrace_alloc_entry {
186 struct trace_entry ent; 192 struct trace_entry ent;
187 enum kmemtrace_type_id type_id; 193 enum kmemtrace_type_id type_id;