author    Ingo Molnar <mingo@elte.hu>  2008-10-30 19:38:21 -0400
committer Ingo Molnar <mingo@elte.hu>  2008-10-30 19:38:21 -0400
commit    e1e302d8a9ab06ba8d7d5ec503d8996e6cf0eca4 (patch)
tree      6a6c805fdfd6a2f6433956cbee348b2c3a277784 /kernel
parent    944ac4259e39801c843a915c3da8194ac9af0440 (diff)
parent    7f82f000ed030d1108b4de47d9e2d556092980c6 (diff)

Merge branch 'linus' into tracing/ftrace
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile                    |    4
-rw-r--r--  kernel/cgroup_freezer.c            |   51
-rw-r--r--  kernel/freezer.c                   |   20
-rw-r--r--  kernel/irq/proc.c                  |    2
-rw-r--r--  kernel/lockdep.c                   |   17
-rw-r--r--  kernel/printk.c                    |   39
-rw-r--r--  kernel/profile.c                   |    2
-rw-r--r--  kernel/resource.c                  |    2
-rw-r--r--  kernel/sched.c                     |    3
-rw-r--r--  kernel/sched_fair.c                |  169
-rw-r--r--  kernel/sched_idletask.c            |    5
-rw-r--r--  kernel/sched_rt.c                  |    5
-rw-r--r--  kernel/signal.c                    |    3
-rw-r--r--  kernel/sysctl.c                    |    2
-rw-r--r--  kernel/trace/Kconfig               |   29
-rw-r--r--  kernel/trace/Makefile              |    6
-rw-r--r--  kernel/trace/ftrace.c              |  608
-rw-r--r--  kernel/trace/ring_buffer.c         |    6
-rw-r--r--  kernel/trace/trace.c               |   15
-rw-r--r--  kernel/trace/trace.h               |    2
-rw-r--r--  kernel/trace/trace_functions.c     |    2
-rw-r--r--  kernel/trace/trace_irqsoff.c       |    4
-rw-r--r--  kernel/trace/trace_sched_wakeup.c  |    4
-rw-r--r--  kernel/trace/trace_selftest.c      |   18
-rw-r--r--  kernel/trace/trace_stack.c         |    4
-rw-r--r--  kernel/tracepoint.c                |    8
26 files changed, 321 insertions, 709 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 305f11dbef2..9a3ec66a9d8 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -13,7 +13,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
13 13
14CFLAGS_REMOVE_sched.o = -mno-spe 14CFLAGS_REMOVE_sched.o = -mno-spe
15 15
16ifdef CONFIG_FTRACE 16ifdef CONFIG_FUNCTION_TRACER
17# Do not trace debug files and internal ftrace files 17# Do not trace debug files and internal ftrace files
18CFLAGS_REMOVE_lockdep.o = -pg 18CFLAGS_REMOVE_lockdep.o = -pg
19CFLAGS_REMOVE_lockdep_proc.o = -pg 19CFLAGS_REMOVE_lockdep_proc.o = -pg
@@ -88,7 +88,7 @@ obj-$(CONFIG_MARKERS) += marker.o
88obj-$(CONFIG_TRACEPOINTS) += tracepoint.o 88obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
89obj-$(CONFIG_LATENCYTOP) += latencytop.o 89obj-$(CONFIG_LATENCYTOP) += latencytop.o
90obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o 90obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
91obj-$(CONFIG_FTRACE) += trace/ 91obj-$(CONFIG_FUNCTION_TRACER) += trace/
92obj-$(CONFIG_TRACING) += trace/ 92obj-$(CONFIG_TRACING) += trace/
93obj-$(CONFIG_SMP) += sched_cpupri.o 93obj-$(CONFIG_SMP) += sched_cpupri.o
94 94
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index e9505695449..7fa476f01d0 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -162,9 +162,13 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
162 struct task_struct *task) 162 struct task_struct *task)
163{ 163{
164 struct freezer *freezer; 164 struct freezer *freezer;
165 int retval;
166 165
167 /* Anything frozen can't move or be moved to/from */ 166 /*
167 * Anything frozen can't move or be moved to/from.
168 *
169 * Since orig_freezer->state == FROZEN means that @task has been
170 * frozen, so it's sufficient to check the latter condition.
171 */
168 172
169 if (is_task_frozen_enough(task)) 173 if (is_task_frozen_enough(task))
170 return -EBUSY; 174 return -EBUSY;
@@ -173,13 +177,7 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
173 if (freezer->state == CGROUP_FROZEN) 177 if (freezer->state == CGROUP_FROZEN)
174 return -EBUSY; 178 return -EBUSY;
175 179
176 retval = 0; 180 return 0;
177 task_lock(task);
178 freezer = task_freezer(task);
179 if (freezer->state == CGROUP_FROZEN)
180 retval = -EBUSY;
181 task_unlock(task);
182 return retval;
183} 181}
184 182
185static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) 183static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
@@ -190,8 +188,9 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
190 freezer = task_freezer(task); 188 freezer = task_freezer(task);
191 task_unlock(task); 189 task_unlock(task);
192 190
193 BUG_ON(freezer->state == CGROUP_FROZEN);
194 spin_lock_irq(&freezer->lock); 191 spin_lock_irq(&freezer->lock);
192 BUG_ON(freezer->state == CGROUP_FROZEN);
193
195 /* Locking avoids race with FREEZING -> THAWED transitions. */ 194 /* Locking avoids race with FREEZING -> THAWED transitions. */
196 if (freezer->state == CGROUP_FREEZING) 195 if (freezer->state == CGROUP_FREEZING)
197 freeze_task(task, true); 196 freeze_task(task, true);
@@ -276,25 +275,18 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
276 return num_cant_freeze_now ? -EBUSY : 0; 275 return num_cant_freeze_now ? -EBUSY : 0;
277} 276}
278 277
279static int unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) 278static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
280{ 279{
281 struct cgroup_iter it; 280 struct cgroup_iter it;
282 struct task_struct *task; 281 struct task_struct *task;
283 282
284 cgroup_iter_start(cgroup, &it); 283 cgroup_iter_start(cgroup, &it);
285 while ((task = cgroup_iter_next(cgroup, &it))) { 284 while ((task = cgroup_iter_next(cgroup, &it))) {
286 int do_wake; 285 thaw_process(task);
287
288 task_lock(task);
289 do_wake = __thaw_process(task);
290 task_unlock(task);
291 if (do_wake)
292 wake_up_process(task);
293 } 286 }
294 cgroup_iter_end(cgroup, &it); 287 cgroup_iter_end(cgroup, &it);
295 freezer->state = CGROUP_THAWED;
296 288
297 return 0; 289 freezer->state = CGROUP_THAWED;
298} 290}
299 291
300static int freezer_change_state(struct cgroup *cgroup, 292static int freezer_change_state(struct cgroup *cgroup,
@@ -304,27 +296,22 @@ static int freezer_change_state(struct cgroup *cgroup,
304 int retval = 0; 296 int retval = 0;
305 297
306 freezer = cgroup_freezer(cgroup); 298 freezer = cgroup_freezer(cgroup);
299
307 spin_lock_irq(&freezer->lock); 300 spin_lock_irq(&freezer->lock);
301
308 update_freezer_state(cgroup, freezer); 302 update_freezer_state(cgroup, freezer);
309 if (goal_state == freezer->state) 303 if (goal_state == freezer->state)
310 goto out; 304 goto out;
311 switch (freezer->state) { 305
306 switch (goal_state) {
312 case CGROUP_THAWED: 307 case CGROUP_THAWED:
313 retval = try_to_freeze_cgroup(cgroup, freezer); 308 unfreeze_cgroup(cgroup, freezer);
314 break; 309 break;
315 case CGROUP_FREEZING:
316 if (goal_state == CGROUP_FROZEN) {
317 /* Userspace is retrying after
318 * "/bin/echo FROZEN > freezer.state" returned -EBUSY */
319 retval = try_to_freeze_cgroup(cgroup, freezer);
320 break;
321 }
322 /* state == FREEZING and goal_state == THAWED, so unfreeze */
323 case CGROUP_FROZEN: 310 case CGROUP_FROZEN:
324 retval = unfreeze_cgroup(cgroup, freezer); 311 retval = try_to_freeze_cgroup(cgroup, freezer);
325 break; 312 break;
326 default: 313 default:
327 break; 314 BUG();
328 } 315 }
329out: 316out:
330 spin_unlock_irq(&freezer->lock); 317 spin_unlock_irq(&freezer->lock);
diff --git a/kernel/freezer.c b/kernel/freezer.c
index ba6248b323e..2f4936cf708 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -121,16 +121,7 @@ void cancel_freezing(struct task_struct *p)
121 } 121 }
122} 122}
123 123
124/* 124static int __thaw_process(struct task_struct *p)
125 * Wake up a frozen process
126 *
127 * task_lock() is needed to prevent the race with refrigerator() which may
128 * occur if the freezing of tasks fails. Namely, without the lock, if the
129 * freezing of tasks failed, thaw_tasks() might have run before a task in
130 * refrigerator() could call frozen_process(), in which case the task would be
131 * frozen and no one would thaw it.
132 */
133int __thaw_process(struct task_struct *p)
134{ 125{
135 if (frozen(p)) { 126 if (frozen(p)) {
136 p->flags &= ~PF_FROZEN; 127 p->flags &= ~PF_FROZEN;
@@ -140,6 +131,15 @@ int __thaw_process(struct task_struct *p)
140 return 0; 131 return 0;
141} 132}
142 133
134/*
135 * Wake up a frozen process
136 *
137 * task_lock() is needed to prevent the race with refrigerator() which may
138 * occur if the freezing of tasks fails. Namely, without the lock, if the
139 * freezing of tasks failed, thaw_tasks() might have run before a task in
140 * refrigerator() could call frozen_process(), in which case the task would be
141 * frozen and no one would thaw it.
142 */
143int thaw_process(struct task_struct *p) 143int thaw_process(struct task_struct *p)
144{ 144{
145 task_lock(p); 145 task_lock(p);
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index fac014a81b2..4d161c70ba5 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -220,7 +220,7 @@ void unregister_handler_proc(unsigned int irq, struct irqaction *action)
220 } 220 }
221} 221}
222 222
223void register_default_affinity_proc(void) 223static void register_default_affinity_proc(void)
224{ 224{
225#ifdef CONFIG_SMP 225#ifdef CONFIG_SMP
226 proc_create("irq/default_smp_affinity", 0600, NULL, 226 proc_create("irq/default_smp_affinity", 0600, NULL,
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index dbda475b13b..06e157119d2 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2169,12 +2169,11 @@ void early_boot_irqs_on(void)
2169/* 2169/*
2170 * Hardirqs will be enabled: 2170 * Hardirqs will be enabled:
2171 */ 2171 */
2172void trace_hardirqs_on_caller(unsigned long a0) 2172void trace_hardirqs_on_caller(unsigned long ip)
2173{ 2173{
2174 struct task_struct *curr = current; 2174 struct task_struct *curr = current;
2175 unsigned long ip;
2176 2175
2177 time_hardirqs_on(CALLER_ADDR0, a0); 2176 time_hardirqs_on(CALLER_ADDR0, ip);
2178 2177
2179 if (unlikely(!debug_locks || current->lockdep_recursion)) 2178 if (unlikely(!debug_locks || current->lockdep_recursion))
2180 return; 2179 return;
@@ -2188,7 +2187,6 @@ void trace_hardirqs_on_caller(unsigned long a0)
2188 } 2187 }
2189 /* we'll do an OFF -> ON transition: */ 2188 /* we'll do an OFF -> ON transition: */
2190 curr->hardirqs_enabled = 1; 2189 curr->hardirqs_enabled = 1;
2191 ip = (unsigned long) __builtin_return_address(0);
2192 2190
2193 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) 2191 if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
2194 return; 2192 return;
@@ -2224,11 +2222,11 @@ EXPORT_SYMBOL(trace_hardirqs_on);
2224/* 2222/*
2225 * Hardirqs were disabled: 2223 * Hardirqs were disabled:
2226 */ 2224 */
2227void trace_hardirqs_off_caller(unsigned long a0) 2225void trace_hardirqs_off_caller(unsigned long ip)
2228{ 2226{
2229 struct task_struct *curr = current; 2227 struct task_struct *curr = current;
2230 2228
2231 time_hardirqs_off(CALLER_ADDR0, a0); 2229 time_hardirqs_off(CALLER_ADDR0, ip);
2232 2230
2233 if (unlikely(!debug_locks || current->lockdep_recursion)) 2231 if (unlikely(!debug_locks || current->lockdep_recursion))
2234 return; 2232 return;
@@ -2241,7 +2239,7 @@ void trace_hardirqs_off_caller(unsigned long a0)
2241 * We have done an ON -> OFF transition: 2239 * We have done an ON -> OFF transition:
2242 */ 2240 */
2243 curr->hardirqs_enabled = 0; 2241 curr->hardirqs_enabled = 0;
2244 curr->hardirq_disable_ip = _RET_IP_; 2242 curr->hardirq_disable_ip = ip;
2245 curr->hardirq_disable_event = ++curr->irq_events; 2243 curr->hardirq_disable_event = ++curr->irq_events;
2246 debug_atomic_inc(&hardirqs_off_events); 2244 debug_atomic_inc(&hardirqs_off_events);
2247 } else 2245 } else
@@ -3417,9 +3415,10 @@ retry:
3417 } 3415 }
3418 printk(" ignoring it.\n"); 3416 printk(" ignoring it.\n");
3419 unlock = 0; 3417 unlock = 0;
3418 } else {
3419 if (count != 10)
3420 printk(KERN_CONT " locked it.\n");
3420 } 3421 }
3421 if (count != 10)
3422 printk(" locked it.\n");
3423 3422
3424 do_each_thread(g, p) { 3423 do_each_thread(g, p) {
3425 /* 3424 /*
diff --git a/kernel/printk.c b/kernel/printk.c
index 6341af77eb6..f492f1583d7 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -233,45 +233,6 @@ static inline void boot_delay_msec(void)
233#endif 233#endif
234 234
235/* 235/*
236 * Return the number of unread characters in the log buffer.
237 */
238static int log_buf_get_len(void)
239{
240 return logged_chars;
241}
242
243/*
244 * Copy a range of characters from the log buffer.
245 */
246int log_buf_copy(char *dest, int idx, int len)
247{
248 int ret, max;
249 bool took_lock = false;
250
251 if (!oops_in_progress) {
252 spin_lock_irq(&logbuf_lock);
253 took_lock = true;
254 }
255
256 max = log_buf_get_len();
257 if (idx < 0 || idx >= max) {
258 ret = -1;
259 } else {
260 if (len > max)
261 len = max;
262 ret = len;
263 idx += (log_end - max);
264 while (len-- > 0)
265 dest[len] = LOG_BUF(idx + len);
266 }
267
268 if (took_lock)
269 spin_unlock_irq(&logbuf_lock);
270
271 return ret;
272}
273
274/*
275 * Commands to do_syslog: 236 * Commands to do_syslog:
276 * 237 *
277 * 0 -- Close the log. Currently a NOP. 238 * 0 -- Close the log. Currently a NOP.
diff --git a/kernel/profile.c b/kernel/profile.c
index a9e422df6bf..9830a037d8d 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -102,7 +102,7 @@ int profile_setup(char *str)
102__setup("profile=", profile_setup); 102__setup("profile=", profile_setup);
103 103
104 104
105int profile_init(void) 105int __ref profile_init(void)
106{ 106{
107 int buffer_bytes; 107 int buffer_bytes;
108 if (!prof_on) 108 if (!prof_on)
diff --git a/kernel/resource.c b/kernel/resource.c
index 4089d12af6e..7fec0e42723 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -571,7 +571,7 @@ static void __init __reserve_region_with_split(struct resource *root,
571 571
572} 572}
573 573
574void reserve_region_with_split(struct resource *root, 574void __init reserve_region_with_split(struct resource *root,
575 resource_size_t start, resource_size_t end, 575 resource_size_t start, resource_size_t end,
576 const char *name) 576 const char *name)
577{ 577{
diff --git a/kernel/sched.c b/kernel/sched.c
index 6625c3c4b10..e8819bc6f46 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -386,7 +386,6 @@ struct cfs_rq {
386 386
387 u64 exec_clock; 387 u64 exec_clock;
388 u64 min_vruntime; 388 u64 min_vruntime;
389 u64 pair_start;
390 389
391 struct rb_root tasks_timeline; 390 struct rb_root tasks_timeline;
392 struct rb_node *rb_leftmost; 391 struct rb_node *rb_leftmost;
@@ -3344,7 +3343,7 @@ small_imbalance:
3344 } else 3343 } else
3345 this_load_per_task = cpu_avg_load_per_task(this_cpu); 3344 this_load_per_task = cpu_avg_load_per_task(this_cpu);
3346 3345
3347 if (max_load - this_load + 2*busiest_load_per_task >= 3346 if (max_load - this_load + busiest_load_per_task >=
3348 busiest_load_per_task * imbn) { 3347 busiest_load_per_task * imbn) {
3349 *imbalance = busiest_load_per_task; 3348 *imbalance = busiest_load_per_task;
3350 return busiest; 3349 return busiest;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 9573c33688b..ce514afd78f 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -143,6 +143,49 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
143 return se->parent; 143 return se->parent;
144} 144}
145 145
146/* return depth at which a sched entity is present in the hierarchy */
147static inline int depth_se(struct sched_entity *se)
148{
149 int depth = 0;
150
151 for_each_sched_entity(se)
152 depth++;
153
154 return depth;
155}
156
157static void
158find_matching_se(struct sched_entity **se, struct sched_entity **pse)
159{
160 int se_depth, pse_depth;
161
162 /*
163 * preemption test can be made between sibling entities who are in the
164 * same cfs_rq i.e who have a common parent. Walk up the hierarchy of
165 * both tasks until we find their ancestors who are siblings of common
166 * parent.
167 */
168
169 /* First walk up until both entities are at same depth */
170 se_depth = depth_se(*se);
171 pse_depth = depth_se(*pse);
172
173 while (se_depth > pse_depth) {
174 se_depth--;
175 *se = parent_entity(*se);
176 }
177
178 while (pse_depth > se_depth) {
179 pse_depth--;
180 *pse = parent_entity(*pse);
181 }
182
183 while (!is_same_group(*se, *pse)) {
184 *se = parent_entity(*se);
185 *pse = parent_entity(*pse);
186 }
187}
188
146#else /* CONFIG_FAIR_GROUP_SCHED */ 189#else /* CONFIG_FAIR_GROUP_SCHED */
147 190
148static inline struct rq *rq_of(struct cfs_rq *cfs_rq) 191static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
@@ -193,6 +236,11 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
193 return NULL; 236 return NULL;
194} 237}
195 238
239static inline void
240find_matching_se(struct sched_entity **se, struct sched_entity **pse)
241{
242}
243
196#endif /* CONFIG_FAIR_GROUP_SCHED */ 244#endif /* CONFIG_FAIR_GROUP_SCHED */
197 245
198 246
@@ -223,6 +271,27 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
223 return se->vruntime - cfs_rq->min_vruntime; 271 return se->vruntime - cfs_rq->min_vruntime;
224} 272}
225 273
274static void update_min_vruntime(struct cfs_rq *cfs_rq)
275{
276 u64 vruntime = cfs_rq->min_vruntime;
277
278 if (cfs_rq->curr)
279 vruntime = cfs_rq->curr->vruntime;
280
281 if (cfs_rq->rb_leftmost) {
282 struct sched_entity *se = rb_entry(cfs_rq->rb_leftmost,
283 struct sched_entity,
284 run_node);
285
286 if (vruntime == cfs_rq->min_vruntime)
287 vruntime = se->vruntime;
288 else
289 vruntime = min_vruntime(vruntime, se->vruntime);
290 }
291
292 cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
293}
294
226/* 295/*
227 * Enqueue an entity into the rb-tree: 296 * Enqueue an entity into the rb-tree:
228 */ 297 */
@@ -256,15 +325,8 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
256 * Maintain a cache of leftmost tree entries (it is frequently 325 * Maintain a cache of leftmost tree entries (it is frequently
257 * used): 326 * used):
258 */ 327 */
259 if (leftmost) { 328 if (leftmost)
260 cfs_rq->rb_leftmost = &se->run_node; 329 cfs_rq->rb_leftmost = &se->run_node;
261 /*
262 * maintain cfs_rq->min_vruntime to be a monotonic increasing
263 * value tracking the leftmost vruntime in the tree.
264 */
265 cfs_rq->min_vruntime =
266 max_vruntime(cfs_rq->min_vruntime, se->vruntime);
267 }
268 330
269 rb_link_node(&se->run_node, parent, link); 331 rb_link_node(&se->run_node, parent, link);
270 rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline); 332 rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
@@ -274,18 +336,9 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
274{ 336{
275 if (cfs_rq->rb_leftmost == &se->run_node) { 337 if (cfs_rq->rb_leftmost == &se->run_node) {
276 struct rb_node *next_node; 338 struct rb_node *next_node;
277 struct sched_entity *next;
278 339
279 next_node = rb_next(&se->run_node); 340 next_node = rb_next(&se->run_node);
280 cfs_rq->rb_leftmost = next_node; 341 cfs_rq->rb_leftmost = next_node;
281
282 if (next_node) {
283 next = rb_entry(next_node,
284 struct sched_entity, run_node);
285 cfs_rq->min_vruntime =
286 max_vruntime(cfs_rq->min_vruntime,
287 next->vruntime);
288 }
289 } 342 }
290 343
291 if (cfs_rq->next == se) 344 if (cfs_rq->next == se)
@@ -424,6 +477,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
424 schedstat_add(cfs_rq, exec_clock, delta_exec); 477 schedstat_add(cfs_rq, exec_clock, delta_exec);
425 delta_exec_weighted = calc_delta_fair(delta_exec, curr); 478 delta_exec_weighted = calc_delta_fair(delta_exec, curr);
426 curr->vruntime += delta_exec_weighted; 479 curr->vruntime += delta_exec_weighted;
480 update_min_vruntime(cfs_rq);
427} 481}
428 482
429static void update_curr(struct cfs_rq *cfs_rq) 483static void update_curr(struct cfs_rq *cfs_rq)
@@ -613,13 +667,7 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
613static void 667static void
614place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) 668place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
615{ 669{
616 u64 vruntime; 670 u64 vruntime = cfs_rq->min_vruntime;
617
618 if (first_fair(cfs_rq)) {
619 vruntime = min_vruntime(cfs_rq->min_vruntime,
620 __pick_next_entity(cfs_rq)->vruntime);
621 } else
622 vruntime = cfs_rq->min_vruntime;
623 671
624 /* 672 /*
625 * The 'current' period is already promised to the current tasks, 673 * The 'current' period is already promised to the current tasks,
@@ -696,6 +744,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
696 if (se != cfs_rq->curr) 744 if (se != cfs_rq->curr)
697 __dequeue_entity(cfs_rq, se); 745 __dequeue_entity(cfs_rq, se);
698 account_entity_dequeue(cfs_rq, se); 746 account_entity_dequeue(cfs_rq, se);
747 update_min_vruntime(cfs_rq);
699} 748}
700 749
701/* 750/*
@@ -742,16 +791,14 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
742 se->prev_sum_exec_runtime = se->sum_exec_runtime; 791 se->prev_sum_exec_runtime = se->sum_exec_runtime;
743} 792}
744 793
794static int
795wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
796
745static struct sched_entity * 797static struct sched_entity *
746pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se) 798pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
747{ 799{
748 struct rq *rq = rq_of(cfs_rq); 800 if (!cfs_rq->next || wakeup_preempt_entity(cfs_rq->next, se) == 1)
749 u64 pair_slice = rq->clock - cfs_rq->pair_start;
750
751 if (!cfs_rq->next || pair_slice > sysctl_sched_min_granularity) {
752 cfs_rq->pair_start = rq->clock;
753 return se; 801 return se;
754 }
755 802
756 return cfs_rq->next; 803 return cfs_rq->next;
757} 804}
@@ -1122,10 +1169,9 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
1122 if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS)) 1169 if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
1123 return 0; 1170 return 0;
1124 1171
1125 if (!sync && sched_feat(SYNC_WAKEUPS) && 1172 if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost ||
1126 curr->se.avg_overlap < sysctl_sched_migration_cost && 1173 p->se.avg_overlap > sysctl_sched_migration_cost))
1127 p->se.avg_overlap < sysctl_sched_migration_cost) 1174 sync = 0;
1128 sync = 1;
1129 1175
1130 /* 1176 /*
1131 * If sync wakeup then subtract the (maximum possible) 1177 * If sync wakeup then subtract the (maximum possible)
@@ -1244,13 +1290,42 @@ static unsigned long wakeup_gran(struct sched_entity *se)
1244 * More easily preempt - nice tasks, while not making it harder for 1290 * More easily preempt - nice tasks, while not making it harder for
1245 * + nice tasks. 1291 * + nice tasks.
1246 */ 1292 */
1247 if (sched_feat(ASYM_GRAN)) 1293 if (!sched_feat(ASYM_GRAN) || se->load.weight > NICE_0_LOAD)
1248 gran = calc_delta_mine(gran, NICE_0_LOAD, &se->load); 1294 gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se);
1249 1295
1250 return gran; 1296 return gran;
1251} 1297}
1252 1298
1253/* 1299/*
1300 * Should 'se' preempt 'curr'.
1301 *
1302 * |s1
1303 * |s2
1304 * |s3
1305 * g
1306 * |<--->|c
1307 *
1308 * w(c, s1) = -1
1309 * w(c, s2) = 0
1310 * w(c, s3) = 1
1311 *
1312 */
1313static int
1314wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
1315{
1316 s64 gran, vdiff = curr->vruntime - se->vruntime;
1317
1318 if (vdiff <= 0)
1319 return -1;
1320
1321 gran = wakeup_gran(curr);
1322 if (vdiff > gran)
1323 return 1;
1324
1325 return 0;
1326}
1327
1328/*
1254 * Preempt the current task with a newly woken task if needed: 1329 * Preempt the current task with a newly woken task if needed:
1255 */ 1330 */
1256static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) 1331static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
@@ -1258,7 +1333,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
1258 struct task_struct *curr = rq->curr; 1333 struct task_struct *curr = rq->curr;
1259 struct cfs_rq *cfs_rq = task_cfs_rq(curr); 1334 struct cfs_rq *cfs_rq = task_cfs_rq(curr);
1260 struct sched_entity *se = &curr->se, *pse = &p->se; 1335 struct sched_entity *se = &curr->se, *pse = &p->se;
1261 s64 delta_exec;
1262 1336
1263 if (unlikely(rt_prio(p->prio))) { 1337 if (unlikely(rt_prio(p->prio))) {
1264 update_rq_clock(rq); 1338 update_rq_clock(rq);
@@ -1296,9 +1370,19 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
1296 return; 1370 return;
1297 } 1371 }
1298 1372
1299 delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime; 1373 find_matching_se(&se, &pse);
1300 if (delta_exec > wakeup_gran(pse)) 1374
1301 resched_task(curr); 1375 while (se) {
1376 BUG_ON(!pse);
1377
1378 if (wakeup_preempt_entity(se, pse) == 1) {
1379 resched_task(curr);
1380 break;
1381 }
1382
1383 se = parent_entity(se);
1384 pse = parent_entity(pse);
1385 }
1302} 1386}
1303 1387
1304static struct task_struct *pick_next_task_fair(struct rq *rq) 1388static struct task_struct *pick_next_task_fair(struct rq *rq)
@@ -1594,9 +1678,6 @@ static const struct sched_class fair_sched_class = {
1594 .enqueue_task = enqueue_task_fair, 1678 .enqueue_task = enqueue_task_fair,
1595 .dequeue_task = dequeue_task_fair, 1679 .dequeue_task = dequeue_task_fair,
1596 .yield_task = yield_task_fair, 1680 .yield_task = yield_task_fair,
1597#ifdef CONFIG_SMP
1598 .select_task_rq = select_task_rq_fair,
1599#endif /* CONFIG_SMP */
1600 1681
1601 .check_preempt_curr = check_preempt_wakeup, 1682 .check_preempt_curr = check_preempt_wakeup,
1602 1683
@@ -1604,6 +1685,8 @@ static const struct sched_class fair_sched_class = {
1604 .put_prev_task = put_prev_task_fair, 1685 .put_prev_task = put_prev_task_fair,
1605 1686
1606#ifdef CONFIG_SMP 1687#ifdef CONFIG_SMP
1688 .select_task_rq = select_task_rq_fair,
1689
1607 .load_balance = load_balance_fair, 1690 .load_balance = load_balance_fair,
1608 .move_one_task = move_one_task_fair, 1691 .move_one_task = move_one_task_fair,
1609#endif 1692#endif
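
For context on the sched_fair.c hunks above: check_preempt_wakeup() now walks both entities up to a common cfs_rq via find_matching_se() and then defers to wakeup_preempt_entity(), which compares virtual runtimes using the -1/0/1 convention in the w(c, s1..s3) diagram the patch adds. Below is a minimal, self-contained sketch of just that comparison, not the kernel code itself: the entity struct, its gran field and the sample numbers are invented for illustration, whereas in the kernel the granularity comes from wakeup_gran() and the operands are struct sched_entity.

/*
 * Standalone sketch of the three-way wakeup-preemption test:
 *  -1 = woken entity is not ahead of current at all,
 *   0 = ahead, but within the wakeup granularity window,
 *   1 = far enough ahead to preempt.
 */
#include <stdio.h>

typedef long long s64;
typedef unsigned long long u64;

struct entity {
	u64 vruntime;	/* virtual runtime accumulated so far */
	u64 gran;	/* stand-in for wakeup_gran(curr) */
};

static int wakeup_preempt_entity(const struct entity *curr,
				 const struct entity *se)
{
	/* unsigned subtraction then signed cast, as in the patch */
	s64 vdiff = (s64)(curr->vruntime - se->vruntime);

	if (vdiff <= 0)
		return -1;		/* woken entity is not ahead */

	if (vdiff > (s64)curr->gran)
		return 1;		/* far enough ahead: preempt */

	return 0;			/* inside the granularity window */
}

int main(void)
{
	struct entity curr = { .vruntime = 1000, .gran = 100 };
	struct entity s1 = { .vruntime = 1200 };	/* expect -1 */
	struct entity s2 = { .vruntime = 950 };		/* expect  0 */
	struct entity s3 = { .vruntime = 500 };		/* expect  1 */

	printf("%d %d %d\n",
	       wakeup_preempt_entity(&curr, &s1),
	       wakeup_preempt_entity(&curr, &s2),
	       wakeup_preempt_entity(&curr, &s3));
	return 0;
}

Compiled as plain C this prints -1 0 1, matching w(c, s1), w(c, s2) and w(c, s3) in the comment block the patch introduces.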
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index dec4ccabe2f..8a21a2e28c1 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -105,9 +105,6 @@ static const struct sched_class idle_sched_class = {
105 105
106 /* dequeue is not valid, we print a debug message there: */ 106 /* dequeue is not valid, we print a debug message there: */
107 .dequeue_task = dequeue_task_idle, 107 .dequeue_task = dequeue_task_idle,
108#ifdef CONFIG_SMP
109 .select_task_rq = select_task_rq_idle,
110#endif /* CONFIG_SMP */
111 108
112 .check_preempt_curr = check_preempt_curr_idle, 109 .check_preempt_curr = check_preempt_curr_idle,
113 110
@@ -115,6 +112,8 @@ static const struct sched_class idle_sched_class = {
115 .put_prev_task = put_prev_task_idle, 112 .put_prev_task = put_prev_task_idle,
116 113
117#ifdef CONFIG_SMP 114#ifdef CONFIG_SMP
115 .select_task_rq = select_task_rq_idle,
116
118 .load_balance = load_balance_idle, 117 .load_balance = load_balance_idle,
119 .move_one_task = move_one_task_idle, 118 .move_one_task = move_one_task_idle,
120#endif 119#endif
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index b446dc87494..d9ba9d5f99d 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1504,9 +1504,6 @@ static const struct sched_class rt_sched_class = {
1504 .enqueue_task = enqueue_task_rt, 1504 .enqueue_task = enqueue_task_rt,
1505 .dequeue_task = dequeue_task_rt, 1505 .dequeue_task = dequeue_task_rt,
1506 .yield_task = yield_task_rt, 1506 .yield_task = yield_task_rt,
1507#ifdef CONFIG_SMP
1508 .select_task_rq = select_task_rq_rt,
1509#endif /* CONFIG_SMP */
1510 1507
1511 .check_preempt_curr = check_preempt_curr_rt, 1508 .check_preempt_curr = check_preempt_curr_rt,
1512 1509
@@ -1514,6 +1511,8 @@ static const struct sched_class rt_sched_class = {
1514 .put_prev_task = put_prev_task_rt, 1511 .put_prev_task = put_prev_task_rt,
1515 1512
1516#ifdef CONFIG_SMP 1513#ifdef CONFIG_SMP
1514 .select_task_rq = select_task_rq_rt,
1515
1517 .load_balance = load_balance_rt, 1516 .load_balance = load_balance_rt,
1518 .move_one_task = move_one_task_rt, 1517 .move_one_task = move_one_task_rt,
1519 .set_cpus_allowed = set_cpus_allowed_rt, 1518 .set_cpus_allowed = set_cpus_allowed_rt,
diff --git a/kernel/signal.c b/kernel/signal.c
index 105217da5c8..4530fc65445 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1144,7 +1144,8 @@ static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
1144 struct task_struct * p; 1144 struct task_struct * p;
1145 1145
1146 for_each_process(p) { 1146 for_each_process(p) {
1147 if (p->pid > 1 && !same_thread_group(p, current)) { 1147 if (task_pid_vnr(p) > 1 &&
1148 !same_thread_group(p, current)) {
1148 int err = group_send_sig_info(sig, info, p); 1149 int err = group_send_sig_info(sig, info, p);
1149 ++count; 1150 ++count;
1150 if (err != -EPERM) 1151 if (err != -EPERM)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 84754f5801e..6b6b727258b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -474,7 +474,7 @@ static struct ctl_table kern_table[] = {
474 .mode = 0644, 474 .mode = 0644,
475 .proc_handler = &proc_dointvec, 475 .proc_handler = &proc_dointvec,
476 }, 476 },
477#ifdef CONFIG_FTRACE 477#ifdef CONFIG_FUNCTION_TRACER
478 { 478 {
479 .ctl_name = CTL_UNNUMBERED, 479 .ctl_name = CTL_UNNUMBERED,
480 .procname = "ftrace_enabled", 480 .procname = "ftrace_enabled",
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 1cb3e1f616a..b58f43bec36 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -1,13 +1,13 @@
1# 1#
2# Architectures that offer an FTRACE implementation should select HAVE_FTRACE: 2# Architectures that offer an FUNCTION_TRACER implementation should
3# select HAVE_FUNCTION_TRACER:
3# 4#
4 5
5config NOP_TRACER 6config NOP_TRACER
6 bool 7 bool
7 8
8config HAVE_FTRACE 9config HAVE_FUNCTION_TRACER
9 bool 10 bool
10 select NOP_TRACER
11 11
12config HAVE_DYNAMIC_FTRACE 12config HAVE_DYNAMIC_FTRACE
13 bool 13 bool
@@ -27,10 +27,13 @@ config TRACING
27 select RING_BUFFER 27 select RING_BUFFER
28 select STACKTRACE 28 select STACKTRACE
29 select TRACEPOINTS 29 select TRACEPOINTS
30 select NOP_TRACER
31
32menu "Tracers"
30 33
31config FTRACE 34config FUNCTION_TRACER
32 bool "Kernel Function Tracer" 35 bool "Kernel Function Tracer"
33 depends on HAVE_FTRACE 36 depends on HAVE_FUNCTION_TRACER
34 depends on DEBUG_KERNEL 37 depends on DEBUG_KERNEL
35 select FRAME_POINTER 38 select FRAME_POINTER
36 select TRACING 39 select TRACING
@@ -49,7 +52,6 @@ config IRQSOFF_TRACER
49 default n 52 default n
50 depends on TRACE_IRQFLAGS_SUPPORT 53 depends on TRACE_IRQFLAGS_SUPPORT
51 depends on GENERIC_TIME 54 depends on GENERIC_TIME
52 depends on HAVE_FTRACE
53 depends on DEBUG_KERNEL 55 depends on DEBUG_KERNEL
54 select TRACE_IRQFLAGS 56 select TRACE_IRQFLAGS
55 select TRACING 57 select TRACING
@@ -73,7 +75,6 @@ config PREEMPT_TRACER
73 default n 75 default n
74 depends on GENERIC_TIME 76 depends on GENERIC_TIME
75 depends on PREEMPT 77 depends on PREEMPT
76 depends on HAVE_FTRACE
77 depends on DEBUG_KERNEL 78 depends on DEBUG_KERNEL
78 select TRACING 79 select TRACING
79 select TRACER_MAX_TRACE 80 select TRACER_MAX_TRACE
@@ -101,7 +102,6 @@ config SYSPROF_TRACER
101 102
102config SCHED_TRACER 103config SCHED_TRACER
103 bool "Scheduling Latency Tracer" 104 bool "Scheduling Latency Tracer"
104 depends on HAVE_FTRACE
105 depends on DEBUG_KERNEL 105 depends on DEBUG_KERNEL
106 select TRACING 106 select TRACING
107 select CONTEXT_SWITCH_TRACER 107 select CONTEXT_SWITCH_TRACER
@@ -112,7 +112,6 @@ config SCHED_TRACER
112 112
113config CONTEXT_SWITCH_TRACER 113config CONTEXT_SWITCH_TRACER
114 bool "Trace process context switches" 114 bool "Trace process context switches"
115 depends on HAVE_FTRACE
116 depends on DEBUG_KERNEL 115 depends on DEBUG_KERNEL
117 select TRACING 116 select TRACING
118 select MARKERS 117 select MARKERS
@@ -122,9 +121,9 @@ config CONTEXT_SWITCH_TRACER
122 121
123config BOOT_TRACER 122config BOOT_TRACER
124 bool "Trace boot initcalls" 123 bool "Trace boot initcalls"
125 depends on HAVE_FTRACE
126 depends on DEBUG_KERNEL 124 depends on DEBUG_KERNEL
127 select TRACING 125 select TRACING
126 select CONTEXT_SWITCH_TRACER
128 help 127 help
129 This tracer helps developers to optimize boot times: it records 128 This tracer helps developers to optimize boot times: it records
130 the timings of the initcalls and traces key events and the identity 129 the timings of the initcalls and traces key events and the identity
@@ -141,9 +140,9 @@ config BOOT_TRACER
141 140
142config STACK_TRACER 141config STACK_TRACER
143 bool "Trace max stack" 142 bool "Trace max stack"
144 depends on HAVE_FTRACE 143 depends on HAVE_FUNCTION_TRACER
145 depends on DEBUG_KERNEL 144 depends on DEBUG_KERNEL
146 select FTRACE 145 select FUNCTION_TRACER
147 select STACKTRACE 146 select STACKTRACE
148 help 147 help
149 This special tracer records the maximum stack footprint of the 148 This special tracer records the maximum stack footprint of the
@@ -160,7 +159,7 @@ config STACK_TRACER
160 159
161config DYNAMIC_FTRACE 160config DYNAMIC_FTRACE
162 bool "enable/disable ftrace tracepoints dynamically" 161 bool "enable/disable ftrace tracepoints dynamically"
163 depends on FTRACE 162 depends on FUNCTION_TRACER
164 depends on HAVE_DYNAMIC_FTRACE 163 depends on HAVE_DYNAMIC_FTRACE
165 depends on DEBUG_KERNEL 164 depends on DEBUG_KERNEL
166 default y 165 default y
@@ -170,7 +169,7 @@ config DYNAMIC_FTRACE
170 with a No-Op instruction) as they are called. A table is 169 with a No-Op instruction) as they are called. A table is
171 created to dynamically enable them again. 170 created to dynamically enable them again.
172 171
173 This way a CONFIG_FTRACE kernel is slightly larger, but otherwise 172 This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise
174 has native performance as long as no tracing is active. 173 has native performance as long as no tracing is active.
175 174
176 The changes to the code are done by a kernel thread that 175 The changes to the code are done by a kernel thread that
@@ -195,3 +194,5 @@ config FTRACE_STARTUP_TEST
195 a series of tests are made to verify that the tracer is 194 a series of tests are made to verify that the tracer is
196 functioning properly. It will do tests on all the configured 195 functioning properly. It will do tests on all the configured
197 tracers of ftrace. 196 tracers of ftrace.
197
198endmenu
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index a85dfba88ba..c8228b1a49e 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -1,7 +1,7 @@
1 1
2# Do not instrument the tracer itself: 2# Do not instrument the tracer itself:
3 3
4ifdef CONFIG_FTRACE 4ifdef CONFIG_FUNCTION_TRACER
5ORIG_CFLAGS := $(KBUILD_CFLAGS) 5ORIG_CFLAGS := $(KBUILD_CFLAGS)
6KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) 6KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
7 7
@@ -10,13 +10,13 @@ CFLAGS_trace_selftest_dynamic.o = -pg
10obj-y += trace_selftest_dynamic.o 10obj-y += trace_selftest_dynamic.o
11endif 11endif
12 12
13obj-$(CONFIG_FTRACE) += libftrace.o 13obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
14obj-$(CONFIG_RING_BUFFER) += ring_buffer.o 14obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
15 15
16obj-$(CONFIG_TRACING) += trace.o 16obj-$(CONFIG_TRACING) += trace.o
17obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o 17obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
18obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o 18obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
19obj-$(CONFIG_FTRACE) += trace_functions.o 19obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
20obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o 20obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
21obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o 21obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
22obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o 22obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 4dda4f60a2a..4a39d24568c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -25,13 +25,24 @@
25#include <linux/ftrace.h> 25#include <linux/ftrace.h>
26#include <linux/sysctl.h> 26#include <linux/sysctl.h>
27#include <linux/ctype.h> 27#include <linux/ctype.h>
28#include <linux/hash.h>
29#include <linux/list.h> 28#include <linux/list.h>
30 29
31#include <asm/ftrace.h> 30#include <asm/ftrace.h>
32 31
33#include "trace.h" 32#include "trace.h"
34 33
34#define FTRACE_WARN_ON(cond) \
35 do { \
36 if (WARN_ON(cond)) \
37 ftrace_kill(); \
38 } while (0)
39
40#define FTRACE_WARN_ON_ONCE(cond) \
41 do { \
42 if (WARN_ON_ONCE(cond)) \
43 ftrace_kill(); \
44 } while (0)
45
35/* ftrace_enabled is a method to turn ftrace on or off */ 46/* ftrace_enabled is a method to turn ftrace on or off */
36int ftrace_enabled __read_mostly; 47int ftrace_enabled __read_mostly;
37static int last_ftrace_enabled; 48static int last_ftrace_enabled;
@@ -153,21 +164,8 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
153} 164}
154 165
155#ifdef CONFIG_DYNAMIC_FTRACE 166#ifdef CONFIG_DYNAMIC_FTRACE
156
157#ifndef CONFIG_FTRACE_MCOUNT_RECORD 167#ifndef CONFIG_FTRACE_MCOUNT_RECORD
158/* 168# error Dynamic ftrace depends on MCOUNT_RECORD
159 * The hash lock is only needed when the recording of the mcount
160 * callers are dynamic. That is, by the caller themselves and
161 * not recorded via the compilation.
162 */
163static DEFINE_SPINLOCK(ftrace_hash_lock);
164#define ftrace_hash_lock(flags) spin_lock_irqsave(&ftrace_hash_lock, flags)
165#define ftrace_hash_unlock(flags) \
166 spin_unlock_irqrestore(&ftrace_hash_lock, flags)
167#else
168/* This is protected via the ftrace_lock with MCOUNT_RECORD. */
169#define ftrace_hash_lock(flags) do { (void)(flags); } while (0)
170#define ftrace_hash_unlock(flags) do { } while(0)
171#endif 169#endif
172 170
173/* 171/*
@@ -178,8 +176,6 @@ static DEFINE_SPINLOCK(ftrace_hash_lock);
178 */ 176 */
179static unsigned long mcount_addr = MCOUNT_ADDR; 177static unsigned long mcount_addr = MCOUNT_ADDR;
180 178
181static struct task_struct *ftraced_task;
182
183enum { 179enum {
184 FTRACE_ENABLE_CALLS = (1 << 0), 180 FTRACE_ENABLE_CALLS = (1 << 0),
185 FTRACE_DISABLE_CALLS = (1 << 1), 181 FTRACE_DISABLE_CALLS = (1 << 1),
@@ -190,13 +186,9 @@ enum {
190 186
191static int ftrace_filtered; 187static int ftrace_filtered;
192static int tracing_on; 188static int tracing_on;
193static int frozen_record_count;
194
195static struct hlist_head ftrace_hash[FTRACE_HASHSIZE];
196 189
197static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu); 190static LIST_HEAD(ftrace_new_addrs);
198 191
199static DEFINE_MUTEX(ftraced_lock);
200static DEFINE_MUTEX(ftrace_regex_lock); 192static DEFINE_MUTEX(ftrace_regex_lock);
201 193
202struct ftrace_page { 194struct ftrace_page {
@@ -214,16 +206,13 @@ struct ftrace_page {
214static struct ftrace_page *ftrace_pages_start; 206static struct ftrace_page *ftrace_pages_start;
215static struct ftrace_page *ftrace_pages; 207static struct ftrace_page *ftrace_pages;
216 208
217static int ftraced_trigger;
218static int ftraced_suspend;
219static int ftraced_stop;
220
221static int ftrace_record_suspend;
222
223static struct dyn_ftrace *ftrace_free_records; 209static struct dyn_ftrace *ftrace_free_records;
224 210
225 211
226#ifdef CONFIG_KPROBES 212#ifdef CONFIG_KPROBES
213
214static int frozen_record_count;
215
227static inline void freeze_record(struct dyn_ftrace *rec) 216static inline void freeze_record(struct dyn_ftrace *rec)
228{ 217{
229 if (!(rec->flags & FTRACE_FL_FROZEN)) { 218 if (!(rec->flags & FTRACE_FL_FROZEN)) {
@@ -250,72 +239,6 @@ static inline int record_frozen(struct dyn_ftrace *rec)
250# define record_frozen(rec) ({ 0; }) 239# define record_frozen(rec) ({ 0; })
251#endif /* CONFIG_KPROBES */ 240#endif /* CONFIG_KPROBES */
252 241
253int skip_trace(unsigned long ip)
254{
255 unsigned long fl;
256 struct dyn_ftrace *rec;
257 struct hlist_node *t;
258 struct hlist_head *head;
259
260 if (frozen_record_count == 0)
261 return 0;
262
263 head = &ftrace_hash[hash_long(ip, FTRACE_HASHBITS)];
264 hlist_for_each_entry_rcu(rec, t, head, node) {
265 if (rec->ip == ip) {
266 if (record_frozen(rec)) {
267 if (rec->flags & FTRACE_FL_FAILED)
268 return 1;
269
270 if (!(rec->flags & FTRACE_FL_CONVERTED))
271 return 1;
272
273 if (!tracing_on || !ftrace_enabled)
274 return 1;
275
276 if (ftrace_filtered) {
277 fl = rec->flags & (FTRACE_FL_FILTER |
278 FTRACE_FL_NOTRACE);
279 if (!fl || (fl & FTRACE_FL_NOTRACE))
280 return 1;
281 }
282 }
283 break;
284 }
285 }
286
287 return 0;
288}
289
290static inline int
291ftrace_ip_in_hash(unsigned long ip, unsigned long key)
292{
293 struct dyn_ftrace *p;
294 struct hlist_node *t;
295 int found = 0;
296
297 hlist_for_each_entry_rcu(p, t, &ftrace_hash[key], node) {
298 if (p->ip == ip) {
299 found = 1;
300 break;
301 }
302 }
303
304 return found;
305}
306
307static inline void
308ftrace_add_hash(struct dyn_ftrace *node, unsigned long key)
309{
310 hlist_add_head_rcu(&node->node, &ftrace_hash[key]);
311}
312
313/* called from kstop_machine */
314static inline void ftrace_del_hash(struct dyn_ftrace *node)
315{
316 hlist_del(&node->node);
317}
318
319static void ftrace_free_rec(struct dyn_ftrace *rec) 242static void ftrace_free_rec(struct dyn_ftrace *rec)
320{ 243{
321 rec->ip = (unsigned long)ftrace_free_records; 244 rec->ip = (unsigned long)ftrace_free_records;
@@ -346,7 +269,6 @@ void ftrace_release(void *start, unsigned long size)
346 } 269 }
347 } 270 }
348 spin_unlock(&ftrace_lock); 271 spin_unlock(&ftrace_lock);
349
350} 272}
351 273
352static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) 274static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
@@ -358,10 +280,8 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
358 rec = ftrace_free_records; 280 rec = ftrace_free_records;
359 281
360 if (unlikely(!(rec->flags & FTRACE_FL_FREE))) { 282 if (unlikely(!(rec->flags & FTRACE_FL_FREE))) {
361 WARN_ON_ONCE(1); 283 FTRACE_WARN_ON_ONCE(1);
362 ftrace_free_records = NULL; 284 ftrace_free_records = NULL;
363 ftrace_disabled = 1;
364 ftrace_enabled = 0;
365 return NULL; 285 return NULL;
366 } 286 }
367 287
@@ -371,76 +291,36 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
371 } 291 }
372 292
373 if (ftrace_pages->index == ENTRIES_PER_PAGE) { 293 if (ftrace_pages->index == ENTRIES_PER_PAGE) {
374 if (!ftrace_pages->next) 294 if (!ftrace_pages->next) {
375 return NULL; 295 /* allocate another page */
296 ftrace_pages->next =
297 (void *)get_zeroed_page(GFP_KERNEL);
298 if (!ftrace_pages->next)
299 return NULL;
300 }
376 ftrace_pages = ftrace_pages->next; 301 ftrace_pages = ftrace_pages->next;
377 } 302 }
378 303
379 return &ftrace_pages->records[ftrace_pages->index++]; 304 return &ftrace_pages->records[ftrace_pages->index++];
380} 305}
381 306
382static void 307static struct dyn_ftrace *
383ftrace_record_ip(unsigned long ip) 308ftrace_record_ip(unsigned long ip)
384{ 309{
385 struct dyn_ftrace *node; 310 struct dyn_ftrace *rec;
386 unsigned long flags;
387 unsigned long key;
388 int resched;
389 int cpu;
390 311
391 if (!ftrace_enabled || ftrace_disabled) 312 if (!ftrace_enabled || ftrace_disabled)
392 return; 313 return NULL;
393
394 resched = need_resched();
395 preempt_disable_notrace();
396 314
397 /* 315 rec = ftrace_alloc_dyn_node(ip);
398 * We simply need to protect against recursion. 316 if (!rec)
399 * Use the the raw version of smp_processor_id and not 317 return NULL;
400 * __get_cpu_var which can call debug hooks that can
401 * cause a recursive crash here.
402 */
403 cpu = raw_smp_processor_id();
404 per_cpu(ftrace_shutdown_disable_cpu, cpu)++;
405 if (per_cpu(ftrace_shutdown_disable_cpu, cpu) != 1)
406 goto out;
407
408 if (unlikely(ftrace_record_suspend))
409 goto out;
410
411 key = hash_long(ip, FTRACE_HASHBITS);
412
413 WARN_ON_ONCE(key >= FTRACE_HASHSIZE);
414
415 if (ftrace_ip_in_hash(ip, key))
416 goto out;
417
418 ftrace_hash_lock(flags);
419
420 /* This ip may have hit the hash before the lock */
421 if (ftrace_ip_in_hash(ip, key))
422 goto out_unlock;
423
424 node = ftrace_alloc_dyn_node(ip);
425 if (!node)
426 goto out_unlock;
427
428 node->ip = ip;
429
430 ftrace_add_hash(node, key);
431 318
432 ftraced_trigger = 1; 319 rec->ip = ip;
433 320
434 out_unlock: 321 list_add(&rec->list, &ftrace_new_addrs);
435 ftrace_hash_unlock(flags);
436 out:
437 per_cpu(ftrace_shutdown_disable_cpu, cpu)--;
438 322
439 /* prevent recursion with scheduler */ 323 return rec;
440 if (resched)
441 preempt_enable_no_resched_notrace();
442 else
443 preempt_enable_notrace();
444} 324}
445 325
446#define FTRACE_ADDR ((long)(ftrace_caller)) 326#define FTRACE_ADDR ((long)(ftrace_caller))
@@ -559,7 +439,6 @@ static void ftrace_replace_code(int enable)
559 rec->flags |= FTRACE_FL_FAILED; 439 rec->flags |= FTRACE_FL_FAILED;
560 if ((system_state == SYSTEM_BOOTING) || 440 if ((system_state == SYSTEM_BOOTING) ||
561 !core_kernel_text(rec->ip)) { 441 !core_kernel_text(rec->ip)) {
562 ftrace_del_hash(rec);
563 ftrace_free_rec(rec); 442 ftrace_free_rec(rec);
564 } 443 }
565 } 444 }
@@ -567,15 +446,6 @@ static void ftrace_replace_code(int enable)
567 } 446 }
568} 447}
569 448
570static void ftrace_shutdown_replenish(void)
571{
572 if (ftrace_pages->next)
573 return;
574
575 /* allocate another page */
576 ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL);
577}
578
579static void print_ip_ins(const char *fmt, unsigned char *p) 449static void print_ip_ins(const char *fmt, unsigned char *p)
580{ 450{
581 int i; 451 int i;
@@ -591,23 +461,23 @@ ftrace_code_disable(struct dyn_ftrace *rec)
591{ 461{
592 unsigned long ip; 462 unsigned long ip;
593 unsigned char *nop, *call; 463 unsigned char *nop, *call;
594 int failed; 464 int ret;
595 465
596 ip = rec->ip; 466 ip = rec->ip;
597 467
598 nop = ftrace_nop_replace(); 468 nop = ftrace_nop_replace();
599 call = ftrace_call_replace(ip, mcount_addr); 469 call = ftrace_call_replace(ip, mcount_addr);
600 470
601 failed = ftrace_modify_code(ip, call, nop); 471 ret = ftrace_modify_code(ip, call, nop);
602 if (failed) { 472 if (ret) {
603 switch (failed) { 473 switch (ret) {
604 case 1: 474 case -EFAULT:
605 WARN_ON_ONCE(1); 475 FTRACE_WARN_ON_ONCE(1);
606 pr_info("ftrace faulted on modifying "); 476 pr_info("ftrace faulted on modifying ");
607 print_ip_sym(ip); 477 print_ip_sym(ip);
608 break; 478 break;
609 case 2: 479 case -EINVAL:
610 WARN_ON_ONCE(1); 480 FTRACE_WARN_ON_ONCE(1);
611 pr_info("ftrace failed to modify "); 481 pr_info("ftrace failed to modify ");
612 print_ip_sym(ip); 482 print_ip_sym(ip);
613 print_ip_ins(" expected: ", call); 483 print_ip_ins(" expected: ", call);
@@ -615,6 +485,15 @@ ftrace_code_disable(struct dyn_ftrace *rec)
615 print_ip_ins(" replace: ", nop); 485 print_ip_ins(" replace: ", nop);
616 printk(KERN_CONT "\n"); 486 printk(KERN_CONT "\n");
617 break; 487 break;
488 case -EPERM:
489 FTRACE_WARN_ON_ONCE(1);
490 pr_info("ftrace faulted on writing ");
491 print_ip_sym(ip);
492 break;
493 default:
494 FTRACE_WARN_ON_ONCE(1);
495 pr_info("ftrace faulted on unknown error ");
496 print_ip_sym(ip);
618 } 497 }
619 498
620 rec->flags |= FTRACE_FL_FAILED; 499 rec->flags |= FTRACE_FL_FAILED;
@@ -623,19 +502,11 @@ ftrace_code_disable(struct dyn_ftrace *rec)
623 return 1; 502 return 1;
624} 503}
625 504
626static int __ftrace_update_code(void *ignore);
627
628static int __ftrace_modify_code(void *data) 505static int __ftrace_modify_code(void *data)
629{ 506{
630 unsigned long addr;
631 int *command = data; 507 int *command = data;
632 508
633 if (*command & FTRACE_ENABLE_CALLS) { 509 if (*command & FTRACE_ENABLE_CALLS) {
634 /*
635 * Update any recorded ips now that we have the
636 * machine stopped
637 */
638 __ftrace_update_code(NULL);
639 ftrace_replace_code(1); 510 ftrace_replace_code(1);
640 tracing_on = 1; 511 tracing_on = 1;
641 } else if (*command & FTRACE_DISABLE_CALLS) { 512 } else if (*command & FTRACE_DISABLE_CALLS) {
@@ -646,14 +517,6 @@ static int __ftrace_modify_code(void *data)
646 if (*command & FTRACE_UPDATE_TRACE_FUNC) 517 if (*command & FTRACE_UPDATE_TRACE_FUNC)
647 ftrace_update_ftrace_func(ftrace_trace_function); 518 ftrace_update_ftrace_func(ftrace_trace_function);
648 519
649 if (*command & FTRACE_ENABLE_MCOUNT) {
650 addr = (unsigned long)ftrace_record_ip;
651 ftrace_mcount_set(&addr);
652 } else if (*command & FTRACE_DISABLE_MCOUNT) {
653 addr = (unsigned long)ftrace_stub;
654 ftrace_mcount_set(&addr);
655 }
656
657 return 0; 520 return 0;
658} 521}
659 522
@@ -662,26 +525,9 @@ static void ftrace_run_update_code(int command)
662 stop_machine(__ftrace_modify_code, &command, NULL); 525 stop_machine(__ftrace_modify_code, &command, NULL);
663} 526}
664 527
665void ftrace_disable_daemon(void)
666{
667 /* Stop the daemon from calling kstop_machine */
668 mutex_lock(&ftraced_lock);
669 ftraced_stop = 1;
670 mutex_unlock(&ftraced_lock);
671
672 ftrace_force_update();
673}
674
675void ftrace_enable_daemon(void)
676{
677 mutex_lock(&ftraced_lock);
678 ftraced_stop = 0;
679 mutex_unlock(&ftraced_lock);
680
681 ftrace_force_update();
682}
683
684static ftrace_func_t saved_ftrace_func; 528static ftrace_func_t saved_ftrace_func;
529static int ftrace_start;
530static DEFINE_MUTEX(ftrace_start_lock);
685 531
686static void ftrace_startup(void) 532static void ftrace_startup(void)
687{ 533{
@@ -690,9 +536,9 @@ static void ftrace_startup(void)
690 if (unlikely(ftrace_disabled)) 536 if (unlikely(ftrace_disabled))
691 return; 537 return;
692 538
693 mutex_lock(&ftraced_lock); 539 mutex_lock(&ftrace_start_lock);
694 ftraced_suspend++; 540 ftrace_start++;
695 if (ftraced_suspend == 1) 541 if (ftrace_start == 1)
696 command |= FTRACE_ENABLE_CALLS; 542 command |= FTRACE_ENABLE_CALLS;
697 543
698 if (saved_ftrace_func != ftrace_trace_function) { 544 if (saved_ftrace_func != ftrace_trace_function) {
@@ -705,7 +551,7 @@ static void ftrace_startup(void)
705 551
706 ftrace_run_update_code(command); 552 ftrace_run_update_code(command);
707 out: 553 out:
708 mutex_unlock(&ftraced_lock); 554 mutex_unlock(&ftrace_start_lock);
709} 555}
710 556
711static void ftrace_shutdown(void) 557static void ftrace_shutdown(void)
@@ -715,9 +561,9 @@ static void ftrace_shutdown(void)
715 if (unlikely(ftrace_disabled)) 561 if (unlikely(ftrace_disabled))
716 return; 562 return;
717 563
718 mutex_lock(&ftraced_lock); 564 mutex_lock(&ftrace_start_lock);
719 ftraced_suspend--; 565 ftrace_start--;
720 if (!ftraced_suspend) 566 if (!ftrace_start)
721 command |= FTRACE_DISABLE_CALLS; 567 command |= FTRACE_DISABLE_CALLS;
722 568
723 if (saved_ftrace_func != ftrace_trace_function) { 569 if (saved_ftrace_func != ftrace_trace_function) {
@@ -730,7 +576,7 @@ static void ftrace_shutdown(void)
730 576
731 ftrace_run_update_code(command); 577 ftrace_run_update_code(command);
732 out: 578 out:
733 mutex_unlock(&ftraced_lock); 579 mutex_unlock(&ftrace_start_lock);
734} 580}
735 581
736static void ftrace_startup_sysctl(void) 582static void ftrace_startup_sysctl(void)
@@ -740,15 +586,15 @@ static void ftrace_startup_sysctl(void)
740 if (unlikely(ftrace_disabled)) 586 if (unlikely(ftrace_disabled))
741 return; 587 return;
742 588
743 mutex_lock(&ftraced_lock); 589 mutex_lock(&ftrace_start_lock);
744 /* Force update next time */ 590 /* Force update next time */
745 saved_ftrace_func = NULL; 591 saved_ftrace_func = NULL;
746 /* ftraced_suspend is true if we want ftrace running */ 592 /* ftrace_start is true if we want ftrace running */
747 if (ftraced_suspend) 593 if (ftrace_start)
748 command |= FTRACE_ENABLE_CALLS; 594 command |= FTRACE_ENABLE_CALLS;
749 595
750 ftrace_run_update_code(command); 596 ftrace_run_update_code(command);
751 mutex_unlock(&ftraced_lock); 597 mutex_unlock(&ftrace_start_lock);
752} 598}
753 599
754static void ftrace_shutdown_sysctl(void) 600static void ftrace_shutdown_sysctl(void)
@@ -758,112 +604,50 @@ static void ftrace_shutdown_sysctl(void)
758 if (unlikely(ftrace_disabled)) 604 if (unlikely(ftrace_disabled))
759 return; 605 return;
760 606
761 mutex_lock(&ftraced_lock); 607 mutex_lock(&ftrace_start_lock);
762 /* ftraced_suspend is true if ftrace is running */ 608 /* ftrace_start is true if ftrace is running */
763 if (ftraced_suspend) 609 if (ftrace_start)
764 command |= FTRACE_DISABLE_CALLS; 610 command |= FTRACE_DISABLE_CALLS;
765 611
766 ftrace_run_update_code(command); 612 ftrace_run_update_code(command);
767 mutex_unlock(&ftraced_lock); 613 mutex_unlock(&ftrace_start_lock);
768} 614}
769 615
770static cycle_t ftrace_update_time; 616static cycle_t ftrace_update_time;
771static unsigned long ftrace_update_cnt; 617static unsigned long ftrace_update_cnt;
772unsigned long ftrace_update_tot_cnt; 618unsigned long ftrace_update_tot_cnt;
773 619
774static int __ftrace_update_code(void *ignore) 620static int ftrace_update_code(void)
775{ 621{
776 int i, save_ftrace_enabled; 622 struct dyn_ftrace *p, *t;
777 cycle_t start, stop; 623 cycle_t start, stop;
778 struct dyn_ftrace *p;
779 struct hlist_node *t, *n;
780 struct hlist_head *head, temp_list;
781
782 /* Don't be recording funcs now */
783 ftrace_record_suspend++;
784 save_ftrace_enabled = ftrace_enabled;
785 ftrace_enabled = 0;
786 624
787 start = ftrace_now(raw_smp_processor_id()); 625 start = ftrace_now(raw_smp_processor_id());
788 ftrace_update_cnt = 0; 626 ftrace_update_cnt = 0;
789 627
790 /* No locks needed, the machine is stopped! */ 628 list_for_each_entry_safe(p, t, &ftrace_new_addrs, list) {
791 for (i = 0; i < FTRACE_HASHSIZE; i++) {
792 INIT_HLIST_HEAD(&temp_list);
793 head = &ftrace_hash[i];
794 629
795 /* all CPUS are stopped, we are safe to modify code */ 630 /* If something went wrong, bail without enabling anything */
796 hlist_for_each_entry_safe(p, t, n, head, node) { 631 if (unlikely(ftrace_disabled))
797 /* Skip over failed records which have not been 632 return -1;
798 * freed. */
799 if (p->flags & FTRACE_FL_FAILED)
800 continue;
801 633
802 /* Unconverted records are always at the head of the 634 list_del_init(&p->list);
803 * hash bucket. Once we encounter a converted record,
804 * simply skip over to the next bucket. Saves ftraced
805 * some processor cycles (ftrace does its bid for
806 * global warming :-p ). */
807 if (p->flags & (FTRACE_FL_CONVERTED))
808 break;
809 635
810 /* Ignore updates to this record's mcount site. 636 /* convert record (i.e, patch mcount-call with NOP) */
811 * Reintroduce this record at the head of this 637 if (ftrace_code_disable(p)) {
812 * bucket to attempt to "convert" it again if 638 p->flags |= FTRACE_FL_CONVERTED;
813 * the kprobe on it is unregistered before the 639 ftrace_update_cnt++;
814 * next run. */ 640 } else
815 if (get_kprobe((void *)p->ip)) { 641 ftrace_free_rec(p);
816 ftrace_del_hash(p);
817 INIT_HLIST_NODE(&p->node);
818 hlist_add_head(&p->node, &temp_list);
819 freeze_record(p);
820 continue;
821 } else {
822 unfreeze_record(p);
823 }
824
825 /* convert record (i.e, patch mcount-call with NOP) */
826 if (ftrace_code_disable(p)) {
827 p->flags |= FTRACE_FL_CONVERTED;
828 ftrace_update_cnt++;
829 } else {
830 if ((system_state == SYSTEM_BOOTING) ||
831 !core_kernel_text(p->ip)) {
832 ftrace_del_hash(p);
833 ftrace_free_rec(p);
834 }
835 }
836 }
837
838 hlist_for_each_entry_safe(p, t, n, &temp_list, node) {
839 hlist_del(&p->node);
840 INIT_HLIST_NODE(&p->node);
841 hlist_add_head(&p->node, head);
842 }
843 } 642 }
844 643
845 stop = ftrace_now(raw_smp_processor_id()); 644 stop = ftrace_now(raw_smp_processor_id());
846 ftrace_update_time = stop - start; 645 ftrace_update_time = stop - start;
847 ftrace_update_tot_cnt += ftrace_update_cnt; 646 ftrace_update_tot_cnt += ftrace_update_cnt;
848 ftraced_trigger = 0;
849
850 ftrace_enabled = save_ftrace_enabled;
851 ftrace_record_suspend--;
852 647
853 return 0; 648 return 0;
854} 649}
855 650
856static int ftrace_update_code(void)
857{
858 if (unlikely(ftrace_disabled) ||
859 !ftrace_enabled || !ftraced_trigger)
860 return 0;
861
862 stop_machine(__ftrace_update_code, NULL, NULL);
863
864 return 1;
865}
866
867static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) 651static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
868{ 652{
869 struct ftrace_page *pg; 653 struct ftrace_page *pg;
@@ -892,7 +676,7 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
892 pg = ftrace_pages = ftrace_pages_start; 676 pg = ftrace_pages = ftrace_pages_start;
893 677
894 cnt = num_to_init / ENTRIES_PER_PAGE; 678 cnt = num_to_init / ENTRIES_PER_PAGE;
895 pr_info("ftrace: allocating %ld hash entries in %d pages\n", 679 pr_info("ftrace: allocating %ld entries in %d pages\n",
896 num_to_init, cnt); 680 num_to_init, cnt);
897 681
898 for (i = 0; i < cnt; i++) { 682 for (i = 0; i < cnt; i++) {
@@ -1401,10 +1185,10 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
1401 } 1185 }
1402 1186
1403 mutex_lock(&ftrace_sysctl_lock); 1187 mutex_lock(&ftrace_sysctl_lock);
1404 mutex_lock(&ftraced_lock); 1188 mutex_lock(&ftrace_start_lock);
1405 if (iter->filtered && ftraced_suspend && ftrace_enabled) 1189 if (iter->filtered && ftrace_start && ftrace_enabled)
1406 ftrace_run_update_code(FTRACE_ENABLE_CALLS); 1190 ftrace_run_update_code(FTRACE_ENABLE_CALLS);
1407 mutex_unlock(&ftraced_lock); 1191 mutex_unlock(&ftrace_start_lock);
1408 mutex_unlock(&ftrace_sysctl_lock); 1192 mutex_unlock(&ftrace_sysctl_lock);
1409 1193
1410 kfree(iter); 1194 kfree(iter);
@@ -1424,55 +1208,6 @@ ftrace_notrace_release(struct inode *inode, struct file *file)
1424 return ftrace_regex_release(inode, file, 0); 1208 return ftrace_regex_release(inode, file, 0);
1425} 1209}
1426 1210
1427static ssize_t
1428ftraced_read(struct file *filp, char __user *ubuf,
1429 size_t cnt, loff_t *ppos)
1430{
1431 /* don't worry about races */
1432 char *buf = ftraced_stop ? "disabled\n" : "enabled\n";
1433 int r = strlen(buf);
1434
1435 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
1436}
1437
1438static ssize_t
1439ftraced_write(struct file *filp, const char __user *ubuf,
1440 size_t cnt, loff_t *ppos)
1441{
1442 char buf[64];
1443 long val;
1444 int ret;
1445
1446 if (cnt >= sizeof(buf))
1447 return -EINVAL;
1448
1449 if (copy_from_user(&buf, ubuf, cnt))
1450 return -EFAULT;
1451
1452 if (strncmp(buf, "enable", 6) == 0)
1453 val = 1;
1454 else if (strncmp(buf, "disable", 7) == 0)
1455 val = 0;
1456 else {
1457 buf[cnt] = 0;
1458
1459 ret = strict_strtoul(buf, 10, &val);
1460 if (ret < 0)
1461 return ret;
1462
1463 val = !!val;
1464 }
1465
1466 if (val)
1467 ftrace_enable_daemon();
1468 else
1469 ftrace_disable_daemon();
1470
1471 filp->f_pos += cnt;
1472
1473 return cnt;
1474}
1475
1476static struct file_operations ftrace_avail_fops = { 1211static struct file_operations ftrace_avail_fops = {
1477 .open = ftrace_avail_open, 1212 .open = ftrace_avail_open,
1478 .read = seq_read, 1213 .read = seq_read,
@@ -1503,54 +1238,6 @@ static struct file_operations ftrace_notrace_fops = {
1503 .release = ftrace_notrace_release, 1238 .release = ftrace_notrace_release,
1504}; 1239};
1505 1240
1506static struct file_operations ftraced_fops = {
1507 .open = tracing_open_generic,
1508 .read = ftraced_read,
1509 .write = ftraced_write,
1510};
1511
1512/**
1513 * ftrace_force_update - force an update to all recording ftrace functions
1514 */
1515int ftrace_force_update(void)
1516{
1517 int ret = 0;
1518
1519 if (unlikely(ftrace_disabled))
1520 return -ENODEV;
1521
1522 mutex_lock(&ftrace_sysctl_lock);
1523 mutex_lock(&ftraced_lock);
1524
1525 /*
1526 * If ftraced_trigger is not set, then there is nothing
1527 * to update.
1528 */
1529 if (ftraced_trigger && !ftrace_update_code())
1530 ret = -EBUSY;
1531
1532 mutex_unlock(&ftraced_lock);
1533 mutex_unlock(&ftrace_sysctl_lock);
1534
1535 return ret;
1536}
1537
1538static void ftrace_force_shutdown(void)
1539{
1540 struct task_struct *task;
1541 int command = FTRACE_DISABLE_CALLS | FTRACE_UPDATE_TRACE_FUNC;
1542
1543 mutex_lock(&ftraced_lock);
1544 task = ftraced_task;
1545 ftraced_task = NULL;
1546 ftraced_suspend = -1;
1547 ftrace_run_update_code(command);
1548 mutex_unlock(&ftraced_lock);
1549
1550 if (task)
1551 kthread_stop(task);
1552}
1553
1554static __init int ftrace_init_debugfs(void) 1241static __init int ftrace_init_debugfs(void)
1555{ 1242{
1556 struct dentry *d_tracer; 1243 struct dentry *d_tracer;
@@ -1581,17 +1268,11 @@ static __init int ftrace_init_debugfs(void)
1581 pr_warning("Could not create debugfs " 1268 pr_warning("Could not create debugfs "
1582 "'set_ftrace_notrace' entry\n"); 1269 "'set_ftrace_notrace' entry\n");
1583 1270
1584 entry = debugfs_create_file("ftraced_enabled", 0644, d_tracer,
1585 NULL, &ftraced_fops);
1586 if (!entry)
1587 pr_warning("Could not create debugfs "
1588 "'ftraced_enabled' entry\n");
1589 return 0; 1271 return 0;
1590} 1272}
1591 1273
1592fs_initcall(ftrace_init_debugfs); 1274fs_initcall(ftrace_init_debugfs);
1593 1275
1594#ifdef CONFIG_FTRACE_MCOUNT_RECORD
1595static int ftrace_convert_nops(unsigned long *start, 1276static int ftrace_convert_nops(unsigned long *start,
1596 unsigned long *end) 1277 unsigned long *end)
1597{ 1278{
@@ -1599,20 +1280,18 @@ static int ftrace_convert_nops(unsigned long *start,
1599 unsigned long addr; 1280 unsigned long addr;
1600 unsigned long flags; 1281 unsigned long flags;
1601 1282
1283 mutex_lock(&ftrace_start_lock);
1602 p = start; 1284 p = start;
1603 while (p < end) { 1285 while (p < end) {
1604 addr = ftrace_call_adjust(*p++); 1286 addr = ftrace_call_adjust(*p++);
1605 /* should not be called from interrupt context */
1606 spin_lock(&ftrace_lock);
1607 ftrace_record_ip(addr); 1287 ftrace_record_ip(addr);
1608 spin_unlock(&ftrace_lock);
1609 ftrace_shutdown_replenish();
1610 } 1288 }
1611 1289
1612 /* p is ignored */ 1290 /* disable interrupts to prevent kstop machine */
1613 local_irq_save(flags); 1291 local_irq_save(flags);
1614 __ftrace_update_code(p); 1292 ftrace_update_code();
1615 local_irq_restore(flags); 1293 local_irq_restore(flags);
1294 mutex_unlock(&ftrace_start_lock);
1616 1295
1617 return 0; 1296 return 0;
1618} 1297}
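Reassembled from the new column for readability: ftrace_convert_nops() now serialises on ftrace_start_lock and patches the freshly recorded call sites itself, with interrupts disabled to keep kstop_machine away, instead of deferring the work to the ftraced daemon that the rest of this patch removes. The declarations are carried over from the context lines; treat this as a sketch of the hunk, not an independent implementation.

static int ftrace_convert_nops(unsigned long *start,
			       unsigned long *end)
{
	unsigned long *p;
	unsigned long addr;
	unsigned long flags;

	mutex_lock(&ftrace_start_lock);
	p = start;
	while (p < end) {
		addr = ftrace_call_adjust(*p++);
		/* queues the call site on ftrace_new_addrs for conversion */
		ftrace_record_ip(addr);
	}

	/* disable interrupts to prevent kstop machine */
	local_irq_save(flags);
	ftrace_update_code();
	local_irq_restore(flags);
	mutex_unlock(&ftrace_start_lock);

	return 0;
}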
@@ -1658,130 +1337,34 @@ void __init ftrace_init(void)
1658 failed: 1337 failed:
1659 ftrace_disabled = 1; 1338 ftrace_disabled = 1;
1660} 1339}
1661#else /* CONFIG_FTRACE_MCOUNT_RECORD */
1662static int ftraced(void *ignore)
1663{
1664 unsigned long usecs;
1665
1666 while (!kthread_should_stop()) {
1667
1668 set_current_state(TASK_INTERRUPTIBLE);
1669
1670 /* check once a second */
1671 schedule_timeout(HZ);
1672 1340
1673 if (unlikely(ftrace_disabled)) 1341#else
1674 continue;
1675
1676 mutex_lock(&ftrace_sysctl_lock);
1677 mutex_lock(&ftraced_lock);
1678 if (!ftraced_suspend && !ftraced_stop &&
1679 ftrace_update_code()) {
1680 usecs = nsecs_to_usecs(ftrace_update_time);
1681 if (ftrace_update_tot_cnt > 100000) {
1682 ftrace_update_tot_cnt = 0;
1683 pr_info("hm, dftrace overflow: %lu change%s"
1684 " (%lu total) in %lu usec%s\n",
1685 ftrace_update_cnt,
1686 ftrace_update_cnt != 1 ? "s" : "",
1687 ftrace_update_tot_cnt,
1688 usecs, usecs != 1 ? "s" : "");
1689 ftrace_disabled = 1;
1690 WARN_ON_ONCE(1);
1691 }
1692 }
1693 mutex_unlock(&ftraced_lock);
1694 mutex_unlock(&ftrace_sysctl_lock);
1695
1696 ftrace_shutdown_replenish();
1697 }
1698 __set_current_state(TASK_RUNNING);
1699 return 0;
1700}
1701 1342
1702static int __init ftrace_dynamic_init(void) 1343static int __init ftrace_nodyn_init(void)
1703{ 1344{
1704 struct task_struct *p; 1345 ftrace_enabled = 1;
1705 unsigned long addr;
1706 int ret;
1707
1708 addr = (unsigned long)ftrace_record_ip;
1709
1710 stop_machine(ftrace_dyn_arch_init, &addr, NULL);
1711
1712 /* ftrace_dyn_arch_init places the return code in addr */
1713 if (addr) {
1714 ret = (int)addr;
1715 goto failed;
1716 }
1717
1718 ret = ftrace_dyn_table_alloc(NR_TO_INIT);
1719 if (ret)
1720 goto failed;
1721
1722 p = kthread_run(ftraced, NULL, "ftraced");
1723 if (IS_ERR(p)) {
1724 ret = -1;
1725 goto failed;
1726 }
1727
1728 last_ftrace_enabled = ftrace_enabled = 1;
1729 ftraced_task = p;
1730
1731 return 0; 1346 return 0;
1732
1733 failed:
1734 ftrace_disabled = 1;
1735 return ret;
1736} 1347}
1348device_initcall(ftrace_nodyn_init);
1737 1349
1738core_initcall(ftrace_dynamic_init);
1739#endif /* CONFIG_FTRACE_MCOUNT_RECORD */
1740
1741#else
1742# define ftrace_startup() do { } while (0) 1350# define ftrace_startup() do { } while (0)
1743# define ftrace_shutdown() do { } while (0) 1351# define ftrace_shutdown() do { } while (0)
1744# define ftrace_startup_sysctl() do { } while (0) 1352# define ftrace_startup_sysctl() do { } while (0)
1745# define ftrace_shutdown_sysctl() do { } while (0) 1353# define ftrace_shutdown_sysctl() do { } while (0)
1746# define ftrace_force_shutdown() do { } while (0)
1747#endif /* CONFIG_DYNAMIC_FTRACE */ 1354#endif /* CONFIG_DYNAMIC_FTRACE */
1748 1355
1749/** 1356/**
1750 * ftrace_kill_atomic - kill ftrace from critical sections 1357 * ftrace_kill - kill ftrace
1751 * 1358 *
1752 * This function should be used by panic code. It stops ftrace 1359 * This function should be used by panic code. It stops ftrace
1753 * but in a not so nice way. If you need to simply kill ftrace 1360 * but in a not so nice way. If you need to simply kill ftrace
1754 * from a non-atomic section, use ftrace_kill. 1361 * from a non-atomic section, use ftrace_kill.
1755 */ 1362 */
1756void ftrace_kill_atomic(void)
1757{
1758 ftrace_disabled = 1;
1759 ftrace_enabled = 0;
1760#ifdef CONFIG_DYNAMIC_FTRACE
1761 ftraced_suspend = -1;
1762#endif
1763 clear_ftrace_function();
1764}
1765
1766/**
1767 * ftrace_kill - totally shutdown ftrace
1768 *
1769 * This is a safety measure. If something was detected that seems
1770 * wrong, calling this function will keep ftrace from doing
1771 * any more modifications, and updates.
1772 * used when something went wrong.
1773 */
1774void ftrace_kill(void) 1363void ftrace_kill(void)
1775{ 1364{
1776 mutex_lock(&ftrace_sysctl_lock);
1777 ftrace_disabled = 1; 1365 ftrace_disabled = 1;
1778 ftrace_enabled = 0; 1366 ftrace_enabled = 0;
1779
1780 clear_ftrace_function(); 1367 clear_ftrace_function();
1781 mutex_unlock(&ftrace_sysctl_lock);
1782
1783 /* Try to totally disable ftrace */
1784 ftrace_force_shutdown();
1785} 1368}
1786 1369
1787/** 1370/**
@@ -1870,3 +1453,4 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
1870 mutex_unlock(&ftrace_sysctl_lock); 1453 mutex_unlock(&ftrace_sysctl_lock);
1871 return ret; 1454 return ret;
1872} 1455}
1456
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 94af1fe56bb..cedf4e26828 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -130,7 +130,7 @@ struct buffer_page {
130static inline void free_buffer_page(struct buffer_page *bpage) 130static inline void free_buffer_page(struct buffer_page *bpage)
131{ 131{
132 if (bpage->page) 132 if (bpage->page)
133 __free_page(bpage->page); 133 free_page((unsigned long)bpage->page);
134 kfree(bpage); 134 kfree(bpage);
135} 135}
136 136
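The old call treated bpage->page as a struct page pointer, while the cast in the replacement shows it actually holds a kernel virtual address; the two allocation flavours have to be released with matching helpers. A standalone illustration of the pairing (the function name is invented for the example, not taken from this file):

#include <linux/gfp.h>
#include <linux/mm.h>

static void demo_page_pairings(void)
{
	struct page *pg;
	unsigned long addr;

	/* a struct page * allocation pairs with __free_page() */
	pg = alloc_page(GFP_KERNEL);
	if (pg)
		__free_page(pg);

	/* a virtual-address allocation pairs with free_page() */
	addr = __get_free_page(GFP_KERNEL);
	if (addr)
		free_page(addr);
}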
@@ -966,7 +966,9 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
966 if (unlikely(*delta > (1ULL << 59) && !once++)) { 966 if (unlikely(*delta > (1ULL << 59) && !once++)) {
967 printk(KERN_WARNING "Delta way too big! %llu" 967 printk(KERN_WARNING "Delta way too big! %llu"
968 " ts=%llu write stamp = %llu\n", 968 " ts=%llu write stamp = %llu\n",
969 *delta, *ts, cpu_buffer->write_stamp); 969 (unsigned long long)*delta,
970 (unsigned long long)*ts,
971 (unsigned long long)cpu_buffer->write_stamp);
970 WARN_ON(1); 972 WARN_ON(1);
971 } 973 }
972 974
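The added casts are needed because u64 is typedef'd to unsigned long rather than unsigned long long on some 64-bit architectures, so handing it straight to a %llu conversion triggers a printk format warning there. A minimal sketch of the portable form (the helper name is invented for the example):

#include <linux/kernel.h>
#include <linux/types.h>

static void demo_print_u64(u64 delta)
{
	/* the cast keeps %llu correct whether u64 is long or long long */
	printk(KERN_INFO "delta=%llu\n", (unsigned long long)delta);
}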
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 47f46cbdd86..8741e5c4313 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -34,6 +34,7 @@
34 34
35#include <linux/stacktrace.h> 35#include <linux/stacktrace.h>
36#include <linux/ring_buffer.h> 36#include <linux/ring_buffer.h>
37#include <linux/irqflags.h>
37 38
38#include "trace.h" 39#include "trace.h"
39 40
@@ -873,7 +874,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
873 preempt_enable_notrace(); 874 preempt_enable_notrace();
874} 875}
875 876
876#ifdef CONFIG_FTRACE 877#ifdef CONFIG_FUNCTION_TRACER
877static void 878static void
878function_trace_call(unsigned long ip, unsigned long parent_ip) 879function_trace_call(unsigned long ip, unsigned long parent_ip)
879{ 880{
@@ -887,9 +888,6 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
887 if (unlikely(!ftrace_function_enabled)) 888 if (unlikely(!ftrace_function_enabled))
888 return; 889 return;
889 890
890 if (skip_trace(ip))
891 return;
892
893 pc = preempt_count(); 891 pc = preempt_count();
894 resched = need_resched(); 892 resched = need_resched();
895 preempt_disable_notrace(); 893 preempt_disable_notrace();
@@ -2401,9 +2399,10 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2401 int i; 2399 int i;
2402 size_t ret; 2400 size_t ret;
2403 2401
2402 ret = cnt;
2403
2404 if (cnt > max_tracer_type_len) 2404 if (cnt > max_tracer_type_len)
2405 cnt = max_tracer_type_len; 2405 cnt = max_tracer_type_len;
2406 ret = cnt;
2407 2406
2408 if (copy_from_user(&buf, ubuf, cnt)) 2407 if (copy_from_user(&buf, ubuf, cnt))
2409 return -EFAULT; 2408 return -EFAULT;
@@ -2436,8 +2435,8 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2436 out: 2435 out:
2437 mutex_unlock(&trace_types_lock); 2436 mutex_unlock(&trace_types_lock);
2438 2437
2439 if (ret == cnt) 2438 if (ret > 0)
2440 filp->f_pos += cnt; 2439 filp->f_pos += ret;
2441 2440
2442 return ret; 2441 return ret;
2443} 2442}
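Assigning ret before cnt is clamped to max_tracer_type_len means the write now reports the caller's full byte count as consumed even when only a truncated prefix is parsed, so a user-space write(2) loop does not keep re-submitting a tail that can never match a tracer name. The same pattern in isolation (the names and the length limit are hypothetical):

#include <linux/fs.h>
#include <linux/uaccess.h>

#define DEMO_MAX_LEN 64			/* hypothetical limit */

static ssize_t demo_write(struct file *filp, const char __user *ubuf,
			  size_t cnt, loff_t *ppos)
{
	char buf[DEMO_MAX_LEN + 1];
	ssize_t ret = cnt;		/* report the whole write as consumed */

	if (cnt > DEMO_MAX_LEN)		/* ...even if only a prefix is used */
		cnt = DEMO_MAX_LEN;

	if (copy_from_user(buf, ubuf, cnt))
		return -EFAULT;
	buf[cnt] = '\0';

	/* act on buf here */

	if (ret > 0)
		*ppos += ret;

	return ret;
}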
@@ -3120,7 +3119,7 @@ void ftrace_dump(void)
3120 dump_ran = 1; 3119 dump_ran = 1;
3121 3120
3122 /* No turning back! */ 3121 /* No turning back! */
3123 ftrace_kill_atomic(); 3122 ftrace_kill();
3124 3123
3125 for_each_tracing_cpu(cpu) { 3124 for_each_tracing_cpu(cpu) {
3126 atomic_inc(&global_trace.data[cpu]->disabled); 3125 atomic_inc(&global_trace.data[cpu]->disabled);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f1f99572cde..6889ca48f1f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -335,7 +335,7 @@ void update_max_tr_single(struct trace_array *tr,
335 335
336extern cycle_t ftrace_now(int cpu); 336extern cycle_t ftrace_now(int cpu);
337 337
338#ifdef CONFIG_FTRACE 338#ifdef CONFIG_FUNCTION_TRACER
339void tracing_start_function_trace(void); 339void tracing_start_function_trace(void);
340void tracing_stop_function_trace(void); 340void tracing_stop_function_trace(void);
341#else 341#else
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index e90eb0c2c56..0f85a64003d 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -64,7 +64,7 @@ static void function_trace_ctrl_update(struct trace_array *tr)
64 64
65static struct tracer function_trace __read_mostly = 65static struct tracer function_trace __read_mostly =
66{ 66{
67 .name = "ftrace", 67 .name = "function",
68 .init = function_trace_init, 68 .init = function_trace_init,
69 .reset = function_trace_reset, 69 .reset = function_trace_reset,
70 .ctrl_update = function_trace_ctrl_update, 70 .ctrl_update = function_trace_ctrl_update,
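Only the .name string changes, but that string is what user space writes to the current_tracer file to select a tracer, so the selection token becomes "function" instead of "ftrace". For context, a sketch of how such a tracer is hooked up through register_tracer(); the initcall below is assumed for illustration and is not part of this hunk:

/* sketch: registration that would sit at the bottom of trace_functions.c */
static __init int init_function_trace(void)
{
	return register_tracer(&function_trace);
}
device_initcall(init_function_trace);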
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index a7db7f040ae..9c74071c10e 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -63,7 +63,7 @@ irq_trace(void)
63 */ 63 */
64static __cacheline_aligned_in_smp unsigned long max_sequence; 64static __cacheline_aligned_in_smp unsigned long max_sequence;
65 65
66#ifdef CONFIG_FTRACE 66#ifdef CONFIG_FUNCTION_TRACER
67/* 67/*
68 * irqsoff uses its own tracer function to keep the overhead down: 68 * irqsoff uses its own tracer function to keep the overhead down:
69 */ 69 */
@@ -104,7 +104,7 @@ static struct ftrace_ops trace_ops __read_mostly =
104{ 104{
105 .func = irqsoff_tracer_call, 105 .func = irqsoff_tracer_call,
106}; 106};
107#endif /* CONFIG_FTRACE */ 107#endif /* CONFIG_FUNCTION_TRACER */
108 108
109/* 109/*
110 * Should this new latency be reported/recorded? 110 * Should this new latency be reported/recorded?
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index fe4a252c236..3ae93f16b56 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -31,7 +31,7 @@ static raw_spinlock_t wakeup_lock =
31 31
32static void __wakeup_reset(struct trace_array *tr); 32static void __wakeup_reset(struct trace_array *tr);
33 33
34#ifdef CONFIG_FTRACE 34#ifdef CONFIG_FUNCTION_TRACER
35/* 35/*
36 * irqsoff uses its own tracer function to keep the overhead down: 36 * irqsoff uses its own tracer function to keep the overhead down:
37 */ 37 */
@@ -96,7 +96,7 @@ static struct ftrace_ops trace_ops __read_mostly =
96{ 96{
97 .func = wakeup_tracer_call, 97 .func = wakeup_tracer_call,
98}; 98};
99#endif /* CONFIG_FTRACE */ 99#endif /* CONFIG_FUNCTION_TRACER */
100 100
101/* 101/*
102 * Should this new latency be reported/recorded? 102 * Should this new latency be reported/recorded?
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 09cf230d7ec..90bc752a758 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -70,7 +70,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
70 return ret; 70 return ret;
71} 71}
72 72
73#ifdef CONFIG_FTRACE 73#ifdef CONFIG_FUNCTION_TRACER
74 74
75#ifdef CONFIG_DYNAMIC_FTRACE 75#ifdef CONFIG_DYNAMIC_FTRACE
76 76
@@ -99,13 +99,6 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
99 /* passed in by parameter to fool gcc from optimizing */ 99 /* passed in by parameter to fool gcc from optimizing */
100 func(); 100 func();
101 101
102 /* update the records */
103 ret = ftrace_force_update();
104 if (ret) {
105 printk(KERN_CONT ".. ftraced failed .. ");
106 return ret;
107 }
108
109 /* 102 /*
110 * Some archs *cough*PowerPC*cough* add charachters to the 103 * Some archs *cough*PowerPC*cough* add charachters to the
111 * start of the function names. We simply put a '*' to 104 * start of the function names. We simply put a '*' to
@@ -183,13 +176,6 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
183 /* make sure msleep has been recorded */ 176 /* make sure msleep has been recorded */
184 msleep(1); 177 msleep(1);
185 178
186 /* force the recorded functions to be traced */
187 ret = ftrace_force_update();
188 if (ret) {
189 printk(KERN_CONT ".. ftraced failed .. ");
190 return ret;
191 }
192
193 /* start the tracing */ 179 /* start the tracing */
194 ftrace_enabled = 1; 180 ftrace_enabled = 1;
195 tracer_enabled = 1; 181 tracer_enabled = 1;
@@ -226,7 +212,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
226 212
227 return ret; 213 return ret;
228} 214}
229#endif /* CONFIG_FTRACE */ 215#endif /* CONFIG_FUNCTION_TRACER */
230 216
231#ifdef CONFIG_IRQSOFF_TRACER 217#ifdef CONFIG_IRQSOFF_TRACER
232int 218int
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 74c5d9a3afa..be682b62fe5 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -44,6 +44,10 @@ static inline void check_stack(void)
44 if (this_size <= max_stack_size) 44 if (this_size <= max_stack_size)
45 return; 45 return;
46 46
47 /* we do not handle interrupt stacks yet */
48 if (!object_is_on_stack(&this_size))
49 return;
50
47 raw_local_irq_save(flags); 51 raw_local_irq_save(flags);
48 __raw_spin_lock(&max_stack_lock); 52 __raw_spin_lock(&max_stack_lock);
49 53
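The new guard skips the measurement when the address of the probed local variable is not on the current task's thread stack, for example when the tracer fires on a separate interrupt stack that the stack tracer does not yet understand. Roughly, such a test reduces to the comparison below; this is a sketch with an invented name, not the kernel's own object_is_on_stack() helper, which may differ in detail:

#include <linux/sched.h>

/* sketch: true if obj lies within current's thread stack */
static inline int demo_object_is_on_stack(void *obj)
{
	void *stack = task_stack_page(current);

	return (obj >= stack) && (obj < (stack + THREAD_SIZE));
}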
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index f2b7c28a470..af8c8566488 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -131,6 +131,9 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
131 131
132 old = entry->funcs; 132 old = entry->funcs;
133 133
134 if (!old)
135 return NULL;
136
134 debug_print_probes(entry); 137 debug_print_probes(entry);
135 /* (N -> M), (N > 1, M >= 0) probes */ 138 /* (N -> M), (N > 1, M >= 0) probes */
136 for (nr_probes = 0; old[nr_probes]; nr_probes++) { 139 for (nr_probes = 0; old[nr_probes]; nr_probes++) {
@@ -388,6 +391,11 @@ int tracepoint_probe_unregister(const char *name, void *probe)
388 if (entry->rcu_pending) 391 if (entry->rcu_pending)
389 rcu_barrier_sched(); 392 rcu_barrier_sched();
390 old = tracepoint_entry_remove_probe(entry, probe); 393 old = tracepoint_entry_remove_probe(entry, probe);
394 if (!old) {
395 printk(KERN_WARNING "Warning: Trying to unregister a probe"
396 " that doesn't exist\n");
397 goto end;
398 }
391 mutex_unlock(&tracepoints_mutex); 399 mutex_unlock(&tracepoints_mutex);
392 tracepoint_update_probes(); /* may update entry */ 400 tracepoint_update_probes(); /* may update entry */
393 mutex_lock(&tracepoints_mutex); 401 mutex_lock(&tracepoints_mutex);