path: root/kernel
author    David S. Miller <davem@davemloft.net>  2009-08-12 20:44:53 -0400
committer David S. Miller <davem@davemloft.net>  2009-08-12 20:44:53 -0400
commit    aa11d958d1a6572eda08214d7c6a735804fe48a5 (patch)
tree      d025b05270ad1e010660d17eeadc6ac3c1abbd7d /kernel
parent    07f6642ee9418e962e54cbc07471cfe2e559c568 (diff)
parent    9799218ae36910af50f002a5db1802d576fffb43 (diff)
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
Conflicts: arch/microblaze/include/asm/socket.h
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cgroup.c | 151
-rw-r--r--  kernel/exit.c | 1
-rw-r--r--  kernel/fork.c | 31
-rw-r--r--  kernel/freezer.c | 7
-rw-r--r--  kernel/futex.c | 1
-rw-r--r--  kernel/hrtimer.c | 110
-rw-r--r--  kernel/irq/internals.h | 3
-rw-r--r--  kernel/irq/manage.c | 55
-rw-r--r--  kernel/irq/migration.c | 2
-rw-r--r--  kernel/kexec.c | 2
-rw-r--r--  kernel/kmod.c | 1
-rw-r--r--  kernel/kprobes.c | 8
-rw-r--r--  kernel/kthread.c | 10
-rw-r--r--  kernel/lockdep_proc.c | 3
-rw-r--r--  kernel/module.c | 9
-rw-r--r--  kernel/panic.c | 1
-rw-r--r--  kernel/perf_counter.c | 311
-rw-r--r--  kernel/pid.c | 7
-rw-r--r--  kernel/posix-cpu-timers.c | 7
-rw-r--r--  kernel/posix-timers.c | 7
-rw-r--r--  kernel/power/user.c | 1
-rw-r--r--  kernel/profile.c | 5
-rw-r--r--  kernel/ptrace.c | 4
-rw-r--r--  kernel/rcutree.c | 3
-rw-r--r--  kernel/rtmutex.c | 4
-rw-r--r--  kernel/sched.c | 61
-rw-r--r--  kernel/sched_cpupri.c | 15
-rw-r--r--  kernel/sched_fair.c | 45
-rw-r--r--  kernel/sched_rt.c | 18
-rw-r--r--  kernel/signal.c | 25
-rw-r--r--  kernel/smp.c | 2
-rw-r--r--  kernel/softirq.c | 64
-rw-r--r--  kernel/time/clockevents.c | 11
-rw-r--r--  kernel/time/clocksource.c | 2
-rw-r--r--  kernel/timer.c | 2
-rw-r--r--  kernel/trace/Kconfig | 6
-rw-r--r--  kernel/trace/blktrace.c | 1
-rw-r--r--  kernel/trace/ftrace.c | 28
-rw-r--r--  kernel/trace/ring_buffer.c | 15
-rw-r--r--  kernel/trace/trace.c | 14
-rw-r--r--  kernel/trace/trace.h | 4
-rw-r--r--  kernel/trace/trace_event_profile.c | 2
-rw-r--r--  kernel/trace/trace_event_types.h | 3
-rw-r--r--  kernel/trace/trace_events.c | 4
-rw-r--r--  kernel/trace/trace_events_filter.c | 20
-rw-r--r--  kernel/trace/trace_functions.c | 2
-rw-r--r--  kernel/trace/trace_functions_graph.c | 11
-rw-r--r--  kernel/trace/trace_output.c | 3
-rw-r--r--  kernel/trace/trace_printk.c | 2
-rw-r--r--  kernel/trace/trace_stack.c | 11
-rw-r--r--  kernel/trace/trace_stat.c | 34
51 files changed, 746 insertions, 403 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 3737a682cdf5..b6eadfe30e7b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -47,6 +47,7 @@
47#include <linux/hash.h> 47#include <linux/hash.h>
48#include <linux/namei.h> 48#include <linux/namei.h>
49#include <linux/smp_lock.h> 49#include <linux/smp_lock.h>
50#include <linux/pid_namespace.h>
50 51
51#include <asm/atomic.h> 52#include <asm/atomic.h>
52 53
@@ -734,16 +735,28 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
734 * reference to css->refcnt. In general, this refcnt is expected to goes down 735 * reference to css->refcnt. In general, this refcnt is expected to goes down
735 * to zero, soon. 736 * to zero, soon.
736 * 737 *
737 * CGRP_WAIT_ON_RMDIR flag is modified under cgroup's inode->i_mutex; 738 * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
738 */ 739 */
739DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq); 740DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
740 741
741static void cgroup_wakeup_rmdir_waiters(const struct cgroup *cgrp) 742static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
742{ 743{
743 if (unlikely(test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))) 744 if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
744 wake_up_all(&cgroup_rmdir_waitq); 745 wake_up_all(&cgroup_rmdir_waitq);
745} 746}
746 747
748void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
749{
750 css_get(css);
751}
752
753void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
754{
755 cgroup_wakeup_rmdir_waiter(css->cgroup);
756 css_put(css);
757}
758
759
747static int rebind_subsystems(struct cgroupfs_root *root, 760static int rebind_subsystems(struct cgroupfs_root *root,
748 unsigned long final_bits) 761 unsigned long final_bits)
749{ 762{
@@ -960,6 +973,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
960 INIT_LIST_HEAD(&cgrp->children); 973 INIT_LIST_HEAD(&cgrp->children);
961 INIT_LIST_HEAD(&cgrp->css_sets); 974 INIT_LIST_HEAD(&cgrp->css_sets);
962 INIT_LIST_HEAD(&cgrp->release_list); 975 INIT_LIST_HEAD(&cgrp->release_list);
976 INIT_LIST_HEAD(&cgrp->pids_list);
963 init_rwsem(&cgrp->pids_mutex); 977 init_rwsem(&cgrp->pids_mutex);
964} 978}
965static void init_cgroup_root(struct cgroupfs_root *root) 979static void init_cgroup_root(struct cgroupfs_root *root)
@@ -1357,7 +1371,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1357 * wake up rmdir() waiter. the rmdir should fail since the cgroup 1371 * wake up rmdir() waiter. the rmdir should fail since the cgroup
1358 * is no longer empty. 1372 * is no longer empty.
1359 */ 1373 */
1360 cgroup_wakeup_rmdir_waiters(cgrp); 1374 cgroup_wakeup_rmdir_waiter(cgrp);
1361 return 0; 1375 return 0;
1362} 1376}
1363 1377
@@ -2201,12 +2215,30 @@ err:
2201 return ret; 2215 return ret;
2202} 2216}
2203 2217
2218/*
2219 * Cache pids for all threads in the same pid namespace that are
2220 * opening the same "tasks" file.
2221 */
2222struct cgroup_pids {
2223 /* The node in cgrp->pids_list */
2224 struct list_head list;
2225 /* The cgroup those pids belong to */
2226 struct cgroup *cgrp;
2227 /* The namespace those pids belong to */
2228 struct pid_namespace *ns;
2229 /* Array of process ids in the cgroup */
2230 pid_t *tasks_pids;
2231 /* How many files are using this tasks_pids array */
2232 int use_count;
2233 /* Length of the current tasks_pids array */
2234 int length;
2235};
2236
2204static int cmppid(const void *a, const void *b) 2237static int cmppid(const void *a, const void *b)
2205{ 2238{
2206 return *(pid_t *)a - *(pid_t *)b; 2239 return *(pid_t *)a - *(pid_t *)b;
2207} 2240}
2208 2241
2209
2210/* 2242/*
2211 * seq_file methods for the "tasks" file. The seq_file position is the 2243 * seq_file methods for the "tasks" file. The seq_file position is the
2212 * next pid to display; the seq_file iterator is a pointer to the pid 2244 * next pid to display; the seq_file iterator is a pointer to the pid
@@ -2221,45 +2253,47 @@ static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
2221 * after a seek to the start). Use a binary-search to find the 2253 * after a seek to the start). Use a binary-search to find the
2222 * next pid to display, if any 2254 * next pid to display, if any
2223 */ 2255 */
2224 struct cgroup *cgrp = s->private; 2256 struct cgroup_pids *cp = s->private;
2257 struct cgroup *cgrp = cp->cgrp;
2225 int index = 0, pid = *pos; 2258 int index = 0, pid = *pos;
2226 int *iter; 2259 int *iter;
2227 2260
2228 down_read(&cgrp->pids_mutex); 2261 down_read(&cgrp->pids_mutex);
2229 if (pid) { 2262 if (pid) {
2230 int end = cgrp->pids_length; 2263 int end = cp->length;
2231 2264
2232 while (index < end) { 2265 while (index < end) {
2233 int mid = (index + end) / 2; 2266 int mid = (index + end) / 2;
2234 if (cgrp->tasks_pids[mid] == pid) { 2267 if (cp->tasks_pids[mid] == pid) {
2235 index = mid; 2268 index = mid;
2236 break; 2269 break;
2237 } else if (cgrp->tasks_pids[mid] <= pid) 2270 } else if (cp->tasks_pids[mid] <= pid)
2238 index = mid + 1; 2271 index = mid + 1;
2239 else 2272 else
2240 end = mid; 2273 end = mid;
2241 } 2274 }
2242 } 2275 }
2243 /* If we're off the end of the array, we're done */ 2276 /* If we're off the end of the array, we're done */
2244 if (index >= cgrp->pids_length) 2277 if (index >= cp->length)
2245 return NULL; 2278 return NULL;
2246 /* Update the abstract position to be the actual pid that we found */ 2279 /* Update the abstract position to be the actual pid that we found */
2247 iter = cgrp->tasks_pids + index; 2280 iter = cp->tasks_pids + index;
2248 *pos = *iter; 2281 *pos = *iter;
2249 return iter; 2282 return iter;
2250} 2283}
2251 2284
2252static void cgroup_tasks_stop(struct seq_file *s, void *v) 2285static void cgroup_tasks_stop(struct seq_file *s, void *v)
2253{ 2286{
2254 struct cgroup *cgrp = s->private; 2287 struct cgroup_pids *cp = s->private;
2288 struct cgroup *cgrp = cp->cgrp;
2255 up_read(&cgrp->pids_mutex); 2289 up_read(&cgrp->pids_mutex);
2256} 2290}
2257 2291
2258static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos) 2292static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos)
2259{ 2293{
2260 struct cgroup *cgrp = s->private; 2294 struct cgroup_pids *cp = s->private;
2261 int *p = v; 2295 int *p = v;
2262 int *end = cgrp->tasks_pids + cgrp->pids_length; 2296 int *end = cp->tasks_pids + cp->length;
2263 2297
2264 /* 2298 /*
2265 * Advance to the next pid in the array. If this goes off the 2299 * Advance to the next pid in the array. If this goes off the
@@ -2286,26 +2320,33 @@ static struct seq_operations cgroup_tasks_seq_operations = {
2286 .show = cgroup_tasks_show, 2320 .show = cgroup_tasks_show,
2287}; 2321};
2288 2322
2289static void release_cgroup_pid_array(struct cgroup *cgrp) 2323static void release_cgroup_pid_array(struct cgroup_pids *cp)
2290{ 2324{
2325 struct cgroup *cgrp = cp->cgrp;
2326
2291 down_write(&cgrp->pids_mutex); 2327 down_write(&cgrp->pids_mutex);
2292 BUG_ON(!cgrp->pids_use_count); 2328 BUG_ON(!cp->use_count);
2293 if (!--cgrp->pids_use_count) { 2329 if (!--cp->use_count) {
2294 kfree(cgrp->tasks_pids); 2330 list_del(&cp->list);
2295 cgrp->tasks_pids = NULL; 2331 put_pid_ns(cp->ns);
2296 cgrp->pids_length = 0; 2332 kfree(cp->tasks_pids);
2333 kfree(cp);
2297 } 2334 }
2298 up_write(&cgrp->pids_mutex); 2335 up_write(&cgrp->pids_mutex);
2299} 2336}
2300 2337
2301static int cgroup_tasks_release(struct inode *inode, struct file *file) 2338static int cgroup_tasks_release(struct inode *inode, struct file *file)
2302{ 2339{
2303 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); 2340 struct seq_file *seq;
2341 struct cgroup_pids *cp;
2304 2342
2305 if (!(file->f_mode & FMODE_READ)) 2343 if (!(file->f_mode & FMODE_READ))
2306 return 0; 2344 return 0;
2307 2345
2308 release_cgroup_pid_array(cgrp); 2346 seq = file->private_data;
2347 cp = seq->private;
2348
2349 release_cgroup_pid_array(cp);
2309 return seq_release(inode, file); 2350 return seq_release(inode, file);
2310} 2351}
2311 2352
@@ -2324,6 +2365,8 @@ static struct file_operations cgroup_tasks_operations = {
2324static int cgroup_tasks_open(struct inode *unused, struct file *file) 2365static int cgroup_tasks_open(struct inode *unused, struct file *file)
2325{ 2366{
2326 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); 2367 struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
2368 struct pid_namespace *ns = current->nsproxy->pid_ns;
2369 struct cgroup_pids *cp;
2327 pid_t *pidarray; 2370 pid_t *pidarray;
2328 int npids; 2371 int npids;
2329 int retval; 2372 int retval;
@@ -2350,20 +2393,37 @@ static int cgroup_tasks_open(struct inode *unused, struct file *file)
2350 * array if necessary 2393 * array if necessary
2351 */ 2394 */
2352 down_write(&cgrp->pids_mutex); 2395 down_write(&cgrp->pids_mutex);
2353 kfree(cgrp->tasks_pids); 2396
2354 cgrp->tasks_pids = pidarray; 2397 list_for_each_entry(cp, &cgrp->pids_list, list) {
2355 cgrp->pids_length = npids; 2398 if (ns == cp->ns)
2356 cgrp->pids_use_count++; 2399 goto found;
2400 }
2401
2402 cp = kzalloc(sizeof(*cp), GFP_KERNEL);
2403 if (!cp) {
2404 up_write(&cgrp->pids_mutex);
2405 kfree(pidarray);
2406 return -ENOMEM;
2407 }
2408 cp->cgrp = cgrp;
2409 cp->ns = ns;
2410 get_pid_ns(ns);
2411 list_add(&cp->list, &cgrp->pids_list);
2412found:
2413 kfree(cp->tasks_pids);
2414 cp->tasks_pids = pidarray;
2415 cp->length = npids;
2416 cp->use_count++;
2357 up_write(&cgrp->pids_mutex); 2417 up_write(&cgrp->pids_mutex);
2358 2418
2359 file->f_op = &cgroup_tasks_operations; 2419 file->f_op = &cgroup_tasks_operations;
2360 2420
2361 retval = seq_open(file, &cgroup_tasks_seq_operations); 2421 retval = seq_open(file, &cgroup_tasks_seq_operations);
2362 if (retval) { 2422 if (retval) {
2363 release_cgroup_pid_array(cgrp); 2423 release_cgroup_pid_array(cp);
2364 return retval; 2424 return retval;
2365 } 2425 }
2366 ((struct seq_file *)file->private_data)->private = cgrp; 2426 ((struct seq_file *)file->private_data)->private = cp;
2367 return 0; 2427 return 0;
2368} 2428}
2369 2429
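The hunks above turn the single per-cgroup pid array into a small cache keyed by pid namespace: cgroup_tasks_open() walks cgrp->pids_list for an entry whose ns matches the opener, allocates one if none exists, installs the freshly built array, and bumps use_count; release_cgroup_pid_array() drops the count and frees the entry when the last reader goes away. A minimal user-space sketch of the same lookup-or-allocate-and-refcount pattern, with hypothetical types standing in for struct cgroup_pids (not the kernel API):

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical analogue of struct cgroup_pids: one cached pid array per "namespace". */
struct pid_cache {
	struct pid_cache *next;
	int ns_id;          /* stands in for the pid_namespace pointer */
	int *pids;          /* stands in for tasks_pids                */
	size_t len;         /* stands in for length                    */
	int use_count;      /* how many open files share this array    */
};

static struct pid_cache *cache_list;

/* Look up the entry for ns_id, or allocate one; install the new array either way. */
static struct pid_cache *get_cache(int ns_id, int *pids, size_t len)
{
	struct pid_cache *cp;

	for (cp = cache_list; cp; cp = cp->next)
		if (cp->ns_id == ns_id)
			goto found;

	cp = calloc(1, sizeof(*cp));
	if (!cp) {
		free(pids);
		return NULL;
	}
	cp->ns_id = ns_id;
	cp->next = cache_list;
	cache_list = cp;
found:
	free(cp->pids);        /* replace any stale snapshot */
	cp->pids = pids;
	cp->len = len;
	cp->use_count++;
	return cp;
}

/* Drop one reference; free the entry when the last user goes away. */
static void put_cache(struct pid_cache *cp)
{
	struct pid_cache **link;

	if (--cp->use_count)
		return;
	for (link = &cache_list; *link; link = &(*link)->next)
		if (*link == cp) {
			*link = cp->next;
			break;
		}
	free(cp->pids);
	free(cp);
}

int main(void)
{
	int *snap = malloc(3 * sizeof(int));

	memcpy(snap, (int[]){1, 2, 3}, 3 * sizeof(int));
	struct pid_cache *cp = get_cache(42, snap, 3);
	printf("ns %d caches %zu pids, use_count=%d\n", cp->ns_id, cp->len, cp->use_count);
	put_cache(cp);
	return 0;
}
```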
@@ -2696,33 +2756,42 @@ again:
2696 mutex_unlock(&cgroup_mutex); 2756 mutex_unlock(&cgroup_mutex);
2697 2757
2698 /* 2758 /*
2759 * In general, subsystem has no css->refcnt after pre_destroy(). But
2760 * in racy cases, subsystem may have to get css->refcnt after
2761 * pre_destroy() and it makes rmdir return with -EBUSY. This sometimes
2762 * make rmdir return -EBUSY too often. To avoid that, we use waitqueue
2763 * for cgroup's rmdir. CGRP_WAIT_ON_RMDIR is for synchronizing rmdir
2764 * and subsystem's reference count handling. Please see css_get/put
2765 * and css_tryget() and cgroup_wakeup_rmdir_waiter() implementation.
2766 */
2767 set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
2768
2769 /*
2699 * Call pre_destroy handlers of subsys. Notify subsystems 2770 * Call pre_destroy handlers of subsys. Notify subsystems
2700 * that rmdir() request comes. 2771 * that rmdir() request comes.
2701 */ 2772 */
2702 ret = cgroup_call_pre_destroy(cgrp); 2773 ret = cgroup_call_pre_destroy(cgrp);
2703 if (ret) 2774 if (ret) {
2775 clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
2704 return ret; 2776 return ret;
2777 }
2705 2778
2706 mutex_lock(&cgroup_mutex); 2779 mutex_lock(&cgroup_mutex);
2707 parent = cgrp->parent; 2780 parent = cgrp->parent;
2708 if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) { 2781 if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
2782 clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
2709 mutex_unlock(&cgroup_mutex); 2783 mutex_unlock(&cgroup_mutex);
2710 return -EBUSY; 2784 return -EBUSY;
2711 } 2785 }
2712 /*
2713 * css_put/get is provided for subsys to grab refcnt to css. In typical
2714 * case, subsystem has no reference after pre_destroy(). But, under
2715 * hierarchy management, some *temporal* refcnt can be hold.
2716 * To avoid returning -EBUSY to a user, waitqueue is used. If subsys
2717 * is really busy, it should return -EBUSY at pre_destroy(). wake_up
2718 * is called when css_put() is called and refcnt goes down to 0.
2719 */
2720 set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
2721 prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE); 2786 prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
2722
2723 if (!cgroup_clear_css_refs(cgrp)) { 2787 if (!cgroup_clear_css_refs(cgrp)) {
2724 mutex_unlock(&cgroup_mutex); 2788 mutex_unlock(&cgroup_mutex);
2725 schedule(); 2789 /*
2790 * Because someone may call cgroup_wakeup_rmdir_waiter() before
2791 * prepare_to_wait(), we need to check this flag.
2792 */
2793 if (test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))
2794 schedule();
2726 finish_wait(&cgroup_rmdir_waitq, &wait); 2795 finish_wait(&cgroup_rmdir_waitq, &wait);
2727 clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags); 2796 clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
2728 if (signal_pending(current)) 2797 if (signal_pending(current))
@@ -3294,7 +3363,7 @@ void __css_put(struct cgroup_subsys_state *css)
3294 set_bit(CGRP_RELEASABLE, &cgrp->flags); 3363 set_bit(CGRP_RELEASABLE, &cgrp->flags);
3295 check_for_release(cgrp); 3364 check_for_release(cgrp);
3296 } 3365 }
3297 cgroup_wakeup_rmdir_waiters(cgrp); 3366 cgroup_wakeup_rmdir_waiter(cgrp);
3298 } 3367 }
3299 rcu_read_unlock(); 3368 rcu_read_unlock();
3300} 3369}
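The rmdir hunks above close a lost-wakeup window: CGRP_WAIT_ON_RMDIR is now set before pre_destroy(), the waker uses test_and_clear_bit(), and the sleeper re-checks the bit after prepare_to_wait() so a wakeup that fired early is not slept through. The same "re-check the predicate before blocking" rule is what a condition-variable wait loop expresses in user space; a small pthread sketch of the idea (illustrative only, not the kernel code):

```c
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
static int wait_on_rmdir;               /* plays the role of CGRP_WAIT_ON_RMDIR */

/* Waker: clear the flag and signal, like cgroup_wakeup_rmdir_waiter(). */
static void *waker(void *arg)
{
	pthread_mutex_lock(&lock);
	if (wait_on_rmdir) {            /* test_and_clear_bit() analogue */
		wait_on_rmdir = 0;
		pthread_cond_broadcast(&cond);
	}
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_mutex_lock(&lock);
	wait_on_rmdir = 1;              /* set the flag before starting the operation */
	pthread_mutex_unlock(&lock);

	pthread_create(&t, NULL, waker, NULL);

	pthread_mutex_lock(&lock);
	/* Re-check the flag before sleeping: the wakeup may already have happened. */
	while (wait_on_rmdir)
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);

	pthread_join(t, NULL);
	printf("rmdir waiter woken without losing the event\n");
	return 0;
}
```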
diff --git a/kernel/exit.c b/kernel/exit.c
index 628d41f0dd54..869dc221733e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -12,7 +12,6 @@
12#include <linux/completion.h> 12#include <linux/completion.h>
13#include <linux/personality.h> 13#include <linux/personality.h>
14#include <linux/tty.h> 14#include <linux/tty.h>
15#include <linux/mnt_namespace.h>
16#include <linux/iocontext.h> 15#include <linux/iocontext.h>
17#include <linux/key.h> 16#include <linux/key.h>
18#include <linux/security.h> 17#include <linux/security.h>
diff --git a/kernel/fork.c b/kernel/fork.c
index 467746b3f0aa..021e1138556e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -17,7 +17,6 @@
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/vmalloc.h> 18#include <linux/vmalloc.h>
19#include <linux/completion.h> 19#include <linux/completion.h>
20#include <linux/mnt_namespace.h>
21#include <linux/personality.h> 20#include <linux/personality.h>
22#include <linux/mempolicy.h> 21#include <linux/mempolicy.h>
23#include <linux/sem.h> 22#include <linux/sem.h>
@@ -427,6 +426,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
427 init_rwsem(&mm->mmap_sem); 426 init_rwsem(&mm->mmap_sem);
428 INIT_LIST_HEAD(&mm->mmlist); 427 INIT_LIST_HEAD(&mm->mmlist);
429 mm->flags = (current->mm) ? current->mm->flags : default_dump_filter; 428 mm->flags = (current->mm) ? current->mm->flags : default_dump_filter;
429 mm->oom_adj = (current->mm) ? current->mm->oom_adj : 0;
430 mm->core_state = NULL; 430 mm->core_state = NULL;
431 mm->nr_ptes = 0; 431 mm->nr_ptes = 0;
432 set_mm_counter(mm, file_rss, 0); 432 set_mm_counter(mm, file_rss, 0);
@@ -568,18 +568,18 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
568 * the value intact in a core dump, and to save the unnecessary 568 * the value intact in a core dump, and to save the unnecessary
569 * trouble otherwise. Userland only wants this done for a sys_exit. 569 * trouble otherwise. Userland only wants this done for a sys_exit.
570 */ 570 */
571 if (tsk->clear_child_tid 571 if (tsk->clear_child_tid) {
572 && !(tsk->flags & PF_SIGNALED) 572 if (!(tsk->flags & PF_SIGNALED) &&
573 && atomic_read(&mm->mm_users) > 1) { 573 atomic_read(&mm->mm_users) > 1) {
574 u32 __user * tidptr = tsk->clear_child_tid; 574 /*
575 * We don't check the error code - if userspace has
576 * not set up a proper pointer then tough luck.
577 */
578 put_user(0, tsk->clear_child_tid);
579 sys_futex(tsk->clear_child_tid, FUTEX_WAKE,
580 1, NULL, NULL, 0);
581 }
575 tsk->clear_child_tid = NULL; 582 tsk->clear_child_tid = NULL;
576
577 /*
578 * We don't check the error code - if userspace has
579 * not set up a proper pointer then tough luck.
580 */
581 put_user(0, tidptr);
582 sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL, 0);
583 } 583 }
584} 584}
585 585
@@ -1269,6 +1269,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1269 write_unlock_irq(&tasklist_lock); 1269 write_unlock_irq(&tasklist_lock);
1270 proc_fork_connector(p); 1270 proc_fork_connector(p);
1271 cgroup_post_fork(p); 1271 cgroup_post_fork(p);
1272 perf_counter_fork(p);
1272 return p; 1273 return p;
1273 1274
1274bad_fork_free_pid: 1275bad_fork_free_pid:
@@ -1408,12 +1409,6 @@ long do_fork(unsigned long clone_flags,
1408 if (clone_flags & CLONE_VFORK) { 1409 if (clone_flags & CLONE_VFORK) {
1409 p->vfork_done = &vfork; 1410 p->vfork_done = &vfork;
1410 init_completion(&vfork); 1411 init_completion(&vfork);
1411 } else if (!(clone_flags & CLONE_VM)) {
1412 /*
1413 * vfork will do an exec which will call
1414 * set_task_comm()
1415 */
1416 perf_counter_fork(p);
1417 } 1412 }
1418 1413
1419 audit_finish_fork(p); 1414 audit_finish_fork(p);
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 2f4936cf7083..bd1d42b17cb2 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -44,12 +44,19 @@ void refrigerator(void)
44 recalc_sigpending(); /* We sent fake signal, clean it up */ 44 recalc_sigpending(); /* We sent fake signal, clean it up */
45 spin_unlock_irq(&current->sighand->siglock); 45 spin_unlock_irq(&current->sighand->siglock);
46 46
47 /* prevent accounting of that task to load */
48 current->flags |= PF_FREEZING;
49
47 for (;;) { 50 for (;;) {
48 set_current_state(TASK_UNINTERRUPTIBLE); 51 set_current_state(TASK_UNINTERRUPTIBLE);
49 if (!frozen(current)) 52 if (!frozen(current))
50 break; 53 break;
51 schedule(); 54 schedule();
52 } 55 }
56
57 /* Remove the accounting blocker */
58 current->flags &= ~PF_FREEZING;
59
53 pr_debug("%s left refrigerator\n", current->comm); 60 pr_debug("%s left refrigerator\n", current->comm);
54 __set_current_state(save); 61 __set_current_state(save);
55} 62}
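PF_FREEZING is set around the uninterruptible sleep so a task parked in the refrigerator is not counted as blocked load. A hedged sketch of the idea with made-up types and constants (the real predicate lives in the scheduler headers and differs in detail):

```c
#include <stdio.h>

/* Illustrative stand-ins, not the kernel's definitions. */
#define TASK_UNINTERRUPTIBLE 0x02
#define PF_FREEZING          0x4000

struct task {
	unsigned int state;
	unsigned int flags;
};

/* A task in D state normally counts toward loadavg, unless it is only
 * sleeping because the freezer parked it there. */
static int contributes_to_load(const struct task *t)
{
	return (t->state & TASK_UNINTERRUPTIBLE) && !(t->flags & PF_FREEZING);
}

int main(void)
{
	struct task blocked  = { .state = TASK_UNINTERRUPTIBLE, .flags = 0 };
	struct task freezing = { .state = TASK_UNINTERRUPTIBLE, .flags = PF_FREEZING };

	printf("blocked task counts: %d\n", contributes_to_load(&blocked));
	printf("frozen task counts:  %d\n", contributes_to_load(&freezing));
	return 0;
}
```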
diff --git a/kernel/futex.c b/kernel/futex.c
index 794c862125fe..0672ff88f159 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -247,6 +247,7 @@ again:
247 if (err < 0) 247 if (err < 0)
248 return err; 248 return err;
249 249
250 page = compound_head(page);
250 lock_page(page); 251 lock_page(page);
251 if (!page->mapping) { 252 if (!page->mapping) {
252 unlock_page(page); 253 unlock_page(page);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 9002958a96e7..49da79ab8486 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -191,6 +191,46 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
191 } 191 }
192} 192}
193 193
194
195/*
196 * Get the preferred target CPU for NOHZ
197 */
198static int hrtimer_get_target(int this_cpu, int pinned)
199{
200#ifdef CONFIG_NO_HZ
201 if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) {
202 int preferred_cpu = get_nohz_load_balancer();
203
204 if (preferred_cpu >= 0)
205 return preferred_cpu;
206 }
207#endif
208 return this_cpu;
209}
210
211/*
212 * With HIGHRES=y we do not migrate the timer when it is expiring
213 * before the next event on the target cpu because we cannot reprogram
214 * the target cpu hardware and we would cause it to fire late.
215 *
216 * Called with cpu_base->lock of target cpu held.
217 */
218static int
219hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
220{
221#ifdef CONFIG_HIGH_RES_TIMERS
222 ktime_t expires;
223
224 if (!new_base->cpu_base->hres_active)
225 return 0;
226
227 expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
228 return expires.tv64 <= new_base->cpu_base->expires_next.tv64;
229#else
230 return 0;
231#endif
232}
233
194/* 234/*
195 * Switch the timer base to the current CPU when possible. 235 * Switch the timer base to the current CPU when possible.
196 */ 236 */
@@ -200,16 +240,8 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
200{ 240{
201 struct hrtimer_clock_base *new_base; 241 struct hrtimer_clock_base *new_base;
202 struct hrtimer_cpu_base *new_cpu_base; 242 struct hrtimer_cpu_base *new_cpu_base;
203 int cpu, preferred_cpu = -1; 243 int this_cpu = smp_processor_id();
204 244 int cpu = hrtimer_get_target(this_cpu, pinned);
205 cpu = smp_processor_id();
206#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
207 if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
208 preferred_cpu = get_nohz_load_balancer();
209 if (preferred_cpu >= 0)
210 cpu = preferred_cpu;
211 }
212#endif
213 245
214again: 246again:
215 new_cpu_base = &per_cpu(hrtimer_bases, cpu); 247 new_cpu_base = &per_cpu(hrtimer_bases, cpu);
@@ -217,7 +249,7 @@ again:
217 249
218 if (base != new_base) { 250 if (base != new_base) {
219 /* 251 /*
220 * We are trying to schedule the timer on the local CPU. 252 * We are trying to move timer to new_base.
221 * However we can't change timer's base while it is running, 253 * However we can't change timer's base while it is running,
222 * so we keep it on the same CPU. No hassle vs. reprogramming 254 * so we keep it on the same CPU. No hassle vs. reprogramming
223 * the event source in the high resolution case. The softirq 255 * the event source in the high resolution case. The softirq
@@ -233,38 +265,12 @@ again:
233 spin_unlock(&base->cpu_base->lock); 265 spin_unlock(&base->cpu_base->lock);
234 spin_lock(&new_base->cpu_base->lock); 266 spin_lock(&new_base->cpu_base->lock);
235 267
236 /* Optimized away for NOHZ=n SMP=n */ 268 if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
237 if (cpu == preferred_cpu) { 269 cpu = this_cpu;
238 /* Calculate clock monotonic expiry time */ 270 spin_unlock(&new_base->cpu_base->lock);
239#ifdef CONFIG_HIGH_RES_TIMERS 271 spin_lock(&base->cpu_base->lock);
240 ktime_t expires = ktime_sub(hrtimer_get_expires(timer), 272 timer->base = base;
241 new_base->offset); 273 goto again;
242#else
243 ktime_t expires = hrtimer_get_expires(timer);
244#endif
245
246 /*
247 * Get the next event on target cpu from the
248 * clock events layer.
249 * This covers the highres=off nohz=on case as well.
250 */
251 ktime_t next = clockevents_get_next_event(cpu);
252
253 ktime_t delta = ktime_sub(expires, next);
254
255 /*
256 * We do not migrate the timer when it is expiring
257 * before the next event on the target cpu because
258 * we cannot reprogram the target cpu hardware and
259 * we would cause it to fire late.
260 */
261 if (delta.tv64 < 0) {
262 cpu = smp_processor_id();
263 spin_unlock(&new_base->cpu_base->lock);
264 spin_lock(&base->cpu_base->lock);
265 timer->base = base;
266 goto again;
267 }
268 } 274 }
269 timer->base = new_base; 275 timer->base = new_base;
270 } 276 }
@@ -1276,14 +1282,22 @@ void hrtimer_interrupt(struct clock_event_device *dev)
1276 1282
1277 expires_next.tv64 = KTIME_MAX; 1283 expires_next.tv64 = KTIME_MAX;
1278 1284
1285 spin_lock(&cpu_base->lock);
1286 /*
1287 * We set expires_next to KTIME_MAX here with cpu_base->lock
1288 * held to prevent that a timer is enqueued in our queue via
1289 * the migration code. This does not affect enqueueing of
1290 * timers which run their callback and need to be requeued on
1291 * this CPU.
1292 */
1293 cpu_base->expires_next.tv64 = KTIME_MAX;
1294
1279 base = cpu_base->clock_base; 1295 base = cpu_base->clock_base;
1280 1296
1281 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { 1297 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
1282 ktime_t basenow; 1298 ktime_t basenow;
1283 struct rb_node *node; 1299 struct rb_node *node;
1284 1300
1285 spin_lock(&cpu_base->lock);
1286
1287 basenow = ktime_add(now, base->offset); 1301 basenow = ktime_add(now, base->offset);
1288 1302
1289 while ((node = base->first)) { 1303 while ((node = base->first)) {
@@ -1316,11 +1330,15 @@ void hrtimer_interrupt(struct clock_event_device *dev)
1316 1330
1317 __run_hrtimer(timer); 1331 __run_hrtimer(timer);
1318 } 1332 }
1319 spin_unlock(&cpu_base->lock);
1320 base++; 1333 base++;
1321 } 1334 }
1322 1335
1336 /*
1337 * Store the new expiry value so the migration code can verify
1338 * against it.
1339 */
1323 cpu_base->expires_next = expires_next; 1340 cpu_base->expires_next = expires_next;
1341 spin_unlock(&cpu_base->lock);
1324 1342
1325 /* Reprogramming necessary ? */ 1343 /* Reprogramming necessary ? */
1326 if (expires_next.tv64 != KTIME_MAX) { 1344 if (expires_next.tv64 != KTIME_MAX) {
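The hrtimer rework splits target selection into two questions: hrtimer_get_target() picks the preferred CPU (a non-idle one under NOHZ when migration is allowed), and hrtimer_check_target() vetoes the move if the timer would expire before the event the target CPU has already programmed, in which case the code falls back to the local CPU and retries. A compact sketch of that decision, with hypothetical types standing in for the hrtimer structures:

```c
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the hrtimer per-CPU state. */
struct cpu_base {
	int     hres_active;
	int64_t expires_next;   /* next event already programmed on this CPU */
};

/* Moving a timer to 'target' is only worthwhile if the hardware there can
 * still be reprogrammed in time, i.e. the timer expires after the event the
 * target CPU has already armed. */
static int can_migrate(int64_t expires, const struct cpu_base *target)
{
	if (!target->hres_active)
		return 1;                        /* no high-res constraint to violate */
	return expires > target->expires_next;   /* would fire late otherwise */
}

static int pick_cpu(int this_cpu, int preferred, int64_t expires,
		    const struct cpu_base *bases)
{
	if (preferred >= 0 && preferred != this_cpu &&
	    can_migrate(expires, &bases[preferred]))
		return preferred;
	return this_cpu;                         /* keep it local, as the retry path does */
}

int main(void)
{
	struct cpu_base bases[2] = {
		{ .hres_active = 1, .expires_next = 1000 },   /* CPU 0 (local)     */
		{ .hres_active = 1, .expires_next =  500 },   /* CPU 1 (preferred) */
	};

	printf("timer @400 -> cpu %d\n", pick_cpu(0, 1, 400, bases)); /* too early: stays on 0 */
	printf("timer @900 -> cpu %d\n", pick_cpu(0, 1, 900, bases)); /* migrates to 1 */
	return 0;
}
```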
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 73468253143b..e70ed5592eb9 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -42,8 +42,7 @@ static inline void unregister_handler_proc(unsigned int irq,
42 42
43extern int irq_select_affinity_usr(unsigned int irq); 43extern int irq_select_affinity_usr(unsigned int irq);
44 44
45extern void 45extern void irq_set_thread_affinity(struct irq_desc *desc);
46irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask);
47 46
48/* 47/*
49 * Debugging printout: 48 * Debugging printout:
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 50da67672901..61c679db4687 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -80,14 +80,22 @@ int irq_can_set_affinity(unsigned int irq)
80 return 1; 80 return 1;
81} 81}
82 82
83void 83/**
84irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask) 84 * irq_set_thread_affinity - Notify irq threads to adjust affinity
85 * @desc: irq descriptor which has affinity changed
86 *
87 * We just set IRQTF_AFFINITY and delegate the affinity setting
88 * to the interrupt thread itself. We can not call
89 * set_cpus_allowed_ptr() here as we hold desc->lock and this
90 * code can be called from hard interrupt context.
91 */
92void irq_set_thread_affinity(struct irq_desc *desc)
85{ 93{
86 struct irqaction *action = desc->action; 94 struct irqaction *action = desc->action;
87 95
88 while (action) { 96 while (action) {
89 if (action->thread) 97 if (action->thread)
90 set_cpus_allowed_ptr(action->thread, cpumask); 98 set_bit(IRQTF_AFFINITY, &action->thread_flags);
91 action = action->next; 99 action = action->next;
92 } 100 }
93} 101}
@@ -112,7 +120,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
112 if (desc->status & IRQ_MOVE_PCNTXT) { 120 if (desc->status & IRQ_MOVE_PCNTXT) {
113 if (!desc->chip->set_affinity(irq, cpumask)) { 121 if (!desc->chip->set_affinity(irq, cpumask)) {
114 cpumask_copy(desc->affinity, cpumask); 122 cpumask_copy(desc->affinity, cpumask);
115 irq_set_thread_affinity(desc, cpumask); 123 irq_set_thread_affinity(desc);
116 } 124 }
117 } 125 }
118 else { 126 else {
@@ -122,7 +130,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
122#else 130#else
123 if (!desc->chip->set_affinity(irq, cpumask)) { 131 if (!desc->chip->set_affinity(irq, cpumask)) {
124 cpumask_copy(desc->affinity, cpumask); 132 cpumask_copy(desc->affinity, cpumask);
125 irq_set_thread_affinity(desc, cpumask); 133 irq_set_thread_affinity(desc);
126 } 134 }
127#endif 135#endif
128 desc->status |= IRQ_AFFINITY_SET; 136 desc->status |= IRQ_AFFINITY_SET;
@@ -176,7 +184,7 @@ int irq_select_affinity_usr(unsigned int irq)
176 spin_lock_irqsave(&desc->lock, flags); 184 spin_lock_irqsave(&desc->lock, flags);
177 ret = setup_affinity(irq, desc); 185 ret = setup_affinity(irq, desc);
178 if (!ret) 186 if (!ret)
179 irq_set_thread_affinity(desc, desc->affinity); 187 irq_set_thread_affinity(desc);
180 spin_unlock_irqrestore(&desc->lock, flags); 188 spin_unlock_irqrestore(&desc->lock, flags);
181 189
182 return ret; 190 return ret;
@@ -443,6 +451,39 @@ static int irq_wait_for_interrupt(struct irqaction *action)
443 return -1; 451 return -1;
444} 452}
445 453
454#ifdef CONFIG_SMP
455/*
456 * Check whether we need to change the affinity of the interrupt thread.
457 */
458static void
459irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action)
460{
461 cpumask_var_t mask;
462
463 if (!test_and_clear_bit(IRQTF_AFFINITY, &action->thread_flags))
464 return;
465
466 /*
467 * In case we are out of memory we set IRQTF_AFFINITY again and
468 * try again next time
469 */
470 if (!alloc_cpumask_var(&mask, GFP_KERNEL)) {
471 set_bit(IRQTF_AFFINITY, &action->thread_flags);
472 return;
473 }
474
475 spin_lock_irq(&desc->lock);
476 cpumask_copy(mask, desc->affinity);
477 spin_unlock_irq(&desc->lock);
478
479 set_cpus_allowed_ptr(current, mask);
480 free_cpumask_var(mask);
481}
482#else
483static inline void
484irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { }
485#endif
486
446/* 487/*
447 * Interrupt handler thread 488 * Interrupt handler thread
448 */ 489 */
@@ -458,6 +499,8 @@ static int irq_thread(void *data)
458 499
459 while (!irq_wait_for_interrupt(action)) { 500 while (!irq_wait_for_interrupt(action)) {
460 501
502 irq_thread_check_affinity(desc, action);
503
461 atomic_inc(&desc->threads_active); 504 atomic_inc(&desc->threads_active);
462 505
463 spin_lock_irq(&desc->lock); 506 spin_lock_irq(&desc->lock);
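irq_set_thread_affinity() no longer calls set_cpus_allowed_ptr() while holding desc->lock; it only sets IRQTF_AFFINITY and lets the interrupt thread apply the new mask to itself the next time it runs. The same "flag it from the hot path, apply it in the thread" pattern in user space might look like the sketch below (GNU-specific pthread_setaffinity_np, illustrative only):

```c
#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_int affinity_pending;     /* plays the role of IRQTF_AFFINITY */
static cpu_set_t  wanted_mask;          /* plays the role of desc->affinity */

/* Worker thread: notices the flag and moves itself, like irq_thread(). */
static void *worker(void *arg)
{
	for (int i = 0; i < 5; i++) {
		if (atomic_exchange(&affinity_pending, 0))
			pthread_setaffinity_np(pthread_self(),
					       sizeof(wanted_mask), &wanted_mask);
		/* ... handle one "interrupt" ... */
		usleep(1000);
	}
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, worker, NULL);

	/* "Hard irq context": just record the request and raise the flag. */
	CPU_ZERO(&wanted_mask);
	CPU_SET(0, &wanted_mask);
	atomic_store(&affinity_pending, 1);

	pthread_join(t, NULL);
	puts("worker applied its own affinity change");
	return 0;
}
```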
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index cfe767ca1545..fcb6c96f2627 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -45,7 +45,7 @@ void move_masked_irq(int irq)
45 < nr_cpu_ids)) 45 < nr_cpu_ids))
46 if (!desc->chip->set_affinity(irq, desc->pending_mask)) { 46 if (!desc->chip->set_affinity(irq, desc->pending_mask)) {
47 cpumask_copy(desc->affinity, desc->pending_mask); 47 cpumask_copy(desc->affinity, desc->pending_mask);
48 irq_set_thread_affinity(desc, desc->pending_mask); 48 irq_set_thread_affinity(desc);
49 } 49 }
50 50
51 cpumask_clear(desc->pending_mask); 51 cpumask_clear(desc->pending_mask);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index ae1c35201cc8..f336e2107f98 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1228,7 +1228,7 @@ static int __init parse_crashkernel_mem(char *cmdline,
1228 } while (*cur++ == ','); 1228 } while (*cur++ == ',');
1229 1229
1230 if (*crash_size > 0) { 1230 if (*crash_size > 0) {
1231 while (*cur != ' ' && *cur != '@') 1231 while (*cur && *cur != ' ' && *cur != '@')
1232 cur++; 1232 cur++;
1233 if (*cur == '@') { 1233 if (*cur == '@') {
1234 cur++; 1234 cur++;
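The kexec fix adds a '*cur &&' termination check so that scanning for the optional '@offset' suffix stops at the end of the string instead of walking past it when neither a space nor '@' follows the size. The difference, isolated in a tiny example:

```c
#include <stdio.h>

/* Return a pointer to the '@' suffix, or to the terminating NUL if none.
 * Without the *cur check this loop would run off the end of the buffer
 * for an argument like "crashkernel=64M" that has no '@offset' part. */
static const char *skip_to_offset(const char *cur)
{
	while (*cur && *cur != ' ' && *cur != '@')
		cur++;
	return cur;
}

int main(void)
{
	printf("with offset:    \"%s\"\n", skip_to_offset("64M@16M"));
	printf("without offset: \"%s\"\n", skip_to_offset("64M"));
	return 0;
}
```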
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 7e95bedb2bfc..385c31a1bdbf 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -24,7 +24,6 @@
24#include <linux/unistd.h> 24#include <linux/unistd.h>
25#include <linux/kmod.h> 25#include <linux/kmod.h>
26#include <linux/slab.h> 26#include <linux/slab.h>
27#include <linux/mnt_namespace.h>
28#include <linux/completion.h> 27#include <linux/completion.h>
29#include <linux/file.h> 28#include <linux/file.h>
30#include <linux/fdtable.h> 29#include <linux/fdtable.h>
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index c0fa54b276d9..0540948e29ab 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -237,13 +237,9 @@ static int __kprobes collect_garbage_slots(void)
237{ 237{
238 struct kprobe_insn_page *kip; 238 struct kprobe_insn_page *kip;
239 struct hlist_node *pos, *next; 239 struct hlist_node *pos, *next;
240 int safety;
241 240
242 /* Ensure no-one is preepmted on the garbages */ 241 /* Ensure no-one is preepmted on the garbages */
243 mutex_unlock(&kprobe_insn_mutex); 242 if (check_safety())
244 safety = check_safety();
245 mutex_lock(&kprobe_insn_mutex);
246 if (safety != 0)
247 return -EAGAIN; 243 return -EAGAIN;
248 244
249 hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) { 245 hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) {
@@ -698,7 +694,7 @@ int __kprobes register_kprobe(struct kprobe *p)
698 p->addr = addr; 694 p->addr = addr;
699 695
700 preempt_disable(); 696 preempt_disable();
701 if (!__kernel_text_address((unsigned long) p->addr) || 697 if (!kernel_text_address((unsigned long) p->addr) ||
702 in_kprobes_functions((unsigned long) p->addr)) { 698 in_kprobes_functions((unsigned long) p->addr)) {
703 preempt_enable(); 699 preempt_enable();
704 return -EINVAL; 700 return -EINVAL;
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 9b1a7de26979..eb8751aa0418 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -180,10 +180,12 @@ EXPORT_SYMBOL(kthread_bind);
180 * @k: thread created by kthread_create(). 180 * @k: thread created by kthread_create().
181 * 181 *
182 * Sets kthread_should_stop() for @k to return true, wakes it, and 182 * Sets kthread_should_stop() for @k to return true, wakes it, and
183 * waits for it to exit. Your threadfn() must not call do_exit() 183 * waits for it to exit. This can also be called after kthread_create()
184 * itself if you use this function! This can also be called after 184 * instead of calling wake_up_process(): the thread will exit without
185 * kthread_create() instead of calling wake_up_process(): the thread 185 * calling threadfn().
186 * will exit without calling threadfn(). 186 *
187 * If threadfn() may call do_exit() itself, the caller must ensure
188 * task_struct can't go away.
187 * 189 *
188 * Returns the result of threadfn(), or %-EINTR if wake_up_process() 190 * Returns the result of threadfn(), or %-EINTR if wake_up_process()
189 * was never called. 191 * was never called.
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index d7135aa2d2c4..e94caa666dba 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -758,7 +758,8 @@ static int __init lockdep_proc_init(void)
758 &proc_lockdep_stats_operations); 758 &proc_lockdep_stats_operations);
759 759
760#ifdef CONFIG_LOCK_STAT 760#ifdef CONFIG_LOCK_STAT
761 proc_create("lock_stat", S_IRUSR, NULL, &proc_lock_stat_operations); 761 proc_create("lock_stat", S_IRUSR | S_IWUSR, NULL,
762 &proc_lock_stat_operations);
762#endif 763#endif
763 764
764 return 0; 765 return 0;
diff --git a/kernel/module.c b/kernel/module.c
index 38928fcaff2b..fd1411403558 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1068,7 +1068,8 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs,
1068{ 1068{
1069 const unsigned long *crc; 1069 const unsigned long *crc;
1070 1070
1071 if (!find_symbol("module_layout", NULL, &crc, true, false)) 1071 if (!find_symbol(MODULE_SYMBOL_PREFIX "module_layout", NULL,
1072 &crc, true, false))
1072 BUG(); 1073 BUG();
1073 return check_version(sechdrs, versindex, "module_layout", mod, crc); 1074 return check_version(sechdrs, versindex, "module_layout", mod, crc);
1074} 1075}
@@ -2451,9 +2452,9 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
2451 return ret; 2452 return ret;
2452 } 2453 }
2453 if (ret > 0) { 2454 if (ret > 0) {
2454 printk(KERN_WARNING "%s: '%s'->init suspiciously returned %d, " 2455 printk(KERN_WARNING
2455 "it should follow 0/-E convention\n" 2456"%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n"
2456 KERN_WARNING "%s: loading module anyway...\n", 2457"%s: loading module anyway...\n",
2457 __func__, mod->name, ret, 2458 __func__, mod->name, ret,
2458 __func__); 2459 __func__);
2459 dump_stack(); 2460 dump_stack();
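The module.c warning is reflowed so the whole message is one format string with a single leading KERN_WARNING, rather than a second level marker concatenated into the middle of the string. When two independent lines really do need their own level, the usual pattern is one printk() per line, as in this hedged module sketch (illustrative, not the patch itself):

```c
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>

static int __init demo_init(void)
{
	int ret = 1;    /* pretend a module's init returned a positive value */

	/* Each printk() starts a new record, so each one names its level. */
	printk(KERN_WARNING "%s: init suspiciously returned %d, "
	       "it should follow 0/-E convention\n", __func__, ret);
	printk(KERN_WARNING "%s: loading module anyway...\n", __func__);
	return 0;
}

static void __exit demo_exit(void)
{
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
```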
diff --git a/kernel/panic.c b/kernel/panic.c
index 984b3ecbd72c..512ab73b0ca3 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -301,6 +301,7 @@ int oops_may_print(void)
301 */ 301 */
302void oops_enter(void) 302void oops_enter(void)
303{ 303{
304 tracing_off();
304 /* can't trust the integrity of the kernel anymore: */ 305 /* can't trust the integrity of the kernel anymore: */
305 debug_locks_off(); 306 debug_locks_off();
306 do_oops_enter_exit(); 307 do_oops_enter_exit();
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index d55a50da2347..868102172aa4 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -42,6 +42,7 @@ static int perf_overcommit __read_mostly = 1;
42static atomic_t nr_counters __read_mostly; 42static atomic_t nr_counters __read_mostly;
43static atomic_t nr_mmap_counters __read_mostly; 43static atomic_t nr_mmap_counters __read_mostly;
44static atomic_t nr_comm_counters __read_mostly; 44static atomic_t nr_comm_counters __read_mostly;
45static atomic_t nr_task_counters __read_mostly;
45 46
46/* 47/*
47 * perf counter paranoia level: 48 * perf counter paranoia level:
@@ -146,6 +147,28 @@ static void put_ctx(struct perf_counter_context *ctx)
146 } 147 }
147} 148}
148 149
150static void unclone_ctx(struct perf_counter_context *ctx)
151{
152 if (ctx->parent_ctx) {
153 put_ctx(ctx->parent_ctx);
154 ctx->parent_ctx = NULL;
155 }
156}
157
158/*
159 * If we inherit counters we want to return the parent counter id
160 * to userspace.
161 */
162static u64 primary_counter_id(struct perf_counter *counter)
163{
164 u64 id = counter->id;
165
166 if (counter->parent)
167 id = counter->parent->id;
168
169 return id;
170}
171
149/* 172/*
150 * Get the perf_counter_context for a task and lock it. 173 * Get the perf_counter_context for a task and lock it.
151 * This has to cope with with the fact that until it is locked, 174 * This has to cope with with the fact that until it is locked,
@@ -1081,7 +1104,7 @@ static void perf_counter_sync_stat(struct perf_counter_context *ctx,
1081 __perf_counter_sync_stat(counter, next_counter); 1104 __perf_counter_sync_stat(counter, next_counter);
1082 1105
1083 counter = list_next_entry(counter, event_entry); 1106 counter = list_next_entry(counter, event_entry);
1084 next_counter = list_next_entry(counter, event_entry); 1107 next_counter = list_next_entry(next_counter, event_entry);
1085 } 1108 }
1086} 1109}
1087 1110
@@ -1288,7 +1311,6 @@ static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
1288#define MAX_INTERRUPTS (~0ULL) 1311#define MAX_INTERRUPTS (~0ULL)
1289 1312
1290static void perf_log_throttle(struct perf_counter *counter, int enable); 1313static void perf_log_throttle(struct perf_counter *counter, int enable);
1291static void perf_log_period(struct perf_counter *counter, u64 period);
1292 1314
1293static void perf_adjust_period(struct perf_counter *counter, u64 events) 1315static void perf_adjust_period(struct perf_counter *counter, u64 events)
1294{ 1316{
@@ -1307,8 +1329,6 @@ static void perf_adjust_period(struct perf_counter *counter, u64 events)
1307 if (!sample_period) 1329 if (!sample_period)
1308 sample_period = 1; 1330 sample_period = 1;
1309 1331
1310 perf_log_period(counter, sample_period);
1311
1312 hwc->sample_period = sample_period; 1332 hwc->sample_period = sample_period;
1313} 1333}
1314 1334
@@ -1463,10 +1483,8 @@ static void perf_counter_enable_on_exec(struct task_struct *task)
1463 /* 1483 /*
1464 * Unclone this context if we enabled any counter. 1484 * Unclone this context if we enabled any counter.
1465 */ 1485 */
1466 if (enabled && ctx->parent_ctx) { 1486 if (enabled)
1467 put_ctx(ctx->parent_ctx); 1487 unclone_ctx(ctx);
1468 ctx->parent_ctx = NULL;
1469 }
1470 1488
1471 spin_unlock(&ctx->lock); 1489 spin_unlock(&ctx->lock);
1472 1490
@@ -1526,7 +1544,6 @@ __perf_counter_init_context(struct perf_counter_context *ctx,
1526 1544
1527static struct perf_counter_context *find_get_context(pid_t pid, int cpu) 1545static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
1528{ 1546{
1529 struct perf_counter_context *parent_ctx;
1530 struct perf_counter_context *ctx; 1547 struct perf_counter_context *ctx;
1531 struct perf_cpu_context *cpuctx; 1548 struct perf_cpu_context *cpuctx;
1532 struct task_struct *task; 1549 struct task_struct *task;
@@ -1586,11 +1603,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
1586 retry: 1603 retry:
1587 ctx = perf_lock_task_context(task, &flags); 1604 ctx = perf_lock_task_context(task, &flags);
1588 if (ctx) { 1605 if (ctx) {
1589 parent_ctx = ctx->parent_ctx; 1606 unclone_ctx(ctx);
1590 if (parent_ctx) {
1591 put_ctx(parent_ctx);
1592 ctx->parent_ctx = NULL; /* no longer a clone */
1593 }
1594 spin_unlock_irqrestore(&ctx->lock, flags); 1607 spin_unlock_irqrestore(&ctx->lock, flags);
1595 } 1608 }
1596 1609
@@ -1642,6 +1655,8 @@ static void free_counter(struct perf_counter *counter)
1642 atomic_dec(&nr_mmap_counters); 1655 atomic_dec(&nr_mmap_counters);
1643 if (counter->attr.comm) 1656 if (counter->attr.comm)
1644 atomic_dec(&nr_comm_counters); 1657 atomic_dec(&nr_comm_counters);
1658 if (counter->attr.task)
1659 atomic_dec(&nr_task_counters);
1645 } 1660 }
1646 1661
1647 if (counter->destroy) 1662 if (counter->destroy)
@@ -1676,6 +1691,18 @@ static int perf_release(struct inode *inode, struct file *file)
1676 return 0; 1691 return 0;
1677} 1692}
1678 1693
1694static u64 perf_counter_read_tree(struct perf_counter *counter)
1695{
1696 struct perf_counter *child;
1697 u64 total = 0;
1698
1699 total += perf_counter_read(counter);
1700 list_for_each_entry(child, &counter->child_list, child_list)
1701 total += perf_counter_read(child);
1702
1703 return total;
1704}
1705
1679/* 1706/*
1680 * Read the performance counter - simple non blocking version for now 1707 * Read the performance counter - simple non blocking version for now
1681 */ 1708 */
@@ -1695,7 +1722,7 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
1695 1722
1696 WARN_ON_ONCE(counter->ctx->parent_ctx); 1723 WARN_ON_ONCE(counter->ctx->parent_ctx);
1697 mutex_lock(&counter->child_mutex); 1724 mutex_lock(&counter->child_mutex);
1698 values[0] = perf_counter_read(counter); 1725 values[0] = perf_counter_read_tree(counter);
1699 n = 1; 1726 n = 1;
1700 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) 1727 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1701 values[n++] = counter->total_time_enabled + 1728 values[n++] = counter->total_time_enabled +
@@ -1704,7 +1731,7 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
1704 values[n++] = counter->total_time_running + 1731 values[n++] = counter->total_time_running +
1705 atomic64_read(&counter->child_total_time_running); 1732 atomic64_read(&counter->child_total_time_running);
1706 if (counter->attr.read_format & PERF_FORMAT_ID) 1733 if (counter->attr.read_format & PERF_FORMAT_ID)
1707 values[n++] = counter->id; 1734 values[n++] = primary_counter_id(counter);
1708 mutex_unlock(&counter->child_mutex); 1735 mutex_unlock(&counter->child_mutex);
1709 1736
1710 if (count < n * sizeof(u64)) 1737 if (count < n * sizeof(u64))
@@ -1811,8 +1838,6 @@ static int perf_counter_period(struct perf_counter *counter, u64 __user *arg)
1811 1838
1812 counter->attr.sample_freq = value; 1839 counter->attr.sample_freq = value;
1813 } else { 1840 } else {
1814 perf_log_period(counter, value);
1815
1816 counter->attr.sample_period = value; 1841 counter->attr.sample_period = value;
1817 counter->hw.sample_period = value; 1842 counter->hw.sample_period = value;
1818 } 1843 }
@@ -2020,7 +2045,7 @@ fail:
2020 2045
2021static void perf_mmap_free_page(unsigned long addr) 2046static void perf_mmap_free_page(unsigned long addr)
2022{ 2047{
2023 struct page *page = virt_to_page(addr); 2048 struct page *page = virt_to_page((void *)addr);
2024 2049
2025 page->mapping = NULL; 2050 page->mapping = NULL;
2026 __free_page(page); 2051 __free_page(page);
@@ -2621,6 +2646,7 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
2621 u64 counter; 2646 u64 counter;
2622 } group_entry; 2647 } group_entry;
2623 struct perf_callchain_entry *callchain = NULL; 2648 struct perf_callchain_entry *callchain = NULL;
2649 struct perf_tracepoint_record *tp;
2624 int callchain_size = 0; 2650 int callchain_size = 0;
2625 u64 time; 2651 u64 time;
2626 struct { 2652 struct {
@@ -2661,10 +2687,14 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
2661 if (sample_type & PERF_SAMPLE_ID) 2687 if (sample_type & PERF_SAMPLE_ID)
2662 header.size += sizeof(u64); 2688 header.size += sizeof(u64);
2663 2689
2690 if (sample_type & PERF_SAMPLE_STREAM_ID)
2691 header.size += sizeof(u64);
2692
2664 if (sample_type & PERF_SAMPLE_CPU) { 2693 if (sample_type & PERF_SAMPLE_CPU) {
2665 header.size += sizeof(cpu_entry); 2694 header.size += sizeof(cpu_entry);
2666 2695
2667 cpu_entry.cpu = raw_smp_processor_id(); 2696 cpu_entry.cpu = raw_smp_processor_id();
2697 cpu_entry.reserved = 0;
2668 } 2698 }
2669 2699
2670 if (sample_type & PERF_SAMPLE_PERIOD) 2700 if (sample_type & PERF_SAMPLE_PERIOD)
@@ -2685,6 +2715,11 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
2685 header.size += sizeof(u64); 2715 header.size += sizeof(u64);
2686 } 2716 }
2687 2717
2718 if (sample_type & PERF_SAMPLE_TP_RECORD) {
2719 tp = data->private;
2720 header.size += tp->size;
2721 }
2722
2688 ret = perf_output_begin(&handle, counter, header.size, nmi, 1); 2723 ret = perf_output_begin(&handle, counter, header.size, nmi, 1);
2689 if (ret) 2724 if (ret)
2690 return; 2725 return;
@@ -2703,7 +2738,13 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
2703 if (sample_type & PERF_SAMPLE_ADDR) 2738 if (sample_type & PERF_SAMPLE_ADDR)
2704 perf_output_put(&handle, data->addr); 2739 perf_output_put(&handle, data->addr);
2705 2740
2706 if (sample_type & PERF_SAMPLE_ID) 2741 if (sample_type & PERF_SAMPLE_ID) {
2742 u64 id = primary_counter_id(counter);
2743
2744 perf_output_put(&handle, id);
2745 }
2746
2747 if (sample_type & PERF_SAMPLE_STREAM_ID)
2707 perf_output_put(&handle, counter->id); 2748 perf_output_put(&handle, counter->id);
2708 2749
2709 if (sample_type & PERF_SAMPLE_CPU) 2750 if (sample_type & PERF_SAMPLE_CPU)
@@ -2726,7 +2767,7 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
2726 if (sub != counter) 2767 if (sub != counter)
2727 sub->pmu->read(sub); 2768 sub->pmu->read(sub);
2728 2769
2729 group_entry.id = sub->id; 2770 group_entry.id = primary_counter_id(sub);
2730 group_entry.counter = atomic64_read(&sub->count); 2771 group_entry.counter = atomic64_read(&sub->count);
2731 2772
2732 perf_output_put(&handle, group_entry); 2773 perf_output_put(&handle, group_entry);
@@ -2742,6 +2783,9 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
2742 } 2783 }
2743 } 2784 }
2744 2785
2786 if (sample_type & PERF_SAMPLE_TP_RECORD)
2787 perf_output_copy(&handle, tp->record, tp->size);
2788
2745 perf_output_end(&handle); 2789 perf_output_end(&handle);
2746} 2790}
2747 2791
@@ -2786,15 +2830,8 @@ perf_counter_read_event(struct perf_counter *counter,
2786 } 2830 }
2787 2831
2788 if (counter->attr.read_format & PERF_FORMAT_ID) { 2832 if (counter->attr.read_format & PERF_FORMAT_ID) {
2789 u64 id;
2790
2791 event.header.size += sizeof(u64); 2833 event.header.size += sizeof(u64);
2792 if (counter->parent) 2834 event.format[i++] = primary_counter_id(counter);
2793 id = counter->parent->id;
2794 else
2795 id = counter->id;
2796
2797 event.format[i++] = id;
2798 } 2835 }
2799 2836
2800 ret = perf_output_begin(&handle, counter, event.header.size, 0, 0); 2837 ret = perf_output_begin(&handle, counter, event.header.size, 0, 0);
@@ -2806,10 +2843,12 @@ perf_counter_read_event(struct perf_counter *counter,
2806} 2843}
2807 2844
2808/* 2845/*
2809 * fork tracking 2846 * task tracking -- fork/exit
2847 *
2848 * enabled by: attr.comm | attr.mmap | attr.task
2810 */ 2849 */
2811 2850
2812struct perf_fork_event { 2851struct perf_task_event {
2813 struct task_struct *task; 2852 struct task_struct *task;
2814 2853
2815 struct { 2854 struct {
@@ -2817,37 +2856,42 @@ struct perf_fork_event {
2817 2856
2818 u32 pid; 2857 u32 pid;
2819 u32 ppid; 2858 u32 ppid;
2859 u32 tid;
2860 u32 ptid;
2820 } event; 2861 } event;
2821}; 2862};
2822 2863
2823static void perf_counter_fork_output(struct perf_counter *counter, 2864static void perf_counter_task_output(struct perf_counter *counter,
2824 struct perf_fork_event *fork_event) 2865 struct perf_task_event *task_event)
2825{ 2866{
2826 struct perf_output_handle handle; 2867 struct perf_output_handle handle;
2827 int size = fork_event->event.header.size; 2868 int size = task_event->event.header.size;
2828 struct task_struct *task = fork_event->task; 2869 struct task_struct *task = task_event->task;
2829 int ret = perf_output_begin(&handle, counter, size, 0, 0); 2870 int ret = perf_output_begin(&handle, counter, size, 0, 0);
2830 2871
2831 if (ret) 2872 if (ret)
2832 return; 2873 return;
2833 2874
2834 fork_event->event.pid = perf_counter_pid(counter, task); 2875 task_event->event.pid = perf_counter_pid(counter, task);
2835 fork_event->event.ppid = perf_counter_pid(counter, task->real_parent); 2876 task_event->event.ppid = perf_counter_pid(counter, task->real_parent);
2877
2878 task_event->event.tid = perf_counter_tid(counter, task);
2879 task_event->event.ptid = perf_counter_tid(counter, task->real_parent);
2836 2880
2837 perf_output_put(&handle, fork_event->event); 2881 perf_output_put(&handle, task_event->event);
2838 perf_output_end(&handle); 2882 perf_output_end(&handle);
2839} 2883}
2840 2884
2841static int perf_counter_fork_match(struct perf_counter *counter) 2885static int perf_counter_task_match(struct perf_counter *counter)
2842{ 2886{
2843 if (counter->attr.comm || counter->attr.mmap) 2887 if (counter->attr.comm || counter->attr.mmap || counter->attr.task)
2844 return 1; 2888 return 1;
2845 2889
2846 return 0; 2890 return 0;
2847} 2891}
2848 2892
2849static void perf_counter_fork_ctx(struct perf_counter_context *ctx, 2893static void perf_counter_task_ctx(struct perf_counter_context *ctx,
2850 struct perf_fork_event *fork_event) 2894 struct perf_task_event *task_event)
2851{ 2895{
2852 struct perf_counter *counter; 2896 struct perf_counter *counter;
2853 2897
@@ -2856,19 +2900,19 @@ static void perf_counter_fork_ctx(struct perf_counter_context *ctx,
2856 2900
2857 rcu_read_lock(); 2901 rcu_read_lock();
2858 list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { 2902 list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
2859 if (perf_counter_fork_match(counter)) 2903 if (perf_counter_task_match(counter))
2860 perf_counter_fork_output(counter, fork_event); 2904 perf_counter_task_output(counter, task_event);
2861 } 2905 }
2862 rcu_read_unlock(); 2906 rcu_read_unlock();
2863} 2907}
2864 2908
2865static void perf_counter_fork_event(struct perf_fork_event *fork_event) 2909static void perf_counter_task_event(struct perf_task_event *task_event)
2866{ 2910{
2867 struct perf_cpu_context *cpuctx; 2911 struct perf_cpu_context *cpuctx;
2868 struct perf_counter_context *ctx; 2912 struct perf_counter_context *ctx;
2869 2913
2870 cpuctx = &get_cpu_var(perf_cpu_context); 2914 cpuctx = &get_cpu_var(perf_cpu_context);
2871 perf_counter_fork_ctx(&cpuctx->ctx, fork_event); 2915 perf_counter_task_ctx(&cpuctx->ctx, task_event);
2872 put_cpu_var(perf_cpu_context); 2916 put_cpu_var(perf_cpu_context);
2873 2917
2874 rcu_read_lock(); 2918 rcu_read_lock();
@@ -2878,29 +2922,40 @@ static void perf_counter_fork_event(struct perf_fork_event *fork_event)
2878 */ 2922 */
2879 ctx = rcu_dereference(current->perf_counter_ctxp); 2923 ctx = rcu_dereference(current->perf_counter_ctxp);
2880 if (ctx) 2924 if (ctx)
2881 perf_counter_fork_ctx(ctx, fork_event); 2925 perf_counter_task_ctx(ctx, task_event);
2882 rcu_read_unlock(); 2926 rcu_read_unlock();
2883} 2927}
2884 2928
2885void perf_counter_fork(struct task_struct *task) 2929static void perf_counter_task(struct task_struct *task, int new)
2886{ 2930{
2887 struct perf_fork_event fork_event; 2931 struct perf_task_event task_event;
2888 2932
2889 if (!atomic_read(&nr_comm_counters) && 2933 if (!atomic_read(&nr_comm_counters) &&
2890 !atomic_read(&nr_mmap_counters)) 2934 !atomic_read(&nr_mmap_counters) &&
2935 !atomic_read(&nr_task_counters))
2891 return; 2936 return;
2892 2937
2893 fork_event = (struct perf_fork_event){ 2938 task_event = (struct perf_task_event){
2894 .task = task, 2939 .task = task,
2895 .event = { 2940 .event = {
2896 .header = { 2941 .header = {
2897 .type = PERF_EVENT_FORK, 2942 .type = new ? PERF_EVENT_FORK : PERF_EVENT_EXIT,
2898 .size = sizeof(fork_event.event), 2943 .misc = 0,
2944 .size = sizeof(task_event.event),
2899 }, 2945 },
2946 /* .pid */
2947 /* .ppid */
2948 /* .tid */
2949 /* .ptid */
2900 }, 2950 },
2901 }; 2951 };
2902 2952
2903 perf_counter_fork_event(&fork_event); 2953 perf_counter_task_event(&task_event);
2954}
2955
2956void perf_counter_fork(struct task_struct *task)
2957{
2958 perf_counter_task(task, 1);
2904} 2959}
2905 2960
2906/* 2961/*
@@ -2968,8 +3023,10 @@ static void perf_counter_comm_event(struct perf_comm_event *comm_event)
2968 struct perf_cpu_context *cpuctx; 3023 struct perf_cpu_context *cpuctx;
2969 struct perf_counter_context *ctx; 3024 struct perf_counter_context *ctx;
2970 unsigned int size; 3025 unsigned int size;
2971 char *comm = comm_event->task->comm; 3026 char comm[TASK_COMM_LEN];
2972 3027
3028 memset(comm, 0, sizeof(comm));
3029 strncpy(comm, comm_event->task->comm, sizeof(comm));
2973 size = ALIGN(strlen(comm)+1, sizeof(u64)); 3030 size = ALIGN(strlen(comm)+1, sizeof(u64));
2974 3031
2975 comm_event->comm = comm; 3032 comm_event->comm = comm;
@@ -3004,8 +3061,16 @@ void perf_counter_comm(struct task_struct *task)
3004 3061
3005 comm_event = (struct perf_comm_event){ 3062 comm_event = (struct perf_comm_event){
3006 .task = task, 3063 .task = task,
3064 /* .comm */
3065 /* .comm_size */
3007 .event = { 3066 .event = {
3008 .header = { .type = PERF_EVENT_COMM, }, 3067 .header = {
3068 .type = PERF_EVENT_COMM,
3069 .misc = 0,
3070 /* .size */
3071 },
3072 /* .pid */
3073 /* .tid */
3009 }, 3074 },
3010 }; 3075 };
3011 3076
@@ -3088,8 +3153,15 @@ static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event)
3088 char *buf = NULL; 3153 char *buf = NULL;
3089 const char *name; 3154 const char *name;
3090 3155
3156 memset(tmp, 0, sizeof(tmp));
3157
3091 if (file) { 3158 if (file) {
3092 buf = kzalloc(PATH_MAX, GFP_KERNEL); 3159 /*
3160 * d_path works from the end of the buffer backwards, so we
3161 * need to add enough zero bytes after the string to handle
3162 * the 64bit alignment we do later.
3163 */
3164 buf = kzalloc(PATH_MAX + sizeof(u64), GFP_KERNEL);
3093 if (!buf) { 3165 if (!buf) {
3094 name = strncpy(tmp, "//enomem", sizeof(tmp)); 3166 name = strncpy(tmp, "//enomem", sizeof(tmp));
3095 goto got_name; 3167 goto got_name;
@@ -3100,9 +3172,11 @@ static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event)
3100 goto got_name; 3172 goto got_name;
3101 } 3173 }
3102 } else { 3174 } else {
3103 name = arch_vma_name(mmap_event->vma); 3175 if (arch_vma_name(mmap_event->vma)) {
3104 if (name) 3176 name = strncpy(tmp, arch_vma_name(mmap_event->vma),
3177 sizeof(tmp));
3105 goto got_name; 3178 goto got_name;
3179 }
3106 3180
3107 if (!vma->vm_mm) { 3181 if (!vma->vm_mm) {
3108 name = strncpy(tmp, "[vdso]", sizeof(tmp)); 3182 name = strncpy(tmp, "[vdso]", sizeof(tmp));
@@ -3147,8 +3221,16 @@ void __perf_counter_mmap(struct vm_area_struct *vma)
3147 3221
3148 mmap_event = (struct perf_mmap_event){ 3222 mmap_event = (struct perf_mmap_event){
3149 .vma = vma, 3223 .vma = vma,
3224 /* .file_name */
3225 /* .file_size */
3150 .event = { 3226 .event = {
3151 .header = { .type = PERF_EVENT_MMAP, }, 3227 .header = {
3228 .type = PERF_EVENT_MMAP,
3229 .misc = 0,
3230 /* .size */
3231 },
3232 /* .pid */
3233 /* .tid */
3152 .start = vma->vm_start, 3234 .start = vma->vm_start,
3153 .len = vma->vm_end - vma->vm_start, 3235 .len = vma->vm_end - vma->vm_start,
3154 .pgoff = vma->vm_pgoff, 3236 .pgoff = vma->vm_pgoff,
@@ -3159,49 +3241,6 @@ void __perf_counter_mmap(struct vm_area_struct *vma)
3159} 3241}
3160 3242
3161/* 3243/*
3162 * Log sample_period changes so that analyzing tools can re-normalize the
3163 * event flow.
3164 */
3165
3166struct freq_event {
3167 struct perf_event_header header;
3168 u64 time;
3169 u64 id;
3170 u64 period;
3171};
3172
3173static void perf_log_period(struct perf_counter *counter, u64 period)
3174{
3175 struct perf_output_handle handle;
3176 struct freq_event event;
3177 int ret;
3178
3179 if (counter->hw.sample_period == period)
3180 return;
3181
3182 if (counter->attr.sample_type & PERF_SAMPLE_PERIOD)
3183 return;
3184
3185 event = (struct freq_event) {
3186 .header = {
3187 .type = PERF_EVENT_PERIOD,
3188 .misc = 0,
3189 .size = sizeof(event),
3190 },
3191 .time = sched_clock(),
3192 .id = counter->id,
3193 .period = period,
3194 };
3195
3196 ret = perf_output_begin(&handle, counter, sizeof(event), 1, 0);
3197 if (ret)
3198 return;
3199
3200 perf_output_put(&handle, event);
3201 perf_output_end(&handle);
3202}
3203
3204/*
3205 * IRQ throttle logging 3244 * IRQ throttle logging
3206 */ 3245 */
3207 3246
@@ -3214,16 +3253,21 @@ static void perf_log_throttle(struct perf_counter *counter, int enable)
3214 struct perf_event_header header; 3253 struct perf_event_header header;
3215 u64 time; 3254 u64 time;
3216 u64 id; 3255 u64 id;
3256 u64 stream_id;
3217 } throttle_event = { 3257 } throttle_event = {
3218 .header = { 3258 .header = {
3219 .type = PERF_EVENT_THROTTLE + 1, 3259 .type = PERF_EVENT_THROTTLE,
3220 .misc = 0, 3260 .misc = 0,
3221 .size = sizeof(throttle_event), 3261 .size = sizeof(throttle_event),
3222 }, 3262 },
3223 .time = sched_clock(), 3263 .time = sched_clock(),
3224 .id = counter->id, 3264 .id = primary_counter_id(counter),
3265 .stream_id = counter->id,
3225 }; 3266 };
3226 3267
3268 if (enable)
3269 throttle_event.header.type = PERF_EVENT_UNTHROTTLE;
3270
3227 ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 1, 0); 3271 ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 1, 0);
3228 if (ret) 3272 if (ret)
3229 return; 3273 return;
@@ -3668,17 +3712,24 @@ static const struct pmu perf_ops_task_clock = {
3668}; 3712};
3669 3713
3670#ifdef CONFIG_EVENT_PROFILE 3714#ifdef CONFIG_EVENT_PROFILE
3671void perf_tpcounter_event(int event_id) 3715void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record,
3716 int entry_size)
3672{ 3717{
3718 struct perf_tracepoint_record tp = {
3719 .size = entry_size,
3720 .record = record,
3721 };
3722
3673 struct perf_sample_data data = { 3723 struct perf_sample_data data = {
3674 .regs = get_irq_regs(); 3724 .regs = get_irq_regs(),
3675 .addr = 0, 3725 .addr = addr,
3726 .private = &tp,
3676 }; 3727 };
3677 3728
3678 if (!data.regs) 3729 if (!data.regs)
3679 data.regs = task_pt_regs(current); 3730 data.regs = task_pt_regs(current);
3680 3731
3681 do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, &data); 3732 do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, &data);
3682} 3733}
3683EXPORT_SYMBOL_GPL(perf_tpcounter_event); 3734EXPORT_SYMBOL_GPL(perf_tpcounter_event);
3684 3735
@@ -3687,16 +3738,12 @@ extern void ftrace_profile_disable(int);
3687 3738
3688static void tp_perf_counter_destroy(struct perf_counter *counter) 3739static void tp_perf_counter_destroy(struct perf_counter *counter)
3689{ 3740{
3690 ftrace_profile_disable(perf_event_id(&counter->attr)); 3741 ftrace_profile_disable(counter->attr.config);
3691} 3742}
3692 3743
3693static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) 3744static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
3694{ 3745{
3695 int event_id = perf_event_id(&counter->attr); 3746 if (ftrace_profile_enable(counter->attr.config))
3696 int ret;
3697
3698 ret = ftrace_profile_enable(event_id);
3699 if (ret)
3700 return NULL; 3747 return NULL;
3701 3748
3702 counter->destroy = tp_perf_counter_destroy; 3749 counter->destroy = tp_perf_counter_destroy;
@@ -3874,6 +3921,8 @@ done:
3874 atomic_inc(&nr_mmap_counters); 3921 atomic_inc(&nr_mmap_counters);
3875 if (counter->attr.comm) 3922 if (counter->attr.comm)
3876 atomic_inc(&nr_comm_counters); 3923 atomic_inc(&nr_comm_counters);
3924 if (counter->attr.task)
3925 atomic_inc(&nr_task_counters);
3877 } 3926 }
3878 3927
3879 return counter; 3928 return counter;
@@ -4235,8 +4284,10 @@ void perf_counter_exit_task(struct task_struct *child)
4235 struct perf_counter_context *child_ctx; 4284 struct perf_counter_context *child_ctx;
4236 unsigned long flags; 4285 unsigned long flags;
4237 4286
4238 if (likely(!child->perf_counter_ctxp)) 4287 if (likely(!child->perf_counter_ctxp)) {
4288 perf_counter_task(child, 0);
4239 return; 4289 return;
4290 }
4240 4291
4241 local_irq_save(flags); 4292 local_irq_save(flags);
4242 /* 4293 /*
@@ -4254,18 +4305,22 @@ void perf_counter_exit_task(struct task_struct *child)
4254 * incremented the context's refcount before we do put_ctx below. 4305 * incremented the context's refcount before we do put_ctx below.
4255 */ 4306 */
4256 spin_lock(&child_ctx->lock); 4307 spin_lock(&child_ctx->lock);
4308 /*
 4309	 * If this context is a clone, unclone it so it can't get
4310 * swapped to another process while we're removing all
4311 * the counters from it.
4312 */
4313 unclone_ctx(child_ctx);
4314 spin_unlock_irqrestore(&child_ctx->lock, flags);
4315
4316 /*
4317 * Report the task dead after unscheduling the counters so that we
4318 * won't get any samples after PERF_EVENT_EXIT. We can however still
4319 * get a few PERF_EVENT_READ events.
4320 */
4321 perf_counter_task(child, 0);
4322
4257 child->perf_counter_ctxp = NULL; 4323 child->perf_counter_ctxp = NULL;
4258 if (child_ctx->parent_ctx) {
4259 /*
4260 * This context is a clone; unclone it so it can't get
4261 * swapped to another process while we're removing all
4262 * the counters from it.
4263 */
4264 put_ctx(child_ctx->parent_ctx);
4265 child_ctx->parent_ctx = NULL;
4266 }
4267 spin_unlock(&child_ctx->lock);
4268 local_irq_restore(flags);
4269 4324
4270 /* 4325 /*
4271 * We can recurse on the same lock type through: 4326 * We can recurse on the same lock type through:
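
The perf_counter.c hunks above fold the old fork-only notification into a single perf_counter_task() helper that emits either a PERF_EVENT_FORK or a PERF_EVENT_EXIT record depending on a flag, and perf_counter_exit_task() now reports the exit even when the task never had a counter context. The following is an editorial sketch of that "one builder, parameterized record type" pattern; the types, field names, and values are hypothetical stand-ins, not the real perf_counter ABI.

```c
/* Illustrative only: hypothetical record layout, not the perf_counter ABI. */
#include <stdint.h>
#include <stdio.h>

enum demo_event_type { DEMO_EVENT_FORK = 1, DEMO_EVENT_EXIT = 2 };

struct demo_task_event {
	uint32_t type;	/* DEMO_EVENT_FORK or DEMO_EVENT_EXIT */
	uint32_t size;
	uint32_t pid;
	uint32_t ppid;
};

/* One builder handles both life-cycle transitions, as perf_counter_task() does. */
static struct demo_task_event demo_task_event(uint32_t pid, uint32_t ppid, int new)
{
	struct demo_task_event ev = {
		.type = new ? DEMO_EVENT_FORK : DEMO_EVENT_EXIT,
		.size = sizeof(ev),
		.pid  = pid,
		.ppid = ppid,
	};
	return ev;
}

int main(void)
{
	struct demo_task_event fork_ev = demo_task_event(1234, 1, 1);
	struct demo_task_event exit_ev = demo_task_event(1234, 1, 0);

	printf("fork record type=%u size=%u\n", fork_ev.type, fork_ev.size);
	printf("exit record type=%u size=%u\n", exit_ev.type, exit_ev.size);
	return 0;
}
```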
diff --git a/kernel/pid.c b/kernel/pid.c
index 5fa1db48d8b7..31310b5d3f50 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -36,7 +36,6 @@
36#include <linux/pid_namespace.h> 36#include <linux/pid_namespace.h>
37#include <linux/init_task.h> 37#include <linux/init_task.h>
38#include <linux/syscalls.h> 38#include <linux/syscalls.h>
39#include <linux/kmemleak.h>
40 39
41#define pid_hashfn(nr, ns) \ 40#define pid_hashfn(nr, ns) \
42 hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift) 41 hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
@@ -513,12 +512,6 @@ void __init pidhash_init(void)
513 pid_hash = alloc_bootmem(pidhash_size * sizeof(*(pid_hash))); 512 pid_hash = alloc_bootmem(pidhash_size * sizeof(*(pid_hash)));
514 if (!pid_hash) 513 if (!pid_hash)
515 panic("Could not alloc pidhash!\n"); 514 panic("Could not alloc pidhash!\n");
516 /*
517 * pid_hash contains references to allocated struct pid objects and it
518 * must be scanned by kmemleak to avoid false positives.
519 */
520 kmemleak_alloc(pid_hash, pidhash_size * sizeof(*(pid_hash)), 0,
521 GFP_KERNEL);
522 for (i = 0; i < pidhash_size; i++) 515 for (i = 0; i < pidhash_size; i++)
523 INIT_HLIST_HEAD(&pid_hash[i]); 516 INIT_HLIST_HEAD(&pid_hash[i]);
524} 517}
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index bece7c0b67b2..e33a21cb9407 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -521,11 +521,12 @@ void posix_cpu_timers_exit(struct task_struct *tsk)
521} 521}
522void posix_cpu_timers_exit_group(struct task_struct *tsk) 522void posix_cpu_timers_exit_group(struct task_struct *tsk)
523{ 523{
524 struct task_cputime cputime; 524 struct signal_struct *const sig = tsk->signal;
525 525
526 thread_group_cputimer(tsk, &cputime);
527 cleanup_timers(tsk->signal->cpu_timers, 526 cleanup_timers(tsk->signal->cpu_timers,
528 cputime.utime, cputime.stime, cputime.sum_exec_runtime); 527 cputime_add(tsk->utime, sig->utime),
528 cputime_add(tsk->stime, sig->stime),
529 tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
529} 530}
530 531
531static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now) 532static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 052ec4d195c7..d089d052c4a9 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -202,6 +202,12 @@ static int no_timer_create(struct k_itimer *new_timer)
202 return -EOPNOTSUPP; 202 return -EOPNOTSUPP;
203} 203}
204 204
205static int no_nsleep(const clockid_t which_clock, int flags,
206 struct timespec *tsave, struct timespec __user *rmtp)
207{
208 return -EOPNOTSUPP;
209}
210
205/* 211/*
206 * Return nonzero if we know a priori this clockid_t value is bogus. 212 * Return nonzero if we know a priori this clockid_t value is bogus.
207 */ 213 */
@@ -254,6 +260,7 @@ static __init int init_posix_timers(void)
254 .clock_get = posix_get_monotonic_raw, 260 .clock_get = posix_get_monotonic_raw,
255 .clock_set = do_posix_clock_nosettime, 261 .clock_set = do_posix_clock_nosettime,
256 .timer_create = no_timer_create, 262 .timer_create = no_timer_create,
263 .nsleep = no_nsleep,
257 }; 264 };
258 265
259 register_posix_clock(CLOCK_REALTIME, &clock_realtime); 266 register_posix_clock(CLOCK_REALTIME, &clock_realtime);
diff --git a/kernel/power/user.c b/kernel/power/user.c
index ed97375daae9..bf0014d6a5f0 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -23,7 +23,6 @@
23#include <linux/console.h> 23#include <linux/console.h>
24#include <linux/cpu.h> 24#include <linux/cpu.h>
25#include <linux/freezer.h> 25#include <linux/freezer.h>
26#include <linux/smp_lock.h>
27#include <scsi/scsi_scan.h> 26#include <scsi/scsi_scan.h>
28 27
29#include <asm/uaccess.h> 28#include <asm/uaccess.h>
diff --git a/kernel/profile.c b/kernel/profile.c
index 69911b5745eb..419250ebec4d 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -117,11 +117,12 @@ int __ref profile_init(void)
117 117
118 cpumask_copy(prof_cpu_mask, cpu_possible_mask); 118 cpumask_copy(prof_cpu_mask, cpu_possible_mask);
119 119
120 prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL); 120 prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL|__GFP_NOWARN);
121 if (prof_buffer) 121 if (prof_buffer)
122 return 0; 122 return 0;
123 123
124 prof_buffer = alloc_pages_exact(buffer_bytes, GFP_KERNEL|__GFP_ZERO); 124 prof_buffer = alloc_pages_exact(buffer_bytes,
125 GFP_KERNEL|__GFP_ZERO|__GFP_NOWARN);
125 if (prof_buffer) 126 if (prof_buffer)
126 return 0; 127 return 0;
127 128
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 61c78b2c07ba..082c320e4dbf 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -181,8 +181,8 @@ int ptrace_attach(struct task_struct *task)
181 * interference; SUID, SGID and LSM creds get determined differently 181 * interference; SUID, SGID and LSM creds get determined differently
182 * under ptrace. 182 * under ptrace.
183 */ 183 */
184 retval = mutex_lock_interruptible(&task->cred_guard_mutex); 184 retval = -ERESTARTNOINTR;
185 if (retval < 0) 185 if (mutex_lock_interruptible(&task->cred_guard_mutex))
186 goto out; 186 goto out;
187 187
188 task_lock(task); 188 task_lock(task);
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 0dccfbba6d26..7717b95c2027 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1533,7 +1533,7 @@ void __init __rcu_init(void)
1533 int j; 1533 int j;
1534 struct rcu_node *rnp; 1534 struct rcu_node *rnp;
1535 1535
1536 printk(KERN_WARNING "Experimental hierarchical RCU implementation.\n"); 1536 printk(KERN_INFO "Hierarchical RCU implementation.\n");
1537#ifdef CONFIG_RCU_CPU_STALL_DETECTOR 1537#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
1538 printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n"); 1538 printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
1539#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 1539#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
@@ -1546,7 +1546,6 @@ void __init __rcu_init(void)
1546 rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long)i); 1546 rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long)i);
1547 /* Register notifier for non-boot CPUs */ 1547 /* Register notifier for non-boot CPUs */
1548 register_cpu_notifier(&rcu_nb); 1548 register_cpu_notifier(&rcu_nb);
1549 printk(KERN_WARNING "Experimental hierarchical RCU init done.\n");
1550} 1549}
1551 1550
1552module_param(blimit, int, 0); 1551module_param(blimit, int, 0);
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index fcd107a78c5a..29bd4baf9e75 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -1039,16 +1039,14 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1039 if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) { 1039 if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) {
1040 /* We got the lock for task. */ 1040 /* We got the lock for task. */
1041 debug_rt_mutex_lock(lock); 1041 debug_rt_mutex_lock(lock);
1042
1043 rt_mutex_set_owner(lock, task, 0); 1042 rt_mutex_set_owner(lock, task, 0);
1044 1043 spin_unlock(&lock->wait_lock);
1045 rt_mutex_deadlock_account_lock(lock, task); 1044 rt_mutex_deadlock_account_lock(lock, task);
1046 return 1; 1045 return 1;
1047 } 1046 }
1048 1047
1049 ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock); 1048 ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock);
1050 1049
1051
1052 if (ret && !waiter->task) { 1050 if (ret && !waiter->task) {
1053 /* 1051 /*
1054 * Reset the return value. We might have 1052 * Reset the return value. We might have
diff --git a/kernel/sched.c b/kernel/sched.c
index 7c9098d186e6..1b59e265273b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -493,6 +493,7 @@ struct rt_rq {
493#endif 493#endif
494#ifdef CONFIG_SMP 494#ifdef CONFIG_SMP
495 unsigned long rt_nr_migratory; 495 unsigned long rt_nr_migratory;
496 unsigned long rt_nr_total;
496 int overloaded; 497 int overloaded;
497 struct plist_head pushable_tasks; 498 struct plist_head pushable_tasks;
498#endif 499#endif
@@ -2571,15 +2572,37 @@ static void __sched_fork(struct task_struct *p)
2571 p->se.avg_wakeup = sysctl_sched_wakeup_granularity; 2572 p->se.avg_wakeup = sysctl_sched_wakeup_granularity;
2572 2573
2573#ifdef CONFIG_SCHEDSTATS 2574#ifdef CONFIG_SCHEDSTATS
2574 p->se.wait_start = 0; 2575 p->se.wait_start = 0;
2575 p->se.sum_sleep_runtime = 0; 2576 p->se.wait_max = 0;
2576 p->se.sleep_start = 0; 2577 p->se.wait_count = 0;
2577 p->se.block_start = 0; 2578 p->se.wait_sum = 0;
2578 p->se.sleep_max = 0; 2579
2579 p->se.block_max = 0; 2580 p->se.sleep_start = 0;
2580 p->se.exec_max = 0; 2581 p->se.sleep_max = 0;
2581 p->se.slice_max = 0; 2582 p->se.sum_sleep_runtime = 0;
2582 p->se.wait_max = 0; 2583
2584 p->se.block_start = 0;
2585 p->se.block_max = 0;
2586 p->se.exec_max = 0;
2587 p->se.slice_max = 0;
2588
2589 p->se.nr_migrations_cold = 0;
2590 p->se.nr_failed_migrations_affine = 0;
2591 p->se.nr_failed_migrations_running = 0;
2592 p->se.nr_failed_migrations_hot = 0;
2593 p->se.nr_forced_migrations = 0;
2594 p->se.nr_forced2_migrations = 0;
2595
2596 p->se.nr_wakeups = 0;
2597 p->se.nr_wakeups_sync = 0;
2598 p->se.nr_wakeups_migrate = 0;
2599 p->se.nr_wakeups_local = 0;
2600 p->se.nr_wakeups_remote = 0;
2601 p->se.nr_wakeups_affine = 0;
2602 p->se.nr_wakeups_affine_attempts = 0;
2603 p->se.nr_wakeups_passive = 0;
2604 p->se.nr_wakeups_idle = 0;
2605
2583#endif 2606#endif
2584 2607
2585 INIT_LIST_HEAD(&p->rt.run_list); 2608 INIT_LIST_HEAD(&p->rt.run_list);
@@ -6541,6 +6564,11 @@ SYSCALL_DEFINE0(sched_yield)
6541 return 0; 6564 return 0;
6542} 6565}
6543 6566
6567static inline int should_resched(void)
6568{
6569 return need_resched() && !(preempt_count() & PREEMPT_ACTIVE);
6570}
6571
6544static void __cond_resched(void) 6572static void __cond_resched(void)
6545{ 6573{
6546#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP 6574#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
@@ -6560,8 +6588,7 @@ static void __cond_resched(void)
6560 6588
6561int __sched _cond_resched(void) 6589int __sched _cond_resched(void)
6562{ 6590{
6563 if (need_resched() && !(preempt_count() & PREEMPT_ACTIVE) && 6591 if (should_resched()) {
6564 system_state == SYSTEM_RUNNING) {
6565 __cond_resched(); 6592 __cond_resched();
6566 return 1; 6593 return 1;
6567 } 6594 }
@@ -6579,12 +6606,12 @@ EXPORT_SYMBOL(_cond_resched);
6579 */ 6606 */
6580int cond_resched_lock(spinlock_t *lock) 6607int cond_resched_lock(spinlock_t *lock)
6581{ 6608{
6582 int resched = need_resched() && system_state == SYSTEM_RUNNING; 6609 int resched = should_resched();
6583 int ret = 0; 6610 int ret = 0;
6584 6611
6585 if (spin_needbreak(lock) || resched) { 6612 if (spin_needbreak(lock) || resched) {
6586 spin_unlock(lock); 6613 spin_unlock(lock);
6587 if (resched && need_resched()) 6614 if (resched)
6588 __cond_resched(); 6615 __cond_resched();
6589 else 6616 else
6590 cpu_relax(); 6617 cpu_relax();
@@ -6599,7 +6626,7 @@ int __sched cond_resched_softirq(void)
6599{ 6626{
6600 BUG_ON(!in_softirq()); 6627 BUG_ON(!in_softirq());
6601 6628
6602 if (need_resched() && system_state == SYSTEM_RUNNING) { 6629 if (should_resched()) {
6603 local_bh_enable(); 6630 local_bh_enable();
6604 __cond_resched(); 6631 __cond_resched();
6605 local_bh_disable(); 6632 local_bh_disable();
@@ -7262,6 +7289,7 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
7262static void calc_global_load_remove(struct rq *rq) 7289static void calc_global_load_remove(struct rq *rq)
7263{ 7290{
7264 atomic_long_sub(rq->calc_load_active, &calc_load_tasks); 7291 atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
7292 rq->calc_load_active = 0;
7265} 7293}
7266#endif /* CONFIG_HOTPLUG_CPU */ 7294#endif /* CONFIG_HOTPLUG_CPU */
7267 7295
@@ -7488,6 +7516,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
7488 task_rq_unlock(rq, &flags); 7516 task_rq_unlock(rq, &flags);
7489 get_task_struct(p); 7517 get_task_struct(p);
7490 cpu_rq(cpu)->migration_thread = p; 7518 cpu_rq(cpu)->migration_thread = p;
7519 rq->calc_load_update = calc_load_update;
7491 break; 7520 break;
7492 7521
7493 case CPU_ONLINE: 7522 case CPU_ONLINE:
@@ -7498,8 +7527,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
7498 /* Update our root-domain */ 7527 /* Update our root-domain */
7499 rq = cpu_rq(cpu); 7528 rq = cpu_rq(cpu);
7500 spin_lock_irqsave(&rq->lock, flags); 7529 spin_lock_irqsave(&rq->lock, flags);
7501 rq->calc_load_update = calc_load_update;
7502 rq->calc_load_active = 0;
7503 if (rq->rd) { 7530 if (rq->rd) {
7504 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); 7531 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
7505 7532
@@ -9070,7 +9097,7 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
9070#ifdef CONFIG_SMP 9097#ifdef CONFIG_SMP
9071 rt_rq->rt_nr_migratory = 0; 9098 rt_rq->rt_nr_migratory = 0;
9072 rt_rq->overloaded = 0; 9099 rt_rq->overloaded = 0;
9073 plist_head_init(&rq->rt.pushable_tasks, &rq->lock); 9100 plist_head_init(&rt_rq->pushable_tasks, &rq->lock);
9074#endif 9101#endif
9075 9102
9076 rt_rq->rt_time = 0; 9103 rt_rq->rt_time = 0;
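
The sched.c hunk replaces several open-coded "need_resched() && !(preempt_count() & PREEMPT_ACTIVE) && system_state == SYSTEM_RUNNING" checks with one should_resched() helper and drops the system_state test. A standalone sketch of the same consolidation follows; the predicates and the PREEMPT_ACTIVE value are placeholder stand-ins, since the real ones are per-CPU kernel internals.

```c
/* Stand-ins for kernel state; the real predicates are per-CPU kernel internals. */
#include <stdio.h>

#define PREEMPT_ACTIVE 0x10000000	/* placeholder value for illustration */

static int fake_need_resched = 1;
static unsigned int fake_preempt_count;

static int need_resched(void)           { return fake_need_resched; }
static unsigned int preempt_count(void) { return fake_preempt_count; }

/* One helper instead of repeating the compound condition at every call site. */
static inline int should_resched(void)
{
	return need_resched() && !(preempt_count() & PREEMPT_ACTIVE);
}

int main(void)
{
	printf("runnable, not in active preemption: %d\n", should_resched());

	fake_preempt_count = PREEMPT_ACTIVE;
	printf("inside PREEMPT_ACTIVE section:      %d\n", should_resched());
	return 0;
}
```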
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index e6c251790dde..d014efbf947a 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -81,8 +81,21 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
81 if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) 81 if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
82 continue; 82 continue;
83 83
84 if (lowest_mask) 84 if (lowest_mask) {
85 cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); 85 cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
86
87 /*
88 * We have to ensure that we have at least one bit
89 * still set in the array, since the map could have
90 * been concurrently emptied between the first and
91 * second reads of vec->mask. If we hit this
92 * condition, simply act as though we never hit this
93 * priority level and continue on.
94 */
95 if (cpumask_any(lowest_mask) >= nr_cpu_ids)
96 continue;
97 }
98
86 return 1; 99 return 1;
87 } 100 }
88 101
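
The sched_cpupri.c change recomputes the intersection into lowest_mask and then re-checks that at least one bit survived, because vec->mask can be emptied concurrently between the two reads. Below is a userspace analogy of that "recheck the result before trusting it" pattern using plain 64-bit masks; the kernel code itself uses the cpumask_and()/cpumask_any() helpers.

```c
/* Analogy with plain 64-bit masks; the kernel uses cpumask_and()/cpumask_any(). */
#include <stdint.h>
#include <stdio.h>

/* Returns 1 and fills *lowest if the intersection is still non-empty. */
static int pick_cpus(uint64_t allowed, const volatile uint64_t *vec_mask,
		     uint64_t *lowest)
{
	if ((allowed & *vec_mask) == 0)		/* first read: quick reject */
		return 0;

	*lowest = allowed & *vec_mask;		/* second read: build the result */

	/*
	 * The mask may have been emptied between the two reads, so act as if
	 * this priority level never matched instead of returning a stale hit.
	 */
	if (*lowest == 0)
		return 0;

	return 1;
}

int main(void)
{
	uint64_t vec = 0x0f, lowest = 0;

	if (pick_cpus(0x06, &vec, &lowest))
		printf("lowest mask: 0x%llx\n", (unsigned long long)lowest);
	return 0;
}
```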
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index ba7fd6e9556f..652e8bdef9aa 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -266,6 +266,12 @@ static inline u64 min_vruntime(u64 min_vruntime, u64 vruntime)
266 return min_vruntime; 266 return min_vruntime;
267} 267}
268 268
269static inline int entity_before(struct sched_entity *a,
270 struct sched_entity *b)
271{
272 return (s64)(a->vruntime - b->vruntime) < 0;
273}
274
269static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se) 275static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
270{ 276{
271 return se->vruntime - cfs_rq->min_vruntime; 277 return se->vruntime - cfs_rq->min_vruntime;
@@ -605,9 +611,13 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
605static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) 611static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
606{ 612{
607#ifdef CONFIG_SCHEDSTATS 613#ifdef CONFIG_SCHEDSTATS
614 struct task_struct *tsk = NULL;
615
616 if (entity_is_task(se))
617 tsk = task_of(se);
618
608 if (se->sleep_start) { 619 if (se->sleep_start) {
609 u64 delta = rq_of(cfs_rq)->clock - se->sleep_start; 620 u64 delta = rq_of(cfs_rq)->clock - se->sleep_start;
610 struct task_struct *tsk = task_of(se);
611 621
612 if ((s64)delta < 0) 622 if ((s64)delta < 0)
613 delta = 0; 623 delta = 0;
@@ -618,11 +628,11 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
618 se->sleep_start = 0; 628 se->sleep_start = 0;
619 se->sum_sleep_runtime += delta; 629 se->sum_sleep_runtime += delta;
620 630
621 account_scheduler_latency(tsk, delta >> 10, 1); 631 if (tsk)
632 account_scheduler_latency(tsk, delta >> 10, 1);
622 } 633 }
623 if (se->block_start) { 634 if (se->block_start) {
624 u64 delta = rq_of(cfs_rq)->clock - se->block_start; 635 u64 delta = rq_of(cfs_rq)->clock - se->block_start;
625 struct task_struct *tsk = task_of(se);
626 636
627 if ((s64)delta < 0) 637 if ((s64)delta < 0)
628 delta = 0; 638 delta = 0;
@@ -633,17 +643,19 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
633 se->block_start = 0; 643 se->block_start = 0;
634 se->sum_sleep_runtime += delta; 644 se->sum_sleep_runtime += delta;
635 645
636 /* 646 if (tsk) {
637 * Blocking time is in units of nanosecs, so shift by 20 to 647 /*
638 * get a milliseconds-range estimation of the amount of 648 * Blocking time is in units of nanosecs, so shift by
639 * time that the task spent sleeping: 649 * 20 to get a milliseconds-range estimation of the
640 */ 650 * amount of time that the task spent sleeping:
641 if (unlikely(prof_on == SLEEP_PROFILING)) { 651 */
642 652 if (unlikely(prof_on == SLEEP_PROFILING)) {
643 profile_hits(SLEEP_PROFILING, (void *)get_wchan(tsk), 653 profile_hits(SLEEP_PROFILING,
644 delta >> 20); 654 (void *)get_wchan(tsk),
655 delta >> 20);
656 }
657 account_scheduler_latency(tsk, delta >> 10, 0);
645 } 658 }
646 account_scheduler_latency(tsk, delta >> 10, 0);
647 } 659 }
648#endif 660#endif
649} 661}
@@ -687,7 +699,8 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
687 * all of which have the same weight. 699 * all of which have the same weight.
688 */ 700 */
689 if (sched_feat(NORMALIZED_SLEEPER) && 701 if (sched_feat(NORMALIZED_SLEEPER) &&
690 task_of(se)->policy != SCHED_IDLE) 702 (!entity_is_task(se) ||
703 task_of(se)->policy != SCHED_IDLE))
691 thresh = calc_delta_fair(thresh, se); 704 thresh = calc_delta_fair(thresh, se);
692 705
693 vruntime -= thresh; 706 vruntime -= thresh;
@@ -1016,7 +1029,7 @@ static void yield_task_fair(struct rq *rq)
1016 /* 1029 /*
1017 * Already in the rightmost position? 1030 * Already in the rightmost position?
1018 */ 1031 */
1019 if (unlikely(!rightmost || rightmost->vruntime < se->vruntime)) 1032 if (unlikely(!rightmost || entity_before(rightmost, se)))
1020 return; 1033 return;
1021 1034
1022 /* 1035 /*
@@ -1712,7 +1725,7 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
1712 1725
1713 /* 'curr' will be NULL if the child belongs to a different group */ 1726 /* 'curr' will be NULL if the child belongs to a different group */
1714 if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) && 1727 if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) &&
1715 curr && curr->vruntime < se->vruntime) { 1728 curr && entity_before(curr, se)) {
1716 /* 1729 /*
1717 * Upon rescheduling, sched_class::put_prev_task() will place 1730 * Upon rescheduling, sched_class::put_prev_task() will place
1718 * 'current' within the tree based on its new key value. 1731 * 'current' within the tree based on its new key value.
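
The new entity_before() compares vruntimes through a signed difference, (s64)(a->vruntime - b->vruntime) < 0, which stays correct when the unsigned counters wrap, unlike the direct "rightmost->vruntime < se->vruntime" comparisons it replaces. A small standalone demonstration of why the two differ near the wrap point:

```c
#include <stdint.h>
#include <stdio.h>

/* Wrap-safe ordering for monotonically increasing u64 counters. */
static int before(uint64_t a, uint64_t b)
{
	return (int64_t)(a - b) < 0;
}

int main(void)
{
	uint64_t a = UINT64_MAX - 5;	/* just before the wrap */
	uint64_t b = a + 10;		/* wrapped around to a small value */

	printf("naive a < b       : %d\n", a < b);		/* 0: misordered */
	printf("signed-diff before: %d\n", before(a, b));	/* 1: correct   */
	return 0;
}
```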
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 9bf0d2a73045..3918e01994e0 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -10,6 +10,8 @@ static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
10 10
11#ifdef CONFIG_RT_GROUP_SCHED 11#ifdef CONFIG_RT_GROUP_SCHED
12 12
13#define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
14
13static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq) 15static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
14{ 16{
15 return rt_rq->rq; 17 return rt_rq->rq;
@@ -22,6 +24,8 @@ static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
22 24
23#else /* CONFIG_RT_GROUP_SCHED */ 25#else /* CONFIG_RT_GROUP_SCHED */
24 26
27#define rt_entity_is_task(rt_se) (1)
28
25static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq) 29static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
26{ 30{
27 return container_of(rt_rq, struct rq, rt); 31 return container_of(rt_rq, struct rq, rt);
@@ -73,7 +77,7 @@ static inline void rt_clear_overload(struct rq *rq)
73 77
74static void update_rt_migration(struct rt_rq *rt_rq) 78static void update_rt_migration(struct rt_rq *rt_rq)
75{ 79{
76 if (rt_rq->rt_nr_migratory && (rt_rq->rt_nr_running > 1)) { 80 if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
77 if (!rt_rq->overloaded) { 81 if (!rt_rq->overloaded) {
78 rt_set_overload(rq_of_rt_rq(rt_rq)); 82 rt_set_overload(rq_of_rt_rq(rt_rq));
79 rt_rq->overloaded = 1; 83 rt_rq->overloaded = 1;
@@ -86,6 +90,12 @@ static void update_rt_migration(struct rt_rq *rt_rq)
86 90
87static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 91static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
88{ 92{
93 if (!rt_entity_is_task(rt_se))
94 return;
95
96 rt_rq = &rq_of_rt_rq(rt_rq)->rt;
97
98 rt_rq->rt_nr_total++;
89 if (rt_se->nr_cpus_allowed > 1) 99 if (rt_se->nr_cpus_allowed > 1)
90 rt_rq->rt_nr_migratory++; 100 rt_rq->rt_nr_migratory++;
91 101
@@ -94,6 +104,12 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
94 104
95static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 105static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
96{ 106{
107 if (!rt_entity_is_task(rt_se))
108 return;
109
110 rt_rq = &rq_of_rt_rq(rt_rq)->rt;
111
112 rt_rq->rt_nr_total--;
97 if (rt_se->nr_cpus_allowed > 1) 113 if (rt_se->nr_cpus_allowed > 1)
98 rt_rq->rt_nr_migratory--; 114 rt_rq->rt_nr_migratory--;
99 115
diff --git a/kernel/signal.c b/kernel/signal.c
index ccf1ceedaebe..64c5deeaca5d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2454,11 +2454,9 @@ do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long s
2454 stack_t oss; 2454 stack_t oss;
2455 int error; 2455 int error;
2456 2456
2457 if (uoss) { 2457 oss.ss_sp = (void __user *) current->sas_ss_sp;
2458 oss.ss_sp = (void __user *) current->sas_ss_sp; 2458 oss.ss_size = current->sas_ss_size;
2459 oss.ss_size = current->sas_ss_size; 2459 oss.ss_flags = sas_ss_flags(sp);
2460 oss.ss_flags = sas_ss_flags(sp);
2461 }
2462 2460
2463 if (uss) { 2461 if (uss) {
2464 void __user *ss_sp; 2462 void __user *ss_sp;
@@ -2466,10 +2464,12 @@ do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long s
2466 int ss_flags; 2464 int ss_flags;
2467 2465
2468 error = -EFAULT; 2466 error = -EFAULT;
2469 if (!access_ok(VERIFY_READ, uss, sizeof(*uss)) 2467 if (!access_ok(VERIFY_READ, uss, sizeof(*uss)))
2470 || __get_user(ss_sp, &uss->ss_sp) 2468 goto out;
2471 || __get_user(ss_flags, &uss->ss_flags) 2469 error = __get_user(ss_sp, &uss->ss_sp) |
2472 || __get_user(ss_size, &uss->ss_size)) 2470 __get_user(ss_flags, &uss->ss_flags) |
2471 __get_user(ss_size, &uss->ss_size);
2472 if (error)
2473 goto out; 2473 goto out;
2474 2474
2475 error = -EPERM; 2475 error = -EPERM;
@@ -2501,13 +2501,16 @@ do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long s
2501 current->sas_ss_size = ss_size; 2501 current->sas_ss_size = ss_size;
2502 } 2502 }
2503 2503
2504 error = 0;
2504 if (uoss) { 2505 if (uoss) {
2505 error = -EFAULT; 2506 error = -EFAULT;
2506 if (copy_to_user(uoss, &oss, sizeof(oss))) 2507 if (!access_ok(VERIFY_WRITE, uoss, sizeof(*uoss)))
2507 goto out; 2508 goto out;
2509 error = __put_user(oss.ss_sp, &uoss->ss_sp) |
2510 __put_user(oss.ss_size, &uoss->ss_size) |
2511 __put_user(oss.ss_flags, &uoss->ss_flags);
2508 } 2512 }
2509 2513
2510 error = 0;
2511out: 2514out:
2512 return error; 2515 return error;
2513} 2516}
diff --git a/kernel/smp.c b/kernel/smp.c
index ad63d8501207..94188b8ecc33 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -57,7 +57,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
57 return NOTIFY_BAD; 57 return NOTIFY_BAD;
58 break; 58 break;
59 59
60#ifdef CONFIG_CPU_HOTPLUG 60#ifdef CONFIG_HOTPLUG_CPU
61 case CPU_UP_CANCELED: 61 case CPU_UP_CANCELED:
62 case CPU_UP_CANCELED_FROZEN: 62 case CPU_UP_CANCELED_FROZEN:
63 63
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 3a94905fa5d2..eb5e131a0485 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -345,7 +345,9 @@ void open_softirq(int nr, void (*action)(struct softirq_action *))
345 softirq_vec[nr].action = action; 345 softirq_vec[nr].action = action;
346} 346}
347 347
348/* Tasklets */ 348/*
349 * Tasklets
350 */
349struct tasklet_head 351struct tasklet_head
350{ 352{
351 struct tasklet_struct *head; 353 struct tasklet_struct *head;
@@ -493,6 +495,66 @@ void tasklet_kill(struct tasklet_struct *t)
493 495
494EXPORT_SYMBOL(tasklet_kill); 496EXPORT_SYMBOL(tasklet_kill);
495 497
498/*
499 * tasklet_hrtimer
500 */
501
502/*
503 * The trampoline is called when the hrtimer expires. If this is
504 * called from the hrtimer interrupt then we schedule the tasklet as
505 * the timer callback function expects to run in softirq context. If
506 * it's called in softirq context anyway (i.e. high resolution timers
507 * disabled) then the hrtimer callback is called right away.
508 */
509static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
510{
511 struct tasklet_hrtimer *ttimer =
512 container_of(timer, struct tasklet_hrtimer, timer);
513
514 if (hrtimer_is_hres_active(timer)) {
515 tasklet_hi_schedule(&ttimer->tasklet);
516 return HRTIMER_NORESTART;
517 }
518 return ttimer->function(timer);
519}
520
521/*
522 * Helper function which calls the hrtimer callback from
523 * tasklet/softirq context
524 */
525static void __tasklet_hrtimer_trampoline(unsigned long data)
526{
527 struct tasklet_hrtimer *ttimer = (void *)data;
528 enum hrtimer_restart restart;
529
530 restart = ttimer->function(&ttimer->timer);
531 if (restart != HRTIMER_NORESTART)
532 hrtimer_restart(&ttimer->timer);
533}
534
535/**
536 * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks
537 * @ttimer: tasklet_hrtimer which is initialized
 538 * @function:	hrtimer callback function which gets called from softirq context
539 * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME)
540 * @mode: hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL)
541 */
542void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
543 enum hrtimer_restart (*function)(struct hrtimer *),
544 clockid_t which_clock, enum hrtimer_mode mode)
545{
546 hrtimer_init(&ttimer->timer, which_clock, mode);
547 ttimer->timer.function = __hrtimer_tasklet_trampoline;
548 tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline,
549 (unsigned long)ttimer);
550 ttimer->function = function;
551}
552EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);
553
554/*
555 * Remote softirq bits
556 */
557
496DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list); 558DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
497EXPORT_PER_CPU_SYMBOL(softirq_work_list); 559EXPORT_PER_CPU_SYMBOL(softirq_work_list);
498 560
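
The softirq.c hunk adds the tasklet_hrtimer combo so hrtimer users whose callbacks must run in softirq context get a ready-made trampoline. The sketch below shows how a module might use it; it is not part of this patch, and it assumes only the tasklet_hrtimer_init() signature and field names added above plus the pre-existing hrtimer_start()/hrtimer_cancel()/ktime_set()/tasklet_kill() helpers. The cleanup sequence in demo_exit() is an assumption, not an API documented by the patch.

```c
/* Editorial sketch of a tasklet_hrtimer user; not part of this patch. */
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct tasklet_hrtimer demo_timer;

/* Runs in softirq context via the tasklet trampoline added above. */
static enum hrtimer_restart demo_timer_fn(struct hrtimer *timer)
{
	pr_info("tasklet_hrtimer fired in softirq context\n");
	return HRTIMER_NORESTART;
}

static int __init demo_init(void)
{
	tasklet_hrtimer_init(&demo_timer, demo_timer_fn,
			     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	/* Arm the underlying hrtimer for one second from now. */
	hrtimer_start(&demo_timer.timer, ktime_set(1, 0), HRTIMER_MODE_REL);
	return 0;
}

static void __exit demo_exit(void)
{
	/* Assumption: cancel the underlying hrtimer, then kill the tasklet. */
	hrtimer_cancel(&demo_timer.timer);
	tasklet_kill(&demo_timer.tasklet);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
```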
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 1ad6dd461119..a6dcd67b041d 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -254,15 +254,4 @@ void clockevents_notify(unsigned long reason, void *arg)
254 spin_unlock(&clockevents_lock); 254 spin_unlock(&clockevents_lock);
255} 255}
256EXPORT_SYMBOL_GPL(clockevents_notify); 256EXPORT_SYMBOL_GPL(clockevents_notify);
257
258ktime_t clockevents_get_next_event(int cpu)
259{
260 struct tick_device *td;
261 struct clock_event_device *dev;
262
263 td = &per_cpu(tick_cpu_device, cpu);
264 dev = td->evtdev;
265
266 return dev->next_event;
267}
268#endif 257#endif
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 592bf584d1d2..7466cb811251 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -513,7 +513,7 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
513 * Check to make sure we don't switch to a non-highres capable 513 * Check to make sure we don't switch to a non-highres capable
514 * clocksource if the tick code is in oneshot mode (highres or nohz) 514 * clocksource if the tick code is in oneshot mode (highres or nohz)
515 */ 515 */
516 if (tick_oneshot_mode_active() && 516 if (tick_oneshot_mode_active() && ovr &&
517 !(ovr->flags & CLOCK_SOURCE_VALID_FOR_HRES)) { 517 !(ovr->flags & CLOCK_SOURCE_VALID_FOR_HRES)) {
518 printk(KERN_WARNING "%s clocksource is not HRT compatible. " 518 printk(KERN_WARNING "%s clocksource is not HRT compatible. "
519 "Cannot switch while in HRT/NOHZ mode\n", ovr->name); 519 "Cannot switch while in HRT/NOHZ mode\n", ovr->name);
diff --git a/kernel/timer.c b/kernel/timer.c
index 0b36b9e5cc8b..a7f07d5a6241 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -714,7 +714,7 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
714 * networking code - if the timer is re-modified 714 * networking code - if the timer is re-modified
715 * to be the same thing then just return: 715 * to be the same thing then just return:
716 */ 716 */
717 if (timer->expires == expires && timer_pending(timer)) 717 if (timer_pending(timer) && timer->expires == expires)
718 return 1; 718 return 1;
719 719
720 return __mod_timer(timer, expires, false, TIMER_NOT_PINNED); 720 return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 1551f47e7669..019f380fd764 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -226,13 +226,13 @@ config BOOT_TRACER
226 the timings of the initcalls and traces key events and the identity 226 the timings of the initcalls and traces key events and the identity
227 of tasks that can cause boot delays, such as context-switches. 227 of tasks that can cause boot delays, such as context-switches.
228 228
229 Its aim is to be parsed by the /scripts/bootgraph.pl tool to 229 Its aim is to be parsed by the scripts/bootgraph.pl tool to
230 produce pretty graphics about boot inefficiencies, giving a visual 230 produce pretty graphics about boot inefficiencies, giving a visual
231 representation of the delays during initcalls - but the raw 231 representation of the delays during initcalls - but the raw
232 /debug/tracing/trace text output is readable too. 232 /debug/tracing/trace text output is readable too.
233 233
234 You must pass in ftrace=initcall to the kernel command line 234 You must pass in initcall_debug and ftrace=initcall to the kernel
235 to enable this on bootup. 235 command line to enable this on bootup.
236 236
237config TRACE_BRANCH_PROFILING 237config TRACE_BRANCH_PROFILING
238 bool 238 bool
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 39af8af6fc30..1090b0aed9ba 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -22,6 +22,7 @@
22#include <linux/init.h> 22#include <linux/init.h>
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/debugfs.h> 24#include <linux/debugfs.h>
25#include <linux/smp_lock.h>
25#include <linux/time.h> 26#include <linux/time.h>
26#include <linux/uaccess.h> 27#include <linux/uaccess.h>
27 28
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f3716bf04df6..1e1d23c26308 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -768,7 +768,7 @@ static struct tracer_stat function_stats __initdata = {
768 .stat_show = function_stat_show 768 .stat_show = function_stat_show
769}; 769};
770 770
771static void ftrace_profile_debugfs(struct dentry *d_tracer) 771static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
772{ 772{
773 struct ftrace_profile_stat *stat; 773 struct ftrace_profile_stat *stat;
774 struct dentry *entry; 774 struct dentry *entry;
@@ -786,7 +786,6 @@ static void ftrace_profile_debugfs(struct dentry *d_tracer)
786 * The files created are permanent, if something happens 786 * The files created are permanent, if something happens
787 * we still do not free memory. 787 * we still do not free memory.
788 */ 788 */
789 kfree(stat);
790 WARN(1, 789 WARN(1,
791 "Could not allocate stat file for cpu %d\n", 790 "Could not allocate stat file for cpu %d\n",
792 cpu); 791 cpu);
@@ -813,7 +812,7 @@ static void ftrace_profile_debugfs(struct dentry *d_tracer)
813} 812}
814 813
815#else /* CONFIG_FUNCTION_PROFILER */ 814#else /* CONFIG_FUNCTION_PROFILER */
816static void ftrace_profile_debugfs(struct dentry *d_tracer) 815static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
817{ 816{
818} 817}
819#endif /* CONFIG_FUNCTION_PROFILER */ 818#endif /* CONFIG_FUNCTION_PROFILER */
@@ -1663,7 +1662,7 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
1663 1662
1664 mutex_lock(&ftrace_regex_lock); 1663 mutex_lock(&ftrace_regex_lock);
1665 if ((file->f_mode & FMODE_WRITE) && 1664 if ((file->f_mode & FMODE_WRITE) &&
1666 !(file->f_flags & O_APPEND)) 1665 (file->f_flags & O_TRUNC))
1667 ftrace_filter_reset(enable); 1666 ftrace_filter_reset(enable);
1668 1667
1669 if (file->f_mode & FMODE_READ) { 1668 if (file->f_mode & FMODE_READ) {
@@ -2578,7 +2577,7 @@ ftrace_graph_open(struct inode *inode, struct file *file)
2578 2577
2579 mutex_lock(&graph_lock); 2578 mutex_lock(&graph_lock);
2580 if ((file->f_mode & FMODE_WRITE) && 2579 if ((file->f_mode & FMODE_WRITE) &&
2581 !(file->f_flags & O_APPEND)) { 2580 (file->f_flags & O_TRUNC)) {
2582 ftrace_graph_count = 0; 2581 ftrace_graph_count = 0;
2583 memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs)); 2582 memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
2584 } 2583 }
@@ -2597,6 +2596,14 @@ ftrace_graph_open(struct inode *inode, struct file *file)
2597} 2596}
2598 2597
2599static int 2598static int
2599ftrace_graph_release(struct inode *inode, struct file *file)
2600{
2601 if (file->f_mode & FMODE_READ)
2602 seq_release(inode, file);
2603 return 0;
2604}
2605
2606static int
2600ftrace_set_func(unsigned long *array, int *idx, char *buffer) 2607ftrace_set_func(unsigned long *array, int *idx, char *buffer)
2601{ 2608{
2602 struct dyn_ftrace *rec; 2609 struct dyn_ftrace *rec;
@@ -2725,9 +2732,10 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
2725} 2732}
2726 2733
2727static const struct file_operations ftrace_graph_fops = { 2734static const struct file_operations ftrace_graph_fops = {
2728 .open = ftrace_graph_open, 2735 .open = ftrace_graph_open,
2729 .read = seq_read, 2736 .read = seq_read,
2730 .write = ftrace_graph_write, 2737 .write = ftrace_graph_write,
2738 .release = ftrace_graph_release,
2731}; 2739};
2732#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 2740#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
2733 2741
@@ -3160,10 +3168,10 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
3160 3168
3161 ret = proc_dointvec(table, write, file, buffer, lenp, ppos); 3169 ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
3162 3170
3163 if (ret || !write || (last_ftrace_enabled == ftrace_enabled)) 3171 if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled))
3164 goto out; 3172 goto out;
3165 3173
3166 last_ftrace_enabled = ftrace_enabled; 3174 last_ftrace_enabled = !!ftrace_enabled;
3167 3175
3168 if (ftrace_enabled) { 3176 if (ftrace_enabled) {
3169 3177
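
The ftrace_enable_sysctl change compares and stores !!ftrace_enabled so that writing any non-zero value (for example 2) is treated the same as 1 when deciding whether the state actually changed. A tiny standalone illustration of the normalization:

```c
#include <stdio.h>

int main(void)
{
	int last_enabled = 1;	/* previously switched on with "1" */
	int new_value    = 2;	/* user writes "2" to the sysctl */

	/* Without normalization the values differ and the code would re-toggle. */
	printf("raw compare     : %s\n",
	       last_enabled == new_value ? "unchanged" : "changed");

	/* !! folds every non-zero value to 1 before comparing and storing. */
	printf("normalized check: %s\n",
	       last_enabled == !!new_value ? "unchanged" : "changed");

	last_enabled = !!new_value;
	return 0;
}
```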
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index bf27bb7a63e2..a330513d96ce 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -735,6 +735,7 @@ ring_buffer_free(struct ring_buffer *buffer)
735 735
736 put_online_cpus(); 736 put_online_cpus();
737 737
738 kfree(buffer->buffers);
738 free_cpumask_var(buffer->cpumask); 739 free_cpumask_var(buffer->cpumask);
739 740
740 kfree(buffer); 741 kfree(buffer);
@@ -1785,7 +1786,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
1785 */ 1786 */
1786 RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing)); 1787 RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
1787 1788
1788 if (!rb_try_to_discard(cpu_buffer, event)) 1789 if (rb_try_to_discard(cpu_buffer, event))
1789 goto out; 1790 goto out;
1790 1791
1791 /* 1792 /*
@@ -2383,7 +2384,6 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
2383 * the box. Return the padding, and we will release 2384 * the box. Return the padding, and we will release
2384 * the current locks, and try again. 2385 * the current locks, and try again.
2385 */ 2386 */
2386 rb_advance_reader(cpu_buffer);
2387 return event; 2387 return event;
2388 2388
2389 case RINGBUF_TYPE_TIME_EXTEND: 2389 case RINGBUF_TYPE_TIME_EXTEND:
@@ -2486,7 +2486,7 @@ static inline int rb_ok_to_lock(void)
2486 * buffer too. A one time deal is all you get from reading 2486 * buffer too. A one time deal is all you get from reading
2487 * the ring buffer from an NMI. 2487 * the ring buffer from an NMI.
2488 */ 2488 */
2489 if (likely(!in_nmi() && !oops_in_progress)) 2489 if (likely(!in_nmi()))
2490 return 1; 2490 return 1;
2491 2491
2492 tracing_off_permanent(); 2492 tracing_off_permanent();
@@ -2519,6 +2519,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
2519 if (dolock) 2519 if (dolock)
2520 spin_lock(&cpu_buffer->reader_lock); 2520 spin_lock(&cpu_buffer->reader_lock);
2521 event = rb_buffer_peek(buffer, cpu, ts); 2521 event = rb_buffer_peek(buffer, cpu, ts);
2522 if (event && event->type_len == RINGBUF_TYPE_PADDING)
2523 rb_advance_reader(cpu_buffer);
2522 if (dolock) 2524 if (dolock)
2523 spin_unlock(&cpu_buffer->reader_lock); 2525 spin_unlock(&cpu_buffer->reader_lock);
2524 local_irq_restore(flags); 2526 local_irq_restore(flags);
@@ -2590,12 +2592,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
2590 spin_lock(&cpu_buffer->reader_lock); 2592 spin_lock(&cpu_buffer->reader_lock);
2591 2593
2592 event = rb_buffer_peek(buffer, cpu, ts); 2594 event = rb_buffer_peek(buffer, cpu, ts);
2593 if (!event) 2595 if (event)
2594 goto out_unlock; 2596 rb_advance_reader(cpu_buffer);
2595
2596 rb_advance_reader(cpu_buffer);
2597 2597
2598 out_unlock:
2599 if (dolock) 2598 if (dolock)
2600 spin_unlock(&cpu_buffer->reader_lock); 2599 spin_unlock(&cpu_buffer->reader_lock);
2601 local_irq_restore(flags); 2600 local_irq_restore(flags);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 3aa0a0dfdfa8..c22b40f8f576 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -17,6 +17,7 @@
17#include <linux/writeback.h> 17#include <linux/writeback.h>
18#include <linux/kallsyms.h> 18#include <linux/kallsyms.h>
19#include <linux/seq_file.h> 19#include <linux/seq_file.h>
20#include <linux/smp_lock.h>
20#include <linux/notifier.h> 21#include <linux/notifier.h>
21#include <linux/irqflags.h> 22#include <linux/irqflags.h>
22#include <linux/debugfs.h> 23#include <linux/debugfs.h>
@@ -847,6 +848,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
847 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | 848 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
848 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); 849 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
849} 850}
851EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
850 852
851struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, 853struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
852 int type, 854 int type,
@@ -2030,7 +2032,7 @@ static int tracing_open(struct inode *inode, struct file *file)
2030 2032
2031 /* If this file was open for write, then erase contents */ 2033 /* If this file was open for write, then erase contents */
2032 if ((file->f_mode & FMODE_WRITE) && 2034 if ((file->f_mode & FMODE_WRITE) &&
2033 !(file->f_flags & O_APPEND)) { 2035 (file->f_flags & O_TRUNC)) {
2034 long cpu = (long) inode->i_private; 2036 long cpu = (long) inode->i_private;
2035 2037
2036 if (cpu == TRACE_PIPE_ALL_CPU) 2038 if (cpu == TRACE_PIPE_ALL_CPU)
@@ -3084,7 +3086,8 @@ tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
3084 break; 3086 break;
3085 } 3087 }
3086 3088
3087 trace_consume(iter); 3089 if (ret != TRACE_TYPE_NO_CONSUME)
3090 trace_consume(iter);
3088 rem -= count; 3091 rem -= count;
3089 if (!find_next_entry_inc(iter)) { 3092 if (!find_next_entry_inc(iter)) {
3090 rem = 0; 3093 rem = 0;
@@ -4232,8 +4235,11 @@ static void __ftrace_dump(bool disable_tracing)
4232 iter.pos = -1; 4235 iter.pos = -1;
4233 4236
4234 if (find_next_entry_inc(&iter) != NULL) { 4237 if (find_next_entry_inc(&iter) != NULL) {
4235 print_trace_line(&iter); 4238 int ret;
4236 trace_consume(&iter); 4239
4240 ret = print_trace_line(&iter);
4241 if (ret != TRACE_TYPE_NO_CONSUME)
4242 trace_consume(&iter);
4237 } 4243 }
4238 4244
4239 trace_printk_seq(&iter.seq); 4245 trace_printk_seq(&iter.seq);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 3548ae5cc780..8b9f4f6e9559 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -438,10 +438,6 @@ struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
438struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, 438struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
439 int *ent_cpu, u64 *ent_ts); 439 int *ent_cpu, u64 *ent_ts);
440 440
441void tracing_generic_entry_update(struct trace_entry *entry,
442 unsigned long flags,
443 int pc);
444
445void default_wait_pipe(struct trace_iterator *iter); 441void default_wait_pipe(struct trace_iterator *iter);
446void poll_wait_pipe(struct trace_iterator *iter); 442void poll_wait_pipe(struct trace_iterator *iter);
447 443
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index 5b5895afecfe..11ba5bb4ed0a 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -14,7 +14,7 @@ int ftrace_profile_enable(int event_id)
14 14
15 mutex_lock(&event_mutex); 15 mutex_lock(&event_mutex);
16 list_for_each_entry(event, &ftrace_events, list) { 16 list_for_each_entry(event, &ftrace_events, list) {
17 if (event->id == event_id) { 17 if (event->id == event_id && event->profile_enable) {
18 ret = event->profile_enable(event); 18 ret = event->profile_enable(event);
19 break; 19 break;
20 } 20 }
diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h
index 5e32e375134d..6db005e12487 100644
--- a/kernel/trace/trace_event_types.h
+++ b/kernel/trace/trace_event_types.h
@@ -26,6 +26,9 @@ TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET,
26 ftrace_graph_ret_entry, ignore, 26 ftrace_graph_ret_entry, ignore,
27 TRACE_STRUCT( 27 TRACE_STRUCT(
28 TRACE_FIELD(unsigned long, ret.func, func) 28 TRACE_FIELD(unsigned long, ret.func, func)
29 TRACE_FIELD(unsigned long long, ret.calltime, calltime)
30 TRACE_FIELD(unsigned long long, ret.rettime, rettime)
31 TRACE_FIELD(unsigned long, ret.overrun, overrun)
29 TRACE_FIELD(int, ret.depth, depth) 32 TRACE_FIELD(int, ret.depth, depth)
30 ), 33 ),
31 TP_RAW_FMT("<-- %lx (%d)") 34 TP_RAW_FMT("<-- %lx (%d)")
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 53c8fd376a88..e75276a49cf5 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -376,7 +376,7 @@ ftrace_event_seq_open(struct inode *inode, struct file *file)
376 const struct seq_operations *seq_ops; 376 const struct seq_operations *seq_ops;
377 377
378 if ((file->f_mode & FMODE_WRITE) && 378 if ((file->f_mode & FMODE_WRITE) &&
379 !(file->f_flags & O_APPEND)) 379 (file->f_flags & O_TRUNC))
380 ftrace_clear_events(); 380 ftrace_clear_events();
381 381
382 seq_ops = inode->i_private; 382 seq_ops = inode->i_private;
@@ -940,7 +940,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
940 entry = trace_create_file("enable", 0644, call->dir, call, 940 entry = trace_create_file("enable", 0644, call->dir, call,
941 enable); 941 enable);
942 942
943 if (call->id) 943 if (call->id && call->profile_enable)
944 entry = trace_create_file("id", 0444, call->dir, call, 944 entry = trace_create_file("id", 0444, call->dir, call,
945 id); 945 id);
946 946
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 936c621bbf46..f32dc9d1ea7b 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -624,9 +624,6 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
624 return -ENOSPC; 624 return -ENOSPC;
625 } 625 }
626 626
627 filter->preds[filter->n_preds] = pred;
628 filter->n_preds++;
629
630 list_for_each_entry(call, &ftrace_events, list) { 627 list_for_each_entry(call, &ftrace_events, list) {
631 628
632 if (!call->define_fields) 629 if (!call->define_fields)
@@ -643,6 +640,9 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
643 } 640 }
644 replace_filter_string(call->filter, filter_string); 641 replace_filter_string(call->filter, filter_string);
645 } 642 }
643
644 filter->preds[filter->n_preds] = pred;
645 filter->n_preds++;
646out: 646out:
647 return err; 647 return err;
648} 648}
@@ -1029,12 +1029,17 @@ static int replace_preds(struct event_subsystem *system,
1029 1029
1030 if (elt->op == OP_AND || elt->op == OP_OR) { 1030 if (elt->op == OP_AND || elt->op == OP_OR) {
1031 pred = create_logical_pred(elt->op); 1031 pred = create_logical_pred(elt->op);
1032 if (!pred)
1033 return -ENOMEM;
1032 if (call) { 1034 if (call) {
1033 err = filter_add_pred(ps, call, pred); 1035 err = filter_add_pred(ps, call, pred);
1034 filter_free_pred(pred); 1036 filter_free_pred(pred);
1035 } else 1037 } else {
1036 err = filter_add_subsystem_pred(ps, system, 1038 err = filter_add_subsystem_pred(ps, system,
1037 pred, filter_string); 1039 pred, filter_string);
1040 if (err)
1041 filter_free_pred(pred);
1042 }
1038 if (err) 1043 if (err)
1039 return err; 1044 return err;
1040 1045
@@ -1048,12 +1053,17 @@ static int replace_preds(struct event_subsystem *system,
1048 } 1053 }
1049 1054
1050 pred = create_pred(elt->op, operand1, operand2); 1055 pred = create_pred(elt->op, operand1, operand2);
1056 if (!pred)
1057 return -ENOMEM;
1051 if (call) { 1058 if (call) {
1052 err = filter_add_pred(ps, call, pred); 1059 err = filter_add_pred(ps, call, pred);
1053 filter_free_pred(pred); 1060 filter_free_pred(pred);
1054 } else 1061 } else {
1055 err = filter_add_subsystem_pred(ps, system, pred, 1062 err = filter_add_subsystem_pred(ps, system, pred,
1056 filter_string); 1063 filter_string);
1064 if (err)
1065 filter_free_pred(pred);
1066 }
1057 if (err) 1067 if (err)
1058 return err; 1068 return err;
1059 1069
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 7402144bff21..75ef000613c3 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -363,7 +363,7 @@ ftrace_trace_onoff_callback(char *glob, char *cmd, char *param, int enable)
363 out_reg: 363 out_reg:
364 ret = register_ftrace_function_probe(glob, ops, count); 364 ret = register_ftrace_function_probe(glob, ops, count);
365 365
366 return ret; 366 return ret < 0 ? ret : 0;
367} 367}
368 368
369static struct ftrace_func_command ftrace_traceon_cmd = { 369static struct ftrace_func_command ftrace_traceon_cmd = {
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index d2249abafb53..420ec3487579 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -843,9 +843,16 @@ print_graph_function(struct trace_iterator *iter)
843 843
844 switch (entry->type) { 844 switch (entry->type) {
845 case TRACE_GRAPH_ENT: { 845 case TRACE_GRAPH_ENT: {
846 struct ftrace_graph_ent_entry *field; 846 /*
847 * print_graph_entry() may consume the current event,
848 * thus @field may become invalid, so we need to save it.
849 * sizeof(struct ftrace_graph_ent_entry) is very small,
850 * it can be safely saved at the stack.
851 */
852 struct ftrace_graph_ent_entry *field, saved;
847 trace_assign_type(field, entry); 853 trace_assign_type(field, entry);
848 return print_graph_entry(field, s, iter); 854 saved = *field;
855 return print_graph_entry(&saved, s, iter);
849 } 856 }
850 case TRACE_GRAPH_RET: { 857 case TRACE_GRAPH_RET: {
851 struct ftrace_graph_ret_entry *field; 858 struct ftrace_graph_ret_entry *field;
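
print_graph_function() now copies the small ftrace_graph_ent_entry onto the stack because print_graph_entry() may consume the ring-buffer event and leave the original pointer dangling. Below is a userspace analogy of that copy-before-consume pattern, with a toy global slot standing in for the ring buffer; it only illustrates the idea, not the tracer internals.

```c
/* Analogy: peek() returns a pointer into storage that consume() may recycle. */
#include <stdio.h>
#include <string.h>

struct entry { int id; char msg[16]; };

static struct entry slot = { 1, "first" };

static struct entry *peek(void) { return &slot; }

static void consume(void)	/* recycles the slot, clobbering *peek() */
{
	slot.id = 2;
	strcpy(slot.msg, "second");
}

int main(void)
{
	struct entry *field = peek();
	struct entry saved = *field;	/* small struct: cheap to copy */

	consume();			/* clobbers what field points at */

	printf("stale pointer: %d %s\n", field->id, field->msg);
	printf("saved copy   : %d %s\n", saved.id, saved.msg);
	return 0;
}
```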
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 7938f3ae93e3..e0c2545622e8 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -27,8 +27,7 @@ void trace_print_seq(struct seq_file *m, struct trace_seq *s)
27{ 27{
28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; 28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
29 29
30 s->buffer[len] = 0; 30 seq_write(m, s->buffer, len);
31 seq_puts(m, s->buffer);
32 31
33 trace_seq_init(s); 32 trace_seq_init(s);
34} 33}
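
trace_print_seq() now hands the buffer and its length to seq_write() instead of NUL-terminating it and calling seq_puts(), so a completely full trace_seq no longer needs a spare byte for the terminator. A userspace analogy contrasting a length-bounded write with a string write; it demonstrates only the general idea, not the seq_file API.

```c
#include <stdio.h>
#include <string.h>

int main(void)
{
	char buf[8];
	size_t len = sizeof(buf);

	memset(buf, 'x', sizeof(buf));	/* buffer is full: no room for a '\0' */

	/* String output would require shrinking the data to fit a terminator: */
	/* fputs(buf, stdout); */	/* undefined: buf is not NUL-terminated */

	/* Length-bounded output writes exactly len bytes, terminator-free. */
	fwrite(buf, 1, len, stdout);
	fputc('\n', stdout);
	return 0;
}
```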
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 7b6278110827..687699d365ae 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -176,7 +176,7 @@ static int t_show(struct seq_file *m, void *v)
176 const char *str = *fmt; 176 const char *str = *fmt;
177 int i; 177 int i;
178 178
179 seq_printf(m, "0x%lx : \"", (unsigned long)fmt); 179 seq_printf(m, "0x%lx : \"", *(unsigned long *)fmt);
180 180
181 /* 181 /*
182 * Tabs and new lines need to be converted. 182 * Tabs and new lines need to be converted.
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 2d7aebd71dbd..6a2a9d484cd6 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -301,17 +301,14 @@ static const struct seq_operations stack_trace_seq_ops = {
301 301
302static int stack_trace_open(struct inode *inode, struct file *file) 302static int stack_trace_open(struct inode *inode, struct file *file)
303{ 303{
304 int ret; 304 return seq_open(file, &stack_trace_seq_ops);
305
306 ret = seq_open(file, &stack_trace_seq_ops);
307
308 return ret;
309} 305}
310 306
311static const struct file_operations stack_trace_fops = { 307static const struct file_operations stack_trace_fops = {
312 .open = stack_trace_open, 308 .open = stack_trace_open,
313 .read = seq_read, 309 .read = seq_read,
314 .llseek = seq_lseek, 310 .llseek = seq_lseek,
311 .release = seq_release,
315}; 312};
316 313
317int 314int
@@ -326,10 +323,10 @@ stack_trace_sysctl(struct ctl_table *table, int write,
         ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
 
         if (ret || !write ||
-            (last_stack_tracer_enabled == stack_tracer_enabled))
+            (last_stack_tracer_enabled == !!stack_tracer_enabled))
                 goto out;
 
-        last_stack_tracer_enabled = stack_tracer_enabled;
+        last_stack_tracer_enabled = !!stack_tracer_enabled;
 
         if (stack_tracer_enabled)
                 register_ftrace_function(&trace_ops);
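
stack_trace_sysctl() now compares and caches !!stack_tracer_enabled rather than the raw integer the user wrote. The !! folds any non-zero value to 1, so writing, say, 2 while the tracer is already on no longer looks like a state change that would register the ftrace ops a second time. A small standalone illustration of the normalization:

#include <stdio.h>

int main(void)
{
        int last_enabled = 1;   /* tracer already on */
        int new_value = 2;      /* user echoed "2" into the sysctl */

        /* Raw compare: 1 != 2 looks like a change and would re-register. */
        printf("raw compare sees change:        %d\n", last_enabled != new_value);

        /* Normalized: !!2 == 1, so the state is correctly seen as unchanged. */
        printf("normalized compare sees change: %d\n", last_enabled != !!new_value);
        return 0;
}
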
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index e66f5e493342..aea321c82fa0 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -73,7 +73,7 @@ static struct rb_node *release_next(struct rb_node *node)
         }
 }
 
-static void reset_stat_session(struct stat_session *session)
+static void __reset_stat_session(struct stat_session *session)
 {
         struct rb_node *node = session->stat_root.rb_node;
 
@@ -83,10 +83,17 @@ static void reset_stat_session(struct stat_session *session)
         session->stat_root = RB_ROOT;
 }
 
+static void reset_stat_session(struct stat_session *session)
+{
+        mutex_lock(&session->stat_mutex);
+        __reset_stat_session(session);
+        mutex_unlock(&session->stat_mutex);
+}
+
 static void destroy_session(struct stat_session *session)
 {
         debugfs_remove(session->file);
-        reset_stat_session(session);
+        __reset_stat_session(session);
         mutex_destroy(&session->stat_mutex);
         kfree(session);
 }
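
The split above follows the usual kernel naming convention: __reset_stat_session() does the work without touching stat_mutex and is meant for callers that already hold the lock or are tearing the session down, while reset_stat_session() is the locked wrapper for everyone else. A minimal pthread sketch of the same convention (names invented for illustration):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t stat_mutex = PTHREAD_MUTEX_INITIALIZER;
static int stat_count = 5;

/* Lock-free helper: caller must hold stat_mutex or be the sole user. */
static void __reset_stats(void)
{
        stat_count = 0;
}

/* Locked wrapper for ordinary callers. */
static void reset_stats(void)
{
        pthread_mutex_lock(&stat_mutex);
        __reset_stats();
        pthread_mutex_unlock(&stat_mutex);
}

int main(void)
{
        reset_stats();                  /* normal path: takes the lock itself */

        pthread_mutex_lock(&stat_mutex);
        __reset_stats();                /* lock already held: use the helper */
        pthread_mutex_unlock(&stat_mutex);

        printf("stat_count = %d\n", stat_count);
        return 0;
}
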
@@ -150,7 +157,7 @@ static int stat_seq_init(struct stat_session *session)
         int i;
 
         mutex_lock(&session->stat_mutex);
-        reset_stat_session(session);
+        __reset_stat_session(session);
 
         if (!ts->stat_cmp)
                 ts->stat_cmp = dummy_cmp;
@@ -183,7 +190,7 @@ exit:
         return ret;
 
 exit_free_rbtree:
-        reset_stat_session(session);
+        __reset_stat_session(session);
         mutex_unlock(&session->stat_mutex);
         return ret;
 }
@@ -250,16 +257,21 @@ static const struct seq_operations trace_stat_seq_ops = {
 static int tracing_stat_open(struct inode *inode, struct file *file)
 {
         int ret;
-
+        struct seq_file *m;
         struct stat_session *session = inode->i_private;
 
+        ret = stat_seq_init(session);
+        if (ret)
+                return ret;
+
         ret = seq_open(file, &trace_stat_seq_ops);
-        if (!ret) {
-                struct seq_file *m = file->private_data;
-                m->private = session;
-                ret = stat_seq_init(session);
+        if (ret) {
+                reset_stat_session(session);
+                return ret;
         }
 
+        m = file->private_data;
+        m->private = session;
         return ret;
 }
 
@@ -270,11 +282,9 @@ static int tracing_stat_release(struct inode *i, struct file *f)
 {
         struct stat_session *session = i->i_private;
 
-        mutex_lock(&session->stat_mutex);
         reset_stat_session(session);
-        mutex_unlock(&session->stat_mutex);
 
-        return 0;
+        return seq_release(i, f);
 }
 
 static const struct file_operations tracing_stat_fops = {
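
Taken together, the trace_stat changes make tracing_stat_open() build the stat tree before seq_open(), unwind it if seq_open() fails, and make tracing_stat_release() pair seq_open() with seq_release() so the seq_file allocated at open time is freed. A schematic userspace analogue of that acquire-in-order / unwind-on-failure / mirror-on-close shape (all function names below are stand-ins, not kernel APIs):

#include <stdio.h>

/* Stand-ins for stat_seq_init()/reset_stat_session() and seq_open()/seq_release(). */
static int  build_tree(void)  { puts("build stat tree"); return 0; }
static void drop_tree(void)   { puts("drop stat tree");  }
static int  attach_view(void) { puts("attach seq view"); return 0; }
static void detach_view(void) { puts("detach seq view"); }

static int do_open(void)
{
        int ret = build_tree();         /* step 1: data first */
        if (ret)
                return ret;

        ret = attach_view();            /* step 2: iterator second */
        if (ret) {
                drop_tree();            /* undo step 1 on failure */
                return ret;
        }
        return 0;
}

static void do_release(void)
{
        drop_tree();                    /* undo step 1, as the hunk does */
        detach_view();                  /* undo step 2 */
}

int main(void)
{
        if (do_open() == 0)
                do_release();
        return 0;
}
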