aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/acct.c10
-rw-r--r--kernel/async.c1
-rw-r--r--kernel/audit.c3
-rw-r--r--kernel/audit_tree.c1
-rw-r--r--kernel/audit_watch.c1
-rw-r--r--kernel/auditfilter.c1
-rw-r--r--kernel/auditsc.c3
-rw-r--r--kernel/cgroup.c694
-rw-r--r--kernel/cgroup_freezer.c15
-rw-r--r--kernel/compat.c1
-rw-r--r--kernel/cpu.c1
-rw-r--r--kernel/cpuset.c106
-rw-r--r--kernel/cred.c11
-rw-r--r--kernel/early_res.c6
-rw-r--r--kernel/exit.c5
-rw-r--r--kernel/fork.c50
-rw-r--r--kernel/hw_breakpoint.c11
-rw-r--r--kernel/irq/chip.c37
-rw-r--r--kernel/irq/devres.c4
-rw-r--r--kernel/irq/manage.c32
-rw-r--r--kernel/irq/numa_migrate.c1
-rw-r--r--kernel/irq/proc.c1
-rw-r--r--kernel/kallsyms.c1
-rw-r--r--kernel/kgdb.c205
-rw-r--r--kernel/kprobes.c3
-rw-r--r--kernel/ksysfs.c2
-rw-r--r--kernel/kthread.c2
-rw-r--r--kernel/latencytop.c1
-rw-r--r--kernel/lockdep.c32
-rw-r--r--kernel/module.c139
-rw-r--r--kernel/nsproxy.c14
-rw-r--r--kernel/padata.c1
-rw-r--r--kernel/params.c6
-rw-r--r--kernel/perf_event.c139
-rw-r--r--kernel/pid.c4
-rw-r--r--kernel/pid_namespace.c8
-rw-r--r--kernel/posix-cpu-timers.c10
-rw-r--r--kernel/power/hibernate.c1
-rw-r--r--kernel/power/hibernate_nvs.c1
-rw-r--r--kernel/power/process.c5
-rw-r--r--kernel/power/snapshot.c1
-rw-r--r--kernel/power/suspend.c1
-rw-r--r--kernel/power/swap.c1
-rw-r--r--kernel/power/user.c2
-rw-r--r--kernel/rcupdate.c30
-rw-r--r--kernel/rcutree.h21
-rw-r--r--kernel/rcutree_plugin.h8
-rw-r--r--kernel/res_counter.c1
-rw-r--r--kernel/resource.c44
-rw-r--r--kernel/sched.c37
-rw-r--r--kernel/sched_cpupri.c3
-rw-r--r--kernel/sched_debug.c4
-rw-r--r--kernel/sched_fair.c2
-rw-r--r--kernel/sched_rt.c7
-rw-r--r--kernel/slow-work.c2
-rw-r--r--kernel/slow-work.h8
-rw-r--r--kernel/smp.c1
-rw-r--r--kernel/softlockup.c4
-rw-r--r--kernel/srcu.c1
-rw-r--r--kernel/sys.c68
-rw-r--r--kernel/sys_ni.c1
-rw-r--r--kernel/sysctl.c37
-rw-r--r--kernel/sysctl_binary.c1
-rw-r--r--kernel/taskstats.c1
-rw-r--r--kernel/time.c1
-rw-r--r--kernel/time/clocksource.c4
-rw-r--r--kernel/time/tick-oneshot.c52
-rw-r--r--kernel/time/timecompare.c1
-rw-r--r--kernel/time/timekeeping.c3
-rw-r--r--kernel/time/timer_list.c3
-rw-r--r--kernel/timer.c2
-rw-r--r--kernel/trace/Makefile2
-rw-r--r--kernel/trace/blktrace.c1
-rw-r--r--kernel/trace/ftrace.c31
-rw-r--r--kernel/trace/power-traces.c1
-rw-r--r--kernel/trace/ring_buffer.c39
-rw-r--r--kernel/trace/trace.c51
-rw-r--r--kernel/trace/trace.h5
-rw-r--r--kernel/trace/trace_clock.c5
-rw-r--r--kernel/trace/trace_event_perf.c (renamed from kernel/trace/trace_event_profile.c)63
-rw-r--r--kernel/trace/trace_events.c3
-rw-r--r--kernel/trace/trace_events_filter.c1
-rw-r--r--kernel/trace/trace_functions_graph.c28
-rw-r--r--kernel/trace/trace_kprobe.c29
-rw-r--r--kernel/trace/trace_ksym.c1
-rw-r--r--kernel/trace/trace_mmiotrace.c1
-rw-r--r--kernel/trace/trace_selftest.c1
-rw-r--r--kernel/trace/trace_stat.c1
-rw-r--r--kernel/trace/trace_syscalls.c73
-rw-r--r--kernel/trace/trace_workqueue.c1
90 files changed, 1630 insertions, 628 deletions
diff --git a/kernel/acct.c b/kernel/acct.c
index a6605ca921b6..24f8c81fc48d 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -588,16 +588,6 @@ out:
588} 588}
589 589
590/** 590/**
591 * acct_init_pacct - initialize a new pacct_struct
592 * @pacct: per-process accounting info struct to initialize
593 */
594void acct_init_pacct(struct pacct_struct *pacct)
595{
596 memset(pacct, 0, sizeof(struct pacct_struct));
597 pacct->ac_utime = pacct->ac_stime = cputime_zero;
598}
599
600/**
601 * acct_collect - collect accounting information into pacct_struct 591 * acct_collect - collect accounting information into pacct_struct
602 * @exitcode: task exit code 592 * @exitcode: task exit code
603 * @group_dead: not 0, if this thread is the last one in the process. 593 * @group_dead: not 0, if this thread is the last one in the process.
diff --git a/kernel/async.c b/kernel/async.c
index 27235f5de198..15319d6c18fe 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -56,6 +56,7 @@ asynchronous and synchronous parts of the kernel.
56#include <linux/init.h> 56#include <linux/init.h>
57#include <linux/kthread.h> 57#include <linux/kthread.h>
58#include <linux/delay.h> 58#include <linux/delay.h>
59#include <linux/slab.h>
59#include <asm/atomic.h> 60#include <asm/atomic.h>
60 61
61static async_cookie_t next_cookie = 1; 62static async_cookie_t next_cookie = 1;
diff --git a/kernel/audit.c b/kernel/audit.c
index 5feed232be9d..c71bd26631a2 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -46,6 +46,7 @@
46#include <asm/atomic.h> 46#include <asm/atomic.h>
47#include <linux/mm.h> 47#include <linux/mm.h>
48#include <linux/module.h> 48#include <linux/module.h>
49#include <linux/slab.h>
49#include <linux/err.h> 50#include <linux/err.h>
50#include <linux/kthread.h> 51#include <linux/kthread.h>
51 52
@@ -398,7 +399,7 @@ static void kauditd_send_skb(struct sk_buff *skb)
398 skb_get(skb); 399 skb_get(skb);
399 err = netlink_unicast(audit_sock, skb, audit_nlk_pid, 0); 400 err = netlink_unicast(audit_sock, skb, audit_nlk_pid, 0);
400 if (err < 0) { 401 if (err < 0) {
401 BUG_ON(err != -ECONNREFUSED); /* Shoudn't happen */ 402 BUG_ON(err != -ECONNREFUSED); /* Shouldn't happen */
402 printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid); 403 printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid);
403 audit_log_lost("auditd dissapeared\n"); 404 audit_log_lost("auditd dissapeared\n");
404 audit_pid = 0; 405 audit_pid = 0;
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 028e85663f27..46a57b57a335 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -3,6 +3,7 @@
3#include <linux/namei.h> 3#include <linux/namei.h>
4#include <linux/mount.h> 4#include <linux/mount.h>
5#include <linux/kthread.h> 5#include <linux/kthread.h>
6#include <linux/slab.h>
6 7
7struct audit_tree; 8struct audit_tree;
8struct audit_chunk; 9struct audit_chunk;
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index cc7e87936cbc..8df43696f4ba 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -27,6 +27,7 @@
27#include <linux/namei.h> 27#include <linux/namei.h>
28#include <linux/netlink.h> 28#include <linux/netlink.h>
29#include <linux/sched.h> 29#include <linux/sched.h>
30#include <linux/slab.h>
30#include <linux/inotify.h> 31#include <linux/inotify.h>
31#include <linux/security.h> 32#include <linux/security.h>
32#include "audit.h" 33#include "audit.h"
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index a70604047f3c..ce08041f578d 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -27,6 +27,7 @@
27#include <linux/namei.h> 27#include <linux/namei.h>
28#include <linux/netlink.h> 28#include <linux/netlink.h>
29#include <linux/sched.h> 29#include <linux/sched.h>
30#include <linux/slab.h>
30#include <linux/security.h> 31#include <linux/security.h>
31#include "audit.h" 32#include "audit.h"
32 33
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index f3a461c0970a..3828ad5fb8f1 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -49,6 +49,7 @@
49#include <linux/namei.h> 49#include <linux/namei.h>
50#include <linux/mm.h> 50#include <linux/mm.h>
51#include <linux/module.h> 51#include <linux/module.h>
52#include <linux/slab.h>
52#include <linux/mount.h> 53#include <linux/mount.h>
53#include <linux/socket.h> 54#include <linux/socket.h>
54#include <linux/mqueue.h> 55#include <linux/mqueue.h>
@@ -1893,7 +1894,7 @@ static int audit_inc_name_count(struct audit_context *context,
1893{ 1894{
1894 if (context->name_count >= AUDIT_NAMES) { 1895 if (context->name_count >= AUDIT_NAMES) {
1895 if (inode) 1896 if (inode)
1896 printk(KERN_DEBUG "name_count maxed, losing inode data: " 1897 printk(KERN_DEBUG "audit: name_count maxed, losing inode data: "
1897 "dev=%02x:%02x, inode=%lu\n", 1898 "dev=%02x:%02x, inode=%lu\n",
1898 MAJOR(inode->i_sb->s_dev), 1899 MAJOR(inode->i_sb->s_dev),
1899 MINOR(inode->i_sb->s_dev), 1900 MINOR(inode->i_sb->s_dev),
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4fd90e129772..e2769e13980c 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4,6 +4,10 @@
4 * Based originally on the cpuset system, extracted by Paul Menage 4 * Based originally on the cpuset system, extracted by Paul Menage
5 * Copyright (C) 2006 Google, Inc 5 * Copyright (C) 2006 Google, Inc
6 * 6 *
7 * Notifications support
8 * Copyright (C) 2009 Nokia Corporation
9 * Author: Kirill A. Shutemov
10 *
7 * Copyright notices from the original cpuset code: 11 * Copyright notices from the original cpuset code:
8 * -------------------------------------------------- 12 * --------------------------------------------------
9 * Copyright (C) 2003 BULL SA. 13 * Copyright (C) 2003 BULL SA.
@@ -23,7 +27,6 @@
23 */ 27 */
24 28
25#include <linux/cgroup.h> 29#include <linux/cgroup.h>
26#include <linux/module.h>
27#include <linux/ctype.h> 30#include <linux/ctype.h>
28#include <linux/errno.h> 31#include <linux/errno.h>
29#include <linux/fs.h> 32#include <linux/fs.h>
@@ -44,6 +47,7 @@
44#include <linux/string.h> 47#include <linux/string.h>
45#include <linux/sort.h> 48#include <linux/sort.h>
46#include <linux/kmod.h> 49#include <linux/kmod.h>
50#include <linux/module.h>
47#include <linux/delayacct.h> 51#include <linux/delayacct.h>
48#include <linux/cgroupstats.h> 52#include <linux/cgroupstats.h>
49#include <linux/hash.h> 53#include <linux/hash.h>
@@ -52,15 +56,21 @@
52#include <linux/pid_namespace.h> 56#include <linux/pid_namespace.h>
53#include <linux/idr.h> 57#include <linux/idr.h>
54#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ 58#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
59#include <linux/eventfd.h>
60#include <linux/poll.h>
55 61
56#include <asm/atomic.h> 62#include <asm/atomic.h>
57 63
58static DEFINE_MUTEX(cgroup_mutex); 64static DEFINE_MUTEX(cgroup_mutex);
59 65
60/* Generate an array of cgroup subsystem pointers */ 66/*
67 * Generate an array of cgroup subsystem pointers. At boot time, this is
68 * populated up to CGROUP_BUILTIN_SUBSYS_COUNT, and modular subsystems are
69 * registered after that. The mutable section of this array is protected by
70 * cgroup_mutex.
71 */
61#define SUBSYS(_x) &_x ## _subsys, 72#define SUBSYS(_x) &_x ## _subsys,
62 73static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = {
63static struct cgroup_subsys *subsys[] = {
64#include <linux/cgroup_subsys.h> 74#include <linux/cgroup_subsys.h>
65}; 75};
66 76
@@ -147,6 +157,35 @@ struct css_id {
147 unsigned short stack[0]; /* Array of Length (depth+1) */ 157 unsigned short stack[0]; /* Array of Length (depth+1) */
148}; 158};
149 159
160/*
161 * cgroup_event represents events which userspace wants to receive.
162 */
163struct cgroup_event {
164 /*
165 * Cgroup which the event belongs to.
166 */
167 struct cgroup *cgrp;
168 /*
169 * Control file with which the event is associated.
170 */
171 struct cftype *cft;
172 /*
173 * eventfd to signal userspace about the event.
174 */
175 struct eventfd_ctx *eventfd;
176 /*
177 * Each of these is stored in a list by the cgroup.
178 */
179 struct list_head list;
180 /*
181 * All fields below are needed to unregister the event when
182 * userspace closes the eventfd.
183 */
184 poll_table pt;
185 wait_queue_head_t *wqh;
186 wait_queue_t wait;
187 struct work_struct remove;
188};
150 189
151/* The list of hierarchy roots */ 190/* The list of hierarchy roots */
152 191
@@ -250,7 +289,8 @@ struct cg_cgroup_link {
250static struct css_set init_css_set; 289static struct css_set init_css_set;
251static struct cg_cgroup_link init_css_set_link; 290static struct cg_cgroup_link init_css_set_link;
252 291
253static int cgroup_subsys_init_idr(struct cgroup_subsys *ss); 292static int cgroup_init_idr(struct cgroup_subsys *ss,
293 struct cgroup_subsys_state *css);
254 294
255/* css_set_lock protects the list of css_set objects, and the 295/* css_set_lock protects the list of css_set objects, and the
256 * chain of tasks off each css_set. Nests outside task->alloc_lock 296 * chain of tasks off each css_set. Nests outside task->alloc_lock
@@ -448,8 +488,11 @@ static struct css_set *find_existing_css_set(
448 struct hlist_node *node; 488 struct hlist_node *node;
449 struct css_set *cg; 489 struct css_set *cg;
450 490
451 /* Built the set of subsystem state objects that we want to 491 /*
452 * see in the new css_set */ 492 * Build the set of subsystem state objects that we want to see in the
493 * new css_set. while subsystems can change globally, the entries here
494 * won't change, so no need for locking.
495 */
453 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 496 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
454 if (root->subsys_bits & (1UL << i)) { 497 if (root->subsys_bits & (1UL << i)) {
455 /* Subsystem is in this hierarchy. So we want 498 /* Subsystem is in this hierarchy. So we want
@@ -696,6 +739,7 @@ void cgroup_lock(void)
696{ 739{
697 mutex_lock(&cgroup_mutex); 740 mutex_lock(&cgroup_mutex);
698} 741}
742EXPORT_SYMBOL_GPL(cgroup_lock);
699 743
700/** 744/**
701 * cgroup_unlock - release lock on cgroup changes 745 * cgroup_unlock - release lock on cgroup changes
@@ -706,6 +750,7 @@ void cgroup_unlock(void)
706{ 750{
707 mutex_unlock(&cgroup_mutex); 751 mutex_unlock(&cgroup_mutex);
708} 752}
753EXPORT_SYMBOL_GPL(cgroup_unlock);
709 754
710/* 755/*
711 * A couple of forward declarations required, due to cyclic reference loop: 756 * A couple of forward declarations required, due to cyclic reference loop:
@@ -757,6 +802,7 @@ static int cgroup_call_pre_destroy(struct cgroup *cgrp)
757 if (ret) 802 if (ret)
758 break; 803 break;
759 } 804 }
805
760 return ret; 806 return ret;
761} 807}
762 808
@@ -884,7 +930,11 @@ void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
884 css_put(css); 930 css_put(css);
885} 931}
886 932
887 933/*
934 * Call with cgroup_mutex held. Drops reference counts on modules, including
935 * any duplicate ones that parse_cgroupfs_options took. If this function
936 * returns an error, no reference counts are touched.
937 */
888static int rebind_subsystems(struct cgroupfs_root *root, 938static int rebind_subsystems(struct cgroupfs_root *root,
889 unsigned long final_bits) 939 unsigned long final_bits)
890{ 940{
@@ -892,6 +942,8 @@ static int rebind_subsystems(struct cgroupfs_root *root,
892 struct cgroup *cgrp = &root->top_cgroup; 942 struct cgroup *cgrp = &root->top_cgroup;
893 int i; 943 int i;
894 944
945 BUG_ON(!mutex_is_locked(&cgroup_mutex));
946
895 removed_bits = root->actual_subsys_bits & ~final_bits; 947 removed_bits = root->actual_subsys_bits & ~final_bits;
896 added_bits = final_bits & ~root->actual_subsys_bits; 948 added_bits = final_bits & ~root->actual_subsys_bits;
897 /* Check that any added subsystems are currently free */ 949 /* Check that any added subsystems are currently free */
@@ -900,6 +952,12 @@ static int rebind_subsystems(struct cgroupfs_root *root,
900 struct cgroup_subsys *ss = subsys[i]; 952 struct cgroup_subsys *ss = subsys[i];
901 if (!(bit & added_bits)) 953 if (!(bit & added_bits))
902 continue; 954 continue;
955 /*
956 * Nobody should tell us to do a subsys that doesn't exist:
957 * parse_cgroupfs_options should catch that case and refcounts
958 * ensure that subsystems won't disappear once selected.
959 */
960 BUG_ON(ss == NULL);
903 if (ss->root != &rootnode) { 961 if (ss->root != &rootnode) {
904 /* Subsystem isn't free */ 962 /* Subsystem isn't free */
905 return -EBUSY; 963 return -EBUSY;
@@ -919,6 +977,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
919 unsigned long bit = 1UL << i; 977 unsigned long bit = 1UL << i;
920 if (bit & added_bits) { 978 if (bit & added_bits) {
921 /* We're binding this subsystem to this hierarchy */ 979 /* We're binding this subsystem to this hierarchy */
980 BUG_ON(ss == NULL);
922 BUG_ON(cgrp->subsys[i]); 981 BUG_ON(cgrp->subsys[i]);
923 BUG_ON(!dummytop->subsys[i]); 982 BUG_ON(!dummytop->subsys[i]);
924 BUG_ON(dummytop->subsys[i]->cgroup != dummytop); 983 BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
@@ -930,8 +989,10 @@ static int rebind_subsystems(struct cgroupfs_root *root,
930 if (ss->bind) 989 if (ss->bind)
931 ss->bind(ss, cgrp); 990 ss->bind(ss, cgrp);
932 mutex_unlock(&ss->hierarchy_mutex); 991 mutex_unlock(&ss->hierarchy_mutex);
992 /* refcount was already taken, and we're keeping it */
933 } else if (bit & removed_bits) { 993 } else if (bit & removed_bits) {
934 /* We're removing this subsystem */ 994 /* We're removing this subsystem */
995 BUG_ON(ss == NULL);
935 BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]); 996 BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
936 BUG_ON(cgrp->subsys[i]->cgroup != cgrp); 997 BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
937 mutex_lock(&ss->hierarchy_mutex); 998 mutex_lock(&ss->hierarchy_mutex);
@@ -942,9 +1003,20 @@ static int rebind_subsystems(struct cgroupfs_root *root,
942 subsys[i]->root = &rootnode; 1003 subsys[i]->root = &rootnode;
943 list_move(&ss->sibling, &rootnode.subsys_list); 1004 list_move(&ss->sibling, &rootnode.subsys_list);
944 mutex_unlock(&ss->hierarchy_mutex); 1005 mutex_unlock(&ss->hierarchy_mutex);
1006 /* subsystem is now free - drop reference on module */
1007 module_put(ss->module);
945 } else if (bit & final_bits) { 1008 } else if (bit & final_bits) {
946 /* Subsystem state should already exist */ 1009 /* Subsystem state should already exist */
1010 BUG_ON(ss == NULL);
947 BUG_ON(!cgrp->subsys[i]); 1011 BUG_ON(!cgrp->subsys[i]);
1012 /*
1013 * a refcount was taken, but we already had one, so
1014 * drop the extra reference.
1015 */
1016 module_put(ss->module);
1017#ifdef CONFIG_MODULE_UNLOAD
1018 BUG_ON(ss->module && !module_refcount(ss->module));
1019#endif
948 } else { 1020 } else {
949 /* Subsystem state shouldn't exist */ 1021 /* Subsystem state shouldn't exist */
950 BUG_ON(cgrp->subsys[i]); 1022 BUG_ON(cgrp->subsys[i]);
@@ -986,13 +1058,20 @@ struct cgroup_sb_opts {
986 1058
987}; 1059};
988 1060
989/* Convert a hierarchy specifier into a bitmask of subsystems and 1061/*
990 * flags. */ 1062 * Convert a hierarchy specifier into a bitmask of subsystems and flags. Call
991static int parse_cgroupfs_options(char *data, 1063 * with cgroup_mutex held to protect the subsys[] array. This function takes
992 struct cgroup_sb_opts *opts) 1064 * refcounts on subsystems to be used, unless it returns error, in which case
1065 * no refcounts are taken.
1066 */
1067static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
993{ 1068{
994 char *token, *o = data ?: "all"; 1069 char *token, *o = data ?: "all";
995 unsigned long mask = (unsigned long)-1; 1070 unsigned long mask = (unsigned long)-1;
1071 int i;
1072 bool module_pin_failed = false;
1073
1074 BUG_ON(!mutex_is_locked(&cgroup_mutex));
996 1075
997#ifdef CONFIG_CPUSETS 1076#ifdef CONFIG_CPUSETS
998 mask = ~(1UL << cpuset_subsys_id); 1077 mask = ~(1UL << cpuset_subsys_id);
@@ -1005,10 +1084,11 @@ static int parse_cgroupfs_options(char *data,
1005 return -EINVAL; 1084 return -EINVAL;
1006 if (!strcmp(token, "all")) { 1085 if (!strcmp(token, "all")) {
1007 /* Add all non-disabled subsystems */ 1086 /* Add all non-disabled subsystems */
1008 int i;
1009 opts->subsys_bits = 0; 1087 opts->subsys_bits = 0;
1010 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 1088 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1011 struct cgroup_subsys *ss = subsys[i]; 1089 struct cgroup_subsys *ss = subsys[i];
1090 if (ss == NULL)
1091 continue;
1012 if (!ss->disabled) 1092 if (!ss->disabled)
1013 opts->subsys_bits |= 1ul << i; 1093 opts->subsys_bits |= 1ul << i;
1014 } 1094 }
@@ -1026,7 +1106,6 @@ static int parse_cgroupfs_options(char *data,
1026 if (!opts->release_agent) 1106 if (!opts->release_agent)
1027 return -ENOMEM; 1107 return -ENOMEM;
1028 } else if (!strncmp(token, "name=", 5)) { 1108 } else if (!strncmp(token, "name=", 5)) {
1029 int i;
1030 const char *name = token + 5; 1109 const char *name = token + 5;
1031 /* Can't specify an empty name */ 1110 /* Can't specify an empty name */
1032 if (!strlen(name)) 1111 if (!strlen(name))
@@ -1050,9 +1129,10 @@ static int parse_cgroupfs_options(char *data,
1050 return -ENOMEM; 1129 return -ENOMEM;
1051 } else { 1130 } else {
1052 struct cgroup_subsys *ss; 1131 struct cgroup_subsys *ss;
1053 int i;
1054 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 1132 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1055 ss = subsys[i]; 1133 ss = subsys[i];
1134 if (ss == NULL)
1135 continue;
1056 if (!strcmp(token, ss->name)) { 1136 if (!strcmp(token, ss->name)) {
1057 if (!ss->disabled) 1137 if (!ss->disabled)
1058 set_bit(i, &opts->subsys_bits); 1138 set_bit(i, &opts->subsys_bits);
@@ -1087,9 +1167,54 @@ static int parse_cgroupfs_options(char *data,
1087 if (!opts->subsys_bits && !opts->name) 1167 if (!opts->subsys_bits && !opts->name)
1088 return -EINVAL; 1168 return -EINVAL;
1089 1169
1170 /*
1171 * Grab references on all the modules we'll need, so the subsystems
1172 * don't dance around before rebind_subsystems attaches them. This may
1173 * take duplicate reference counts on a subsystem that's already used,
1174 * but rebind_subsystems handles this case.
1175 */
1176 for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
1177 unsigned long bit = 1UL << i;
1178
1179 if (!(bit & opts->subsys_bits))
1180 continue;
1181 if (!try_module_get(subsys[i]->module)) {
1182 module_pin_failed = true;
1183 break;
1184 }
1185 }
1186 if (module_pin_failed) {
1187 /*
1188 * Oops, one of the modules was going away. This means that we
1189 * raced with a delete_module() call, and to the user this is
1190 * essentially a "subsystem doesn't exist" case.
1191 */
1192 for (i--; i >= CGROUP_BUILTIN_SUBSYS_COUNT; i--) {
1193 /* drop refcounts only on the ones we took */
1194 unsigned long bit = 1UL << i;
1195
1196 if (!(bit & opts->subsys_bits))
1197 continue;
1198 module_put(subsys[i]->module);
1199 }
1200 return -ENOENT;
1201 }
1202
1090 return 0; 1203 return 0;
1091} 1204}
1092 1205
1206static void drop_parsed_module_refcounts(unsigned long subsys_bits)
1207{
1208 int i;
1209 for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
1210 unsigned long bit = 1UL << i;
1211
1212 if (!(bit & subsys_bits))
1213 continue;
1214 module_put(subsys[i]->module);
1215 }
1216}
1217
1093static int cgroup_remount(struct super_block *sb, int *flags, char *data) 1218static int cgroup_remount(struct super_block *sb, int *flags, char *data)
1094{ 1219{
1095 int ret = 0; 1220 int ret = 0;
@@ -1106,21 +1231,19 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
1106 if (ret) 1231 if (ret)
1107 goto out_unlock; 1232 goto out_unlock;
1108 1233
1109 /* Don't allow flags to change at remount */ 1234 /* Don't allow flags or name to change at remount */
1110 if (opts.flags != root->flags) { 1235 if (opts.flags != root->flags ||
1111 ret = -EINVAL; 1236 (opts.name && strcmp(opts.name, root->name))) {
1112 goto out_unlock;
1113 }
1114
1115 /* Don't allow name to change at remount */
1116 if (opts.name && strcmp(opts.name, root->name)) {
1117 ret = -EINVAL; 1237 ret = -EINVAL;
1238 drop_parsed_module_refcounts(opts.subsys_bits);
1118 goto out_unlock; 1239 goto out_unlock;
1119 } 1240 }
1120 1241
1121 ret = rebind_subsystems(root, opts.subsys_bits); 1242 ret = rebind_subsystems(root, opts.subsys_bits);
1122 if (ret) 1243 if (ret) {
1244 drop_parsed_module_refcounts(opts.subsys_bits);
1123 goto out_unlock; 1245 goto out_unlock;
1246 }
1124 1247
1125 /* (re)populate subsystem files */ 1248 /* (re)populate subsystem files */
1126 cgroup_populate_dir(cgrp); 1249 cgroup_populate_dir(cgrp);
@@ -1151,6 +1274,8 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
1151 INIT_LIST_HEAD(&cgrp->release_list); 1274 INIT_LIST_HEAD(&cgrp->release_list);
1152 INIT_LIST_HEAD(&cgrp->pidlists); 1275 INIT_LIST_HEAD(&cgrp->pidlists);
1153 mutex_init(&cgrp->pidlist_mutex); 1276 mutex_init(&cgrp->pidlist_mutex);
1277 INIT_LIST_HEAD(&cgrp->event_list);
1278 spin_lock_init(&cgrp->event_list_lock);
1154} 1279}
1155 1280
1156static void init_cgroup_root(struct cgroupfs_root *root) 1281static void init_cgroup_root(struct cgroupfs_root *root)
@@ -1306,7 +1431,9 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
1306 struct cgroupfs_root *new_root; 1431 struct cgroupfs_root *new_root;
1307 1432
1308 /* First find the desired set of subsystems */ 1433 /* First find the desired set of subsystems */
1434 mutex_lock(&cgroup_mutex);
1309 ret = parse_cgroupfs_options(data, &opts); 1435 ret = parse_cgroupfs_options(data, &opts);
1436 mutex_unlock(&cgroup_mutex);
1310 if (ret) 1437 if (ret)
1311 goto out_err; 1438 goto out_err;
1312 1439
@@ -1317,7 +1444,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
1317 new_root = cgroup_root_from_opts(&opts); 1444 new_root = cgroup_root_from_opts(&opts);
1318 if (IS_ERR(new_root)) { 1445 if (IS_ERR(new_root)) {
1319 ret = PTR_ERR(new_root); 1446 ret = PTR_ERR(new_root);
1320 goto out_err; 1447 goto drop_modules;
1321 } 1448 }
1322 opts.new_root = new_root; 1449 opts.new_root = new_root;
1323 1450
@@ -1326,7 +1453,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
1326 if (IS_ERR(sb)) { 1453 if (IS_ERR(sb)) {
1327 ret = PTR_ERR(sb); 1454 ret = PTR_ERR(sb);
1328 cgroup_drop_root(opts.new_root); 1455 cgroup_drop_root(opts.new_root);
1329 goto out_err; 1456 goto drop_modules;
1330 } 1457 }
1331 1458
1332 root = sb->s_fs_info; 1459 root = sb->s_fs_info;
@@ -1382,6 +1509,11 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
1382 free_cg_links(&tmp_cg_links); 1509 free_cg_links(&tmp_cg_links);
1383 goto drop_new_super; 1510 goto drop_new_super;
1384 } 1511 }
1512 /*
1513 * There must be no failure case after here, since rebinding
1514 * takes care of subsystems' refcounts, which are explicitly
1515 * dropped in the failure exit path.
1516 */
1385 1517
1386 /* EBUSY should be the only error here */ 1518 /* EBUSY should be the only error here */
1387 BUG_ON(ret); 1519 BUG_ON(ret);
@@ -1420,6 +1552,8 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
1420 * any) is not needed 1552 * any) is not needed
1421 */ 1553 */
1422 cgroup_drop_root(opts.new_root); 1554 cgroup_drop_root(opts.new_root);
1555 /* no subsys rebinding, so refcounts don't change */
1556 drop_parsed_module_refcounts(opts.subsys_bits);
1423 } 1557 }
1424 1558
1425 simple_set_mnt(mnt, sb); 1559 simple_set_mnt(mnt, sb);
@@ -1429,6 +1563,8 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
1429 1563
1430 drop_new_super: 1564 drop_new_super:
1431 deactivate_locked_super(sb); 1565 deactivate_locked_super(sb);
1566 drop_modules:
1567 drop_parsed_module_refcounts(opts.subsys_bits);
1432 out_err: 1568 out_err:
1433 kfree(opts.release_agent); 1569 kfree(opts.release_agent);
1434 kfree(opts.name); 1570 kfree(opts.name);
@@ -1542,6 +1678,7 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
1542 memmove(buf, start, buf + buflen - start); 1678 memmove(buf, start, buf + buflen - start);
1543 return 0; 1679 return 0;
1544} 1680}
1681EXPORT_SYMBOL_GPL(cgroup_path);
1545 1682
1546/** 1683/**
1547 * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp' 1684 * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
@@ -1554,7 +1691,7 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
1554int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) 1691int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1555{ 1692{
1556 int retval = 0; 1693 int retval = 0;
1557 struct cgroup_subsys *ss; 1694 struct cgroup_subsys *ss, *failed_ss = NULL;
1558 struct cgroup *oldcgrp; 1695 struct cgroup *oldcgrp;
1559 struct css_set *cg; 1696 struct css_set *cg;
1560 struct css_set *newcg; 1697 struct css_set *newcg;
@@ -1568,8 +1705,16 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1568 for_each_subsys(root, ss) { 1705 for_each_subsys(root, ss) {
1569 if (ss->can_attach) { 1706 if (ss->can_attach) {
1570 retval = ss->can_attach(ss, cgrp, tsk, false); 1707 retval = ss->can_attach(ss, cgrp, tsk, false);
1571 if (retval) 1708 if (retval) {
1572 return retval; 1709 /*
1710 * Remember on which subsystem the can_attach()
1711 * failed, so that we only call cancel_attach()
1712 * against the subsystems whose can_attach()
1713 * succeeded. (See below)
1714 */
1715 failed_ss = ss;
1716 goto out;
1717 }
1573 } 1718 }
1574 } 1719 }
1575 1720
@@ -1583,14 +1728,17 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1583 */ 1728 */
1584 newcg = find_css_set(cg, cgrp); 1729 newcg = find_css_set(cg, cgrp);
1585 put_css_set(cg); 1730 put_css_set(cg);
1586 if (!newcg) 1731 if (!newcg) {
1587 return -ENOMEM; 1732 retval = -ENOMEM;
1733 goto out;
1734 }
1588 1735
1589 task_lock(tsk); 1736 task_lock(tsk);
1590 if (tsk->flags & PF_EXITING) { 1737 if (tsk->flags & PF_EXITING) {
1591 task_unlock(tsk); 1738 task_unlock(tsk);
1592 put_css_set(newcg); 1739 put_css_set(newcg);
1593 return -ESRCH; 1740 retval = -ESRCH;
1741 goto out;
1594 } 1742 }
1595 rcu_assign_pointer(tsk->cgroups, newcg); 1743 rcu_assign_pointer(tsk->cgroups, newcg);
1596 task_unlock(tsk); 1744 task_unlock(tsk);
@@ -1616,7 +1764,22 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1616 * is no longer empty. 1764 * is no longer empty.
1617 */ 1765 */
1618 cgroup_wakeup_rmdir_waiter(cgrp); 1766 cgroup_wakeup_rmdir_waiter(cgrp);
1619 return 0; 1767out:
1768 if (retval) {
1769 for_each_subsys(root, ss) {
1770 if (ss == failed_ss)
1771 /*
1772 * This subsystem was the one that failed the
1773 * can_attach() check earlier, so we don't need
1774 * to call cancel_attach() against it or any
1775 * remaining subsystems.
1776 */
1777 break;
1778 if (ss->cancel_attach)
1779 ss->cancel_attach(ss, cgrp, tsk, false);
1780 }
1781 }
1782 return retval;
1620} 1783}
1621 1784
1622/* 1785/*
@@ -1682,6 +1845,7 @@ bool cgroup_lock_live_group(struct cgroup *cgrp)
1682 } 1845 }
1683 return true; 1846 return true;
1684} 1847}
1848EXPORT_SYMBOL_GPL(cgroup_lock_live_group);
1685 1849
1686static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft, 1850static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
1687 const char *buffer) 1851 const char *buffer)
@@ -1950,6 +2114,16 @@ static const struct inode_operations cgroup_dir_inode_operations = {
1950 .rename = cgroup_rename, 2114 .rename = cgroup_rename,
1951}; 2115};
1952 2116
2117/*
2118 * Check if a file is a control file
2119 */
2120static inline struct cftype *__file_cft(struct file *file)
2121{
2122 if (file->f_dentry->d_inode->i_fop != &cgroup_file_operations)
2123 return ERR_PTR(-EINVAL);
2124 return __d_cft(file->f_dentry);
2125}
2126
1953static int cgroup_create_file(struct dentry *dentry, mode_t mode, 2127static int cgroup_create_file(struct dentry *dentry, mode_t mode,
1954 struct super_block *sb) 2128 struct super_block *sb)
1955{ 2129{
@@ -2069,6 +2243,7 @@ int cgroup_add_file(struct cgroup *cgrp,
2069 error = PTR_ERR(dentry); 2243 error = PTR_ERR(dentry);
2070 return error; 2244 return error;
2071} 2245}
2246EXPORT_SYMBOL_GPL(cgroup_add_file);
2072 2247
2073int cgroup_add_files(struct cgroup *cgrp, 2248int cgroup_add_files(struct cgroup *cgrp,
2074 struct cgroup_subsys *subsys, 2249 struct cgroup_subsys *subsys,
@@ -2083,6 +2258,7 @@ int cgroup_add_files(struct cgroup *cgrp,
2083 } 2258 }
2084 return 0; 2259 return 0;
2085} 2260}
2261EXPORT_SYMBOL_GPL(cgroup_add_files);
2086 2262
2087/** 2263/**
2088 * cgroup_task_count - count the number of tasks in a cgroup. 2264 * cgroup_task_count - count the number of tasks in a cgroup.
@@ -2468,7 +2644,8 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
2468{ 2644{
2469 struct cgroup_pidlist *l; 2645 struct cgroup_pidlist *l;
2470 /* don't need task_nsproxy() if we're looking at ourself */ 2646 /* don't need task_nsproxy() if we're looking at ourself */
2471 struct pid_namespace *ns = get_pid_ns(current->nsproxy->pid_ns); 2647 struct pid_namespace *ns = current->nsproxy->pid_ns;
2648
2472 /* 2649 /*
2473 * We can't drop the pidlist_mutex before taking the l->mutex in case 2650 * We can't drop the pidlist_mutex before taking the l->mutex in case
2474 * the last ref-holder is trying to remove l from the list at the same 2651 * the last ref-holder is trying to remove l from the list at the same
@@ -2478,8 +2655,6 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
2478 mutex_lock(&cgrp->pidlist_mutex); 2655 mutex_lock(&cgrp->pidlist_mutex);
2479 list_for_each_entry(l, &cgrp->pidlists, links) { 2656 list_for_each_entry(l, &cgrp->pidlists, links) {
2480 if (l->key.type == type && l->key.ns == ns) { 2657 if (l->key.type == type && l->key.ns == ns) {
2481 /* found a matching list - drop the extra refcount */
2482 put_pid_ns(ns);
2483 /* make sure l doesn't vanish out from under us */ 2658 /* make sure l doesn't vanish out from under us */
2484 down_write(&l->mutex); 2659 down_write(&l->mutex);
2485 mutex_unlock(&cgrp->pidlist_mutex); 2660 mutex_unlock(&cgrp->pidlist_mutex);
@@ -2490,13 +2665,12 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
2490 l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL); 2665 l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
2491 if (!l) { 2666 if (!l) {
2492 mutex_unlock(&cgrp->pidlist_mutex); 2667 mutex_unlock(&cgrp->pidlist_mutex);
2493 put_pid_ns(ns);
2494 return l; 2668 return l;
2495 } 2669 }
2496 init_rwsem(&l->mutex); 2670 init_rwsem(&l->mutex);
2497 down_write(&l->mutex); 2671 down_write(&l->mutex);
2498 l->key.type = type; 2672 l->key.type = type;
2499 l->key.ns = ns; 2673 l->key.ns = get_pid_ns(ns);
2500 l->use_count = 0; /* don't increment here */ 2674 l->use_count = 0; /* don't increment here */
2501 l->list = NULL; 2675 l->list = NULL;
2502 l->owner = cgrp; 2676 l->owner = cgrp;
@@ -2804,6 +2978,174 @@ static int cgroup_write_notify_on_release(struct cgroup *cgrp,
2804} 2978}
2805 2979
2806/* 2980/*
2981 * Unregister event and free resources.
2982 *
2983 * Gets called from workqueue.
2984 */
2985static void cgroup_event_remove(struct work_struct *work)
2986{
2987 struct cgroup_event *event = container_of(work, struct cgroup_event,
2988 remove);
2989 struct cgroup *cgrp = event->cgrp;
2990
2991 /* TODO: check return code */
2992 event->cft->unregister_event(cgrp, event->cft, event->eventfd);
2993
2994 eventfd_ctx_put(event->eventfd);
2995 kfree(event);
2996 dput(cgrp->dentry);
2997}
2998
2999/*
3000 * Gets called on POLLHUP on eventfd when user closes it.
3001 *
3002 * Called with wqh->lock held and interrupts disabled.
3003 */
3004static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
3005 int sync, void *key)
3006{
3007 struct cgroup_event *event = container_of(wait,
3008 struct cgroup_event, wait);
3009 struct cgroup *cgrp = event->cgrp;
3010 unsigned long flags = (unsigned long)key;
3011
3012 if (flags & POLLHUP) {
3013 remove_wait_queue_locked(event->wqh, &event->wait);
3014 spin_lock(&cgrp->event_list_lock);
3015 list_del(&event->list);
3016 spin_unlock(&cgrp->event_list_lock);
3017 /*
3018 * We are in atomic context, but cgroup_event_remove() may
3019 * sleep, so we have to call it in workqueue.
3020 */
3021 schedule_work(&event->remove);
3022 }
3023
3024 return 0;
3025}
3026
3027static void cgroup_event_ptable_queue_proc(struct file *file,
3028 wait_queue_head_t *wqh, poll_table *pt)
3029{
3030 struct cgroup_event *event = container_of(pt,
3031 struct cgroup_event, pt);
3032
3033 event->wqh = wqh;
3034 add_wait_queue(wqh, &event->wait);
3035}
3036
3037/*
3038 * Parse input and register new cgroup event handler.
3039 *
3040 * Input must be in format '<event_fd> <control_fd> <args>'.
3041 * Interpretation of args is defined by control file implementation.
3042 */
3043static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
3044 const char *buffer)
3045{
3046 struct cgroup_event *event = NULL;
3047 unsigned int efd, cfd;
3048 struct file *efile = NULL;
3049 struct file *cfile = NULL;
3050 char *endp;
3051 int ret;
3052
3053 efd = simple_strtoul(buffer, &endp, 10);
3054 if (*endp != ' ')
3055 return -EINVAL;
3056 buffer = endp + 1;
3057
3058 cfd = simple_strtoul(buffer, &endp, 10);
3059 if ((*endp != ' ') && (*endp != '\0'))
3060 return -EINVAL;
3061 buffer = endp + 1;
3062
3063 event = kzalloc(sizeof(*event), GFP_KERNEL);
3064 if (!event)
3065 return -ENOMEM;
3066 event->cgrp = cgrp;
3067 INIT_LIST_HEAD(&event->list);
3068 init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
3069 init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
3070 INIT_WORK(&event->remove, cgroup_event_remove);
3071
3072 efile = eventfd_fget(efd);
3073 if (IS_ERR(efile)) {
3074 ret = PTR_ERR(efile);
3075 goto fail;
3076 }
3077
3078 event->eventfd = eventfd_ctx_fileget(efile);
3079 if (IS_ERR(event->eventfd)) {
3080 ret = PTR_ERR(event->eventfd);
3081 goto fail;
3082 }
3083
3084 cfile = fget(cfd);
3085 if (!cfile) {
3086 ret = -EBADF;
3087 goto fail;
3088 }
3089
3090 /* the process needs read permission on the control file */
3091 ret = file_permission(cfile, MAY_READ);
3092 if (ret < 0)
3093 goto fail;
3094
3095 event->cft = __file_cft(cfile);
3096 if (IS_ERR(event->cft)) {
3097 ret = PTR_ERR(event->cft);
3098 goto fail;
3099 }
3100
3101 if (!event->cft->register_event || !event->cft->unregister_event) {
3102 ret = -EINVAL;
3103 goto fail;
3104 }
3105
3106 ret = event->cft->register_event(cgrp, event->cft,
3107 event->eventfd, buffer);
3108 if (ret)
3109 goto fail;
3110
3111 if (efile->f_op->poll(efile, &event->pt) & POLLHUP) {
3112 event->cft->unregister_event(cgrp, event->cft, event->eventfd);
3113 ret = 0;
3114 goto fail;
3115 }
3116
3117 /*
3118 * Events should be removed after rmdir of cgroup directory, but before
3119 * destroying subsystem state objects. Let's take reference to cgroup
3120 * directory dentry to do that.
3121 */
3122 dget(cgrp->dentry);
3123
3124 spin_lock(&cgrp->event_list_lock);
3125 list_add(&event->list, &cgrp->event_list);
3126 spin_unlock(&cgrp->event_list_lock);
3127
3128 fput(cfile);
3129 fput(efile);
3130
3131 return 0;
3132
3133fail:
3134 if (cfile)
3135 fput(cfile);
3136
3137 if (event && event->eventfd && !IS_ERR(event->eventfd))
3138 eventfd_ctx_put(event->eventfd);
3139
3140 if (!IS_ERR_OR_NULL(efile))
3141 fput(efile);
3142
3143 kfree(event);
3144
3145 return ret;
3146}
3147
3148/*
2807 * for the common functions, 'private' gives the type of file 3149 * for the common functions, 'private' gives the type of file
2808 */ 3150 */
2809/* for hysterical raisins, we can't put this on the older files */ 3151/* for hysterical raisins, we can't put this on the older files */
@@ -2828,6 +3170,11 @@ static struct cftype files[] = {
2828 .read_u64 = cgroup_read_notify_on_release, 3170 .read_u64 = cgroup_read_notify_on_release,
2829 .write_u64 = cgroup_write_notify_on_release, 3171 .write_u64 = cgroup_write_notify_on_release,
2830 }, 3172 },
3173 {
3174 .name = CGROUP_FILE_GENERIC_PREFIX "event_control",
3175 .write_string = cgroup_write_event_control,
3176 .mode = S_IWUGO,
3177 },
2831}; 3178};
2832 3179
2833static struct cftype cft_release_agent = { 3180static struct cftype cft_release_agent = {
@@ -2892,8 +3239,14 @@ static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
2892 /* We need to take each hierarchy_mutex in a consistent order */ 3239 /* We need to take each hierarchy_mutex in a consistent order */
2893 int i; 3240 int i;
2894 3241
3242 /*
3243 * No worry about a race with rebind_subsystems that might mess up the
3244 * locking order, since both parties are under cgroup_mutex.
3245 */
2895 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 3246 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2896 struct cgroup_subsys *ss = subsys[i]; 3247 struct cgroup_subsys *ss = subsys[i];
3248 if (ss == NULL)
3249 continue;
2897 if (ss->root == root) 3250 if (ss->root == root)
2898 mutex_lock(&ss->hierarchy_mutex); 3251 mutex_lock(&ss->hierarchy_mutex);
2899 } 3252 }
@@ -2905,6 +3258,8 @@ static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
2905 3258
2906 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 3259 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2907 struct cgroup_subsys *ss = subsys[i]; 3260 struct cgroup_subsys *ss = subsys[i];
3261 if (ss == NULL)
3262 continue;
2908 if (ss->root == root) 3263 if (ss->root == root)
2909 mutex_unlock(&ss->hierarchy_mutex); 3264 mutex_unlock(&ss->hierarchy_mutex);
2910 } 3265 }
@@ -3028,11 +3383,16 @@ static int cgroup_has_css_refs(struct cgroup *cgrp)
3028 * synchronization other than RCU, and the subsystem linked 3383 * synchronization other than RCU, and the subsystem linked
3029 * list isn't RCU-safe */ 3384 * list isn't RCU-safe */
3030 int i; 3385 int i;
3386 /*
3387 * We won't need to lock the subsys array, because the subsystems
3388 * we're concerned about aren't going anywhere since our cgroup root
3389 * has a reference on them.
3390 */
3031 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 3391 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3032 struct cgroup_subsys *ss = subsys[i]; 3392 struct cgroup_subsys *ss = subsys[i];
3033 struct cgroup_subsys_state *css; 3393 struct cgroup_subsys_state *css;
3034 /* Skip subsystems not in this hierarchy */ 3394 /* Skip subsystems not present or not in this hierarchy */
3035 if (ss->root != cgrp->root) 3395 if (ss == NULL || ss->root != cgrp->root)
3036 continue; 3396 continue;
3037 css = cgrp->subsys[ss->subsys_id]; 3397 css = cgrp->subsys[ss->subsys_id];
3038 /* When called from check_for_release() it's possible 3398 /* When called from check_for_release() it's possible
@@ -3106,6 +3466,7 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
3106 struct dentry *d; 3466 struct dentry *d;
3107 struct cgroup *parent; 3467 struct cgroup *parent;
3108 DEFINE_WAIT(wait); 3468 DEFINE_WAIT(wait);
3469 struct cgroup_event *event, *tmp;
3109 int ret; 3470 int ret;
3110 3471
3111 /* the vfs holds both inode->i_mutex already */ 3472 /* the vfs holds both inode->i_mutex already */
@@ -3189,6 +3550,20 @@ again:
3189 set_bit(CGRP_RELEASABLE, &parent->flags); 3550 set_bit(CGRP_RELEASABLE, &parent->flags);
3190 check_for_release(parent); 3551 check_for_release(parent);
3191 3552
3553 /*
3554 * Unregister events and notify userspace.
3555 * Notify userspace about cgroup removal only after rmdir of the cgroup
3556 * directory, to avoid a race between userspace and kernelspace.
3557 */
3558 spin_lock(&cgrp->event_list_lock);
3559 list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
3560 list_del(&event->list);
3561 remove_wait_queue(event->wqh, &event->wait);
3562 eventfd_signal(event->eventfd, 1);
3563 schedule_work(&event->remove);
3564 }
3565 spin_unlock(&cgrp->event_list_lock);
3566
3192 mutex_unlock(&cgroup_mutex); 3567 mutex_unlock(&cgroup_mutex);
3193 return 0; 3568 return 0;
3194} 3569}
@@ -3223,9 +3598,198 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
3223 mutex_init(&ss->hierarchy_mutex); 3598 mutex_init(&ss->hierarchy_mutex);
3224 lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key); 3599 lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
3225 ss->active = 1; 3600 ss->active = 1;
3601
3602 /* this function shouldn't be used with modular subsystems, since they
3603 * need to register a subsys_id, among other things */
3604 BUG_ON(ss->module);
3226} 3605}
3227 3606
3228/** 3607/**
3608 * cgroup_load_subsys: load and register a modular subsystem at runtime
3609 * @ss: the subsystem to load
3610 *
3611 * This function should be called in a modular subsystem's initcall. If the
3612 * subsystem is built as a module, it will be assigned a new subsys_id and set
3613 * up for use. If the subsystem is built-in anyway, work is delegated to the
3614 * simpler cgroup_init_subsys.
3615 */
3616int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
3617{
3618 int i;
3619 struct cgroup_subsys_state *css;
3620
3621 /* check name and function validity */
3622 if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
3623 ss->create == NULL || ss->destroy == NULL)
3624 return -EINVAL;
3625
3626 /*
3627 * we don't support callbacks in modular subsystems. this check is
3628 * before the ss->module check for consistency; a subsystem that could
3629 * be a module should still have no callbacks even if the user isn't
3630 * compiling it as one.
3631 */
3632 if (ss->fork || ss->exit)
3633 return -EINVAL;
3634
3635 /*
3636 * an optionally modular subsystem is built-in: we want to do nothing,
3637 * since cgroup_init_subsys will have already taken care of it.
3638 */
3639 if (ss->module == NULL) {
3640 /* a few sanity checks */
3641 BUG_ON(ss->subsys_id >= CGROUP_BUILTIN_SUBSYS_COUNT);
3642 BUG_ON(subsys[ss->subsys_id] != ss);
3643 return 0;
3644 }
3645
3646 /*
3647 * need to register a subsys id before anything else - for example,
3648 * init_cgroup_css needs it.
3649 */
3650 mutex_lock(&cgroup_mutex);
3651 /* find the first empty slot in the array */
3652 for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
3653 if (subsys[i] == NULL)
3654 break;
3655 }
3656 if (i == CGROUP_SUBSYS_COUNT) {
3657 /* maximum number of subsystems already registered! */
3658 mutex_unlock(&cgroup_mutex);
3659 return -EBUSY;
3660 }
3661 /* assign ourselves the subsys_id */
3662 ss->subsys_id = i;
3663 subsys[i] = ss;
3664
3665 /*
3666 * no ss->create seems to need anything important in the ss struct, so
3667 * this can happen first (i.e. before the rootnode attachment).
3668 */
3669 css = ss->create(ss, dummytop);
3670 if (IS_ERR(css)) {
3671 /* failure case - need to deassign the subsys[] slot. */
3672 subsys[i] = NULL;
3673 mutex_unlock(&cgroup_mutex);
3674 return PTR_ERR(css);
3675 }
3676
3677 list_add(&ss->sibling, &rootnode.subsys_list);
3678 ss->root = &rootnode;
3679
3680 /* our new subsystem will be attached to the dummy hierarchy. */
3681 init_cgroup_css(css, ss, dummytop);
3682 /* init_idr must be after init_cgroup_css because it sets css->id. */
3683 if (ss->use_id) {
3684 int ret = cgroup_init_idr(ss, css);
3685 if (ret) {
3686 dummytop->subsys[ss->subsys_id] = NULL;
3687 ss->destroy(ss, dummytop);
3688 subsys[i] = NULL;
3689 mutex_unlock(&cgroup_mutex);
3690 return ret;
3691 }
3692 }
3693
3694 /*
3695 * Now we need to entangle the css into the existing css_sets. unlike
3696 * in cgroup_init_subsys, there are now multiple css_sets, so each one
3697 * will need a new pointer to it; done by iterating the css_set_table.
3698 * furthermore, modifying the existing css_sets will corrupt the hash
3699 * table state, so each changed css_set will need its hash recomputed.
3700 * this is all done under the css_set_lock.
3701 */
3702 write_lock(&css_set_lock);
3703 for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
3704 struct css_set *cg;
3705 struct hlist_node *node, *tmp;
3706 struct hlist_head *bucket = &css_set_table[i], *new_bucket;
3707
3708 hlist_for_each_entry_safe(cg, node, tmp, bucket, hlist) {
3709 /* skip entries that we already rehashed */
3710 if (cg->subsys[ss->subsys_id])
3711 continue;
3712 /* remove existing entry */
3713 hlist_del(&cg->hlist);
3714 /* set new value */
3715 cg->subsys[ss->subsys_id] = css;
3716 /* recompute hash and restore entry */
3717 new_bucket = css_set_hash(cg->subsys);
3718 hlist_add_head(&cg->hlist, new_bucket);
3719 }
3720 }
3721 write_unlock(&css_set_lock);
3722
3723 mutex_init(&ss->hierarchy_mutex);
3724 lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
3725 ss->active = 1;
3726
3727 /* success! */
3728 mutex_unlock(&cgroup_mutex);
3729 return 0;
3730}
3731EXPORT_SYMBOL_GPL(cgroup_load_subsys);
3732
3733/**
3734 * cgroup_unload_subsys: unload a modular subsystem
3735 * @ss: the subsystem to unload
3736 *
3737 * This function should be called in a modular subsystem's exitcall. When this
3738 * function is invoked, the refcount on the subsystem's module will be 0, so
3739 * the subsystem will not be attached to any hierarchy.
3740 */
3741void cgroup_unload_subsys(struct cgroup_subsys *ss)
3742{
3743 struct cg_cgroup_link *link;
3744 struct hlist_head *hhead;
3745
3746 BUG_ON(ss->module == NULL);
3747
3748 /*
3749 * we shouldn't be called if the subsystem is in use, and the use of
3750 * try_module_get in parse_cgroupfs_options should ensure that it
3751 * doesn't start being used while we're killing it off.
3752 */
3753 BUG_ON(ss->root != &rootnode);
3754
3755 mutex_lock(&cgroup_mutex);
3756 /* deassign the subsys_id */
3757 BUG_ON(ss->subsys_id < CGROUP_BUILTIN_SUBSYS_COUNT);
3758 subsys[ss->subsys_id] = NULL;
3759
3760 /* remove subsystem from rootnode's list of subsystems */
3761 list_del(&ss->sibling);
3762
3763 /*
3764 * disentangle the css from all css_sets attached to the dummytop. as
3765 * in loading, we need to pay our respects to the hashtable gods.
3766 */
3767 write_lock(&css_set_lock);
3768 list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) {
3769 struct css_set *cg = link->cg;
3770
3771 hlist_del(&cg->hlist);
3772 BUG_ON(!cg->subsys[ss->subsys_id]);
3773 cg->subsys[ss->subsys_id] = NULL;
3774 hhead = css_set_hash(cg->subsys);
3775 hlist_add_head(&cg->hlist, hhead);
3776 }
3777 write_unlock(&css_set_lock);
3778
3779 /*
3780 * remove subsystem's css from the dummytop and free it - need to free
3781 * before marking as null because ss->destroy needs the cgrp->subsys
3782 * pointer to find their state. note that this also takes care of
3783 * freeing the css_id.
3784 */
3785 ss->destroy(ss, dummytop);
3786 dummytop->subsys[ss->subsys_id] = NULL;
3787
3788 mutex_unlock(&cgroup_mutex);
3789}
3790EXPORT_SYMBOL_GPL(cgroup_unload_subsys);
3791
3792/**
3229 * cgroup_init_early - cgroup initialization at system boot 3793 * cgroup_init_early - cgroup initialization at system boot
3230 * 3794 *
3231 * Initialize cgroups at system boot, and initialize any 3795 * Initialize cgroups at system boot, and initialize any
@@ -3253,7 +3817,8 @@ int __init cgroup_init_early(void)
3253 for (i = 0; i < CSS_SET_TABLE_SIZE; i++) 3817 for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
3254 INIT_HLIST_HEAD(&css_set_table[i]); 3818 INIT_HLIST_HEAD(&css_set_table[i]);
3255 3819
3256 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 3820 /* at bootup time, we don't worry about modular subsystems */
3821 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
3257 struct cgroup_subsys *ss = subsys[i]; 3822 struct cgroup_subsys *ss = subsys[i];
3258 3823
3259 BUG_ON(!ss->name); 3824 BUG_ON(!ss->name);
@@ -3288,12 +3853,13 @@ int __init cgroup_init(void)
3288 if (err) 3853 if (err)
3289 return err; 3854 return err;
3290 3855
3291 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 3856 /* at bootup time, we don't worry about modular subsystems */
3857 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
3292 struct cgroup_subsys *ss = subsys[i]; 3858 struct cgroup_subsys *ss = subsys[i];
3293 if (!ss->early_init) 3859 if (!ss->early_init)
3294 cgroup_init_subsys(ss); 3860 cgroup_init_subsys(ss);
3295 if (ss->use_id) 3861 if (ss->use_id)
3296 cgroup_subsys_init_idr(ss); 3862 cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]);
3297 } 3863 }
3298 3864
3299 /* Add init_css_set to the hash table */ 3865 /* Add init_css_set to the hash table */
@@ -3397,9 +3963,16 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v)
3397 int i; 3963 int i;
3398 3964
3399 seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n"); 3965 seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
3966 /*
3967 * ideally we don't want subsystems moving around while we do this.
3968 * cgroup_mutex is also necessary to guarantee an atomic snapshot of
3969 * subsys/hierarchy state.
3970 */
3400 mutex_lock(&cgroup_mutex); 3971 mutex_lock(&cgroup_mutex);
3401 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 3972 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3402 struct cgroup_subsys *ss = subsys[i]; 3973 struct cgroup_subsys *ss = subsys[i];
3974 if (ss == NULL)
3975 continue;
3403 seq_printf(m, "%s\t%d\t%d\t%d\n", 3976 seq_printf(m, "%s\t%d\t%d\t%d\n",
3404 ss->name, ss->root->hierarchy_id, 3977 ss->name, ss->root->hierarchy_id,
3405 ss->root->number_of_cgroups, !ss->disabled); 3978 ss->root->number_of_cgroups, !ss->disabled);
@@ -3457,7 +4030,12 @@ void cgroup_fork_callbacks(struct task_struct *child)
3457{ 4030{
3458 if (need_forkexit_callback) { 4031 if (need_forkexit_callback) {
3459 int i; 4032 int i;
3460 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 4033 /*
4034 * forkexit callbacks are only supported for builtin
4035 * subsystems, and the builtin section of the subsys array is
4036 * immutable, so we don't need to lock the subsys array here.
4037 */
4038 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
3461 struct cgroup_subsys *ss = subsys[i]; 4039 struct cgroup_subsys *ss = subsys[i];
3462 if (ss->fork) 4040 if (ss->fork)
3463 ss->fork(ss, child); 4041 ss->fork(ss, child);
@@ -3526,7 +4104,11 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
3526 struct css_set *cg; 4104 struct css_set *cg;
3527 4105
3528 if (run_callbacks && need_forkexit_callback) { 4106 if (run_callbacks && need_forkexit_callback) {
3529 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 4107 /*
4108 * modular subsystems can't use callbacks, so no need to lock
4109 * the subsys array
4110 */
4111 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
3530 struct cgroup_subsys *ss = subsys[i]; 4112 struct cgroup_subsys *ss = subsys[i];
3531 if (ss->exit) 4113 if (ss->exit)
3532 ss->exit(ss, tsk); 4114 ss->exit(ss, tsk);
@@ -3720,12 +4302,13 @@ static void check_for_release(struct cgroup *cgrp)
3720 } 4302 }
3721} 4303}
3722 4304
3723void __css_put(struct cgroup_subsys_state *css) 4305/* Caller must verify that the css is not for root cgroup */
4306void __css_put(struct cgroup_subsys_state *css, int count)
3724{ 4307{
3725 struct cgroup *cgrp = css->cgroup; 4308 struct cgroup *cgrp = css->cgroup;
3726 int val; 4309 int val;
3727 rcu_read_lock(); 4310 rcu_read_lock();
3728 val = atomic_dec_return(&css->refcnt); 4311 val = atomic_sub_return(count, &css->refcnt);
3729 if (val == 1) { 4312 if (val == 1) {
3730 if (notify_on_release(cgrp)) { 4313 if (notify_on_release(cgrp)) {
3731 set_bit(CGRP_RELEASABLE, &cgrp->flags); 4314 set_bit(CGRP_RELEASABLE, &cgrp->flags);
@@ -3736,6 +4319,7 @@ void __css_put(struct cgroup_subsys_state *css)
3736 rcu_read_unlock(); 4319 rcu_read_unlock();
3737 WARN_ON_ONCE(val < 1); 4320 WARN_ON_ONCE(val < 1);
3738} 4321}
4322EXPORT_SYMBOL_GPL(__css_put);
3739 4323
3740/* 4324/*
3741 * Notify userspace when a cgroup is released, by running the 4325 * Notify userspace when a cgroup is released, by running the
@@ -3817,8 +4401,11 @@ static int __init cgroup_disable(char *str)
3817 while ((token = strsep(&str, ",")) != NULL) { 4401 while ((token = strsep(&str, ",")) != NULL) {
3818 if (!*token) 4402 if (!*token)
3819 continue; 4403 continue;
3820 4404 /*
3821 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 4405 * cgroup_disable, being at boot time, can't know about module
4406 * subsystems, so we don't worry about them.
4407 */
4408 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
3822 struct cgroup_subsys *ss = subsys[i]; 4409 struct cgroup_subsys *ss = subsys[i];
3823 4410
3824 if (!strcmp(token, ss->name)) { 4411 if (!strcmp(token, ss->name)) {
@@ -3848,6 +4435,7 @@ unsigned short css_id(struct cgroup_subsys_state *css)
3848 return cssid->id; 4435 return cssid->id;
3849 return 0; 4436 return 0;
3850} 4437}
4438EXPORT_SYMBOL_GPL(css_id);
3851 4439
3852unsigned short css_depth(struct cgroup_subsys_state *css) 4440unsigned short css_depth(struct cgroup_subsys_state *css)
3853{ 4441{
@@ -3857,6 +4445,7 @@ unsigned short css_depth(struct cgroup_subsys_state *css)
3857 return cssid->depth; 4445 return cssid->depth;
3858 return 0; 4446 return 0;
3859} 4447}
4448EXPORT_SYMBOL_GPL(css_depth);
3860 4449
3861bool css_is_ancestor(struct cgroup_subsys_state *child, 4450bool css_is_ancestor(struct cgroup_subsys_state *child,
3862 const struct cgroup_subsys_state *root) 4451 const struct cgroup_subsys_state *root)
@@ -3893,6 +4482,7 @@ void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
3893 spin_unlock(&ss->id_lock); 4482 spin_unlock(&ss->id_lock);
3894 call_rcu(&id->rcu_head, __free_css_id_cb); 4483 call_rcu(&id->rcu_head, __free_css_id_cb);
3895} 4484}
4485EXPORT_SYMBOL_GPL(free_css_id);
3896 4486
3897/* 4487/*
3898 * This is called by init or create(). Then, calls to this function are 4488 * This is called by init or create(). Then, calls to this function are
@@ -3942,15 +4532,14 @@ err_out:
3942 4532
3943} 4533}
3944 4534
3945static int __init cgroup_subsys_init_idr(struct cgroup_subsys *ss) 4535static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
4536 struct cgroup_subsys_state *rootcss)
3946{ 4537{
3947 struct css_id *newid; 4538 struct css_id *newid;
3948 struct cgroup_subsys_state *rootcss;
3949 4539
3950 spin_lock_init(&ss->id_lock); 4540 spin_lock_init(&ss->id_lock);
3951 idr_init(&ss->idr); 4541 idr_init(&ss->idr);
3952 4542
3953 rootcss = init_css_set.subsys[ss->subsys_id];
3954 newid = get_new_cssid(ss, 0); 4543 newid = get_new_cssid(ss, 0);
3955 if (IS_ERR(newid)) 4544 if (IS_ERR(newid))
3956 return PTR_ERR(newid); 4545 return PTR_ERR(newid);
@@ -4010,6 +4599,7 @@ struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
4010 4599
4011 return rcu_dereference(cssid->css); 4600 return rcu_dereference(cssid->css);
4012} 4601}
4602EXPORT_SYMBOL_GPL(css_lookup);
4013 4603
4014/** 4604/**
4015 * css_get_next - lookup next cgroup under specified hierarchy. 4605 * css_get_next - lookup next cgroup under specified hierarchy.
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 59e9ef6aab40..e5c0244962b0 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -15,6 +15,7 @@
15 */ 15 */
16 16
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/slab.h>
18#include <linux/cgroup.h> 19#include <linux/cgroup.h>
19#include <linux/fs.h> 20#include <linux/fs.h>
20#include <linux/uaccess.h> 21#include <linux/uaccess.h>
@@ -47,17 +48,20 @@ static inline struct freezer *task_freezer(struct task_struct *task)
47 struct freezer, css); 48 struct freezer, css);
48} 49}
49 50
50int cgroup_frozen(struct task_struct *task) 51int cgroup_freezing_or_frozen(struct task_struct *task)
51{ 52{
52 struct freezer *freezer; 53 struct freezer *freezer;
53 enum freezer_state state; 54 enum freezer_state state;
54 55
55 task_lock(task); 56 task_lock(task);
56 freezer = task_freezer(task); 57 freezer = task_freezer(task);
57 state = freezer->state; 58 if (!freezer->css.cgroup->parent)
59 state = CGROUP_THAWED; /* root cgroup can't be frozen */
60 else
61 state = freezer->state;
58 task_unlock(task); 62 task_unlock(task);
59 63
60 return state == CGROUP_FROZEN; 64 return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN);
61} 65}
62 66
63/* 67/*
@@ -201,9 +205,12 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
201 * No lock is needed, since the task isn't on tasklist yet, 205 * No lock is needed, since the task isn't on tasklist yet,
202 * so it can't be moved to another cgroup, which means the 206 * so it can't be moved to another cgroup, which means the
203 * freezer won't be removed and will be valid during this 207 * freezer won't be removed and will be valid during this
204 * function call. 208 * function call. Nevertheless, apply RCU read-side critical
209 * section to suppress RCU lockdep false positives.
205 */ 210 */
211 rcu_read_lock();
206 freezer = task_freezer(task); 212 freezer = task_freezer(task);
213 rcu_read_unlock();
207 214
208 /* 215 /*
209 * The root cgroup is non-freezable, so we can skip the 216 * The root cgroup is non-freezable, so we can skip the
diff --git a/kernel/compat.c b/kernel/compat.c
index f6c204f07ea6..7f40e9275fd9 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -25,6 +25,7 @@
25#include <linux/posix-timers.h> 25#include <linux/posix-timers.h>
26#include <linux/times.h> 26#include <linux/times.h>
27#include <linux/ptrace.h> 27#include <linux/ptrace.h>
28#include <linux/gfp.h>
28 29
29#include <asm/uaccess.h> 30#include <asm/uaccess.h>
30 31
diff --git a/kernel/cpu.c b/kernel/cpu.c
index f8cced2692b3..25bba73b1be3 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -14,6 +14,7 @@
14#include <linux/kthread.h> 14#include <linux/kthread.h>
15#include <linux/stop_machine.h> 15#include <linux/stop_machine.h>
16#include <linux/mutex.h> 16#include <linux/mutex.h>
17#include <linux/gfp.h>
17 18
18#ifdef CONFIG_SMP 19#ifdef CONFIG_SMP
19/* Serializes the updates to cpu_online_mask, cpu_present_mask */ 20/* Serializes the updates to cpu_online_mask, cpu_present_mask */
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index ba401fab459f..d10946748ec2 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -920,9 +920,6 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
920 * call to guarantee_online_mems(), as we know no one is changing 920 * call to guarantee_online_mems(), as we know no one is changing
921 * our task's cpuset. 921 * our task's cpuset.
922 * 922 *
923 * Hold callback_mutex around the two modifications of our tasks
924 * mems_allowed to synchronize with cpuset_mems_allowed().
925 *
926 * While the mm_struct we are migrating is typically from some 923 * While the mm_struct we are migrating is typically from some
927 * other task, the task_struct mems_allowed that we are hacking 924 * other task, the task_struct mems_allowed that we are hacking
928 * is for our current task, which must allocate new pages for that 925 * is for our current task, which must allocate new pages for that
@@ -973,15 +970,20 @@ static void cpuset_change_nodemask(struct task_struct *p,
973 struct cpuset *cs; 970 struct cpuset *cs;
974 int migrate; 971 int migrate;
975 const nodemask_t *oldmem = scan->data; 972 const nodemask_t *oldmem = scan->data;
976 nodemask_t newmems; 973 NODEMASK_ALLOC(nodemask_t, newmems, GFP_KERNEL);
974
975 if (!newmems)
976 return;
977 977
978 cs = cgroup_cs(scan->cg); 978 cs = cgroup_cs(scan->cg);
979 guarantee_online_mems(cs, &newmems); 979 guarantee_online_mems(cs, newmems);
980 980
981 task_lock(p); 981 task_lock(p);
982 cpuset_change_task_nodemask(p, &newmems); 982 cpuset_change_task_nodemask(p, newmems);
983 task_unlock(p); 983 task_unlock(p);
984 984
985 NODEMASK_FREE(newmems);
986
985 mm = get_task_mm(p); 987 mm = get_task_mm(p);
986 if (!mm) 988 if (!mm)
987 return; 989 return;
@@ -1051,16 +1053,21 @@ static void update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem,
1051static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, 1053static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
1052 const char *buf) 1054 const char *buf)
1053{ 1055{
1054 nodemask_t oldmem; 1056 NODEMASK_ALLOC(nodemask_t, oldmem, GFP_KERNEL);
1055 int retval; 1057 int retval;
1056 struct ptr_heap heap; 1058 struct ptr_heap heap;
1057 1059
1060 if (!oldmem)
1061 return -ENOMEM;
1062
1058 /* 1063 /*
1059 * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY]; 1064 * top_cpuset.mems_allowed tracks node_stats[N_HIGH_MEMORY];
1060 * it's read-only 1065 * it's read-only
1061 */ 1066 */
1062 if (cs == &top_cpuset) 1067 if (cs == &top_cpuset) {
1063 return -EACCES; 1068 retval = -EACCES;
1069 goto done;
1070 }
1064 1071
1065 /* 1072 /*
1066 * An empty mems_allowed is ok iff there are no tasks in the cpuset. 1073 * An empty mems_allowed is ok iff there are no tasks in the cpuset.
@@ -1076,11 +1083,13 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
1076 goto done; 1083 goto done;
1077 1084
1078 if (!nodes_subset(trialcs->mems_allowed, 1085 if (!nodes_subset(trialcs->mems_allowed,
1079 node_states[N_HIGH_MEMORY])) 1086 node_states[N_HIGH_MEMORY])) {
1080 return -EINVAL; 1087 retval = -EINVAL;
1088 goto done;
1089 }
1081 } 1090 }
1082 oldmem = cs->mems_allowed; 1091 *oldmem = cs->mems_allowed;
1083 if (nodes_equal(oldmem, trialcs->mems_allowed)) { 1092 if (nodes_equal(*oldmem, trialcs->mems_allowed)) {
1084 retval = 0; /* Too easy - nothing to do */ 1093 retval = 0; /* Too easy - nothing to do */
1085 goto done; 1094 goto done;
1086 } 1095 }
@@ -1096,10 +1105,11 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
1096 cs->mems_allowed = trialcs->mems_allowed; 1105 cs->mems_allowed = trialcs->mems_allowed;
1097 mutex_unlock(&callback_mutex); 1106 mutex_unlock(&callback_mutex);
1098 1107
1099 update_tasks_nodemask(cs, &oldmem, &heap); 1108 update_tasks_nodemask(cs, oldmem, &heap);
1100 1109
1101 heap_free(&heap); 1110 heap_free(&heap);
1102done: 1111done:
1112 NODEMASK_FREE(oldmem);
1103 return retval; 1113 return retval;
1104} 1114}
1105 1115
@@ -1384,40 +1394,47 @@ static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
1384 struct cgroup *oldcont, struct task_struct *tsk, 1394 struct cgroup *oldcont, struct task_struct *tsk,
1385 bool threadgroup) 1395 bool threadgroup)
1386{ 1396{
1387 nodemask_t from, to;
1388 struct mm_struct *mm; 1397 struct mm_struct *mm;
1389 struct cpuset *cs = cgroup_cs(cont); 1398 struct cpuset *cs = cgroup_cs(cont);
1390 struct cpuset *oldcs = cgroup_cs(oldcont); 1399 struct cpuset *oldcs = cgroup_cs(oldcont);
1400 NODEMASK_ALLOC(nodemask_t, from, GFP_KERNEL);
1401 NODEMASK_ALLOC(nodemask_t, to, GFP_KERNEL);
1402
1403 if (from == NULL || to == NULL)
1404 goto alloc_fail;
1391 1405
1392 if (cs == &top_cpuset) { 1406 if (cs == &top_cpuset) {
1393 cpumask_copy(cpus_attach, cpu_possible_mask); 1407 cpumask_copy(cpus_attach, cpu_possible_mask);
1394 to = node_possible_map;
1395 } else { 1408 } else {
1396 guarantee_online_cpus(cs, cpus_attach); 1409 guarantee_online_cpus(cs, cpus_attach);
1397 guarantee_online_mems(cs, &to);
1398 } 1410 }
1411 guarantee_online_mems(cs, to);
1399 1412
1400 /* do per-task migration stuff possibly for each in the threadgroup */ 1413 /* do per-task migration stuff possibly for each in the threadgroup */
1401 cpuset_attach_task(tsk, &to, cs); 1414 cpuset_attach_task(tsk, to, cs);
1402 if (threadgroup) { 1415 if (threadgroup) {
1403 struct task_struct *c; 1416 struct task_struct *c;
1404 rcu_read_lock(); 1417 rcu_read_lock();
1405 list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { 1418 list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
1406 cpuset_attach_task(c, &to, cs); 1419 cpuset_attach_task(c, to, cs);
1407 } 1420 }
1408 rcu_read_unlock(); 1421 rcu_read_unlock();
1409 } 1422 }
1410 1423
1411 /* change mm; only needs to be done once even if threadgroup */ 1424 /* change mm; only needs to be done once even if threadgroup */
1412 from = oldcs->mems_allowed; 1425 *from = oldcs->mems_allowed;
1413 to = cs->mems_allowed; 1426 *to = cs->mems_allowed;
1414 mm = get_task_mm(tsk); 1427 mm = get_task_mm(tsk);
1415 if (mm) { 1428 if (mm) {
1416 mpol_rebind_mm(mm, &to); 1429 mpol_rebind_mm(mm, to);
1417 if (is_memory_migrate(cs)) 1430 if (is_memory_migrate(cs))
1418 cpuset_migrate_mm(mm, &from, &to); 1431 cpuset_migrate_mm(mm, from, to);
1419 mmput(mm); 1432 mmput(mm);
1420 } 1433 }
1434
1435alloc_fail:
1436 NODEMASK_FREE(from);
1437 NODEMASK_FREE(to);
1421} 1438}
1422 1439
1423/* The various types of files and directories in a cpuset file system */ 1440/* The various types of files and directories in a cpuset file system */
@@ -1562,13 +1579,21 @@ static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
1562 1579
1563static int cpuset_sprintf_memlist(char *page, struct cpuset *cs) 1580static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
1564{ 1581{
1565 nodemask_t mask; 1582 NODEMASK_ALLOC(nodemask_t, mask, GFP_KERNEL);
1583 int retval;
1584
1585 if (mask == NULL)
1586 return -ENOMEM;
1566 1587
1567 mutex_lock(&callback_mutex); 1588 mutex_lock(&callback_mutex);
1568 mask = cs->mems_allowed; 1589 *mask = cs->mems_allowed;
1569 mutex_unlock(&callback_mutex); 1590 mutex_unlock(&callback_mutex);
1570 1591
1571 return nodelist_scnprintf(page, PAGE_SIZE, mask); 1592 retval = nodelist_scnprintf(page, PAGE_SIZE, *mask);
1593
1594 NODEMASK_FREE(mask);
1595
1596 return retval;
1572} 1597}
1573 1598
1574static ssize_t cpuset_common_file_read(struct cgroup *cont, 1599static ssize_t cpuset_common_file_read(struct cgroup *cont,
@@ -1997,7 +2022,10 @@ static void scan_for_empty_cpusets(struct cpuset *root)
1997 struct cpuset *cp; /* scans cpusets being updated */ 2022 struct cpuset *cp; /* scans cpusets being updated */
1998 struct cpuset *child; /* scans child cpusets of cp */ 2023 struct cpuset *child; /* scans child cpusets of cp */
1999 struct cgroup *cont; 2024 struct cgroup *cont;
2000 nodemask_t oldmems; 2025 NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL);
2026
2027 if (oldmems == NULL)
2028 return;
2001 2029
2002 list_add_tail((struct list_head *)&root->stack_list, &queue); 2030 list_add_tail((struct list_head *)&root->stack_list, &queue);
2003 2031
@@ -2014,7 +2042,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
2014 nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY])) 2042 nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
2015 continue; 2043 continue;
2016 2044
2017 oldmems = cp->mems_allowed; 2045 *oldmems = cp->mems_allowed;
2018 2046
2019 /* Remove offline cpus and mems from this cpuset. */ 2047 /* Remove offline cpus and mems from this cpuset. */
2020 mutex_lock(&callback_mutex); 2048 mutex_lock(&callback_mutex);
@@ -2030,9 +2058,10 @@ static void scan_for_empty_cpusets(struct cpuset *root)
2030 remove_tasks_in_empty_cpuset(cp); 2058 remove_tasks_in_empty_cpuset(cp);
2031 else { 2059 else {
2032 update_tasks_cpumask(cp, NULL); 2060 update_tasks_cpumask(cp, NULL);
2033 update_tasks_nodemask(cp, &oldmems, NULL); 2061 update_tasks_nodemask(cp, oldmems, NULL);
2034 } 2062 }
2035 } 2063 }
2064 NODEMASK_FREE(oldmems);
2036} 2065}
2037 2066
2038/* 2067/*
@@ -2090,20 +2119,33 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
2090static int cpuset_track_online_nodes(struct notifier_block *self, 2119static int cpuset_track_online_nodes(struct notifier_block *self,
2091 unsigned long action, void *arg) 2120 unsigned long action, void *arg)
2092{ 2121{
2122 NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL);
2123
2124 if (oldmems == NULL)
2125 return NOTIFY_DONE;
2126
2093 cgroup_lock(); 2127 cgroup_lock();
2094 switch (action) { 2128 switch (action) {
2095 case MEM_ONLINE: 2129 case MEM_ONLINE:
2096 case MEM_OFFLINE: 2130 *oldmems = top_cpuset.mems_allowed;
2097 mutex_lock(&callback_mutex); 2131 mutex_lock(&callback_mutex);
2098 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; 2132 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
2099 mutex_unlock(&callback_mutex); 2133 mutex_unlock(&callback_mutex);
2100 if (action == MEM_OFFLINE) 2134 update_tasks_nodemask(&top_cpuset, oldmems, NULL);
2101 scan_for_empty_cpusets(&top_cpuset); 2135 break;
2136 case MEM_OFFLINE:
2137 /*
2138 * needn't update top_cpuset.mems_allowed explicitly because
2139 * scan_for_empty_cpusets() will update it.
2140 */
2141 scan_for_empty_cpusets(&top_cpuset);
2102 break; 2142 break;
2103 default: 2143 default:
2104 break; 2144 break;
2105 } 2145 }
2106 cgroup_unlock(); 2146 cgroup_unlock();
2147
2148 NODEMASK_FREE(oldmems);
2107 return NOTIFY_OK; 2149 return NOTIFY_OK;
2108} 2150}
2109#endif 2151#endif
diff --git a/kernel/cred.c b/kernel/cred.c
index 1ed8ca18790c..62af1816c235 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -10,6 +10,7 @@
10 */ 10 */
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/cred.h> 12#include <linux/cred.h>
13#include <linux/slab.h>
13#include <linux/sched.h> 14#include <linux/sched.h>
14#include <linux/key.h> 15#include <linux/key.h>
15#include <linux/keyctl.h> 16#include <linux/keyctl.h>
@@ -364,7 +365,7 @@ struct cred *prepare_usermodehelper_creds(void)
364 365
365 new = kmem_cache_alloc(cred_jar, GFP_ATOMIC); 366 new = kmem_cache_alloc(cred_jar, GFP_ATOMIC);
366 if (!new) 367 if (!new)
367 return NULL; 368 goto free_tgcred;
368 369
369 kdebug("prepare_usermodehelper_creds() alloc %p", new); 370 kdebug("prepare_usermodehelper_creds() alloc %p", new);
370 371
@@ -398,6 +399,12 @@ struct cred *prepare_usermodehelper_creds(void)
398error: 399error:
399 put_cred(new); 400 put_cred(new);
400 return NULL; 401 return NULL;
402
403free_tgcred:
404#ifdef CONFIG_KEYS
405 kfree(tgcred);
406#endif
407 return NULL;
401} 408}
402 409
403/* 410/*
@@ -786,8 +793,6 @@ bool creds_are_invalid(const struct cred *cred)
786{ 793{
787 if (cred->magic != CRED_MAGIC) 794 if (cred->magic != CRED_MAGIC)
788 return true; 795 return true;
789 if (atomic_read(&cred->usage) < atomic_read(&cred->subscribers))
790 return true;
791#ifdef CONFIG_SECURITY_SELINUX 796#ifdef CONFIG_SECURITY_SELINUX
792 if (selinux_is_enabled()) { 797 if (selinux_is_enabled()) {
793 if ((unsigned long) cred->security < PAGE_SIZE) 798 if ((unsigned long) cred->security < PAGE_SIZE)
diff --git a/kernel/early_res.c b/kernel/early_res.c
index 3cb2c661bb78..31aa9332ef3f 100644
--- a/kernel/early_res.c
+++ b/kernel/early_res.c
@@ -333,6 +333,12 @@ void __init free_early_partial(u64 start, u64 end)
333 struct early_res *r; 333 struct early_res *r;
334 int i; 334 int i;
335 335
336 if (start == end)
337 return;
338
339 if (WARN_ONCE(start > end, " wrong range [%#llx, %#llx]\n", start, end))
340 return;
341
336try_next: 342try_next:
337 i = find_overlapped_early(start, end); 343 i = find_overlapped_early(start, end);
338 if (i >= max_early_res) 344 if (i >= max_early_res)
diff --git a/kernel/exit.c b/kernel/exit.c
index ce1e48c2d93d..7f2683a10ac4 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -87,7 +87,7 @@ static void __exit_signal(struct task_struct *tsk)
87 87
88 sighand = rcu_dereference_check(tsk->sighand, 88 sighand = rcu_dereference_check(tsk->sighand,
89 rcu_read_lock_held() || 89 rcu_read_lock_held() ||
90 lockdep_is_held(&tasklist_lock)); 90 lockdep_tasklist_lock_is_held());
91 spin_lock(&sighand->siglock); 91 spin_lock(&sighand->siglock);
92 92
93 posix_cpu_timers_exit(tsk); 93 posix_cpu_timers_exit(tsk);
@@ -953,7 +953,8 @@ NORET_TYPE void do_exit(long code)
953 953
954 acct_update_integrals(tsk); 954 acct_update_integrals(tsk);
955 /* sync mm's RSS info before statistics gathering */ 955 /* sync mm's RSS info before statistics gathering */
956 sync_mm_rss(tsk, tsk->mm); 956 if (tsk->mm)
957 sync_mm_rss(tsk, tsk->mm);
957 group_dead = atomic_dec_and_test(&tsk->signal->live); 958 group_dead = atomic_dec_and_test(&tsk->signal->live);
958 if (group_dead) { 959 if (group_dead) {
959 hrtimer_cancel(&tsk->signal->real_timer); 960 hrtimer_cancel(&tsk->signal->real_timer);
diff --git a/kernel/fork.c b/kernel/fork.c
index b0ec34abc0bb..44b0791b0a2e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -86,7 +86,14 @@ int max_threads; /* tunable limit on nr_threads */
86DEFINE_PER_CPU(unsigned long, process_counts) = 0; 86DEFINE_PER_CPU(unsigned long, process_counts) = 0;
87 87
88__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ 88__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
89EXPORT_SYMBOL_GPL(tasklist_lock); 89
90#ifdef CONFIG_PROVE_RCU
91int lockdep_tasklist_lock_is_held(void)
92{
93 return lockdep_is_held(&tasklist_lock);
94}
95EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held);
96#endif /* #ifdef CONFIG_PROVE_RCU */
90 97
91int nr_processes(void) 98int nr_processes(void)
92{ 99{
@@ -833,17 +840,6 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
833 /* Thread group counters. */ 840 /* Thread group counters. */
834 thread_group_cputime_init(sig); 841 thread_group_cputime_init(sig);
835 842
836 /* Expiration times and increments. */
837 sig->it[CPUCLOCK_PROF].expires = cputime_zero;
838 sig->it[CPUCLOCK_PROF].incr = cputime_zero;
839 sig->it[CPUCLOCK_VIRT].expires = cputime_zero;
840 sig->it[CPUCLOCK_VIRT].incr = cputime_zero;
841
842 /* Cached expiration times. */
843 sig->cputime_expires.prof_exp = cputime_zero;
844 sig->cputime_expires.virt_exp = cputime_zero;
845 sig->cputime_expires.sched_exp = 0;
846
847 cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur); 843 cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
848 if (cpu_limit != RLIM_INFINITY) { 844 if (cpu_limit != RLIM_INFINITY) {
849 sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit); 845 sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit);
@@ -863,7 +859,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
863 if (clone_flags & CLONE_THREAD) 859 if (clone_flags & CLONE_THREAD)
864 return 0; 860 return 0;
865 861
866 sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); 862 sig = kmem_cache_zalloc(signal_cachep, GFP_KERNEL);
867 tsk->signal = sig; 863 tsk->signal = sig;
868 if (!sig) 864 if (!sig)
869 return -ENOMEM; 865 return -ENOMEM;
@@ -871,46 +867,21 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
871 atomic_set(&sig->count, 1); 867 atomic_set(&sig->count, 1);
872 atomic_set(&sig->live, 1); 868 atomic_set(&sig->live, 1);
873 init_waitqueue_head(&sig->wait_chldexit); 869 init_waitqueue_head(&sig->wait_chldexit);
874 sig->flags = 0;
875 if (clone_flags & CLONE_NEWPID) 870 if (clone_flags & CLONE_NEWPID)
876 sig->flags |= SIGNAL_UNKILLABLE; 871 sig->flags |= SIGNAL_UNKILLABLE;
877 sig->group_exit_code = 0;
878 sig->group_exit_task = NULL;
879 sig->group_stop_count = 0;
880 sig->curr_target = tsk; 872 sig->curr_target = tsk;
881 init_sigpending(&sig->shared_pending); 873 init_sigpending(&sig->shared_pending);
882 INIT_LIST_HEAD(&sig->posix_timers); 874 INIT_LIST_HEAD(&sig->posix_timers);
883 875
884 hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 876 hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
885 sig->it_real_incr.tv64 = 0;
886 sig->real_timer.function = it_real_fn; 877 sig->real_timer.function = it_real_fn;
887 878
888 sig->leader = 0; /* session leadership doesn't inherit */
889 sig->tty_old_pgrp = NULL;
890 sig->tty = NULL;
891
892 sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
893 sig->gtime = cputime_zero;
894 sig->cgtime = cputime_zero;
895#ifndef CONFIG_VIRT_CPU_ACCOUNTING
896 sig->prev_utime = sig->prev_stime = cputime_zero;
897#endif
898 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
899 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
900 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
901 sig->maxrss = sig->cmaxrss = 0;
902 task_io_accounting_init(&sig->ioac);
903 sig->sum_sched_runtime = 0;
904 taskstats_tgid_init(sig);
905
906 task_lock(current->group_leader); 879 task_lock(current->group_leader);
907 memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); 880 memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
908 task_unlock(current->group_leader); 881 task_unlock(current->group_leader);
909 882
910 posix_cpu_timers_init_group(sig); 883 posix_cpu_timers_init_group(sig);
911 884
912 acct_init_pacct(&sig->pacct);
913
914 tty_audit_fork(sig); 885 tty_audit_fork(sig);
915 886
916 sig->oom_adj = current->signal->oom_adj; 887 sig->oom_adj = current->signal->oom_adj;
@@ -1081,6 +1052,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1081 p->prev_utime = cputime_zero; 1052 p->prev_utime = cputime_zero;
1082 p->prev_stime = cputime_zero; 1053 p->prev_stime = cputime_zero;
1083#endif 1054#endif
1055#if defined(SPLIT_RSS_COUNTING)
1056 memset(&p->rss_stat, 0, sizeof(p->rss_stat));
1057#endif
1084 1058
1085 p->default_timer_slack_ns = current->timer_slack_ns; 1059 p->default_timer_slack_ns = current->timer_slack_ns;
1086 1060
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 967e66143e11..03808ed342a6 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -413,17 +413,17 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
413 * 413 *
414 * @return a set of per_cpu pointers to perf events 414 * @return a set of per_cpu pointers to perf events
415 */ 415 */
416struct perf_event ** 416struct perf_event * __percpu *
417register_wide_hw_breakpoint(struct perf_event_attr *attr, 417register_wide_hw_breakpoint(struct perf_event_attr *attr,
418 perf_overflow_handler_t triggered) 418 perf_overflow_handler_t triggered)
419{ 419{
420 struct perf_event **cpu_events, **pevent, *bp; 420 struct perf_event * __percpu *cpu_events, **pevent, *bp;
421 long err; 421 long err;
422 int cpu; 422 int cpu;
423 423
424 cpu_events = alloc_percpu(typeof(*cpu_events)); 424 cpu_events = alloc_percpu(typeof(*cpu_events));
425 if (!cpu_events) 425 if (!cpu_events)
426 return ERR_PTR(-ENOMEM); 426 return (void __percpu __force *)ERR_PTR(-ENOMEM);
427 427
428 get_online_cpus(); 428 get_online_cpus();
429 for_each_online_cpu(cpu) { 429 for_each_online_cpu(cpu) {
@@ -451,7 +451,7 @@ fail:
451 put_online_cpus(); 451 put_online_cpus();
452 452
453 free_percpu(cpu_events); 453 free_percpu(cpu_events);
454 return ERR_PTR(err); 454 return (void __percpu __force *)ERR_PTR(err);
455} 455}
456EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint); 456EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
457 457
@@ -459,7 +459,7 @@ EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
459 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel 459 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
460 * @cpu_events: the per cpu set of events to unregister 460 * @cpu_events: the per cpu set of events to unregister
461 */ 461 */
462void unregister_wide_hw_breakpoint(struct perf_event **cpu_events) 462void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
463{ 463{
464 int cpu; 464 int cpu;
465 struct perf_event **pevent; 465 struct perf_event **pevent;
@@ -489,5 +489,4 @@ struct pmu perf_ops_bp = {
489 .enable = arch_install_hw_breakpoint, 489 .enable = arch_install_hw_breakpoint,
490 .disable = arch_uninstall_hw_breakpoint, 490 .disable = arch_uninstall_hw_breakpoint,
491 .read = hw_breakpoint_pmu_read, 491 .read = hw_breakpoint_pmu_read,
492 .unthrottle = hw_breakpoint_pmu_unthrottle
493}; 492};
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index d70394f12ee9..b7091d5ca2f8 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -359,6 +359,23 @@ static inline void mask_ack_irq(struct irq_desc *desc, int irq)
359 if (desc->chip->ack) 359 if (desc->chip->ack)
360 desc->chip->ack(irq); 360 desc->chip->ack(irq);
361 } 361 }
362 desc->status |= IRQ_MASKED;
363}
364
365static inline void mask_irq(struct irq_desc *desc, int irq)
366{
367 if (desc->chip->mask) {
368 desc->chip->mask(irq);
369 desc->status |= IRQ_MASKED;
370 }
371}
372
373static inline void unmask_irq(struct irq_desc *desc, int irq)
374{
375 if (desc->chip->unmask) {
376 desc->chip->unmask(irq);
377 desc->status &= ~IRQ_MASKED;
378 }
362} 379}
363 380
364/* 381/*
@@ -484,10 +501,8 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
484 raw_spin_lock(&desc->lock); 501 raw_spin_lock(&desc->lock);
485 desc->status &= ~IRQ_INPROGRESS; 502 desc->status &= ~IRQ_INPROGRESS;
486 503
487 if (unlikely(desc->status & IRQ_ONESHOT)) 504 if (!(desc->status & (IRQ_DISABLED | IRQ_ONESHOT)))
488 desc->status |= IRQ_MASKED; 505 unmask_irq(desc, irq);
489 else if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask)
490 desc->chip->unmask(irq);
491out_unlock: 506out_unlock:
492 raw_spin_unlock(&desc->lock); 507 raw_spin_unlock(&desc->lock);
493} 508}
@@ -524,8 +539,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
524 action = desc->action; 539 action = desc->action;
525 if (unlikely(!action || (desc->status & IRQ_DISABLED))) { 540 if (unlikely(!action || (desc->status & IRQ_DISABLED))) {
526 desc->status |= IRQ_PENDING; 541 desc->status |= IRQ_PENDING;
527 if (desc->chip->mask) 542 mask_irq(desc, irq);
528 desc->chip->mask(irq);
529 goto out; 543 goto out;
530 } 544 }
531 545
@@ -554,7 +568,7 @@ out:
554 * signal. The occurrence is latched into the irq controller hardware 568 * signal. The occurrence is latched into the irq controller hardware
555 * and must be acked in order to be reenabled. After the ack another 569 * and must be acked in order to be reenabled. After the ack another
556 * interrupt can happen on the same source even before the first one 570 * interrupt can happen on the same source even before the first one
557 * is handled by the assosiacted event handler. If this happens it 571 * is handled by the associated event handler. If this happens it
558 * might be necessary to disable (mask) the interrupt depending on the 572 * might be necessary to disable (mask) the interrupt depending on the
559 * controller hardware. This requires to reenable the interrupt inside 573 * controller hardware. This requires to reenable the interrupt inside
560 * of the loop which handles the interrupts which have arrived while 574 * of the loop which handles the interrupts which have arrived while
@@ -593,7 +607,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
593 irqreturn_t action_ret; 607 irqreturn_t action_ret;
594 608
595 if (unlikely(!action)) { 609 if (unlikely(!action)) {
596 desc->chip->mask(irq); 610 mask_irq(desc, irq);
597 goto out_unlock; 611 goto out_unlock;
598 } 612 }
599 613
@@ -605,8 +619,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
605 if (unlikely((desc->status & 619 if (unlikely((desc->status &
606 (IRQ_PENDING | IRQ_MASKED | IRQ_DISABLED)) == 620 (IRQ_PENDING | IRQ_MASKED | IRQ_DISABLED)) ==
607 (IRQ_PENDING | IRQ_MASKED))) { 621 (IRQ_PENDING | IRQ_MASKED))) {
608 desc->chip->unmask(irq); 622 unmask_irq(desc, irq);
609 desc->status &= ~IRQ_MASKED;
610 } 623 }
611 624
612 desc->status &= ~IRQ_PENDING; 625 desc->status &= ~IRQ_PENDING;
@@ -716,7 +729,7 @@ set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip,
716 __set_irq_handler(irq, handle, 0, name); 729 __set_irq_handler(irq, handle, 0, name);
717} 730}
718 731
719void __init set_irq_noprobe(unsigned int irq) 732void set_irq_noprobe(unsigned int irq)
720{ 733{
721 struct irq_desc *desc = irq_to_desc(irq); 734 struct irq_desc *desc = irq_to_desc(irq);
722 unsigned long flags; 735 unsigned long flags;
@@ -731,7 +744,7 @@ void __init set_irq_noprobe(unsigned int irq)
731 raw_spin_unlock_irqrestore(&desc->lock, flags); 744 raw_spin_unlock_irqrestore(&desc->lock, flags);
732} 745}
733 746
734void __init set_irq_probe(unsigned int irq) 747void set_irq_probe(unsigned int irq)
735{ 748{
736 struct irq_desc *desc = irq_to_desc(irq); 749 struct irq_desc *desc = irq_to_desc(irq);
737 unsigned long flags; 750 unsigned long flags;
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c
index d06df9c41cba..1ef4ffcdfa55 100644
--- a/kernel/irq/devres.c
+++ b/kernel/irq/devres.c
@@ -42,7 +42,7 @@ static int devm_irq_match(struct device *dev, void *res, void *data)
42 * automatically freed on driver detach. 42 * automatically freed on driver detach.
43 * 43 *
44 * If an IRQ allocated with this function needs to be freed 44 * If an IRQ allocated with this function needs to be freed
45 * separately, dev_free_irq() must be used. 45 * separately, devm_free_irq() must be used.
46 */ 46 */
47int devm_request_threaded_irq(struct device *dev, unsigned int irq, 47int devm_request_threaded_irq(struct device *dev, unsigned int irq,
48 irq_handler_t handler, irq_handler_t thread_fn, 48 irq_handler_t handler, irq_handler_t thread_fn,
@@ -81,7 +81,7 @@ EXPORT_SYMBOL(devm_request_threaded_irq);
81 * Except for the extra @dev argument, this function takes the 81 * Except for the extra @dev argument, this function takes the
82 * same arguments and performs the same function as free_irq(). 82 * same arguments and performs the same function as free_irq().
83 * This function instead of free_irq() should be used to manually 83 * This function instead of free_irq() should be used to manually
84 * free IRQs allocated with dev_request_irq(). 84 * free IRQs allocated with devm_request_irq().
85 */ 85 */
86void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id) 86void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id)
87{ 87{
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index eb6078ca60c7..704e488730a5 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -382,6 +382,7 @@ int can_request_irq(unsigned int irq, unsigned long irqflags)
382{ 382{
383 struct irq_desc *desc = irq_to_desc(irq); 383 struct irq_desc *desc = irq_to_desc(irq);
384 struct irqaction *action; 384 struct irqaction *action;
385 unsigned long flags;
385 386
386 if (!desc) 387 if (!desc)
387 return 0; 388 return 0;
@@ -389,11 +390,14 @@ int can_request_irq(unsigned int irq, unsigned long irqflags)
389 if (desc->status & IRQ_NOREQUEST) 390 if (desc->status & IRQ_NOREQUEST)
390 return 0; 391 return 0;
391 392
393 raw_spin_lock_irqsave(&desc->lock, flags);
392 action = desc->action; 394 action = desc->action;
393 if (action) 395 if (action)
394 if (irqflags & action->flags & IRQF_SHARED) 396 if (irqflags & action->flags & IRQF_SHARED)
395 action = NULL; 397 action = NULL;
396 398
399 raw_spin_unlock_irqrestore(&desc->lock, flags);
400
397 return !action; 401 return !action;
398} 402}
399 403
@@ -483,8 +487,26 @@ static int irq_wait_for_interrupt(struct irqaction *action)
483 */ 487 */
484static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc) 488static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc)
485{ 489{
490again:
486 chip_bus_lock(irq, desc); 491 chip_bus_lock(irq, desc);
487 raw_spin_lock_irq(&desc->lock); 492 raw_spin_lock_irq(&desc->lock);
493
494 /*
495 * Implausible though it may be we need to protect us against
496 * the following scenario:
497 *
498 * The thread is faster done than the hard interrupt handler
499 * on the other CPU. If we unmask the irq line then the
500 * interrupt can come in again and masks the line, leaves due
501 * to IRQ_INPROGRESS and the irq line is masked forever.
502 */
503 if (unlikely(desc->status & IRQ_INPROGRESS)) {
504 raw_spin_unlock_irq(&desc->lock);
505 chip_bus_sync_unlock(irq, desc);
506 cpu_relax();
507 goto again;
508 }
509
488 if (!(desc->status & IRQ_DISABLED) && (desc->status & IRQ_MASKED)) { 510 if (!(desc->status & IRQ_DISABLED) && (desc->status & IRQ_MASKED)) {
489 desc->status &= ~IRQ_MASKED; 511 desc->status &= ~IRQ_MASKED;
490 desc->chip->unmask(irq); 512 desc->chip->unmask(irq);
@@ -735,6 +757,16 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
735 if (new->flags & IRQF_ONESHOT) 757 if (new->flags & IRQF_ONESHOT)
736 desc->status |= IRQ_ONESHOT; 758 desc->status |= IRQ_ONESHOT;
737 759
760 /*
761 * Force MSI interrupts to run with interrupts
762 * disabled. The multi vector cards can cause stack
763 * overflows due to nested interrupts when enough of
764 * them are directed to a core and fire at the same
765 * time.
766 */
767 if (desc->msi_desc)
768 new->flags |= IRQF_DISABLED;
769
738 if (!(desc->status & IRQ_NOAUTOEN)) { 770 if (!(desc->status & IRQ_NOAUTOEN)) {
739 desc->depth = 0; 771 desc->depth = 0;
740 desc->status &= ~IRQ_DISABLED; 772 desc->status &= ~IRQ_DISABLED;
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 963559dbd858..65d3845665ac 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -6,6 +6,7 @@
6 */ 6 */
7 7
8#include <linux/irq.h> 8#include <linux/irq.h>
9#include <linux/slab.h>
9#include <linux/module.h> 10#include <linux/module.h>
10#include <linux/random.h> 11#include <linux/random.h>
11#include <linux/interrupt.h> 12#include <linux/interrupt.h>
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 6f50eccc79c0..7a6eb04ef6b5 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -7,6 +7,7 @@
7 */ 7 */
8 8
9#include <linux/irq.h> 9#include <linux/irq.h>
10#include <linux/gfp.h>
10#include <linux/proc_fs.h> 11#include <linux/proc_fs.h>
11#include <linux/seq_file.h> 12#include <linux/seq_file.h>
12#include <linux/interrupt.h> 13#include <linux/interrupt.h>
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 8e5288a8a355..13aff293f4de 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -21,6 +21,7 @@
21#include <linux/sched.h> /* for cond_resched */ 21#include <linux/sched.h> /* for cond_resched */
22#include <linux/mm.h> 22#include <linux/mm.h>
23#include <linux/ctype.h> 23#include <linux/ctype.h>
24#include <linux/slab.h>
24 25
25#include <asm/sections.h> 26#include <asm/sections.h>
26 27
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 761fdd2b3034..11f3515ca83f 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -69,9 +69,16 @@ struct kgdb_state {
69 struct pt_regs *linux_regs; 69 struct pt_regs *linux_regs;
70}; 70};
71 71
72/* Exception state values */
73#define DCPU_WANT_MASTER 0x1 /* Waiting to become a master kgdb cpu */
74#define DCPU_NEXT_MASTER 0x2 /* Transition from one master cpu to another */
75#define DCPU_IS_SLAVE 0x4 /* Slave cpu enter exception */
76#define DCPU_SSTEP 0x8 /* CPU is single stepping */
77
72static struct debuggerinfo_struct { 78static struct debuggerinfo_struct {
73 void *debuggerinfo; 79 void *debuggerinfo;
74 struct task_struct *task; 80 struct task_struct *task;
81 int exception_state;
75} kgdb_info[NR_CPUS]; 82} kgdb_info[NR_CPUS];
76 83
77/** 84/**
@@ -391,27 +398,22 @@ int kgdb_mem2hex(char *mem, char *buf, int count)
391 398
392/* 399/*
393 * Copy the binary array pointed to by buf into mem. Fix $, #, and 400 * Copy the binary array pointed to by buf into mem. Fix $, #, and
394 * 0x7d escaped with 0x7d. Return a pointer to the character after 401 * 0x7d escaped with 0x7d. Return -EFAULT on failure or 0 on success.
395 * the last byte written. 402 * The input buf is overwritten with the result to write to mem.
396 */ 403 */
397static int kgdb_ebin2mem(char *buf, char *mem, int count) 404static int kgdb_ebin2mem(char *buf, char *mem, int count)
398{ 405{
399 int err = 0; 406 int size = 0;
400 char c; 407 char *c = buf;
401 408
402 while (count-- > 0) { 409 while (count-- > 0) {
403 c = *buf++; 410 c[size] = *buf++;
404 if (c == 0x7d) 411 if (c[size] == 0x7d)
405 c = *buf++ ^ 0x20; 412 c[size] = *buf++ ^ 0x20;
406 413 size++;
407 err = probe_kernel_write(mem, &c, 1);
408 if (err)
409 break;
410
411 mem++;
412 } 414 }
413 415
414 return err; 416 return probe_kernel_write(mem, c, size);
415} 417}
416 418
417/* 419/*
@@ -563,49 +565,6 @@ static struct task_struct *getthread(struct pt_regs *regs, int tid)
563} 565}
564 566
565/* 567/*
566 * CPU debug state control:
567 */
568
569#ifdef CONFIG_SMP
570static void kgdb_wait(struct pt_regs *regs)
571{
572 unsigned long flags;
573 int cpu;
574
575 local_irq_save(flags);
576 cpu = raw_smp_processor_id();
577 kgdb_info[cpu].debuggerinfo = regs;
578 kgdb_info[cpu].task = current;
579 /*
580 * Make sure the above info reaches the primary CPU before
581 * our cpu_in_kgdb[] flag setting does:
582 */
583 smp_wmb();
584 atomic_set(&cpu_in_kgdb[cpu], 1);
585
586 /* Disable any cpu specific hw breakpoints */
587 kgdb_disable_hw_debug(regs);
588
589 /* Wait till primary CPU is done with debugging */
590 while (atomic_read(&passive_cpu_wait[cpu]))
591 cpu_relax();
592
593 kgdb_info[cpu].debuggerinfo = NULL;
594 kgdb_info[cpu].task = NULL;
595
596 /* fix up hardware debug registers on local cpu */
597 if (arch_kgdb_ops.correct_hw_break)
598 arch_kgdb_ops.correct_hw_break();
599
600 /* Signal the primary CPU that we are done: */
601 atomic_set(&cpu_in_kgdb[cpu], 0);
602 touch_softlockup_watchdog_sync();
603 clocksource_touch_watchdog();
604 local_irq_restore(flags);
605}
606#endif
607
608/*
609 * Some architectures need cache flushes when we set/clear a 568 * Some architectures need cache flushes when we set/clear a
610 * breakpoint: 569 * breakpoint:
611 */ 570 */
@@ -1400,34 +1359,13 @@ static int kgdb_reenter_check(struct kgdb_state *ks)
1400 return 1; 1359 return 1;
1401} 1360}
1402 1361
1403/* 1362static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs)
1404 * kgdb_handle_exception() - main entry point from a kernel exception
1405 *
1406 * Locking hierarchy:
1407 * interface locks, if any (begin_session)
1408 * kgdb lock (kgdb_active)
1409 */
1410int
1411kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
1412{ 1363{
1413 struct kgdb_state kgdb_var;
1414 struct kgdb_state *ks = &kgdb_var;
1415 unsigned long flags; 1364 unsigned long flags;
1416 int sstep_tries = 100; 1365 int sstep_tries = 100;
1417 int error = 0; 1366 int error = 0;
1418 int i, cpu; 1367 int i, cpu;
1419 1368 int trace_on = 0;
1420 ks->cpu = raw_smp_processor_id();
1421 ks->ex_vector = evector;
1422 ks->signo = signo;
1423 ks->ex_vector = evector;
1424 ks->err_code = ecode;
1425 ks->kgdb_usethreadid = 0;
1426 ks->linux_regs = regs;
1427
1428 if (kgdb_reenter_check(ks))
1429 return 0; /* Ouch, double exception ! */
1430
1431acquirelock: 1369acquirelock:
1432 /* 1370 /*
1433 * Interrupts will be restored by the 'trap return' code, except when 1371 * Interrupts will be restored by the 'trap return' code, except when
@@ -1435,13 +1373,43 @@ acquirelock:
1435 */ 1373 */
1436 local_irq_save(flags); 1374 local_irq_save(flags);
1437 1375
1438 cpu = raw_smp_processor_id(); 1376 cpu = ks->cpu;
1377 kgdb_info[cpu].debuggerinfo = regs;
1378 kgdb_info[cpu].task = current;
1379 /*
1380 * Make sure the above info reaches the primary CPU before
1381 * our cpu_in_kgdb[] flag setting does:
1382 */
1383 atomic_inc(&cpu_in_kgdb[cpu]);
1439 1384
1440 /* 1385 /*
1441 * Acquire the kgdb_active lock: 1386 * CPU will loop if it is a slave or request to become a kgdb
1387 * master cpu and acquire the kgdb_active lock:
1442 */ 1388 */
1443 while (atomic_cmpxchg(&kgdb_active, -1, cpu) != -1) 1389 while (1) {
1390 if (kgdb_info[cpu].exception_state & DCPU_WANT_MASTER) {
1391 if (atomic_cmpxchg(&kgdb_active, -1, cpu) == cpu)
1392 break;
1393 } else if (kgdb_info[cpu].exception_state & DCPU_IS_SLAVE) {
1394 if (!atomic_read(&passive_cpu_wait[cpu]))
1395 goto return_normal;
1396 } else {
1397return_normal:
1398 /* Return to normal operation by executing any
1399 * hw breakpoint fixup.
1400 */
1401 if (arch_kgdb_ops.correct_hw_break)
1402 arch_kgdb_ops.correct_hw_break();
1403 if (trace_on)
1404 tracing_on();
1405 atomic_dec(&cpu_in_kgdb[cpu]);
1406 touch_softlockup_watchdog_sync();
1407 clocksource_touch_watchdog();
1408 local_irq_restore(flags);
1409 return 0;
1410 }
1444 cpu_relax(); 1411 cpu_relax();
1412 }
1445 1413
1446 /* 1414 /*
1447 * For single stepping, try to only enter on the processor 1415 * For single stepping, try to only enter on the processor
@@ -1475,9 +1443,6 @@ acquirelock:
1475 if (kgdb_io_ops->pre_exception) 1443 if (kgdb_io_ops->pre_exception)
1476 kgdb_io_ops->pre_exception(); 1444 kgdb_io_ops->pre_exception();
1477 1445
1478 kgdb_info[ks->cpu].debuggerinfo = ks->linux_regs;
1479 kgdb_info[ks->cpu].task = current;
1480
1481 kgdb_disable_hw_debug(ks->linux_regs); 1446 kgdb_disable_hw_debug(ks->linux_regs);
1482 1447
1483 /* 1448 /*
@@ -1486,15 +1451,9 @@ acquirelock:
1486 */ 1451 */
1487 if (!kgdb_single_step) { 1452 if (!kgdb_single_step) {
1488 for (i = 0; i < NR_CPUS; i++) 1453 for (i = 0; i < NR_CPUS; i++)
1489 atomic_set(&passive_cpu_wait[i], 1); 1454 atomic_inc(&passive_cpu_wait[i]);
1490 } 1455 }
1491 1456
1492 /*
1493 * spin_lock code is good enough as a barrier so we don't
1494 * need one here:
1495 */
1496 atomic_set(&cpu_in_kgdb[ks->cpu], 1);
1497
1498#ifdef CONFIG_SMP 1457#ifdef CONFIG_SMP
1499 /* Signal the other CPUs to enter kgdb_wait() */ 1458 /* Signal the other CPUs to enter kgdb_wait() */
1500 if ((!kgdb_single_step) && kgdb_do_roundup) 1459 if ((!kgdb_single_step) && kgdb_do_roundup)
@@ -1518,6 +1477,9 @@ acquirelock:
1518 kgdb_single_step = 0; 1477 kgdb_single_step = 0;
1519 kgdb_contthread = current; 1478 kgdb_contthread = current;
1520 exception_level = 0; 1479 exception_level = 0;
1480 trace_on = tracing_is_on();
1481 if (trace_on)
1482 tracing_off();
1521 1483
1522 /* Talk to debugger with gdbserial protocol */ 1484 /* Talk to debugger with gdbserial protocol */
1523 error = gdb_serial_stub(ks); 1485 error = gdb_serial_stub(ks);
@@ -1526,13 +1488,11 @@ acquirelock:
1526 if (kgdb_io_ops->post_exception) 1488 if (kgdb_io_ops->post_exception)
1527 kgdb_io_ops->post_exception(); 1489 kgdb_io_ops->post_exception();
1528 1490
1529 kgdb_info[ks->cpu].debuggerinfo = NULL; 1491 atomic_dec(&cpu_in_kgdb[ks->cpu]);
1530 kgdb_info[ks->cpu].task = NULL;
1531 atomic_set(&cpu_in_kgdb[ks->cpu], 0);
1532 1492
1533 if (!kgdb_single_step) { 1493 if (!kgdb_single_step) {
1534 for (i = NR_CPUS-1; i >= 0; i--) 1494 for (i = NR_CPUS-1; i >= 0; i--)
1535 atomic_set(&passive_cpu_wait[i], 0); 1495 atomic_dec(&passive_cpu_wait[i]);
1536 /* 1496 /*
1537 * Wait till all the CPUs have quit 1497 * Wait till all the CPUs have quit
1538 * from the debugger. 1498 * from the debugger.
@@ -1551,6 +1511,8 @@ kgdb_restore:
1551 else 1511 else
1552 kgdb_sstep_pid = 0; 1512 kgdb_sstep_pid = 0;
1553 } 1513 }
1514 if (trace_on)
1515 tracing_on();
1554 /* Free kgdb_active */ 1516 /* Free kgdb_active */
1555 atomic_set(&kgdb_active, -1); 1517 atomic_set(&kgdb_active, -1);
1556 touch_softlockup_watchdog_sync(); 1518 touch_softlockup_watchdog_sync();
@@ -1560,13 +1522,52 @@ kgdb_restore:
1560 return error; 1522 return error;
1561} 1523}
1562 1524
1525/*
1526 * kgdb_handle_exception() - main entry point from a kernel exception
1527 *
1528 * Locking hierarchy:
1529 * interface locks, if any (begin_session)
1530 * kgdb lock (kgdb_active)
1531 */
1532int
1533kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
1534{
1535 struct kgdb_state kgdb_var;
1536 struct kgdb_state *ks = &kgdb_var;
1537 int ret;
1538
1539 ks->cpu = raw_smp_processor_id();
1540 ks->ex_vector = evector;
1541 ks->signo = signo;
1542 ks->ex_vector = evector;
1543 ks->err_code = ecode;
1544 ks->kgdb_usethreadid = 0;
1545 ks->linux_regs = regs;
1546
1547 if (kgdb_reenter_check(ks))
1548 return 0; /* Ouch, double exception ! */
1549 kgdb_info[ks->cpu].exception_state |= DCPU_WANT_MASTER;
1550 ret = kgdb_cpu_enter(ks, regs);
1551 kgdb_info[ks->cpu].exception_state &= ~DCPU_WANT_MASTER;
1552 return ret;
1553}
1554
1563int kgdb_nmicallback(int cpu, void *regs) 1555int kgdb_nmicallback(int cpu, void *regs)
1564{ 1556{
1565#ifdef CONFIG_SMP 1557#ifdef CONFIG_SMP
1558 struct kgdb_state kgdb_var;
1559 struct kgdb_state *ks = &kgdb_var;
1560
1561 memset(ks, 0, sizeof(struct kgdb_state));
1562 ks->cpu = cpu;
1563 ks->linux_regs = regs;
1564
1566 if (!atomic_read(&cpu_in_kgdb[cpu]) && 1565 if (!atomic_read(&cpu_in_kgdb[cpu]) &&
1567 atomic_read(&kgdb_active) != cpu && 1566 atomic_read(&kgdb_active) != -1 &&
1568 atomic_read(&cpu_in_kgdb[atomic_read(&kgdb_active)])) { 1567 atomic_read(&kgdb_active) != cpu) {
1569 kgdb_wait((struct pt_regs *)regs); 1568 kgdb_info[cpu].exception_state |= DCPU_IS_SLAVE;
1569 kgdb_cpu_enter(ks, regs);
1570 kgdb_info[cpu].exception_state &= ~DCPU_IS_SLAVE;
1570 return 0; 1571 return 0;
1571 } 1572 }
1572#endif 1573#endif
@@ -1742,11 +1743,11 @@ EXPORT_SYMBOL_GPL(kgdb_unregister_io_module);
1742 */ 1743 */
1743void kgdb_breakpoint(void) 1744void kgdb_breakpoint(void)
1744{ 1745{
1745 atomic_set(&kgdb_setting_breakpoint, 1); 1746 atomic_inc(&kgdb_setting_breakpoint);
1746 wmb(); /* Sync point before breakpoint */ 1747 wmb(); /* Sync point before breakpoint */
1747 arch_kgdb_breakpoint(); 1748 arch_kgdb_breakpoint();
1748 wmb(); /* Sync point after breakpoint */ 1749 wmb(); /* Sync point after breakpoint */
1749 atomic_set(&kgdb_setting_breakpoint, 0); 1750 atomic_dec(&kgdb_setting_breakpoint);
1750} 1751}
1751EXPORT_SYMBOL_GPL(kgdb_breakpoint); 1752EXPORT_SYMBOL_GPL(kgdb_breakpoint);
1752 1753
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index fa034d29cf73..0ed46f3e51e9 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -259,7 +259,8 @@ static void __kprobes __free_insn_slot(struct kprobe_insn_cache *c,
259 struct kprobe_insn_page *kip; 259 struct kprobe_insn_page *kip;
260 260
261 list_for_each_entry(kip, &c->pages, list) { 261 list_for_each_entry(kip, &c->pages, list) {
262 long idx = ((long)slot - (long)kip->insns) / c->insn_size; 262 long idx = ((long)slot - (long)kip->insns) /
263 (c->insn_size * sizeof(kprobe_opcode_t));
263 if (idx >= 0 && idx < slots_per_page(c)) { 264 if (idx >= 0 && idx < slots_per_page(c)) {
264 WARN_ON(kip->slot_used[idx] != SLOT_USED); 265 WARN_ON(kip->slot_used[idx] != SLOT_USED);
265 if (dirty) { 266 if (dirty) {
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 6b1ccc3f0205..21fe3c426948 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -33,7 +33,7 @@ static ssize_t uevent_seqnum_show(struct kobject *kobj,
33} 33}
34KERNEL_ATTR_RO(uevent_seqnum); 34KERNEL_ATTR_RO(uevent_seqnum);
35 35
36/* uevent helper program, used during early boo */ 36/* uevent helper program, used during early boot */
37static ssize_t uevent_helper_show(struct kobject *kobj, 37static ssize_t uevent_helper_show(struct kobject *kobj,
38 struct kobj_attribute *attr, char *buf) 38 struct kobj_attribute *attr, char *buf)
39{ 39{
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 82ed0ea15194..83911c780175 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -219,7 +219,7 @@ int kthreadd(void *unused)
219 set_task_comm(tsk, "kthreadd"); 219 set_task_comm(tsk, "kthreadd");
220 ignore_signals(tsk); 220 ignore_signals(tsk);
221 set_cpus_allowed_ptr(tsk, cpu_all_mask); 221 set_cpus_allowed_ptr(tsk, cpu_all_mask);
222 set_mems_allowed(node_possible_map); 222 set_mems_allowed(node_states[N_HIGH_MEMORY]);
223 223
224 current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG; 224 current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG;
225 225
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index ca07c5c0c914..877fb306d415 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -56,7 +56,6 @@
56#include <linux/module.h> 56#include <linux/module.h>
57#include <linux/sched.h> 57#include <linux/sched.h>
58#include <linux/list.h> 58#include <linux/list.h>
59#include <linux/slab.h>
60#include <linux/stacktrace.h> 59#include <linux/stacktrace.h>
61 60
62static DEFINE_SPINLOCK(latency_lock); 61static DEFINE_SPINLOCK(latency_lock);
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 0c30d0455de1..2594e1ce41cb 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -43,6 +43,7 @@
43#include <linux/ftrace.h> 43#include <linux/ftrace.h>
44#include <linux/stringify.h> 44#include <linux/stringify.h>
45#include <linux/bitops.h> 45#include <linux/bitops.h>
46#include <linux/gfp.h>
46 47
47#include <asm/sections.h> 48#include <asm/sections.h>
48 49
@@ -582,9 +583,6 @@ static int static_obj(void *obj)
582 unsigned long start = (unsigned long) &_stext, 583 unsigned long start = (unsigned long) &_stext,
583 end = (unsigned long) &_end, 584 end = (unsigned long) &_end,
584 addr = (unsigned long) obj; 585 addr = (unsigned long) obj;
585#ifdef CONFIG_SMP
586 int i;
587#endif
588 586
589 /* 587 /*
590 * static variable? 588 * static variable?
@@ -595,24 +593,16 @@ static int static_obj(void *obj)
595 if (arch_is_kernel_data(addr)) 593 if (arch_is_kernel_data(addr))
596 return 1; 594 return 1;
597 595
598#ifdef CONFIG_SMP
599 /* 596 /*
600 * percpu var? 597 * in-kernel percpu var?
601 */ 598 */
602 for_each_possible_cpu(i) { 599 if (is_kernel_percpu_address(addr))
603 start = (unsigned long) &__per_cpu_start + per_cpu_offset(i); 600 return 1;
604 end = (unsigned long) &__per_cpu_start + PERCPU_ENOUGH_ROOM
605 + per_cpu_offset(i);
606
607 if ((addr >= start) && (addr < end))
608 return 1;
609 }
610#endif
611 601
612 /* 602 /*
613 * module var? 603 * module static or percpu var?
614 */ 604 */
615 return is_module_address(addr); 605 return is_module_address(addr) || is_module_percpu_address(addr);
616} 606}
617 607
618/* 608/*
@@ -3211,8 +3201,6 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
3211{ 3201{
3212 unsigned long flags; 3202 unsigned long flags;
3213 3203
3214 trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);
3215
3216 if (unlikely(current->lockdep_recursion)) 3204 if (unlikely(current->lockdep_recursion))
3217 return; 3205 return;
3218 3206
@@ -3220,6 +3208,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
3220 check_flags(flags); 3208 check_flags(flags);
3221 3209
3222 current->lockdep_recursion = 1; 3210 current->lockdep_recursion = 1;
3211 trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);
3223 __lock_acquire(lock, subclass, trylock, read, check, 3212 __lock_acquire(lock, subclass, trylock, read, check,
3224 irqs_disabled_flags(flags), nest_lock, ip, 0); 3213 irqs_disabled_flags(flags), nest_lock, ip, 0);
3225 current->lockdep_recursion = 0; 3214 current->lockdep_recursion = 0;
@@ -3232,14 +3221,13 @@ void lock_release(struct lockdep_map *lock, int nested,
3232{ 3221{
3233 unsigned long flags; 3222 unsigned long flags;
3234 3223
3235 trace_lock_release(lock, nested, ip);
3236
3237 if (unlikely(current->lockdep_recursion)) 3224 if (unlikely(current->lockdep_recursion))
3238 return; 3225 return;
3239 3226
3240 raw_local_irq_save(flags); 3227 raw_local_irq_save(flags);
3241 check_flags(flags); 3228 check_flags(flags);
3242 current->lockdep_recursion = 1; 3229 current->lockdep_recursion = 1;
3230 trace_lock_release(lock, nested, ip);
3243 __lock_release(lock, nested, ip); 3231 __lock_release(lock, nested, ip);
3244 current->lockdep_recursion = 0; 3232 current->lockdep_recursion = 0;
3245 raw_local_irq_restore(flags); 3233 raw_local_irq_restore(flags);
@@ -3413,8 +3401,6 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
3413{ 3401{
3414 unsigned long flags; 3402 unsigned long flags;
3415 3403
3416 trace_lock_contended(lock, ip);
3417
3418 if (unlikely(!lock_stat)) 3404 if (unlikely(!lock_stat))
3419 return; 3405 return;
3420 3406
@@ -3424,6 +3410,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
3424 raw_local_irq_save(flags); 3410 raw_local_irq_save(flags);
3425 check_flags(flags); 3411 check_flags(flags);
3426 current->lockdep_recursion = 1; 3412 current->lockdep_recursion = 1;
3413 trace_lock_contended(lock, ip);
3427 __lock_contended(lock, ip); 3414 __lock_contended(lock, ip);
3428 current->lockdep_recursion = 0; 3415 current->lockdep_recursion = 0;
3429 raw_local_irq_restore(flags); 3416 raw_local_irq_restore(flags);
@@ -3822,6 +3809,7 @@ void lockdep_rcu_dereference(const char *file, const int line)
3822 printk("%s:%d invoked rcu_dereference_check() without protection!\n", 3809 printk("%s:%d invoked rcu_dereference_check() without protection!\n",
3823 file, line); 3810 file, line);
3824 printk("\nother info that might help us debug this:\n\n"); 3811 printk("\nother info that might help us debug this:\n\n");
3812 printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks);
3825 lockdep_print_held_locks(curr); 3813 lockdep_print_held_locks(curr);
3826 printk("\nstack backtrace:\n"); 3814 printk("\nstack backtrace:\n");
3827 dump_stack(); 3815 dump_stack();
diff --git a/kernel/module.c b/kernel/module.c
index c968d3606dca..1016b75b026a 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -370,27 +370,33 @@ EXPORT_SYMBOL_GPL(find_module);
370 370
371#ifdef CONFIG_SMP 371#ifdef CONFIG_SMP
372 372
373static void *percpu_modalloc(unsigned long size, unsigned long align, 373static inline void __percpu *mod_percpu(struct module *mod)
374 const char *name)
375{ 374{
376 void *ptr; 375 return mod->percpu;
376}
377 377
378static int percpu_modalloc(struct module *mod,
379 unsigned long size, unsigned long align)
380{
378 if (align > PAGE_SIZE) { 381 if (align > PAGE_SIZE) {
379 printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", 382 printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n",
380 name, align, PAGE_SIZE); 383 mod->name, align, PAGE_SIZE);
381 align = PAGE_SIZE; 384 align = PAGE_SIZE;
382 } 385 }
383 386
384 ptr = __alloc_reserved_percpu(size, align); 387 mod->percpu = __alloc_reserved_percpu(size, align);
385 if (!ptr) 388 if (!mod->percpu) {
386 printk(KERN_WARNING 389 printk(KERN_WARNING
387 "Could not allocate %lu bytes percpu data\n", size); 390 "Could not allocate %lu bytes percpu data\n", size);
388 return ptr; 391 return -ENOMEM;
392 }
393 mod->percpu_size = size;
394 return 0;
389} 395}
390 396
391static void percpu_modfree(void *freeme) 397static void percpu_modfree(struct module *mod)
392{ 398{
393 free_percpu(freeme); 399 free_percpu(mod->percpu);
394} 400}
395 401
396static unsigned int find_pcpusec(Elf_Ehdr *hdr, 402static unsigned int find_pcpusec(Elf_Ehdr *hdr,
@@ -400,24 +406,62 @@ static unsigned int find_pcpusec(Elf_Ehdr *hdr,
400 return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); 406 return find_sec(hdr, sechdrs, secstrings, ".data.percpu");
401} 407}
402 408
403static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size) 409static void percpu_modcopy(struct module *mod,
410 const void *from, unsigned long size)
404{ 411{
405 int cpu; 412 int cpu;
406 413
407 for_each_possible_cpu(cpu) 414 for_each_possible_cpu(cpu)
408 memcpy(pcpudest + per_cpu_offset(cpu), from, size); 415 memcpy(per_cpu_ptr(mod->percpu, cpu), from, size);
416}
417
418/**
419 * is_module_percpu_address - test whether address is from module static percpu
420 * @addr: address to test
421 *
422 * Test whether @addr belongs to module static percpu area.
423 *
424 * RETURNS:
425 * %true if @addr is from module static percpu area
426 */
427bool is_module_percpu_address(unsigned long addr)
428{
429 struct module *mod;
430 unsigned int cpu;
431
432 preempt_disable();
433
434 list_for_each_entry_rcu(mod, &modules, list) {
435 if (!mod->percpu_size)
436 continue;
437 for_each_possible_cpu(cpu) {
438 void *start = per_cpu_ptr(mod->percpu, cpu);
439
440 if ((void *)addr >= start &&
441 (void *)addr < start + mod->percpu_size) {
442 preempt_enable();
443 return true;
444 }
445 }
446 }
447
448 preempt_enable();
449 return false;
409} 450}
410 451
411#else /* ... !CONFIG_SMP */ 452#else /* ... !CONFIG_SMP */
412 453
413static inline void *percpu_modalloc(unsigned long size, unsigned long align, 454static inline void __percpu *mod_percpu(struct module *mod)
414 const char *name)
415{ 455{
416 return NULL; 456 return NULL;
417} 457}
418static inline void percpu_modfree(void *pcpuptr) 458static inline int percpu_modalloc(struct module *mod,
459 unsigned long size, unsigned long align)
460{
461 return -ENOMEM;
462}
463static inline void percpu_modfree(struct module *mod)
419{ 464{
420 BUG();
421} 465}
422static inline unsigned int find_pcpusec(Elf_Ehdr *hdr, 466static inline unsigned int find_pcpusec(Elf_Ehdr *hdr,
423 Elf_Shdr *sechdrs, 467 Elf_Shdr *sechdrs,
@@ -425,12 +469,16 @@ static inline unsigned int find_pcpusec(Elf_Ehdr *hdr,
425{ 469{
426 return 0; 470 return 0;
427} 471}
428static inline void percpu_modcopy(void *pcpudst, const void *src, 472static inline void percpu_modcopy(struct module *mod,
429 unsigned long size) 473 const void *from, unsigned long size)
430{ 474{
431 /* pcpusec should be 0, and size of that section should be 0. */ 475 /* pcpusec should be 0, and size of that section should be 0. */
432 BUG_ON(size != 0); 476 BUG_ON(size != 0);
433} 477}
478bool is_module_percpu_address(unsigned long addr)
479{
480 return false;
481}
434 482
435#endif /* CONFIG_SMP */ 483#endif /* CONFIG_SMP */
436 484
@@ -473,11 +521,13 @@ static void module_unload_init(struct module *mod)
473 int cpu; 521 int cpu;
474 522
475 INIT_LIST_HEAD(&mod->modules_which_use_me); 523 INIT_LIST_HEAD(&mod->modules_which_use_me);
476 for_each_possible_cpu(cpu) 524 for_each_possible_cpu(cpu) {
477 per_cpu_ptr(mod->refptr, cpu)->count = 0; 525 per_cpu_ptr(mod->refptr, cpu)->incs = 0;
526 per_cpu_ptr(mod->refptr, cpu)->decs = 0;
527 }
478 528
479 /* Hold reference count during initialization. */ 529 /* Hold reference count during initialization. */
480 __this_cpu_write(mod->refptr->count, 1); 530 __this_cpu_write(mod->refptr->incs, 1);
481 /* Backwards compatibility macros put refcount during init. */ 531 /* Backwards compatibility macros put refcount during init. */
482 mod->waiter = current; 532 mod->waiter = current;
483} 533}
@@ -616,12 +666,28 @@ static int try_stop_module(struct module *mod, int flags, int *forced)
616 666
617unsigned int module_refcount(struct module *mod) 667unsigned int module_refcount(struct module *mod)
618{ 668{
619 unsigned int total = 0; 669 unsigned int incs = 0, decs = 0;
620 int cpu; 670 int cpu;
621 671
622 for_each_possible_cpu(cpu) 672 for_each_possible_cpu(cpu)
623 total += per_cpu_ptr(mod->refptr, cpu)->count; 673 decs += per_cpu_ptr(mod->refptr, cpu)->decs;
624 return total; 674 /*
675 * ensure the incs are added up after the decs.
676 * module_put ensures incs are visible before decs with smp_wmb.
677 *
678 * This 2-count scheme avoids the situation where the refcount
679 * for CPU0 is read, then CPU0 increments the module refcount,
680 * then CPU1 drops that refcount, then the refcount for CPU1 is
681 * read. We would record a decrement but not its corresponding
682 * increment so we would see a low count (disaster).
683 *
684 * Rare situation? But module_refcount can be preempted, and we
685 * might be tallying up 4096+ CPUs. So it is not impossible.
686 */
687 smp_rmb();
688 for_each_possible_cpu(cpu)
689 incs += per_cpu_ptr(mod->refptr, cpu)->incs;
690 return incs - decs;
625} 691}
626EXPORT_SYMBOL(module_refcount); 692EXPORT_SYMBOL(module_refcount);
627 693
@@ -798,10 +864,11 @@ void module_put(struct module *module)
798{ 864{
799 if (module) { 865 if (module) {
800 preempt_disable(); 866 preempt_disable();
801 __this_cpu_dec(module->refptr->count); 867 smp_wmb(); /* see comment in module_refcount */
868 __this_cpu_inc(module->refptr->decs);
802 869
803 trace_module_put(module, _RET_IP_, 870 trace_module_put(module, _RET_IP_,
804 __this_cpu_read(module->refptr->count)); 871 __this_cpu_read(module->refptr->decs));
805 /* Maybe they're waiting for us to drop reference? */ 872 /* Maybe they're waiting for us to drop reference? */
806 if (unlikely(!module_is_live(module))) 873 if (unlikely(!module_is_live(module)))
807 wake_up_process(module->waiter); 874 wake_up_process(module->waiter);
@@ -1400,8 +1467,7 @@ static void free_module(struct module *mod)
1400 /* This may be NULL, but that's OK */ 1467 /* This may be NULL, but that's OK */
1401 module_free(mod, mod->module_init); 1468 module_free(mod, mod->module_init);
1402 kfree(mod->args); 1469 kfree(mod->args);
1403 if (mod->percpu) 1470 percpu_modfree(mod);
1404 percpu_modfree(mod->percpu);
1405#if defined(CONFIG_MODULE_UNLOAD) 1471#if defined(CONFIG_MODULE_UNLOAD)
1406 if (mod->refptr) 1472 if (mod->refptr)
1407 free_percpu(mod->refptr); 1473 free_percpu(mod->refptr);
@@ -1520,7 +1586,7 @@ static int simplify_symbols(Elf_Shdr *sechdrs,
1520 default: 1586 default:
1521 /* Divert to percpu allocation if a percpu var. */ 1587 /* Divert to percpu allocation if a percpu var. */
1522 if (sym[i].st_shndx == pcpuindex) 1588 if (sym[i].st_shndx == pcpuindex)
1523 secbase = (unsigned long)mod->percpu; 1589 secbase = (unsigned long)mod_percpu(mod);
1524 else 1590 else
1525 secbase = sechdrs[sym[i].st_shndx].sh_addr; 1591 secbase = sechdrs[sym[i].st_shndx].sh_addr;
1526 sym[i].st_value += secbase; 1592 sym[i].st_value += secbase;
@@ -1954,7 +2020,7 @@ static noinline struct module *load_module(void __user *umod,
1954 unsigned int modindex, versindex, infoindex, pcpuindex; 2020 unsigned int modindex, versindex, infoindex, pcpuindex;
1955 struct module *mod; 2021 struct module *mod;
1956 long err = 0; 2022 long err = 0;
1957 void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ 2023 void *ptr = NULL; /* Stops spurious gcc warning */
1958 unsigned long symoffs, stroffs, *strmap; 2024 unsigned long symoffs, stroffs, *strmap;
1959 2025
1960 mm_segment_t old_fs; 2026 mm_segment_t old_fs;
@@ -2094,15 +2160,11 @@ static noinline struct module *load_module(void __user *umod,
2094 2160
2095 if (pcpuindex) { 2161 if (pcpuindex) {
2096 /* We have a special allocation for this section. */ 2162 /* We have a special allocation for this section. */
2097 percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size, 2163 err = percpu_modalloc(mod, sechdrs[pcpuindex].sh_size,
2098 sechdrs[pcpuindex].sh_addralign, 2164 sechdrs[pcpuindex].sh_addralign);
2099 mod->name); 2165 if (err)
2100 if (!percpu) {
2101 err = -ENOMEM;
2102 goto free_mod; 2166 goto free_mod;
2103 }
2104 sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC; 2167 sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
2105 mod->percpu = percpu;
2106 } 2168 }
2107 2169
2108 /* Determine total sizes, and put offsets in sh_entsize. For now 2170 /* Determine total sizes, and put offsets in sh_entsize. For now
@@ -2317,7 +2379,7 @@ static noinline struct module *load_module(void __user *umod,
2317 sort_extable(mod->extable, mod->extable + mod->num_exentries); 2379 sort_extable(mod->extable, mod->extable + mod->num_exentries);
2318 2380
2319 /* Finally, copy percpu area over. */ 2381 /* Finally, copy percpu area over. */
2320 percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, 2382 percpu_modcopy(mod, (void *)sechdrs[pcpuindex].sh_addr,
2321 sechdrs[pcpuindex].sh_size); 2383 sechdrs[pcpuindex].sh_size);
2322 2384
2323 add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex, 2385 add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex,
@@ -2409,8 +2471,7 @@ static noinline struct module *load_module(void __user *umod,
2409 module_free(mod, mod->module_core); 2471 module_free(mod, mod->module_core);
2410 /* mod will be freed with core. Don't access it beyond this line! */ 2472 /* mod will be freed with core. Don't access it beyond this line! */
2411 free_percpu: 2473 free_percpu:
2412 if (percpu) 2474 percpu_modfree(mod);
2413 percpu_modfree(percpu);
2414 free_mod: 2475 free_mod:
2415 kfree(args); 2476 kfree(args);
2416 kfree(strmap); 2477 kfree(strmap);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 09b4ff9711b2..f74e6c00e26d 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -13,6 +13,7 @@
13 * Pavel Emelianov <xemul@openvz.org> 13 * Pavel Emelianov <xemul@openvz.org>
14 */ 14 */
15 15
16#include <linux/slab.h>
16#include <linux/module.h> 17#include <linux/module.h>
17#include <linux/nsproxy.h> 18#include <linux/nsproxy.h>
18#include <linux/init_task.h> 19#include <linux/init_task.h>
@@ -24,7 +25,18 @@
24 25
25static struct kmem_cache *nsproxy_cachep; 26static struct kmem_cache *nsproxy_cachep;
26 27
27struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); 28struct nsproxy init_nsproxy = {
29 .count = ATOMIC_INIT(1),
30 .uts_ns = &init_uts_ns,
31#if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC)
32 .ipc_ns = &init_ipc_ns,
33#endif
34 .mnt_ns = NULL,
35 .pid_ns = &init_pid_ns,
36#ifdef CONFIG_NET
37 .net_ns = &init_net,
38#endif
39};
28 40
29static inline struct nsproxy *create_nsproxy(void) 41static inline struct nsproxy *create_nsproxy(void)
30{ 42{
diff --git a/kernel/padata.c b/kernel/padata.c
index 93caf65ff57c..fd03513c7327 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -25,6 +25,7 @@
25#include <linux/padata.h> 25#include <linux/padata.h>
26#include <linux/mutex.h> 26#include <linux/mutex.h>
27#include <linux/sched.h> 27#include <linux/sched.h>
28#include <linux/slab.h>
28#include <linux/rcupdate.h> 29#include <linux/rcupdate.h>
29 30
30#define MAX_SEQ_NR INT_MAX - NR_CPUS 31#define MAX_SEQ_NR INT_MAX - NR_CPUS
diff --git a/kernel/params.c b/kernel/params.c
index d55a53ec9234..0b30ecd53a52 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -401,8 +401,8 @@ int param_get_string(char *buffer, struct kernel_param *kp)
401} 401}
402 402
403/* sysfs output in /sys/modules/XYZ/parameters/ */ 403/* sysfs output in /sys/modules/XYZ/parameters/ */
404#define to_module_attr(n) container_of(n, struct module_attribute, attr); 404#define to_module_attr(n) container_of(n, struct module_attribute, attr)
405#define to_module_kobject(n) container_of(n, struct module_kobject, kobj); 405#define to_module_kobject(n) container_of(n, struct module_kobject, kobj)
406 406
407extern struct kernel_param __start___param[], __stop___param[]; 407extern struct kernel_param __start___param[], __stop___param[];
408 408
@@ -420,7 +420,7 @@ struct module_param_attrs
420}; 420};
421 421
422#ifdef CONFIG_SYSFS 422#ifdef CONFIG_SYSFS
423#define to_param_attr(n) container_of(n, struct param_attribute, mattr); 423#define to_param_attr(n) container_of(n, struct param_attribute, mattr)
424 424
425static ssize_t param_attr_show(struct module_attribute *mattr, 425static ssize_t param_attr_show(struct module_attribute *mattr,
426 struct module *mod, char *buf) 426 struct module *mod, char *buf)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index f40560b86544..3d1552d3c12b 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -15,6 +15,7 @@
15#include <linux/smp.h> 15#include <linux/smp.h>
16#include <linux/file.h> 16#include <linux/file.h>
17#include <linux/poll.h> 17#include <linux/poll.h>
18#include <linux/slab.h>
18#include <linux/sysfs.h> 19#include <linux/sysfs.h>
19#include <linux/dcache.h> 20#include <linux/dcache.h>
20#include <linux/percpu.h> 21#include <linux/percpu.h>
@@ -56,21 +57,6 @@ static atomic_t nr_task_events __read_mostly;
56 */ 57 */
57int sysctl_perf_event_paranoid __read_mostly = 1; 58int sysctl_perf_event_paranoid __read_mostly = 1;
58 59
59static inline bool perf_paranoid_tracepoint_raw(void)
60{
61 return sysctl_perf_event_paranoid > -1;
62}
63
64static inline bool perf_paranoid_cpu(void)
65{
66 return sysctl_perf_event_paranoid > 0;
67}
68
69static inline bool perf_paranoid_kernel(void)
70{
71 return sysctl_perf_event_paranoid > 1;
72}
73
74int sysctl_perf_event_mlock __read_mostly = 512; /* 'free' kb per user */ 60int sysctl_perf_event_mlock __read_mostly = 512; /* 'free' kb per user */
75 61
76/* 62/*
@@ -96,10 +82,6 @@ extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event)
96void __weak hw_perf_disable(void) { barrier(); } 82void __weak hw_perf_disable(void) { barrier(); }
97void __weak hw_perf_enable(void) { barrier(); } 83void __weak hw_perf_enable(void) { barrier(); }
98 84
99void __weak hw_perf_event_setup(int cpu) { barrier(); }
100void __weak hw_perf_event_setup_online(int cpu) { barrier(); }
101void __weak hw_perf_event_setup_offline(int cpu) { barrier(); }
102
103int __weak 85int __weak
104hw_perf_group_sched_in(struct perf_event *group_leader, 86hw_perf_group_sched_in(struct perf_event *group_leader,
105 struct perf_cpu_context *cpuctx, 87 struct perf_cpu_context *cpuctx,
@@ -112,25 +94,15 @@ void __weak perf_event_print_debug(void) { }
112 94
113static DEFINE_PER_CPU(int, perf_disable_count); 95static DEFINE_PER_CPU(int, perf_disable_count);
114 96
115void __perf_disable(void)
116{
117 __get_cpu_var(perf_disable_count)++;
118}
119
120bool __perf_enable(void)
121{
122 return !--__get_cpu_var(perf_disable_count);
123}
124
125void perf_disable(void) 97void perf_disable(void)
126{ 98{
127 __perf_disable(); 99 if (!__get_cpu_var(perf_disable_count)++)
128 hw_perf_disable(); 100 hw_perf_disable();
129} 101}
130 102
131void perf_enable(void) 103void perf_enable(void)
132{ 104{
133 if (__perf_enable()) 105 if (!--__get_cpu_var(perf_disable_count))
134 hw_perf_enable(); 106 hw_perf_enable();
135} 107}
136 108
@@ -1193,11 +1165,9 @@ void perf_event_task_sched_out(struct task_struct *task,
1193 struct perf_event_context *ctx = task->perf_event_ctxp; 1165 struct perf_event_context *ctx = task->perf_event_ctxp;
1194 struct perf_event_context *next_ctx; 1166 struct perf_event_context *next_ctx;
1195 struct perf_event_context *parent; 1167 struct perf_event_context *parent;
1196 struct pt_regs *regs;
1197 int do_switch = 1; 1168 int do_switch = 1;
1198 1169
1199 regs = task_pt_regs(task); 1170 perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
1200 perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, regs, 0);
1201 1171
1202 if (likely(!ctx || !cpuctx->task_ctx)) 1172 if (likely(!ctx || !cpuctx->task_ctx))
1203 return; 1173 return;
@@ -1553,12 +1523,15 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1553 */ 1523 */
1554 if (interrupts == MAX_INTERRUPTS) { 1524 if (interrupts == MAX_INTERRUPTS) {
1555 perf_log_throttle(event, 1); 1525 perf_log_throttle(event, 1);
1526 perf_disable();
1556 event->pmu->unthrottle(event); 1527 event->pmu->unthrottle(event);
1528 perf_enable();
1557 } 1529 }
1558 1530
1559 if (!event->attr.freq || !event->attr.sample_freq) 1531 if (!event->attr.freq || !event->attr.sample_freq)
1560 continue; 1532 continue;
1561 1533
1534 perf_disable();
1562 event->pmu->read(event); 1535 event->pmu->read(event);
1563 now = atomic64_read(&event->count); 1536 now = atomic64_read(&event->count);
1564 delta = now - hwc->freq_count_stamp; 1537 delta = now - hwc->freq_count_stamp;
@@ -1566,6 +1539,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1566 1539
1567 if (delta > 0) 1540 if (delta > 0)
1568 perf_adjust_period(event, TICK_NSEC, delta); 1541 perf_adjust_period(event, TICK_NSEC, delta);
1542 perf_enable();
1569 } 1543 }
1570 raw_spin_unlock(&ctx->lock); 1544 raw_spin_unlock(&ctx->lock);
1571} 1545}
@@ -1575,9 +1549,6 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1575 */ 1549 */
1576static void rotate_ctx(struct perf_event_context *ctx) 1550static void rotate_ctx(struct perf_event_context *ctx)
1577{ 1551{
1578 if (!ctx->nr_events)
1579 return;
1580
1581 raw_spin_lock(&ctx->lock); 1552 raw_spin_lock(&ctx->lock);
1582 1553
1583 /* Rotate the first entry last of non-pinned groups */ 1554 /* Rotate the first entry last of non-pinned groups */
@@ -1590,19 +1561,28 @@ void perf_event_task_tick(struct task_struct *curr)
1590{ 1561{
1591 struct perf_cpu_context *cpuctx; 1562 struct perf_cpu_context *cpuctx;
1592 struct perf_event_context *ctx; 1563 struct perf_event_context *ctx;
1564 int rotate = 0;
1593 1565
1594 if (!atomic_read(&nr_events)) 1566 if (!atomic_read(&nr_events))
1595 return; 1567 return;
1596 1568
1597 cpuctx = &__get_cpu_var(perf_cpu_context); 1569 cpuctx = &__get_cpu_var(perf_cpu_context);
1598 ctx = curr->perf_event_ctxp; 1570 if (cpuctx->ctx.nr_events &&
1571 cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
1572 rotate = 1;
1599 1573
1600 perf_disable(); 1574 ctx = curr->perf_event_ctxp;
1575 if (ctx && ctx->nr_events && ctx->nr_events != ctx->nr_active)
1576 rotate = 1;
1601 1577
1602 perf_ctx_adjust_freq(&cpuctx->ctx); 1578 perf_ctx_adjust_freq(&cpuctx->ctx);
1603 if (ctx) 1579 if (ctx)
1604 perf_ctx_adjust_freq(ctx); 1580 perf_ctx_adjust_freq(ctx);
1605 1581
1582 if (!rotate)
1583 return;
1584
1585 perf_disable();
1606 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); 1586 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
1607 if (ctx) 1587 if (ctx)
1608 task_ctx_sched_out(ctx, EVENT_FLEXIBLE); 1588 task_ctx_sched_out(ctx, EVENT_FLEXIBLE);
@@ -1614,7 +1594,6 @@ void perf_event_task_tick(struct task_struct *curr)
1614 cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); 1594 cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE);
1615 if (ctx) 1595 if (ctx)
1616 task_ctx_sched_in(curr, EVENT_FLEXIBLE); 1596 task_ctx_sched_in(curr, EVENT_FLEXIBLE);
1617
1618 perf_enable(); 1597 perf_enable();
1619} 1598}
1620 1599
@@ -2806,6 +2785,12 @@ __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
2806 return NULL; 2785 return NULL;
2807} 2786}
2808 2787
2788__weak
2789void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
2790{
2791}
2792
2793
2809/* 2794/*
2810 * Output 2795 * Output
2811 */ 2796 */
@@ -3391,15 +3376,23 @@ static void perf_event_task_output(struct perf_event *event,
3391 struct perf_task_event *task_event) 3376 struct perf_task_event *task_event)
3392{ 3377{
3393 struct perf_output_handle handle; 3378 struct perf_output_handle handle;
3394 int size;
3395 struct task_struct *task = task_event->task; 3379 struct task_struct *task = task_event->task;
3396 int ret; 3380 unsigned long flags;
3381 int size, ret;
3382
3383 /*
3384 * If this CPU attempts to acquire an rq lock held by a CPU spinning
3385 * in perf_output_lock() from interrupt context, it's game over.
3386 */
3387 local_irq_save(flags);
3397 3388
3398 size = task_event->event_id.header.size; 3389 size = task_event->event_id.header.size;
3399 ret = perf_output_begin(&handle, event, size, 0, 0); 3390 ret = perf_output_begin(&handle, event, size, 0, 0);
3400 3391
3401 if (ret) 3392 if (ret) {
3393 local_irq_restore(flags);
3402 return; 3394 return;
3395 }
3403 3396
3404 task_event->event_id.pid = perf_event_pid(event, task); 3397 task_event->event_id.pid = perf_event_pid(event, task);
3405 task_event->event_id.ppid = perf_event_pid(event, current); 3398 task_event->event_id.ppid = perf_event_pid(event, current);
@@ -3410,6 +3403,7 @@ static void perf_event_task_output(struct perf_event *event,
3410 perf_output_put(&handle, task_event->event_id); 3403 perf_output_put(&handle, task_event->event_id);
3411 3404
3412 perf_output_end(&handle); 3405 perf_output_end(&handle);
3406 local_irq_restore(flags);
3413} 3407}
3414 3408
3415static int perf_event_task_match(struct perf_event *event) 3409static int perf_event_task_match(struct perf_event *event)
@@ -4123,8 +4117,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi,
4123 if (rctx < 0) 4117 if (rctx < 0)
4124 return; 4118 return;
4125 4119
4126 data.addr = addr; 4120 perf_sample_data_init(&data, addr);
4127 data.raw = NULL;
4128 4121
4129 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs); 4122 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs);
4130 4123
@@ -4169,11 +4162,10 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
4169 struct perf_event *event; 4162 struct perf_event *event;
4170 u64 period; 4163 u64 period;
4171 4164
4172 event = container_of(hrtimer, struct perf_event, hw.hrtimer); 4165 event = container_of(hrtimer, struct perf_event, hw.hrtimer);
4173 event->pmu->read(event); 4166 event->pmu->read(event);
4174 4167
4175 data.addr = 0; 4168 perf_sample_data_init(&data, 0);
4176 data.raw = NULL;
4177 data.period = event->hw.last_period; 4169 data.period = event->hw.last_period;
4178 regs = get_irq_regs(); 4170 regs = get_irq_regs();
4179 /* 4171 /*
@@ -4335,26 +4327,20 @@ static const struct pmu perf_ops_task_clock = {
4335#ifdef CONFIG_EVENT_TRACING 4327#ifdef CONFIG_EVENT_TRACING
4336 4328
4337void perf_tp_event(int event_id, u64 addr, u64 count, void *record, 4329void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
4338 int entry_size) 4330 int entry_size, struct pt_regs *regs)
4339{ 4331{
4332 struct perf_sample_data data;
4340 struct perf_raw_record raw = { 4333 struct perf_raw_record raw = {
4341 .size = entry_size, 4334 .size = entry_size,
4342 .data = record, 4335 .data = record,
4343 }; 4336 };
4344 4337
4345 struct perf_sample_data data = { 4338 perf_sample_data_init(&data, addr);
4346 .addr = addr, 4339 data.raw = &raw;
4347 .raw = &raw,
4348 };
4349
4350 struct pt_regs *regs = get_irq_regs();
4351
4352 if (!regs)
4353 regs = task_pt_regs(current);
4354 4340
4355 /* Trace events already protected against recursion */ 4341 /* Trace events already protected against recursion */
4356 do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, 4342 do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
4357 &data, regs); 4343 &data, regs);
4358} 4344}
4359EXPORT_SYMBOL_GPL(perf_tp_event); 4345EXPORT_SYMBOL_GPL(perf_tp_event);
4360 4346
@@ -4370,7 +4356,7 @@ static int perf_tp_event_match(struct perf_event *event,
4370 4356
4371static void tp_perf_event_destroy(struct perf_event *event) 4357static void tp_perf_event_destroy(struct perf_event *event)
4372{ 4358{
4373 ftrace_profile_disable(event->attr.config); 4359 perf_trace_disable(event->attr.config);
4374} 4360}
4375 4361
4376static const struct pmu *tp_perf_event_init(struct perf_event *event) 4362static const struct pmu *tp_perf_event_init(struct perf_event *event)
@@ -4384,7 +4370,7 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
4384 !capable(CAP_SYS_ADMIN)) 4370 !capable(CAP_SYS_ADMIN))
4385 return ERR_PTR(-EPERM); 4371 return ERR_PTR(-EPERM);
4386 4372
4387 if (ftrace_profile_enable(event->attr.config)) 4373 if (perf_trace_enable(event->attr.config))
4388 return NULL; 4374 return NULL;
4389 4375
4390 event->destroy = tp_perf_event_destroy; 4376 event->destroy = tp_perf_event_destroy;
@@ -4463,8 +4449,7 @@ void perf_bp_event(struct perf_event *bp, void *data)
4463 struct perf_sample_data sample; 4449 struct perf_sample_data sample;
4464 struct pt_regs *regs = data; 4450 struct pt_regs *regs = data;
4465 4451
4466 sample.raw = NULL; 4452 perf_sample_data_init(&sample, bp->attr.bp_addr);
4467 sample.addr = bp->attr.bp_addr;
4468 4453
4469 if (!perf_exclude_event(bp, regs)) 4454 if (!perf_exclude_event(bp, regs))
4470 perf_swevent_add(bp, 1, 1, &sample, regs); 4455 perf_swevent_add(bp, 1, 1, &sample, regs);
@@ -4912,7 +4897,7 @@ err_fput_free_put_context:
4912 4897
4913err_free_put_context: 4898err_free_put_context:
4914 if (err < 0) 4899 if (err < 0)
4915 kfree(event); 4900 free_event(event);
4916 4901
4917err_put_context: 4902err_put_context:
4918 if (err < 0) 4903 if (err < 0)
@@ -5392,18 +5377,26 @@ int perf_event_init_task(struct task_struct *child)
5392 return ret; 5377 return ret;
5393} 5378}
5394 5379
5380static void __init perf_event_init_all_cpus(void)
5381{
5382 int cpu;
5383 struct perf_cpu_context *cpuctx;
5384
5385 for_each_possible_cpu(cpu) {
5386 cpuctx = &per_cpu(perf_cpu_context, cpu);
5387 __perf_event_init_context(&cpuctx->ctx, NULL);
5388 }
5389}
5390
5395static void __cpuinit perf_event_init_cpu(int cpu) 5391static void __cpuinit perf_event_init_cpu(int cpu)
5396{ 5392{
5397 struct perf_cpu_context *cpuctx; 5393 struct perf_cpu_context *cpuctx;
5398 5394
5399 cpuctx = &per_cpu(perf_cpu_context, cpu); 5395 cpuctx = &per_cpu(perf_cpu_context, cpu);
5400 __perf_event_init_context(&cpuctx->ctx, NULL);
5401 5396
5402 spin_lock(&perf_resource_lock); 5397 spin_lock(&perf_resource_lock);
5403 cpuctx->max_pertask = perf_max_events - perf_reserved_percpu; 5398 cpuctx->max_pertask = perf_max_events - perf_reserved_percpu;
5404 spin_unlock(&perf_resource_lock); 5399 spin_unlock(&perf_resource_lock);
5405
5406 hw_perf_event_setup(cpu);
5407} 5400}
5408 5401
5409#ifdef CONFIG_HOTPLUG_CPU 5402#ifdef CONFIG_HOTPLUG_CPU
@@ -5443,20 +5436,11 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
5443 perf_event_init_cpu(cpu); 5436 perf_event_init_cpu(cpu);
5444 break; 5437 break;
5445 5438
5446 case CPU_ONLINE:
5447 case CPU_ONLINE_FROZEN:
5448 hw_perf_event_setup_online(cpu);
5449 break;
5450
5451 case CPU_DOWN_PREPARE: 5439 case CPU_DOWN_PREPARE:
5452 case CPU_DOWN_PREPARE_FROZEN: 5440 case CPU_DOWN_PREPARE_FROZEN:
5453 perf_event_exit_cpu(cpu); 5441 perf_event_exit_cpu(cpu);
5454 break; 5442 break;
5455 5443
5456 case CPU_DEAD:
5457 hw_perf_event_setup_offline(cpu);
5458 break;
5459
5460 default: 5444 default:
5461 break; 5445 break;
5462 } 5446 }
@@ -5474,6 +5458,7 @@ static struct notifier_block __cpuinitdata perf_cpu_nb = {
5474 5458
5475void __init perf_event_init(void) 5459void __init perf_event_init(void)
5476{ 5460{
5461 perf_event_init_all_cpus();
5477 perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, 5462 perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
5478 (void *)(long)smp_processor_id()); 5463 (void *)(long)smp_processor_id());
5479 perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE, 5464 perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE,
diff --git a/kernel/pid.c b/kernel/pid.c
index 86b296943e5f..aebb30d9c233 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -367,7 +367,9 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type)
367 struct task_struct *result = NULL; 367 struct task_struct *result = NULL;
368 if (pid) { 368 if (pid) {
369 struct hlist_node *first; 369 struct hlist_node *first;
370 first = rcu_dereference_check(pid->tasks[type].first, rcu_read_lock_held() || lockdep_is_held(&tasklist_lock)); 370 first = rcu_dereference_check(pid->tasks[type].first,
371 rcu_read_lock_held() ||
372 lockdep_tasklist_lock_is_held());
371 if (first) 373 if (first)
372 result = hlist_entry(first, struct task_struct, pids[(type)].node); 374 result = hlist_entry(first, struct task_struct, pids[(type)].node);
373 } 375 }
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 86b3796b0436..a5aff94e1f0b 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -13,6 +13,7 @@
13#include <linux/syscalls.h> 13#include <linux/syscalls.h>
14#include <linux/err.h> 14#include <linux/err.h>
15#include <linux/acct.h> 15#include <linux/acct.h>
16#include <linux/slab.h>
16 17
17#define BITS_PER_PAGE (PAGE_SIZE*8) 18#define BITS_PER_PAGE (PAGE_SIZE*8)
18 19
@@ -161,13 +162,12 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
161 rcu_read_lock(); 162 rcu_read_lock();
162 163
163 /* 164 /*
164 * Use force_sig() since it clears SIGNAL_UNKILLABLE ensuring 165 * Any nested-container's init processes won't ignore the
165 * any nested-container's init processes don't ignore the 166 * SEND_SIG_NOINFO signal, see send_signal()->si_fromuser().
166 * signal
167 */ 167 */
168 task = pid_task(find_vpid(nr), PIDTYPE_PID); 168 task = pid_task(find_vpid(nr), PIDTYPE_PID);
169 if (task) 169 if (task)
170 force_sig(SIGKILL, task); 170 send_sig_info(SIGKILL, SEND_SIG_NOINFO, task);
171 171
172 rcu_read_unlock(); 172 rcu_read_unlock();
173 173
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 1a22dfd42df9..bc7704b3a443 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -1061,9 +1061,9 @@ static void check_thread_timers(struct task_struct *tsk,
1061 } 1061 }
1062} 1062}
1063 1063
1064static void stop_process_timers(struct task_struct *tsk) 1064static void stop_process_timers(struct signal_struct *sig)
1065{ 1065{
1066 struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; 1066 struct thread_group_cputimer *cputimer = &sig->cputimer;
1067 unsigned long flags; 1067 unsigned long flags;
1068 1068
1069 if (!cputimer->running) 1069 if (!cputimer->running)
@@ -1072,6 +1072,10 @@ static void stop_process_timers(struct task_struct *tsk)
1072 spin_lock_irqsave(&cputimer->lock, flags); 1072 spin_lock_irqsave(&cputimer->lock, flags);
1073 cputimer->running = 0; 1073 cputimer->running = 0;
1074 spin_unlock_irqrestore(&cputimer->lock, flags); 1074 spin_unlock_irqrestore(&cputimer->lock, flags);
1075
1076 sig->cputime_expires.prof_exp = cputime_zero;
1077 sig->cputime_expires.virt_exp = cputime_zero;
1078 sig->cputime_expires.sched_exp = 0;
1075} 1079}
1076 1080
1077static u32 onecputick; 1081static u32 onecputick;
@@ -1133,7 +1137,7 @@ static void check_process_timers(struct task_struct *tsk,
1133 list_empty(&timers[CPUCLOCK_VIRT]) && 1137 list_empty(&timers[CPUCLOCK_VIRT]) &&
1134 cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) && 1138 cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) &&
1135 list_empty(&timers[CPUCLOCK_SCHED])) { 1139 list_empty(&timers[CPUCLOCK_SCHED])) {
1136 stop_process_timers(tsk); 1140 stop_process_timers(sig);
1137 return; 1141 return;
1138 } 1142 }
1139 1143
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index da5288ec2392..aa9e916da4d5 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -22,6 +22,7 @@
22#include <linux/console.h> 22#include <linux/console.h>
23#include <linux/cpu.h> 23#include <linux/cpu.h>
24#include <linux/freezer.h> 24#include <linux/freezer.h>
25#include <linux/gfp.h>
25#include <scsi/scsi_scan.h> 26#include <scsi/scsi_scan.h>
26#include <asm/suspend.h> 27#include <asm/suspend.h>
27 28
diff --git a/kernel/power/hibernate_nvs.c b/kernel/power/hibernate_nvs.c
index 39ac698ef836..fdcad9ed5a7b 100644
--- a/kernel/power/hibernate_nvs.c
+++ b/kernel/power/hibernate_nvs.c
@@ -10,6 +10,7 @@
10#include <linux/kernel.h> 10#include <linux/kernel.h>
11#include <linux/list.h> 11#include <linux/list.h>
12#include <linux/mm.h> 12#include <linux/mm.h>
13#include <linux/slab.h>
13#include <linux/suspend.h> 14#include <linux/suspend.h>
14 15
15/* 16/*
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 5ade1bdcf366..71ae29052ab6 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -88,12 +88,11 @@ static int try_to_freeze_tasks(bool sig_only)
88 printk(KERN_ERR "Freezing of tasks failed after %d.%02d seconds " 88 printk(KERN_ERR "Freezing of tasks failed after %d.%02d seconds "
89 "(%d tasks refusing to freeze):\n", 89 "(%d tasks refusing to freeze):\n",
90 elapsed_csecs / 100, elapsed_csecs % 100, todo); 90 elapsed_csecs / 100, elapsed_csecs % 100, todo);
91 show_state();
92 read_lock(&tasklist_lock); 91 read_lock(&tasklist_lock);
93 do_each_thread(g, p) { 92 do_each_thread(g, p) {
94 task_lock(p); 93 task_lock(p);
95 if (freezing(p) && !freezer_should_skip(p)) 94 if (freezing(p) && !freezer_should_skip(p))
96 printk(KERN_ERR " %s\n", p->comm); 95 sched_show_task(p);
97 cancel_freezing(p); 96 cancel_freezing(p);
98 task_unlock(p); 97 task_unlock(p);
99 } while_each_thread(g, p); 98 } while_each_thread(g, p);
@@ -145,7 +144,7 @@ static void thaw_tasks(bool nosig_only)
145 if (nosig_only && should_send_signal(p)) 144 if (nosig_only && should_send_signal(p))
146 continue; 145 continue;
147 146
148 if (cgroup_frozen(p)) 147 if (cgroup_freezing_or_frozen(p))
149 continue; 148 continue;
150 149
151 thaw_process(p); 150 thaw_process(p);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 830cadecbdfc..be861c26dda7 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -26,6 +26,7 @@
26#include <linux/console.h> 26#include <linux/console.h>
27#include <linux/highmem.h> 27#include <linux/highmem.h>
28#include <linux/list.h> 28#include <linux/list.h>
29#include <linux/slab.h>
29 30
30#include <asm/uaccess.h> 31#include <asm/uaccess.h>
31#include <asm/mmu_context.h> 32#include <asm/mmu_context.h>
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 44cce10b582d..56e7dbb8b996 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -15,6 +15,7 @@
15#include <linux/console.h> 15#include <linux/console.h>
16#include <linux/cpu.h> 16#include <linux/cpu.h>
17#include <linux/syscalls.h> 17#include <linux/syscalls.h>
18#include <linux/gfp.h>
18 19
19#include "power.h" 20#include "power.h"
20 21
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 1d575733d4e1..66824d71983a 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -23,6 +23,7 @@
23#include <linux/swap.h> 23#include <linux/swap.h>
24#include <linux/swapops.h> 24#include <linux/swapops.h>
25#include <linux/pm.h> 25#include <linux/pm.h>
26#include <linux/slab.h>
26 27
27#include "power.h" 28#include "power.h"
28 29
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 4d2289626a84..a8c96212bc1b 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -420,7 +420,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
420 * User space encodes device types as two-byte values, 420 * User space encodes device types as two-byte values,
421 * so we need to recode them 421 * so we need to recode them
422 */ 422 */
423 swdev = old_decode_dev(swap_area.dev); 423 swdev = new_decode_dev(swap_area.dev);
424 if (swdev) { 424 if (swdev) {
425 offset = swap_area.offset; 425 offset = swap_area.offset;
426 data->swap = swap_type_of(swdev, offset, NULL); 426 data->swap = swap_type_of(swdev, offset, NULL);
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index f1125c1a6321..03a7ea1579f6 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -45,6 +45,7 @@
45#include <linux/mutex.h> 45#include <linux/mutex.h>
46#include <linux/module.h> 46#include <linux/module.h>
47#include <linux/kernel_stat.h> 47#include <linux/kernel_stat.h>
48#include <linux/hardirq.h>
48 49
49#ifdef CONFIG_DEBUG_LOCK_ALLOC 50#ifdef CONFIG_DEBUG_LOCK_ALLOC
50static struct lock_class_key rcu_lock_key; 51static struct lock_class_key rcu_lock_key;
@@ -66,6 +67,35 @@ EXPORT_SYMBOL_GPL(rcu_sched_lock_map);
66int rcu_scheduler_active __read_mostly; 67int rcu_scheduler_active __read_mostly;
67EXPORT_SYMBOL_GPL(rcu_scheduler_active); 68EXPORT_SYMBOL_GPL(rcu_scheduler_active);
68 69
70#ifdef CONFIG_DEBUG_LOCK_ALLOC
71
72int debug_lockdep_rcu_enabled(void)
73{
74 return rcu_scheduler_active && debug_locks &&
75 current->lockdep_recursion == 0;
76}
77EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
78
79/**
80 * rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section?
81 *
82 * Check for bottom half being disabled, which covers both the
83 * CONFIG_PROVE_RCU and not cases. Note that if someone uses
84 * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled)
85 * will show the situation.
86 *
87 * Check debug_lockdep_rcu_enabled() to prevent false positives during boot.
88 */
89int rcu_read_lock_bh_held(void)
90{
91 if (!debug_lockdep_rcu_enabled())
92 return 1;
93 return in_softirq();
94}
95EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
96
97#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
98
69/* 99/*
70 * This function is invoked towards the end of the scheduler's initialization 100 * This function is invoked towards the end of the scheduler's initialization
71 * process. Before this is called, the idle task might contain 101 * process. Before this is called, the idle task might contain
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 1439eb504c22..4a525a30e08e 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -246,12 +246,21 @@ struct rcu_data {
246 246
247#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ 247#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */
248#ifdef CONFIG_RCU_CPU_STALL_DETECTOR 248#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
249#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rsp->jiffies_stall */ 249
250#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rsp->jiffies_stall */ 250#ifdef CONFIG_PROVE_RCU
251#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ 251#define RCU_STALL_DELAY_DELTA (5 * HZ)
252 /* to take at least one */ 252#else
253 /* scheduling clock irq */ 253#define RCU_STALL_DELAY_DELTA 0
254 /* before ratting on them. */ 254#endif
255
256#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ + RCU_STALL_DELAY_DELTA)
257 /* for rsp->jiffies_stall */
258#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ + RCU_STALL_DELAY_DELTA)
259 /* for rsp->jiffies_stall */
260#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */
261 /* to take at least one */
262 /* scheduling clock irq */
263 /* before ratting on them. */
255 264
256#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 265#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
257 266
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 464ad2cdee00..79b53bda8943 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1010,6 +1010,10 @@ int rcu_needs_cpu(int cpu)
1010 int c = 0; 1010 int c = 0;
1011 int thatcpu; 1011 int thatcpu;
1012 1012
1013 /* Check for being in the holdoff period. */
1014 if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies)
1015 return rcu_needs_cpu_quick_check(cpu);
1016
1013 /* Don't bother unless we are the last non-dyntick-idle CPU. */ 1017 /* Don't bother unless we are the last non-dyntick-idle CPU. */
1014 for_each_cpu_not(thatcpu, nohz_cpu_mask) 1018 for_each_cpu_not(thatcpu, nohz_cpu_mask)
1015 if (thatcpu != cpu) { 1019 if (thatcpu != cpu) {
@@ -1041,10 +1045,8 @@ int rcu_needs_cpu(int cpu)
1041 } 1045 }
1042 1046
1043 /* If RCU callbacks are still pending, RCU still needs this CPU. */ 1047 /* If RCU callbacks are still pending, RCU still needs this CPU. */
1044 if (c) { 1048 if (c)
1045 raise_softirq(RCU_SOFTIRQ); 1049 raise_softirq(RCU_SOFTIRQ);
1046 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
1047 }
1048 return c; 1050 return c;
1049} 1051}
1050 1052
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index bcdabf37c40b..c7eaa37a768b 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -10,7 +10,6 @@
10#include <linux/types.h> 10#include <linux/types.h>
11#include <linux/parser.h> 11#include <linux/parser.h>
12#include <linux/fs.h> 12#include <linux/fs.h>
13#include <linux/slab.h>
14#include <linux/res_counter.h> 13#include <linux/res_counter.h>
15#include <linux/uaccess.h> 14#include <linux/uaccess.h>
16#include <linux/mm.h> 15#include <linux/mm.h>
diff --git a/kernel/resource.c b/kernel/resource.c
index 2d5be5d9bf5f..9c358e263534 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -219,19 +219,34 @@ void release_child_resources(struct resource *r)
219} 219}
220 220
221/** 221/**
222 * request_resource - request and reserve an I/O or memory resource 222 * request_resource_conflict - request and reserve an I/O or memory resource
223 * @root: root resource descriptor 223 * @root: root resource descriptor
224 * @new: resource descriptor desired by caller 224 * @new: resource descriptor desired by caller
225 * 225 *
226 * Returns 0 for success, negative error code on error. 226 * Returns 0 for success, conflict resource on error.
227 */ 227 */
228int request_resource(struct resource *root, struct resource *new) 228struct resource *request_resource_conflict(struct resource *root, struct resource *new)
229{ 229{
230 struct resource *conflict; 230 struct resource *conflict;
231 231
232 write_lock(&resource_lock); 232 write_lock(&resource_lock);
233 conflict = __request_resource(root, new); 233 conflict = __request_resource(root, new);
234 write_unlock(&resource_lock); 234 write_unlock(&resource_lock);
235 return conflict;
236}
237
238/**
239 * request_resource - request and reserve an I/O or memory resource
240 * @root: root resource descriptor
241 * @new: resource descriptor desired by caller
242 *
243 * Returns 0 for success, negative error code on error.
244 */
245int request_resource(struct resource *root, struct resource *new)
246{
247 struct resource *conflict;
248
249 conflict = request_resource_conflict(root, new);
235 return conflict ? -EBUSY : 0; 250 return conflict ? -EBUSY : 0;
236} 251}
237 252
@@ -474,25 +489,40 @@ static struct resource * __insert_resource(struct resource *parent, struct resou
474} 489}
475 490
476/** 491/**
477 * insert_resource - Inserts a resource in the resource tree 492 * insert_resource_conflict - Inserts resource in the resource tree
478 * @parent: parent of the new resource 493 * @parent: parent of the new resource
479 * @new: new resource to insert 494 * @new: new resource to insert
480 * 495 *
481 * Returns 0 on success, -EBUSY if the resource can't be inserted. 496 * Returns 0 on success, conflict resource if the resource can't be inserted.
482 * 497 *
483 * This function is equivalent to request_resource when no conflict 498 * This function is equivalent to request_resource_conflict when no conflict
484 * happens. If a conflict happens, and the conflicting resources 499 * happens. If a conflict happens, and the conflicting resources
485 * entirely fit within the range of the new resource, then the new 500 * entirely fit within the range of the new resource, then the new
486 * resource is inserted and the conflicting resources become children of 501 * resource is inserted and the conflicting resources become children of
487 * the new resource. 502 * the new resource.
488 */ 503 */
489int insert_resource(struct resource *parent, struct resource *new) 504struct resource *insert_resource_conflict(struct resource *parent, struct resource *new)
490{ 505{
491 struct resource *conflict; 506 struct resource *conflict;
492 507
493 write_lock(&resource_lock); 508 write_lock(&resource_lock);
494 conflict = __insert_resource(parent, new); 509 conflict = __insert_resource(parent, new);
495 write_unlock(&resource_lock); 510 write_unlock(&resource_lock);
511 return conflict;
512}
513
514/**
515 * insert_resource - Inserts a resource in the resource tree
516 * @parent: parent of the new resource
517 * @new: new resource to insert
518 *
519 * Returns 0 on success, -EBUSY if the resource can't be inserted.
520 */
521int insert_resource(struct resource *parent, struct resource *new)
522{
523 struct resource *conflict;
524
525 conflict = insert_resource_conflict(parent, new);
496 return conflict ? -EBUSY : 0; 526 return conflict ? -EBUSY : 0;
497} 527}
498 528
diff --git a/kernel/sched.c b/kernel/sched.c
index 150b6988de49..3c2a54f70ffe 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -71,6 +71,7 @@
71#include <linux/debugfs.h> 71#include <linux/debugfs.h>
72#include <linux/ctype.h> 72#include <linux/ctype.h>
73#include <linux/ftrace.h> 73#include <linux/ftrace.h>
74#include <linux/slab.h>
74 75
75#include <asm/tlb.h> 76#include <asm/tlb.h>
76#include <asm/irq_regs.h> 77#include <asm/irq_regs.h>
@@ -322,6 +323,15 @@ static inline struct task_group *task_group(struct task_struct *p)
322/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ 323/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
323static inline void set_task_rq(struct task_struct *p, unsigned int cpu) 324static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
324{ 325{
326 /*
327 * Strictly speaking this rcu_read_lock() is not needed since the
328 * task_group is tied to the cgroup, which in turn can never go away
329 * as long as there are tasks attached to it.
330 *
331 * However since task_group() uses task_subsys_state() which is an
332 * rcu_dereference() user, this quiets CONFIG_PROVE_RCU.
333 */
334 rcu_read_lock();
325#ifdef CONFIG_FAIR_GROUP_SCHED 335#ifdef CONFIG_FAIR_GROUP_SCHED
326 p->se.cfs_rq = task_group(p)->cfs_rq[cpu]; 336 p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
327 p->se.parent = task_group(p)->se[cpu]; 337 p->se.parent = task_group(p)->se[cpu];
@@ -331,6 +341,7 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
331 p->rt.rt_rq = task_group(p)->rt_rq[cpu]; 341 p->rt.rt_rq = task_group(p)->rt_rq[cpu];
332 p->rt.parent = task_group(p)->rt_se[cpu]; 342 p->rt.parent = task_group(p)->rt_se[cpu];
333#endif 343#endif
344 rcu_read_unlock();
334} 345}
335 346
336#else 347#else
@@ -2359,7 +2370,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
2359{ 2370{
2360 int cpu, orig_cpu, this_cpu, success = 0; 2371 int cpu, orig_cpu, this_cpu, success = 0;
2361 unsigned long flags; 2372 unsigned long flags;
2362 struct rq *rq, *orig_rq; 2373 struct rq *rq;
2363 2374
2364 if (!sched_feat(SYNC_WAKEUPS)) 2375 if (!sched_feat(SYNC_WAKEUPS))
2365 wake_flags &= ~WF_SYNC; 2376 wake_flags &= ~WF_SYNC;
@@ -2367,7 +2378,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
2367 this_cpu = get_cpu(); 2378 this_cpu = get_cpu();
2368 2379
2369 smp_wmb(); 2380 smp_wmb();
2370 rq = orig_rq = task_rq_lock(p, &flags); 2381 rq = task_rq_lock(p, &flags);
2371 update_rq_clock(rq); 2382 update_rq_clock(rq);
2372 if (!(p->state & state)) 2383 if (!(p->state & state))
2373 goto out; 2384 goto out;
@@ -2650,7 +2661,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
2650{ 2661{
2651 unsigned long flags; 2662 unsigned long flags;
2652 struct rq *rq; 2663 struct rq *rq;
2653 int cpu = get_cpu(); 2664 int cpu __maybe_unused = get_cpu();
2654 2665
2655#ifdef CONFIG_SMP 2666#ifdef CONFIG_SMP
2656 /* 2667 /*
@@ -3779,7 +3790,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
3779 * the mutex owner just released it and exited. 3790 * the mutex owner just released it and exited.
3780 */ 3791 */
3781 if (probe_kernel_address(&owner->cpu, cpu)) 3792 if (probe_kernel_address(&owner->cpu, cpu))
3782 goto out; 3793 return 0;
3783#else 3794#else
3784 cpu = owner->cpu; 3795 cpu = owner->cpu;
3785#endif 3796#endif
@@ -3789,14 +3800,14 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
3789 * the cpu field may no longer be valid. 3800 * the cpu field may no longer be valid.
3790 */ 3801 */
3791 if (cpu >= nr_cpumask_bits) 3802 if (cpu >= nr_cpumask_bits)
3792 goto out; 3803 return 0;
3793 3804
3794 /* 3805 /*
3795 * We need to validate that we can do a 3806 * We need to validate that we can do a
3796 * get_cpu() and that we have the percpu area. 3807 * get_cpu() and that we have the percpu area.
3797 */ 3808 */
3798 if (!cpu_online(cpu)) 3809 if (!cpu_online(cpu))
3799 goto out; 3810 return 0;
3800 3811
3801 rq = cpu_rq(cpu); 3812 rq = cpu_rq(cpu);
3802 3813
@@ -3815,7 +3826,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
3815 3826
3816 cpu_relax(); 3827 cpu_relax();
3817 } 3828 }
3818out: 3829
3819 return 1; 3830 return 1;
3820} 3831}
3821#endif 3832#endif
@@ -4902,7 +4913,9 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
4902 int ret; 4913 int ret;
4903 cpumask_var_t mask; 4914 cpumask_var_t mask;
4904 4915
4905 if (len < cpumask_size()) 4916 if ((len * BITS_PER_BYTE) < nr_cpu_ids)
4917 return -EINVAL;
4918 if (len & (sizeof(unsigned long)-1))
4906 return -EINVAL; 4919 return -EINVAL;
4907 4920
4908 if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 4921 if (!alloc_cpumask_var(&mask, GFP_KERNEL))
@@ -4910,10 +4923,12 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
4910 4923
4911 ret = sched_getaffinity(pid, mask); 4924 ret = sched_getaffinity(pid, mask);
4912 if (ret == 0) { 4925 if (ret == 0) {
4913 if (copy_to_user(user_mask_ptr, mask, cpumask_size())) 4926 size_t retlen = min_t(size_t, len, cpumask_size());
4927
4928 if (copy_to_user(user_mask_ptr, mask, retlen))
4914 ret = -EFAULT; 4929 ret = -EFAULT;
4915 else 4930 else
4916 ret = cpumask_size(); 4931 ret = retlen;
4917 } 4932 }
4918 free_cpumask_var(mask); 4933 free_cpumask_var(mask);
4919 4934
@@ -5383,7 +5398,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
5383 5398
5384 get_task_struct(mt); 5399 get_task_struct(mt);
5385 task_rq_unlock(rq, &flags); 5400 task_rq_unlock(rq, &flags);
5386 wake_up_process(rq->migration_thread); 5401 wake_up_process(mt);
5387 put_task_struct(mt); 5402 put_task_struct(mt);
5388 wait_for_completion(&req.done); 5403 wait_for_completion(&req.done);
5389 tlb_migrate_finish(p->mm); 5404 tlb_migrate_finish(p->mm);
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index 82095bf2099f..e6871cb3fc83 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -27,6 +27,7 @@
27 * of the License. 27 * of the License.
28 */ 28 */
29 29
30#include <linux/gfp.h>
30#include "sched_cpupri.h" 31#include "sched_cpupri.h"
31 32
32/* Convert between a 140 based task->prio, and our 102 based cpupri */ 33/* Convert between a 140 based task->prio, and our 102 based cpupri */
@@ -56,7 +57,7 @@ static int convert_prio(int prio)
56 * @lowest_mask: A mask to fill in with selected CPUs (or NULL) 57 * @lowest_mask: A mask to fill in with selected CPUs (or NULL)
57 * 58 *
58 * Note: This function returns the recommended CPUs as calculated during the 59 * Note: This function returns the recommended CPUs as calculated during the
59 * current invokation. By the time the call returns, the CPUs may have in 60 * current invocation. By the time the call returns, the CPUs may have in
60 * fact changed priorities any number of times. While not ideal, it is not 61 * fact changed priorities any number of times. While not ideal, it is not
61 * an issue of correctness since the normal rebalancer logic will correct 62 * an issue of correctness since the normal rebalancer logic will correct
62 * any discrepancies created by racing against the uncertainty of the current 63 * any discrepancies created by racing against the uncertainty of the current
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 67f95aada4b9..9b49db144037 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -518,8 +518,4 @@ void proc_sched_set_task(struct task_struct *p)
518 p->se.nr_wakeups_idle = 0; 518 p->se.nr_wakeups_idle = 0;
519 p->sched_info.bkl_count = 0; 519 p->sched_info.bkl_count = 0;
520#endif 520#endif
521 p->se.sum_exec_runtime = 0;
522 p->se.prev_sum_exec_runtime = 0;
523 p->nvcsw = 0;
524 p->nivcsw = 0;
525} 521}
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 3e1fd96c6cf9..5a5ea2cd924f 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -3476,7 +3476,7 @@ static void run_rebalance_domains(struct softirq_action *h)
3476 3476
3477static inline int on_null_domain(int cpu) 3477static inline int on_null_domain(int cpu)
3478{ 3478{
3479 return !rcu_dereference(cpu_rq(cpu)->sd); 3479 return !rcu_dereference_sched(cpu_rq(cpu)->sd);
3480} 3480}
3481 3481
3482/* 3482/*
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 5a6ed1f0990a..b5b920ae2ea7 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1146,7 +1146,12 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
1146 if (next && next->prio < idx) 1146 if (next && next->prio < idx)
1147 continue; 1147 continue;
1148 list_for_each_entry(rt_se, array->queue + idx, run_list) { 1148 list_for_each_entry(rt_se, array->queue + idx, run_list) {
1149 struct task_struct *p = rt_task_of(rt_se); 1149 struct task_struct *p;
1150
1151 if (!rt_entity_is_task(rt_se))
1152 continue;
1153
1154 p = rt_task_of(rt_se);
1150 if (pick_rt_task(rq, p, cpu)) { 1155 if (pick_rt_task(rq, p, cpu)) {
1151 next = p; 1156 next = p;
1152 break; 1157 break;
diff --git a/kernel/slow-work.c b/kernel/slow-work.c
index 7494bbf5a270..7d3f4fa9ef4f 100644
--- a/kernel/slow-work.c
+++ b/kernel/slow-work.c
@@ -637,7 +637,7 @@ int delayed_slow_work_enqueue(struct delayed_slow_work *dwork,
637 goto cancelled; 637 goto cancelled;
638 638
639 /* the timer holds a reference whilst it is pending */ 639 /* the timer holds a reference whilst it is pending */
640 ret = work->ops->get_ref(work); 640 ret = slow_work_get_ref(work);
641 if (ret < 0) 641 if (ret < 0)
642 goto cant_get_ref; 642 goto cant_get_ref;
643 643
diff --git a/kernel/slow-work.h b/kernel/slow-work.h
index 321f3c59d732..a29ebd1ef41d 100644
--- a/kernel/slow-work.h
+++ b/kernel/slow-work.h
@@ -43,28 +43,28 @@ extern void slow_work_new_thread_desc(struct slow_work *, struct seq_file *);
43 */ 43 */
44static inline void slow_work_set_thread_pid(int id, pid_t pid) 44static inline void slow_work_set_thread_pid(int id, pid_t pid)
45{ 45{
46#ifdef CONFIG_SLOW_WORK_PROC 46#ifdef CONFIG_SLOW_WORK_DEBUG
47 slow_work_pids[id] = pid; 47 slow_work_pids[id] = pid;
48#endif 48#endif
49} 49}
50 50
51static inline void slow_work_mark_time(struct slow_work *work) 51static inline void slow_work_mark_time(struct slow_work *work)
52{ 52{
53#ifdef CONFIG_SLOW_WORK_PROC 53#ifdef CONFIG_SLOW_WORK_DEBUG
54 work->mark = CURRENT_TIME; 54 work->mark = CURRENT_TIME;
55#endif 55#endif
56} 56}
57 57
58static inline void slow_work_begin_exec(int id, struct slow_work *work) 58static inline void slow_work_begin_exec(int id, struct slow_work *work)
59{ 59{
60#ifdef CONFIG_SLOW_WORK_PROC 60#ifdef CONFIG_SLOW_WORK_DEBUG
61 slow_work_execs[id] = work; 61 slow_work_execs[id] = work;
62#endif 62#endif
63} 63}
64 64
65static inline void slow_work_end_exec(int id, struct slow_work *work) 65static inline void slow_work_end_exec(int id, struct slow_work *work)
66{ 66{
67#ifdef CONFIG_SLOW_WORK_PROC 67#ifdef CONFIG_SLOW_WORK_DEBUG
68 write_lock(&slow_work_execs_lock); 68 write_lock(&slow_work_execs_lock);
69 slow_work_execs[id] = NULL; 69 slow_work_execs[id] = NULL;
70 write_unlock(&slow_work_execs_lock); 70 write_unlock(&slow_work_execs_lock);
diff --git a/kernel/smp.c b/kernel/smp.c
index 9867b6bfefce..3fc697336183 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -9,6 +9,7 @@
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/percpu.h> 10#include <linux/percpu.h>
11#include <linux/init.h> 11#include <linux/init.h>
12#include <linux/gfp.h>
12#include <linux/smp.h> 13#include <linux/smp.h>
13#include <linux/cpu.h> 14#include <linux/cpu.h>
14 15
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 0d4c7898ab80..4b493f67dcb5 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -155,11 +155,11 @@ void softlockup_tick(void)
155 * Wake up the high-prio watchdog task twice per 155 * Wake up the high-prio watchdog task twice per
156 * threshold timespan. 156 * threshold timespan.
157 */ 157 */
158 if (now > touch_ts + softlockup_thresh/2) 158 if (time_after(now - softlockup_thresh/2, touch_ts))
159 wake_up_process(per_cpu(softlockup_watchdog, this_cpu)); 159 wake_up_process(per_cpu(softlockup_watchdog, this_cpu));
160 160
161 /* Warn about unreasonable delays: */ 161 /* Warn about unreasonable delays: */
162 if (now <= (touch_ts + softlockup_thresh)) 162 if (time_before_eq(now - softlockup_thresh, touch_ts))
163 return; 163 return;
164 164
165 per_cpu(softlockup_print_ts, this_cpu) = touch_ts; 165 per_cpu(softlockup_print_ts, this_cpu) = touch_ts;
diff --git a/kernel/srcu.c b/kernel/srcu.c
index bde4295774c8..2980da3fd509 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -30,7 +30,6 @@
30#include <linux/preempt.h> 30#include <linux/preempt.h>
31#include <linux/rcupdate.h> 31#include <linux/rcupdate.h>
32#include <linux/sched.h> 32#include <linux/sched.h>
33#include <linux/slab.h>
34#include <linux/smp.h> 33#include <linux/smp.h>
35#include <linux/srcu.h> 34#include <linux/srcu.h>
36 35
diff --git a/kernel/sys.c b/kernel/sys.c
index 9814e43fb23b..7cb426a58965 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -33,8 +33,10 @@
33#include <linux/task_io_accounting_ops.h> 33#include <linux/task_io_accounting_ops.h>
34#include <linux/seccomp.h> 34#include <linux/seccomp.h>
35#include <linux/cpu.h> 35#include <linux/cpu.h>
36#include <linux/personality.h>
36#include <linux/ptrace.h> 37#include <linux/ptrace.h>
37#include <linux/fs_struct.h> 38#include <linux/fs_struct.h>
39#include <linux/gfp.h>
38 40
39#include <linux/compat.h> 41#include <linux/compat.h>
40#include <linux/syscalls.h> 42#include <linux/syscalls.h>
@@ -1114,6 +1116,15 @@ out:
1114 1116
1115DECLARE_RWSEM(uts_sem); 1117DECLARE_RWSEM(uts_sem);
1116 1118
1119#ifdef COMPAT_UTS_MACHINE
1120#define override_architecture(name) \
1121 (personality(current->personality) == PER_LINUX32 && \
1122 copy_to_user(name->machine, COMPAT_UTS_MACHINE, \
1123 sizeof(COMPAT_UTS_MACHINE)))
1124#else
1125#define override_architecture(name) 0
1126#endif
1127
1117SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) 1128SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
1118{ 1129{
1119 int errno = 0; 1130 int errno = 0;
@@ -1122,9 +1133,66 @@ SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
1122 if (copy_to_user(name, utsname(), sizeof *name)) 1133 if (copy_to_user(name, utsname(), sizeof *name))
1123 errno = -EFAULT; 1134 errno = -EFAULT;
1124 up_read(&uts_sem); 1135 up_read(&uts_sem);
1136
1137 if (!errno && override_architecture(name))
1138 errno = -EFAULT;
1125 return errno; 1139 return errno;
1126} 1140}
1127 1141
1142#ifdef __ARCH_WANT_SYS_OLD_UNAME
1143/*
1144 * Old cruft
1145 */
1146SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
1147{
1148 int error = 0;
1149
1150 if (!name)
1151 return -EFAULT;
1152
1153 down_read(&uts_sem);
1154 if (copy_to_user(name, utsname(), sizeof(*name)))
1155 error = -EFAULT;
1156 up_read(&uts_sem);
1157
1158 if (!error && override_architecture(name))
1159 error = -EFAULT;
1160 return error;
1161}
1162
1163SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
1164{
1165 int error;
1166
1167 if (!name)
1168 return -EFAULT;
1169 if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
1170 return -EFAULT;
1171
1172 down_read(&uts_sem);
1173 error = __copy_to_user(&name->sysname, &utsname()->sysname,
1174 __OLD_UTS_LEN);
1175 error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
1176 error |= __copy_to_user(&name->nodename, &utsname()->nodename,
1177 __OLD_UTS_LEN);
1178 error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
1179 error |= __copy_to_user(&name->release, &utsname()->release,
1180 __OLD_UTS_LEN);
1181 error |= __put_user(0, name->release + __OLD_UTS_LEN);
1182 error |= __copy_to_user(&name->version, &utsname()->version,
1183 __OLD_UTS_LEN);
1184 error |= __put_user(0, name->version + __OLD_UTS_LEN);
1185 error |= __copy_to_user(&name->machine, &utsname()->machine,
1186 __OLD_UTS_LEN);
1187 error |= __put_user(0, name->machine + __OLD_UTS_LEN);
1188 up_read(&uts_sem);
1189
1190 if (!error && override_architecture(name))
1191 error = -EFAULT;
1192 return error ? -EFAULT : 0;
1193}
1194#endif
1195
1128SYSCALL_DEFINE2(sethostname, char __user *, name, int, len) 1196SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
1129{ 1197{
1130 int errno; 1198 int errno;
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 695384f12a7d..70f2ea758ffe 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -126,6 +126,7 @@ cond_syscall(sys_setreuid16);
126cond_syscall(sys_setuid16); 126cond_syscall(sys_setuid16);
127cond_syscall(sys_vm86old); 127cond_syscall(sys_vm86old);
128cond_syscall(sys_vm86); 128cond_syscall(sys_vm86);
129cond_syscall(sys_ipc);
129cond_syscall(compat_sys_ipc); 130cond_syscall(compat_sys_ipc);
130cond_syscall(compat_sys_sysctl); 131cond_syscall(compat_sys_sysctl);
131cond_syscall(sys_flock); 132cond_syscall(sys_flock);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0ef19c614f6d..8686b0f5fc12 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -23,6 +23,7 @@
23#include <linux/swap.h> 23#include <linux/swap.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/sysctl.h> 25#include <linux/sysctl.h>
26#include <linux/signal.h>
26#include <linux/proc_fs.h> 27#include <linux/proc_fs.h>
27#include <linux/security.h> 28#include <linux/security.h>
28#include <linux/ctype.h> 29#include <linux/ctype.h>
@@ -60,13 +61,23 @@
60#include <asm/stacktrace.h> 61#include <asm/stacktrace.h>
61#include <asm/io.h> 62#include <asm/io.h>
62#endif 63#endif
64#ifdef CONFIG_BSD_PROCESS_ACCT
65#include <linux/acct.h>
66#endif
67#ifdef CONFIG_RT_MUTEXES
68#include <linux/rtmutex.h>
69#endif
70#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
71#include <linux/lockdep.h>
72#endif
73#ifdef CONFIG_CHR_DEV_SG
74#include <scsi/sg.h>
75#endif
63 76
64 77
65#if defined(CONFIG_SYSCTL) 78#if defined(CONFIG_SYSCTL)
66 79
67/* External variables not in a header file. */ 80/* External variables not in a header file. */
68extern int C_A_D;
69extern int print_fatal_signals;
70extern int sysctl_overcommit_memory; 81extern int sysctl_overcommit_memory;
71extern int sysctl_overcommit_ratio; 82extern int sysctl_overcommit_ratio;
72extern int sysctl_panic_on_oom; 83extern int sysctl_panic_on_oom;
@@ -88,9 +99,6 @@ extern int sysctl_nr_open_min, sysctl_nr_open_max;
88#ifndef CONFIG_MMU 99#ifndef CONFIG_MMU
89extern int sysctl_nr_trim_pages; 100extern int sysctl_nr_trim_pages;
90#endif 101#endif
91#ifdef CONFIG_RCU_TORTURE_TEST
92extern int rcutorture_runnable;
93#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
94#ifdef CONFIG_BLOCK 102#ifdef CONFIG_BLOCK
95extern int blk_iopoll_enabled; 103extern int blk_iopoll_enabled;
96#endif 104#endif
@@ -120,14 +128,6 @@ static int min_percpu_pagelist_fract = 8;
120 128
121static int ngroups_max = NGROUPS_MAX; 129static int ngroups_max = NGROUPS_MAX;
122 130
123#ifdef CONFIG_MODULES
124extern char modprobe_path[];
125extern int modules_disabled;
126#endif
127#ifdef CONFIG_CHR_DEV_SG
128extern int sg_big_buff;
129#endif
130
131#ifdef CONFIG_SPARC 131#ifdef CONFIG_SPARC
132#include <asm/system.h> 132#include <asm/system.h>
133#endif 133#endif
@@ -149,10 +149,6 @@ extern int sysctl_userprocess_debug;
149extern int spin_retry; 149extern int spin_retry;
150#endif 150#endif
151 151
152#ifdef CONFIG_BSD_PROCESS_ACCT
153extern int acct_parm[];
154#endif
155
156#ifdef CONFIG_IA64 152#ifdef CONFIG_IA64
157extern int no_unaligned_warning; 153extern int no_unaligned_warning;
158extern int unaligned_dump_stack; 154extern int unaligned_dump_stack;
@@ -160,10 +156,6 @@ extern int unaligned_dump_stack;
160 156
161extern struct ratelimit_state printk_ratelimit_state; 157extern struct ratelimit_state printk_ratelimit_state;
162 158
163#ifdef CONFIG_RT_MUTEXES
164extern int max_lock_depth;
165#endif
166
167#ifdef CONFIG_PROC_SYSCTL 159#ifdef CONFIG_PROC_SYSCTL
168static int proc_do_cad_pid(struct ctl_table *table, int write, 160static int proc_do_cad_pid(struct ctl_table *table, int write,
169 void __user *buffer, size_t *lenp, loff_t *ppos); 161 void __user *buffer, size_t *lenp, loff_t *ppos);
@@ -202,9 +194,6 @@ extern struct ctl_table epoll_table[];
202int sysctl_legacy_va_layout; 194int sysctl_legacy_va_layout;
203#endif 195#endif
204 196
205extern int prove_locking;
206extern int lock_stat;
207
208/* The default sysctl tables: */ 197/* The default sysctl tables: */
209 198
210static struct ctl_table root_table[] = { 199static struct ctl_table root_table[] = {
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 8cd50d8f9bde..59030570f5ca 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -13,6 +13,7 @@
13#include <linux/file.h> 13#include <linux/file.h>
14#include <linux/ctype.h> 14#include <linux/ctype.h>
15#include <linux/netdevice.h> 15#include <linux/netdevice.h>
16#include <linux/slab.h>
16 17
17#ifdef CONFIG_SYSCTL_SYSCALL 18#ifdef CONFIG_SYSCTL_SYSCALL
18 19
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 899ca51be5e8..11281d5792bd 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -22,6 +22,7 @@
22#include <linux/delayacct.h> 22#include <linux/delayacct.h>
23#include <linux/cpumask.h> 23#include <linux/cpumask.h>
24#include <linux/percpu.h> 24#include <linux/percpu.h>
25#include <linux/slab.h>
25#include <linux/cgroupstats.h> 26#include <linux/cgroupstats.h>
26#include <linux/cgroup.h> 27#include <linux/cgroup.h>
27#include <linux/fs.h> 28#include <linux/fs.h>
diff --git a/kernel/time.c b/kernel/time.c
index 804798005d19..656dccfe1cbb 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -35,7 +35,6 @@
35#include <linux/syscalls.h> 35#include <linux/syscalls.h>
36#include <linux/security.h> 36#include <linux/security.h>
37#include <linux/fs.h> 37#include <linux/fs.h>
38#include <linux/slab.h>
39#include <linux/math64.h> 38#include <linux/math64.h>
40#include <linux/ptrace.h> 39#include <linux/ptrace.h>
41 40
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 1f663d23e85e..1f5dde637457 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -592,6 +592,10 @@ static inline void clocksource_select(void) { }
592 */ 592 */
593static int __init clocksource_done_booting(void) 593static int __init clocksource_done_booting(void)
594{ 594{
595 mutex_lock(&clocksource_mutex);
596 curr_clocksource = clocksource_default_clock();
597 mutex_unlock(&clocksource_mutex);
598
595 finished_booting = 1; 599 finished_booting = 1;
596 600
597 /* 601 /*
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index 0a8a213016f0..aada0e52680a 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -22,6 +22,29 @@
22 22
23#include "tick-internal.h" 23#include "tick-internal.h"
24 24
25/* Limit min_delta to a jiffie */
26#define MIN_DELTA_LIMIT (NSEC_PER_SEC / HZ)
27
28static int tick_increase_min_delta(struct clock_event_device *dev)
29{
30 /* Nothing to do if we already reached the limit */
31 if (dev->min_delta_ns >= MIN_DELTA_LIMIT)
32 return -ETIME;
33
34 if (dev->min_delta_ns < 5000)
35 dev->min_delta_ns = 5000;
36 else
37 dev->min_delta_ns += dev->min_delta_ns >> 1;
38
39 if (dev->min_delta_ns > MIN_DELTA_LIMIT)
40 dev->min_delta_ns = MIN_DELTA_LIMIT;
41
42 printk(KERN_WARNING "CE: %s increased min_delta_ns to %llu nsec\n",
43 dev->name ? dev->name : "?",
44 (unsigned long long) dev->min_delta_ns);
45 return 0;
46}
47
25/** 48/**
26 * tick_program_event internal worker function 49 * tick_program_event internal worker function
27 */ 50 */
@@ -37,23 +60,28 @@ int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires,
37 if (!ret || !force) 60 if (!ret || !force)
38 return ret; 61 return ret;
39 62
63 dev->retries++;
40 /* 64 /*
41 * We tried 2 times to program the device with the given 65 * We tried 3 times to program the device with the given
42 * min_delta_ns. If that's not working then we double it 66 * min_delta_ns. If that's not working then we increase it
43 * and emit a warning. 67 * and emit a warning.
44 */ 68 */
45 if (++i > 2) { 69 if (++i > 2) {
46 /* Increase the min. delta and try again */ 70 /* Increase the min. delta and try again */
47 if (!dev->min_delta_ns) 71 if (tick_increase_min_delta(dev)) {
48 dev->min_delta_ns = 5000; 72 /*
49 else 73 * Get out of the loop if min_delta_ns
50 dev->min_delta_ns += dev->min_delta_ns >> 1; 74 * hit the limit already. That's
51 75 * better than staying here forever.
52 printk(KERN_WARNING 76 *
53 "CE: %s increasing min_delta_ns to %llu nsec\n", 77 * We clear next_event so we have a
54 dev->name ? dev->name : "?", 78 * chance that the box survives.
55 (unsigned long long) dev->min_delta_ns << 1); 79 */
56 80 printk(KERN_WARNING
81 "CE: Reprogramming failure. Giving up\n");
82 dev->next_event.tv64 = KTIME_MAX;
83 return -ETIME;
84 }
57 i = 0; 85 i = 0;
58 } 86 }
59 87
diff --git a/kernel/time/timecompare.c b/kernel/time/timecompare.c
index 12f5c55090be..ac38fbb176cc 100644
--- a/kernel/time/timecompare.c
+++ b/kernel/time/timecompare.c
@@ -19,6 +19,7 @@
19 19
20#include <linux/timecompare.h> 20#include <linux/timecompare.h>
21#include <linux/module.h> 21#include <linux/module.h>
22#include <linux/slab.h>
22#include <linux/math64.h> 23#include <linux/math64.h>
23 24
24/* 25/*
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 16736379a9ca..39f6177fafac 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -818,7 +818,8 @@ void update_wall_time(void)
818 shift = min(shift, maxshift); 818 shift = min(shift, maxshift);
819 while (offset >= timekeeper.cycle_interval) { 819 while (offset >= timekeeper.cycle_interval) {
820 offset = logarithmic_accumulation(offset, shift); 820 offset = logarithmic_accumulation(offset, shift);
821 shift--; 821 if(offset < timekeeper.cycle_interval<<shift)
822 shift--;
822 } 823 }
823 824
824 /* correct the clock when NTP error is too big */ 825 /* correct the clock when NTP error is too big */
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index bdfb8dd1050c..1a4a7dd78777 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -228,6 +228,7 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
228 SEQ_printf(m, " event_handler: "); 228 SEQ_printf(m, " event_handler: ");
229 print_name_offset(m, dev->event_handler); 229 print_name_offset(m, dev->event_handler);
230 SEQ_printf(m, "\n"); 230 SEQ_printf(m, "\n");
231 SEQ_printf(m, " retries: %lu\n", dev->retries);
231} 232}
232 233
233static void timer_list_show_tickdevices(struct seq_file *m) 234static void timer_list_show_tickdevices(struct seq_file *m)
@@ -257,7 +258,7 @@ static int timer_list_show(struct seq_file *m, void *v)
257 u64 now = ktime_to_ns(ktime_get()); 258 u64 now = ktime_to_ns(ktime_get());
258 int cpu; 259 int cpu;
259 260
260 SEQ_printf(m, "Timer List Version: v0.5\n"); 261 SEQ_printf(m, "Timer List Version: v0.6\n");
261 SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES); 262 SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
262 SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now); 263 SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
263 264
diff --git a/kernel/timer.c b/kernel/timer.c
index c61a7949387f..aeb6a54f2771 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -39,6 +39,7 @@
39#include <linux/kallsyms.h> 39#include <linux/kallsyms.h>
40#include <linux/perf_event.h> 40#include <linux/perf_event.h>
41#include <linux/sched.h> 41#include <linux/sched.h>
42#include <linux/slab.h>
42 43
43#include <asm/uaccess.h> 44#include <asm/uaccess.h>
44#include <asm/unistd.h> 45#include <asm/unistd.h>
@@ -880,6 +881,7 @@ int try_to_del_timer_sync(struct timer_list *timer)
880 if (base->running_timer == timer) 881 if (base->running_timer == timer)
881 goto out; 882 goto out;
882 883
884 timer_stats_timer_clear_start_info(timer);
883 ret = 0; 885 ret = 0;
884 if (timer_pending(timer)) { 886 if (timer_pending(timer)) {
885 detach_timer(timer, 1); 887 detach_timer(timer, 1);
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index d00c6fe23f54..78edc6490038 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -52,7 +52,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_events.o
52obj-$(CONFIG_EVENT_TRACING) += trace_export.o 52obj-$(CONFIG_EVENT_TRACING) += trace_export.o
53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o 53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
54ifeq ($(CONFIG_PERF_EVENTS),y) 54ifeq ($(CONFIG_PERF_EVENTS),y)
55obj-$(CONFIG_EVENT_TRACING) += trace_event_profile.o 55obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
56endif 56endif
57obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 57obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
58obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o 58obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 07f945a99430..b3bc91a3f510 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -21,6 +21,7 @@
21#include <linux/percpu.h> 21#include <linux/percpu.h>
22#include <linux/init.h> 22#include <linux/init.h>
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/slab.h>
24#include <linux/debugfs.h> 25#include <linux/debugfs.h>
25#include <linux/smp_lock.h> 26#include <linux/smp_lock.h>
26#include <linux/time.h> 27#include <linux/time.h>
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 83783579378f..2404b59b3097 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -24,9 +24,11 @@
24#include <linux/uaccess.h> 24#include <linux/uaccess.h>
25#include <linux/ftrace.h> 25#include <linux/ftrace.h>
26#include <linux/sysctl.h> 26#include <linux/sysctl.h>
27#include <linux/slab.h>
27#include <linux/ctype.h> 28#include <linux/ctype.h>
28#include <linux/list.h> 29#include <linux/list.h>
29#include <linux/hash.h> 30#include <linux/hash.h>
31#include <linux/rcupdate.h>
30 32
31#include <trace/events/sched.h> 33#include <trace/events/sched.h>
32 34
@@ -84,22 +86,22 @@ ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
84ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; 86ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
85ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; 87ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
86 88
87#ifdef CONFIG_FUNCTION_GRAPH_TRACER 89/*
88static int ftrace_set_func(unsigned long *array, int *idx, char *buffer); 90 * Traverse the ftrace_list, invoking all entries. The reason that we
89#endif 91 * can use rcu_dereference_raw() is that elements removed from this list
90 92 * are simply leaked, so there is no need to interact with a grace-period
93 * mechanism. The rcu_dereference_raw() calls are needed to handle
94 * concurrent insertions into the ftrace_list.
95 *
96 * Silly Alpha and silly pointer-speculation compiler optimizations!
97 */
91static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) 98static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
92{ 99{
93 struct ftrace_ops *op = ftrace_list; 100 struct ftrace_ops *op = rcu_dereference_raw(ftrace_list); /*see above*/
94
95 /* in case someone actually ports this to alpha! */
96 read_barrier_depends();
97 101
98 while (op != &ftrace_list_end) { 102 while (op != &ftrace_list_end) {
99 /* silly alpha */
100 read_barrier_depends();
101 op->func(ip, parent_ip); 103 op->func(ip, parent_ip);
102 op = op->next; 104 op = rcu_dereference_raw(op->next); /*see above*/
103 }; 105 };
104} 106}
105 107
@@ -154,8 +156,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
154 * the ops->next pointer is valid before another CPU sees 156 * the ops->next pointer is valid before another CPU sees
155 * the ops pointer included into the ftrace_list. 157 * the ops pointer included into the ftrace_list.
156 */ 158 */
157 smp_wmb(); 159 rcu_assign_pointer(ftrace_list, ops);
158 ftrace_list = ops;
159 160
160 if (ftrace_enabled) { 161 if (ftrace_enabled) {
161 ftrace_func_t func; 162 ftrace_func_t func;
@@ -2276,6 +2277,8 @@ __setup("ftrace_filter=", set_ftrace_filter);
2276 2277
2277#ifdef CONFIG_FUNCTION_GRAPH_TRACER 2278#ifdef CONFIG_FUNCTION_GRAPH_TRACER
2278static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata; 2279static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata;
2280static int ftrace_set_func(unsigned long *array, int *idx, char *buffer);
2281
2279static int __init set_graph_function(char *str) 2282static int __init set_graph_function(char *str)
2280{ 2283{
2281 strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE); 2284 strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE);
@@ -3351,6 +3354,7 @@ void ftrace_graph_init_task(struct task_struct *t)
3351{ 3354{
3352 /* Make sure we do not use the parent ret_stack */ 3355 /* Make sure we do not use the parent ret_stack */
3353 t->ret_stack = NULL; 3356 t->ret_stack = NULL;
3357 t->curr_ret_stack = -1;
3354 3358
3355 if (ftrace_graph_active) { 3359 if (ftrace_graph_active) {
3356 struct ftrace_ret_stack *ret_stack; 3360 struct ftrace_ret_stack *ret_stack;
@@ -3360,7 +3364,6 @@ void ftrace_graph_init_task(struct task_struct *t)
3360 GFP_KERNEL); 3364 GFP_KERNEL);
3361 if (!ret_stack) 3365 if (!ret_stack)
3362 return; 3366 return;
3363 t->curr_ret_stack = -1;
3364 atomic_set(&t->tracing_graph_pause, 0); 3367 atomic_set(&t->tracing_graph_pause, 0);
3365 atomic_set(&t->trace_overrun, 0); 3368 atomic_set(&t->trace_overrun, 0);
3366 t->ftrace_timestamp = 0; 3369 t->ftrace_timestamp = 0;
diff --git a/kernel/trace/power-traces.c b/kernel/trace/power-traces.c
index 9f4f565b01e6..a22582a06161 100644
--- a/kernel/trace/power-traces.c
+++ b/kernel/trace/power-traces.c
@@ -9,7 +9,6 @@
9#include <linux/workqueue.h> 9#include <linux/workqueue.h>
10#include <linux/sched.h> 10#include <linux/sched.h>
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/slab.h>
13 12
14#define CREATE_TRACE_POINTS 13#define CREATE_TRACE_POINTS
15#include <trace/events/power.h> 14#include <trace/events/power.h>
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 0287f9f52f5a..41ca394feb22 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -14,6 +14,7 @@
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/percpu.h> 15#include <linux/percpu.h>
16#include <linux/mutex.h> 16#include <linux/mutex.h>
17#include <linux/slab.h>
17#include <linux/init.h> 18#include <linux/init.h>
18#include <linux/hash.h> 19#include <linux/hash.h>
19#include <linux/list.h> 20#include <linux/list.h>
@@ -207,6 +208,14 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
207#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX) 208#define RB_MAX_SMALL_DATA (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
208#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */ 209#define RB_EVNT_MIN_SIZE 8U /* two 32bit words */
209 210
211#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
212# define RB_FORCE_8BYTE_ALIGNMENT 0
213# define RB_ARCH_ALIGNMENT RB_ALIGNMENT
214#else
215# define RB_FORCE_8BYTE_ALIGNMENT 1
216# define RB_ARCH_ALIGNMENT 8U
217#endif
218
210/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */ 219/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
211#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX 220#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
212 221
@@ -1201,18 +1210,19 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
1201 1210
1202 for (i = 0; i < nr_pages; i++) { 1211 for (i = 0; i < nr_pages; i++) {
1203 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages))) 1212 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
1204 return; 1213 goto out;
1205 p = cpu_buffer->pages->next; 1214 p = cpu_buffer->pages->next;
1206 bpage = list_entry(p, struct buffer_page, list); 1215 bpage = list_entry(p, struct buffer_page, list);
1207 list_del_init(&bpage->list); 1216 list_del_init(&bpage->list);
1208 free_buffer_page(bpage); 1217 free_buffer_page(bpage);
1209 } 1218 }
1210 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages))) 1219 if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages)))
1211 return; 1220 goto out;
1212 1221
1213 rb_reset_cpu(cpu_buffer); 1222 rb_reset_cpu(cpu_buffer);
1214 rb_check_pages(cpu_buffer); 1223 rb_check_pages(cpu_buffer);
1215 1224
1225out:
1216 spin_unlock_irq(&cpu_buffer->reader_lock); 1226 spin_unlock_irq(&cpu_buffer->reader_lock);
1217} 1227}
1218 1228
@@ -1229,7 +1239,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1229 1239
1230 for (i = 0; i < nr_pages; i++) { 1240 for (i = 0; i < nr_pages; i++) {
1231 if (RB_WARN_ON(cpu_buffer, list_empty(pages))) 1241 if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
1232 return; 1242 goto out;
1233 p = pages->next; 1243 p = pages->next;
1234 bpage = list_entry(p, struct buffer_page, list); 1244 bpage = list_entry(p, struct buffer_page, list);
1235 list_del_init(&bpage->list); 1245 list_del_init(&bpage->list);
@@ -1238,6 +1248,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
1238 rb_reset_cpu(cpu_buffer); 1248 rb_reset_cpu(cpu_buffer);
1239 rb_check_pages(cpu_buffer); 1249 rb_check_pages(cpu_buffer);
1240 1250
1251out:
1241 spin_unlock_irq(&cpu_buffer->reader_lock); 1252 spin_unlock_irq(&cpu_buffer->reader_lock);
1242} 1253}
1243 1254
@@ -1547,7 +1558,7 @@ rb_update_event(struct ring_buffer_event *event,
1547 1558
1548 case 0: 1559 case 0:
1549 length -= RB_EVNT_HDR_SIZE; 1560 length -= RB_EVNT_HDR_SIZE;
1550 if (length > RB_MAX_SMALL_DATA) 1561 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
1551 event->array[0] = length; 1562 event->array[0] = length;
1552 else 1563 else
1553 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); 1564 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
@@ -1722,11 +1733,11 @@ static unsigned rb_calculate_event_length(unsigned length)
1722 if (!length) 1733 if (!length)
1723 length = 1; 1734 length = 1;
1724 1735
1725 if (length > RB_MAX_SMALL_DATA) 1736 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
1726 length += sizeof(event.array[0]); 1737 length += sizeof(event.array[0]);
1727 1738
1728 length += RB_EVNT_HDR_SIZE; 1739 length += RB_EVNT_HDR_SIZE;
1729 length = ALIGN(length, RB_ALIGNMENT); 1740 length = ALIGN(length, RB_ARCH_ALIGNMENT);
1730 1741
1731 return length; 1742 return length;
1732} 1743}
@@ -2233,12 +2244,12 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
2233 if (ring_buffer_flags != RB_BUFFERS_ON) 2244 if (ring_buffer_flags != RB_BUFFERS_ON)
2234 return NULL; 2245 return NULL;
2235 2246
2236 if (atomic_read(&buffer->record_disabled))
2237 return NULL;
2238
2239 /* If we are tracing schedule, we don't want to recurse */ 2247 /* If we are tracing schedule, we don't want to recurse */
2240 resched = ftrace_preempt_disable(); 2248 resched = ftrace_preempt_disable();
2241 2249
2250 if (atomic_read(&buffer->record_disabled))
2251 goto out_nocheck;
2252
2242 if (trace_recursive_lock()) 2253 if (trace_recursive_lock())
2243 goto out_nocheck; 2254 goto out_nocheck;
2244 2255
@@ -2470,11 +2481,11 @@ int ring_buffer_write(struct ring_buffer *buffer,
2470 if (ring_buffer_flags != RB_BUFFERS_ON) 2481 if (ring_buffer_flags != RB_BUFFERS_ON)
2471 return -EBUSY; 2482 return -EBUSY;
2472 2483
2473 if (atomic_read(&buffer->record_disabled))
2474 return -EBUSY;
2475
2476 resched = ftrace_preempt_disable(); 2484 resched = ftrace_preempt_disable();
2477 2485
2486 if (atomic_read(&buffer->record_disabled))
2487 goto out;
2488
2478 cpu = raw_smp_processor_id(); 2489 cpu = raw_smp_processor_id();
2479 2490
2480 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2491 if (!cpumask_test_cpu(cpu, buffer->cpumask))
@@ -2542,7 +2553,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
2542 * @buffer: The ring buffer to enable writes 2553 * @buffer: The ring buffer to enable writes
2543 * 2554 *
2544 * Note, multiple disables will need the same number of enables 2555 * Note, multiple disables will need the same number of enables
2545 * to truely enable the writing (much like preempt_disable). 2556 * to truly enable the writing (much like preempt_disable).
2546 */ 2557 */
2547void ring_buffer_record_enable(struct ring_buffer *buffer) 2558void ring_buffer_record_enable(struct ring_buffer *buffer)
2548{ 2559{
@@ -2578,7 +2589,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
2578 * @cpu: The CPU to enable. 2589 * @cpu: The CPU to enable.
2579 * 2590 *
2580 * Note, multiple disables will need the same number of enables 2591 * Note, multiple disables will need the same number of enables
2581 * to truely enable the writing (much like preempt_disable). 2592 * to truly enable the writing (much like preempt_disable).
2582 */ 2593 */
2583void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) 2594void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
2584{ 2595{
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ed01fdba4a55..44f916a04065 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -33,10 +33,10 @@
33#include <linux/kdebug.h> 33#include <linux/kdebug.h>
34#include <linux/string.h> 34#include <linux/string.h>
35#include <linux/rwsem.h> 35#include <linux/rwsem.h>
36#include <linux/slab.h>
36#include <linux/ctype.h> 37#include <linux/ctype.h>
37#include <linux/init.h> 38#include <linux/init.h>
38#include <linux/poll.h> 39#include <linux/poll.h>
39#include <linux/gfp.h>
40#include <linux/fs.h> 40#include <linux/fs.h>
41 41
42#include "trace.h" 42#include "trace.h"
@@ -374,6 +374,21 @@ static int __init set_buf_size(char *str)
374} 374}
375__setup("trace_buf_size=", set_buf_size); 375__setup("trace_buf_size=", set_buf_size);
376 376
377static int __init set_tracing_thresh(char *str)
378{
379 unsigned long threshhold;
380 int ret;
381
382 if (!str)
383 return 0;
384 ret = strict_strtoul(str, 0, &threshhold);
385 if (ret < 0)
386 return 0;
387 tracing_thresh = threshhold * 1000;
388 return 1;
389}
390__setup("tracing_thresh=", set_tracing_thresh);
391
377unsigned long nsecs_to_usecs(unsigned long nsecs) 392unsigned long nsecs_to_usecs(unsigned long nsecs)
378{ 393{
379 return nsecs / 1000; 394 return nsecs / 1000;
@@ -579,9 +594,10 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
579static arch_spinlock_t ftrace_max_lock = 594static arch_spinlock_t ftrace_max_lock =
580 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 595 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
581 596
597unsigned long __read_mostly tracing_thresh;
598
582#ifdef CONFIG_TRACER_MAX_TRACE 599#ifdef CONFIG_TRACER_MAX_TRACE
583unsigned long __read_mostly tracing_max_latency; 600unsigned long __read_mostly tracing_max_latency;
584unsigned long __read_mostly tracing_thresh;
585 601
586/* 602/*
587 * Copy the new maximum trace into the separate maximum-trace 603 * Copy the new maximum trace into the separate maximum-trace
@@ -592,7 +608,7 @@ static void
592__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) 608__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
593{ 609{
594 struct trace_array_cpu *data = tr->data[cpu]; 610 struct trace_array_cpu *data = tr->data[cpu];
595 struct trace_array_cpu *max_data = tr->data[cpu]; 611 struct trace_array_cpu *max_data;
596 612
597 max_tr.cpu = cpu; 613 max_tr.cpu = cpu;
598 max_tr.time_start = data->preempt_timestamp; 614 max_tr.time_start = data->preempt_timestamp;
@@ -602,7 +618,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
602 max_data->critical_start = data->critical_start; 618 max_data->critical_start = data->critical_start;
603 max_data->critical_end = data->critical_end; 619 max_data->critical_end = data->critical_end;
604 620
605 memcpy(data->comm, tsk->comm, TASK_COMM_LEN); 621 memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
606 max_data->pid = tsk->pid; 622 max_data->pid = tsk->pid;
607 max_data->uid = task_uid(tsk); 623 max_data->uid = task_uid(tsk);
608 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; 624 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
@@ -824,10 +840,10 @@ out:
824 mutex_unlock(&trace_types_lock); 840 mutex_unlock(&trace_types_lock);
825} 841}
826 842
827static void __tracing_reset(struct trace_array *tr, int cpu) 843static void __tracing_reset(struct ring_buffer *buffer, int cpu)
828{ 844{
829 ftrace_disable_cpu(); 845 ftrace_disable_cpu();
830 ring_buffer_reset_cpu(tr->buffer, cpu); 846 ring_buffer_reset_cpu(buffer, cpu);
831 ftrace_enable_cpu(); 847 ftrace_enable_cpu();
832} 848}
833 849
@@ -839,7 +855,7 @@ void tracing_reset(struct trace_array *tr, int cpu)
839 855
840 /* Make sure all commits have finished */ 856 /* Make sure all commits have finished */
841 synchronize_sched(); 857 synchronize_sched();
842 __tracing_reset(tr, cpu); 858 __tracing_reset(buffer, cpu);
843 859
844 ring_buffer_record_enable(buffer); 860 ring_buffer_record_enable(buffer);
845} 861}
@@ -857,7 +873,7 @@ void tracing_reset_online_cpus(struct trace_array *tr)
857 tr->time_start = ftrace_now(tr->cpu); 873 tr->time_start = ftrace_now(tr->cpu);
858 874
859 for_each_online_cpu(cpu) 875 for_each_online_cpu(cpu)
860 __tracing_reset(tr, cpu); 876 __tracing_reset(buffer, cpu);
861 877
862 ring_buffer_record_enable(buffer); 878 ring_buffer_record_enable(buffer);
863} 879}
@@ -934,6 +950,8 @@ void tracing_start(void)
934 goto out; 950 goto out;
935 } 951 }
936 952
953 /* Prevent the buffers from switching */
954 arch_spin_lock(&ftrace_max_lock);
937 955
938 buffer = global_trace.buffer; 956 buffer = global_trace.buffer;
939 if (buffer) 957 if (buffer)
@@ -943,6 +961,8 @@ void tracing_start(void)
943 if (buffer) 961 if (buffer)
944 ring_buffer_record_enable(buffer); 962 ring_buffer_record_enable(buffer);
945 963
964 arch_spin_unlock(&ftrace_max_lock);
965
946 ftrace_start(); 966 ftrace_start();
947 out: 967 out:
948 spin_unlock_irqrestore(&tracing_start_lock, flags); 968 spin_unlock_irqrestore(&tracing_start_lock, flags);
@@ -964,6 +984,9 @@ void tracing_stop(void)
964 if (trace_stop_count++) 984 if (trace_stop_count++)
965 goto out; 985 goto out;
966 986
987 /* Prevent the buffers from switching */
988 arch_spin_lock(&ftrace_max_lock);
989
967 buffer = global_trace.buffer; 990 buffer = global_trace.buffer;
968 if (buffer) 991 if (buffer)
969 ring_buffer_record_disable(buffer); 992 ring_buffer_record_disable(buffer);
@@ -972,6 +995,8 @@ void tracing_stop(void)
972 if (buffer) 995 if (buffer)
973 ring_buffer_record_disable(buffer); 996 ring_buffer_record_disable(buffer);
974 997
998 arch_spin_unlock(&ftrace_max_lock);
999
975 out: 1000 out:
976 spin_unlock_irqrestore(&tracing_start_lock, flags); 1001 spin_unlock_irqrestore(&tracing_start_lock, flags);
977} 1002}
@@ -1259,6 +1284,13 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1259 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) 1284 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1260 return; 1285 return;
1261 1286
1287 /*
1288 * NMIs can not handle page faults, even with fix ups.
1289 * The save user stack can (and often does) fault.
1290 */
1291 if (unlikely(in_nmi()))
1292 return;
1293
1262 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, 1294 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1263 sizeof(*entry), flags, pc); 1295 sizeof(*entry), flags, pc);
1264 if (!event) 1296 if (!event)
@@ -1703,6 +1735,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1703 1735
1704 ftrace_enable_cpu(); 1736 ftrace_enable_cpu();
1705 1737
1738 iter->leftover = 0;
1706 for (p = iter; p && l < *pos; p = s_next(m, p, &l)) 1739 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1707 ; 1740 ;
1708 1741
@@ -4248,10 +4281,10 @@ static __init int tracer_init_debugfs(void)
4248#ifdef CONFIG_TRACER_MAX_TRACE 4281#ifdef CONFIG_TRACER_MAX_TRACE
4249 trace_create_file("tracing_max_latency", 0644, d_tracer, 4282 trace_create_file("tracing_max_latency", 0644, d_tracer,
4250 &tracing_max_latency, &tracing_max_lat_fops); 4283 &tracing_max_latency, &tracing_max_lat_fops);
4284#endif
4251 4285
4252 trace_create_file("tracing_thresh", 0644, d_tracer, 4286 trace_create_file("tracing_thresh", 0644, d_tracer,
4253 &tracing_thresh, &tracing_max_lat_fops); 4287 &tracing_thresh, &tracing_max_lat_fops);
4254#endif
4255 4288
4256 trace_create_file("README", 0444, d_tracer, 4289 trace_create_file("README", 0444, d_tracer,
4257 NULL, &tracing_readme_fops); 4290 NULL, &tracing_readme_fops);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index fd05bcaf91b0..2825ef2c0b15 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -396,9 +396,10 @@ extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
396 396
397extern unsigned long nsecs_to_usecs(unsigned long nsecs); 397extern unsigned long nsecs_to_usecs(unsigned long nsecs);
398 398
399extern unsigned long tracing_thresh;
400
399#ifdef CONFIG_TRACER_MAX_TRACE 401#ifdef CONFIG_TRACER_MAX_TRACE
400extern unsigned long tracing_max_latency; 402extern unsigned long tracing_max_latency;
401extern unsigned long tracing_thresh;
402 403
403void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu); 404void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
404void update_max_tr_single(struct trace_array *tr, 405void update_max_tr_single(struct trace_array *tr,
@@ -550,7 +551,7 @@ static inline int ftrace_trace_task(struct task_struct *task)
550 * struct trace_parser - servers for reading the user input separated by spaces 551 * struct trace_parser - servers for reading the user input separated by spaces
551 * @cont: set if the input is not complete - no final space char was found 552 * @cont: set if the input is not complete - no final space char was found
552 * @buffer: holds the parsed user input 553 * @buffer: holds the parsed user input
553 * @idx: user input lenght 554 * @idx: user input length
554 * @size: buffer size 555 * @size: buffer size
555 */ 556 */
556struct trace_parser { 557struct trace_parser {
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 84a3a7ba072a..9d589d8dcd1a 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -13,6 +13,7 @@
13 * Tracer plugins will chose a default from these clocks. 13 * Tracer plugins will chose a default from these clocks.
14 */ 14 */
15#include <linux/spinlock.h> 15#include <linux/spinlock.h>
16#include <linux/irqflags.h>
16#include <linux/hardirq.h> 17#include <linux/hardirq.h>
17#include <linux/module.h> 18#include <linux/module.h>
18#include <linux/percpu.h> 19#include <linux/percpu.h>
@@ -83,7 +84,7 @@ u64 notrace trace_clock_global(void)
83 int this_cpu; 84 int this_cpu;
84 u64 now; 85 u64 now;
85 86
86 raw_local_irq_save(flags); 87 local_irq_save(flags);
87 88
88 this_cpu = raw_smp_processor_id(); 89 this_cpu = raw_smp_processor_id();
89 now = cpu_clock(this_cpu); 90 now = cpu_clock(this_cpu);
@@ -109,7 +110,7 @@ u64 notrace trace_clock_global(void)
109 arch_spin_unlock(&trace_clock_struct.lock); 110 arch_spin_unlock(&trace_clock_struct.lock);
110 111
111 out: 112 out:
112 raw_local_irq_restore(flags); 113 local_irq_restore(flags);
113 114
114 return now; 115 return now;
115} 116}
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_perf.c
index f0d693005075..0565bb42566f 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_perf.c
@@ -1,32 +1,41 @@
1/* 1/*
2 * trace event based perf counter profiling 2 * trace event based perf event profiling/tracing
3 * 3 *
4 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com> 4 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
5 * 5 * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
6 */ 6 */
7 7
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/kprobes.h> 9#include <linux/kprobes.h>
10#include "trace.h" 10#include "trace.h"
11 11
12DEFINE_PER_CPU(struct pt_regs, perf_trace_regs);
13EXPORT_PER_CPU_SYMBOL_GPL(perf_trace_regs);
14
15EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
12 16
13static char *perf_trace_buf; 17static char *perf_trace_buf;
14static char *perf_trace_buf_nmi; 18static char *perf_trace_buf_nmi;
15 19
16typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ; 20/*
21 * Force it to be aligned to unsigned long to avoid misaligned accesses
22 * suprises
23 */
24typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
25 perf_trace_t;
17 26
18/* Count the events in use (per event id, not per instance) */ 27/* Count the events in use (per event id, not per instance) */
19static int total_profile_count; 28static int total_ref_count;
20 29
21static int ftrace_profile_enable_event(struct ftrace_event_call *event) 30static int perf_trace_event_enable(struct ftrace_event_call *event)
22{ 31{
23 char *buf; 32 char *buf;
24 int ret = -ENOMEM; 33 int ret = -ENOMEM;
25 34
26 if (event->profile_count++ > 0) 35 if (event->perf_refcount++ > 0)
27 return 0; 36 return 0;
28 37
29 if (!total_profile_count) { 38 if (!total_ref_count) {
30 buf = (char *)alloc_percpu(perf_trace_t); 39 buf = (char *)alloc_percpu(perf_trace_t);
31 if (!buf) 40 if (!buf)
32 goto fail_buf; 41 goto fail_buf;
@@ -40,35 +49,35 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
40 rcu_assign_pointer(perf_trace_buf_nmi, buf); 49 rcu_assign_pointer(perf_trace_buf_nmi, buf);
41 } 50 }
42 51
43 ret = event->profile_enable(event); 52 ret = event->perf_event_enable(event);
44 if (!ret) { 53 if (!ret) {
45 total_profile_count++; 54 total_ref_count++;
46 return 0; 55 return 0;
47 } 56 }
48 57
49fail_buf_nmi: 58fail_buf_nmi:
50 if (!total_profile_count) { 59 if (!total_ref_count) {
51 free_percpu(perf_trace_buf_nmi); 60 free_percpu(perf_trace_buf_nmi);
52 free_percpu(perf_trace_buf); 61 free_percpu(perf_trace_buf);
53 perf_trace_buf_nmi = NULL; 62 perf_trace_buf_nmi = NULL;
54 perf_trace_buf = NULL; 63 perf_trace_buf = NULL;
55 } 64 }
56fail_buf: 65fail_buf:
57 event->profile_count--; 66 event->perf_refcount--;
58 67
59 return ret; 68 return ret;
60} 69}
61 70
62int ftrace_profile_enable(int event_id) 71int perf_trace_enable(int event_id)
63{ 72{
64 struct ftrace_event_call *event; 73 struct ftrace_event_call *event;
65 int ret = -EINVAL; 74 int ret = -EINVAL;
66 75
67 mutex_lock(&event_mutex); 76 mutex_lock(&event_mutex);
68 list_for_each_entry(event, &ftrace_events, list) { 77 list_for_each_entry(event, &ftrace_events, list) {
69 if (event->id == event_id && event->profile_enable && 78 if (event->id == event_id && event->perf_event_enable &&
70 try_module_get(event->mod)) { 79 try_module_get(event->mod)) {
71 ret = ftrace_profile_enable_event(event); 80 ret = perf_trace_event_enable(event);
72 break; 81 break;
73 } 82 }
74 } 83 }
@@ -77,16 +86,16 @@ int ftrace_profile_enable(int event_id)
77 return ret; 86 return ret;
78} 87}
79 88
80static void ftrace_profile_disable_event(struct ftrace_event_call *event) 89static void perf_trace_event_disable(struct ftrace_event_call *event)
81{ 90{
82 char *buf, *nmi_buf; 91 char *buf, *nmi_buf;
83 92
84 if (--event->profile_count > 0) 93 if (--event->perf_refcount > 0)
85 return; 94 return;
86 95
87 event->profile_disable(event); 96 event->perf_event_disable(event);
88 97
89 if (!--total_profile_count) { 98 if (!--total_ref_count) {
90 buf = perf_trace_buf; 99 buf = perf_trace_buf;
91 rcu_assign_pointer(perf_trace_buf, NULL); 100 rcu_assign_pointer(perf_trace_buf, NULL);
92 101
@@ -104,14 +113,14 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event)
104 } 113 }
105} 114}
106 115
107void ftrace_profile_disable(int event_id) 116void perf_trace_disable(int event_id)
108{ 117{
109 struct ftrace_event_call *event; 118 struct ftrace_event_call *event;
110 119
111 mutex_lock(&event_mutex); 120 mutex_lock(&event_mutex);
112 list_for_each_entry(event, &ftrace_events, list) { 121 list_for_each_entry(event, &ftrace_events, list) {
113 if (event->id == event_id) { 122 if (event->id == event_id) {
114 ftrace_profile_disable_event(event); 123 perf_trace_event_disable(event);
115 module_put(event->mod); 124 module_put(event->mod);
116 break; 125 break;
117 } 126 }
@@ -119,13 +128,15 @@ void ftrace_profile_disable(int event_id)
119 mutex_unlock(&event_mutex); 128 mutex_unlock(&event_mutex);
120} 129}
121 130
122__kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type, 131__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
123 int *rctxp, unsigned long *irq_flags) 132 int *rctxp, unsigned long *irq_flags)
124{ 133{
125 struct trace_entry *entry; 134 struct trace_entry *entry;
126 char *trace_buf, *raw_data; 135 char *trace_buf, *raw_data;
127 int pc, cpu; 136 int pc, cpu;
128 137
138 BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
139
129 pc = preempt_count(); 140 pc = preempt_count();
130 141
131 /* Protect the per cpu buffer, begin the rcu read side */ 142 /* Protect the per cpu buffer, begin the rcu read side */
@@ -138,9 +149,9 @@ __kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type,
138 cpu = smp_processor_id(); 149 cpu = smp_processor_id();
139 150
140 if (in_nmi()) 151 if (in_nmi())
141 trace_buf = rcu_dereference(perf_trace_buf_nmi); 152 trace_buf = rcu_dereference_sched(perf_trace_buf_nmi);
142 else 153 else
143 trace_buf = rcu_dereference(perf_trace_buf); 154 trace_buf = rcu_dereference_sched(perf_trace_buf);
144 155
145 if (!trace_buf) 156 if (!trace_buf)
146 goto err; 157 goto err;
@@ -148,7 +159,7 @@ __kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type,
148 raw_data = per_cpu_ptr(trace_buf, cpu); 159 raw_data = per_cpu_ptr(trace_buf, cpu);
149 160
150 /* zero the dead bytes from align to not leak stack to user */ 161 /* zero the dead bytes from align to not leak stack to user */
151 *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; 162 memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
152 163
153 entry = (struct trace_entry *)raw_data; 164 entry = (struct trace_entry *)raw_data;
154 tracing_generic_entry_update(entry, *irq_flags, pc); 165 tracing_generic_entry_update(entry, *irq_flags, pc);
@@ -161,4 +172,4 @@ err_recursion:
161 local_irq_restore(*irq_flags); 172 local_irq_restore(*irq_flags);
162 return NULL; 173 return NULL;
163} 174}
164EXPORT_SYMBOL_GPL(ftrace_perf_buf_prepare); 175EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 3f972ad98d04..c697c7043349 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -15,6 +15,7 @@
15#include <linux/uaccess.h> 15#include <linux/uaccess.h>
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/ctype.h> 17#include <linux/ctype.h>
18#include <linux/slab.h>
18#include <linux/delay.h> 19#include <linux/delay.h>
19 20
20#include <asm/setup.h> 21#include <asm/setup.h>
@@ -938,7 +939,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
938 trace_create_file("enable", 0644, call->dir, call, 939 trace_create_file("enable", 0644, call->dir, call,
939 enable); 940 enable);
940 941
941 if (call->id && call->profile_enable) 942 if (call->id && call->perf_event_enable)
942 trace_create_file("id", 0444, call->dir, call, 943 trace_create_file("id", 0444, call->dir, call,
943 id); 944 id);
944 945
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 4615f62a04f1..88c0b6dbd7fe 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -22,6 +22,7 @@
22#include <linux/ctype.h> 22#include <linux/ctype.h>
23#include <linux/mutex.h> 23#include <linux/mutex.h>
24#include <linux/perf_event.h> 24#include <linux/perf_event.h>
25#include <linux/slab.h>
25 26
26#include "trace.h" 27#include "trace.h"
27#include "trace_output.h" 28#include "trace_output.h"
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 3fc2a575664f..9aed1a5cf553 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -9,6 +9,7 @@
9#include <linux/debugfs.h> 9#include <linux/debugfs.h>
10#include <linux/uaccess.h> 10#include <linux/uaccess.h>
11#include <linux/ftrace.h> 11#include <linux/ftrace.h>
12#include <linux/slab.h>
12#include <linux/fs.h> 13#include <linux/fs.h>
13 14
14#include "trace.h" 15#include "trace.h"
@@ -237,6 +238,14 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
237 return ret; 238 return ret;
238} 239}
239 240
241int trace_graph_thresh_entry(struct ftrace_graph_ent *trace)
242{
243 if (tracing_thresh)
244 return 1;
245 else
246 return trace_graph_entry(trace);
247}
248
240static void __trace_graph_return(struct trace_array *tr, 249static void __trace_graph_return(struct trace_array *tr,
241 struct ftrace_graph_ret *trace, 250 struct ftrace_graph_ret *trace,
242 unsigned long flags, 251 unsigned long flags,
@@ -290,13 +299,26 @@ void set_graph_array(struct trace_array *tr)
290 smp_mb(); 299 smp_mb();
291} 300}
292 301
302void trace_graph_thresh_return(struct ftrace_graph_ret *trace)
303{
304 if (tracing_thresh &&
305 (trace->rettime - trace->calltime < tracing_thresh))
306 return;
307 else
308 trace_graph_return(trace);
309}
310
293static int graph_trace_init(struct trace_array *tr) 311static int graph_trace_init(struct trace_array *tr)
294{ 312{
295 int ret; 313 int ret;
296 314
297 set_graph_array(tr); 315 set_graph_array(tr);
298 ret = register_ftrace_graph(&trace_graph_return, 316 if (tracing_thresh)
299 &trace_graph_entry); 317 ret = register_ftrace_graph(&trace_graph_thresh_return,
318 &trace_graph_thresh_entry);
319 else
320 ret = register_ftrace_graph(&trace_graph_return,
321 &trace_graph_entry);
300 if (ret) 322 if (ret)
301 return ret; 323 return ret;
302 tracing_start_cmdline_record(); 324 tracing_start_cmdline_record();
@@ -920,7 +942,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
920 if (!ret) 942 if (!ret)
921 return TRACE_TYPE_PARTIAL_LINE; 943 return TRACE_TYPE_PARTIAL_LINE;
922 } else { 944 } else {
923 ret = trace_seq_printf(s, "} (%ps)\n", (void *)trace->func); 945 ret = trace_seq_printf(s, "} /* %ps */\n", (void *)trace->func);
924 if (!ret) 946 if (!ret)
925 return TRACE_TYPE_PARTIAL_LINE; 947 return TRACE_TYPE_PARTIAL_LINE;
926 } 948 }
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 505c92273b1a..1251e367bae9 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1214,7 +1214,7 @@ static int set_print_fmt(struct trace_probe *tp)
1214#ifdef CONFIG_PERF_EVENTS 1214#ifdef CONFIG_PERF_EVENTS
1215 1215
1216/* Kprobe profile handler */ 1216/* Kprobe profile handler */
1217static __kprobes void kprobe_profile_func(struct kprobe *kp, 1217static __kprobes void kprobe_perf_func(struct kprobe *kp,
1218 struct pt_regs *regs) 1218 struct pt_regs *regs)
1219{ 1219{
1220 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); 1220 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
@@ -1227,11 +1227,11 @@ static __kprobes void kprobe_profile_func(struct kprobe *kp,
1227 __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); 1227 __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
1228 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1228 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1229 size -= sizeof(u32); 1229 size -= sizeof(u32);
1230 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 1230 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
1231 "profile buffer not large enough")) 1231 "profile buffer not large enough"))
1232 return; 1232 return;
1233 1233
1234 entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags); 1234 entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags);
1235 if (!entry) 1235 if (!entry)
1236 return; 1236 return;
1237 1237
@@ -1240,11 +1240,11 @@ static __kprobes void kprobe_profile_func(struct kprobe *kp,
1240 for (i = 0; i < tp->nr_args; i++) 1240 for (i = 0; i < tp->nr_args; i++)
1241 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1241 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1242 1242
1243 ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags); 1243 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs);
1244} 1244}
1245 1245
1246/* Kretprobe profile handler */ 1246/* Kretprobe profile handler */
1247static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri, 1247static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1248 struct pt_regs *regs) 1248 struct pt_regs *regs)
1249{ 1249{
1250 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); 1250 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
@@ -1257,11 +1257,11 @@ static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri,
1257 __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); 1257 __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
1258 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1258 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1259 size -= sizeof(u32); 1259 size -= sizeof(u32);
1260 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 1260 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
1261 "profile buffer not large enough")) 1261 "profile buffer not large enough"))
1262 return; 1262 return;
1263 1263
1264 entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags); 1264 entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags);
1265 if (!entry) 1265 if (!entry)
1266 return; 1266 return;
1267 1267
@@ -1271,10 +1271,11 @@ static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri,
1271 for (i = 0; i < tp->nr_args; i++) 1271 for (i = 0; i < tp->nr_args; i++)
1272 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1272 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1273 1273
1274 ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags); 1274 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1,
1275 irq_flags, regs);
1275} 1276}
1276 1277
1277static int probe_profile_enable(struct ftrace_event_call *call) 1278static int probe_perf_enable(struct ftrace_event_call *call)
1278{ 1279{
1279 struct trace_probe *tp = (struct trace_probe *)call->data; 1280 struct trace_probe *tp = (struct trace_probe *)call->data;
1280 1281
@@ -1286,7 +1287,7 @@ static int probe_profile_enable(struct ftrace_event_call *call)
1286 return enable_kprobe(&tp->rp.kp); 1287 return enable_kprobe(&tp->rp.kp);
1287} 1288}
1288 1289
1289static void probe_profile_disable(struct ftrace_event_call *call) 1290static void probe_perf_disable(struct ftrace_event_call *call)
1290{ 1291{
1291 struct trace_probe *tp = (struct trace_probe *)call->data; 1292 struct trace_probe *tp = (struct trace_probe *)call->data;
1292 1293
@@ -1311,7 +1312,7 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1311 kprobe_trace_func(kp, regs); 1312 kprobe_trace_func(kp, regs);
1312#ifdef CONFIG_PERF_EVENTS 1313#ifdef CONFIG_PERF_EVENTS
1313 if (tp->flags & TP_FLAG_PROFILE) 1314 if (tp->flags & TP_FLAG_PROFILE)
1314 kprobe_profile_func(kp, regs); 1315 kprobe_perf_func(kp, regs);
1315#endif 1316#endif
1316 return 0; /* We don't tweek kernel, so just return 0 */ 1317 return 0; /* We don't tweek kernel, so just return 0 */
1317} 1318}
@@ -1325,7 +1326,7 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1325 kretprobe_trace_func(ri, regs); 1326 kretprobe_trace_func(ri, regs);
1326#ifdef CONFIG_PERF_EVENTS 1327#ifdef CONFIG_PERF_EVENTS
1327 if (tp->flags & TP_FLAG_PROFILE) 1328 if (tp->flags & TP_FLAG_PROFILE)
1328 kretprobe_profile_func(ri, regs); 1329 kretprobe_perf_func(ri, regs);
1329#endif 1330#endif
1330 return 0; /* We don't tweek kernel, so just return 0 */ 1331 return 0; /* We don't tweek kernel, so just return 0 */
1331} 1332}
@@ -1358,8 +1359,8 @@ static int register_probe_event(struct trace_probe *tp)
1358 call->unregfunc = probe_event_disable; 1359 call->unregfunc = probe_event_disable;
1359 1360
1360#ifdef CONFIG_PERF_EVENTS 1361#ifdef CONFIG_PERF_EVENTS
1361 call->profile_enable = probe_profile_enable; 1362 call->perf_event_enable = probe_perf_enable;
1362 call->profile_disable = probe_profile_disable; 1363 call->perf_event_disable = probe_perf_disable;
1363#endif 1364#endif
1364 call->data = tp; 1365 call->data = tp;
1365 ret = trace_add_event_call(call); 1366 ret = trace_add_event_call(call);
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index 94103cdcf9d8..d59cd6879477 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -23,6 +23,7 @@
23#include <linux/debugfs.h> 23#include <linux/debugfs.h>
24#include <linux/ftrace.h> 24#include <linux/ftrace.h>
25#include <linux/module.h> 25#include <linux/module.h>
26#include <linux/slab.h>
26#include <linux/fs.h> 27#include <linux/fs.h>
27 28
28#include "trace_output.h" 29#include "trace_output.h"
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 0acd834659ed..017fa376505d 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -9,6 +9,7 @@
9#include <linux/kernel.h> 9#include <linux/kernel.h>
10#include <linux/mmiotrace.h> 10#include <linux/mmiotrace.h>
11#include <linux/pci.h> 11#include <linux/pci.h>
12#include <linux/slab.h>
12#include <linux/time.h> 13#include <linux/time.h>
13 14
14#include <asm/atomic.h> 15#include <asm/atomic.h>
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 280fea470d67..81003b4d617f 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -3,6 +3,7 @@
3#include <linux/stringify.h> 3#include <linux/stringify.h>
4#include <linux/kthread.h> 4#include <linux/kthread.h>
5#include <linux/delay.h> 5#include <linux/delay.h>
6#include <linux/slab.h>
6 7
7static inline int trace_valid_entry(struct trace_entry *entry) 8static inline int trace_valid_entry(struct trace_entry *entry)
8{ 9{
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index a4bb239eb987..96cffb269e73 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -10,6 +10,7 @@
10 10
11 11
12#include <linux/list.h> 12#include <linux/list.h>
13#include <linux/slab.h>
13#include <linux/rbtree.h> 14#include <linux/rbtree.h>
14#include <linux/debugfs.h> 15#include <linux/debugfs.h>
15#include "trace_stat.h" 16#include "trace_stat.h"
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index cba47d7935cc..4d6d711717f2 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -1,5 +1,6 @@
1#include <trace/syscall.h> 1#include <trace/syscall.h>
2#include <trace/events/syscalls.h> 2#include <trace/events/syscalls.h>
3#include <linux/slab.h>
3#include <linux/kernel.h> 4#include <linux/kernel.h>
4#include <linux/ftrace.h> 5#include <linux/ftrace.h>
5#include <linux/perf_event.h> 6#include <linux/perf_event.h>
@@ -428,12 +429,12 @@ core_initcall(init_ftrace_syscalls);
428 429
429#ifdef CONFIG_PERF_EVENTS 430#ifdef CONFIG_PERF_EVENTS
430 431
431static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); 432static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
432static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls); 433static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
433static int sys_prof_refcount_enter; 434static int sys_perf_refcount_enter;
434static int sys_prof_refcount_exit; 435static int sys_perf_refcount_exit;
435 436
436static void prof_syscall_enter(struct pt_regs *regs, long id) 437static void perf_syscall_enter(struct pt_regs *regs, long id)
437{ 438{
438 struct syscall_metadata *sys_data; 439 struct syscall_metadata *sys_data;
439 struct syscall_trace_enter *rec; 440 struct syscall_trace_enter *rec;
@@ -443,7 +444,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
443 int size; 444 int size;
444 445
445 syscall_nr = syscall_get_nr(current, regs); 446 syscall_nr = syscall_get_nr(current, regs);
446 if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) 447 if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
447 return; 448 return;
448 449
449 sys_data = syscall_nr_to_meta(syscall_nr); 450 sys_data = syscall_nr_to_meta(syscall_nr);
@@ -455,11 +456,11 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
455 size = ALIGN(size + sizeof(u32), sizeof(u64)); 456 size = ALIGN(size + sizeof(u32), sizeof(u64));
456 size -= sizeof(u32); 457 size -= sizeof(u32);
457 458
458 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 459 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
459 "profile buffer not large enough")) 460 "perf buffer not large enough"))
460 return; 461 return;
461 462
462 rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size, 463 rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
463 sys_data->enter_event->id, &rctx, &flags); 464 sys_data->enter_event->id, &rctx, &flags);
464 if (!rec) 465 if (!rec)
465 return; 466 return;
@@ -467,10 +468,10 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
467 rec->nr = syscall_nr; 468 rec->nr = syscall_nr;
468 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 469 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
469 (unsigned long *)&rec->args); 470 (unsigned long *)&rec->args);
470 ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); 471 perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
471} 472}
472 473
473int prof_sysenter_enable(struct ftrace_event_call *call) 474int perf_sysenter_enable(struct ftrace_event_call *call)
474{ 475{
475 int ret = 0; 476 int ret = 0;
476 int num; 477 int num;
@@ -478,34 +479,34 @@ int prof_sysenter_enable(struct ftrace_event_call *call)
478 num = ((struct syscall_metadata *)call->data)->syscall_nr; 479 num = ((struct syscall_metadata *)call->data)->syscall_nr;
479 480
480 mutex_lock(&syscall_trace_lock); 481 mutex_lock(&syscall_trace_lock);
481 if (!sys_prof_refcount_enter) 482 if (!sys_perf_refcount_enter)
482 ret = register_trace_sys_enter(prof_syscall_enter); 483 ret = register_trace_sys_enter(perf_syscall_enter);
483 if (ret) { 484 if (ret) {
484 pr_info("event trace: Could not activate" 485 pr_info("event trace: Could not activate"
485 "syscall entry trace point"); 486 "syscall entry trace point");
486 } else { 487 } else {
487 set_bit(num, enabled_prof_enter_syscalls); 488 set_bit(num, enabled_perf_enter_syscalls);
488 sys_prof_refcount_enter++; 489 sys_perf_refcount_enter++;
489 } 490 }
490 mutex_unlock(&syscall_trace_lock); 491 mutex_unlock(&syscall_trace_lock);
491 return ret; 492 return ret;
492} 493}
493 494
494void prof_sysenter_disable(struct ftrace_event_call *call) 495void perf_sysenter_disable(struct ftrace_event_call *call)
495{ 496{
496 int num; 497 int num;
497 498
498 num = ((struct syscall_metadata *)call->data)->syscall_nr; 499 num = ((struct syscall_metadata *)call->data)->syscall_nr;
499 500
500 mutex_lock(&syscall_trace_lock); 501 mutex_lock(&syscall_trace_lock);
501 sys_prof_refcount_enter--; 502 sys_perf_refcount_enter--;
502 clear_bit(num, enabled_prof_enter_syscalls); 503 clear_bit(num, enabled_perf_enter_syscalls);
503 if (!sys_prof_refcount_enter) 504 if (!sys_perf_refcount_enter)
504 unregister_trace_sys_enter(prof_syscall_enter); 505 unregister_trace_sys_enter(perf_syscall_enter);
505 mutex_unlock(&syscall_trace_lock); 506 mutex_unlock(&syscall_trace_lock);
506} 507}
507 508
508static void prof_syscall_exit(struct pt_regs *regs, long ret) 509static void perf_syscall_exit(struct pt_regs *regs, long ret)
509{ 510{
510 struct syscall_metadata *sys_data; 511 struct syscall_metadata *sys_data;
511 struct syscall_trace_exit *rec; 512 struct syscall_trace_exit *rec;
@@ -515,7 +516,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
515 int size; 516 int size;
516 517
517 syscall_nr = syscall_get_nr(current, regs); 518 syscall_nr = syscall_get_nr(current, regs);
518 if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) 519 if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
519 return; 520 return;
520 521
521 sys_data = syscall_nr_to_meta(syscall_nr); 522 sys_data = syscall_nr_to_meta(syscall_nr);
@@ -530,11 +531,11 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
530 * Impossible, but be paranoid with the future 531 * Impossible, but be paranoid with the future
531 * How to put this check outside runtime? 532 * How to put this check outside runtime?
532 */ 533 */
533 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 534 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
534 "exit event has grown above profile buffer size")) 535 "exit event has grown above perf buffer size"))
535 return; 536 return;
536 537
537 rec = (struct syscall_trace_exit *)ftrace_perf_buf_prepare(size, 538 rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
538 sys_data->exit_event->id, &rctx, &flags); 539 sys_data->exit_event->id, &rctx, &flags);
539 if (!rec) 540 if (!rec)
540 return; 541 return;
@@ -542,10 +543,10 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
542 rec->nr = syscall_nr; 543 rec->nr = syscall_nr;
543 rec->ret = syscall_get_return_value(current, regs); 544 rec->ret = syscall_get_return_value(current, regs);
544 545
545 ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); 546 perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
546} 547}
547 548
548int prof_sysexit_enable(struct ftrace_event_call *call) 549int perf_sysexit_enable(struct ftrace_event_call *call)
549{ 550{
550 int ret = 0; 551 int ret = 0;
551 int num; 552 int num;
@@ -553,30 +554,30 @@ int prof_sysexit_enable(struct ftrace_event_call *call)
553 num = ((struct syscall_metadata *)call->data)->syscall_nr; 554 num = ((struct syscall_metadata *)call->data)->syscall_nr;
554 555
555 mutex_lock(&syscall_trace_lock); 556 mutex_lock(&syscall_trace_lock);
556 if (!sys_prof_refcount_exit) 557 if (!sys_perf_refcount_exit)
557 ret = register_trace_sys_exit(prof_syscall_exit); 558 ret = register_trace_sys_exit(perf_syscall_exit);
558 if (ret) { 559 if (ret) {
559 pr_info("event trace: Could not activate" 560 pr_info("event trace: Could not activate"
560 "syscall exit trace point"); 561 "syscall exit trace point");
561 } else { 562 } else {
562 set_bit(num, enabled_prof_exit_syscalls); 563 set_bit(num, enabled_perf_exit_syscalls);
563 sys_prof_refcount_exit++; 564 sys_perf_refcount_exit++;
564 } 565 }
565 mutex_unlock(&syscall_trace_lock); 566 mutex_unlock(&syscall_trace_lock);
566 return ret; 567 return ret;
567} 568}
568 569
569void prof_sysexit_disable(struct ftrace_event_call *call) 570void perf_sysexit_disable(struct ftrace_event_call *call)
570{ 571{
571 int num; 572 int num;
572 573
573 num = ((struct syscall_metadata *)call->data)->syscall_nr; 574 num = ((struct syscall_metadata *)call->data)->syscall_nr;
574 575
575 mutex_lock(&syscall_trace_lock); 576 mutex_lock(&syscall_trace_lock);
576 sys_prof_refcount_exit--; 577 sys_perf_refcount_exit--;
577 clear_bit(num, enabled_prof_exit_syscalls); 578 clear_bit(num, enabled_perf_exit_syscalls);
578 if (!sys_prof_refcount_exit) 579 if (!sys_perf_refcount_exit)
579 unregister_trace_sys_exit(prof_syscall_exit); 580 unregister_trace_sys_exit(perf_syscall_exit);
580 mutex_unlock(&syscall_trace_lock); 581 mutex_unlock(&syscall_trace_lock);
581} 582}
582 583
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index 40cafb07dffd..cc2d2faa7d9e 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -9,6 +9,7 @@
9#include <trace/events/workqueue.h> 9#include <trace/events/workqueue.h>
10#include <linux/list.h> 10#include <linux/list.h>
11#include <linux/percpu.h> 11#include <linux/percpu.h>
12#include <linux/slab.h>
12#include <linux/kref.h> 13#include <linux/kref.h>
13#include "trace_stat.h" 14#include "trace_stat.h"
14#include "trace.h" 15#include "trace.h"