path: root/kernel
author    Mark Brown <broonie@opensource.wolfsonmicro.com>  2010-03-22 07:17:26 -0400
committer Mark Brown <broonie@opensource.wolfsonmicro.com>  2010-03-22 07:17:26 -0400
commit    f9b44121b34174ae4f243a568393fc3225842e75 (patch)
tree      943f68cd7458d08a9e8809ed0a10b71b23911f3e /kernel
parent    8727b909bb2348d29e62c599cd7a5d610da3760f (diff)
parent    220bf991b0366cc50a94feede3d7341fa5710ee4 (diff)
Merge commit 'v2.6.34-rc2' into for-2.6.34
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/acct.c                          |  10
-rw-r--r--  kernel/audit.c                         |   2
-rw-r--r--  kernel/cgroup.c                        | 693
-rw-r--r--  kernel/exit.c                          |   2
-rw-r--r--  kernel/fork.c                          |  47
-rw-r--r--  kernel/hw_breakpoint.c                 |  11
-rw-r--r--  kernel/irq/chip.c                      |   2
-rw-r--r--  kernel/irq/devres.c                    |   4
-rw-r--r--  kernel/kprobes.c                       |   3
-rw-r--r--  kernel/ksysfs.c                        |   2
-rw-r--r--  kernel/lockdep.c                       |  10
-rw-r--r--  kernel/nsproxy.c                       |  13
-rw-r--r--  kernel/params.c                        |   6
-rw-r--r--  kernel/perf_event.c                    | 118
-rw-r--r--  kernel/pid.c                           |   4
-rw-r--r--  kernel/pid_namespace.c                 |   7
-rw-r--r--  kernel/rcutree.h                       |  21
-rw-r--r--  kernel/rcutree_plugin.h                |   8
-rw-r--r--  kernel/sched.c                         |   4
-rw-r--r--  kernel/sched_cpupri.c                  |   2
-rw-r--r--  kernel/sched_fair.c                    |   2
-rw-r--r--  kernel/sched_rt.c                      |   7
-rw-r--r--  kernel/sys.c                           |  67
-rw-r--r--  kernel/sys_ni.c                        |   1
-rw-r--r--  kernel/sysctl.c                        |  37
-rw-r--r--  kernel/time/clocksource.c              |   4
-rw-r--r--  kernel/trace/Makefile                  |   2
-rw-r--r--  kernel/trace/ftrace.c                  |  30
-rw-r--r--  kernel/trace/ring_buffer.c             |  16
-rw-r--r--  kernel/trace/trace.c                   |  49
-rw-r--r--  kernel/trace/trace.h                   |   5
-rw-r--r--  kernel/trace/trace_clock.c             |   1
-rw-r--r--  kernel/trace/trace_event_perf.c (renamed from kernel/trace/trace_event_profile.c) |  54
-rw-r--r--  kernel/trace/trace_events.c            |   2
-rw-r--r--  kernel/trace/trace_functions_graph.c   |  27
-rw-r--r--  kernel/trace/trace_kprobe.c            |  29
-rw-r--r--  kernel/trace/trace_syscalls.c          |  72
37 files changed, 1027 insertions, 347 deletions
diff --git a/kernel/acct.c b/kernel/acct.c
index a6605ca921b6..24f8c81fc48d 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -588,16 +588,6 @@ out:
588} 588}
589 589
590/** 590/**
591 * acct_init_pacct - initialize a new pacct_struct
592 * @pacct: per-process accounting info struct to initialize
593 */
594void acct_init_pacct(struct pacct_struct *pacct)
595{
596 memset(pacct, 0, sizeof(struct pacct_struct));
597 pacct->ac_utime = pacct->ac_stime = cputime_zero;
598}
599
600/**
601 * acct_collect - collect accounting information into pacct_struct 591 * acct_collect - collect accounting information into pacct_struct
602 * @exitcode: task exit code 592 * @exitcode: task exit code
603 * @group_dead: not 0, if this thread is the last one in the process. 593 * @group_dead: not 0, if this thread is the last one in the process.
diff --git a/kernel/audit.c b/kernel/audit.c
index 5feed232be9d..78f7f86aa238 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -398,7 +398,7 @@ static void kauditd_send_skb(struct sk_buff *skb)
398 skb_get(skb); 398 skb_get(skb);
399 err = netlink_unicast(audit_sock, skb, audit_nlk_pid, 0); 399 err = netlink_unicast(audit_sock, skb, audit_nlk_pid, 0);
400 if (err < 0) { 400 if (err < 0) {
401 BUG_ON(err != -ECONNREFUSED); /* Shoudn't happen */ 401 BUG_ON(err != -ECONNREFUSED); /* Shouldn't happen */
402 printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid); 402 printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid);
403 audit_log_lost("auditd dissapeared\n"); 403 audit_log_lost("auditd dissapeared\n");
404 audit_pid = 0; 404 audit_pid = 0;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4fd90e129772..ef909a329750 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4,6 +4,10 @@
4 * Based originally on the cpuset system, extracted by Paul Menage 4 * Based originally on the cpuset system, extracted by Paul Menage
5 * Copyright (C) 2006 Google, Inc 5 * Copyright (C) 2006 Google, Inc
6 * 6 *
7 * Notifications support
8 * Copyright (C) 2009 Nokia Corporation
9 * Author: Kirill A. Shutemov
10 *
7 * Copyright notices from the original cpuset code: 11 * Copyright notices from the original cpuset code:
8 * -------------------------------------------------- 12 * --------------------------------------------------
9 * Copyright (C) 2003 BULL SA. 13 * Copyright (C) 2003 BULL SA.
@@ -44,6 +48,7 @@
44#include <linux/string.h> 48#include <linux/string.h>
45#include <linux/sort.h> 49#include <linux/sort.h>
46#include <linux/kmod.h> 50#include <linux/kmod.h>
51#include <linux/module.h>
47#include <linux/delayacct.h> 52#include <linux/delayacct.h>
48#include <linux/cgroupstats.h> 53#include <linux/cgroupstats.h>
49#include <linux/hash.h> 54#include <linux/hash.h>
@@ -52,15 +57,21 @@
52#include <linux/pid_namespace.h> 57#include <linux/pid_namespace.h>
53#include <linux/idr.h> 58#include <linux/idr.h>
54#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ 59#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
60#include <linux/eventfd.h>
61#include <linux/poll.h>
55 62
56#include <asm/atomic.h> 63#include <asm/atomic.h>
57 64
58static DEFINE_MUTEX(cgroup_mutex); 65static DEFINE_MUTEX(cgroup_mutex);
59 66
60/* Generate an array of cgroup subsystem pointers */ 67/*
68 * Generate an array of cgroup subsystem pointers. At boot time, this is
69 * populated up to CGROUP_BUILTIN_SUBSYS_COUNT, and modular subsystems are
70 * registered after that. The mutable section of this array is protected by
71 * cgroup_mutex.
72 */
61#define SUBSYS(_x) &_x ## _subsys, 73#define SUBSYS(_x) &_x ## _subsys,
62 74static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = {
63static struct cgroup_subsys *subsys[] = {
64#include <linux/cgroup_subsys.h> 75#include <linux/cgroup_subsys.h>
65}; 76};
66 77
@@ -147,6 +158,35 @@ struct css_id {
147 unsigned short stack[0]; /* Array of Length (depth+1) */ 158 unsigned short stack[0]; /* Array of Length (depth+1) */
148}; 159};
149 160
161/*
162 * cgroup_event represents events which userspace want to recieve.
163 */
164struct cgroup_event {
165 /*
166 * Cgroup which the event belongs to.
167 */
168 struct cgroup *cgrp;
169 /*
170 * Control file which the event associated.
171 */
172 struct cftype *cft;
173 /*
174 * eventfd to signal userspace about the event.
175 */
176 struct eventfd_ctx *eventfd;
177 /*
178 * Each of these stored in a list by the cgroup.
179 */
180 struct list_head list;
181 /*
182 * All fields below needed to unregister event when
183 * userspace closes eventfd.
184 */
185 poll_table pt;
186 wait_queue_head_t *wqh;
187 wait_queue_t wait;
188 struct work_struct remove;
189};
150 190
151/* The list of hierarchy roots */ 191/* The list of hierarchy roots */
152 192
@@ -250,7 +290,8 @@ struct cg_cgroup_link {
250static struct css_set init_css_set; 290static struct css_set init_css_set;
251static struct cg_cgroup_link init_css_set_link; 291static struct cg_cgroup_link init_css_set_link;
252 292
253static int cgroup_subsys_init_idr(struct cgroup_subsys *ss); 293static int cgroup_init_idr(struct cgroup_subsys *ss,
294 struct cgroup_subsys_state *css);
254 295
255/* css_set_lock protects the list of css_set objects, and the 296/* css_set_lock protects the list of css_set objects, and the
256 * chain of tasks off each css_set. Nests outside task->alloc_lock 297 * chain of tasks off each css_set. Nests outside task->alloc_lock
@@ -448,8 +489,11 @@ static struct css_set *find_existing_css_set(
448 struct hlist_node *node; 489 struct hlist_node *node;
449 struct css_set *cg; 490 struct css_set *cg;
450 491
451 /* Built the set of subsystem state objects that we want to 492 /*
452 * see in the new css_set */ 493 * Build the set of subsystem state objects that we want to see in the
494 * new css_set. while subsystems can change globally, the entries here
495 * won't change, so no need for locking.
496 */
453 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 497 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
454 if (root->subsys_bits & (1UL << i)) { 498 if (root->subsys_bits & (1UL << i)) {
455 /* Subsystem is in this hierarchy. So we want 499 /* Subsystem is in this hierarchy. So we want
@@ -696,6 +740,7 @@ void cgroup_lock(void)
696{ 740{
697 mutex_lock(&cgroup_mutex); 741 mutex_lock(&cgroup_mutex);
698} 742}
743EXPORT_SYMBOL_GPL(cgroup_lock);
699 744
700/** 745/**
701 * cgroup_unlock - release lock on cgroup changes 746 * cgroup_unlock - release lock on cgroup changes
@@ -706,6 +751,7 @@ void cgroup_unlock(void)
706{ 751{
707 mutex_unlock(&cgroup_mutex); 752 mutex_unlock(&cgroup_mutex);
708} 753}
754EXPORT_SYMBOL_GPL(cgroup_unlock);
709 755
710/* 756/*
711 * A couple of forward declarations required, due to cyclic reference loop: 757 * A couple of forward declarations required, due to cyclic reference loop:
@@ -757,6 +803,7 @@ static int cgroup_call_pre_destroy(struct cgroup *cgrp)
757 if (ret) 803 if (ret)
758 break; 804 break;
759 } 805 }
806
760 return ret; 807 return ret;
761} 808}
762 809
@@ -884,7 +931,11 @@ void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
884 css_put(css); 931 css_put(css);
885} 932}
886 933
887 934/*
935 * Call with cgroup_mutex held. Drops reference counts on modules, including
936 * any duplicate ones that parse_cgroupfs_options took. If this function
937 * returns an error, no reference counts are touched.
938 */
888static int rebind_subsystems(struct cgroupfs_root *root, 939static int rebind_subsystems(struct cgroupfs_root *root,
889 unsigned long final_bits) 940 unsigned long final_bits)
890{ 941{
@@ -892,6 +943,8 @@ static int rebind_subsystems(struct cgroupfs_root *root,
892 struct cgroup *cgrp = &root->top_cgroup; 943 struct cgroup *cgrp = &root->top_cgroup;
893 int i; 944 int i;
894 945
946 BUG_ON(!mutex_is_locked(&cgroup_mutex));
947
895 removed_bits = root->actual_subsys_bits & ~final_bits; 948 removed_bits = root->actual_subsys_bits & ~final_bits;
896 added_bits = final_bits & ~root->actual_subsys_bits; 949 added_bits = final_bits & ~root->actual_subsys_bits;
897 /* Check that any added subsystems are currently free */ 950 /* Check that any added subsystems are currently free */
@@ -900,6 +953,12 @@ static int rebind_subsystems(struct cgroupfs_root *root,
900 struct cgroup_subsys *ss = subsys[i]; 953 struct cgroup_subsys *ss = subsys[i];
901 if (!(bit & added_bits)) 954 if (!(bit & added_bits))
902 continue; 955 continue;
956 /*
957 * Nobody should tell us to do a subsys that doesn't exist:
958 * parse_cgroupfs_options should catch that case and refcounts
959 * ensure that subsystems won't disappear once selected.
960 */
961 BUG_ON(ss == NULL);
903 if (ss->root != &rootnode) { 962 if (ss->root != &rootnode) {
904 /* Subsystem isn't free */ 963 /* Subsystem isn't free */
905 return -EBUSY; 964 return -EBUSY;
@@ -919,6 +978,7 @@ static int rebind_subsystems(struct cgroupfs_root *root,
919 unsigned long bit = 1UL << i; 978 unsigned long bit = 1UL << i;
920 if (bit & added_bits) { 979 if (bit & added_bits) {
921 /* We're binding this subsystem to this hierarchy */ 980 /* We're binding this subsystem to this hierarchy */
981 BUG_ON(ss == NULL);
922 BUG_ON(cgrp->subsys[i]); 982 BUG_ON(cgrp->subsys[i]);
923 BUG_ON(!dummytop->subsys[i]); 983 BUG_ON(!dummytop->subsys[i]);
924 BUG_ON(dummytop->subsys[i]->cgroup != dummytop); 984 BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
@@ -930,8 +990,10 @@ static int rebind_subsystems(struct cgroupfs_root *root,
930 if (ss->bind) 990 if (ss->bind)
931 ss->bind(ss, cgrp); 991 ss->bind(ss, cgrp);
932 mutex_unlock(&ss->hierarchy_mutex); 992 mutex_unlock(&ss->hierarchy_mutex);
993 /* refcount was already taken, and we're keeping it */
933 } else if (bit & removed_bits) { 994 } else if (bit & removed_bits) {
934 /* We're removing this subsystem */ 995 /* We're removing this subsystem */
996 BUG_ON(ss == NULL);
935 BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]); 997 BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
936 BUG_ON(cgrp->subsys[i]->cgroup != cgrp); 998 BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
937 mutex_lock(&ss->hierarchy_mutex); 999 mutex_lock(&ss->hierarchy_mutex);
@@ -942,9 +1004,20 @@ static int rebind_subsystems(struct cgroupfs_root *root,
942 subsys[i]->root = &rootnode; 1004 subsys[i]->root = &rootnode;
943 list_move(&ss->sibling, &rootnode.subsys_list); 1005 list_move(&ss->sibling, &rootnode.subsys_list);
944 mutex_unlock(&ss->hierarchy_mutex); 1006 mutex_unlock(&ss->hierarchy_mutex);
1007 /* subsystem is now free - drop reference on module */
1008 module_put(ss->module);
945 } else if (bit & final_bits) { 1009 } else if (bit & final_bits) {
946 /* Subsystem state should already exist */ 1010 /* Subsystem state should already exist */
1011 BUG_ON(ss == NULL);
947 BUG_ON(!cgrp->subsys[i]); 1012 BUG_ON(!cgrp->subsys[i]);
1013 /*
1014 * a refcount was taken, but we already had one, so
1015 * drop the extra reference.
1016 */
1017 module_put(ss->module);
1018#ifdef CONFIG_MODULE_UNLOAD
1019 BUG_ON(ss->module && !module_refcount(ss->module));
1020#endif
948 } else { 1021 } else {
949 /* Subsystem state shouldn't exist */ 1022 /* Subsystem state shouldn't exist */
950 BUG_ON(cgrp->subsys[i]); 1023 BUG_ON(cgrp->subsys[i]);
@@ -986,13 +1059,20 @@ struct cgroup_sb_opts {
986 1059
987}; 1060};
988 1061
989/* Convert a hierarchy specifier into a bitmask of subsystems and 1062/*
990 * flags. */ 1063 * Convert a hierarchy specifier into a bitmask of subsystems and flags. Call
991static int parse_cgroupfs_options(char *data, 1064 * with cgroup_mutex held to protect the subsys[] array. This function takes
992 struct cgroup_sb_opts *opts) 1065 * refcounts on subsystems to be used, unless it returns error, in which case
1066 * no refcounts are taken.
1067 */
1068static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
993{ 1069{
994 char *token, *o = data ?: "all"; 1070 char *token, *o = data ?: "all";
995 unsigned long mask = (unsigned long)-1; 1071 unsigned long mask = (unsigned long)-1;
1072 int i;
1073 bool module_pin_failed = false;
1074
1075 BUG_ON(!mutex_is_locked(&cgroup_mutex));
996 1076
997#ifdef CONFIG_CPUSETS 1077#ifdef CONFIG_CPUSETS
998 mask = ~(1UL << cpuset_subsys_id); 1078 mask = ~(1UL << cpuset_subsys_id);
@@ -1005,10 +1085,11 @@ static int parse_cgroupfs_options(char *data,
1005 return -EINVAL; 1085 return -EINVAL;
1006 if (!strcmp(token, "all")) { 1086 if (!strcmp(token, "all")) {
1007 /* Add all non-disabled subsystems */ 1087 /* Add all non-disabled subsystems */
1008 int i;
1009 opts->subsys_bits = 0; 1088 opts->subsys_bits = 0;
1010 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 1089 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1011 struct cgroup_subsys *ss = subsys[i]; 1090 struct cgroup_subsys *ss = subsys[i];
1091 if (ss == NULL)
1092 continue;
1012 if (!ss->disabled) 1093 if (!ss->disabled)
1013 opts->subsys_bits |= 1ul << i; 1094 opts->subsys_bits |= 1ul << i;
1014 } 1095 }
@@ -1026,7 +1107,6 @@ static int parse_cgroupfs_options(char *data,
1026 if (!opts->release_agent) 1107 if (!opts->release_agent)
1027 return -ENOMEM; 1108 return -ENOMEM;
1028 } else if (!strncmp(token, "name=", 5)) { 1109 } else if (!strncmp(token, "name=", 5)) {
1029 int i;
1030 const char *name = token + 5; 1110 const char *name = token + 5;
1031 /* Can't specify an empty name */ 1111 /* Can't specify an empty name */
1032 if (!strlen(name)) 1112 if (!strlen(name))
@@ -1050,9 +1130,10 @@ static int parse_cgroupfs_options(char *data,
1050 return -ENOMEM; 1130 return -ENOMEM;
1051 } else { 1131 } else {
1052 struct cgroup_subsys *ss; 1132 struct cgroup_subsys *ss;
1053 int i;
1054 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 1133 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1055 ss = subsys[i]; 1134 ss = subsys[i];
1135 if (ss == NULL)
1136 continue;
1056 if (!strcmp(token, ss->name)) { 1137 if (!strcmp(token, ss->name)) {
1057 if (!ss->disabled) 1138 if (!ss->disabled)
1058 set_bit(i, &opts->subsys_bits); 1139 set_bit(i, &opts->subsys_bits);
@@ -1087,9 +1168,54 @@ static int parse_cgroupfs_options(char *data,
1087 if (!opts->subsys_bits && !opts->name) 1168 if (!opts->subsys_bits && !opts->name)
1088 return -EINVAL; 1169 return -EINVAL;
1089 1170
1171 /*
1172 * Grab references on all the modules we'll need, so the subsystems
1173 * don't dance around before rebind_subsystems attaches them. This may
1174 * take duplicate reference counts on a subsystem that's already used,
1175 * but rebind_subsystems handles this case.
1176 */
1177 for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
1178 unsigned long bit = 1UL << i;
1179
1180 if (!(bit & opts->subsys_bits))
1181 continue;
1182 if (!try_module_get(subsys[i]->module)) {
1183 module_pin_failed = true;
1184 break;
1185 }
1186 }
1187 if (module_pin_failed) {
1188 /*
1189 * oops, one of the modules was going away. this means that we
1190 * raced with a module_delete call, and to the user this is
1191 * essentially a "subsystem doesn't exist" case.
1192 */
1193 for (i--; i >= CGROUP_BUILTIN_SUBSYS_COUNT; i--) {
1194 /* drop refcounts only on the ones we took */
1195 unsigned long bit = 1UL << i;
1196
1197 if (!(bit & opts->subsys_bits))
1198 continue;
1199 module_put(subsys[i]->module);
1200 }
1201 return -ENOENT;
1202 }
1203
1090 return 0; 1204 return 0;
1091} 1205}
1092 1206
1207static void drop_parsed_module_refcounts(unsigned long subsys_bits)
1208{
1209 int i;
1210 for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
1211 unsigned long bit = 1UL << i;
1212
1213 if (!(bit & subsys_bits))
1214 continue;
1215 module_put(subsys[i]->module);
1216 }
1217}
1218
1093static int cgroup_remount(struct super_block *sb, int *flags, char *data) 1219static int cgroup_remount(struct super_block *sb, int *flags, char *data)
1094{ 1220{
1095 int ret = 0; 1221 int ret = 0;
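The loop added above pins every module named in the mount options with try_module_get() and unwinds the partial set on failure, so a racing module unload degrades into a plain -ENOENT. A stripped-down sketch of the same pin-all-or-roll-back idiom, with a hypothetical providers[] array standing in for subsys[]:

#include <linux/module.h>
#include <linux/errno.h>

/* Hypothetical optional-provider table; stands in for the subsys[] walk above. */
#define NPROV 8
static struct module *providers[NPROV];

static int pin_providers(unsigned long wanted)
{
        int i;

        for (i = 0; i < NPROV; i++) {
                if (!(wanted & (1UL << i)))
                        continue;
                if (!try_module_get(providers[i]))
                        goto rollback;          /* raced with module removal */
        }
        return 0;

rollback:
        /* drop only the references that were actually taken */
        for (i--; i >= 0; i--)
                if (wanted & (1UL << i))
                        module_put(providers[i]);
        return -ENOENT;
}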
@@ -1106,21 +1232,19 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
1106 if (ret) 1232 if (ret)
1107 goto out_unlock; 1233 goto out_unlock;
1108 1234
1109 /* Don't allow flags to change at remount */ 1235 /* Don't allow flags or name to change at remount */
1110 if (opts.flags != root->flags) { 1236 if (opts.flags != root->flags ||
1111 ret = -EINVAL; 1237 (opts.name && strcmp(opts.name, root->name))) {
1112 goto out_unlock;
1113 }
1114
1115 /* Don't allow name to change at remount */
1116 if (opts.name && strcmp(opts.name, root->name)) {
1117 ret = -EINVAL; 1238 ret = -EINVAL;
1239 drop_parsed_module_refcounts(opts.subsys_bits);
1118 goto out_unlock; 1240 goto out_unlock;
1119 } 1241 }
1120 1242
1121 ret = rebind_subsystems(root, opts.subsys_bits); 1243 ret = rebind_subsystems(root, opts.subsys_bits);
1122 if (ret) 1244 if (ret) {
1245 drop_parsed_module_refcounts(opts.subsys_bits);
1123 goto out_unlock; 1246 goto out_unlock;
1247 }
1124 1248
1125 /* (re)populate subsystem files */ 1249 /* (re)populate subsystem files */
1126 cgroup_populate_dir(cgrp); 1250 cgroup_populate_dir(cgrp);
@@ -1151,6 +1275,8 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
1151 INIT_LIST_HEAD(&cgrp->release_list); 1275 INIT_LIST_HEAD(&cgrp->release_list);
1152 INIT_LIST_HEAD(&cgrp->pidlists); 1276 INIT_LIST_HEAD(&cgrp->pidlists);
1153 mutex_init(&cgrp->pidlist_mutex); 1277 mutex_init(&cgrp->pidlist_mutex);
1278 INIT_LIST_HEAD(&cgrp->event_list);
1279 spin_lock_init(&cgrp->event_list_lock);
1154} 1280}
1155 1281
1156static void init_cgroup_root(struct cgroupfs_root *root) 1282static void init_cgroup_root(struct cgroupfs_root *root)
@@ -1306,7 +1432,9 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
1306 struct cgroupfs_root *new_root; 1432 struct cgroupfs_root *new_root;
1307 1433
1308 /* First find the desired set of subsystems */ 1434 /* First find the desired set of subsystems */
1435 mutex_lock(&cgroup_mutex);
1309 ret = parse_cgroupfs_options(data, &opts); 1436 ret = parse_cgroupfs_options(data, &opts);
1437 mutex_unlock(&cgroup_mutex);
1310 if (ret) 1438 if (ret)
1311 goto out_err; 1439 goto out_err;
1312 1440
@@ -1317,7 +1445,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
1317 new_root = cgroup_root_from_opts(&opts); 1445 new_root = cgroup_root_from_opts(&opts);
1318 if (IS_ERR(new_root)) { 1446 if (IS_ERR(new_root)) {
1319 ret = PTR_ERR(new_root); 1447 ret = PTR_ERR(new_root);
1320 goto out_err; 1448 goto drop_modules;
1321 } 1449 }
1322 opts.new_root = new_root; 1450 opts.new_root = new_root;
1323 1451
@@ -1326,7 +1454,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
1326 if (IS_ERR(sb)) { 1454 if (IS_ERR(sb)) {
1327 ret = PTR_ERR(sb); 1455 ret = PTR_ERR(sb);
1328 cgroup_drop_root(opts.new_root); 1456 cgroup_drop_root(opts.new_root);
1329 goto out_err; 1457 goto drop_modules;
1330 } 1458 }
1331 1459
1332 root = sb->s_fs_info; 1460 root = sb->s_fs_info;
@@ -1382,6 +1510,11 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
1382 free_cg_links(&tmp_cg_links); 1510 free_cg_links(&tmp_cg_links);
1383 goto drop_new_super; 1511 goto drop_new_super;
1384 } 1512 }
1513 /*
1514 * There must be no failure case after here, since rebinding
1515 * takes care of subsystems' refcounts, which are explicitly
1516 * dropped in the failure exit path.
1517 */
1385 1518
1386 /* EBUSY should be the only error here */ 1519 /* EBUSY should be the only error here */
1387 BUG_ON(ret); 1520 BUG_ON(ret);
@@ -1420,6 +1553,8 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
1420 * any) is not needed 1553 * any) is not needed
1421 */ 1554 */
1422 cgroup_drop_root(opts.new_root); 1555 cgroup_drop_root(opts.new_root);
1556 /* no subsys rebinding, so refcounts don't change */
1557 drop_parsed_module_refcounts(opts.subsys_bits);
1423 } 1558 }
1424 1559
1425 simple_set_mnt(mnt, sb); 1560 simple_set_mnt(mnt, sb);
@@ -1429,6 +1564,8 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
1429 1564
1430 drop_new_super: 1565 drop_new_super:
1431 deactivate_locked_super(sb); 1566 deactivate_locked_super(sb);
1567 drop_modules:
1568 drop_parsed_module_refcounts(opts.subsys_bits);
1432 out_err: 1569 out_err:
1433 kfree(opts.release_agent); 1570 kfree(opts.release_agent);
1434 kfree(opts.name); 1571 kfree(opts.name);
@@ -1542,6 +1679,7 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
1542 memmove(buf, start, buf + buflen - start); 1679 memmove(buf, start, buf + buflen - start);
1543 return 0; 1680 return 0;
1544} 1681}
1682EXPORT_SYMBOL_GPL(cgroup_path);
1545 1683
1546/** 1684/**
1547 * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp' 1685 * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
@@ -1554,7 +1692,7 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
1554int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) 1692int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1555{ 1693{
1556 int retval = 0; 1694 int retval = 0;
1557 struct cgroup_subsys *ss; 1695 struct cgroup_subsys *ss, *failed_ss = NULL;
1558 struct cgroup *oldcgrp; 1696 struct cgroup *oldcgrp;
1559 struct css_set *cg; 1697 struct css_set *cg;
1560 struct css_set *newcg; 1698 struct css_set *newcg;
@@ -1568,8 +1706,16 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1568 for_each_subsys(root, ss) { 1706 for_each_subsys(root, ss) {
1569 if (ss->can_attach) { 1707 if (ss->can_attach) {
1570 retval = ss->can_attach(ss, cgrp, tsk, false); 1708 retval = ss->can_attach(ss, cgrp, tsk, false);
1571 if (retval) 1709 if (retval) {
1572 return retval; 1710 /*
1711 * Remember on which subsystem the can_attach()
1712 * failed, so that we only call cancel_attach()
1713 * against the subsystems whose can_attach()
1714 * succeeded. (See below)
1715 */
1716 failed_ss = ss;
1717 goto out;
1718 }
1573 } 1719 }
1574 } 1720 }
1575 1721
@@ -1583,14 +1729,17 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1583 */ 1729 */
1584 newcg = find_css_set(cg, cgrp); 1730 newcg = find_css_set(cg, cgrp);
1585 put_css_set(cg); 1731 put_css_set(cg);
1586 if (!newcg) 1732 if (!newcg) {
1587 return -ENOMEM; 1733 retval = -ENOMEM;
1734 goto out;
1735 }
1588 1736
1589 task_lock(tsk); 1737 task_lock(tsk);
1590 if (tsk->flags & PF_EXITING) { 1738 if (tsk->flags & PF_EXITING) {
1591 task_unlock(tsk); 1739 task_unlock(tsk);
1592 put_css_set(newcg); 1740 put_css_set(newcg);
1593 return -ESRCH; 1741 retval = -ESRCH;
1742 goto out;
1594 } 1743 }
1595 rcu_assign_pointer(tsk->cgroups, newcg); 1744 rcu_assign_pointer(tsk->cgroups, newcg);
1596 task_unlock(tsk); 1745 task_unlock(tsk);
@@ -1616,7 +1765,22 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
1616 * is no longer empty. 1765 * is no longer empty.
1617 */ 1766 */
1618 cgroup_wakeup_rmdir_waiter(cgrp); 1767 cgroup_wakeup_rmdir_waiter(cgrp);
1619 return 0; 1768out:
1769 if (retval) {
1770 for_each_subsys(root, ss) {
1771 if (ss == failed_ss)
1772 /*
1773 * This subsystem was the one that failed the
1774 * can_attach() check earlier, so we don't need
1775 * to call cancel_attach() against it or any
1776 * remaining subsystems.
1777 */
1778 break;
1779 if (ss->cancel_attach)
1780 ss->cancel_attach(ss, cgrp, tsk, false);
1781 }
1782 }
1783 return retval;
1620} 1784}
1621 1785
1622/* 1786/*
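The new "out:" path calls cancel_attach() only for subsystems whose can_attach() already succeeded, stopping at failed_ss. The same validate-then-unwind shape, reduced to a generic helper with placeholder hook names (not the cgroup API itself):

/* Illustrative two-phase attach: prepare all, cancel only those that passed. */
struct hook {
        int (*prepare)(void *ctx);
        void (*cancel)(void *ctx);
};

static int run_hooks(struct hook *hooks, int n, void *ctx)
{
        int i, ret = 0;

        for (i = 0; i < n; i++) {
                if (!hooks[i].prepare)
                        continue;
                ret = hooks[i].prepare(ctx);
                if (ret)
                        break;                  /* hooks[i] itself failed */
        }
        if (ret) {
                while (--i >= 0)                /* unwind, skipping the failer */
                        if (hooks[i].cancel)
                                hooks[i].cancel(ctx);
        }
        return ret;
}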
@@ -1682,6 +1846,7 @@ bool cgroup_lock_live_group(struct cgroup *cgrp)
1682 } 1846 }
1683 return true; 1847 return true;
1684} 1848}
1849EXPORT_SYMBOL_GPL(cgroup_lock_live_group);
1685 1850
1686static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft, 1851static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
1687 const char *buffer) 1852 const char *buffer)
@@ -1950,6 +2115,16 @@ static const struct inode_operations cgroup_dir_inode_operations = {
1950 .rename = cgroup_rename, 2115 .rename = cgroup_rename,
1951}; 2116};
1952 2117
2118/*
2119 * Check if a file is a control file
2120 */
2121static inline struct cftype *__file_cft(struct file *file)
2122{
2123 if (file->f_dentry->d_inode->i_fop != &cgroup_file_operations)
2124 return ERR_PTR(-EINVAL);
2125 return __d_cft(file->f_dentry);
2126}
2127
1953static int cgroup_create_file(struct dentry *dentry, mode_t mode, 2128static int cgroup_create_file(struct dentry *dentry, mode_t mode,
1954 struct super_block *sb) 2129 struct super_block *sb)
1955{ 2130{
@@ -2069,6 +2244,7 @@ int cgroup_add_file(struct cgroup *cgrp,
2069 error = PTR_ERR(dentry); 2244 error = PTR_ERR(dentry);
2070 return error; 2245 return error;
2071} 2246}
2247EXPORT_SYMBOL_GPL(cgroup_add_file);
2072 2248
2073int cgroup_add_files(struct cgroup *cgrp, 2249int cgroup_add_files(struct cgroup *cgrp,
2074 struct cgroup_subsys *subsys, 2250 struct cgroup_subsys *subsys,
@@ -2083,6 +2259,7 @@ int cgroup_add_files(struct cgroup *cgrp,
2083 } 2259 }
2084 return 0; 2260 return 0;
2085} 2261}
2262EXPORT_SYMBOL_GPL(cgroup_add_files);
2086 2263
2087/** 2264/**
2088 * cgroup_task_count - count the number of tasks in a cgroup. 2265 * cgroup_task_count - count the number of tasks in a cgroup.
@@ -2468,7 +2645,8 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
2468{ 2645{
2469 struct cgroup_pidlist *l; 2646 struct cgroup_pidlist *l;
2470 /* don't need task_nsproxy() if we're looking at ourself */ 2647 /* don't need task_nsproxy() if we're looking at ourself */
2471 struct pid_namespace *ns = get_pid_ns(current->nsproxy->pid_ns); 2648 struct pid_namespace *ns = current->nsproxy->pid_ns;
2649
2472 /* 2650 /*
2473 * We can't drop the pidlist_mutex before taking the l->mutex in case 2651 * We can't drop the pidlist_mutex before taking the l->mutex in case
2474 * the last ref-holder is trying to remove l from the list at the same 2652 * the last ref-holder is trying to remove l from the list at the same
@@ -2478,8 +2656,6 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
2478 mutex_lock(&cgrp->pidlist_mutex); 2656 mutex_lock(&cgrp->pidlist_mutex);
2479 list_for_each_entry(l, &cgrp->pidlists, links) { 2657 list_for_each_entry(l, &cgrp->pidlists, links) {
2480 if (l->key.type == type && l->key.ns == ns) { 2658 if (l->key.type == type && l->key.ns == ns) {
2481 /* found a matching list - drop the extra refcount */
2482 put_pid_ns(ns);
2483 /* make sure l doesn't vanish out from under us */ 2659 /* make sure l doesn't vanish out from under us */
2484 down_write(&l->mutex); 2660 down_write(&l->mutex);
2485 mutex_unlock(&cgrp->pidlist_mutex); 2661 mutex_unlock(&cgrp->pidlist_mutex);
@@ -2490,13 +2666,12 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
2490 l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL); 2666 l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
2491 if (!l) { 2667 if (!l) {
2492 mutex_unlock(&cgrp->pidlist_mutex); 2668 mutex_unlock(&cgrp->pidlist_mutex);
2493 put_pid_ns(ns);
2494 return l; 2669 return l;
2495 } 2670 }
2496 init_rwsem(&l->mutex); 2671 init_rwsem(&l->mutex);
2497 down_write(&l->mutex); 2672 down_write(&l->mutex);
2498 l->key.type = type; 2673 l->key.type = type;
2499 l->key.ns = ns; 2674 l->key.ns = get_pid_ns(ns);
2500 l->use_count = 0; /* don't increment here */ 2675 l->use_count = 0; /* don't increment here */
2501 l->list = NULL; 2676 l->list = NULL;
2502 l->owner = cgrp; 2677 l->owner = cgrp;
@@ -2804,6 +2979,174 @@ static int cgroup_write_notify_on_release(struct cgroup *cgrp,
2804} 2979}
2805 2980
2806/* 2981/*
2982 * Unregister event and free resources.
2983 *
2984 * Gets called from workqueue.
2985 */
2986static void cgroup_event_remove(struct work_struct *work)
2987{
2988 struct cgroup_event *event = container_of(work, struct cgroup_event,
2989 remove);
2990 struct cgroup *cgrp = event->cgrp;
2991
2992 /* TODO: check return code */
2993 event->cft->unregister_event(cgrp, event->cft, event->eventfd);
2994
2995 eventfd_ctx_put(event->eventfd);
2996 kfree(event);
2997 dput(cgrp->dentry);
2998}
2999
3000/*
3001 * Gets called on POLLHUP on eventfd when user closes it.
3002 *
3003 * Called with wqh->lock held and interrupts disabled.
3004 */
3005static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
3006 int sync, void *key)
3007{
3008 struct cgroup_event *event = container_of(wait,
3009 struct cgroup_event, wait);
3010 struct cgroup *cgrp = event->cgrp;
3011 unsigned long flags = (unsigned long)key;
3012
3013 if (flags & POLLHUP) {
3014 remove_wait_queue_locked(event->wqh, &event->wait);
3015 spin_lock(&cgrp->event_list_lock);
3016 list_del(&event->list);
3017 spin_unlock(&cgrp->event_list_lock);
3018 /*
3019 * We are in atomic context, but cgroup_event_remove() may
3020 * sleep, so we have to call it in workqueue.
3021 */
3022 schedule_work(&event->remove);
3023 }
3024
3025 return 0;
3026}
3027
3028static void cgroup_event_ptable_queue_proc(struct file *file,
3029 wait_queue_head_t *wqh, poll_table *pt)
3030{
3031 struct cgroup_event *event = container_of(pt,
3032 struct cgroup_event, pt);
3033
3034 event->wqh = wqh;
3035 add_wait_queue(wqh, &event->wait);
3036}
3037
3038/*
3039 * Parse input and register new cgroup event handler.
3040 *
3041 * Input must be in format '<event_fd> <control_fd> <args>'.
3042 * Interpretation of args is defined by control file implementation.
3043 */
3044static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
3045 const char *buffer)
3046{
3047 struct cgroup_event *event = NULL;
3048 unsigned int efd, cfd;
3049 struct file *efile = NULL;
3050 struct file *cfile = NULL;
3051 char *endp;
3052 int ret;
3053
3054 efd = simple_strtoul(buffer, &endp, 10);
3055 if (*endp != ' ')
3056 return -EINVAL;
3057 buffer = endp + 1;
3058
3059 cfd = simple_strtoul(buffer, &endp, 10);
3060 if ((*endp != ' ') && (*endp != '\0'))
3061 return -EINVAL;
3062 buffer = endp + 1;
3063
3064 event = kzalloc(sizeof(*event), GFP_KERNEL);
3065 if (!event)
3066 return -ENOMEM;
3067 event->cgrp = cgrp;
3068 INIT_LIST_HEAD(&event->list);
3069 init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
3070 init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
3071 INIT_WORK(&event->remove, cgroup_event_remove);
3072
3073 efile = eventfd_fget(efd);
3074 if (IS_ERR(efile)) {
3075 ret = PTR_ERR(efile);
3076 goto fail;
3077 }
3078
3079 event->eventfd = eventfd_ctx_fileget(efile);
3080 if (IS_ERR(event->eventfd)) {
3081 ret = PTR_ERR(event->eventfd);
3082 goto fail;
3083 }
3084
3085 cfile = fget(cfd);
3086 if (!cfile) {
3087 ret = -EBADF;
3088 goto fail;
3089 }
3090
3091 /* the process need read permission on control file */
3092 ret = file_permission(cfile, MAY_READ);
3093 if (ret < 0)
3094 goto fail;
3095
3096 event->cft = __file_cft(cfile);
3097 if (IS_ERR(event->cft)) {
3098 ret = PTR_ERR(event->cft);
3099 goto fail;
3100 }
3101
3102 if (!event->cft->register_event || !event->cft->unregister_event) {
3103 ret = -EINVAL;
3104 goto fail;
3105 }
3106
3107 ret = event->cft->register_event(cgrp, event->cft,
3108 event->eventfd, buffer);
3109 if (ret)
3110 goto fail;
3111
3112 if (efile->f_op->poll(efile, &event->pt) & POLLHUP) {
3113 event->cft->unregister_event(cgrp, event->cft, event->eventfd);
3114 ret = 0;
3115 goto fail;
3116 }
3117
3118 /*
3119 * Events should be removed after rmdir of cgroup directory, but before
3120 * destroying subsystem state objects. Let's take reference to cgroup
3121 * directory dentry to do that.
3122 */
3123 dget(cgrp->dentry);
3124
3125 spin_lock(&cgrp->event_list_lock);
3126 list_add(&event->list, &cgrp->event_list);
3127 spin_unlock(&cgrp->event_list_lock);
3128
3129 fput(cfile);
3130 fput(efile);
3131
3132 return 0;
3133
3134fail:
3135 if (cfile)
3136 fput(cfile);
3137
3138 if (event && event->eventfd && !IS_ERR(event->eventfd))
3139 eventfd_ctx_put(event->eventfd);
3140
3141 if (!IS_ERR_OR_NULL(efile))
3142 fput(efile);
3143
3144 kfree(event);
3145
3146 return ret;
3147}
3148
3149/*
2807 * for the common functions, 'private' gives the type of file 3150 * for the common functions, 'private' gives the type of file
2808 */ 3151 */
2809/* for hysterical raisins, we can't put this on the older files */ 3152/* for hysterical raisins, we can't put this on the older files */
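cgroup_write_event_control() parses "<event_fd> <control_fd> <args>" written into cgroup.event_control and ties the eventfd to the named control file. A minimal userspace sketch of driving that interface; the mount point, the control file (a memcg usage threshold) and the 4 MiB argument are assumptions for illustration, not mandated by this patch:

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/eventfd.h>

int main(void)
{
        char buf[64];
        uint64_t count;
        int efd = eventfd(0, 0);
        int cfd = open("/cgroup/test/memory.usage_in_bytes", O_RDONLY);
        int wfd = open("/cgroup/test/cgroup.event_control", O_WRONLY);

        if (efd < 0 || cfd < 0 || wfd < 0) {
                perror("setup");
                return 1;
        }
        /* "<event_fd> <control_fd> <args>", as parsed by cgroup_write_event_control() */
        snprintf(buf, sizeof(buf), "%d %d %llu", efd, cfd, 4ULL << 20);
        if (write(wfd, buf, strlen(buf)) < 0) {
                perror("cgroup.event_control");
                return 1;
        }
        if (read(efd, &count, sizeof(count)) == sizeof(count))  /* blocks until notified */
                printf("event fired %llu time(s)\n", (unsigned long long)count);
        return 0;
}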
@@ -2828,6 +3171,11 @@ static struct cftype files[] = {
2828 .read_u64 = cgroup_read_notify_on_release, 3171 .read_u64 = cgroup_read_notify_on_release,
2829 .write_u64 = cgroup_write_notify_on_release, 3172 .write_u64 = cgroup_write_notify_on_release,
2830 }, 3173 },
3174 {
3175 .name = CGROUP_FILE_GENERIC_PREFIX "event_control",
3176 .write_string = cgroup_write_event_control,
3177 .mode = S_IWUGO,
3178 },
2831}; 3179};
2832 3180
2833static struct cftype cft_release_agent = { 3181static struct cftype cft_release_agent = {
@@ -2892,8 +3240,14 @@ static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
2892 /* We need to take each hierarchy_mutex in a consistent order */ 3240 /* We need to take each hierarchy_mutex in a consistent order */
2893 int i; 3241 int i;
2894 3242
3243 /*
3244 * No worry about a race with rebind_subsystems that might mess up the
3245 * locking order, since both parties are under cgroup_mutex.
3246 */
2895 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 3247 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2896 struct cgroup_subsys *ss = subsys[i]; 3248 struct cgroup_subsys *ss = subsys[i];
3249 if (ss == NULL)
3250 continue;
2897 if (ss->root == root) 3251 if (ss->root == root)
2898 mutex_lock(&ss->hierarchy_mutex); 3252 mutex_lock(&ss->hierarchy_mutex);
2899 } 3253 }
@@ -2905,6 +3259,8 @@ static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
2905 3259
2906 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 3260 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
2907 struct cgroup_subsys *ss = subsys[i]; 3261 struct cgroup_subsys *ss = subsys[i];
3262 if (ss == NULL)
3263 continue;
2908 if (ss->root == root) 3264 if (ss->root == root)
2909 mutex_unlock(&ss->hierarchy_mutex); 3265 mutex_unlock(&ss->hierarchy_mutex);
2910 } 3266 }
@@ -3028,11 +3384,16 @@ static int cgroup_has_css_refs(struct cgroup *cgrp)
3028 * synchronization other than RCU, and the subsystem linked 3384 * synchronization other than RCU, and the subsystem linked
3029 * list isn't RCU-safe */ 3385 * list isn't RCU-safe */
3030 int i; 3386 int i;
3387 /*
3388 * We won't need to lock the subsys array, because the subsystems
3389 * we're concerned about aren't going anywhere since our cgroup root
3390 * has a reference on them.
3391 */
3031 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 3392 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3032 struct cgroup_subsys *ss = subsys[i]; 3393 struct cgroup_subsys *ss = subsys[i];
3033 struct cgroup_subsys_state *css; 3394 struct cgroup_subsys_state *css;
3034 /* Skip subsystems not in this hierarchy */ 3395 /* Skip subsystems not present or not in this hierarchy */
3035 if (ss->root != cgrp->root) 3396 if (ss == NULL || ss->root != cgrp->root)
3036 continue; 3397 continue;
3037 css = cgrp->subsys[ss->subsys_id]; 3398 css = cgrp->subsys[ss->subsys_id];
3038 /* When called from check_for_release() it's possible 3399 /* When called from check_for_release() it's possible
@@ -3106,6 +3467,7 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
3106 struct dentry *d; 3467 struct dentry *d;
3107 struct cgroup *parent; 3468 struct cgroup *parent;
3108 DEFINE_WAIT(wait); 3469 DEFINE_WAIT(wait);
3470 struct cgroup_event *event, *tmp;
3109 int ret; 3471 int ret;
3110 3472
3111 /* the vfs holds both inode->i_mutex already */ 3473 /* the vfs holds both inode->i_mutex already */
@@ -3189,6 +3551,20 @@ again:
3189 set_bit(CGRP_RELEASABLE, &parent->flags); 3551 set_bit(CGRP_RELEASABLE, &parent->flags);
3190 check_for_release(parent); 3552 check_for_release(parent);
3191 3553
3554 /*
3555 * Unregister events and notify userspace.
3556 * Notify userspace about cgroup removing only after rmdir of cgroup
3557 * directory to avoid race between userspace and kernelspace
3558 */
3559 spin_lock(&cgrp->event_list_lock);
3560 list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
3561 list_del(&event->list);
3562 remove_wait_queue(event->wqh, &event->wait);
3563 eventfd_signal(event->eventfd, 1);
3564 schedule_work(&event->remove);
3565 }
3566 spin_unlock(&cgrp->event_list_lock);
3567
3192 mutex_unlock(&cgroup_mutex); 3568 mutex_unlock(&cgroup_mutex);
3193 return 0; 3569 return 0;
3194} 3570}
@@ -3223,7 +3599,196 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
3223 mutex_init(&ss->hierarchy_mutex); 3599 mutex_init(&ss->hierarchy_mutex);
3224 lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key); 3600 lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
3225 ss->active = 1; 3601 ss->active = 1;
3602
3603 /* this function shouldn't be used with modular subsystems, since they
3604 * need to register a subsys_id, among other things */
3605 BUG_ON(ss->module);
3606}
3607
3608/**
3609 * cgroup_load_subsys: load and register a modular subsystem at runtime
3610 * @ss: the subsystem to load
3611 *
3612 * This function should be called in a modular subsystem's initcall. If the
3613 * subsytem is built as a module, it will be assigned a new subsys_id and set
3614 * up for use. If the subsystem is built-in anyway, work is delegated to the
3615 * simpler cgroup_init_subsys.
3616 */
3617int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
3618{
3619 int i;
3620 struct cgroup_subsys_state *css;
3621
3622 /* check name and function validity */
3623 if (ss->name == NULL || strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN ||
3624 ss->create == NULL || ss->destroy == NULL)
3625 return -EINVAL;
3626
3627 /*
3628 * we don't support callbacks in modular subsystems. this check is
3629 * before the ss->module check for consistency; a subsystem that could
3630 * be a module should still have no callbacks even if the user isn't
3631 * compiling it as one.
3632 */
3633 if (ss->fork || ss->exit)
3634 return -EINVAL;
3635
3636 /*
3637 * an optionally modular subsystem is built-in: we want to do nothing,
3638 * since cgroup_init_subsys will have already taken care of it.
3639 */
3640 if (ss->module == NULL) {
3641 /* a few sanity checks */
3642 BUG_ON(ss->subsys_id >= CGROUP_BUILTIN_SUBSYS_COUNT);
3643 BUG_ON(subsys[ss->subsys_id] != ss);
3644 return 0;
3645 }
3646
3647 /*
3648 * need to register a subsys id before anything else - for example,
3649 * init_cgroup_css needs it.
3650 */
3651 mutex_lock(&cgroup_mutex);
3652 /* find the first empty slot in the array */
3653 for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
3654 if (subsys[i] == NULL)
3655 break;
3656 }
3657 if (i == CGROUP_SUBSYS_COUNT) {
3658 /* maximum number of subsystems already registered! */
3659 mutex_unlock(&cgroup_mutex);
3660 return -EBUSY;
3661 }
3662 /* assign ourselves the subsys_id */
3663 ss->subsys_id = i;
3664 subsys[i] = ss;
3665
3666 /*
3667 * no ss->create seems to need anything important in the ss struct, so
3668 * this can happen first (i.e. before the rootnode attachment).
3669 */
3670 css = ss->create(ss, dummytop);
3671 if (IS_ERR(css)) {
3672 /* failure case - need to deassign the subsys[] slot. */
3673 subsys[i] = NULL;
3674 mutex_unlock(&cgroup_mutex);
3675 return PTR_ERR(css);
3676 }
3677
3678 list_add(&ss->sibling, &rootnode.subsys_list);
3679 ss->root = &rootnode;
3680
3681 /* our new subsystem will be attached to the dummy hierarchy. */
3682 init_cgroup_css(css, ss, dummytop);
3683 /* init_idr must be after init_cgroup_css because it sets css->id. */
3684 if (ss->use_id) {
3685 int ret = cgroup_init_idr(ss, css);
3686 if (ret) {
3687 dummytop->subsys[ss->subsys_id] = NULL;
3688 ss->destroy(ss, dummytop);
3689 subsys[i] = NULL;
3690 mutex_unlock(&cgroup_mutex);
3691 return ret;
3692 }
3693 }
3694
3695 /*
3696 * Now we need to entangle the css into the existing css_sets. unlike
3697 * in cgroup_init_subsys, there are now multiple css_sets, so each one
3698 * will need a new pointer to it; done by iterating the css_set_table.
3699 * furthermore, modifying the existing css_sets will corrupt the hash
3700 * table state, so each changed css_set will need its hash recomputed.
3701 * this is all done under the css_set_lock.
3702 */
3703 write_lock(&css_set_lock);
3704 for (i = 0; i < CSS_SET_TABLE_SIZE; i++) {
3705 struct css_set *cg;
3706 struct hlist_node *node, *tmp;
3707 struct hlist_head *bucket = &css_set_table[i], *new_bucket;
3708
3709 hlist_for_each_entry_safe(cg, node, tmp, bucket, hlist) {
3710 /* skip entries that we already rehashed */
3711 if (cg->subsys[ss->subsys_id])
3712 continue;
3713 /* remove existing entry */
3714 hlist_del(&cg->hlist);
3715 /* set new value */
3716 cg->subsys[ss->subsys_id] = css;
3717 /* recompute hash and restore entry */
3718 new_bucket = css_set_hash(cg->subsys);
3719 hlist_add_head(&cg->hlist, new_bucket);
3720 }
3721 }
3722 write_unlock(&css_set_lock);
3723
3724 mutex_init(&ss->hierarchy_mutex);
3725 lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
3726 ss->active = 1;
3727
3728 /* success! */
3729 mutex_unlock(&cgroup_mutex);
3730 return 0;
3226} 3731}
3732EXPORT_SYMBOL_GPL(cgroup_load_subsys);
3733
3734/**
3735 * cgroup_unload_subsys: unload a modular subsystem
3736 * @ss: the subsystem to unload
3737 *
3738 * This function should be called in a modular subsystem's exitcall. When this
3739 * function is invoked, the refcount on the subsystem's module will be 0, so
3740 * the subsystem will not be attached to any hierarchy.
3741 */
3742void cgroup_unload_subsys(struct cgroup_subsys *ss)
3743{
3744 struct cg_cgroup_link *link;
3745 struct hlist_head *hhead;
3746
3747 BUG_ON(ss->module == NULL);
3748
3749 /*
3750 * we shouldn't be called if the subsystem is in use, and the use of
3751 * try_module_get in parse_cgroupfs_options should ensure that it
3752 * doesn't start being used while we're killing it off.
3753 */
3754 BUG_ON(ss->root != &rootnode);
3755
3756 mutex_lock(&cgroup_mutex);
3757 /* deassign the subsys_id */
3758 BUG_ON(ss->subsys_id < CGROUP_BUILTIN_SUBSYS_COUNT);
3759 subsys[ss->subsys_id] = NULL;
3760
3761 /* remove subsystem from rootnode's list of subsystems */
3762 list_del(&ss->sibling);
3763
3764 /*
3765 * disentangle the css from all css_sets attached to the dummytop. as
3766 * in loading, we need to pay our respects to the hashtable gods.
3767 */
3768 write_lock(&css_set_lock);
3769 list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) {
3770 struct css_set *cg = link->cg;
3771
3772 hlist_del(&cg->hlist);
3773 BUG_ON(!cg->subsys[ss->subsys_id]);
3774 cg->subsys[ss->subsys_id] = NULL;
3775 hhead = css_set_hash(cg->subsys);
3776 hlist_add_head(&cg->hlist, hhead);
3777 }
3778 write_unlock(&css_set_lock);
3779
3780 /*
3781 * remove subsystem's css from the dummytop and free it - need to free
3782 * before marking as null because ss->destroy needs the cgrp->subsys
3783 * pointer to find their state. note that this also takes care of
3784 * freeing the css_id.
3785 */
3786 ss->destroy(ss, dummytop);
3787 dummytop->subsys[ss->subsys_id] = NULL;
3788
3789 mutex_unlock(&cgroup_mutex);
3790}
3791EXPORT_SYMBOL_GPL(cgroup_unload_subsys);
3227 3792
3228/** 3793/**
3229 * cgroup_init_early - cgroup initialization at system boot 3794 * cgroup_init_early - cgroup initialization at system boot
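With cgroup_load_subsys() and cgroup_unload_subsys() exported, a subsystem built as a module registers itself from its initcall and unregisters from its exitcall. A schematic sketch of that shape; the "demo" name and the trivial create/destroy bodies are invented, and a real subsystem still needs its cgroup_subsys.h entry and Kconfig plumbing:

#include <linux/module.h>
#include <linux/cgroup.h>
#include <linux/slab.h>

static struct cgroup_subsys_state *demo_create(struct cgroup_subsys *ss,
                                               struct cgroup *cgrp)
{
        struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);

        return css ? css : ERR_PTR(-ENOMEM);
}

static void demo_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
{
        kfree(cgrp->subsys[ss->subsys_id]);
}

struct cgroup_subsys demo_subsys = {
        .name           = "demo",
        .create         = demo_create,
        .destroy        = demo_destroy,
        .module         = THIS_MODULE,
        /* no .fork/.exit: cgroup_load_subsys() rejects them for modules */
};

static int __init demo_init(void)
{
        return cgroup_load_subsys(&demo_subsys);        /* assigns subsys_id */
}

static void __exit demo_exit(void)
{
        cgroup_unload_subsys(&demo_subsys);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");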
@@ -3253,7 +3818,8 @@ int __init cgroup_init_early(void)
3253 for (i = 0; i < CSS_SET_TABLE_SIZE; i++) 3818 for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
3254 INIT_HLIST_HEAD(&css_set_table[i]); 3819 INIT_HLIST_HEAD(&css_set_table[i]);
3255 3820
3256 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 3821 /* at bootup time, we don't worry about modular subsystems */
3822 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
3257 struct cgroup_subsys *ss = subsys[i]; 3823 struct cgroup_subsys *ss = subsys[i];
3258 3824
3259 BUG_ON(!ss->name); 3825 BUG_ON(!ss->name);
@@ -3288,12 +3854,13 @@ int __init cgroup_init(void)
3288 if (err) 3854 if (err)
3289 return err; 3855 return err;
3290 3856
3291 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 3857 /* at bootup time, we don't worry about modular subsystems */
3858 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
3292 struct cgroup_subsys *ss = subsys[i]; 3859 struct cgroup_subsys *ss = subsys[i];
3293 if (!ss->early_init) 3860 if (!ss->early_init)
3294 cgroup_init_subsys(ss); 3861 cgroup_init_subsys(ss);
3295 if (ss->use_id) 3862 if (ss->use_id)
3296 cgroup_subsys_init_idr(ss); 3863 cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]);
3297 } 3864 }
3298 3865
3299 /* Add init_css_set to the hash table */ 3866 /* Add init_css_set to the hash table */
@@ -3397,9 +3964,16 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v)
3397 int i; 3964 int i;
3398 3965
3399 seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n"); 3966 seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
3967 /*
3968 * ideally we don't want subsystems moving around while we do this.
3969 * cgroup_mutex is also necessary to guarantee an atomic snapshot of
3970 * subsys/hierarchy state.
3971 */
3400 mutex_lock(&cgroup_mutex); 3972 mutex_lock(&cgroup_mutex);
3401 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 3973 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3402 struct cgroup_subsys *ss = subsys[i]; 3974 struct cgroup_subsys *ss = subsys[i];
3975 if (ss == NULL)
3976 continue;
3403 seq_printf(m, "%s\t%d\t%d\t%d\n", 3977 seq_printf(m, "%s\t%d\t%d\t%d\n",
3404 ss->name, ss->root->hierarchy_id, 3978 ss->name, ss->root->hierarchy_id,
3405 ss->root->number_of_cgroups, !ss->disabled); 3979 ss->root->number_of_cgroups, !ss->disabled);
@@ -3457,7 +4031,12 @@ void cgroup_fork_callbacks(struct task_struct *child)
3457{ 4031{
3458 if (need_forkexit_callback) { 4032 if (need_forkexit_callback) {
3459 int i; 4033 int i;
3460 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 4034 /*
4035 * forkexit callbacks are only supported for builtin
4036 * subsystems, and the builtin section of the subsys array is
4037 * immutable, so we don't need to lock the subsys array here.
4038 */
4039 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
3461 struct cgroup_subsys *ss = subsys[i]; 4040 struct cgroup_subsys *ss = subsys[i];
3462 if (ss->fork) 4041 if (ss->fork)
3463 ss->fork(ss, child); 4042 ss->fork(ss, child);
@@ -3526,7 +4105,11 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
3526 struct css_set *cg; 4105 struct css_set *cg;
3527 4106
3528 if (run_callbacks && need_forkexit_callback) { 4107 if (run_callbacks && need_forkexit_callback) {
3529 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 4108 /*
4109 * modular subsystems can't use callbacks, so no need to lock
4110 * the subsys array
4111 */
4112 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
3530 struct cgroup_subsys *ss = subsys[i]; 4113 struct cgroup_subsys *ss = subsys[i];
3531 if (ss->exit) 4114 if (ss->exit)
3532 ss->exit(ss, tsk); 4115 ss->exit(ss, tsk);
@@ -3720,12 +4303,13 @@ static void check_for_release(struct cgroup *cgrp)
3720 } 4303 }
3721} 4304}
3722 4305
3723void __css_put(struct cgroup_subsys_state *css) 4306/* Caller must verify that the css is not for root cgroup */
4307void __css_put(struct cgroup_subsys_state *css, int count)
3724{ 4308{
3725 struct cgroup *cgrp = css->cgroup; 4309 struct cgroup *cgrp = css->cgroup;
3726 int val; 4310 int val;
3727 rcu_read_lock(); 4311 rcu_read_lock();
3728 val = atomic_dec_return(&css->refcnt); 4312 val = atomic_sub_return(count, &css->refcnt);
3729 if (val == 1) { 4313 if (val == 1) {
3730 if (notify_on_release(cgrp)) { 4314 if (notify_on_release(cgrp)) {
3731 set_bit(CGRP_RELEASABLE, &cgrp->flags); 4315 set_bit(CGRP_RELEASABLE, &cgrp->flags);
@@ -3736,6 +4320,7 @@ void __css_put(struct cgroup_subsys_state *css)
3736 rcu_read_unlock(); 4320 rcu_read_unlock();
3737 WARN_ON_ONCE(val < 1); 4321 WARN_ON_ONCE(val < 1);
3738} 4322}
4323EXPORT_SYMBOL_GPL(__css_put);
3739 4324
3740/* 4325/*
3741 * Notify userspace when a cgroup is released, by running the 4326 * Notify userspace when a cgroup is released, by running the
@@ -3817,8 +4402,11 @@ static int __init cgroup_disable(char *str)
3817 while ((token = strsep(&str, ",")) != NULL) { 4402 while ((token = strsep(&str, ",")) != NULL) {
3818 if (!*token) 4403 if (!*token)
3819 continue; 4404 continue;
3820 4405 /*
3821 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 4406 * cgroup_disable, being at boot time, can't know about module
4407 * subsystems, so we don't worry about them.
4408 */
4409 for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
3822 struct cgroup_subsys *ss = subsys[i]; 4410 struct cgroup_subsys *ss = subsys[i];
3823 4411
3824 if (!strcmp(token, ss->name)) { 4412 if (!strcmp(token, ss->name)) {
@@ -3848,6 +4436,7 @@ unsigned short css_id(struct cgroup_subsys_state *css)
3848 return cssid->id; 4436 return cssid->id;
3849 return 0; 4437 return 0;
3850} 4438}
4439EXPORT_SYMBOL_GPL(css_id);
3851 4440
3852unsigned short css_depth(struct cgroup_subsys_state *css) 4441unsigned short css_depth(struct cgroup_subsys_state *css)
3853{ 4442{
@@ -3857,6 +4446,7 @@ unsigned short css_depth(struct cgroup_subsys_state *css)
3857 return cssid->depth; 4446 return cssid->depth;
3858 return 0; 4447 return 0;
3859} 4448}
4449EXPORT_SYMBOL_GPL(css_depth);
3860 4450
3861bool css_is_ancestor(struct cgroup_subsys_state *child, 4451bool css_is_ancestor(struct cgroup_subsys_state *child,
3862 const struct cgroup_subsys_state *root) 4452 const struct cgroup_subsys_state *root)
@@ -3893,6 +4483,7 @@ void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
3893 spin_unlock(&ss->id_lock); 4483 spin_unlock(&ss->id_lock);
3894 call_rcu(&id->rcu_head, __free_css_id_cb); 4484 call_rcu(&id->rcu_head, __free_css_id_cb);
3895} 4485}
4486EXPORT_SYMBOL_GPL(free_css_id);
3896 4487
3897/* 4488/*
3898 * This is called by init or create(). Then, calls to this function are 4489 * This is called by init or create(). Then, calls to this function are
@@ -3942,15 +4533,14 @@ err_out:
3942 4533
3943} 4534}
3944 4535
3945static int __init cgroup_subsys_init_idr(struct cgroup_subsys *ss) 4536static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss,
4537 struct cgroup_subsys_state *rootcss)
3946{ 4538{
3947 struct css_id *newid; 4539 struct css_id *newid;
3948 struct cgroup_subsys_state *rootcss;
3949 4540
3950 spin_lock_init(&ss->id_lock); 4541 spin_lock_init(&ss->id_lock);
3951 idr_init(&ss->idr); 4542 idr_init(&ss->idr);
3952 4543
3953 rootcss = init_css_set.subsys[ss->subsys_id];
3954 newid = get_new_cssid(ss, 0); 4544 newid = get_new_cssid(ss, 0);
3955 if (IS_ERR(newid)) 4545 if (IS_ERR(newid))
3956 return PTR_ERR(newid); 4546 return PTR_ERR(newid);
@@ -4010,6 +4600,7 @@ struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
4010 4600
4011 return rcu_dereference(cssid->css); 4601 return rcu_dereference(cssid->css);
4012} 4602}
4603EXPORT_SYMBOL_GPL(css_lookup);
4013 4604
4014/** 4605/**
4015 * css_get_next - lookup next cgroup under specified hierarchy. 4606 * css_get_next - lookup next cgroup under specified hierarchy.
diff --git a/kernel/exit.c b/kernel/exit.c
index ce1e48c2d93d..cce59cb5ee6a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -87,7 +87,7 @@ static void __exit_signal(struct task_struct *tsk)
87 87
88 sighand = rcu_dereference_check(tsk->sighand, 88 sighand = rcu_dereference_check(tsk->sighand,
89 rcu_read_lock_held() || 89 rcu_read_lock_held() ||
90 lockdep_is_held(&tasklist_lock)); 90 lockdep_tasklist_lock_is_held());
91 spin_lock(&sighand->siglock); 91 spin_lock(&sighand->siglock);
92 92
93 posix_cpu_timers_exit(tsk); 93 posix_cpu_timers_exit(tsk);
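The exit.c hunk switches the lockdep expression to the new lockdep_tasklist_lock_is_held() helper exported from fork.c. The general pattern is rcu_dereference_check(ptr, cond): the pointer may be read either inside an RCU read-side section or with a specific lock held, and under CONFIG_PROVE_RCU lockdep verifies that at least one condition holds. A small sketch of that pattern with made-up names (cfg_lock and cur_cfg are not from the patch):

#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct cfg { int mtu; };

static DEFINE_SPINLOCK(cfg_lock);
static struct cfg *cur_cfg;

static int read_mtu(void)
{
        struct cfg *c;
        int mtu;

        rcu_read_lock();
        /*
         * Legal either inside rcu_read_lock() or with cfg_lock held;
         * lockdep checks that one of the two is actually true here.
         */
        c = rcu_dereference_check(cur_cfg,
                                  rcu_read_lock_held() ||
                                  lockdep_is_held(&cfg_lock));
        mtu = c ? c->mtu : 0;
        rcu_read_unlock();

        return mtu;
}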
diff --git a/kernel/fork.c b/kernel/fork.c
index b0ec34abc0bb..4799c5f0e6d0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -86,7 +86,14 @@ int max_threads; /* tunable limit on nr_threads */
86DEFINE_PER_CPU(unsigned long, process_counts) = 0; 86DEFINE_PER_CPU(unsigned long, process_counts) = 0;
87 87
88__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ 88__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
89EXPORT_SYMBOL_GPL(tasklist_lock); 89
90#ifdef CONFIG_PROVE_RCU
91int lockdep_tasklist_lock_is_held(void)
92{
93 return lockdep_is_held(&tasklist_lock);
94}
95EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held);
96#endif /* #ifdef CONFIG_PROVE_RCU */
90 97
91int nr_processes(void) 98int nr_processes(void)
92{ 99{
@@ -833,17 +840,6 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
833 /* Thread group counters. */ 840 /* Thread group counters. */
834 thread_group_cputime_init(sig); 841 thread_group_cputime_init(sig);
835 842
836 /* Expiration times and increments. */
837 sig->it[CPUCLOCK_PROF].expires = cputime_zero;
838 sig->it[CPUCLOCK_PROF].incr = cputime_zero;
839 sig->it[CPUCLOCK_VIRT].expires = cputime_zero;
840 sig->it[CPUCLOCK_VIRT].incr = cputime_zero;
841
842 /* Cached expiration times. */
843 sig->cputime_expires.prof_exp = cputime_zero;
844 sig->cputime_expires.virt_exp = cputime_zero;
845 sig->cputime_expires.sched_exp = 0;
846
847 cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur); 843 cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
848 if (cpu_limit != RLIM_INFINITY) { 844 if (cpu_limit != RLIM_INFINITY) {
849 sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit); 845 sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit);
@@ -863,7 +859,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
863 if (clone_flags & CLONE_THREAD) 859 if (clone_flags & CLONE_THREAD)
864 return 0; 860 return 0;
865 861
866 sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); 862 sig = kmem_cache_zalloc(signal_cachep, GFP_KERNEL);
867 tsk->signal = sig; 863 tsk->signal = sig;
868 if (!sig) 864 if (!sig)
869 return -ENOMEM; 865 return -ENOMEM;
@@ -871,46 +867,21 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
871 atomic_set(&sig->count, 1); 867 atomic_set(&sig->count, 1);
872 atomic_set(&sig->live, 1); 868 atomic_set(&sig->live, 1);
873 init_waitqueue_head(&sig->wait_chldexit); 869 init_waitqueue_head(&sig->wait_chldexit);
874 sig->flags = 0;
875 if (clone_flags & CLONE_NEWPID) 870 if (clone_flags & CLONE_NEWPID)
876 sig->flags |= SIGNAL_UNKILLABLE; 871 sig->flags |= SIGNAL_UNKILLABLE;
877 sig->group_exit_code = 0;
878 sig->group_exit_task = NULL;
879 sig->group_stop_count = 0;
880 sig->curr_target = tsk; 872 sig->curr_target = tsk;
881 init_sigpending(&sig->shared_pending); 873 init_sigpending(&sig->shared_pending);
882 INIT_LIST_HEAD(&sig->posix_timers); 874 INIT_LIST_HEAD(&sig->posix_timers);
883 875
884 hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 876 hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
885 sig->it_real_incr.tv64 = 0;
886 sig->real_timer.function = it_real_fn; 877 sig->real_timer.function = it_real_fn;
887 878
888 sig->leader = 0; /* session leadership doesn't inherit */
889 sig->tty_old_pgrp = NULL;
890 sig->tty = NULL;
891
892 sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
893 sig->gtime = cputime_zero;
894 sig->cgtime = cputime_zero;
895#ifndef CONFIG_VIRT_CPU_ACCOUNTING
896 sig->prev_utime = sig->prev_stime = cputime_zero;
897#endif
898 sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
899 sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
900 sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
901 sig->maxrss = sig->cmaxrss = 0;
902 task_io_accounting_init(&sig->ioac);
903 sig->sum_sched_runtime = 0;
904 taskstats_tgid_init(sig);
905
906 task_lock(current->group_leader); 879 task_lock(current->group_leader);
907 memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); 880 memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
908 task_unlock(current->group_leader); 881 task_unlock(current->group_leader);
909 882
910 posix_cpu_timers_init_group(sig); 883 posix_cpu_timers_init_group(sig);
911 884
912 acct_init_pacct(&sig->pacct);
913
914 tty_audit_fork(sig); 885 tty_audit_fork(sig);
915 886
916 sig->oom_adj = current->signal->oom_adj; 887 sig->oom_adj = current->signal->oom_adj;
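
The copy_signal() change above swaps kmem_cache_alloc() for kmem_cache_zalloc(), which is why the long run of explicit zero, NULL and cputime_zero assignments can simply be deleted. A minimal userspace sketch of the same refactoring pattern, with an invented struct and malloc()/calloc() standing in for the slab allocators:

/*
 * Userspace sketch (not kernel code): a zeroing allocator replaces a
 * plain allocation plus field-by-field clearing.  Struct and field
 * names are illustrative only.
 */
#include <stdlib.h>

struct signal_like {
        int flags;
        int group_exit_code;
        int group_stop_count;
        unsigned long utime, stime;
        void *tty;
};

/* Before: plain allocation, every field must be cleared by hand. */
static struct signal_like *alloc_signal_old(void)
{
        struct signal_like *sig = malloc(sizeof(*sig));

        if (!sig)
                return NULL;
        sig->flags = 0;
        sig->group_exit_code = 0;
        sig->group_stop_count = 0;
        sig->utime = sig->stime = 0;
        sig->tty = NULL;
        return sig;
}

/* After: zeroing allocator, only non-zero fields still need touching. */
static struct signal_like *alloc_signal_new(void)
{
        return calloc(1, sizeof(struct signal_like));
}

int main(void)
{
        struct signal_like *a = alloc_signal_old();
        struct signal_like *b = alloc_signal_new();

        free(a);
        free(b);
        return 0;
}
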
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 967e66143e11..03808ed342a6 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -413,17 +413,17 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
413 * 413 *
414 * @return a set of per_cpu pointers to perf events 414 * @return a set of per_cpu pointers to perf events
415 */ 415 */
416struct perf_event ** 416struct perf_event * __percpu *
417register_wide_hw_breakpoint(struct perf_event_attr *attr, 417register_wide_hw_breakpoint(struct perf_event_attr *attr,
418 perf_overflow_handler_t triggered) 418 perf_overflow_handler_t triggered)
419{ 419{
420 struct perf_event **cpu_events, **pevent, *bp; 420 struct perf_event * __percpu *cpu_events, **pevent, *bp;
421 long err; 421 long err;
422 int cpu; 422 int cpu;
423 423
424 cpu_events = alloc_percpu(typeof(*cpu_events)); 424 cpu_events = alloc_percpu(typeof(*cpu_events));
425 if (!cpu_events) 425 if (!cpu_events)
426 return ERR_PTR(-ENOMEM); 426 return (void __percpu __force *)ERR_PTR(-ENOMEM);
427 427
428 get_online_cpus(); 428 get_online_cpus();
429 for_each_online_cpu(cpu) { 429 for_each_online_cpu(cpu) {
@@ -451,7 +451,7 @@ fail:
451 put_online_cpus(); 451 put_online_cpus();
452 452
453 free_percpu(cpu_events); 453 free_percpu(cpu_events);
454 return ERR_PTR(err); 454 return (void __percpu __force *)ERR_PTR(err);
455} 455}
456EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint); 456EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
457 457
@@ -459,7 +459,7 @@ EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
459 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel 459 * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
460 * @cpu_events: the per cpu set of events to unregister 460 * @cpu_events: the per cpu set of events to unregister
461 */ 461 */
462void unregister_wide_hw_breakpoint(struct perf_event **cpu_events) 462void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events)
463{ 463{
464 int cpu; 464 int cpu;
465 struct perf_event **pevent; 465 struct perf_event **pevent;
@@ -489,5 +489,4 @@ struct pmu perf_ops_bp = {
489 .enable = arch_install_hw_breakpoint, 489 .enable = arch_install_hw_breakpoint,
490 .disable = arch_uninstall_hw_breakpoint, 490 .disable = arch_uninstall_hw_breakpoint,
491 .read = hw_breakpoint_pmu_read, 491 .read = hw_breakpoint_pmu_read,
492 .unthrottle = hw_breakpoint_pmu_unthrottle
493}; 492};
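
register_wide_hw_breakpoint() above keeps reporting errors through its pointer return value; the new (void __percpu __force *) casts only exist to satisfy sparse about the address-space annotation. A small userspace sketch of the underlying ERR_PTR()/IS_ERR()/PTR_ERR() convention, reimplemented here for illustration rather than taken from the kernel headers:

/*
 * Errors are encoded in the top page of the address space so a single
 * pointer return can carry either a valid object or a -errno value.
 */
#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error)   { return (void *)error; }
static inline long PTR_ERR(const void *p) { return (long)p; }
static inline int IS_ERR(const void *p)
{
        return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
}

static int table[4];

/* Returns either a usable pointer or an encoded -errno. */
static int *lookup(int idx)
{
        if (idx < 0 || idx >= 4)
                return ERR_PTR(-EINVAL);
        return &table[idx];
}

int main(void)
{
        int *p = lookup(7);

        if (IS_ERR(p))
                printf("lookup failed: %ld\n", PTR_ERR(p));
        return 0;
}
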
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index d70394f12ee9..42ec11b2af8a 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -554,7 +554,7 @@ out:
554 * signal. The occurence is latched into the irq controller hardware 554 * signal. The occurence is latched into the irq controller hardware
555 * and must be acked in order to be reenabled. After the ack another 555 * and must be acked in order to be reenabled. After the ack another
556 * interrupt can happen on the same source even before the first one 556 * interrupt can happen on the same source even before the first one
557 * is handled by the assosiacted event handler. If this happens it 557 * is handled by the associated event handler. If this happens it
558 * might be necessary to disable (mask) the interrupt depending on the 558 * might be necessary to disable (mask) the interrupt depending on the
559 * controller hardware. This requires to reenable the interrupt inside 559 * controller hardware. This requires to reenable the interrupt inside
560 * of the loop which handles the interrupts which have arrived while 560 * of the loop which handles the interrupts which have arrived while
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c
index d06df9c41cba..1ef4ffcdfa55 100644
--- a/kernel/irq/devres.c
+++ b/kernel/irq/devres.c
@@ -42,7 +42,7 @@ static int devm_irq_match(struct device *dev, void *res, void *data)
42 * automatically freed on driver detach. 42 * automatically freed on driver detach.
43 * 43 *
44 * If an IRQ allocated with this function needs to be freed 44 * If an IRQ allocated with this function needs to be freed
45 * separately, dev_free_irq() must be used. 45 * separately, devm_free_irq() must be used.
46 */ 46 */
47int devm_request_threaded_irq(struct device *dev, unsigned int irq, 47int devm_request_threaded_irq(struct device *dev, unsigned int irq,
48 irq_handler_t handler, irq_handler_t thread_fn, 48 irq_handler_t handler, irq_handler_t thread_fn,
@@ -81,7 +81,7 @@ EXPORT_SYMBOL(devm_request_threaded_irq);
81 * Except for the extra @dev argument, this function takes the 81 * Except for the extra @dev argument, this function takes the
82 * same arguments and performs the same function as free_irq(). 82 * same arguments and performs the same function as free_irq().
83 * This function instead of free_irq() should be used to manually 83 * This function instead of free_irq() should be used to manually
84 * free IRQs allocated with dev_request_irq(). 84 * free IRQs allocated with devm_request_irq().
85 */ 85 */
86void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id) 86void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id)
87{ 87{
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index fa034d29cf73..0ed46f3e51e9 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -259,7 +259,8 @@ static void __kprobes __free_insn_slot(struct kprobe_insn_cache *c,
259 struct kprobe_insn_page *kip; 259 struct kprobe_insn_page *kip;
260 260
261 list_for_each_entry(kip, &c->pages, list) { 261 list_for_each_entry(kip, &c->pages, list) {
262 long idx = ((long)slot - (long)kip->insns) / c->insn_size; 262 long idx = ((long)slot - (long)kip->insns) /
263 (c->insn_size * sizeof(kprobe_opcode_t));
263 if (idx >= 0 && idx < slots_per_page(c)) { 264 if (idx >= 0 && idx < slots_per_page(c)) {
264 WARN_ON(kip->slot_used[idx] != SLOT_USED); 265 WARN_ON(kip->slot_used[idx] != SLOT_USED);
265 if (dirty) { 266 if (dirty) {
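
The __free_insn_slot() hunk above fixes the slot-index arithmetic: the byte offset of a slot was being divided by the slot size measured in kprobe_opcode_t units rather than in bytes. A standalone sketch of the two computations, with made-up type and slot sizes:

/*
 * Dividing a byte offset by a size expressed in opcode words gives an
 * index that is too large by sizeof(opcode); dividing by the size in
 * bytes gives the slot number.
 */
#include <stdio.h>
#include <stdint.h>

typedef uint32_t opcode_t;      /* stand-in for kprobe_opcode_t */

#define SLOT_OPCODES 4          /* stand-in for c->insn_size */

int main(void)
{
        opcode_t page[64];
        opcode_t *slot = &page[2 * SLOT_OPCODES];        /* third slot */
        long off = (long)((char *)slot - (char *)page);  /* byte offset: 32 */

        long wrong = off / SLOT_OPCODES;                         /* old: 8 */
        long right = off / (SLOT_OPCODES * sizeof(opcode_t));    /* new: 2 */

        printf("byte offset %ld: old idx %ld, fixed idx %ld\n",
               off, wrong, right);
        return 0;
}
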
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 6b1ccc3f0205..21fe3c426948 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -33,7 +33,7 @@ static ssize_t uevent_seqnum_show(struct kobject *kobj,
33} 33}
34KERNEL_ATTR_RO(uevent_seqnum); 34KERNEL_ATTR_RO(uevent_seqnum);
35 35
36/* uevent helper program, used during early boo */ 36/* uevent helper program, used during early boot */
37static ssize_t uevent_helper_show(struct kobject *kobj, 37static ssize_t uevent_helper_show(struct kobject *kobj,
38 struct kobj_attribute *attr, char *buf) 38 struct kobj_attribute *attr, char *buf)
39{ 39{
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 0c30d0455de1..c927a549db2c 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3211,8 +3211,6 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
3211{ 3211{
3212 unsigned long flags; 3212 unsigned long flags;
3213 3213
3214 trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);
3215
3216 if (unlikely(current->lockdep_recursion)) 3214 if (unlikely(current->lockdep_recursion))
3217 return; 3215 return;
3218 3216
@@ -3220,6 +3218,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
3220 check_flags(flags); 3218 check_flags(flags);
3221 3219
3222 current->lockdep_recursion = 1; 3220 current->lockdep_recursion = 1;
3221 trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);
3223 __lock_acquire(lock, subclass, trylock, read, check, 3222 __lock_acquire(lock, subclass, trylock, read, check,
3224 irqs_disabled_flags(flags), nest_lock, ip, 0); 3223 irqs_disabled_flags(flags), nest_lock, ip, 0);
3225 current->lockdep_recursion = 0; 3224 current->lockdep_recursion = 0;
@@ -3232,14 +3231,13 @@ void lock_release(struct lockdep_map *lock, int nested,
3232{ 3231{
3233 unsigned long flags; 3232 unsigned long flags;
3234 3233
3235 trace_lock_release(lock, nested, ip);
3236
3237 if (unlikely(current->lockdep_recursion)) 3234 if (unlikely(current->lockdep_recursion))
3238 return; 3235 return;
3239 3236
3240 raw_local_irq_save(flags); 3237 raw_local_irq_save(flags);
3241 check_flags(flags); 3238 check_flags(flags);
3242 current->lockdep_recursion = 1; 3239 current->lockdep_recursion = 1;
3240 trace_lock_release(lock, nested, ip);
3243 __lock_release(lock, nested, ip); 3241 __lock_release(lock, nested, ip);
3244 current->lockdep_recursion = 0; 3242 current->lockdep_recursion = 0;
3245 raw_local_irq_restore(flags); 3243 raw_local_irq_restore(flags);
@@ -3413,8 +3411,6 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
3413{ 3411{
3414 unsigned long flags; 3412 unsigned long flags;
3415 3413
3416 trace_lock_contended(lock, ip);
3417
3418 if (unlikely(!lock_stat)) 3414 if (unlikely(!lock_stat))
3419 return; 3415 return;
3420 3416
@@ -3424,6 +3420,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
3424 raw_local_irq_save(flags); 3420 raw_local_irq_save(flags);
3425 check_flags(flags); 3421 check_flags(flags);
3426 current->lockdep_recursion = 1; 3422 current->lockdep_recursion = 1;
3423 trace_lock_contended(lock, ip);
3427 __lock_contended(lock, ip); 3424 __lock_contended(lock, ip);
3428 current->lockdep_recursion = 0; 3425 current->lockdep_recursion = 0;
3429 raw_local_irq_restore(flags); 3426 raw_local_irq_restore(flags);
@@ -3822,6 +3819,7 @@ void lockdep_rcu_dereference(const char *file, const int line)
3822 printk("%s:%d invoked rcu_dereference_check() without protection!\n", 3819 printk("%s:%d invoked rcu_dereference_check() without protection!\n",
3823 file, line); 3820 file, line);
3824 printk("\nother info that might help us debug this:\n\n"); 3821 printk("\nother info that might help us debug this:\n\n");
3822 printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks);
3825 lockdep_print_held_locks(curr); 3823 lockdep_print_held_locks(curr);
3826 printk("\nstack backtrace:\n"); 3824 printk("\nstack backtrace:\n");
3827 dump_stack(); 3825 dump_stack();
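
The lockdep hunks above move the trace_lock_* tracepoints inside the lockdep_recursion guard, so a tracer that itself takes locks re-enters lockdep harmlessly instead of recursing. A userspace sketch of that guard ordering, with an invented hook and a thread-local flag standing in for current->lockdep_recursion:

/*
 * The instrumentation hook runs only once the recursion flag is set,
 * so anything the hook does that re-enters the instrumented path
 * returns early instead of recursing.
 */
#include <stdio.h>

static __thread int in_instrumentation;

static void hook(const char *what)
{
        printf("hook: %s\n", what);
        /* A re-entry into instrumented_acquire() from here would hit
         * the recursion check below and bail out. */
}

static void instrumented_acquire(const char *name)
{
        if (in_instrumentation)
                return;                 /* recursion: bail out */

        in_instrumentation = 1;
        hook(name);                     /* called only under the guard */
        /* ... real acquire work ... */
        in_instrumentation = 0;
}

int main(void)
{
        instrumented_acquire("my_lock");
        return 0;
}
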
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index 09b4ff9711b2..2ab67233ee8f 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -24,7 +24,18 @@
24 24
25static struct kmem_cache *nsproxy_cachep; 25static struct kmem_cache *nsproxy_cachep;
26 26
27struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); 27struct nsproxy init_nsproxy = {
28 .count = ATOMIC_INIT(1),
29 .uts_ns = &init_uts_ns,
30#if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC)
31 .ipc_ns = &init_ipc_ns,
32#endif
33 .mnt_ns = NULL,
34 .pid_ns = &init_pid_ns,
35#ifdef CONFIG_NET
36 .net_ns = &init_net,
37#endif
38};
28 39
29static inline struct nsproxy *create_nsproxy(void) 40static inline struct nsproxy *create_nsproxy(void)
30{ 41{
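
init_nsproxy above moves from the INIT_NSPROXY() helper macro to an open-coded C99 designated initializer, which keeps the conditionally compiled members next to their #ifdef blocks. A compact sketch of the idiom with an invented struct:

/* Designated initializer with a conditionally compiled member. */
#include <stdio.h>

#define CONFIG_NET 1

struct proxy_like {
        int count;
        const char *uts;
#ifdef CONFIG_NET
        const char *net;
#endif
};

static struct proxy_like init_proxy = {
        .count = 1,
        .uts   = "init_uts",
#ifdef CONFIG_NET
        .net   = "init_net",
#endif
};

int main(void)
{
        printf("count=%d uts=%s\n", init_proxy.count, init_proxy.uts);
        return 0;
}
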
diff --git a/kernel/params.c b/kernel/params.c
index d55a53ec9234..0b30ecd53a52 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -401,8 +401,8 @@ int param_get_string(char *buffer, struct kernel_param *kp)
401} 401}
402 402
403/* sysfs output in /sys/modules/XYZ/parameters/ */ 403/* sysfs output in /sys/modules/XYZ/parameters/ */
404#define to_module_attr(n) container_of(n, struct module_attribute, attr); 404#define to_module_attr(n) container_of(n, struct module_attribute, attr)
405#define to_module_kobject(n) container_of(n, struct module_kobject, kobj); 405#define to_module_kobject(n) container_of(n, struct module_kobject, kobj)
406 406
407extern struct kernel_param __start___param[], __stop___param[]; 407extern struct kernel_param __start___param[], __stop___param[];
408 408
@@ -420,7 +420,7 @@ struct module_param_attrs
420}; 420};
421 421
422#ifdef CONFIG_SYSFS 422#ifdef CONFIG_SYSFS
423#define to_param_attr(n) container_of(n, struct param_attribute, mattr); 423#define to_param_attr(n) container_of(n, struct param_attribute, mattr)
424 424
425static ssize_t param_attr_show(struct module_attribute *mattr, 425static ssize_t param_attr_show(struct module_attribute *mattr,
426 struct module *mod, char *buf) 426 struct module *mod, char *buf)
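
The kernel/params.c hunks above only drop stray trailing semicolons from the to_module_attr()/to_module_kobject()/to_param_attr() macros, but that matters: with the semicolon included, the macro cannot be used inside an expression. A simplified demonstration, using a trimmed-down container_of() and invented structs:

#include <stddef.h>
#include <stdio.h>

struct attr { int id; };
struct module_attr { struct attr attr; const char *name; };

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

/* Broken form: the semicolon becomes part of every expansion. */
/* #define to_module_attr(n) container_of(n, struct module_attr, attr); */

/* Fixed form: no trailing semicolon, usable in any expression. */
#define to_module_attr(n) container_of(n, struct module_attr, attr)

int main(void)
{
        struct module_attr ma = { .attr = { .id = 7 }, .name = "demo" };
        struct attr *a = &ma.attr;

        /* With the broken form this would not compile: the expansion
         * would read "...attr); ->name". */
        printf("%s\n", to_module_attr(a)->name);
        return 0;
}
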
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index f40560b86544..574ee58a3046 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -56,21 +56,6 @@ static atomic_t nr_task_events __read_mostly;
56 */ 56 */
57int sysctl_perf_event_paranoid __read_mostly = 1; 57int sysctl_perf_event_paranoid __read_mostly = 1;
58 58
59static inline bool perf_paranoid_tracepoint_raw(void)
60{
61 return sysctl_perf_event_paranoid > -1;
62}
63
64static inline bool perf_paranoid_cpu(void)
65{
66 return sysctl_perf_event_paranoid > 0;
67}
68
69static inline bool perf_paranoid_kernel(void)
70{
71 return sysctl_perf_event_paranoid > 1;
72}
73
74int sysctl_perf_event_mlock __read_mostly = 512; /* 'free' kb per user */ 59int sysctl_perf_event_mlock __read_mostly = 512; /* 'free' kb per user */
75 60
76/* 61/*
@@ -96,10 +81,6 @@ extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event)
96void __weak hw_perf_disable(void) { barrier(); } 81void __weak hw_perf_disable(void) { barrier(); }
97void __weak hw_perf_enable(void) { barrier(); } 82void __weak hw_perf_enable(void) { barrier(); }
98 83
99void __weak hw_perf_event_setup(int cpu) { barrier(); }
100void __weak hw_perf_event_setup_online(int cpu) { barrier(); }
101void __weak hw_perf_event_setup_offline(int cpu) { barrier(); }
102
103int __weak 84int __weak
104hw_perf_group_sched_in(struct perf_event *group_leader, 85hw_perf_group_sched_in(struct perf_event *group_leader,
105 struct perf_cpu_context *cpuctx, 86 struct perf_cpu_context *cpuctx,
@@ -112,25 +93,15 @@ void __weak perf_event_print_debug(void) { }
112 93
113static DEFINE_PER_CPU(int, perf_disable_count); 94static DEFINE_PER_CPU(int, perf_disable_count);
114 95
115void __perf_disable(void)
116{
117 __get_cpu_var(perf_disable_count)++;
118}
119
120bool __perf_enable(void)
121{
122 return !--__get_cpu_var(perf_disable_count);
123}
124
125void perf_disable(void) 96void perf_disable(void)
126{ 97{
127 __perf_disable(); 98 if (!__get_cpu_var(perf_disable_count)++)
128 hw_perf_disable(); 99 hw_perf_disable();
129} 100}
130 101
131void perf_enable(void) 102void perf_enable(void)
132{ 103{
133 if (__perf_enable()) 104 if (!--__get_cpu_var(perf_disable_count))
134 hw_perf_enable(); 105 hw_perf_enable();
135} 106}
136 107
@@ -1553,12 +1524,15 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1553 */ 1524 */
1554 if (interrupts == MAX_INTERRUPTS) { 1525 if (interrupts == MAX_INTERRUPTS) {
1555 perf_log_throttle(event, 1); 1526 perf_log_throttle(event, 1);
1527 perf_disable();
1556 event->pmu->unthrottle(event); 1528 event->pmu->unthrottle(event);
1529 perf_enable();
1557 } 1530 }
1558 1531
1559 if (!event->attr.freq || !event->attr.sample_freq) 1532 if (!event->attr.freq || !event->attr.sample_freq)
1560 continue; 1533 continue;
1561 1534
1535 perf_disable();
1562 event->pmu->read(event); 1536 event->pmu->read(event);
1563 now = atomic64_read(&event->count); 1537 now = atomic64_read(&event->count);
1564 delta = now - hwc->freq_count_stamp; 1538 delta = now - hwc->freq_count_stamp;
@@ -1566,6 +1540,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1566 1540
1567 if (delta > 0) 1541 if (delta > 0)
1568 perf_adjust_period(event, TICK_NSEC, delta); 1542 perf_adjust_period(event, TICK_NSEC, delta);
1543 perf_enable();
1569 } 1544 }
1570 raw_spin_unlock(&ctx->lock); 1545 raw_spin_unlock(&ctx->lock);
1571} 1546}
@@ -1575,9 +1550,6 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
1575 */ 1550 */
1576static void rotate_ctx(struct perf_event_context *ctx) 1551static void rotate_ctx(struct perf_event_context *ctx)
1577{ 1552{
1578 if (!ctx->nr_events)
1579 return;
1580
1581 raw_spin_lock(&ctx->lock); 1553 raw_spin_lock(&ctx->lock);
1582 1554
1583 /* Rotate the first entry last of non-pinned groups */ 1555 /* Rotate the first entry last of non-pinned groups */
@@ -1590,19 +1562,28 @@ void perf_event_task_tick(struct task_struct *curr)
1590{ 1562{
1591 struct perf_cpu_context *cpuctx; 1563 struct perf_cpu_context *cpuctx;
1592 struct perf_event_context *ctx; 1564 struct perf_event_context *ctx;
1565 int rotate = 0;
1593 1566
1594 if (!atomic_read(&nr_events)) 1567 if (!atomic_read(&nr_events))
1595 return; 1568 return;
1596 1569
1597 cpuctx = &__get_cpu_var(perf_cpu_context); 1570 cpuctx = &__get_cpu_var(perf_cpu_context);
1598 ctx = curr->perf_event_ctxp; 1571 if (cpuctx->ctx.nr_events &&
1572 cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
1573 rotate = 1;
1599 1574
1600 perf_disable(); 1575 ctx = curr->perf_event_ctxp;
1576 if (ctx && ctx->nr_events && ctx->nr_events != ctx->nr_active)
1577 rotate = 1;
1601 1578
1602 perf_ctx_adjust_freq(&cpuctx->ctx); 1579 perf_ctx_adjust_freq(&cpuctx->ctx);
1603 if (ctx) 1580 if (ctx)
1604 perf_ctx_adjust_freq(ctx); 1581 perf_ctx_adjust_freq(ctx);
1605 1582
1583 if (!rotate)
1584 return;
1585
1586 perf_disable();
1606 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); 1587 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
1607 if (ctx) 1588 if (ctx)
1608 task_ctx_sched_out(ctx, EVENT_FLEXIBLE); 1589 task_ctx_sched_out(ctx, EVENT_FLEXIBLE);
@@ -1614,7 +1595,6 @@ void perf_event_task_tick(struct task_struct *curr)
1614 cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); 1595 cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE);
1615 if (ctx) 1596 if (ctx)
1616 task_ctx_sched_in(curr, EVENT_FLEXIBLE); 1597 task_ctx_sched_in(curr, EVENT_FLEXIBLE);
1617
1618 perf_enable(); 1598 perf_enable();
1619} 1599}
1620 1600
@@ -2806,6 +2786,13 @@ __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
2806 return NULL; 2786 return NULL;
2807} 2787}
2808 2788
2789#ifdef CONFIG_EVENT_TRACING
2790__weak
2791void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
2792{
2793}
2794#endif
2795
2809/* 2796/*
2810 * Output 2797 * Output
2811 */ 2798 */
@@ -4123,8 +4110,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi,
4123 if (rctx < 0) 4110 if (rctx < 0)
4124 return; 4111 return;
4125 4112
4126 data.addr = addr; 4113 perf_sample_data_init(&data, addr);
4127 data.raw = NULL;
4128 4114
4129 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs); 4115 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs);
4130 4116
@@ -4169,11 +4155,10 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
4169 struct perf_event *event; 4155 struct perf_event *event;
4170 u64 period; 4156 u64 period;
4171 4157
4172 event = container_of(hrtimer, struct perf_event, hw.hrtimer); 4158 event = container_of(hrtimer, struct perf_event, hw.hrtimer);
4173 event->pmu->read(event); 4159 event->pmu->read(event);
4174 4160
4175 data.addr = 0; 4161 perf_sample_data_init(&data, 0);
4176 data.raw = NULL;
4177 data.period = event->hw.last_period; 4162 data.period = event->hw.last_period;
4178 regs = get_irq_regs(); 4163 regs = get_irq_regs();
4179 /* 4164 /*
@@ -4335,26 +4320,20 @@ static const struct pmu perf_ops_task_clock = {
4335#ifdef CONFIG_EVENT_TRACING 4320#ifdef CONFIG_EVENT_TRACING
4336 4321
4337void perf_tp_event(int event_id, u64 addr, u64 count, void *record, 4322void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
4338 int entry_size) 4323 int entry_size, struct pt_regs *regs)
4339{ 4324{
4325 struct perf_sample_data data;
4340 struct perf_raw_record raw = { 4326 struct perf_raw_record raw = {
4341 .size = entry_size, 4327 .size = entry_size,
4342 .data = record, 4328 .data = record,
4343 }; 4329 };
4344 4330
4345 struct perf_sample_data data = { 4331 perf_sample_data_init(&data, addr);
4346 .addr = addr, 4332 data.raw = &raw;
4347 .raw = &raw,
4348 };
4349
4350 struct pt_regs *regs = get_irq_regs();
4351
4352 if (!regs)
4353 regs = task_pt_regs(current);
4354 4333
4355 /* Trace events already protected against recursion */ 4334 /* Trace events already protected against recursion */
4356 do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, 4335 do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
4357 &data, regs); 4336 &data, regs);
4358} 4337}
4359EXPORT_SYMBOL_GPL(perf_tp_event); 4338EXPORT_SYMBOL_GPL(perf_tp_event);
4360 4339
@@ -4370,7 +4349,7 @@ static int perf_tp_event_match(struct perf_event *event,
4370 4349
4371static void tp_perf_event_destroy(struct perf_event *event) 4350static void tp_perf_event_destroy(struct perf_event *event)
4372{ 4351{
4373 ftrace_profile_disable(event->attr.config); 4352 perf_trace_disable(event->attr.config);
4374} 4353}
4375 4354
4376static const struct pmu *tp_perf_event_init(struct perf_event *event) 4355static const struct pmu *tp_perf_event_init(struct perf_event *event)
@@ -4384,7 +4363,7 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
4384 !capable(CAP_SYS_ADMIN)) 4363 !capable(CAP_SYS_ADMIN))
4385 return ERR_PTR(-EPERM); 4364 return ERR_PTR(-EPERM);
4386 4365
4387 if (ftrace_profile_enable(event->attr.config)) 4366 if (perf_trace_enable(event->attr.config))
4388 return NULL; 4367 return NULL;
4389 4368
4390 event->destroy = tp_perf_event_destroy; 4369 event->destroy = tp_perf_event_destroy;
@@ -4463,8 +4442,7 @@ void perf_bp_event(struct perf_event *bp, void *data)
4463 struct perf_sample_data sample; 4442 struct perf_sample_data sample;
4464 struct pt_regs *regs = data; 4443 struct pt_regs *regs = data;
4465 4444
4466 sample.raw = NULL; 4445 perf_sample_data_init(&sample, bp->attr.bp_addr);
4467 sample.addr = bp->attr.bp_addr;
4468 4446
4469 if (!perf_exclude_event(bp, regs)) 4447 if (!perf_exclude_event(bp, regs))
4470 perf_swevent_add(bp, 1, 1, &sample, regs); 4448 perf_swevent_add(bp, 1, 1, &sample, regs);
@@ -5392,18 +5370,26 @@ int perf_event_init_task(struct task_struct *child)
5392 return ret; 5370 return ret;
5393} 5371}
5394 5372
5373static void __init perf_event_init_all_cpus(void)
5374{
5375 int cpu;
5376 struct perf_cpu_context *cpuctx;
5377
5378 for_each_possible_cpu(cpu) {
5379 cpuctx = &per_cpu(perf_cpu_context, cpu);
5380 __perf_event_init_context(&cpuctx->ctx, NULL);
5381 }
5382}
5383
5395static void __cpuinit perf_event_init_cpu(int cpu) 5384static void __cpuinit perf_event_init_cpu(int cpu)
5396{ 5385{
5397 struct perf_cpu_context *cpuctx; 5386 struct perf_cpu_context *cpuctx;
5398 5387
5399 cpuctx = &per_cpu(perf_cpu_context, cpu); 5388 cpuctx = &per_cpu(perf_cpu_context, cpu);
5400 __perf_event_init_context(&cpuctx->ctx, NULL);
5401 5389
5402 spin_lock(&perf_resource_lock); 5390 spin_lock(&perf_resource_lock);
5403 cpuctx->max_pertask = perf_max_events - perf_reserved_percpu; 5391 cpuctx->max_pertask = perf_max_events - perf_reserved_percpu;
5404 spin_unlock(&perf_resource_lock); 5392 spin_unlock(&perf_resource_lock);
5405
5406 hw_perf_event_setup(cpu);
5407} 5393}
5408 5394
5409#ifdef CONFIG_HOTPLUG_CPU 5395#ifdef CONFIG_HOTPLUG_CPU
@@ -5443,20 +5429,11 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
5443 perf_event_init_cpu(cpu); 5429 perf_event_init_cpu(cpu);
5444 break; 5430 break;
5445 5431
5446 case CPU_ONLINE:
5447 case CPU_ONLINE_FROZEN:
5448 hw_perf_event_setup_online(cpu);
5449 break;
5450
5451 case CPU_DOWN_PREPARE: 5432 case CPU_DOWN_PREPARE:
5452 case CPU_DOWN_PREPARE_FROZEN: 5433 case CPU_DOWN_PREPARE_FROZEN:
5453 perf_event_exit_cpu(cpu); 5434 perf_event_exit_cpu(cpu);
5454 break; 5435 break;
5455 5436
5456 case CPU_DEAD:
5457 hw_perf_event_setup_offline(cpu);
5458 break;
5459
5460 default: 5437 default:
5461 break; 5438 break;
5462 } 5439 }
@@ -5474,6 +5451,7 @@ static struct notifier_block __cpuinitdata perf_cpu_nb = {
5474 5451
5475void __init perf_event_init(void) 5452void __init perf_event_init(void)
5476{ 5453{
5454 perf_event_init_all_cpus();
5477 perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, 5455 perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
5478 (void *)(long)smp_processor_id()); 5456 (void *)(long)smp_processor_id());
5479 perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE, 5457 perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE,
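
Among the perf_event.c changes above, __perf_disable()/__perf_enable() are folded into perf_disable()/perf_enable(): a per-CPU nesting counter ensures only the outermost disable and the matching final enable reach hw_perf_disable()/hw_perf_enable(). A userspace sketch of that nesting pattern, with printf stand-ins for the hardware hooks:

/* Nested disable/enable: only the outermost pair touches the backend. */
#include <stdio.h>

static int disable_count;

static void hw_disable(void) { printf("hw: disabled\n"); }
static void hw_enable(void)  { printf("hw: enabled\n"); }

static void my_disable(void)
{
        if (!disable_count++)           /* outermost call only */
                hw_disable();
}

static void my_enable(void)
{
        if (!--disable_count)           /* matching outermost call only */
                hw_enable();
}

int main(void)
{
        my_disable();
        my_disable();   /* nested: no hardware access */
        my_enable();    /* nested: no hardware access */
        my_enable();
        return 0;
}
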
diff --git a/kernel/pid.c b/kernel/pid.c
index 86b296943e5f..aebb30d9c233 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -367,7 +367,9 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type)
367 struct task_struct *result = NULL; 367 struct task_struct *result = NULL;
368 if (pid) { 368 if (pid) {
369 struct hlist_node *first; 369 struct hlist_node *first;
370 first = rcu_dereference_check(pid->tasks[type].first, rcu_read_lock_held() || lockdep_is_held(&tasklist_lock)); 370 first = rcu_dereference_check(pid->tasks[type].first,
371 rcu_read_lock_held() ||
372 lockdep_tasklist_lock_is_held());
371 if (first) 373 if (first)
372 result = hlist_entry(first, struct task_struct, pids[(type)].node); 374 result = hlist_entry(first, struct task_struct, pids[(type)].node);
373 } 375 }
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 86b3796b0436..79aac93acf99 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -161,13 +161,12 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
161 rcu_read_lock(); 161 rcu_read_lock();
162 162
163 /* 163 /*
164 * Use force_sig() since it clears SIGNAL_UNKILLABLE ensuring 164 * Any nested-container's init processes won't ignore the
165 * any nested-container's init processes don't ignore the 165 * SEND_SIG_NOINFO signal, see send_signal()->si_fromuser().
166 * signal
167 */ 166 */
168 task = pid_task(find_vpid(nr), PIDTYPE_PID); 167 task = pid_task(find_vpid(nr), PIDTYPE_PID);
169 if (task) 168 if (task)
170 force_sig(SIGKILL, task); 169 send_sig_info(SIGKILL, SEND_SIG_NOINFO, task);
171 170
172 rcu_read_unlock(); 171 rcu_read_unlock();
173 172
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 1439eb504c22..4a525a30e08e 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -246,12 +246,21 @@ struct rcu_data {
246 246
247#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ 247#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */
248#ifdef CONFIG_RCU_CPU_STALL_DETECTOR 248#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
249#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rsp->jiffies_stall */ 249
250#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rsp->jiffies_stall */ 250#ifdef CONFIG_PROVE_RCU
251#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ 251#define RCU_STALL_DELAY_DELTA (5 * HZ)
252 /* to take at least one */ 252#else
253 /* scheduling clock irq */ 253#define RCU_STALL_DELAY_DELTA 0
254 /* before ratting on them. */ 254#endif
255
256#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ + RCU_STALL_DELAY_DELTA)
257 /* for rsp->jiffies_stall */
258#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ + RCU_STALL_DELAY_DELTA)
259 /* for rsp->jiffies_stall */
260#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */
261 /* to take at least one */
262 /* scheduling clock irq */
263 /* before ratting on them. */
255 264
256#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 265#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
257 266
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 464ad2cdee00..79b53bda8943 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1010,6 +1010,10 @@ int rcu_needs_cpu(int cpu)
1010 int c = 0; 1010 int c = 0;
1011 int thatcpu; 1011 int thatcpu;
1012 1012
1013 /* Check for being in the holdoff period. */
1014 if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies)
1015 return rcu_needs_cpu_quick_check(cpu);
1016
1013 /* Don't bother unless we are the last non-dyntick-idle CPU. */ 1017 /* Don't bother unless we are the last non-dyntick-idle CPU. */
1014 for_each_cpu_not(thatcpu, nohz_cpu_mask) 1018 for_each_cpu_not(thatcpu, nohz_cpu_mask)
1015 if (thatcpu != cpu) { 1019 if (thatcpu != cpu) {
@@ -1041,10 +1045,8 @@ int rcu_needs_cpu(int cpu)
1041 } 1045 }
1042 1046
1043 /* If RCU callbacks are still pending, RCU still needs this CPU. */ 1047 /* If RCU callbacks are still pending, RCU still needs this CPU. */
1044 if (c) { 1048 if (c)
1045 raise_softirq(RCU_SOFTIRQ); 1049 raise_softirq(RCU_SOFTIRQ);
1046 per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
1047 }
1048 return c; 1050 return c;
1049} 1051}
1050 1052
diff --git a/kernel/sched.c b/kernel/sched.c
index 150b6988de49..9ab3cd7858d3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2359,7 +2359,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
2359{ 2359{
2360 int cpu, orig_cpu, this_cpu, success = 0; 2360 int cpu, orig_cpu, this_cpu, success = 0;
2361 unsigned long flags; 2361 unsigned long flags;
2362 struct rq *rq, *orig_rq; 2362 struct rq *rq;
2363 2363
2364 if (!sched_feat(SYNC_WAKEUPS)) 2364 if (!sched_feat(SYNC_WAKEUPS))
2365 wake_flags &= ~WF_SYNC; 2365 wake_flags &= ~WF_SYNC;
@@ -2367,7 +2367,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
2367 this_cpu = get_cpu(); 2367 this_cpu = get_cpu();
2368 2368
2369 smp_wmb(); 2369 smp_wmb();
2370 rq = orig_rq = task_rq_lock(p, &flags); 2370 rq = task_rq_lock(p, &flags);
2371 update_rq_clock(rq); 2371 update_rq_clock(rq);
2372 if (!(p->state & state)) 2372 if (!(p->state & state))
2373 goto out; 2373 goto out;
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index 82095bf2099f..fccf9fbb0d7b 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -56,7 +56,7 @@ static int convert_prio(int prio)
56 * @lowest_mask: A mask to fill in with selected CPUs (or NULL) 56 * @lowest_mask: A mask to fill in with selected CPUs (or NULL)
57 * 57 *
58 * Note: This function returns the recommended CPUs as calculated during the 58 * Note: This function returns the recommended CPUs as calculated during the
59 * current invokation. By the time the call returns, the CPUs may have in 59 * current invocation. By the time the call returns, the CPUs may have in
60 * fact changed priorities any number of times. While not ideal, it is not 60 * fact changed priorities any number of times. While not ideal, it is not
61 * an issue of correctness since the normal rebalancer logic will correct 61 * an issue of correctness since the normal rebalancer logic will correct
62 * any discrepancies created by racing against the uncertainty of the current 62 * any discrepancies created by racing against the uncertainty of the current
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 3e1fd96c6cf9..5a5ea2cd924f 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -3476,7 +3476,7 @@ static void run_rebalance_domains(struct softirq_action *h)
3476 3476
3477static inline int on_null_domain(int cpu) 3477static inline int on_null_domain(int cpu)
3478{ 3478{
3479 return !rcu_dereference(cpu_rq(cpu)->sd); 3479 return !rcu_dereference_sched(cpu_rq(cpu)->sd);
3480} 3480}
3481 3481
3482/* 3482/*
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 5a6ed1f0990a..b5b920ae2ea7 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1146,7 +1146,12 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
1146 if (next && next->prio < idx) 1146 if (next && next->prio < idx)
1147 continue; 1147 continue;
1148 list_for_each_entry(rt_se, array->queue + idx, run_list) { 1148 list_for_each_entry(rt_se, array->queue + idx, run_list) {
1149 struct task_struct *p = rt_task_of(rt_se); 1149 struct task_struct *p;
1150
1151 if (!rt_entity_is_task(rt_se))
1152 continue;
1153
1154 p = rt_task_of(rt_se);
1150 if (pick_rt_task(rq, p, cpu)) { 1155 if (pick_rt_task(rq, p, cpu)) {
1151 next = p; 1156 next = p;
1152 break; 1157 break;
diff --git a/kernel/sys.c b/kernel/sys.c
index 9814e43fb23b..8298878f4f71 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -33,6 +33,7 @@
33#include <linux/task_io_accounting_ops.h> 33#include <linux/task_io_accounting_ops.h>
34#include <linux/seccomp.h> 34#include <linux/seccomp.h>
35#include <linux/cpu.h> 35#include <linux/cpu.h>
36#include <linux/personality.h>
36#include <linux/ptrace.h> 37#include <linux/ptrace.h>
37#include <linux/fs_struct.h> 38#include <linux/fs_struct.h>
38 39
@@ -1114,6 +1115,15 @@ out:
1114 1115
1115DECLARE_RWSEM(uts_sem); 1116DECLARE_RWSEM(uts_sem);
1116 1117
1118#ifdef COMPAT_UTS_MACHINE
1119#define override_architecture(name) \
1120 (current->personality == PER_LINUX32 && \
1121 copy_to_user(name->machine, COMPAT_UTS_MACHINE, \
1122 sizeof(COMPAT_UTS_MACHINE)))
1123#else
1124#define override_architecture(name) 0
1125#endif
1126
1117SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) 1127SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
1118{ 1128{
1119 int errno = 0; 1129 int errno = 0;
@@ -1122,9 +1132,66 @@ SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
1122 if (copy_to_user(name, utsname(), sizeof *name)) 1132 if (copy_to_user(name, utsname(), sizeof *name))
1123 errno = -EFAULT; 1133 errno = -EFAULT;
1124 up_read(&uts_sem); 1134 up_read(&uts_sem);
1135
1136 if (!errno && override_architecture(name))
1137 errno = -EFAULT;
1125 return errno; 1138 return errno;
1126} 1139}
1127 1140
1141#ifdef __ARCH_WANT_SYS_OLD_UNAME
1142/*
1143 * Old cruft
1144 */
1145SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
1146{
1147 int error = 0;
1148
1149 if (!name)
1150 return -EFAULT;
1151
1152 down_read(&uts_sem);
1153 if (copy_to_user(name, utsname(), sizeof(*name)))
1154 error = -EFAULT;
1155 up_read(&uts_sem);
1156
1157 if (!error && override_architecture(name))
1158 error = -EFAULT;
1159 return error;
1160}
1161
1162SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
1163{
1164 int error;
1165
1166 if (!name)
1167 return -EFAULT;
1168 if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
1169 return -EFAULT;
1170
1171 down_read(&uts_sem);
1172 error = __copy_to_user(&name->sysname, &utsname()->sysname,
1173 __OLD_UTS_LEN);
1174 error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
1175 error |= __copy_to_user(&name->nodename, &utsname()->nodename,
1176 __OLD_UTS_LEN);
1177 error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
1178 error |= __copy_to_user(&name->release, &utsname()->release,
1179 __OLD_UTS_LEN);
1180 error |= __put_user(0, name->release + __OLD_UTS_LEN);
1181 error |= __copy_to_user(&name->version, &utsname()->version,
1182 __OLD_UTS_LEN);
1183 error |= __put_user(0, name->version + __OLD_UTS_LEN);
1184 error |= __copy_to_user(&name->machine, &utsname()->machine,
1185 __OLD_UTS_LEN);
1186 error |= __put_user(0, name->machine + __OLD_UTS_LEN);
1187 up_read(&uts_sem);
1188
1189 if (!error && override_architecture(name))
1190 error = -EFAULT;
1191 return error ? -EFAULT : 0;
1192}
1193#endif
1194
1128SYSCALL_DEFINE2(sethostname, char __user *, name, int, len) 1195SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
1129{ 1196{
1130 int errno; 1197 int errno;
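
The new override_architecture() macro above makes the uname() family report COMPAT_UTS_MACHINE when the caller's personality is PER_LINUX32. The effect can be observed from userspace on architectures that define COMPAT_UTS_MACHINE (on x86_64, for example, the machine string typically flips from "x86_64" to "i686"):

/* Observe the machine-string override from userspace. */
#include <stdio.h>
#include <sys/personality.h>
#include <sys/utsname.h>

int main(void)
{
        struct utsname u;

        uname(&u);
        printf("native machine:      %s\n", u.machine);

        if (personality(PER_LINUX32) == -1) {
                perror("personality");
                return 1;
        }

        uname(&u);
        printf("PER_LINUX32 machine: %s\n", u.machine);
        return 0;
}
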
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 695384f12a7d..70f2ea758ffe 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -126,6 +126,7 @@ cond_syscall(sys_setreuid16);
126cond_syscall(sys_setuid16); 126cond_syscall(sys_setuid16);
127cond_syscall(sys_vm86old); 127cond_syscall(sys_vm86old);
128cond_syscall(sys_vm86); 128cond_syscall(sys_vm86);
129cond_syscall(sys_ipc);
129cond_syscall(compat_sys_ipc); 130cond_syscall(compat_sys_ipc);
130cond_syscall(compat_sys_sysctl); 131cond_syscall(compat_sys_sysctl);
131cond_syscall(sys_flock); 132cond_syscall(sys_flock);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0ef19c614f6d..8686b0f5fc12 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -23,6 +23,7 @@
23#include <linux/swap.h> 23#include <linux/swap.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/sysctl.h> 25#include <linux/sysctl.h>
26#include <linux/signal.h>
26#include <linux/proc_fs.h> 27#include <linux/proc_fs.h>
27#include <linux/security.h> 28#include <linux/security.h>
28#include <linux/ctype.h> 29#include <linux/ctype.h>
@@ -60,13 +61,23 @@
60#include <asm/stacktrace.h> 61#include <asm/stacktrace.h>
61#include <asm/io.h> 62#include <asm/io.h>
62#endif 63#endif
64#ifdef CONFIG_BSD_PROCESS_ACCT
65#include <linux/acct.h>
66#endif
67#ifdef CONFIG_RT_MUTEXES
68#include <linux/rtmutex.h>
69#endif
70#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
71#include <linux/lockdep.h>
72#endif
73#ifdef CONFIG_CHR_DEV_SG
74#include <scsi/sg.h>
75#endif
63 76
64 77
65#if defined(CONFIG_SYSCTL) 78#if defined(CONFIG_SYSCTL)
66 79
67/* External variables not in a header file. */ 80/* External variables not in a header file. */
68extern int C_A_D;
69extern int print_fatal_signals;
70extern int sysctl_overcommit_memory; 81extern int sysctl_overcommit_memory;
71extern int sysctl_overcommit_ratio; 82extern int sysctl_overcommit_ratio;
72extern int sysctl_panic_on_oom; 83extern int sysctl_panic_on_oom;
@@ -88,9 +99,6 @@ extern int sysctl_nr_open_min, sysctl_nr_open_max;
88#ifndef CONFIG_MMU 99#ifndef CONFIG_MMU
89extern int sysctl_nr_trim_pages; 100extern int sysctl_nr_trim_pages;
90#endif 101#endif
91#ifdef CONFIG_RCU_TORTURE_TEST
92extern int rcutorture_runnable;
93#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
94#ifdef CONFIG_BLOCK 102#ifdef CONFIG_BLOCK
95extern int blk_iopoll_enabled; 103extern int blk_iopoll_enabled;
96#endif 104#endif
@@ -120,14 +128,6 @@ static int min_percpu_pagelist_fract = 8;
120 128
121static int ngroups_max = NGROUPS_MAX; 129static int ngroups_max = NGROUPS_MAX;
122 130
123#ifdef CONFIG_MODULES
124extern char modprobe_path[];
125extern int modules_disabled;
126#endif
127#ifdef CONFIG_CHR_DEV_SG
128extern int sg_big_buff;
129#endif
130
131#ifdef CONFIG_SPARC 131#ifdef CONFIG_SPARC
132#include <asm/system.h> 132#include <asm/system.h>
133#endif 133#endif
@@ -149,10 +149,6 @@ extern int sysctl_userprocess_debug;
149extern int spin_retry; 149extern int spin_retry;
150#endif 150#endif
151 151
152#ifdef CONFIG_BSD_PROCESS_ACCT
153extern int acct_parm[];
154#endif
155
156#ifdef CONFIG_IA64 152#ifdef CONFIG_IA64
157extern int no_unaligned_warning; 153extern int no_unaligned_warning;
158extern int unaligned_dump_stack; 154extern int unaligned_dump_stack;
@@ -160,10 +156,6 @@ extern int unaligned_dump_stack;
160 156
161extern struct ratelimit_state printk_ratelimit_state; 157extern struct ratelimit_state printk_ratelimit_state;
162 158
163#ifdef CONFIG_RT_MUTEXES
164extern int max_lock_depth;
165#endif
166
167#ifdef CONFIG_PROC_SYSCTL 159#ifdef CONFIG_PROC_SYSCTL
168static int proc_do_cad_pid(struct ctl_table *table, int write, 160static int proc_do_cad_pid(struct ctl_table *table, int write,
169 void __user *buffer, size_t *lenp, loff_t *ppos); 161 void __user *buffer, size_t *lenp, loff_t *ppos);
@@ -202,9 +194,6 @@ extern struct ctl_table epoll_table[];
202int sysctl_legacy_va_layout; 194int sysctl_legacy_va_layout;
203#endif 195#endif
204 196
205extern int prove_locking;
206extern int lock_stat;
207
208/* The default sysctl tables: */ 197/* The default sysctl tables: */
209 198
210static struct ctl_table root_table[] = { 199static struct ctl_table root_table[] = {
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 1f663d23e85e..1f5dde637457 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -592,6 +592,10 @@ static inline void clocksource_select(void) { }
592 */ 592 */
593static int __init clocksource_done_booting(void) 593static int __init clocksource_done_booting(void)
594{ 594{
595 mutex_lock(&clocksource_mutex);
596 curr_clocksource = clocksource_default_clock();
597 mutex_unlock(&clocksource_mutex);
598
595 finished_booting = 1; 599 finished_booting = 1;
596 600
597 /* 601 /*
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index d00c6fe23f54..78edc6490038 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -52,7 +52,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_events.o
52obj-$(CONFIG_EVENT_TRACING) += trace_export.o 52obj-$(CONFIG_EVENT_TRACING) += trace_export.o
53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o 53obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
54ifeq ($(CONFIG_PERF_EVENTS),y) 54ifeq ($(CONFIG_PERF_EVENTS),y)
55obj-$(CONFIG_EVENT_TRACING) += trace_event_profile.o 55obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
56endif 56endif
57obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 57obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
58obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o 58obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 83783579378f..d9062f5cc0c0 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -27,6 +27,7 @@
27#include <linux/ctype.h> 27#include <linux/ctype.h>
28#include <linux/list.h> 28#include <linux/list.h>
29#include <linux/hash.h> 29#include <linux/hash.h>
30#include <linux/rcupdate.h>
30 31
31#include <trace/events/sched.h> 32#include <trace/events/sched.h>
32 33
@@ -84,22 +85,22 @@ ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
84ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; 85ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
85ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; 86ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
86 87
87#ifdef CONFIG_FUNCTION_GRAPH_TRACER 88/*
88static int ftrace_set_func(unsigned long *array, int *idx, char *buffer); 89 * Traverse the ftrace_list, invoking all entries. The reason that we
89#endif 90 * can use rcu_dereference_raw() is that elements removed from this list
90 91 * are simply leaked, so there is no need to interact with a grace-period
92 * mechanism. The rcu_dereference_raw() calls are needed to handle
93 * concurrent insertions into the ftrace_list.
94 *
95 * Silly Alpha and silly pointer-speculation compiler optimizations!
96 */
91static void ftrace_list_func(unsigned long ip, unsigned long parent_ip) 97static void ftrace_list_func(unsigned long ip, unsigned long parent_ip)
92{ 98{
93 struct ftrace_ops *op = ftrace_list; 99 struct ftrace_ops *op = rcu_dereference_raw(ftrace_list); /*see above*/
94
95 /* in case someone actually ports this to alpha! */
96 read_barrier_depends();
97 100
98 while (op != &ftrace_list_end) { 101 while (op != &ftrace_list_end) {
99 /* silly alpha */
100 read_barrier_depends();
101 op->func(ip, parent_ip); 102 op->func(ip, parent_ip);
102 op = op->next; 103 op = rcu_dereference_raw(op->next); /*see above*/
103 }; 104 };
104} 105}
105 106
@@ -154,8 +155,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
154 * the ops->next pointer is valid before another CPU sees 155 * the ops->next pointer is valid before another CPU sees
155 * the ops pointer included into the ftrace_list. 156 * the ops pointer included into the ftrace_list.
156 */ 157 */
157 smp_wmb(); 158 rcu_assign_pointer(ftrace_list, ops);
158 ftrace_list = ops;
159 159
160 if (ftrace_enabled) { 160 if (ftrace_enabled) {
161 ftrace_func_t func; 161 ftrace_func_t func;
@@ -2276,6 +2276,8 @@ __setup("ftrace_filter=", set_ftrace_filter);
2276 2276
2277#ifdef CONFIG_FUNCTION_GRAPH_TRACER 2277#ifdef CONFIG_FUNCTION_GRAPH_TRACER
2278static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata; 2278static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata;
2279static int ftrace_set_func(unsigned long *array, int *idx, char *buffer);
2280
2279static int __init set_graph_function(char *str) 2281static int __init set_graph_function(char *str)
2280{ 2282{
2281 strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE); 2283 strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE);
@@ -3351,6 +3353,7 @@ void ftrace_graph_init_task(struct task_struct *t)
3351{ 3353{
3352 /* Make sure we do not use the parent ret_stack */ 3354 /* Make sure we do not use the parent ret_stack */
3353 t->ret_stack = NULL; 3355 t->ret_stack = NULL;
3356 t->curr_ret_stack = -1;
3354 3357
3355 if (ftrace_graph_active) { 3358 if (ftrace_graph_active) {
3356 struct ftrace_ret_stack *ret_stack; 3359 struct ftrace_ret_stack *ret_stack;
@@ -3360,7 +3363,6 @@ void ftrace_graph_init_task(struct task_struct *t)
3360 GFP_KERNEL); 3363 GFP_KERNEL);
3361 if (!ret_stack) 3364 if (!ret_stack)
3362 return; 3365 return;
3363 t->curr_ret_stack = -1;
3364 atomic_set(&t->tracing_graph_pause, 0); 3366 atomic_set(&t->tracing_graph_pause, 0);
3365 atomic_set(&t->trace_overrun, 0); 3367 atomic_set(&t->trace_overrun, 0);
3366 t->ftrace_timestamp = 0; 3368 t->ftrace_timestamp = 0;
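
The ftrace_list hunks above replace the open-coded smp_wmb()/read_barrier_depends() pairs with rcu_assign_pointer() and rcu_dereference_raw(), i.e. the standard publish/consume idiom for a list that is traversed locklessly. A userspace sketch of the same ordering using C11 atomics; it mirrors only the publication ordering, not RCU's grace-period and reclamation side:

/*
 * Writer fully initialises the node, then publishes it with a release
 * store; the lockless reader loads the head with acquire semantics and
 * may then walk the list safely.
 */
#include <stdatomic.h>
#include <stdio.h>

struct op {
        void (*func)(void);
        struct op *next;
};

static _Atomic(struct op *) ops_head = NULL;

static void publish(struct op *op)
{
        op->next = atomic_load_explicit(&ops_head, memory_order_relaxed);
        /* release: all stores to *op are visible before the pointer is */
        atomic_store_explicit(&ops_head, op, memory_order_release);
}

static void call_all(void)
{
        /* acquire: pairs with the release store in publish() */
        struct op *op = atomic_load_explicit(&ops_head, memory_order_acquire);

        while (op) {
                op->func();
                op = op->next;
        }
}

static void hello(void) { printf("hello\n"); }

int main(void)
{
        static struct op o = { .func = hello };

        publish(&o);
        call_all();
        return 0;
}
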
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 0287f9f52f5a..05a9f83b8819 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2233,12 +2233,12 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
2233 if (ring_buffer_flags != RB_BUFFERS_ON) 2233 if (ring_buffer_flags != RB_BUFFERS_ON)
2234 return NULL; 2234 return NULL;
2235 2235
2236 if (atomic_read(&buffer->record_disabled))
2237 return NULL;
2238
2239 /* If we are tracing schedule, we don't want to recurse */ 2236 /* If we are tracing schedule, we don't want to recurse */
2240 resched = ftrace_preempt_disable(); 2237 resched = ftrace_preempt_disable();
2241 2238
2239 if (atomic_read(&buffer->record_disabled))
2240 goto out_nocheck;
2241
2242 if (trace_recursive_lock()) 2242 if (trace_recursive_lock())
2243 goto out_nocheck; 2243 goto out_nocheck;
2244 2244
@@ -2470,11 +2470,11 @@ int ring_buffer_write(struct ring_buffer *buffer,
2470 if (ring_buffer_flags != RB_BUFFERS_ON) 2470 if (ring_buffer_flags != RB_BUFFERS_ON)
2471 return -EBUSY; 2471 return -EBUSY;
2472 2472
2473 if (atomic_read(&buffer->record_disabled))
2474 return -EBUSY;
2475
2476 resched = ftrace_preempt_disable(); 2473 resched = ftrace_preempt_disable();
2477 2474
2475 if (atomic_read(&buffer->record_disabled))
2476 goto out;
2477
2478 cpu = raw_smp_processor_id(); 2478 cpu = raw_smp_processor_id();
2479 2479
2480 if (!cpumask_test_cpu(cpu, buffer->cpumask)) 2480 if (!cpumask_test_cpu(cpu, buffer->cpumask))
@@ -2542,7 +2542,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable);
2542 * @buffer: The ring buffer to enable writes 2542 * @buffer: The ring buffer to enable writes
2543 * 2543 *
2544 * Note, multiple disables will need the same number of enables 2544 * Note, multiple disables will need the same number of enables
2545 * to truely enable the writing (much like preempt_disable). 2545 * to truly enable the writing (much like preempt_disable).
2546 */ 2546 */
2547void ring_buffer_record_enable(struct ring_buffer *buffer) 2547void ring_buffer_record_enable(struct ring_buffer *buffer)
2548{ 2548{
@@ -2578,7 +2578,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu);
2578 * @cpu: The CPU to enable. 2578 * @cpu: The CPU to enable.
2579 * 2579 *
2580 * Note, multiple disables will need the same number of enables 2580 * Note, multiple disables will need the same number of enables
2581 * to truely enable the writing (much like preempt_disable). 2581 * to truly enable the writing (much like preempt_disable).
2582 */ 2582 */
2583void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) 2583void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
2584{ 2584{
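
The ring-buffer hunks above move the record_disabled test after ftrace_preempt_disable(), so the test and the subsequent write happen inside the same region that excludes a concurrent reset. A rough userspace sketch of why the check belongs inside the exclusion region, with a pthread mutex standing in for disabled preemption:

/* Test the "disabled" flag inside the exclusion region, not before it. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int record_disabled;
static int buffer;

static int write_event(int val)
{
        pthread_mutex_lock(&lock);      /* enter the exclusion region first */
        if (record_disabled) {          /* ...then test the flag */
                pthread_mutex_unlock(&lock);
                return -1;
        }
        buffer = val;
        pthread_mutex_unlock(&lock);
        return 0;
}

int main(void)
{
        printf("write: %d (buffer=%d)\n", write_event(42), buffer);
        return 0;
}
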
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ed01fdba4a55..3ec2ee6f6560 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -374,6 +374,21 @@ static int __init set_buf_size(char *str)
374} 374}
375__setup("trace_buf_size=", set_buf_size); 375__setup("trace_buf_size=", set_buf_size);
376 376
377static int __init set_tracing_thresh(char *str)
378{
379 unsigned long threshhold;
380 int ret;
381
382 if (!str)
383 return 0;
384 ret = strict_strtoul(str, 0, &threshhold);
385 if (ret < 0)
386 return 0;
387 tracing_thresh = threshhold * 1000;
388 return 1;
389}
390__setup("tracing_thresh=", set_tracing_thresh);
391
377unsigned long nsecs_to_usecs(unsigned long nsecs) 392unsigned long nsecs_to_usecs(unsigned long nsecs)
378{ 393{
379 return nsecs / 1000; 394 return nsecs / 1000;
@@ -579,9 +594,10 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
579static arch_spinlock_t ftrace_max_lock = 594static arch_spinlock_t ftrace_max_lock =
580 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; 595 (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
581 596
597unsigned long __read_mostly tracing_thresh;
598
582#ifdef CONFIG_TRACER_MAX_TRACE 599#ifdef CONFIG_TRACER_MAX_TRACE
583unsigned long __read_mostly tracing_max_latency; 600unsigned long __read_mostly tracing_max_latency;
584unsigned long __read_mostly tracing_thresh;
585 601
586/* 602/*
587 * Copy the new maximum trace into the separate maximum-trace 603 * Copy the new maximum trace into the separate maximum-trace
@@ -592,7 +608,7 @@ static void
592__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) 608__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
593{ 609{
594 struct trace_array_cpu *data = tr->data[cpu]; 610 struct trace_array_cpu *data = tr->data[cpu];
595 struct trace_array_cpu *max_data = tr->data[cpu]; 611 struct trace_array_cpu *max_data;
596 612
597 max_tr.cpu = cpu; 613 max_tr.cpu = cpu;
598 max_tr.time_start = data->preempt_timestamp; 614 max_tr.time_start = data->preempt_timestamp;
@@ -602,7 +618,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
602 max_data->critical_start = data->critical_start; 618 max_data->critical_start = data->critical_start;
603 max_data->critical_end = data->critical_end; 619 max_data->critical_end = data->critical_end;
604 620
605 memcpy(data->comm, tsk->comm, TASK_COMM_LEN); 621 memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
606 max_data->pid = tsk->pid; 622 max_data->pid = tsk->pid;
607 max_data->uid = task_uid(tsk); 623 max_data->uid = task_uid(tsk);
608 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; 624 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
@@ -824,10 +840,10 @@ out:
824 mutex_unlock(&trace_types_lock); 840 mutex_unlock(&trace_types_lock);
825} 841}
826 842
827static void __tracing_reset(struct trace_array *tr, int cpu) 843static void __tracing_reset(struct ring_buffer *buffer, int cpu)
828{ 844{
829 ftrace_disable_cpu(); 845 ftrace_disable_cpu();
830 ring_buffer_reset_cpu(tr->buffer, cpu); 846 ring_buffer_reset_cpu(buffer, cpu);
831 ftrace_enable_cpu(); 847 ftrace_enable_cpu();
832} 848}
833 849
@@ -839,7 +855,7 @@ void tracing_reset(struct trace_array *tr, int cpu)
839 855
840 /* Make sure all commits have finished */ 856 /* Make sure all commits have finished */
841 synchronize_sched(); 857 synchronize_sched();
842 __tracing_reset(tr, cpu); 858 __tracing_reset(buffer, cpu);
843 859
844 ring_buffer_record_enable(buffer); 860 ring_buffer_record_enable(buffer);
845} 861}
@@ -857,7 +873,7 @@ void tracing_reset_online_cpus(struct trace_array *tr)
857 tr->time_start = ftrace_now(tr->cpu); 873 tr->time_start = ftrace_now(tr->cpu);
858 874
859 for_each_online_cpu(cpu) 875 for_each_online_cpu(cpu)
860 __tracing_reset(tr, cpu); 876 __tracing_reset(buffer, cpu);
861 877
862 ring_buffer_record_enable(buffer); 878 ring_buffer_record_enable(buffer);
863} 879}
@@ -934,6 +950,8 @@ void tracing_start(void)
934 goto out; 950 goto out;
935 } 951 }
936 952
953 /* Prevent the buffers from switching */
954 arch_spin_lock(&ftrace_max_lock);
937 955
938 buffer = global_trace.buffer; 956 buffer = global_trace.buffer;
939 if (buffer) 957 if (buffer)
@@ -943,6 +961,8 @@ void tracing_start(void)
943 if (buffer) 961 if (buffer)
944 ring_buffer_record_enable(buffer); 962 ring_buffer_record_enable(buffer);
945 963
964 arch_spin_unlock(&ftrace_max_lock);
965
946 ftrace_start(); 966 ftrace_start();
947 out: 967 out:
948 spin_unlock_irqrestore(&tracing_start_lock, flags); 968 spin_unlock_irqrestore(&tracing_start_lock, flags);
@@ -964,6 +984,9 @@ void tracing_stop(void)
964 if (trace_stop_count++) 984 if (trace_stop_count++)
965 goto out; 985 goto out;
966 986
987 /* Prevent the buffers from switching */
988 arch_spin_lock(&ftrace_max_lock);
989
967 buffer = global_trace.buffer; 990 buffer = global_trace.buffer;
968 if (buffer) 991 if (buffer)
969 ring_buffer_record_disable(buffer); 992 ring_buffer_record_disable(buffer);
@@ -972,6 +995,8 @@ void tracing_stop(void)
972 if (buffer) 995 if (buffer)
973 ring_buffer_record_disable(buffer); 996 ring_buffer_record_disable(buffer);
974 997
998 arch_spin_unlock(&ftrace_max_lock);
999
975 out: 1000 out:
976 spin_unlock_irqrestore(&tracing_start_lock, flags); 1001 spin_unlock_irqrestore(&tracing_start_lock, flags);
977} 1002}
@@ -1259,6 +1284,13 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1259 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) 1284 if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1260 return; 1285 return;
1261 1286
1287 /*
1288 * NMIs can not handle page faults, even with fix ups.
1289 * The save user stack can (and often does) fault.
1290 */
1291 if (unlikely(in_nmi()))
1292 return;
1293
1262 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, 1294 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1263 sizeof(*entry), flags, pc); 1295 sizeof(*entry), flags, pc);
1264 if (!event) 1296 if (!event)
@@ -1703,6 +1735,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
1703 1735
1704 ftrace_enable_cpu(); 1736 ftrace_enable_cpu();
1705 1737
1738 iter->leftover = 0;
1706 for (p = iter; p && l < *pos; p = s_next(m, p, &l)) 1739 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1707 ; 1740 ;
1708 1741
@@ -4248,10 +4281,10 @@ static __init int tracer_init_debugfs(void)
4248#ifdef CONFIG_TRACER_MAX_TRACE 4281#ifdef CONFIG_TRACER_MAX_TRACE
4249 trace_create_file("tracing_max_latency", 0644, d_tracer, 4282 trace_create_file("tracing_max_latency", 0644, d_tracer,
4250 &tracing_max_latency, &tracing_max_lat_fops); 4283 &tracing_max_latency, &tracing_max_lat_fops);
4284#endif
4251 4285
4252 trace_create_file("tracing_thresh", 0644, d_tracer, 4286 trace_create_file("tracing_thresh", 0644, d_tracer,
4253 &tracing_thresh, &tracing_max_lat_fops); 4287 &tracing_thresh, &tracing_max_lat_fops);
4254#endif
4255 4288
4256 trace_create_file("README", 0444, d_tracer, 4289 trace_create_file("README", 0444, d_tracer,
4257 NULL, &tracing_readme_fops); 4290 NULL, &tracing_readme_fops);
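
The new tracing_thresh= boot parameter handler above parses a value in microseconds and stores it internally in nanoseconds, and tracing_thresh now exists even without CONFIG_TRACER_MAX_TRACE. A userspace sketch of the parsing and unit conversion, with strtoul() error handling standing in for the kernel's strict_strtoul():

/* Parse microseconds from a string, store nanoseconds. */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static unsigned long tracing_thresh;    /* nanoseconds */

static int set_tracing_thresh(const char *str)
{
        char *end;
        unsigned long usecs;

        if (!str)
                return 0;
        errno = 0;
        usecs = strtoul(str, &end, 0);
        if (errno || end == str || *end)
                return 0;               /* reject, keep the default */
        tracing_thresh = usecs * 1000;  /* store in nanoseconds */
        return 1;
}

int main(void)
{
        set_tracing_thresh("250");
        printf("tracing_thresh = %lu ns\n", tracing_thresh);
        return 0;
}
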
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index fd05bcaf91b0..2825ef2c0b15 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -396,9 +396,10 @@ extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
396 396
397extern unsigned long nsecs_to_usecs(unsigned long nsecs); 397extern unsigned long nsecs_to_usecs(unsigned long nsecs);
398 398
399extern unsigned long tracing_thresh;
400
399#ifdef CONFIG_TRACER_MAX_TRACE 401#ifdef CONFIG_TRACER_MAX_TRACE
400extern unsigned long tracing_max_latency; 402extern unsigned long tracing_max_latency;
401extern unsigned long tracing_thresh;
402 403
403void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu); 404void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
404void update_max_tr_single(struct trace_array *tr, 405void update_max_tr_single(struct trace_array *tr,
@@ -550,7 +551,7 @@ static inline int ftrace_trace_task(struct task_struct *task)
550 * struct trace_parser - servers for reading the user input separated by spaces 551 * struct trace_parser - servers for reading the user input separated by spaces
551 * @cont: set if the input is not complete - no final space char was found 552 * @cont: set if the input is not complete - no final space char was found
552 * @buffer: holds the parsed user input 553 * @buffer: holds the parsed user input
553 * @idx: user input lenght 554 * @idx: user input length
554 * @size: buffer size 555 * @size: buffer size
555 */ 556 */
556struct trace_parser { 557struct trace_parser {
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 84a3a7ba072a..6fbfb8f417b9 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -13,6 +13,7 @@
13 * Tracer plugins will chose a default from these clocks. 13 * Tracer plugins will chose a default from these clocks.
14 */ 14 */
15#include <linux/spinlock.h> 15#include <linux/spinlock.h>
16#include <linux/irqflags.h>
16#include <linux/hardirq.h> 17#include <linux/hardirq.h>
17#include <linux/module.h> 18#include <linux/module.h>
18#include <linux/percpu.h> 19#include <linux/percpu.h>
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_perf.c
index f0d693005075..81f691eb3a30 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_perf.c
@@ -1,32 +1,36 @@
1/* 1/*
2 * trace event based perf counter profiling 2 * trace event based perf event profiling/tracing
3 * 3 *
4 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com> 4 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
5 * 5 * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
6 */ 6 */
7 7
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/kprobes.h> 9#include <linux/kprobes.h>
10#include "trace.h" 10#include "trace.h"
11 11
12DEFINE_PER_CPU(struct pt_regs, perf_trace_regs);
13EXPORT_PER_CPU_SYMBOL_GPL(perf_trace_regs);
14
15EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
12 16
13static char *perf_trace_buf; 17static char *perf_trace_buf;
14static char *perf_trace_buf_nmi; 18static char *perf_trace_buf_nmi;
15 19
16typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ; 20typedef typeof(char [PERF_MAX_TRACE_SIZE]) perf_trace_t ;
17 21
18/* Count the events in use (per event id, not per instance) */ 22/* Count the events in use (per event id, not per instance) */
19static int total_profile_count; 23static int total_ref_count;
20 24
21static int ftrace_profile_enable_event(struct ftrace_event_call *event) 25static int perf_trace_event_enable(struct ftrace_event_call *event)
22{ 26{
23 char *buf; 27 char *buf;
24 int ret = -ENOMEM; 28 int ret = -ENOMEM;
25 29
26 if (event->profile_count++ > 0) 30 if (event->perf_refcount++ > 0)
27 return 0; 31 return 0;
28 32
29 if (!total_profile_count) { 33 if (!total_ref_count) {
30 buf = (char *)alloc_percpu(perf_trace_t); 34 buf = (char *)alloc_percpu(perf_trace_t);
31 if (!buf) 35 if (!buf)
32 goto fail_buf; 36 goto fail_buf;
@@ -40,35 +44,35 @@ static int ftrace_profile_enable_event(struct ftrace_event_call *event)
40 rcu_assign_pointer(perf_trace_buf_nmi, buf); 44 rcu_assign_pointer(perf_trace_buf_nmi, buf);
41 } 45 }
42 46
43 ret = event->profile_enable(event); 47 ret = event->perf_event_enable(event);
44 if (!ret) { 48 if (!ret) {
45 total_profile_count++; 49 total_ref_count++;
46 return 0; 50 return 0;
47 } 51 }
48 52
49fail_buf_nmi: 53fail_buf_nmi:
50 if (!total_profile_count) { 54 if (!total_ref_count) {
51 free_percpu(perf_trace_buf_nmi); 55 free_percpu(perf_trace_buf_nmi);
52 free_percpu(perf_trace_buf); 56 free_percpu(perf_trace_buf);
53 perf_trace_buf_nmi = NULL; 57 perf_trace_buf_nmi = NULL;
54 perf_trace_buf = NULL; 58 perf_trace_buf = NULL;
55 } 59 }
56fail_buf: 60fail_buf:
57 event->profile_count--; 61 event->perf_refcount--;
58 62
59 return ret; 63 return ret;
60} 64}
61 65
62int ftrace_profile_enable(int event_id) 66int perf_trace_enable(int event_id)
63{ 67{
64 struct ftrace_event_call *event; 68 struct ftrace_event_call *event;
65 int ret = -EINVAL; 69 int ret = -EINVAL;
66 70
67 mutex_lock(&event_mutex); 71 mutex_lock(&event_mutex);
68 list_for_each_entry(event, &ftrace_events, list) { 72 list_for_each_entry(event, &ftrace_events, list) {
69 if (event->id == event_id && event->profile_enable && 73 if (event->id == event_id && event->perf_event_enable &&
70 try_module_get(event->mod)) { 74 try_module_get(event->mod)) {
71 ret = ftrace_profile_enable_event(event); 75 ret = perf_trace_event_enable(event);
72 break; 76 break;
73 } 77 }
74 } 78 }
@@ -77,16 +81,16 @@ int ftrace_profile_enable(int event_id)
77 return ret; 81 return ret;
78} 82}
79 83
80static void ftrace_profile_disable_event(struct ftrace_event_call *event) 84static void perf_trace_event_disable(struct ftrace_event_call *event)
81{ 85{
82 char *buf, *nmi_buf; 86 char *buf, *nmi_buf;
83 87
84 if (--event->profile_count > 0) 88 if (--event->perf_refcount > 0)
85 return; 89 return;
86 90
87 event->profile_disable(event); 91 event->perf_event_disable(event);
88 92
89 if (!--total_profile_count) { 93 if (!--total_ref_count) {
90 buf = perf_trace_buf; 94 buf = perf_trace_buf;
91 rcu_assign_pointer(perf_trace_buf, NULL); 95 rcu_assign_pointer(perf_trace_buf, NULL);
92 96
@@ -104,14 +108,14 @@ static void ftrace_profile_disable_event(struct ftrace_event_call *event)
104 } 108 }
105} 109}
106 110
107void ftrace_profile_disable(int event_id) 111void perf_trace_disable(int event_id)
108{ 112{
109 struct ftrace_event_call *event; 113 struct ftrace_event_call *event;
110 114
111 mutex_lock(&event_mutex); 115 mutex_lock(&event_mutex);
112 list_for_each_entry(event, &ftrace_events, list) { 116 list_for_each_entry(event, &ftrace_events, list) {
113 if (event->id == event_id) { 117 if (event->id == event_id) {
114 ftrace_profile_disable_event(event); 118 perf_trace_event_disable(event);
115 module_put(event->mod); 119 module_put(event->mod);
116 break; 120 break;
117 } 121 }
@@ -119,8 +123,8 @@ void ftrace_profile_disable(int event_id)
119 mutex_unlock(&event_mutex); 123 mutex_unlock(&event_mutex);
120} 124}
121 125
122__kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type, 126__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
123 int *rctxp, unsigned long *irq_flags) 127 int *rctxp, unsigned long *irq_flags)
124{ 128{
125 struct trace_entry *entry; 129 struct trace_entry *entry;
126 char *trace_buf, *raw_data; 130 char *trace_buf, *raw_data;
@@ -138,9 +142,9 @@ __kprobes void *ftrace_perf_buf_prepare(int size, unsigned short type,
138 cpu = smp_processor_id(); 142 cpu = smp_processor_id();
139 143
140 if (in_nmi()) 144 if (in_nmi())
141 trace_buf = rcu_dereference(perf_trace_buf_nmi); 145 trace_buf = rcu_dereference_sched(perf_trace_buf_nmi);
142 else 146 else
143 trace_buf = rcu_dereference(perf_trace_buf); 147 trace_buf = rcu_dereference_sched(perf_trace_buf);
144 148
145 if (!trace_buf) 149 if (!trace_buf)
146 goto err; 150 goto err;
@@ -161,4 +165,4 @@ err_recursion:
161 local_irq_restore(*irq_flags); 165 local_irq_restore(*irq_flags);
162 return NULL; 166 return NULL;
163} 167}
164EXPORT_SYMBOL_GPL(ftrace_perf_buf_prepare); 168EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
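
The renamed trace_event_perf.c keeps the old structure: each event carries its own perf_refcount, while total_ref_count tracks how many events are live overall and decides when the shared per-CPU buffers are allocated and freed (and those buffers are now fetched with rcu_dereference_sched() at use time). A compilable userspace sketch of that two-level refcounting idea, with hypothetical names and a flag standing in for the real per-CPU buffers:

#include <stdio.h>

struct event {
	int refcount;			/* per-event users (cf. perf_refcount) */
};

static int total_ref_count;		/* events in use across the system */
static int buffers_allocated;		/* stand-in for the per-CPU buffers */

static int event_enable(struct event *ev)
{
	if (ev->refcount++ > 0)
		return 0;			/* already enabled */
	if (!total_ref_count)
		buffers_allocated = 1;		/* first user: allocate */
	total_ref_count++;
	return 0;
}

static void event_disable(struct event *ev)
{
	if (--ev->refcount > 0)
		return;				/* still has users */
	if (!--total_ref_count)
		buffers_allocated = 0;		/* last user: free */
}

int main(void)
{
	struct event a = { 0 }, b = { 0 };

	event_enable(&a);
	event_enable(&b);
	event_disable(&a);
	printf("total=%d buffers=%d\n", total_ref_count, buffers_allocated);
	event_disable(&b);
	printf("total=%d buffers=%d\n", total_ref_count, buffers_allocated);
	return 0;
}

The real enable path additionally handles allocation failure through the fail_buf labels and only bumps total_ref_count once perf_event_enable() has succeeded.
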
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 3f972ad98d04..beab8bf2f310 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -938,7 +938,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
938 trace_create_file("enable", 0644, call->dir, call, 938 trace_create_file("enable", 0644, call->dir, call,
939 enable); 939 enable);
940 940
941 if (call->id && call->profile_enable) 941 if (call->id && call->perf_event_enable)
942 trace_create_file("id", 0444, call->dir, call, 942 trace_create_file("id", 0444, call->dir, call,
943 id); 943 id);
944 944
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 3fc2a575664f..e6989d9b44da 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -237,6 +237,14 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
237 return ret; 237 return ret;
238} 238}
239 239
240int trace_graph_thresh_entry(struct ftrace_graph_ent *trace)
241{
242 if (tracing_thresh)
243 return 1;
244 else
245 return trace_graph_entry(trace);
246}
247
240static void __trace_graph_return(struct trace_array *tr, 248static void __trace_graph_return(struct trace_array *tr,
241 struct ftrace_graph_ret *trace, 249 struct ftrace_graph_ret *trace,
242 unsigned long flags, 250 unsigned long flags,
@@ -290,13 +298,26 @@ void set_graph_array(struct trace_array *tr)
290 smp_mb(); 298 smp_mb();
291} 299}
292 300
301void trace_graph_thresh_return(struct ftrace_graph_ret *trace)
302{
303 if (tracing_thresh &&
304 (trace->rettime - trace->calltime < tracing_thresh))
305 return;
306 else
307 trace_graph_return(trace);
308}
309
293static int graph_trace_init(struct trace_array *tr) 310static int graph_trace_init(struct trace_array *tr)
294{ 311{
295 int ret; 312 int ret;
296 313
297 set_graph_array(tr); 314 set_graph_array(tr);
298 ret = register_ftrace_graph(&trace_graph_return, 315 if (tracing_thresh)
299 &trace_graph_entry); 316 ret = register_ftrace_graph(&trace_graph_thresh_return,
317 &trace_graph_thresh_entry);
318 else
319 ret = register_ftrace_graph(&trace_graph_return,
320 &trace_graph_entry);
300 if (ret) 321 if (ret)
301 return ret; 322 return ret;
302 tracing_start_cmdline_record(); 323 tracing_start_cmdline_record();
@@ -920,7 +941,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
920 if (!ret) 941 if (!ret)
921 return TRACE_TYPE_PARTIAL_LINE; 942 return TRACE_TYPE_PARTIAL_LINE;
922 } else { 943 } else {
923 ret = trace_seq_printf(s, "} (%ps)\n", (void *)trace->func); 944 ret = trace_seq_printf(s, "} /* %ps */\n", (void *)trace->func);
924 if (!ret) 945 if (!ret)
925 return TRACE_TYPE_PARTIAL_LINE; 946 return TRACE_TYPE_PARTIAL_LINE;
926 } 947 }
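
The function-graph changes add threshold-aware handlers: with tracing_thresh set, entries are not logged eagerly, and a return event is only traced when the call lasted at least tracing_thresh; graph_trace_init() then picks which pair of callbacks to register up front. A compilable userspace analogue of that select-at-registration pattern (all names here are hypothetical):

#include <stdio.h>

typedef void (*ret_handler_t)(long duration_ns);

static long tracing_thresh;	/* 0 means "trace everything" */

static void plain_return(long duration_ns)
{
	printf("traced call, %ld ns\n", duration_ns);
}

static void thresh_return(long duration_ns)
{
	/* Only pass on calls that ran for at least the threshold. */
	if (tracing_thresh && duration_ns < tracing_thresh)
		return;
	plain_return(duration_ns);
}

static ret_handler_t pick_handler(void)
{
	/* Mirrors graph_trace_init(): choose the variant once, up front. */
	return tracing_thresh ? thresh_return : plain_return;
}

int main(void)
{
	ret_handler_t h;

	tracing_thresh = 1000;
	h = pick_handler();
	h(250);		/* filtered out */
	h(5000);	/* printed */
	return 0;
}
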
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 505c92273b1a..1251e367bae9 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1214,7 +1214,7 @@ static int set_print_fmt(struct trace_probe *tp)
1214#ifdef CONFIG_PERF_EVENTS 1214#ifdef CONFIG_PERF_EVENTS
1215 1215
1216/* Kprobe profile handler */ 1216/* Kprobe profile handler */
1217static __kprobes void kprobe_profile_func(struct kprobe *kp, 1217static __kprobes void kprobe_perf_func(struct kprobe *kp,
1218 struct pt_regs *regs) 1218 struct pt_regs *regs)
1219{ 1219{
1220 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); 1220 struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
@@ -1227,11 +1227,11 @@ static __kprobes void kprobe_profile_func(struct kprobe *kp,
1227 __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); 1227 __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
1228 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1228 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1229 size -= sizeof(u32); 1229 size -= sizeof(u32);
1230 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 1230 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
1231 "profile buffer not large enough")) 1231 "profile buffer not large enough"))
1232 return; 1232 return;
1233 1233
1234 entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags); 1234 entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags);
1235 if (!entry) 1235 if (!entry)
1236 return; 1236 return;
1237 1237
@@ -1240,11 +1240,11 @@ static __kprobes void kprobe_profile_func(struct kprobe *kp,
1240 for (i = 0; i < tp->nr_args; i++) 1240 for (i = 0; i < tp->nr_args; i++)
1241 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1241 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1242 1242
1243 ftrace_perf_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags); 1243 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs);
1244} 1244}
1245 1245
1246/* Kretprobe profile handler */ 1246/* Kretprobe profile handler */
1247static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri, 1247static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1248 struct pt_regs *regs) 1248 struct pt_regs *regs)
1249{ 1249{
1250 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); 1250 struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
@@ -1257,11 +1257,11 @@ static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri,
1257 __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); 1257 __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
1258 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1258 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1259 size -= sizeof(u32); 1259 size -= sizeof(u32);
1260 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 1260 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
1261 "profile buffer not large enough")) 1261 "profile buffer not large enough"))
1262 return; 1262 return;
1263 1263
1264 entry = ftrace_perf_buf_prepare(size, call->id, &rctx, &irq_flags); 1264 entry = perf_trace_buf_prepare(size, call->id, &rctx, &irq_flags);
1265 if (!entry) 1265 if (!entry)
1266 return; 1266 return;
1267 1267
@@ -1271,10 +1271,11 @@ static __kprobes void kretprobe_profile_func(struct kretprobe_instance *ri,
1271 for (i = 0; i < tp->nr_args; i++) 1271 for (i = 0; i < tp->nr_args; i++)
1272 entry->args[i] = call_fetch(&tp->args[i].fetch, regs); 1272 entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
1273 1273
1274 ftrace_perf_buf_submit(entry, size, rctx, entry->ret_ip, 1, irq_flags); 1274 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1,
1275 irq_flags, regs);
1275} 1276}
1276 1277
1277static int probe_profile_enable(struct ftrace_event_call *call) 1278static int probe_perf_enable(struct ftrace_event_call *call)
1278{ 1279{
1279 struct trace_probe *tp = (struct trace_probe *)call->data; 1280 struct trace_probe *tp = (struct trace_probe *)call->data;
1280 1281
@@ -1286,7 +1287,7 @@ static int probe_profile_enable(struct ftrace_event_call *call)
1286 return enable_kprobe(&tp->rp.kp); 1287 return enable_kprobe(&tp->rp.kp);
1287} 1288}
1288 1289
1289static void probe_profile_disable(struct ftrace_event_call *call) 1290static void probe_perf_disable(struct ftrace_event_call *call)
1290{ 1291{
1291 struct trace_probe *tp = (struct trace_probe *)call->data; 1292 struct trace_probe *tp = (struct trace_probe *)call->data;
1292 1293
@@ -1311,7 +1312,7 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1311 kprobe_trace_func(kp, regs); 1312 kprobe_trace_func(kp, regs);
1312#ifdef CONFIG_PERF_EVENTS 1313#ifdef CONFIG_PERF_EVENTS
1313 if (tp->flags & TP_FLAG_PROFILE) 1314 if (tp->flags & TP_FLAG_PROFILE)
1314 kprobe_profile_func(kp, regs); 1315 kprobe_perf_func(kp, regs);
1315#endif 1316#endif
1316 return 0; /* We don't tweek kernel, so just return 0 */ 1317 return 0; /* We don't tweek kernel, so just return 0 */
1317} 1318}
@@ -1325,7 +1326,7 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1325 kretprobe_trace_func(ri, regs); 1326 kretprobe_trace_func(ri, regs);
1326#ifdef CONFIG_PERF_EVENTS 1327#ifdef CONFIG_PERF_EVENTS
1327 if (tp->flags & TP_FLAG_PROFILE) 1328 if (tp->flags & TP_FLAG_PROFILE)
1328 kretprobe_profile_func(ri, regs); 1329 kretprobe_perf_func(ri, regs);
1329#endif 1330#endif
1330 return 0; /* We don't tweek kernel, so just return 0 */ 1331 return 0; /* We don't tweek kernel, so just return 0 */
1331} 1332}
@@ -1358,8 +1359,8 @@ static int register_probe_event(struct trace_probe *tp)
1358 call->unregfunc = probe_event_disable; 1359 call->unregfunc = probe_event_disable;
1359 1360
1360#ifdef CONFIG_PERF_EVENTS 1361#ifdef CONFIG_PERF_EVENTS
1361 call->profile_enable = probe_profile_enable; 1362 call->perf_event_enable = probe_perf_enable;
1362 call->profile_disable = probe_profile_disable; 1363 call->perf_event_disable = probe_perf_disable;
1363#endif 1364#endif
1364 call->data = tp; 1365 call->data = tp;
1365 ret = trace_add_event_call(call); 1366 ret = trace_add_event_call(call);
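
In trace_kprobe.c the handlers move to the perf_trace_buf_prepare()/perf_trace_buf_submit() pair, and submit now also receives the pt_regs so perf sees the real register state of the probed context. The contract is: reserve a record, fill it, hand it off. A compilable userspace analogue of that prepare/fill/submit shape (hypothetical names, a single static buffer instead of the RCU-protected per-CPU buffers, and no size/WARN_ONCE checks beyond the fit test):

#include <stdio.h>
#include <string.h>

#define MAX_TRACE_SIZE 64	/* stand-in for PERF_MAX_TRACE_SIZE */

static char trace_buf[MAX_TRACE_SIZE];

/* Hand out the shared buffer if the record fits ("prepare"). */
static void *buf_prepare(size_t size)
{
	if (size > MAX_TRACE_SIZE)
		return NULL;
	memset(trace_buf, 0, size);
	return trace_buf;
}

/* Consume the filled record ("submit"); the kernel version now also gets regs. */
static void buf_submit(void *rec, size_t size)
{
	(void)rec;
	printf("submitting %zu-byte record\n", size);
}

int main(void)
{
	struct { unsigned long ip; unsigned long args[2]; } *rec;

	rec = buf_prepare(sizeof(*rec));
	if (!rec)
		return 1;
	rec->ip = 0xdeadbeefUL;	/* fill the record */
	rec->args[0] = 1;
	rec->args[1] = 2;
	buf_submit(rec, sizeof(*rec));
	return 0;
}
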
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index cba47d7935cc..33c2a5b769dc 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -428,12 +428,12 @@ core_initcall(init_ftrace_syscalls);
428 428
429#ifdef CONFIG_PERF_EVENTS 429#ifdef CONFIG_PERF_EVENTS
430 430
431static DECLARE_BITMAP(enabled_prof_enter_syscalls, NR_syscalls); 431static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
432static DECLARE_BITMAP(enabled_prof_exit_syscalls, NR_syscalls); 432static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
433static int sys_prof_refcount_enter; 433static int sys_perf_refcount_enter;
434static int sys_prof_refcount_exit; 434static int sys_perf_refcount_exit;
435 435
436static void prof_syscall_enter(struct pt_regs *regs, long id) 436static void perf_syscall_enter(struct pt_regs *regs, long id)
437{ 437{
438 struct syscall_metadata *sys_data; 438 struct syscall_metadata *sys_data;
439 struct syscall_trace_enter *rec; 439 struct syscall_trace_enter *rec;
@@ -443,7 +443,7 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
443 int size; 443 int size;
444 444
445 syscall_nr = syscall_get_nr(current, regs); 445 syscall_nr = syscall_get_nr(current, regs);
446 if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) 446 if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
447 return; 447 return;
448 448
449 sys_data = syscall_nr_to_meta(syscall_nr); 449 sys_data = syscall_nr_to_meta(syscall_nr);
@@ -455,11 +455,11 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
455 size = ALIGN(size + sizeof(u32), sizeof(u64)); 455 size = ALIGN(size + sizeof(u32), sizeof(u64));
456 size -= sizeof(u32); 456 size -= sizeof(u32);
457 457
458 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 458 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
459 "profile buffer not large enough")) 459 "perf buffer not large enough"))
460 return; 460 return;
461 461
462 rec = (struct syscall_trace_enter *)ftrace_perf_buf_prepare(size, 462 rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
463 sys_data->enter_event->id, &rctx, &flags); 463 sys_data->enter_event->id, &rctx, &flags);
464 if (!rec) 464 if (!rec)
465 return; 465 return;
@@ -467,10 +467,10 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
467 rec->nr = syscall_nr; 467 rec->nr = syscall_nr;
468 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 468 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
469 (unsigned long *)&rec->args); 469 (unsigned long *)&rec->args);
470 ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); 470 perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
471} 471}
472 472
473int prof_sysenter_enable(struct ftrace_event_call *call) 473int perf_sysenter_enable(struct ftrace_event_call *call)
474{ 474{
475 int ret = 0; 475 int ret = 0;
476 int num; 476 int num;
@@ -478,34 +478,34 @@ int prof_sysenter_enable(struct ftrace_event_call *call)
478 num = ((struct syscall_metadata *)call->data)->syscall_nr; 478 num = ((struct syscall_metadata *)call->data)->syscall_nr;
479 479
480 mutex_lock(&syscall_trace_lock); 480 mutex_lock(&syscall_trace_lock);
481 if (!sys_prof_refcount_enter) 481 if (!sys_perf_refcount_enter)
482 ret = register_trace_sys_enter(prof_syscall_enter); 482 ret = register_trace_sys_enter(perf_syscall_enter);
483 if (ret) { 483 if (ret) {
484 pr_info("event trace: Could not activate" 484 pr_info("event trace: Could not activate"
485 "syscall entry trace point"); 485 "syscall entry trace point");
486 } else { 486 } else {
487 set_bit(num, enabled_prof_enter_syscalls); 487 set_bit(num, enabled_perf_enter_syscalls);
488 sys_prof_refcount_enter++; 488 sys_perf_refcount_enter++;
489 } 489 }
490 mutex_unlock(&syscall_trace_lock); 490 mutex_unlock(&syscall_trace_lock);
491 return ret; 491 return ret;
492} 492}
493 493
494void prof_sysenter_disable(struct ftrace_event_call *call) 494void perf_sysenter_disable(struct ftrace_event_call *call)
495{ 495{
496 int num; 496 int num;
497 497
498 num = ((struct syscall_metadata *)call->data)->syscall_nr; 498 num = ((struct syscall_metadata *)call->data)->syscall_nr;
499 499
500 mutex_lock(&syscall_trace_lock); 500 mutex_lock(&syscall_trace_lock);
501 sys_prof_refcount_enter--; 501 sys_perf_refcount_enter--;
502 clear_bit(num, enabled_prof_enter_syscalls); 502 clear_bit(num, enabled_perf_enter_syscalls);
503 if (!sys_prof_refcount_enter) 503 if (!sys_perf_refcount_enter)
504 unregister_trace_sys_enter(prof_syscall_enter); 504 unregister_trace_sys_enter(perf_syscall_enter);
505 mutex_unlock(&syscall_trace_lock); 505 mutex_unlock(&syscall_trace_lock);
506} 506}
507 507
508static void prof_syscall_exit(struct pt_regs *regs, long ret) 508static void perf_syscall_exit(struct pt_regs *regs, long ret)
509{ 509{
510 struct syscall_metadata *sys_data; 510 struct syscall_metadata *sys_data;
511 struct syscall_trace_exit *rec; 511 struct syscall_trace_exit *rec;
@@ -515,7 +515,7 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
515 int size; 515 int size;
516 516
517 syscall_nr = syscall_get_nr(current, regs); 517 syscall_nr = syscall_get_nr(current, regs);
518 if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) 518 if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
519 return; 519 return;
520 520
521 sys_data = syscall_nr_to_meta(syscall_nr); 521 sys_data = syscall_nr_to_meta(syscall_nr);
@@ -530,11 +530,11 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
530 * Impossible, but be paranoid with the future 530 * Impossible, but be paranoid with the future
531 * How to put this check outside runtime? 531 * How to put this check outside runtime?
532 */ 532 */
533 if (WARN_ONCE(size > FTRACE_MAX_PROFILE_SIZE, 533 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
534 "exit event has grown above profile buffer size")) 534 "exit event has grown above perf buffer size"))
535 return; 535 return;
536 536
537 rec = (struct syscall_trace_exit *)ftrace_perf_buf_prepare(size, 537 rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
538 sys_data->exit_event->id, &rctx, &flags); 538 sys_data->exit_event->id, &rctx, &flags);
539 if (!rec) 539 if (!rec)
540 return; 540 return;
@@ -542,10 +542,10 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
542 rec->nr = syscall_nr; 542 rec->nr = syscall_nr;
543 rec->ret = syscall_get_return_value(current, regs); 543 rec->ret = syscall_get_return_value(current, regs);
544 544
545 ftrace_perf_buf_submit(rec, size, rctx, 0, 1, flags); 545 perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
546} 546}
547 547
548int prof_sysexit_enable(struct ftrace_event_call *call) 548int perf_sysexit_enable(struct ftrace_event_call *call)
549{ 549{
550 int ret = 0; 550 int ret = 0;
551 int num; 551 int num;
@@ -553,30 +553,30 @@ int prof_sysexit_enable(struct ftrace_event_call *call)
553 num = ((struct syscall_metadata *)call->data)->syscall_nr; 553 num = ((struct syscall_metadata *)call->data)->syscall_nr;
554 554
555 mutex_lock(&syscall_trace_lock); 555 mutex_lock(&syscall_trace_lock);
556 if (!sys_prof_refcount_exit) 556 if (!sys_perf_refcount_exit)
557 ret = register_trace_sys_exit(prof_syscall_exit); 557 ret = register_trace_sys_exit(perf_syscall_exit);
558 if (ret) { 558 if (ret) {
559 pr_info("event trace: Could not activate" 559 pr_info("event trace: Could not activate"
560 "syscall exit trace point"); 560 "syscall exit trace point");
561 } else { 561 } else {
562 set_bit(num, enabled_prof_exit_syscalls); 562 set_bit(num, enabled_perf_exit_syscalls);
563 sys_prof_refcount_exit++; 563 sys_perf_refcount_exit++;
564 } 564 }
565 mutex_unlock(&syscall_trace_lock); 565 mutex_unlock(&syscall_trace_lock);
566 return ret; 566 return ret;
567} 567}
568 568
569void prof_sysexit_disable(struct ftrace_event_call *call) 569void perf_sysexit_disable(struct ftrace_event_call *call)
570{ 570{
571 int num; 571 int num;
572 572
573 num = ((struct syscall_metadata *)call->data)->syscall_nr; 573 num = ((struct syscall_metadata *)call->data)->syscall_nr;
574 574
575 mutex_lock(&syscall_trace_lock); 575 mutex_lock(&syscall_trace_lock);
576 sys_prof_refcount_exit--; 576 sys_perf_refcount_exit--;
577 clear_bit(num, enabled_prof_exit_syscalls); 577 clear_bit(num, enabled_perf_exit_syscalls);
578 if (!sys_prof_refcount_exit) 578 if (!sys_perf_refcount_exit)
579 unregister_trace_sys_exit(prof_syscall_exit); 579 unregister_trace_sys_exit(perf_syscall_exit);
580 mutex_unlock(&syscall_trace_lock); 580 mutex_unlock(&syscall_trace_lock);
581} 581}
582 582
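
trace_syscalls.c gets the same prof_ to perf_ renaming and keeps its bookkeeping: a per-syscall bitmap records which events are enabled, and the enter/exit refcounts decide when the shared tracepoint callback is registered or unregistered, all under syscall_trace_lock. A compilable sketch of that bitmap-plus-refcount pattern (hypothetical names, a pthread mutex standing in for syscall_trace_lock, the enter side only):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_SYSCALLS 512

static bool enabled_enter[NR_SYSCALLS];	/* cf. enabled_perf_enter_syscalls */
static int refcount_enter;		/* cf. sys_perf_refcount_enter */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static void register_enter_callback(void)   { puts("callback registered"); }
static void unregister_enter_callback(void) { puts("callback unregistered"); }

static void sysenter_enable(int nr)
{
	pthread_mutex_lock(&lock);
	if (!refcount_enter)
		register_enter_callback();	/* first user hooks the tracepoint */
	enabled_enter[nr] = true;
	refcount_enter++;
	pthread_mutex_unlock(&lock);
}

static void sysenter_disable(int nr)
{
	pthread_mutex_lock(&lock);
	refcount_enter--;
	enabled_enter[nr] = false;
	if (!refcount_enter)
		unregister_enter_callback();	/* last user unhooks it */
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	sysenter_enable(1);
	sysenter_enable(2);
	sysenter_disable(1);
	sysenter_disable(2);
	return 0;
}

The kernel version also checks the register_trace_sys_enter() return value and only sets the bit and bumps the count when registration succeeded.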