aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/audit.c67
-rw-r--r--kernel/audit.h5
-rw-r--r--kernel/audit_tree.c9
-rw-r--r--kernel/audit_watch.c4
-rw-r--r--kernel/auditfilter.c12
-rw-r--r--kernel/auditsc.c16
-rw-r--r--kernel/cgroup.c140
-rw-r--r--kernel/cgroup_freezer.c72
-rw-r--r--kernel/cpuset.c13
-rw-r--r--kernel/cred.c4
-rw-r--r--kernel/debug/debug_core.c16
-rw-r--r--kernel/debug/kdb/kdb_main.c48
-rw-r--r--kernel/exit.c10
-rw-r--r--kernel/fork.c2
-rw-r--r--kernel/irq/irqdesc.c15
-rw-r--r--kernel/irq/manage.c4
-rw-r--r--kernel/jump_label.c77
-rw-r--r--kernel/kprobes.c33
-rw-r--r--kernel/latencytop.c17
-rw-r--r--kernel/module.c2
-rw-r--r--kernel/ns_cgroup.c8
-rw-r--r--kernel/perf_event.c136
-rw-r--r--kernel/printk.c6
-rw-r--r--kernel/ptrace.c36
-rw-r--r--kernel/range.c2
-rw-r--r--kernel/relay.c15
-rw-r--r--kernel/resource.c153
-rw-r--r--kernel/sched.c8
-rw-r--r--kernel/sched_fair.c25
-rw-r--r--kernel/sched_stats.h20
-rw-r--r--kernel/signal.c5
-rw-r--r--kernel/smp.c8
-rw-r--r--kernel/softirq.c16
-rw-r--r--kernel/sysctl.c9
-rw-r--r--kernel/taskstats.c172
-rw-r--r--kernel/trace/blktrace.c4
-rw-r--r--kernel/trace/ring_buffer.c335
-rw-r--r--kernel/trace/trace.c8
-rw-r--r--kernel/trace/trace_kprobe.c1
-rw-r--r--kernel/tsacct.c10
-rw-r--r--kernel/watchdog.c2
41 files changed, 928 insertions, 617 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index d96045789b54..77770a034d59 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -467,23 +467,16 @@ static int audit_prepare_user_tty(pid_t pid, uid_t loginuid, u32 sessionid)
467 struct task_struct *tsk; 467 struct task_struct *tsk;
468 int err; 468 int err;
469 469
470 read_lock(&tasklist_lock); 470 rcu_read_lock();
471 tsk = find_task_by_vpid(pid); 471 tsk = find_task_by_vpid(pid);
472 err = -ESRCH; 472 if (!tsk) {
473 if (!tsk) 473 rcu_read_unlock();
474 goto out; 474 return -ESRCH;
475 err = 0; 475 }
476 476 get_task_struct(tsk);
477 spin_lock_irq(&tsk->sighand->siglock); 477 rcu_read_unlock();
478 if (!tsk->signal->audit_tty) 478 err = tty_audit_push_task(tsk, loginuid, sessionid);
479 err = -EPERM; 479 put_task_struct(tsk);
480 spin_unlock_irq(&tsk->sighand->siglock);
481 if (err)
482 goto out;
483
484 tty_audit_push_task(tsk, loginuid, sessionid);
485out:
486 read_unlock(&tasklist_lock);
487 return err; 480 return err;
488} 481}
489 482
@@ -506,7 +499,7 @@ int audit_send_list(void *_dest)
506} 499}
507 500
508struct sk_buff *audit_make_reply(int pid, int seq, int type, int done, 501struct sk_buff *audit_make_reply(int pid, int seq, int type, int done,
509 int multi, void *payload, int size) 502 int multi, const void *payload, int size)
510{ 503{
511 struct sk_buff *skb; 504 struct sk_buff *skb;
512 struct nlmsghdr *nlh; 505 struct nlmsghdr *nlh;
@@ -555,8 +548,8 @@ static int audit_send_reply_thread(void *arg)
555 * Allocates an skb, builds the netlink message, and sends it to the pid. 548 * Allocates an skb, builds the netlink message, and sends it to the pid.
556 * No failure notifications. 549 * No failure notifications.
557 */ 550 */
558void audit_send_reply(int pid, int seq, int type, int done, int multi, 551static void audit_send_reply(int pid, int seq, int type, int done, int multi,
559 void *payload, int size) 552 const void *payload, int size)
560{ 553{
561 struct sk_buff *skb; 554 struct sk_buff *skb;
562 struct task_struct *tsk; 555 struct task_struct *tsk;
@@ -880,40 +873,40 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
880 case AUDIT_TTY_GET: { 873 case AUDIT_TTY_GET: {
881 struct audit_tty_status s; 874 struct audit_tty_status s;
882 struct task_struct *tsk; 875 struct task_struct *tsk;
876 unsigned long flags;
883 877
884 read_lock(&tasklist_lock); 878 rcu_read_lock();
885 tsk = find_task_by_vpid(pid); 879 tsk = find_task_by_vpid(pid);
886 if (!tsk) 880 if (tsk && lock_task_sighand(tsk, &flags)) {
887 err = -ESRCH;
888 else {
889 spin_lock_irq(&tsk->sighand->siglock);
890 s.enabled = tsk->signal->audit_tty != 0; 881 s.enabled = tsk->signal->audit_tty != 0;
891 spin_unlock_irq(&tsk->sighand->siglock); 882 unlock_task_sighand(tsk, &flags);
892 } 883 } else
893 read_unlock(&tasklist_lock); 884 err = -ESRCH;
894 audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_TTY_GET, 0, 0, 885 rcu_read_unlock();
895 &s, sizeof(s)); 886
887 if (!err)
888 audit_send_reply(NETLINK_CB(skb).pid, seq,
889 AUDIT_TTY_GET, 0, 0, &s, sizeof(s));
896 break; 890 break;
897 } 891 }
898 case AUDIT_TTY_SET: { 892 case AUDIT_TTY_SET: {
899 struct audit_tty_status *s; 893 struct audit_tty_status *s;
900 struct task_struct *tsk; 894 struct task_struct *tsk;
895 unsigned long flags;
901 896
902 if (nlh->nlmsg_len < sizeof(struct audit_tty_status)) 897 if (nlh->nlmsg_len < sizeof(struct audit_tty_status))
903 return -EINVAL; 898 return -EINVAL;
904 s = data; 899 s = data;
905 if (s->enabled != 0 && s->enabled != 1) 900 if (s->enabled != 0 && s->enabled != 1)
906 return -EINVAL; 901 return -EINVAL;
907 read_lock(&tasklist_lock); 902 rcu_read_lock();
908 tsk = find_task_by_vpid(pid); 903 tsk = find_task_by_vpid(pid);
909 if (!tsk) 904 if (tsk && lock_task_sighand(tsk, &flags)) {
910 err = -ESRCH;
911 else {
912 spin_lock_irq(&tsk->sighand->siglock);
913 tsk->signal->audit_tty = s->enabled != 0; 905 tsk->signal->audit_tty = s->enabled != 0;
914 spin_unlock_irq(&tsk->sighand->siglock); 906 unlock_task_sighand(tsk, &flags);
915 } 907 } else
916 read_unlock(&tasklist_lock); 908 err = -ESRCH;
909 rcu_read_unlock();
917 break; 910 break;
918 } 911 }
919 default: 912 default:
diff --git a/kernel/audit.h b/kernel/audit.h
index f7206db4e13d..91e7071c4d2c 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -84,10 +84,7 @@ extern int audit_compare_dname_path(const char *dname, const char *path,
84 int *dirlen); 84 int *dirlen);
85extern struct sk_buff * audit_make_reply(int pid, int seq, int type, 85extern struct sk_buff * audit_make_reply(int pid, int seq, int type,
86 int done, int multi, 86 int done, int multi,
87 void *payload, int size); 87 const void *payload, int size);
88extern void audit_send_reply(int pid, int seq, int type,
89 int done, int multi,
90 void *payload, int size);
91extern void audit_panic(const char *message); 88extern void audit_panic(const char *message);
92 89
93struct audit_netlink_list { 90struct audit_netlink_list {
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 7f18d3a4527e..37b2bea170c8 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -223,7 +223,7 @@ static void untag_chunk(struct node *p)
223{ 223{
224 struct audit_chunk *chunk = find_chunk(p); 224 struct audit_chunk *chunk = find_chunk(p);
225 struct fsnotify_mark *entry = &chunk->mark; 225 struct fsnotify_mark *entry = &chunk->mark;
226 struct audit_chunk *new; 226 struct audit_chunk *new = NULL;
227 struct audit_tree *owner; 227 struct audit_tree *owner;
228 int size = chunk->count - 1; 228 int size = chunk->count - 1;
229 int i, j; 229 int i, j;
@@ -232,9 +232,14 @@ static void untag_chunk(struct node *p)
232 232
233 spin_unlock(&hash_lock); 233 spin_unlock(&hash_lock);
234 234
235 if (size)
236 new = alloc_chunk(size);
237
235 spin_lock(&entry->lock); 238 spin_lock(&entry->lock);
236 if (chunk->dead || !entry->i.inode) { 239 if (chunk->dead || !entry->i.inode) {
237 spin_unlock(&entry->lock); 240 spin_unlock(&entry->lock);
241 if (new)
242 free_chunk(new);
238 goto out; 243 goto out;
239 } 244 }
240 245
@@ -255,9 +260,9 @@ static void untag_chunk(struct node *p)
255 goto out; 260 goto out;
256 } 261 }
257 262
258 new = alloc_chunk(size);
259 if (!new) 263 if (!new)
260 goto Fallback; 264 goto Fallback;
265
261 fsnotify_duplicate_mark(&new->mark, entry); 266 fsnotify_duplicate_mark(&new->mark, entry);
262 if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.i.inode, NULL, 1)) { 267 if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.i.inode, NULL, 1)) {
263 free_chunk(new); 268 free_chunk(new);
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index f0c9b2e7542d..d2e3c7866460 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -60,7 +60,7 @@ struct audit_parent {
60}; 60};
61 61
62/* fsnotify handle. */ 62/* fsnotify handle. */
63struct fsnotify_group *audit_watch_group; 63static struct fsnotify_group *audit_watch_group;
64 64
65/* fsnotify events we care about. */ 65/* fsnotify events we care about. */
66#define AUDIT_FS_WATCH (FS_MOVE | FS_CREATE | FS_DELETE | FS_DELETE_SELF |\ 66#define AUDIT_FS_WATCH (FS_MOVE | FS_CREATE | FS_DELETE | FS_DELETE_SELF |\
@@ -123,7 +123,7 @@ void audit_put_watch(struct audit_watch *watch)
123 } 123 }
124} 124}
125 125
126void audit_remove_watch(struct audit_watch *watch) 126static void audit_remove_watch(struct audit_watch *watch)
127{ 127{
128 list_del(&watch->wlist); 128 list_del(&watch->wlist);
129 audit_put_parent(watch->parent); 129 audit_put_parent(watch->parent);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index eb7675499fb5..add2819af71b 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1252,6 +1252,18 @@ static int audit_filter_user_rules(struct netlink_skb_parms *cb,
1252 case AUDIT_LOGINUID: 1252 case AUDIT_LOGINUID:
1253 result = audit_comparator(cb->loginuid, f->op, f->val); 1253 result = audit_comparator(cb->loginuid, f->op, f->val);
1254 break; 1254 break;
1255 case AUDIT_SUBJ_USER:
1256 case AUDIT_SUBJ_ROLE:
1257 case AUDIT_SUBJ_TYPE:
1258 case AUDIT_SUBJ_SEN:
1259 case AUDIT_SUBJ_CLR:
1260 if (f->lsm_rule)
1261 result = security_audit_rule_match(cb->sid,
1262 f->type,
1263 f->op,
1264 f->lsm_rule,
1265 NULL);
1266 break;
1255 } 1267 }
1256 1268
1257 if (!result) 1269 if (!result)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 1b31c130d034..f49a0318c2ed 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -241,6 +241,10 @@ struct audit_context {
241 pid_t pid; 241 pid_t pid;
242 struct audit_cap_data cap; 242 struct audit_cap_data cap;
243 } capset; 243 } capset;
244 struct {
245 int fd;
246 int flags;
247 } mmap;
244 }; 248 };
245 int fds[2]; 249 int fds[2];
246 250
@@ -1305,6 +1309,10 @@ static void show_special(struct audit_context *context, int *call_panic)
1305 audit_log_cap(ab, "cap_pp", &context->capset.cap.permitted); 1309 audit_log_cap(ab, "cap_pp", &context->capset.cap.permitted);
1306 audit_log_cap(ab, "cap_pe", &context->capset.cap.effective); 1310 audit_log_cap(ab, "cap_pe", &context->capset.cap.effective);
1307 break; } 1311 break; }
1312 case AUDIT_MMAP: {
1313 audit_log_format(ab, "fd=%d flags=0x%x", context->mmap.fd,
1314 context->mmap.flags);
1315 break; }
1308 } 1316 }
1309 audit_log_end(ab); 1317 audit_log_end(ab);
1310} 1318}
@@ -2476,6 +2484,14 @@ void __audit_log_capset(pid_t pid,
2476 context->type = AUDIT_CAPSET; 2484 context->type = AUDIT_CAPSET;
2477} 2485}
2478 2486
2487void __audit_mmap_fd(int fd, int flags)
2488{
2489 struct audit_context *context = current->audit_context;
2490 context->mmap.fd = fd;
2491 context->mmap.flags = flags;
2492 context->type = AUDIT_MMAP;
2493}
2494
2479/** 2495/**
2480 * audit_core_dumps - record information about processes that end abnormally 2496 * audit_core_dumps - record information about processes that end abnormally
2481 * @signr: signal value 2497 * @signr: signal value
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 9270d532ec3c..66a416b42c18 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -243,6 +243,11 @@ static int notify_on_release(const struct cgroup *cgrp)
243 return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); 243 return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
244} 244}
245 245
246static int clone_children(const struct cgroup *cgrp)
247{
248 return test_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
249}
250
246/* 251/*
247 * for_each_subsys() allows you to iterate on each subsystem attached to 252 * for_each_subsys() allows you to iterate on each subsystem attached to
248 * an active hierarchy 253 * an active hierarchy
@@ -1040,6 +1045,8 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs)
1040 seq_puts(seq, ",noprefix"); 1045 seq_puts(seq, ",noprefix");
1041 if (strlen(root->release_agent_path)) 1046 if (strlen(root->release_agent_path))
1042 seq_printf(seq, ",release_agent=%s", root->release_agent_path); 1047 seq_printf(seq, ",release_agent=%s", root->release_agent_path);
1048 if (clone_children(&root->top_cgroup))
1049 seq_puts(seq, ",clone_children");
1043 if (strlen(root->name)) 1050 if (strlen(root->name))
1044 seq_printf(seq, ",name=%s", root->name); 1051 seq_printf(seq, ",name=%s", root->name);
1045 mutex_unlock(&cgroup_mutex); 1052 mutex_unlock(&cgroup_mutex);
@@ -1050,6 +1057,7 @@ struct cgroup_sb_opts {
1050 unsigned long subsys_bits; 1057 unsigned long subsys_bits;
1051 unsigned long flags; 1058 unsigned long flags;
1052 char *release_agent; 1059 char *release_agent;
1060 bool clone_children;
1053 char *name; 1061 char *name;
1054 /* User explicitly requested empty subsystem */ 1062 /* User explicitly requested empty subsystem */
1055 bool none; 1063 bool none;
@@ -1066,7 +1074,8 @@ struct cgroup_sb_opts {
1066 */ 1074 */
1067static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) 1075static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1068{ 1076{
1069 char *token, *o = data ?: "all"; 1077 char *token, *o = data;
1078 bool all_ss = false, one_ss = false;
1070 unsigned long mask = (unsigned long)-1; 1079 unsigned long mask = (unsigned long)-1;
1071 int i; 1080 int i;
1072 bool module_pin_failed = false; 1081 bool module_pin_failed = false;
@@ -1082,22 +1091,27 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1082 while ((token = strsep(&o, ",")) != NULL) { 1091 while ((token = strsep(&o, ",")) != NULL) {
1083 if (!*token) 1092 if (!*token)
1084 return -EINVAL; 1093 return -EINVAL;
1085 if (!strcmp(token, "all")) { 1094 if (!strcmp(token, "none")) {
1086 /* Add all non-disabled subsystems */
1087 opts->subsys_bits = 0;
1088 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1089 struct cgroup_subsys *ss = subsys[i];
1090 if (ss == NULL)
1091 continue;
1092 if (!ss->disabled)
1093 opts->subsys_bits |= 1ul << i;
1094 }
1095 } else if (!strcmp(token, "none")) {
1096 /* Explicitly have no subsystems */ 1095 /* Explicitly have no subsystems */
1097 opts->none = true; 1096 opts->none = true;
1098 } else if (!strcmp(token, "noprefix")) { 1097 continue;
1098 }
1099 if (!strcmp(token, "all")) {
1100 /* Mutually exclusive option 'all' + subsystem name */
1101 if (one_ss)
1102 return -EINVAL;
1103 all_ss = true;
1104 continue;
1105 }
1106 if (!strcmp(token, "noprefix")) {
1099 set_bit(ROOT_NOPREFIX, &opts->flags); 1107 set_bit(ROOT_NOPREFIX, &opts->flags);
1100 } else if (!strncmp(token, "release_agent=", 14)) { 1108 continue;
1109 }
1110 if (!strcmp(token, "clone_children")) {
1111 opts->clone_children = true;
1112 continue;
1113 }
1114 if (!strncmp(token, "release_agent=", 14)) {
1101 /* Specifying two release agents is forbidden */ 1115 /* Specifying two release agents is forbidden */
1102 if (opts->release_agent) 1116 if (opts->release_agent)
1103 return -EINVAL; 1117 return -EINVAL;
@@ -1105,7 +1119,9 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1105 kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL); 1119 kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL);
1106 if (!opts->release_agent) 1120 if (!opts->release_agent)
1107 return -ENOMEM; 1121 return -ENOMEM;
1108 } else if (!strncmp(token, "name=", 5)) { 1122 continue;
1123 }
1124 if (!strncmp(token, "name=", 5)) {
1109 const char *name = token + 5; 1125 const char *name = token + 5;
1110 /* Can't specify an empty name */ 1126 /* Can't specify an empty name */
1111 if (!strlen(name)) 1127 if (!strlen(name))
@@ -1127,20 +1143,44 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
1127 GFP_KERNEL); 1143 GFP_KERNEL);
1128 if (!opts->name) 1144 if (!opts->name)
1129 return -ENOMEM; 1145 return -ENOMEM;
1130 } else { 1146
1131 struct cgroup_subsys *ss; 1147 continue;
1132 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 1148 }
1133 ss = subsys[i]; 1149
1134 if (ss == NULL) 1150 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1135 continue; 1151 struct cgroup_subsys *ss = subsys[i];
1136 if (!strcmp(token, ss->name)) { 1152 if (ss == NULL)
1137 if (!ss->disabled) 1153 continue;
1138 set_bit(i, &opts->subsys_bits); 1154 if (strcmp(token, ss->name))
1139 break; 1155 continue;
1140 } 1156 if (ss->disabled)
1141 } 1157 continue;
1142 if (i == CGROUP_SUBSYS_COUNT) 1158
1143 return -ENOENT; 1159 /* Mutually exclusive option 'all' + subsystem name */
1160 if (all_ss)
1161 return -EINVAL;
1162 set_bit(i, &opts->subsys_bits);
1163 one_ss = true;
1164
1165 break;
1166 }
1167 if (i == CGROUP_SUBSYS_COUNT)
1168 return -ENOENT;
1169 }
1170
1171 /*
1172 * If the 'all' option was specified select all the subsystems,
1173 * otherwise 'all, 'none' and a subsystem name options were not
1174 * specified, let's default to 'all'
1175 */
1176 if (all_ss || (!all_ss && !one_ss && !opts->none)) {
1177 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
1178 struct cgroup_subsys *ss = subsys[i];
1179 if (ss == NULL)
1180 continue;
1181 if (ss->disabled)
1182 continue;
1183 set_bit(i, &opts->subsys_bits);
1144 } 1184 }
1145 } 1185 }
1146 1186
@@ -1355,6 +1395,8 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts)
1355 strcpy(root->release_agent_path, opts->release_agent); 1395 strcpy(root->release_agent_path, opts->release_agent);
1356 if (opts->name) 1396 if (opts->name)
1357 strcpy(root->name, opts->name); 1397 strcpy(root->name, opts->name);
1398 if (opts->clone_children)
1399 set_bit(CGRP_CLONE_CHILDREN, &root->top_cgroup.flags);
1358 return root; 1400 return root;
1359} 1401}
1360 1402
@@ -1418,9 +1460,9 @@ static int cgroup_get_rootdir(struct super_block *sb)
1418 return 0; 1460 return 0;
1419} 1461}
1420 1462
1421static int cgroup_get_sb(struct file_system_type *fs_type, 1463static struct dentry *cgroup_mount(struct file_system_type *fs_type,
1422 int flags, const char *unused_dev_name, 1464 int flags, const char *unused_dev_name,
1423 void *data, struct vfsmount *mnt) 1465 void *data)
1424{ 1466{
1425 struct cgroup_sb_opts opts; 1467 struct cgroup_sb_opts opts;
1426 struct cgroupfs_root *root; 1468 struct cgroupfs_root *root;
@@ -1554,10 +1596,9 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
1554 drop_parsed_module_refcounts(opts.subsys_bits); 1596 drop_parsed_module_refcounts(opts.subsys_bits);
1555 } 1597 }
1556 1598
1557 simple_set_mnt(mnt, sb);
1558 kfree(opts.release_agent); 1599 kfree(opts.release_agent);
1559 kfree(opts.name); 1600 kfree(opts.name);
1560 return 0; 1601 return dget(sb->s_root);
1561 1602
1562 drop_new_super: 1603 drop_new_super:
1563 deactivate_locked_super(sb); 1604 deactivate_locked_super(sb);
@@ -1566,7 +1607,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
1566 out_err: 1607 out_err:
1567 kfree(opts.release_agent); 1608 kfree(opts.release_agent);
1568 kfree(opts.name); 1609 kfree(opts.name);
1569 return ret; 1610 return ERR_PTR(ret);
1570} 1611}
1571 1612
1572static void cgroup_kill_sb(struct super_block *sb) { 1613static void cgroup_kill_sb(struct super_block *sb) {
@@ -1616,7 +1657,7 @@ static void cgroup_kill_sb(struct super_block *sb) {
1616 1657
1617static struct file_system_type cgroup_fs_type = { 1658static struct file_system_type cgroup_fs_type = {
1618 .name = "cgroup", 1659 .name = "cgroup",
1619 .get_sb = cgroup_get_sb, 1660 .mount = cgroup_mount,
1620 .kill_sb = cgroup_kill_sb, 1661 .kill_sb = cgroup_kill_sb,
1621}; 1662};
1622 1663
@@ -1880,6 +1921,8 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft,
1880 const char *buffer) 1921 const char *buffer)
1881{ 1922{
1882 BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); 1923 BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
1924 if (strlen(buffer) >= PATH_MAX)
1925 return -EINVAL;
1883 if (!cgroup_lock_live_group(cgrp)) 1926 if (!cgroup_lock_live_group(cgrp))
1884 return -ENODEV; 1927 return -ENODEV;
1885 strcpy(cgrp->root->release_agent_path, buffer); 1928 strcpy(cgrp->root->release_agent_path, buffer);
@@ -3173,6 +3216,23 @@ fail:
3173 return ret; 3216 return ret;
3174} 3217}
3175 3218
3219static u64 cgroup_clone_children_read(struct cgroup *cgrp,
3220 struct cftype *cft)
3221{
3222 return clone_children(cgrp);
3223}
3224
3225static int cgroup_clone_children_write(struct cgroup *cgrp,
3226 struct cftype *cft,
3227 u64 val)
3228{
3229 if (val)
3230 set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
3231 else
3232 clear_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
3233 return 0;
3234}
3235
3176/* 3236/*
3177 * for the common functions, 'private' gives the type of file 3237 * for the common functions, 'private' gives the type of file
3178 */ 3238 */
@@ -3203,6 +3263,11 @@ static struct cftype files[] = {
3203 .write_string = cgroup_write_event_control, 3263 .write_string = cgroup_write_event_control,
3204 .mode = S_IWUGO, 3264 .mode = S_IWUGO,
3205 }, 3265 },
3266 {
3267 .name = "cgroup.clone_children",
3268 .read_u64 = cgroup_clone_children_read,
3269 .write_u64 = cgroup_clone_children_write,
3270 },
3206}; 3271};
3207 3272
3208static struct cftype cft_release_agent = { 3273static struct cftype cft_release_agent = {
@@ -3332,6 +3397,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
3332 if (notify_on_release(parent)) 3397 if (notify_on_release(parent))
3333 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); 3398 set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
3334 3399
3400 if (clone_children(parent))
3401 set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
3402
3335 for_each_subsys(root, ss) { 3403 for_each_subsys(root, ss) {
3336 struct cgroup_subsys_state *css = ss->create(ss, cgrp); 3404 struct cgroup_subsys_state *css = ss->create(ss, cgrp);
3337 3405
@@ -3346,6 +3414,8 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
3346 goto err_destroy; 3414 goto err_destroy;
3347 } 3415 }
3348 /* At error, ->destroy() callback has to free assigned ID. */ 3416 /* At error, ->destroy() callback has to free assigned ID. */
3417 if (clone_children(parent) && ss->post_clone)
3418 ss->post_clone(ss, cgrp);
3349 } 3419 }
3350 3420
3351 cgroup_lock_hierarchy(root); 3421 cgroup_lock_hierarchy(root);
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index ce71ed53e88f..e7bebb7c6c38 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -48,20 +48,19 @@ static inline struct freezer *task_freezer(struct task_struct *task)
48 struct freezer, css); 48 struct freezer, css);
49} 49}
50 50
51int cgroup_freezing_or_frozen(struct task_struct *task) 51static inline int __cgroup_freezing_or_frozen(struct task_struct *task)
52{ 52{
53 struct freezer *freezer; 53 enum freezer_state state = task_freezer(task)->state;
54 enum freezer_state state; 54 return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN);
55}
55 56
57int cgroup_freezing_or_frozen(struct task_struct *task)
58{
59 int result;
56 task_lock(task); 60 task_lock(task);
57 freezer = task_freezer(task); 61 result = __cgroup_freezing_or_frozen(task);
58 if (!freezer->css.cgroup->parent)
59 state = CGROUP_THAWED; /* root cgroup can't be frozen */
60 else
61 state = freezer->state;
62 task_unlock(task); 62 task_unlock(task);
63 63 return result;
64 return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN);
65} 64}
66 65
67/* 66/*
@@ -154,13 +153,6 @@ static void freezer_destroy(struct cgroup_subsys *ss,
154 kfree(cgroup_freezer(cgroup)); 153 kfree(cgroup_freezer(cgroup));
155} 154}
156 155
157/* Task is frozen or will freeze immediately when next it gets woken */
158static bool is_task_frozen_enough(struct task_struct *task)
159{
160 return frozen(task) ||
161 (task_is_stopped_or_traced(task) && freezing(task));
162}
163
164/* 156/*
165 * The call to cgroup_lock() in the freezer.state write method prevents 157 * The call to cgroup_lock() in the freezer.state write method prevents
166 * a write to that file racing against an attach, and hence the 158 * a write to that file racing against an attach, and hence the
@@ -174,24 +166,25 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
174 166
175 /* 167 /*
176 * Anything frozen can't move or be moved to/from. 168 * Anything frozen can't move or be moved to/from.
177 *
178 * Since orig_freezer->state == FROZEN means that @task has been
179 * frozen, so it's sufficient to check the latter condition.
180 */ 169 */
181 170
182 if (is_task_frozen_enough(task)) 171 freezer = cgroup_freezer(new_cgroup);
172 if (freezer->state != CGROUP_THAWED)
183 return -EBUSY; 173 return -EBUSY;
184 174
185 freezer = cgroup_freezer(new_cgroup); 175 rcu_read_lock();
186 if (freezer->state == CGROUP_FROZEN) 176 if (__cgroup_freezing_or_frozen(task)) {
177 rcu_read_unlock();
187 return -EBUSY; 178 return -EBUSY;
179 }
180 rcu_read_unlock();
188 181
189 if (threadgroup) { 182 if (threadgroup) {
190 struct task_struct *c; 183 struct task_struct *c;
191 184
192 rcu_read_lock(); 185 rcu_read_lock();
193 list_for_each_entry_rcu(c, &task->thread_group, thread_group) { 186 list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
194 if (is_task_frozen_enough(c)) { 187 if (__cgroup_freezing_or_frozen(c)) {
195 rcu_read_unlock(); 188 rcu_read_unlock();
196 return -EBUSY; 189 return -EBUSY;
197 } 190 }
@@ -236,31 +229,30 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
236/* 229/*
237 * caller must hold freezer->lock 230 * caller must hold freezer->lock
238 */ 231 */
239static void update_freezer_state(struct cgroup *cgroup, 232static void update_if_frozen(struct cgroup *cgroup,
240 struct freezer *freezer) 233 struct freezer *freezer)
241{ 234{
242 struct cgroup_iter it; 235 struct cgroup_iter it;
243 struct task_struct *task; 236 struct task_struct *task;
244 unsigned int nfrozen = 0, ntotal = 0; 237 unsigned int nfrozen = 0, ntotal = 0;
238 enum freezer_state old_state = freezer->state;
245 239
246 cgroup_iter_start(cgroup, &it); 240 cgroup_iter_start(cgroup, &it);
247 while ((task = cgroup_iter_next(cgroup, &it))) { 241 while ((task = cgroup_iter_next(cgroup, &it))) {
248 ntotal++; 242 ntotal++;
249 if (is_task_frozen_enough(task)) 243 if (frozen(task))
250 nfrozen++; 244 nfrozen++;
251 } 245 }
252 246
253 /* 247 if (old_state == CGROUP_THAWED) {
254 * Transition to FROZEN when no new tasks can be added ensures 248 BUG_ON(nfrozen > 0);
255 * that we never exist in the FROZEN state while there are unfrozen 249 } else if (old_state == CGROUP_FREEZING) {
256 * tasks. 250 if (nfrozen == ntotal)
257 */ 251 freezer->state = CGROUP_FROZEN;
258 if (nfrozen == ntotal) 252 } else { /* old_state == CGROUP_FROZEN */
259 freezer->state = CGROUP_FROZEN; 253 BUG_ON(nfrozen != ntotal);
260 else if (nfrozen > 0) 254 }
261 freezer->state = CGROUP_FREEZING; 255
262 else
263 freezer->state = CGROUP_THAWED;
264 cgroup_iter_end(cgroup, &it); 256 cgroup_iter_end(cgroup, &it);
265} 257}
266 258
@@ -279,7 +271,7 @@ static int freezer_read(struct cgroup *cgroup, struct cftype *cft,
279 if (state == CGROUP_FREEZING) { 271 if (state == CGROUP_FREEZING) {
280 /* We change from FREEZING to FROZEN lazily if the cgroup was 272 /* We change from FREEZING to FROZEN lazily if the cgroup was
281 * only partially frozen when we exitted write. */ 273 * only partially frozen when we exitted write. */
282 update_freezer_state(cgroup, freezer); 274 update_if_frozen(cgroup, freezer);
283 state = freezer->state; 275 state = freezer->state;
284 } 276 }
285 spin_unlock_irq(&freezer->lock); 277 spin_unlock_irq(&freezer->lock);
@@ -301,7 +293,7 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
301 while ((task = cgroup_iter_next(cgroup, &it))) { 293 while ((task = cgroup_iter_next(cgroup, &it))) {
302 if (!freeze_task(task, true)) 294 if (!freeze_task(task, true))
303 continue; 295 continue;
304 if (is_task_frozen_enough(task)) 296 if (frozen(task))
305 continue; 297 continue;
306 if (!freezing(task) && !freezer_should_skip(task)) 298 if (!freezing(task) && !freezer_should_skip(task))
307 num_cant_freeze_now++; 299 num_cant_freeze_now++;
@@ -335,7 +327,7 @@ static int freezer_change_state(struct cgroup *cgroup,
335 327
336 spin_lock_irq(&freezer->lock); 328 spin_lock_irq(&freezer->lock);
337 329
338 update_freezer_state(cgroup, freezer); 330 update_if_frozen(cgroup, freezer);
339 if (goal_state == freezer->state) 331 if (goal_state == freezer->state)
340 goto out; 332 goto out;
341 333
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 51b143e2a07a..4349935c2ad8 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -231,18 +231,17 @@ static DEFINE_SPINLOCK(cpuset_buffer_lock);
231 * users. If someone tries to mount the "cpuset" filesystem, we 231 * users. If someone tries to mount the "cpuset" filesystem, we
232 * silently switch it to mount "cgroup" instead 232 * silently switch it to mount "cgroup" instead
233 */ 233 */
234static int cpuset_get_sb(struct file_system_type *fs_type, 234static struct dentry *cpuset_mount(struct file_system_type *fs_type,
235 int flags, const char *unused_dev_name, 235 int flags, const char *unused_dev_name, void *data)
236 void *data, struct vfsmount *mnt)
237{ 236{
238 struct file_system_type *cgroup_fs = get_fs_type("cgroup"); 237 struct file_system_type *cgroup_fs = get_fs_type("cgroup");
239 int ret = -ENODEV; 238 struct dentry *ret = ERR_PTR(-ENODEV);
240 if (cgroup_fs) { 239 if (cgroup_fs) {
241 char mountopts[] = 240 char mountopts[] =
242 "cpuset,noprefix," 241 "cpuset,noprefix,"
243 "release_agent=/sbin/cpuset_release_agent"; 242 "release_agent=/sbin/cpuset_release_agent";
244 ret = cgroup_fs->get_sb(cgroup_fs, flags, 243 ret = cgroup_fs->mount(cgroup_fs, flags,
245 unused_dev_name, mountopts, mnt); 244 unused_dev_name, mountopts);
246 put_filesystem(cgroup_fs); 245 put_filesystem(cgroup_fs);
247 } 246 }
248 return ret; 247 return ret;
@@ -250,7 +249,7 @@ static int cpuset_get_sb(struct file_system_type *fs_type,
250 249
251static struct file_system_type cpuset_fs_type = { 250static struct file_system_type cpuset_fs_type = {
252 .name = "cpuset", 251 .name = "cpuset",
253 .get_sb = cpuset_get_sb, 252 .mount = cpuset_mount,
254}; 253};
255 254
256/* 255/*
diff --git a/kernel/cred.c b/kernel/cred.c
index 9a3e22641fe7..6a1aa004e376 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -325,7 +325,7 @@ EXPORT_SYMBOL(prepare_creds);
325 325
326/* 326/*
327 * Prepare credentials for current to perform an execve() 327 * Prepare credentials for current to perform an execve()
328 * - The caller must hold current->cred_guard_mutex 328 * - The caller must hold ->cred_guard_mutex
329 */ 329 */
330struct cred *prepare_exec_creds(void) 330struct cred *prepare_exec_creds(void)
331{ 331{
@@ -384,8 +384,6 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
384 struct cred *new; 384 struct cred *new;
385 int ret; 385 int ret;
386 386
387 mutex_init(&p->cred_guard_mutex);
388
389 if ( 387 if (
390#ifdef CONFIG_KEYS 388#ifdef CONFIG_KEYS
391 !p->cred->thread_keyring && 389 !p->cred->thread_keyring &&
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index fec596da9bd0..cefd4a11f6d9 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -209,18 +209,6 @@ int __weak kgdb_skipexception(int exception, struct pt_regs *regs)
209 return 0; 209 return 0;
210} 210}
211 211
212/**
213 * kgdb_disable_hw_debug - Disable hardware debugging while we in kgdb.
214 * @regs: Current &struct pt_regs.
215 *
216 * This function will be called if the particular architecture must
217 * disable hardware debugging while it is processing gdb packets or
218 * handling exception.
219 */
220void __weak kgdb_disable_hw_debug(struct pt_regs *regs)
221{
222}
223
224/* 212/*
225 * Some architectures need cache flushes when we set/clear a 213 * Some architectures need cache flushes when we set/clear a
226 * breakpoint: 214 * breakpoint:
@@ -484,7 +472,9 @@ static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs,
484 atomic_inc(&masters_in_kgdb); 472 atomic_inc(&masters_in_kgdb);
485 else 473 else
486 atomic_inc(&slaves_in_kgdb); 474 atomic_inc(&slaves_in_kgdb);
487 kgdb_disable_hw_debug(ks->linux_regs); 475
476 if (arch_kgdb_ops.disable_hw_break)
477 arch_kgdb_ops.disable_hw_break(regs);
488 478
489acquirelock: 479acquirelock:
490 /* 480 /*
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index d7bda21a106b..37755d621924 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1127,7 +1127,7 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
1127 /* special case below */ 1127 /* special case below */
1128 } else { 1128 } else {
1129 kdb_printf("\nEntering kdb (current=0x%p, pid %d) ", 1129 kdb_printf("\nEntering kdb (current=0x%p, pid %d) ",
1130 kdb_current, kdb_current->pid); 1130 kdb_current, kdb_current ? kdb_current->pid : 0);
1131#if defined(CONFIG_SMP) 1131#if defined(CONFIG_SMP)
1132 kdb_printf("on processor %d ", raw_smp_processor_id()); 1132 kdb_printf("on processor %d ", raw_smp_processor_id());
1133#endif 1133#endif
@@ -2603,20 +2603,17 @@ static int kdb_summary(int argc, const char **argv)
2603 */ 2603 */
2604static int kdb_per_cpu(int argc, const char **argv) 2604static int kdb_per_cpu(int argc, const char **argv)
2605{ 2605{
2606 char buf[256], fmtstr[64]; 2606 char fmtstr[64];
2607 kdb_symtab_t symtab; 2607 int cpu, diag, nextarg = 1;
2608 cpumask_t suppress = CPU_MASK_NONE; 2608 unsigned long addr, symaddr, val, bytesperword = 0, whichcpu = ~0UL;
2609 int cpu, diag;
2610 unsigned long addr, val, bytesperword = 0, whichcpu = ~0UL;
2611 2609
2612 if (argc < 1 || argc > 3) 2610 if (argc < 1 || argc > 3)
2613 return KDB_ARGCOUNT; 2611 return KDB_ARGCOUNT;
2614 2612
2615 snprintf(buf, sizeof(buf), "per_cpu__%s", argv[1]); 2613 diag = kdbgetaddrarg(argc, argv, &nextarg, &symaddr, NULL, NULL);
2616 if (!kdbgetsymval(buf, &symtab)) { 2614 if (diag)
2617 kdb_printf("%s is not a per_cpu variable\n", argv[1]); 2615 return diag;
2618 return KDB_BADADDR; 2616
2619 }
2620 if (argc >= 2) { 2617 if (argc >= 2) {
2621 diag = kdbgetularg(argv[2], &bytesperword); 2618 diag = kdbgetularg(argv[2], &bytesperword);
2622 if (diag) 2619 if (diag)
@@ -2649,46 +2646,25 @@ static int kdb_per_cpu(int argc, const char **argv)
2649#define KDB_PCU(cpu) 0 2646#define KDB_PCU(cpu) 0
2650#endif 2647#endif
2651#endif 2648#endif
2652
2653 for_each_online_cpu(cpu) { 2649 for_each_online_cpu(cpu) {
2650 if (KDB_FLAG(CMD_INTERRUPT))
2651 return 0;
2652
2654 if (whichcpu != ~0UL && whichcpu != cpu) 2653 if (whichcpu != ~0UL && whichcpu != cpu)
2655 continue; 2654 continue;
2656 addr = symtab.sym_start + KDB_PCU(cpu); 2655 addr = symaddr + KDB_PCU(cpu);
2657 diag = kdb_getword(&val, addr, bytesperword); 2656 diag = kdb_getword(&val, addr, bytesperword);
2658 if (diag) { 2657 if (diag) {
2659 kdb_printf("%5d " kdb_bfd_vma_fmt0 " - unable to " 2658 kdb_printf("%5d " kdb_bfd_vma_fmt0 " - unable to "
2660 "read, diag=%d\n", cpu, addr, diag); 2659 "read, diag=%d\n", cpu, addr, diag);
2661 continue; 2660 continue;
2662 } 2661 }
2663#ifdef CONFIG_SMP
2664 if (!val) {
2665 cpu_set(cpu, suppress);
2666 continue;
2667 }
2668#endif /* CONFIG_SMP */
2669 kdb_printf("%5d ", cpu); 2662 kdb_printf("%5d ", cpu);
2670 kdb_md_line(fmtstr, addr, 2663 kdb_md_line(fmtstr, addr,
2671 bytesperword == KDB_WORD_SIZE, 2664 bytesperword == KDB_WORD_SIZE,
2672 1, bytesperword, 1, 1, 0); 2665 1, bytesperword, 1, 1, 0);
2673 } 2666 }
2674 if (cpus_weight(suppress) == 0)
2675 return 0;
2676 kdb_printf("Zero suppressed cpu(s):");
2677 for (cpu = first_cpu(suppress); cpu < num_possible_cpus();
2678 cpu = next_cpu(cpu, suppress)) {
2679 kdb_printf(" %d", cpu);
2680 if (cpu == num_possible_cpus() - 1 ||
2681 next_cpu(cpu, suppress) != cpu + 1)
2682 continue;
2683 while (cpu < num_possible_cpus() &&
2684 next_cpu(cpu, suppress) == cpu + 1)
2685 ++cpu;
2686 kdb_printf("-%d", cpu);
2687 }
2688 kdb_printf("\n");
2689
2690#undef KDB_PCU 2667#undef KDB_PCU
2691
2692 return 0; 2668 return 0;
2693} 2669}
2694 2670
diff --git a/kernel/exit.c b/kernel/exit.c
index 894179a32ec1..21aa7b3001fb 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -96,6 +96,14 @@ static void __exit_signal(struct task_struct *tsk)
96 sig->tty = NULL; 96 sig->tty = NULL;
97 } else { 97 } else {
98 /* 98 /*
99 * This can only happen if the caller is de_thread().
100 * FIXME: this is the temporary hack, we should teach
101 * posix-cpu-timers to handle this case correctly.
102 */
103 if (unlikely(has_group_leader_pid(tsk)))
104 posix_cpu_timers_exit_group(tsk);
105
106 /*
99 * If there is any task waiting for the group exit 107 * If there is any task waiting for the group exit
100 * then notify it: 108 * then notify it:
101 */ 109 */
@@ -703,6 +711,8 @@ static void exit_mm(struct task_struct * tsk)
703 * space. 711 * space.
704 */ 712 */
705static struct task_struct *find_new_reaper(struct task_struct *father) 713static struct task_struct *find_new_reaper(struct task_struct *father)
714 __releases(&tasklist_lock)
715 __acquires(&tasklist_lock)
706{ 716{
707 struct pid_namespace *pid_ns = task_active_pid_ns(father); 717 struct pid_namespace *pid_ns = task_active_pid_ns(father);
708 struct task_struct *thread; 718 struct task_struct *thread;
diff --git a/kernel/fork.c b/kernel/fork.c
index e87aaaaf5131..3b159c5991b7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -908,6 +908,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
908 sig->oom_adj = current->signal->oom_adj; 908 sig->oom_adj = current->signal->oom_adj;
909 sig->oom_score_adj = current->signal->oom_score_adj; 909 sig->oom_score_adj = current->signal->oom_score_adj;
910 910
911 mutex_init(&sig->cred_guard_mutex);
912
911 return 0; 913 return 0;
912} 914}
913 915
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 9d917ff72675..9988d03797f5 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -393,3 +393,18 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
393 struct irq_desc *desc = irq_to_desc(irq); 393 struct irq_desc *desc = irq_to_desc(irq);
394 return desc ? desc->kstat_irqs[cpu] : 0; 394 return desc ? desc->kstat_irqs[cpu] : 0;
395} 395}
396
397#ifdef CONFIG_GENERIC_HARDIRQS
398unsigned int kstat_irqs(unsigned int irq)
399{
400 struct irq_desc *desc = irq_to_desc(irq);
401 int cpu;
402 int sum = 0;
403
404 if (!desc)
405 return 0;
406 for_each_possible_cpu(cpu)
407 sum += desc->kstat_irqs[cpu];
408 return sum;
409}
410#endif /* CONFIG_GENERIC_HARDIRQS */
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 644e8d5fa367..5f92acc5f952 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -324,6 +324,10 @@ void enable_irq(unsigned int irq)
324 if (!desc) 324 if (!desc)
325 return; 325 return;
326 326
327 if (WARN(!desc->irq_data.chip || !desc->irq_data.chip->irq_enable,
328 KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq))
329 return;
330
327 chip_bus_lock(desc); 331 chip_bus_lock(desc);
328 raw_spin_lock_irqsave(&desc->lock, flags); 332 raw_spin_lock_irqsave(&desc->lock, flags);
329 __enable_irq(desc, irq, false); 333 __enable_irq(desc, irq, false);
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 7be868bf25c6..3b79bd938330 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -39,6 +39,16 @@ struct jump_label_module_entry {
39 struct module *mod; 39 struct module *mod;
40}; 40};
41 41
42void jump_label_lock(void)
43{
44 mutex_lock(&jump_label_mutex);
45}
46
47void jump_label_unlock(void)
48{
49 mutex_unlock(&jump_label_mutex);
50}
51
42static int jump_label_cmp(const void *a, const void *b) 52static int jump_label_cmp(const void *a, const void *b)
43{ 53{
44 const struct jump_entry *jea = a; 54 const struct jump_entry *jea = a;
@@ -152,7 +162,7 @@ void jump_label_update(unsigned long key, enum jump_label_type type)
152 struct jump_label_module_entry *e_module; 162 struct jump_label_module_entry *e_module;
153 int count; 163 int count;
154 164
155 mutex_lock(&jump_label_mutex); 165 jump_label_lock();
156 entry = get_jump_label_entry((jump_label_t)key); 166 entry = get_jump_label_entry((jump_label_t)key);
157 if (entry) { 167 if (entry) {
158 count = entry->nr_entries; 168 count = entry->nr_entries;
@@ -168,13 +178,14 @@ void jump_label_update(unsigned long key, enum jump_label_type type)
168 count = e_module->nr_entries; 178 count = e_module->nr_entries;
169 iter = e_module->table; 179 iter = e_module->table;
170 while (count--) { 180 while (count--) {
171 if (kernel_text_address(iter->code)) 181 if (iter->key &&
182 kernel_text_address(iter->code))
172 arch_jump_label_transform(iter, type); 183 arch_jump_label_transform(iter, type);
173 iter++; 184 iter++;
174 } 185 }
175 } 186 }
176 } 187 }
177 mutex_unlock(&jump_label_mutex); 188 jump_label_unlock();
178} 189}
179 190
180static int addr_conflict(struct jump_entry *entry, void *start, void *end) 191static int addr_conflict(struct jump_entry *entry, void *start, void *end)
@@ -231,6 +242,7 @@ out:
231 * overlaps with any of the jump label patch addresses. Code 242 * overlaps with any of the jump label patch addresses. Code
232 * that wants to modify kernel text should first verify that 243 * that wants to modify kernel text should first verify that
233 * it does not overlap with any of the jump label addresses. 244 * it does not overlap with any of the jump label addresses.
245 * Caller must hold jump_label_mutex.
234 * 246 *
235 * returns 1 if there is an overlap, 0 otherwise 247 * returns 1 if there is an overlap, 0 otherwise
236 */ 248 */
@@ -241,7 +253,6 @@ int jump_label_text_reserved(void *start, void *end)
241 struct jump_entry *iter_stop = __start___jump_table; 253 struct jump_entry *iter_stop = __start___jump_table;
242 int conflict = 0; 254 int conflict = 0;
243 255
244 mutex_lock(&jump_label_mutex);
245 iter = iter_start; 256 iter = iter_start;
246 while (iter < iter_stop) { 257 while (iter < iter_stop) {
247 if (addr_conflict(iter, start, end)) { 258 if (addr_conflict(iter, start, end)) {
@@ -256,10 +267,16 @@ int jump_label_text_reserved(void *start, void *end)
256 conflict = module_conflict(start, end); 267 conflict = module_conflict(start, end);
257#endif 268#endif
258out: 269out:
259 mutex_unlock(&jump_label_mutex);
260 return conflict; 270 return conflict;
261} 271}
262 272
273/*
274 * Not all archs need this.
275 */
276void __weak arch_jump_label_text_poke_early(jump_label_t addr)
277{
278}
279
263static __init int init_jump_label(void) 280static __init int init_jump_label(void)
264{ 281{
265 int ret; 282 int ret;
@@ -267,7 +284,7 @@ static __init int init_jump_label(void)
267 struct jump_entry *iter_stop = __stop___jump_table; 284 struct jump_entry *iter_stop = __stop___jump_table;
268 struct jump_entry *iter; 285 struct jump_entry *iter;
269 286
270 mutex_lock(&jump_label_mutex); 287 jump_label_lock();
271 ret = build_jump_label_hashtable(__start___jump_table, 288 ret = build_jump_label_hashtable(__start___jump_table,
272 __stop___jump_table); 289 __stop___jump_table);
273 iter = iter_start; 290 iter = iter_start;
@@ -275,7 +292,7 @@ static __init int init_jump_label(void)
275 arch_jump_label_text_poke_early(iter->code); 292 arch_jump_label_text_poke_early(iter->code);
276 iter++; 293 iter++;
277 } 294 }
278 mutex_unlock(&jump_label_mutex); 295 jump_label_unlock();
279 return ret; 296 return ret;
280} 297}
281early_initcall(init_jump_label); 298early_initcall(init_jump_label);
@@ -366,6 +383,39 @@ static void remove_jump_label_module(struct module *mod)
366 } 383 }
367} 384}
368 385
386static void remove_jump_label_module_init(struct module *mod)
387{
388 struct hlist_head *head;
389 struct hlist_node *node, *node_next, *module_node, *module_node_next;
390 struct jump_label_entry *e;
391 struct jump_label_module_entry *e_module;
392 struct jump_entry *iter;
393 int i, count;
394
395 /* if the module doesn't have jump label entries, just return */
396 if (!mod->num_jump_entries)
397 return;
398
399 for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) {
400 head = &jump_label_table[i];
401 hlist_for_each_entry_safe(e, node, node_next, head, hlist) {
402 hlist_for_each_entry_safe(e_module, module_node,
403 module_node_next,
404 &(e->modules), hlist) {
405 if (e_module->mod != mod)
406 continue;
407 count = e_module->nr_entries;
408 iter = e_module->table;
409 while (count--) {
410 if (within_module_init(iter->code, mod))
411 iter->key = 0;
412 iter++;
413 }
414 }
415 }
416 }
417}
418
369static int 419static int
370jump_label_module_notify(struct notifier_block *self, unsigned long val, 420jump_label_module_notify(struct notifier_block *self, unsigned long val,
371 void *data) 421 void *data)
@@ -375,16 +425,21 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val,
375 425
376 switch (val) { 426 switch (val) {
377 case MODULE_STATE_COMING: 427 case MODULE_STATE_COMING:
378 mutex_lock(&jump_label_mutex); 428 jump_label_lock();
379 ret = add_jump_label_module(mod); 429 ret = add_jump_label_module(mod);
380 if (ret) 430 if (ret)
381 remove_jump_label_module(mod); 431 remove_jump_label_module(mod);
382 mutex_unlock(&jump_label_mutex); 432 jump_label_unlock();
383 break; 433 break;
384 case MODULE_STATE_GOING: 434 case MODULE_STATE_GOING:
385 mutex_lock(&jump_label_mutex); 435 jump_label_lock();
386 remove_jump_label_module(mod); 436 remove_jump_label_module(mod);
387 mutex_unlock(&jump_label_mutex); 437 jump_label_unlock();
438 break;
439 case MODULE_STATE_LIVE:
440 jump_label_lock();
441 remove_jump_label_module_init(mod);
442 jump_label_unlock();
388 break; 443 break;
389 } 444 }
390 return ret; 445 return ret;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 56a891914273..9737a76e106f 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -74,7 +74,8 @@ static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
74/* NOTE: change this value only with kprobe_mutex held */ 74/* NOTE: change this value only with kprobe_mutex held */
75static bool kprobes_all_disarmed; 75static bool kprobes_all_disarmed;
76 76
77static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ 77/* This protects kprobe_table and optimizing_list */
78static DEFINE_MUTEX(kprobe_mutex);
78static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; 79static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
79static struct { 80static struct {
80 spinlock_t lock ____cacheline_aligned_in_smp; 81 spinlock_t lock ____cacheline_aligned_in_smp;
@@ -595,6 +596,7 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
595} 596}
596 597
597#ifdef CONFIG_SYSCTL 598#ifdef CONFIG_SYSCTL
599/* This should be called with kprobe_mutex locked */
598static void __kprobes optimize_all_kprobes(void) 600static void __kprobes optimize_all_kprobes(void)
599{ 601{
600 struct hlist_head *head; 602 struct hlist_head *head;
@@ -607,17 +609,16 @@ static void __kprobes optimize_all_kprobes(void)
607 return; 609 return;
608 610
609 kprobes_allow_optimization = true; 611 kprobes_allow_optimization = true;
610 mutex_lock(&text_mutex);
611 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 612 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
612 head = &kprobe_table[i]; 613 head = &kprobe_table[i];
613 hlist_for_each_entry_rcu(p, node, head, hlist) 614 hlist_for_each_entry_rcu(p, node, head, hlist)
614 if (!kprobe_disabled(p)) 615 if (!kprobe_disabled(p))
615 optimize_kprobe(p); 616 optimize_kprobe(p);
616 } 617 }
617 mutex_unlock(&text_mutex);
618 printk(KERN_INFO "Kprobes globally optimized\n"); 618 printk(KERN_INFO "Kprobes globally optimized\n");
619} 619}
620 620
621/* This should be called with kprobe_mutex locked */
621static void __kprobes unoptimize_all_kprobes(void) 622static void __kprobes unoptimize_all_kprobes(void)
622{ 623{
623 struct hlist_head *head; 624 struct hlist_head *head;
@@ -1144,14 +1145,13 @@ int __kprobes register_kprobe(struct kprobe *p)
1144 if (ret) 1145 if (ret)
1145 return ret; 1146 return ret;
1146 1147
1148 jump_label_lock();
1147 preempt_disable(); 1149 preempt_disable();
1148 if (!kernel_text_address((unsigned long) p->addr) || 1150 if (!kernel_text_address((unsigned long) p->addr) ||
1149 in_kprobes_functions((unsigned long) p->addr) || 1151 in_kprobes_functions((unsigned long) p->addr) ||
1150 ftrace_text_reserved(p->addr, p->addr) || 1152 ftrace_text_reserved(p->addr, p->addr) ||
1151 jump_label_text_reserved(p->addr, p->addr)) { 1153 jump_label_text_reserved(p->addr, p->addr))
1152 preempt_enable(); 1154 goto fail_with_jump_label;
1153 return -EINVAL;
1154 }
1155 1155
1156 /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */ 1156 /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
1157 p->flags &= KPROBE_FLAG_DISABLED; 1157 p->flags &= KPROBE_FLAG_DISABLED;
@@ -1165,10 +1165,9 @@ int __kprobes register_kprobe(struct kprobe *p)
1165 * We must hold a refcount of the probed module while updating 1165 * We must hold a refcount of the probed module while updating
1166 * its code to prohibit unexpected unloading. 1166 * its code to prohibit unexpected unloading.
1167 */ 1167 */
1168 if (unlikely(!try_module_get(probed_mod))) { 1168 if (unlikely(!try_module_get(probed_mod)))
1169 preempt_enable(); 1169 goto fail_with_jump_label;
1170 return -EINVAL; 1170
1171 }
1172 /* 1171 /*
1173 * If the module freed .init.text, we couldn't insert 1172 * If the module freed .init.text, we couldn't insert
1174 * kprobes in there. 1173 * kprobes in there.
@@ -1176,16 +1175,18 @@ int __kprobes register_kprobe(struct kprobe *p)
1176 if (within_module_init((unsigned long)p->addr, probed_mod) && 1175 if (within_module_init((unsigned long)p->addr, probed_mod) &&
1177 probed_mod->state != MODULE_STATE_COMING) { 1176 probed_mod->state != MODULE_STATE_COMING) {
1178 module_put(probed_mod); 1177 module_put(probed_mod);
1179 preempt_enable(); 1178 goto fail_with_jump_label;
1180 return -EINVAL;
1181 } 1179 }
1182 } 1180 }
1183 preempt_enable(); 1181 preempt_enable();
1182 jump_label_unlock();
1184 1183
1185 p->nmissed = 0; 1184 p->nmissed = 0;
1186 INIT_LIST_HEAD(&p->list); 1185 INIT_LIST_HEAD(&p->list);
1187 mutex_lock(&kprobe_mutex); 1186 mutex_lock(&kprobe_mutex);
1188 1187
1188 jump_label_lock(); /* needed to call jump_label_text_reserved() */
1189
1189 get_online_cpus(); /* For avoiding text_mutex deadlock. */ 1190 get_online_cpus(); /* For avoiding text_mutex deadlock. */
1190 mutex_lock(&text_mutex); 1191 mutex_lock(&text_mutex);
1191 1192
@@ -1213,12 +1214,18 @@ int __kprobes register_kprobe(struct kprobe *p)
1213out: 1214out:
1214 mutex_unlock(&text_mutex); 1215 mutex_unlock(&text_mutex);
1215 put_online_cpus(); 1216 put_online_cpus();
1217 jump_label_unlock();
1216 mutex_unlock(&kprobe_mutex); 1218 mutex_unlock(&kprobe_mutex);
1217 1219
1218 if (probed_mod) 1220 if (probed_mod)
1219 module_put(probed_mod); 1221 module_put(probed_mod);
1220 1222
1221 return ret; 1223 return ret;
1224
1225fail_with_jump_label:
1226 preempt_enable();
1227 jump_label_unlock();
1228 return -EINVAL;
1222} 1229}
1223EXPORT_SYMBOL_GPL(register_kprobe); 1230EXPORT_SYMBOL_GPL(register_kprobe);
1224 1231
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index 877fb306d415..17110a4a4fc2 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -194,14 +194,7 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
194 194
195 account_global_scheduler_latency(tsk, &lat); 195 account_global_scheduler_latency(tsk, &lat);
196 196
197 /* 197 for (i = 0; i < tsk->latency_record_count; i++) {
198 * short term hack; if we're > 32 we stop; future we recycle:
199 */
200 tsk->latency_record_count++;
201 if (tsk->latency_record_count >= LT_SAVECOUNT)
202 goto out_unlock;
203
204 for (i = 0; i < LT_SAVECOUNT; i++) {
205 struct latency_record *mylat; 198 struct latency_record *mylat;
206 int same = 1; 199 int same = 1;
207 200
@@ -227,8 +220,14 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
227 } 220 }
228 } 221 }
229 222
223 /*
224 * short term hack; if we're > 32 we stop; future we recycle:
225 */
226 if (tsk->latency_record_count >= LT_SAVECOUNT)
227 goto out_unlock;
228
230 /* Allocated a new one: */ 229 /* Allocated a new one: */
231 i = tsk->latency_record_count; 230 i = tsk->latency_record_count++;
232 memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record)); 231 memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record));
233 232
234out_unlock: 233out_unlock:
diff --git a/kernel/module.c b/kernel/module.c
index 2df46301a7a4..437a74a7524a 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2037,7 +2037,7 @@ static inline void layout_symtab(struct module *mod, struct load_info *info)
2037{ 2037{
2038} 2038}
2039 2039
2040static void add_kallsyms(struct module *mod, struct load_info *info) 2040static void add_kallsyms(struct module *mod, const struct load_info *info)
2041{ 2041{
2042} 2042}
2043#endif /* CONFIG_KALLSYMS */ 2043#endif /* CONFIG_KALLSYMS */
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c
index 2a5dfec8efe0..2c98ad94ba0e 100644
--- a/kernel/ns_cgroup.c
+++ b/kernel/ns_cgroup.c
@@ -85,6 +85,14 @@ static struct cgroup_subsys_state *ns_create(struct cgroup_subsys *ss,
85 return ERR_PTR(-EPERM); 85 return ERR_PTR(-EPERM);
86 if (!cgroup_is_descendant(cgroup, current)) 86 if (!cgroup_is_descendant(cgroup, current))
87 return ERR_PTR(-EPERM); 87 return ERR_PTR(-EPERM);
88 if (test_bit(CGRP_CLONE_CHILDREN, &cgroup->flags)) {
89 printk("ns_cgroup can't be created with parent "
90 "'clone_children' set.\n");
91 return ERR_PTR(-EINVAL);
92 }
93
94 printk_once("ns_cgroup deprecated: consider using the "
95 "'clone_children' flag without the ns_cgroup.\n");
88 96
89 ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL); 97 ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL);
90 if (!ns_cgroup) 98 if (!ns_cgroup)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index f309e8014c78..cb6c0d2af68f 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -417,8 +417,8 @@ event_filter_match(struct perf_event *event)
417 return event->cpu == -1 || event->cpu == smp_processor_id(); 417 return event->cpu == -1 || event->cpu == smp_processor_id();
418} 418}
419 419
420static int 420static void
421__event_sched_out(struct perf_event *event, 421event_sched_out(struct perf_event *event,
422 struct perf_cpu_context *cpuctx, 422 struct perf_cpu_context *cpuctx,
423 struct perf_event_context *ctx) 423 struct perf_event_context *ctx)
424{ 424{
@@ -437,13 +437,14 @@ __event_sched_out(struct perf_event *event,
437 } 437 }
438 438
439 if (event->state != PERF_EVENT_STATE_ACTIVE) 439 if (event->state != PERF_EVENT_STATE_ACTIVE)
440 return 0; 440 return;
441 441
442 event->state = PERF_EVENT_STATE_INACTIVE; 442 event->state = PERF_EVENT_STATE_INACTIVE;
443 if (event->pending_disable) { 443 if (event->pending_disable) {
444 event->pending_disable = 0; 444 event->pending_disable = 0;
445 event->state = PERF_EVENT_STATE_OFF; 445 event->state = PERF_EVENT_STATE_OFF;
446 } 446 }
447 event->tstamp_stopped = ctx->time;
447 event->pmu->del(event, 0); 448 event->pmu->del(event, 0);
448 event->oncpu = -1; 449 event->oncpu = -1;
449 450
@@ -452,19 +453,6 @@ __event_sched_out(struct perf_event *event,
452 ctx->nr_active--; 453 ctx->nr_active--;
453 if (event->attr.exclusive || !cpuctx->active_oncpu) 454 if (event->attr.exclusive || !cpuctx->active_oncpu)
454 cpuctx->exclusive = 0; 455 cpuctx->exclusive = 0;
455 return 1;
456}
457
458static void
459event_sched_out(struct perf_event *event,
460 struct perf_cpu_context *cpuctx,
461 struct perf_event_context *ctx)
462{
463 int ret;
464
465 ret = __event_sched_out(event, cpuctx, ctx);
466 if (ret)
467 event->tstamp_stopped = ctx->time;
468} 456}
469 457
470static void 458static void
@@ -664,7 +652,7 @@ retry:
664} 652}
665 653
666static int 654static int
667__event_sched_in(struct perf_event *event, 655event_sched_in(struct perf_event *event,
668 struct perf_cpu_context *cpuctx, 656 struct perf_cpu_context *cpuctx,
669 struct perf_event_context *ctx) 657 struct perf_event_context *ctx)
670{ 658{
@@ -684,6 +672,10 @@ __event_sched_in(struct perf_event *event,
684 return -EAGAIN; 672 return -EAGAIN;
685 } 673 }
686 674
675 event->tstamp_running += ctx->time - event->tstamp_stopped;
676
677 event->shadow_ctx_time = ctx->time - ctx->timestamp;
678
687 if (!is_software_event(event)) 679 if (!is_software_event(event))
688 cpuctx->active_oncpu++; 680 cpuctx->active_oncpu++;
689 ctx->nr_active++; 681 ctx->nr_active++;
@@ -694,35 +686,6 @@ __event_sched_in(struct perf_event *event,
694 return 0; 686 return 0;
695} 687}
696 688
697static inline int
698event_sched_in(struct perf_event *event,
699 struct perf_cpu_context *cpuctx,
700 struct perf_event_context *ctx)
701{
702 int ret = __event_sched_in(event, cpuctx, ctx);
703 if (ret)
704 return ret;
705 event->tstamp_running += ctx->time - event->tstamp_stopped;
706 return 0;
707}
708
709static void
710group_commit_event_sched_in(struct perf_event *group_event,
711 struct perf_cpu_context *cpuctx,
712 struct perf_event_context *ctx)
713{
714 struct perf_event *event;
715 u64 now = ctx->time;
716
717 group_event->tstamp_running += now - group_event->tstamp_stopped;
718 /*
719 * Schedule in siblings as one group (if any):
720 */
721 list_for_each_entry(event, &group_event->sibling_list, group_entry) {
722 event->tstamp_running += now - event->tstamp_stopped;
723 }
724}
725
726static int 689static int
727group_sched_in(struct perf_event *group_event, 690group_sched_in(struct perf_event *group_event,
728 struct perf_cpu_context *cpuctx, 691 struct perf_cpu_context *cpuctx,
@@ -730,19 +693,15 @@ group_sched_in(struct perf_event *group_event,
730{ 693{
731 struct perf_event *event, *partial_group = NULL; 694 struct perf_event *event, *partial_group = NULL;
732 struct pmu *pmu = group_event->pmu; 695 struct pmu *pmu = group_event->pmu;
696 u64 now = ctx->time;
697 bool simulate = false;
733 698
734 if (group_event->state == PERF_EVENT_STATE_OFF) 699 if (group_event->state == PERF_EVENT_STATE_OFF)
735 return 0; 700 return 0;
736 701
737 pmu->start_txn(pmu); 702 pmu->start_txn(pmu);
738 703
739 /* 704 if (event_sched_in(group_event, cpuctx, ctx)) {
740 * use __event_sched_in() to delay updating tstamp_running
741 * until the transaction is committed. In case of failure
742 * we will keep an unmodified tstamp_running which is a
743 * requirement to get correct timing information
744 */
745 if (__event_sched_in(group_event, cpuctx, ctx)) {
746 pmu->cancel_txn(pmu); 705 pmu->cancel_txn(pmu);
747 return -EAGAIN; 706 return -EAGAIN;
748 } 707 }
@@ -751,31 +710,42 @@ group_sched_in(struct perf_event *group_event,
751 * Schedule in siblings as one group (if any): 710 * Schedule in siblings as one group (if any):
752 */ 711 */
753 list_for_each_entry(event, &group_event->sibling_list, group_entry) { 712 list_for_each_entry(event, &group_event->sibling_list, group_entry) {
754 if (__event_sched_in(event, cpuctx, ctx)) { 713 if (event_sched_in(event, cpuctx, ctx)) {
755 partial_group = event; 714 partial_group = event;
756 goto group_error; 715 goto group_error;
757 } 716 }
758 } 717 }
759 718
760 if (!pmu->commit_txn(pmu)) { 719 if (!pmu->commit_txn(pmu))
761 /* commit tstamp_running */
762 group_commit_event_sched_in(group_event, cpuctx, ctx);
763 return 0; 720 return 0;
764 } 721
765group_error: 722group_error:
766 /* 723 /*
767 * Groups can be scheduled in as one unit only, so undo any 724 * Groups can be scheduled in as one unit only, so undo any
768 * partial group before returning: 725 * partial group before returning:
726 * The events up to the failed event are scheduled out normally,
727 * tstamp_stopped will be updated.
769 * 728 *
770 * use __event_sched_out() to avoid updating tstamp_stopped 729 * The failed events and the remaining siblings need to have
771 * because the event never actually ran 730 * their timings updated as if they had gone thru event_sched_in()
731 * and event_sched_out(). This is required to get consistent timings
732 * across the group. This also takes care of the case where the group
733 * could never be scheduled by ensuring tstamp_stopped is set to mark
734 * the time the event was actually stopped, such that time delta
735 * calculation in update_event_times() is correct.
772 */ 736 */
773 list_for_each_entry(event, &group_event->sibling_list, group_entry) { 737 list_for_each_entry(event, &group_event->sibling_list, group_entry) {
774 if (event == partial_group) 738 if (event == partial_group)
775 break; 739 simulate = true;
776 __event_sched_out(event, cpuctx, ctx); 740
741 if (simulate) {
742 event->tstamp_running += now - event->tstamp_stopped;
743 event->tstamp_stopped = now;
744 } else {
745 event_sched_out(event, cpuctx, ctx);
746 }
777 } 747 }
778 __event_sched_out(group_event, cpuctx, ctx); 748 event_sched_out(group_event, cpuctx, ctx);
779 749
780 pmu->cancel_txn(pmu); 750 pmu->cancel_txn(pmu);
781 751
@@ -3428,7 +3398,8 @@ static u32 perf_event_tid(struct perf_event *event, struct task_struct *p)
3428} 3398}
3429 3399
3430static void perf_output_read_one(struct perf_output_handle *handle, 3400static void perf_output_read_one(struct perf_output_handle *handle,
3431 struct perf_event *event) 3401 struct perf_event *event,
3402 u64 enabled, u64 running)
3432{ 3403{
3433 u64 read_format = event->attr.read_format; 3404 u64 read_format = event->attr.read_format;
3434 u64 values[4]; 3405 u64 values[4];
@@ -3436,11 +3407,11 @@ static void perf_output_read_one(struct perf_output_handle *handle,
3436 3407
3437 values[n++] = perf_event_count(event); 3408 values[n++] = perf_event_count(event);
3438 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { 3409 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
3439 values[n++] = event->total_time_enabled + 3410 values[n++] = enabled +
3440 atomic64_read(&event->child_total_time_enabled); 3411 atomic64_read(&event->child_total_time_enabled);
3441 } 3412 }
3442 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { 3413 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
3443 values[n++] = event->total_time_running + 3414 values[n++] = running +
3444 atomic64_read(&event->child_total_time_running); 3415 atomic64_read(&event->child_total_time_running);
3445 } 3416 }
3446 if (read_format & PERF_FORMAT_ID) 3417 if (read_format & PERF_FORMAT_ID)
@@ -3453,7 +3424,8 @@ static void perf_output_read_one(struct perf_output_handle *handle,
3453 * XXX PERF_FORMAT_GROUP vs inherited events seems difficult. 3424 * XXX PERF_FORMAT_GROUP vs inherited events seems difficult.
3454 */ 3425 */
3455static void perf_output_read_group(struct perf_output_handle *handle, 3426static void perf_output_read_group(struct perf_output_handle *handle,
3456 struct perf_event *event) 3427 struct perf_event *event,
3428 u64 enabled, u64 running)
3457{ 3429{
3458 struct perf_event *leader = event->group_leader, *sub; 3430 struct perf_event *leader = event->group_leader, *sub;
3459 u64 read_format = event->attr.read_format; 3431 u64 read_format = event->attr.read_format;
@@ -3463,10 +3435,10 @@ static void perf_output_read_group(struct perf_output_handle *handle,
3463 values[n++] = 1 + leader->nr_siblings; 3435 values[n++] = 1 + leader->nr_siblings;
3464 3436
3465 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) 3437 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
3466 values[n++] = leader->total_time_enabled; 3438 values[n++] = enabled;
3467 3439
3468 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) 3440 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
3469 values[n++] = leader->total_time_running; 3441 values[n++] = running;
3470 3442
3471 if (leader != event) 3443 if (leader != event)
3472 leader->pmu->read(leader); 3444 leader->pmu->read(leader);
@@ -3491,13 +3463,35 @@ static void perf_output_read_group(struct perf_output_handle *handle,
3491 } 3463 }
3492} 3464}
3493 3465
3466#define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\
3467 PERF_FORMAT_TOTAL_TIME_RUNNING)
3468
3494static void perf_output_read(struct perf_output_handle *handle, 3469static void perf_output_read(struct perf_output_handle *handle,
3495 struct perf_event *event) 3470 struct perf_event *event)
3496{ 3471{
3472 u64 enabled = 0, running = 0, now, ctx_time;
3473 u64 read_format = event->attr.read_format;
3474
3475 /*
3476 * compute total_time_enabled, total_time_running
3477 * based on snapshot values taken when the event
3478 * was last scheduled in.
3479 *
3480 * we cannot simply called update_context_time()
3481 * because of locking issue as we are called in
3482 * NMI context
3483 */
3484 if (read_format & PERF_FORMAT_TOTAL_TIMES) {
3485 now = perf_clock();
3486 ctx_time = event->shadow_ctx_time + now;
3487 enabled = ctx_time - event->tstamp_enabled;
3488 running = ctx_time - event->tstamp_running;
3489 }
3490
3497 if (event->attr.read_format & PERF_FORMAT_GROUP) 3491 if (event->attr.read_format & PERF_FORMAT_GROUP)
3498 perf_output_read_group(handle, event); 3492 perf_output_read_group(handle, event, enabled, running);
3499 else 3493 else
3500 perf_output_read_one(handle, event); 3494 perf_output_read_one(handle, event, enabled, running);
3501} 3495}
3502 3496
3503void perf_output_sample(struct perf_output_handle *handle, 3497void perf_output_sample(struct perf_output_handle *handle,
diff --git a/kernel/printk.c b/kernel/printk.c
index b2ebaee8c377..38e7d5868d60 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -261,6 +261,12 @@ static inline void boot_delay_msec(void)
261} 261}
262#endif 262#endif
263 263
264#ifdef CONFIG_SECURITY_DMESG_RESTRICT
265int dmesg_restrict = 1;
266#else
267int dmesg_restrict;
268#endif
269
264int do_syslog(int type, char __user *buf, int len, bool from_file) 270int do_syslog(int type, char __user *buf, int len, bool from_file)
265{ 271{
266 unsigned i, j, limit, count; 272 unsigned i, j, limit, count;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index f34d798ef4a2..99bbaa3e5b0d 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -181,7 +181,7 @@ int ptrace_attach(struct task_struct *task)
181 * under ptrace. 181 * under ptrace.
182 */ 182 */
183 retval = -ERESTARTNOINTR; 183 retval = -ERESTARTNOINTR;
184 if (mutex_lock_interruptible(&task->cred_guard_mutex)) 184 if (mutex_lock_interruptible(&task->signal->cred_guard_mutex))
185 goto out; 185 goto out;
186 186
187 task_lock(task); 187 task_lock(task);
@@ -208,7 +208,7 @@ int ptrace_attach(struct task_struct *task)
208unlock_tasklist: 208unlock_tasklist:
209 write_unlock_irq(&tasklist_lock); 209 write_unlock_irq(&tasklist_lock);
210unlock_creds: 210unlock_creds:
211 mutex_unlock(&task->cred_guard_mutex); 211 mutex_unlock(&task->signal->cred_guard_mutex);
212out: 212out:
213 return retval; 213 return retval;
214} 214}
@@ -329,6 +329,8 @@ int ptrace_detach(struct task_struct *child, unsigned int data)
329 * and reacquire the lock. 329 * and reacquire the lock.
330 */ 330 */
331void exit_ptrace(struct task_struct *tracer) 331void exit_ptrace(struct task_struct *tracer)
332 __releases(&tasklist_lock)
333 __acquires(&tasklist_lock)
332{ 334{
333 struct task_struct *p, *n; 335 struct task_struct *p, *n;
334 LIST_HEAD(ptrace_dead); 336 LIST_HEAD(ptrace_dead);
@@ -402,7 +404,7 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds
402 return copied; 404 return copied;
403} 405}
404 406
405static int ptrace_setoptions(struct task_struct *child, long data) 407static int ptrace_setoptions(struct task_struct *child, unsigned long data)
406{ 408{
407 child->ptrace &= ~PT_TRACE_MASK; 409 child->ptrace &= ~PT_TRACE_MASK;
408 410
@@ -481,7 +483,8 @@ static int ptrace_setsiginfo(struct task_struct *child, const siginfo_t *info)
481#define is_sysemu_singlestep(request) 0 483#define is_sysemu_singlestep(request) 0
482#endif 484#endif
483 485
484static int ptrace_resume(struct task_struct *child, long request, long data) 486static int ptrace_resume(struct task_struct *child, long request,
487 unsigned long data)
485{ 488{
486 if (!valid_signal(data)) 489 if (!valid_signal(data))
487 return -EIO; 490 return -EIO;
@@ -558,10 +561,12 @@ static int ptrace_regset(struct task_struct *task, int req, unsigned int type,
558#endif 561#endif
559 562
560int ptrace_request(struct task_struct *child, long request, 563int ptrace_request(struct task_struct *child, long request,
561 long addr, long data) 564 unsigned long addr, unsigned long data)
562{ 565{
563 int ret = -EIO; 566 int ret = -EIO;
564 siginfo_t siginfo; 567 siginfo_t siginfo;
568 void __user *datavp = (void __user *) data;
569 unsigned long __user *datalp = datavp;
565 570
566 switch (request) { 571 switch (request) {
567 case PTRACE_PEEKTEXT: 572 case PTRACE_PEEKTEXT:
@@ -578,19 +583,17 @@ int ptrace_request(struct task_struct *child, long request,
578 ret = ptrace_setoptions(child, data); 583 ret = ptrace_setoptions(child, data);
579 break; 584 break;
580 case PTRACE_GETEVENTMSG: 585 case PTRACE_GETEVENTMSG:
581 ret = put_user(child->ptrace_message, (unsigned long __user *) data); 586 ret = put_user(child->ptrace_message, datalp);
582 break; 587 break;
583 588
584 case PTRACE_GETSIGINFO: 589 case PTRACE_GETSIGINFO:
585 ret = ptrace_getsiginfo(child, &siginfo); 590 ret = ptrace_getsiginfo(child, &siginfo);
586 if (!ret) 591 if (!ret)
587 ret = copy_siginfo_to_user((siginfo_t __user *) data, 592 ret = copy_siginfo_to_user(datavp, &siginfo);
588 &siginfo);
589 break; 593 break;
590 594
591 case PTRACE_SETSIGINFO: 595 case PTRACE_SETSIGINFO:
592 if (copy_from_user(&siginfo, (siginfo_t __user *) data, 596 if (copy_from_user(&siginfo, datavp, sizeof siginfo))
593 sizeof siginfo))
594 ret = -EFAULT; 597 ret = -EFAULT;
595 else 598 else
596 ret = ptrace_setsiginfo(child, &siginfo); 599 ret = ptrace_setsiginfo(child, &siginfo);
@@ -621,7 +624,7 @@ int ptrace_request(struct task_struct *child, long request,
621 } 624 }
622 mmput(mm); 625 mmput(mm);
623 626
624 ret = put_user(tmp, (unsigned long __user *) data); 627 ret = put_user(tmp, datalp);
625 break; 628 break;
626 } 629 }
627#endif 630#endif
@@ -650,7 +653,7 @@ int ptrace_request(struct task_struct *child, long request,
650 case PTRACE_SETREGSET: 653 case PTRACE_SETREGSET:
651 { 654 {
652 struct iovec kiov; 655 struct iovec kiov;
653 struct iovec __user *uiov = (struct iovec __user *) data; 656 struct iovec __user *uiov = datavp;
654 657
655 if (!access_ok(VERIFY_WRITE, uiov, sizeof(*uiov))) 658 if (!access_ok(VERIFY_WRITE, uiov, sizeof(*uiov)))
656 return -EFAULT; 659 return -EFAULT;
@@ -691,7 +694,8 @@ static struct task_struct *ptrace_get_task_struct(pid_t pid)
691#define arch_ptrace_attach(child) do { } while (0) 694#define arch_ptrace_attach(child) do { } while (0)
692#endif 695#endif
693 696
694SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data) 697SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
698 unsigned long, data)
695{ 699{
696 struct task_struct *child; 700 struct task_struct *child;
697 long ret; 701 long ret;
@@ -732,7 +736,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data)
732 return ret; 736 return ret;
733} 737}
734 738
735int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data) 739int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr,
740 unsigned long data)
736{ 741{
737 unsigned long tmp; 742 unsigned long tmp;
738 int copied; 743 int copied;
@@ -743,7 +748,8 @@ int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data)
743 return put_user(tmp, (unsigned long __user *)data); 748 return put_user(tmp, (unsigned long __user *)data);
744} 749}
745 750
746int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data) 751int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr,
752 unsigned long data)
747{ 753{
748 int copied; 754 int copied;
749 755
diff --git a/kernel/range.c b/kernel/range.c
index 471b66acabb5..37fa9b99ad58 100644
--- a/kernel/range.c
+++ b/kernel/range.c
@@ -119,7 +119,7 @@ static int cmp_range(const void *x1, const void *x2)
119 119
120int clean_sort_range(struct range *range, int az) 120int clean_sort_range(struct range *range, int az)
121{ 121{
122 int i, j, k = az - 1, nr_range = 0; 122 int i, j, k = az - 1, nr_range = az;
123 123
124 for (i = 0; i < k; i++) { 124 for (i = 0; i < k; i++) {
125 if (range[i].end) 125 if (range[i].end)
diff --git a/kernel/relay.c b/kernel/relay.c
index c7cf397fb929..859ea5a9605f 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -70,17 +70,10 @@ static const struct vm_operations_struct relay_file_mmap_ops = {
70 */ 70 */
71static struct page **relay_alloc_page_array(unsigned int n_pages) 71static struct page **relay_alloc_page_array(unsigned int n_pages)
72{ 72{
73 struct page **array; 73 const size_t pa_size = n_pages * sizeof(struct page *);
74 size_t pa_size = n_pages * sizeof(struct page *); 74 if (pa_size > PAGE_SIZE)
75 75 return vzalloc(pa_size);
76 if (pa_size > PAGE_SIZE) { 76 return kzalloc(pa_size, GFP_KERNEL);
77 array = vmalloc(pa_size);
78 if (array)
79 memset(array, 0, pa_size);
80 } else {
81 array = kzalloc(pa_size, GFP_KERNEL);
82 }
83 return array;
84} 77}
85 78
86/* 79/*
diff --git a/kernel/resource.c b/kernel/resource.c
index 7b36976e5dea..9fad33efd0db 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -40,6 +40,23 @@ EXPORT_SYMBOL(iomem_resource);
40 40
41static DEFINE_RWLOCK(resource_lock); 41static DEFINE_RWLOCK(resource_lock);
42 42
43/*
44 * By default, we allocate free space bottom-up. The architecture can request
45 * top-down by clearing this flag. The user can override the architecture's
46 * choice with the "resource_alloc_from_bottom" kernel boot option, but that
47 * should only be a debugging tool.
48 */
49int resource_alloc_from_bottom = 1;
50
51static __init int setup_alloc_from_bottom(char *s)
52{
53 printk(KERN_INFO
54 "resource: allocating from bottom-up; please report a bug\n");
55 resource_alloc_from_bottom = 1;
56 return 0;
57}
58early_param("resource_alloc_from_bottom", setup_alloc_from_bottom);
59
43static void *r_next(struct seq_file *m, void *v, loff_t *pos) 60static void *r_next(struct seq_file *m, void *v, loff_t *pos)
44{ 61{
45 struct resource *p = v; 62 struct resource *p = v;
@@ -357,8 +374,97 @@ int __weak page_is_ram(unsigned long pfn)
357 return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1; 374 return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1;
358} 375}
359 376
377static resource_size_t simple_align_resource(void *data,
378 const struct resource *avail,
379 resource_size_t size,
380 resource_size_t align)
381{
382 return avail->start;
383}
384
385static void resource_clip(struct resource *res, resource_size_t min,
386 resource_size_t max)
387{
388 if (res->start < min)
389 res->start = min;
390 if (res->end > max)
391 res->end = max;
392}
393
394static bool resource_contains(struct resource *res1, struct resource *res2)
395{
396 return res1->start <= res2->start && res1->end >= res2->end;
397}
398
399/*
400 * Find the resource before "child" in the sibling list of "root" children.
401 */
402static struct resource *find_sibling_prev(struct resource *root, struct resource *child)
403{
404 struct resource *this;
405
406 for (this = root->child; this; this = this->sibling)
407 if (this->sibling == child)
408 return this;
409
410 return NULL;
411}
412
413/*
414 * Find empty slot in the resource tree given range and alignment.
415 * This version allocates from the end of the root resource first.
416 */
417static int find_resource_from_top(struct resource *root, struct resource *new,
418 resource_size_t size, resource_size_t min,
419 resource_size_t max, resource_size_t align,
420 resource_size_t (*alignf)(void *,
421 const struct resource *,
422 resource_size_t,
423 resource_size_t),
424 void *alignf_data)
425{
426 struct resource *this;
427 struct resource tmp, avail, alloc;
428
429 tmp.start = root->end;
430 tmp.end = root->end;
431
432 this = find_sibling_prev(root, NULL);
433 for (;;) {
434 if (this) {
435 if (this->end < root->end)
436 tmp.start = this->end + 1;
437 } else
438 tmp.start = root->start;
439
440 resource_clip(&tmp, min, max);
441
442 /* Check for overflow after ALIGN() */
443 avail = *new;
444 avail.start = ALIGN(tmp.start, align);
445 avail.end = tmp.end;
446 if (avail.start >= tmp.start) {
447 alloc.start = alignf(alignf_data, &avail, size, align);
448 alloc.end = alloc.start + size - 1;
449 if (resource_contains(&avail, &alloc)) {
450 new->start = alloc.start;
451 new->end = alloc.end;
452 return 0;
453 }
454 }
455
456 if (!this || this->start == root->start)
457 break;
458
459 tmp.end = this->start - 1;
460 this = find_sibling_prev(root, this);
461 }
462 return -EBUSY;
463}
464
360/* 465/*
361 * Find empty slot in the resource tree given range and alignment. 466 * Find empty slot in the resource tree given range and alignment.
467 * This version allocates from the beginning of the root resource first.
362 */ 468 */
363static int find_resource(struct resource *root, struct resource *new, 469static int find_resource(struct resource *root, struct resource *new,
364 resource_size_t size, resource_size_t min, 470 resource_size_t size, resource_size_t min,
@@ -370,36 +476,43 @@ static int find_resource(struct resource *root, struct resource *new,
370 void *alignf_data) 476 void *alignf_data)
371{ 477{
372 struct resource *this = root->child; 478 struct resource *this = root->child;
373 struct resource tmp = *new; 479 struct resource tmp = *new, avail, alloc;
374 480
375 tmp.start = root->start; 481 tmp.start = root->start;
376 /* 482 /*
377 * Skip past an allocated resource that starts at 0, since the assignment 483 * Skip past an allocated resource that starts at 0, since the
378 * of this->start - 1 to tmp->end below would cause an underflow. 484 * assignment of this->start - 1 to tmp->end below would cause an
485 * underflow.
379 */ 486 */
380 if (this && this->start == 0) { 487 if (this && this->start == 0) {
381 tmp.start = this->end + 1; 488 tmp.start = this->end + 1;
382 this = this->sibling; 489 this = this->sibling;
383 } 490 }
384 for(;;) { 491 for (;;) {
385 if (this) 492 if (this)
386 tmp.end = this->start - 1; 493 tmp.end = this->start - 1;
387 else 494 else
388 tmp.end = root->end; 495 tmp.end = root->end;
389 if (tmp.start < min) 496
390 tmp.start = min; 497 resource_clip(&tmp, min, max);
391 if (tmp.end > max) 498
392 tmp.end = max; 499 /* Check for overflow after ALIGN() */
393 tmp.start = ALIGN(tmp.start, align); 500 avail = *new;
394 if (alignf) 501 avail.start = ALIGN(tmp.start, align);
395 tmp.start = alignf(alignf_data, &tmp, size, align); 502 avail.end = tmp.end;
396 if (tmp.start < tmp.end && tmp.end - tmp.start >= size - 1) { 503 if (avail.start >= tmp.start) {
397 new->start = tmp.start; 504 alloc.start = alignf(alignf_data, &avail, size, align);
398 new->end = tmp.start + size - 1; 505 alloc.end = alloc.start + size - 1;
399 return 0; 506 if (resource_contains(&avail, &alloc)) {
507 new->start = alloc.start;
508 new->end = alloc.end;
509 return 0;
510 }
400 } 511 }
512
401 if (!this) 513 if (!this)
402 break; 514 break;
515
403 tmp.start = this->end + 1; 516 tmp.start = this->end + 1;
404 this = this->sibling; 517 this = this->sibling;
405 } 518 }
@@ -428,8 +541,14 @@ int allocate_resource(struct resource *root, struct resource *new,
428{ 541{
429 int err; 542 int err;
430 543
544 if (!alignf)
545 alignf = simple_align_resource;
546
431 write_lock(&resource_lock); 547 write_lock(&resource_lock);
432 err = find_resource(root, new, size, min, max, align, alignf, alignf_data); 548 if (resource_alloc_from_bottom)
549 err = find_resource(root, new, size, min, max, align, alignf, alignf_data);
550 else
551 err = find_resource_from_top(root, new, size, min, max, align, alignf, alignf_data);
433 if (err >= 0 && __request_resource(root, new)) 552 if (err >= 0 && __request_resource(root, new))
434 err = -EBUSY; 553 err = -EBUSY;
435 write_unlock(&resource_lock); 554 write_unlock(&resource_lock);
@@ -453,6 +572,8 @@ static struct resource * __insert_resource(struct resource *parent, struct resou
453 572
454 if (first == parent) 573 if (first == parent)
455 return first; 574 return first;
575 if (WARN_ON(first == new)) /* duplicated insertion */
576 return first;
456 577
457 if ((first->start > new->start) || (first->end < new->end)) 578 if ((first->start > new->start) || (first->end < new->end))
458 break; 579 break;
diff --git a/kernel/sched.c b/kernel/sched.c
index d42992bccdfa..aa14a56f9d03 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -8510,12 +8510,12 @@ void sched_move_task(struct task_struct *tsk)
8510 if (unlikely(running)) 8510 if (unlikely(running))
8511 tsk->sched_class->put_prev_task(rq, tsk); 8511 tsk->sched_class->put_prev_task(rq, tsk);
8512 8512
8513 set_task_rq(tsk, task_cpu(tsk));
8514
8515#ifdef CONFIG_FAIR_GROUP_SCHED 8513#ifdef CONFIG_FAIR_GROUP_SCHED
8516 if (tsk->sched_class->moved_group) 8514 if (tsk->sched_class->task_move_group)
8517 tsk->sched_class->moved_group(tsk, on_rq); 8515 tsk->sched_class->task_move_group(tsk, on_rq);
8516 else
8518#endif 8517#endif
8518 set_task_rq(tsk, task_cpu(tsk));
8519 8519
8520 if (unlikely(running)) 8520 if (unlikely(running))
8521 tsk->sched_class->set_curr_task(rq); 8521 tsk->sched_class->set_curr_task(rq);
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 933f3d1b62ea..f4f6a8326dd0 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -3869,13 +3869,26 @@ static void set_curr_task_fair(struct rq *rq)
3869} 3869}
3870 3870
3871#ifdef CONFIG_FAIR_GROUP_SCHED 3871#ifdef CONFIG_FAIR_GROUP_SCHED
3872static void moved_group_fair(struct task_struct *p, int on_rq) 3872static void task_move_group_fair(struct task_struct *p, int on_rq)
3873{ 3873{
3874 struct cfs_rq *cfs_rq = task_cfs_rq(p); 3874 /*
3875 3875 * If the task was not on the rq at the time of this cgroup movement
3876 update_curr(cfs_rq); 3876 * it must have been asleep, sleeping tasks keep their ->vruntime
3877 * absolute on their old rq until wakeup (needed for the fair sleeper
3878 * bonus in place_entity()).
3879 *
3880 * If it was on the rq, we've just 'preempted' it, which does convert
3881 * ->vruntime to a relative base.
3882 *
3883 * Make sure both cases convert their relative position when migrating
3884 * to another cgroup's rq. This does somewhat interfere with the
3885 * fair sleeper stuff for the first placement, but who cares.
3886 */
3887 if (!on_rq)
3888 p->se.vruntime -= cfs_rq_of(&p->se)->min_vruntime;
3889 set_task_rq(p, task_cpu(p));
3877 if (!on_rq) 3890 if (!on_rq)
3878 place_entity(cfs_rq, &p->se, 1); 3891 p->se.vruntime += cfs_rq_of(&p->se)->min_vruntime;
3879} 3892}
3880#endif 3893#endif
3881 3894
@@ -3927,7 +3940,7 @@ static const struct sched_class fair_sched_class = {
3927 .get_rr_interval = get_rr_interval_fair, 3940 .get_rr_interval = get_rr_interval_fair,
3928 3941
3929#ifdef CONFIG_FAIR_GROUP_SCHED 3942#ifdef CONFIG_FAIR_GROUP_SCHED
3930 .moved_group = moved_group_fair, 3943 .task_move_group = task_move_group_fair,
3931#endif 3944#endif
3932}; 3945};
3933 3946
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 25c2f962f6fc..48ddf431db0e 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -157,15 +157,7 @@ static inline void sched_info_reset_dequeued(struct task_struct *t)
157} 157}
158 158
159/* 159/*
160 * Called when a process is dequeued from the active array and given 160 * We are interested in knowing how long it was from the *first* time a
161 * the cpu. We should note that with the exception of interactive
162 * tasks, the expired queue will become the active queue after the active
163 * queue is empty, without explicitly dequeuing and requeuing tasks in the
164 * expired queue. (Interactive tasks may be requeued directly to the
165 * active queue, thus delaying tasks in the expired queue from running;
166 * see scheduler_tick()).
167 *
168 * Though we are interested in knowing how long it was from the *first* time a
169 * task was queued to the time that it finally hit a cpu, we call this routine 161 * task was queued to the time that it finally hit a cpu, we call this routine
170 * from dequeue_task() to account for possible rq->clock skew across cpus. The 162 * from dequeue_task() to account for possible rq->clock skew across cpus. The
171 * delta taken on each cpu would annul the skew. 163 * delta taken on each cpu would annul the skew.
@@ -203,16 +195,6 @@ static void sched_info_arrive(struct task_struct *t)
203} 195}
204 196
205/* 197/*
206 * Called when a process is queued into either the active or expired
207 * array. The time is noted and later used to determine how long we
208 * had to wait for us to reach the cpu. Since the expired queue will
209 * become the active queue after active queue is empty, without dequeuing
210 * and requeuing any tasks, we are interested in queuing to either. It
211 * is unusual but not impossible for tasks to be dequeued and immediately
212 * requeued in the same or another array: this can happen in sched_yield(),
213 * set_user_nice(), and even load_balance() as it moves tasks from runqueue
214 * to runqueue.
215 *
216 * This function is only called from enqueue_task(), but also only updates 198 * This function is only called from enqueue_task(), but also only updates
217 * the timestamp if it is already not set. It's assumed that 199 * the timestamp if it is already not set. It's assumed that
218 * sched_info_dequeued() will clear that stamp when appropriate. 200 * sched_info_dequeued() will clear that stamp when appropriate.
diff --git a/kernel/signal.c b/kernel/signal.c
index 919562c3d6b7..4e3cff10fdce 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1105,7 +1105,8 @@ int zap_other_threads(struct task_struct *p)
1105 return count; 1105 return count;
1106} 1106}
1107 1107
1108struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags) 1108struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
1109 unsigned long *flags)
1109{ 1110{
1110 struct sighand_struct *sighand; 1111 struct sighand_struct *sighand;
1111 1112
@@ -1617,6 +1618,8 @@ static int sigkill_pending(struct task_struct *tsk)
1617 * is gone, we keep current->exit_code unless clear_code. 1618 * is gone, we keep current->exit_code unless clear_code.
1618 */ 1619 */
1619static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) 1620static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info)
1621 __releases(&current->sighand->siglock)
1622 __acquires(&current->sighand->siglock)
1620{ 1623{
1621 if (arch_ptrace_stop_needed(exit_code, info)) { 1624 if (arch_ptrace_stop_needed(exit_code, info)) {
1622 /* 1625 /*
diff --git a/kernel/smp.c b/kernel/smp.c
index ed6aacfcb7ef..12ed8b013e2d 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -267,7 +267,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data);
267 * 267 *
268 * Returns 0 on success, else a negative status code. 268 * Returns 0 on success, else a negative status code.
269 */ 269 */
270int smp_call_function_single(int cpu, void (*func) (void *info), void *info, 270int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
271 int wait) 271 int wait)
272{ 272{
273 struct call_single_data d = { 273 struct call_single_data d = {
@@ -336,7 +336,7 @@ EXPORT_SYMBOL(smp_call_function_single);
336 * 3) any other online cpu in @mask 336 * 3) any other online cpu in @mask
337 */ 337 */
338int smp_call_function_any(const struct cpumask *mask, 338int smp_call_function_any(const struct cpumask *mask,
339 void (*func)(void *info), void *info, int wait) 339 smp_call_func_t func, void *info, int wait)
340{ 340{
341 unsigned int cpu; 341 unsigned int cpu;
342 const struct cpumask *nodemask; 342 const struct cpumask *nodemask;
@@ -416,7 +416,7 @@ void __smp_call_function_single(int cpu, struct call_single_data *data,
416 * must be disabled when calling this function. 416 * must be disabled when calling this function.
417 */ 417 */
418void smp_call_function_many(const struct cpumask *mask, 418void smp_call_function_many(const struct cpumask *mask,
419 void (*func)(void *), void *info, bool wait) 419 smp_call_func_t func, void *info, bool wait)
420{ 420{
421 struct call_function_data *data; 421 struct call_function_data *data;
422 unsigned long flags; 422 unsigned long flags;
@@ -500,7 +500,7 @@ EXPORT_SYMBOL(smp_call_function_many);
500 * You must not call this function with disabled interrupts or from a 500 * You must not call this function with disabled interrupts or from a
501 * hardware interrupt handler or from a bottom half handler. 501 * hardware interrupt handler or from a bottom half handler.
502 */ 502 */
503int smp_call_function(void (*func)(void *), void *info, int wait) 503int smp_call_function(smp_call_func_t func, void *info, int wait)
504{ 504{
505 preempt_disable(); 505 preempt_disable();
506 smp_call_function_many(cpu_online_mask, func, info, wait); 506 smp_call_function_many(cpu_online_mask, func, info, wait);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index f02a9dfa19bc..18f4be0d5fe0 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -229,18 +229,20 @@ restart:
229 229
230 do { 230 do {
231 if (pending & 1) { 231 if (pending & 1) {
232 unsigned int vec_nr = h - softirq_vec;
232 int prev_count = preempt_count(); 233 int prev_count = preempt_count();
233 kstat_incr_softirqs_this_cpu(h - softirq_vec);
234 234
235 trace_softirq_entry(h, softirq_vec); 235 kstat_incr_softirqs_this_cpu(vec_nr);
236
237 trace_softirq_entry(vec_nr);
236 h->action(h); 238 h->action(h);
237 trace_softirq_exit(h, softirq_vec); 239 trace_softirq_exit(vec_nr);
238 if (unlikely(prev_count != preempt_count())) { 240 if (unlikely(prev_count != preempt_count())) {
239 printk(KERN_ERR "huh, entered softirq %td %s %p" 241 printk(KERN_ERR "huh, entered softirq %u %s %p"
240 "with preempt_count %08x," 242 "with preempt_count %08x,"
241 " exited with %08x?\n", h - softirq_vec, 243 " exited with %08x?\n", vec_nr,
242 softirq_to_name[h - softirq_vec], 244 softirq_to_name[vec_nr], h->action,
243 h->action, prev_count, preempt_count()); 245 prev_count, preempt_count());
244 preempt_count() = prev_count; 246 preempt_count() = prev_count;
245 } 247 }
246 248
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c33a1edb799f..b65bf634035e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -704,6 +704,15 @@ static struct ctl_table kern_table[] = {
704 }, 704 },
705#endif 705#endif
706 { 706 {
707 .procname = "dmesg_restrict",
708 .data = &dmesg_restrict,
709 .maxlen = sizeof(int),
710 .mode = 0644,
711 .proc_handler = proc_dointvec_minmax,
712 .extra1 = &zero,
713 .extra2 = &one,
714 },
715 {
707 .procname = "ngroups_max", 716 .procname = "ngroups_max",
708 .data = &ngroups_max, 717 .data = &ngroups_max,
709 .maxlen = sizeof (int), 718 .maxlen = sizeof (int),
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 11281d5792bd..c8231fb15708 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -175,22 +175,8 @@ static void send_cpu_listeners(struct sk_buff *skb,
175 up_write(&listeners->sem); 175 up_write(&listeners->sem);
176} 176}
177 177
178static int fill_pid(pid_t pid, struct task_struct *tsk, 178static void fill_stats(struct task_struct *tsk, struct taskstats *stats)
179 struct taskstats *stats)
180{ 179{
181 int rc = 0;
182
183 if (!tsk) {
184 rcu_read_lock();
185 tsk = find_task_by_vpid(pid);
186 if (tsk)
187 get_task_struct(tsk);
188 rcu_read_unlock();
189 if (!tsk)
190 return -ESRCH;
191 } else
192 get_task_struct(tsk);
193
194 memset(stats, 0, sizeof(*stats)); 180 memset(stats, 0, sizeof(*stats));
195 /* 181 /*
196 * Each accounting subsystem adds calls to its functions to 182 * Each accounting subsystem adds calls to its functions to
@@ -209,17 +195,27 @@ static int fill_pid(pid_t pid, struct task_struct *tsk,
209 195
210 /* fill in extended acct fields */ 196 /* fill in extended acct fields */
211 xacct_add_tsk(stats, tsk); 197 xacct_add_tsk(stats, tsk);
198}
212 199
213 /* Define err: label here if needed */ 200static int fill_stats_for_pid(pid_t pid, struct taskstats *stats)
214 put_task_struct(tsk); 201{
215 return rc; 202 struct task_struct *tsk;
216 203
204 rcu_read_lock();
205 tsk = find_task_by_vpid(pid);
206 if (tsk)
207 get_task_struct(tsk);
208 rcu_read_unlock();
209 if (!tsk)
210 return -ESRCH;
211 fill_stats(tsk, stats);
212 put_task_struct(tsk);
213 return 0;
217} 214}
218 215
219static int fill_tgid(pid_t tgid, struct task_struct *first, 216static int fill_stats_for_tgid(pid_t tgid, struct taskstats *stats)
220 struct taskstats *stats)
221{ 217{
222 struct task_struct *tsk; 218 struct task_struct *tsk, *first;
223 unsigned long flags; 219 unsigned long flags;
224 int rc = -ESRCH; 220 int rc = -ESRCH;
225 221
@@ -228,8 +224,7 @@ static int fill_tgid(pid_t tgid, struct task_struct *first,
228 * leaders who are already counted with the dead tasks 224 * leaders who are already counted with the dead tasks
229 */ 225 */
230 rcu_read_lock(); 226 rcu_read_lock();
231 if (!first) 227 first = find_task_by_vpid(tgid);
232 first = find_task_by_vpid(tgid);
233 228
234 if (!first || !lock_task_sighand(first, &flags)) 229 if (!first || !lock_task_sighand(first, &flags))
235 goto out; 230 goto out;
@@ -268,7 +263,6 @@ out:
268 return rc; 263 return rc;
269} 264}
270 265
271
272static void fill_tgid_exit(struct task_struct *tsk) 266static void fill_tgid_exit(struct task_struct *tsk)
273{ 267{
274 unsigned long flags; 268 unsigned long flags;
@@ -360,6 +354,12 @@ static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid)
360 struct nlattr *na, *ret; 354 struct nlattr *na, *ret;
361 int aggr; 355 int aggr;
362 356
357 /* If we don't pad, we end up with alignment on a 4 byte boundary.
358 * This causes lots of runtime warnings on systems requiring 8 byte
359 * alignment */
360 u32 pids[2] = { pid, 0 };
361 int pid_size = ALIGN(sizeof(pid), sizeof(long));
362
363 aggr = (type == TASKSTATS_TYPE_PID) 363 aggr = (type == TASKSTATS_TYPE_PID)
364 ? TASKSTATS_TYPE_AGGR_PID 364 ? TASKSTATS_TYPE_AGGR_PID
365 : TASKSTATS_TYPE_AGGR_TGID; 365 : TASKSTATS_TYPE_AGGR_TGID;
@@ -367,7 +367,7 @@ static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid)
367 na = nla_nest_start(skb, aggr); 367 na = nla_nest_start(skb, aggr);
368 if (!na) 368 if (!na)
369 goto err; 369 goto err;
370 if (nla_put(skb, type, sizeof(pid), &pid) < 0) 370 if (nla_put(skb, type, pid_size, pids) < 0)
371 goto err; 371 goto err;
372 ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); 372 ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats));
373 if (!ret) 373 if (!ret)
@@ -424,39 +424,46 @@ err:
424 return rc; 424 return rc;
425} 425}
426 426
427static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) 427static int cmd_attr_register_cpumask(struct genl_info *info)
428{ 428{
429 int rc;
430 struct sk_buff *rep_skb;
431 struct taskstats *stats;
432 size_t size;
433 cpumask_var_t mask; 429 cpumask_var_t mask;
430 int rc;
434 431
435 if (!alloc_cpumask_var(&mask, GFP_KERNEL)) 432 if (!alloc_cpumask_var(&mask, GFP_KERNEL))
436 return -ENOMEM; 433 return -ENOMEM;
437
438 rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask); 434 rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask);
439 if (rc < 0) 435 if (rc < 0)
440 goto free_return_rc; 436 goto out;
441 if (rc == 0) { 437 rc = add_del_listener(info->snd_pid, mask, REGISTER);
442 rc = add_del_listener(info->snd_pid, mask, REGISTER); 438out:
443 goto free_return_rc; 439 free_cpumask_var(mask);
444 } 440 return rc;
441}
442
443static int cmd_attr_deregister_cpumask(struct genl_info *info)
444{
445 cpumask_var_t mask;
446 int rc;
445 447
448 if (!alloc_cpumask_var(&mask, GFP_KERNEL))
449 return -ENOMEM;
446 rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask); 450 rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask);
447 if (rc < 0) 451 if (rc < 0)
448 goto free_return_rc; 452 goto out;
449 if (rc == 0) { 453 rc = add_del_listener(info->snd_pid, mask, DEREGISTER);
450 rc = add_del_listener(info->snd_pid, mask, DEREGISTER); 454out:
451free_return_rc:
452 free_cpumask_var(mask);
453 return rc;
454 }
455 free_cpumask_var(mask); 455 free_cpumask_var(mask);
456 return rc;
457}
458
459static int cmd_attr_pid(struct genl_info *info)
460{
461 struct taskstats *stats;
462 struct sk_buff *rep_skb;
463 size_t size;
464 u32 pid;
465 int rc;
456 466
457 /*
458 * Size includes space for nested attributes
459 */
460 size = nla_total_size(sizeof(u32)) + 467 size = nla_total_size(sizeof(u32)) +
461 nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); 468 nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
462 469
@@ -465,33 +472,64 @@ free_return_rc:
465 return rc; 472 return rc;
466 473
467 rc = -EINVAL; 474 rc = -EINVAL;
468 if (info->attrs[TASKSTATS_CMD_ATTR_PID]) { 475 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]);
469 u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); 476 stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid);
470 stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid); 477 if (!stats)
471 if (!stats) 478 goto err;
472 goto err; 479
473 480 rc = fill_stats_for_pid(pid, stats);
474 rc = fill_pid(pid, NULL, stats); 481 if (rc < 0)
475 if (rc < 0) 482 goto err;
476 goto err; 483 return send_reply(rep_skb, info);
477 } else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) { 484err:
478 u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); 485 nlmsg_free(rep_skb);
479 stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid); 486 return rc;
480 if (!stats) 487}
481 goto err; 488
482 489static int cmd_attr_tgid(struct genl_info *info)
483 rc = fill_tgid(tgid, NULL, stats); 490{
484 if (rc < 0) 491 struct taskstats *stats;
485 goto err; 492 struct sk_buff *rep_skb;
486 } else 493 size_t size;
494 u32 tgid;
495 int rc;
496
497 size = nla_total_size(sizeof(u32)) +
498 nla_total_size(sizeof(struct taskstats)) + nla_total_size(0);
499
500 rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size);
501 if (rc < 0)
502 return rc;
503
504 rc = -EINVAL;
505 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]);
506 stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid);
507 if (!stats)
487 goto err; 508 goto err;
488 509
510 rc = fill_stats_for_tgid(tgid, stats);
511 if (rc < 0)
512 goto err;
489 return send_reply(rep_skb, info); 513 return send_reply(rep_skb, info);
490err: 514err:
491 nlmsg_free(rep_skb); 515 nlmsg_free(rep_skb);
492 return rc; 516 return rc;
493} 517}
494 518
519static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
520{
521 if (info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK])
522 return cmd_attr_register_cpumask(info);
523 else if (info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK])
524 return cmd_attr_deregister_cpumask(info);
525 else if (info->attrs[TASKSTATS_CMD_ATTR_PID])
526 return cmd_attr_pid(info);
527 else if (info->attrs[TASKSTATS_CMD_ATTR_TGID])
528 return cmd_attr_tgid(info);
529 else
530 return -EINVAL;
531}
532
495static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk) 533static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk)
496{ 534{
497 struct signal_struct *sig = tsk->signal; 535 struct signal_struct *sig = tsk->signal;
@@ -555,9 +593,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead)
555 if (!stats) 593 if (!stats)
556 goto err; 594 goto err;
557 595
558 rc = fill_pid(-1, tsk, stats); 596 fill_stats(tsk, stats);
559 if (rc < 0)
560 goto err;
561 597
562 /* 598 /*
563 * Doesn't matter if tsk is the leader or the last group member leaving 599 * Doesn't matter if tsk is the leader or the last group member leaving
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index bc251ed66724..7b8ec0281548 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -168,7 +168,6 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
168static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), 168static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
169 BLK_TC_ACT(BLK_TC_WRITE) }; 169 BLK_TC_ACT(BLK_TC_WRITE) };
170 170
171#define BLK_TC_HARDBARRIER BLK_TC_BARRIER
172#define BLK_TC_RAHEAD BLK_TC_AHEAD 171#define BLK_TC_RAHEAD BLK_TC_AHEAD
173 172
174/* The ilog2() calls fall out because they're constant */ 173/* The ilog2() calls fall out because they're constant */
@@ -196,7 +195,6 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
196 return; 195 return;
197 196
198 what |= ddir_act[rw & WRITE]; 197 what |= ddir_act[rw & WRITE];
199 what |= MASK_TC_BIT(rw, HARDBARRIER);
200 what |= MASK_TC_BIT(rw, SYNC); 198 what |= MASK_TC_BIT(rw, SYNC);
201 what |= MASK_TC_BIT(rw, RAHEAD); 199 what |= MASK_TC_BIT(rw, RAHEAD);
202 what |= MASK_TC_BIT(rw, META); 200 what |= MASK_TC_BIT(rw, META);
@@ -1807,8 +1805,6 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
1807 1805
1808 if (rw & REQ_RAHEAD) 1806 if (rw & REQ_RAHEAD)
1809 rwbs[i++] = 'A'; 1807 rwbs[i++] = 'A';
1810 if (rw & REQ_HARDBARRIER)
1811 rwbs[i++] = 'B';
1812 if (rw & REQ_SYNC) 1808 if (rw & REQ_SYNC)
1813 rwbs[i++] = 'S'; 1809 rwbs[i++] = 'S';
1814 if (rw & REQ_META) 1810 if (rw & REQ_META)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index c3dab054d18e..9ed509a015d8 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -224,6 +224,9 @@ enum {
224 RB_LEN_TIME_STAMP = 16, 224 RB_LEN_TIME_STAMP = 16,
225}; 225};
226 226
227#define skip_time_extend(event) \
228 ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND))
229
227static inline int rb_null_event(struct ring_buffer_event *event) 230static inline int rb_null_event(struct ring_buffer_event *event)
228{ 231{
229 return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; 232 return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta;
@@ -248,8 +251,12 @@ rb_event_data_length(struct ring_buffer_event *event)
248 return length + RB_EVNT_HDR_SIZE; 251 return length + RB_EVNT_HDR_SIZE;
249} 252}
250 253
251/* inline for ring buffer fast paths */ 254/*
252static unsigned 255 * Return the length of the given event. Will return
256 * the length of the time extend if the event is a
257 * time extend.
258 */
259static inline unsigned
253rb_event_length(struct ring_buffer_event *event) 260rb_event_length(struct ring_buffer_event *event)
254{ 261{
255 switch (event->type_len) { 262 switch (event->type_len) {
@@ -274,13 +281,41 @@ rb_event_length(struct ring_buffer_event *event)
274 return 0; 281 return 0;
275} 282}
276 283
284/*
285 * Return total length of time extend and data,
286 * or just the event length for all other events.
287 */
288static inline unsigned
289rb_event_ts_length(struct ring_buffer_event *event)
290{
291 unsigned len = 0;
292
293 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
294 /* time extends include the data event after it */
295 len = RB_LEN_TIME_EXTEND;
296 event = skip_time_extend(event);
297 }
298 return len + rb_event_length(event);
299}
300
277/** 301/**
278 * ring_buffer_event_length - return the length of the event 302 * ring_buffer_event_length - return the length of the event
279 * @event: the event to get the length of 303 * @event: the event to get the length of
304 *
305 * Returns the size of the data load of a data event.
306 * If the event is something other than a data event, it
307 * returns the size of the event itself. With the exception
308 * of a TIME EXTEND, where it still returns the size of the
309 * data load of the data event after it.
280 */ 310 */
281unsigned ring_buffer_event_length(struct ring_buffer_event *event) 311unsigned ring_buffer_event_length(struct ring_buffer_event *event)
282{ 312{
283 unsigned length = rb_event_length(event); 313 unsigned length;
314
315 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
316 event = skip_time_extend(event);
317
318 length = rb_event_length(event);
284 if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) 319 if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
285 return length; 320 return length;
286 length -= RB_EVNT_HDR_SIZE; 321 length -= RB_EVNT_HDR_SIZE;
@@ -294,6 +329,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
294static void * 329static void *
295rb_event_data(struct ring_buffer_event *event) 330rb_event_data(struct ring_buffer_event *event)
296{ 331{
332 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
333 event = skip_time_extend(event);
297 BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); 334 BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
298 /* If length is in len field, then array[0] has the data */ 335 /* If length is in len field, then array[0] has the data */
299 if (event->type_len) 336 if (event->type_len)
@@ -404,9 +441,6 @@ static inline int test_time_stamp(u64 delta)
404/* Max payload is BUF_PAGE_SIZE - header (8bytes) */ 441/* Max payload is BUF_PAGE_SIZE - header (8bytes) */
405#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) 442#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
406 443
407/* Max number of timestamps that can fit on a page */
408#define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_EXTEND)
409
410int ring_buffer_print_page_header(struct trace_seq *s) 444int ring_buffer_print_page_header(struct trace_seq *s)
411{ 445{
412 struct buffer_data_page field; 446 struct buffer_data_page field;
@@ -1546,6 +1580,25 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
1546 iter->head = 0; 1580 iter->head = 0;
1547} 1581}
1548 1582
1583/* Slow path, do not inline */
1584static noinline struct ring_buffer_event *
1585rb_add_time_stamp(struct ring_buffer_event *event, u64 delta)
1586{
1587 event->type_len = RINGBUF_TYPE_TIME_EXTEND;
1588
1589 /* Not the first event on the page? */
1590 if (rb_event_index(event)) {
1591 event->time_delta = delta & TS_MASK;
1592 event->array[0] = delta >> TS_SHIFT;
1593 } else {
1594 /* nope, just zero it */
1595 event->time_delta = 0;
1596 event->array[0] = 0;
1597 }
1598
1599 return skip_time_extend(event);
1600}
1601
1549/** 1602/**
1550 * ring_buffer_update_event - update event type and data 1603 * ring_buffer_update_event - update event type and data
1551 * @event: the even to update 1604 * @event: the even to update
@@ -1558,28 +1611,31 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
1558 * data field. 1611 * data field.
1559 */ 1612 */
1560static void 1613static void
1561rb_update_event(struct ring_buffer_event *event, 1614rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
1562 unsigned type, unsigned length) 1615 struct ring_buffer_event *event, unsigned length,
1616 int add_timestamp, u64 delta)
1563{ 1617{
1564 event->type_len = type; 1618 /* Only a commit updates the timestamp */
1565 1619 if (unlikely(!rb_event_is_commit(cpu_buffer, event)))
1566 switch (type) { 1620 delta = 0;
1567
1568 case RINGBUF_TYPE_PADDING:
1569 case RINGBUF_TYPE_TIME_EXTEND:
1570 case RINGBUF_TYPE_TIME_STAMP:
1571 break;
1572 1621
1573 case 0: 1622 /*
1574 length -= RB_EVNT_HDR_SIZE; 1623 * If we need to add a timestamp, then we
1575 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) 1624 * add it to the start of the resevered space.
1576 event->array[0] = length; 1625 */
1577 else 1626 if (unlikely(add_timestamp)) {
1578 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); 1627 event = rb_add_time_stamp(event, delta);
1579 break; 1628 length -= RB_LEN_TIME_EXTEND;
1580 default: 1629 delta = 0;
1581 BUG();
1582 } 1630 }
1631
1632 event->time_delta = delta;
1633 length -= RB_EVNT_HDR_SIZE;
1634 if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
1635 event->type_len = 0;
1636 event->array[0] = length;
1637 } else
1638 event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
1583} 1639}
1584 1640
1585/* 1641/*
@@ -1823,10 +1879,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
1823 local_sub(length, &tail_page->write); 1879 local_sub(length, &tail_page->write);
1824} 1880}
1825 1881
1826static struct ring_buffer_event * 1882/*
1883 * This is the slow path, force gcc not to inline it.
1884 */
1885static noinline struct ring_buffer_event *
1827rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, 1886rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1828 unsigned long length, unsigned long tail, 1887 unsigned long length, unsigned long tail,
1829 struct buffer_page *tail_page, u64 *ts) 1888 struct buffer_page *tail_page, u64 ts)
1830{ 1889{
1831 struct buffer_page *commit_page = cpu_buffer->commit_page; 1890 struct buffer_page *commit_page = cpu_buffer->commit_page;
1832 struct ring_buffer *buffer = cpu_buffer->buffer; 1891 struct ring_buffer *buffer = cpu_buffer->buffer;
@@ -1909,8 +1968,8 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1909 * Nested commits always have zero deltas, so 1968 * Nested commits always have zero deltas, so
1910 * just reread the time stamp 1969 * just reread the time stamp
1911 */ 1970 */
1912 *ts = rb_time_stamp(buffer); 1971 ts = rb_time_stamp(buffer);
1913 next_page->page->time_stamp = *ts; 1972 next_page->page->time_stamp = ts;
1914 } 1973 }
1915 1974
1916 out_again: 1975 out_again:
@@ -1929,12 +1988,21 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
1929 1988
1930static struct ring_buffer_event * 1989static struct ring_buffer_event *
1931__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, 1990__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1932 unsigned type, unsigned long length, u64 *ts) 1991 unsigned long length, u64 ts,
1992 u64 delta, int add_timestamp)
1933{ 1993{
1934 struct buffer_page *tail_page; 1994 struct buffer_page *tail_page;
1935 struct ring_buffer_event *event; 1995 struct ring_buffer_event *event;
1936 unsigned long tail, write; 1996 unsigned long tail, write;
1937 1997
1998 /*
1999 * If the time delta since the last event is too big to
2000 * hold in the time field of the event, then we append a
2001 * TIME EXTEND event ahead of the data event.
2002 */
2003 if (unlikely(add_timestamp))
2004 length += RB_LEN_TIME_EXTEND;
2005
1938 tail_page = cpu_buffer->tail_page; 2006 tail_page = cpu_buffer->tail_page;
1939 write = local_add_return(length, &tail_page->write); 2007 write = local_add_return(length, &tail_page->write);
1940 2008
@@ -1943,7 +2011,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1943 tail = write - length; 2011 tail = write - length;
1944 2012
1945 /* See if we shot pass the end of this buffer page */ 2013 /* See if we shot pass the end of this buffer page */
1946 if (write > BUF_PAGE_SIZE) 2014 if (unlikely(write > BUF_PAGE_SIZE))
1947 return rb_move_tail(cpu_buffer, length, tail, 2015 return rb_move_tail(cpu_buffer, length, tail,
1948 tail_page, ts); 2016 tail_page, ts);
1949 2017
@@ -1951,18 +2019,16 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
1951 2019
1952 event = __rb_page_index(tail_page, tail); 2020 event = __rb_page_index(tail_page, tail);
1953 kmemcheck_annotate_bitfield(event, bitfield); 2021 kmemcheck_annotate_bitfield(event, bitfield);
1954 rb_update_event(event, type, length); 2022 rb_update_event(cpu_buffer, event, length, add_timestamp, delta);
1955 2023
1956 /* The passed in type is zero for DATA */ 2024 local_inc(&tail_page->entries);
1957 if (likely(!type))
1958 local_inc(&tail_page->entries);
1959 2025
1960 /* 2026 /*
1961 * If this is the first commit on the page, then update 2027 * If this is the first commit on the page, then update
1962 * its timestamp. 2028 * its timestamp.
1963 */ 2029 */
1964 if (!tail) 2030 if (!tail)
1965 tail_page->page->time_stamp = *ts; 2031 tail_page->page->time_stamp = ts;
1966 2032
1967 return event; 2033 return event;
1968} 2034}
@@ -1977,7 +2043,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
1977 unsigned long addr; 2043 unsigned long addr;
1978 2044
1979 new_index = rb_event_index(event); 2045 new_index = rb_event_index(event);
1980 old_index = new_index + rb_event_length(event); 2046 old_index = new_index + rb_event_ts_length(event);
1981 addr = (unsigned long)event; 2047 addr = (unsigned long)event;
1982 addr &= PAGE_MASK; 2048 addr &= PAGE_MASK;
1983 2049
@@ -2003,76 +2069,13 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
2003 return 0; 2069 return 0;
2004} 2070}
2005 2071
2006static int
2007rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
2008 u64 *ts, u64 *delta)
2009{
2010 struct ring_buffer_event *event;
2011 int ret;
2012
2013 WARN_ONCE(*delta > (1ULL << 59),
2014 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
2015 (unsigned long long)*delta,
2016 (unsigned long long)*ts,
2017 (unsigned long long)cpu_buffer->write_stamp);
2018
2019 /*
2020 * The delta is too big, we to add a
2021 * new timestamp.
2022 */
2023 event = __rb_reserve_next(cpu_buffer,
2024 RINGBUF_TYPE_TIME_EXTEND,
2025 RB_LEN_TIME_EXTEND,
2026 ts);
2027 if (!event)
2028 return -EBUSY;
2029
2030 if (PTR_ERR(event) == -EAGAIN)
2031 return -EAGAIN;
2032
2033 /* Only a commited time event can update the write stamp */
2034 if (rb_event_is_commit(cpu_buffer, event)) {
2035 /*
2036 * If this is the first on the page, then it was
2037 * updated with the page itself. Try to discard it
2038 * and if we can't just make it zero.
2039 */
2040 if (rb_event_index(event)) {
2041 event->time_delta = *delta & TS_MASK;
2042 event->array[0] = *delta >> TS_SHIFT;
2043 } else {
2044 /* try to discard, since we do not need this */
2045 if (!rb_try_to_discard(cpu_buffer, event)) {
2046 /* nope, just zero it */
2047 event->time_delta = 0;
2048 event->array[0] = 0;
2049 }
2050 }
2051 cpu_buffer->write_stamp = *ts;
2052 /* let the caller know this was the commit */
2053 ret = 1;
2054 } else {
2055 /* Try to discard the event */
2056 if (!rb_try_to_discard(cpu_buffer, event)) {
2057 /* Darn, this is just wasted space */
2058 event->time_delta = 0;
2059 event->array[0] = 0;
2060 }
2061 ret = 0;
2062 }
2063
2064 *delta = 0;
2065
2066 return ret;
2067}
2068
2069static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) 2072static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer)
2070{ 2073{
2071 local_inc(&cpu_buffer->committing); 2074 local_inc(&cpu_buffer->committing);
2072 local_inc(&cpu_buffer->commits); 2075 local_inc(&cpu_buffer->commits);
2073} 2076}
2074 2077
2075static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) 2078static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer)
2076{ 2079{
2077 unsigned long commits; 2080 unsigned long commits;
2078 2081
@@ -2110,9 +2113,10 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2110 unsigned long length) 2113 unsigned long length)
2111{ 2114{
2112 struct ring_buffer_event *event; 2115 struct ring_buffer_event *event;
2113 u64 ts, delta = 0; 2116 u64 ts, delta;
2114 int commit = 0;
2115 int nr_loops = 0; 2117 int nr_loops = 0;
2118 int add_timestamp;
2119 u64 diff;
2116 2120
2117 rb_start_commit(cpu_buffer); 2121 rb_start_commit(cpu_buffer);
2118 2122
@@ -2133,6 +2137,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2133 2137
2134 length = rb_calculate_event_length(length); 2138 length = rb_calculate_event_length(length);
2135 again: 2139 again:
2140 add_timestamp = 0;
2141 delta = 0;
2142
2136 /* 2143 /*
2137 * We allow for interrupts to reenter here and do a trace. 2144 * We allow for interrupts to reenter here and do a trace.
2138 * If one does, it will cause this original code to loop 2145 * If one does, it will cause this original code to loop
@@ -2146,56 +2153,32 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2146 goto out_fail; 2153 goto out_fail;
2147 2154
2148 ts = rb_time_stamp(cpu_buffer->buffer); 2155 ts = rb_time_stamp(cpu_buffer->buffer);
2156 diff = ts - cpu_buffer->write_stamp;
2149 2157
2150 /* 2158 /* make sure this diff is calculated here */
2151 * Only the first commit can update the timestamp. 2159 barrier();
2152 * Yes there is a race here. If an interrupt comes in
2153 * just after the conditional and it traces too, then it
2154 * will also check the deltas. More than one timestamp may
2155 * also be made. But only the entry that did the actual
2156 * commit will be something other than zero.
2157 */
2158 if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page &&
2159 rb_page_write(cpu_buffer->tail_page) ==
2160 rb_commit_index(cpu_buffer))) {
2161 u64 diff;
2162
2163 diff = ts - cpu_buffer->write_stamp;
2164
2165 /* make sure this diff is calculated here */
2166 barrier();
2167
2168 /* Did the write stamp get updated already? */
2169 if (unlikely(ts < cpu_buffer->write_stamp))
2170 goto get_event;
2171 2160
2161 /* Did the write stamp get updated already? */
2162 if (likely(ts >= cpu_buffer->write_stamp)) {
2172 delta = diff; 2163 delta = diff;
2173 if (unlikely(test_time_stamp(delta))) { 2164 if (unlikely(test_time_stamp(delta))) {
2174 2165 WARN_ONCE(delta > (1ULL << 59),
2175 commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); 2166 KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
2176 if (commit == -EBUSY) 2167 (unsigned long long)delta,
2177 goto out_fail; 2168 (unsigned long long)ts,
2178 2169 (unsigned long long)cpu_buffer->write_stamp);
2179 if (commit == -EAGAIN) 2170 add_timestamp = 1;
2180 goto again;
2181
2182 RB_WARN_ON(cpu_buffer, commit < 0);
2183 } 2171 }
2184 } 2172 }
2185 2173
2186 get_event: 2174 event = __rb_reserve_next(cpu_buffer, length, ts,
2187 event = __rb_reserve_next(cpu_buffer, 0, length, &ts); 2175 delta, add_timestamp);
2188 if (unlikely(PTR_ERR(event) == -EAGAIN)) 2176 if (unlikely(PTR_ERR(event) == -EAGAIN))
2189 goto again; 2177 goto again;
2190 2178
2191 if (!event) 2179 if (!event)
2192 goto out_fail; 2180 goto out_fail;
2193 2181
2194 if (!rb_event_is_commit(cpu_buffer, event))
2195 delta = 0;
2196
2197 event->time_delta = delta;
2198
2199 return event; 2182 return event;
2200 2183
2201 out_fail: 2184 out_fail:
@@ -2207,13 +2190,9 @@ rb_reserve_next_event(struct ring_buffer *buffer,
2207 2190
2208#define TRACE_RECURSIVE_DEPTH 16 2191#define TRACE_RECURSIVE_DEPTH 16
2209 2192
2210static int trace_recursive_lock(void) 2193/* Keep this code out of the fast path cache */
2194static noinline void trace_recursive_fail(void)
2211{ 2195{
2212 current->trace_recursion++;
2213
2214 if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
2215 return 0;
2216
2217 /* Disable all tracing before we do anything else */ 2196 /* Disable all tracing before we do anything else */
2218 tracing_off_permanent(); 2197 tracing_off_permanent();
2219 2198
@@ -2225,10 +2204,21 @@ static int trace_recursive_lock(void)
2225 in_nmi()); 2204 in_nmi());
2226 2205
2227 WARN_ON_ONCE(1); 2206 WARN_ON_ONCE(1);
2207}
2208
2209static inline int trace_recursive_lock(void)
2210{
2211 current->trace_recursion++;
2212
2213 if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
2214 return 0;
2215
2216 trace_recursive_fail();
2217
2228 return -1; 2218 return -1;
2229} 2219}
2230 2220
2231static void trace_recursive_unlock(void) 2221static inline void trace_recursive_unlock(void)
2232{ 2222{
2233 WARN_ON_ONCE(!current->trace_recursion); 2223 WARN_ON_ONCE(!current->trace_recursion);
2234 2224
@@ -2308,12 +2298,28 @@ static void
2308rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, 2298rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer,
2309 struct ring_buffer_event *event) 2299 struct ring_buffer_event *event)
2310{ 2300{
2301 u64 delta;
2302
2311 /* 2303 /*
2312 * The event first in the commit queue updates the 2304 * The event first in the commit queue updates the
2313 * time stamp. 2305 * time stamp.
2314 */ 2306 */
2315 if (rb_event_is_commit(cpu_buffer, event)) 2307 if (rb_event_is_commit(cpu_buffer, event)) {
2316 cpu_buffer->write_stamp += event->time_delta; 2308 /*
2309 * A commit event that is first on a page
2310 * updates the write timestamp with the page stamp
2311 */
2312 if (!rb_event_index(event))
2313 cpu_buffer->write_stamp =
2314 cpu_buffer->commit_page->page->time_stamp;
2315 else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) {
2316 delta = event->array[0];
2317 delta <<= TS_SHIFT;
2318 delta += event->time_delta;
2319 cpu_buffer->write_stamp += delta;
2320 } else
2321 cpu_buffer->write_stamp += event->time_delta;
2322 }
2317} 2323}
2318 2324
2319static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, 2325static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
@@ -2353,6 +2359,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
2353 2359
2354static inline void rb_event_discard(struct ring_buffer_event *event) 2360static inline void rb_event_discard(struct ring_buffer_event *event)
2355{ 2361{
2362 if (event->type_len == RINGBUF_TYPE_TIME_EXTEND)
2363 event = skip_time_extend(event);
2364
2356 /* array[0] holds the actual length for the discarded event */ 2365 /* array[0] holds the actual length for the discarded event */
2357 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; 2366 event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
2358 event->type_len = RINGBUF_TYPE_PADDING; 2367 event->type_len = RINGBUF_TYPE_PADDING;
@@ -3049,12 +3058,12 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
3049 3058
3050 again: 3059 again:
3051 /* 3060 /*
3052 * We repeat when a timestamp is encountered. It is possible 3061 * We repeat when a time extend is encountered.
3053 * to get multiple timestamps from an interrupt entering just 3062 * Since the time extend is always attached to a data event,
3054 * as one timestamp is about to be written, or from discarded 3063 * we should never loop more than once.
3055 * commits. The most that we can have is the number on a single page. 3064 * (We never hit the following condition more than twice).
3056 */ 3065 */
3057 if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) 3066 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
3058 return NULL; 3067 return NULL;
3059 3068
3060 reader = rb_get_reader_page(cpu_buffer); 3069 reader = rb_get_reader_page(cpu_buffer);
@@ -3130,14 +3139,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
3130 return NULL; 3139 return NULL;
3131 3140
3132 /* 3141 /*
3133 * We repeat when a timestamp is encountered. 3142 * We repeat when a time extend is encountered.
3134 * We can get multiple timestamps by nested interrupts or also 3143 * Since the time extend is always attached to a data event,
3135 * if filtering is on (discarding commits). Since discarding 3144 * we should never loop more than once.
3136 * commits can be frequent we can get a lot of timestamps. 3145 * (We never hit the following condition more than twice).
3137 * But we limit them by not adding timestamps if they begin
3138 * at the start of a page.
3139 */ 3146 */
3140 if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) 3147 if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2))
3141 return NULL; 3148 return NULL;
3142 3149
3143 if (rb_per_cpu_empty(cpu_buffer)) 3150 if (rb_per_cpu_empty(cpu_buffer))
@@ -3835,7 +3842,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3835 if (len > (commit - read)) 3842 if (len > (commit - read))
3836 len = (commit - read); 3843 len = (commit - read);
3837 3844
3838 size = rb_event_length(event); 3845 /* Always keep the time extend and data together */
3846 size = rb_event_ts_length(event);
3839 3847
3840 if (len < size) 3848 if (len < size)
3841 goto out_unlock; 3849 goto out_unlock;
@@ -3857,7 +3865,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3857 break; 3865 break;
3858 3866
3859 event = rb_reader_event(cpu_buffer); 3867 event = rb_reader_event(cpu_buffer);
3860 size = rb_event_length(event); 3868 /* Always keep the time extend and data together */
3869 size = rb_event_ts_length(event);
3861 } while (len > size); 3870 } while (len > size);
3862 3871
3863 /* update bpage */ 3872 /* update bpage */
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 001bcd2ccf4a..82d9b8106cd0 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3996,13 +3996,9 @@ static void tracing_init_debugfs_percpu(long cpu)
3996{ 3996{
3997 struct dentry *d_percpu = tracing_dentry_percpu(); 3997 struct dentry *d_percpu = tracing_dentry_percpu();
3998 struct dentry *d_cpu; 3998 struct dentry *d_cpu;
3999 /* strlen(cpu) + MAX(log10(cpu)) + '\0' */ 3999 char cpu_dir[30]; /* 30 characters should be more than enough */
4000 char cpu_dir[7];
4001 4000
4002 if (cpu > 999 || cpu < 0) 4001 snprintf(cpu_dir, 30, "cpu%ld", cpu);
4003 return;
4004
4005 sprintf(cpu_dir, "cpu%ld", cpu);
4006 d_cpu = debugfs_create_dir(cpu_dir, d_percpu); 4002 d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
4007 if (!d_cpu) { 4003 if (!d_cpu) {
4008 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir); 4004 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index b8d2852baa4a..2dec9bcde8b4 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -31,7 +31,6 @@
31#include <linux/perf_event.h> 31#include <linux/perf_event.h>
32#include <linux/stringify.h> 32#include <linux/stringify.h>
33#include <linux/limits.h> 33#include <linux/limits.h>
34#include <linux/uaccess.h>
35#include <asm/bitsperlong.h> 34#include <asm/bitsperlong.h>
36 35
37#include "trace.h" 36#include "trace.h"
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 0a67e041edf8..24dc60d9fa1f 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -63,12 +63,10 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
63 stats->ac_ppid = pid_alive(tsk) ? 63 stats->ac_ppid = pid_alive(tsk) ?
64 rcu_dereference(tsk->real_parent)->tgid : 0; 64 rcu_dereference(tsk->real_parent)->tgid : 0;
65 rcu_read_unlock(); 65 rcu_read_unlock();
66 stats->ac_utime = cputime_to_msecs(tsk->utime) * USEC_PER_MSEC; 66 stats->ac_utime = cputime_to_usecs(tsk->utime);
67 stats->ac_stime = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC; 67 stats->ac_stime = cputime_to_usecs(tsk->stime);
68 stats->ac_utimescaled = 68 stats->ac_utimescaled = cputime_to_usecs(tsk->utimescaled);
69 cputime_to_msecs(tsk->utimescaled) * USEC_PER_MSEC; 69 stats->ac_stimescaled = cputime_to_usecs(tsk->stimescaled);
70 stats->ac_stimescaled =
71 cputime_to_msecs(tsk->stimescaled) * USEC_PER_MSEC;
72 stats->ac_minflt = tsk->min_flt; 70 stats->ac_minflt = tsk->min_flt;
73 stats->ac_majflt = tsk->maj_flt; 71 stats->ac_majflt = tsk->maj_flt;
74 72
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index bafba687a6d8..6e3c41a4024c 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -43,7 +43,7 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
43static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); 43static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
44#endif 44#endif
45 45
46static int __initdata no_watchdog; 46static int no_watchdog;
47 47
48 48
49/* boot commands */ 49/* boot commands */