Diffstat (limited to 'kernel')
41 files changed, 928 insertions, 617 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index d96045789b54..77770a034d59 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -467,23 +467,16 @@ static int audit_prepare_user_tty(pid_t pid, uid_t loginuid, u32 sessionid) | |||
467 | struct task_struct *tsk; | 467 | struct task_struct *tsk; |
468 | int err; | 468 | int err; |
469 | 469 | ||
470 | read_lock(&tasklist_lock); | 470 | rcu_read_lock(); |
471 | tsk = find_task_by_vpid(pid); | 471 | tsk = find_task_by_vpid(pid); |
472 | err = -ESRCH; | 472 | if (!tsk) { |
473 | if (!tsk) | 473 | rcu_read_unlock(); |
474 | goto out; | 474 | return -ESRCH; |
475 | err = 0; | 475 | } |
476 | 476 | get_task_struct(tsk); | |
477 | spin_lock_irq(&tsk->sighand->siglock); | 477 | rcu_read_unlock(); |
478 | if (!tsk->signal->audit_tty) | 478 | err = tty_audit_push_task(tsk, loginuid, sessionid); |
479 | err = -EPERM; | 479 | put_task_struct(tsk); |
480 | spin_unlock_irq(&tsk->sighand->siglock); | ||
481 | if (err) | ||
482 | goto out; | ||
483 | |||
484 | tty_audit_push_task(tsk, loginuid, sessionid); | ||
485 | out: | ||
486 | read_unlock(&tasklist_lock); | ||
487 | return err; | 480 | return err; |
488 | } | 481 | } |
489 | 482 | ||
@@ -506,7 +499,7 @@ int audit_send_list(void *_dest) | |||
506 | } | 499 | } |
507 | 500 | ||
508 | struct sk_buff *audit_make_reply(int pid, int seq, int type, int done, | 501 | struct sk_buff *audit_make_reply(int pid, int seq, int type, int done, |
509 | int multi, void *payload, int size) | 502 | int multi, const void *payload, int size) |
510 | { | 503 | { |
511 | struct sk_buff *skb; | 504 | struct sk_buff *skb; |
512 | struct nlmsghdr *nlh; | 505 | struct nlmsghdr *nlh; |
@@ -555,8 +548,8 @@ static int audit_send_reply_thread(void *arg) | |||
555 | * Allocates an skb, builds the netlink message, and sends it to the pid. | 548 | * Allocates an skb, builds the netlink message, and sends it to the pid. |
556 | * No failure notifications. | 549 | * No failure notifications. |
557 | */ | 550 | */ |
558 | void audit_send_reply(int pid, int seq, int type, int done, int multi, | 551 | static void audit_send_reply(int pid, int seq, int type, int done, int multi, |
559 | void *payload, int size) | 552 | const void *payload, int size) |
560 | { | 553 | { |
561 | struct sk_buff *skb; | 554 | struct sk_buff *skb; |
562 | struct task_struct *tsk; | 555 | struct task_struct *tsk; |
@@ -880,40 +873,40 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
880 | case AUDIT_TTY_GET: { | 873 | case AUDIT_TTY_GET: { |
881 | struct audit_tty_status s; | 874 | struct audit_tty_status s; |
882 | struct task_struct *tsk; | 875 | struct task_struct *tsk; |
876 | unsigned long flags; | ||
883 | 877 | ||
884 | read_lock(&tasklist_lock); | 878 | rcu_read_lock(); |
885 | tsk = find_task_by_vpid(pid); | 879 | tsk = find_task_by_vpid(pid); |
886 | if (!tsk) | 880 | if (tsk && lock_task_sighand(tsk, &flags)) { |
887 | err = -ESRCH; | ||
888 | else { | ||
889 | spin_lock_irq(&tsk->sighand->siglock); | ||
890 | s.enabled = tsk->signal->audit_tty != 0; | 881 | s.enabled = tsk->signal->audit_tty != 0; |
891 | spin_unlock_irq(&tsk->sighand->siglock); | 882 | unlock_task_sighand(tsk, &flags); |
892 | } | 883 | } else |
893 | read_unlock(&tasklist_lock); | 884 | err = -ESRCH; |
894 | audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_TTY_GET, 0, 0, | 885 | rcu_read_unlock(); |
895 | &s, sizeof(s)); | 886 | |
887 | if (!err) | ||
888 | audit_send_reply(NETLINK_CB(skb).pid, seq, | ||
889 | AUDIT_TTY_GET, 0, 0, &s, sizeof(s)); | ||
896 | break; | 890 | break; |
897 | } | 891 | } |
898 | case AUDIT_TTY_SET: { | 892 | case AUDIT_TTY_SET: { |
899 | struct audit_tty_status *s; | 893 | struct audit_tty_status *s; |
900 | struct task_struct *tsk; | 894 | struct task_struct *tsk; |
895 | unsigned long flags; | ||
901 | 896 | ||
902 | if (nlh->nlmsg_len < sizeof(struct audit_tty_status)) | 897 | if (nlh->nlmsg_len < sizeof(struct audit_tty_status)) |
903 | return -EINVAL; | 898 | return -EINVAL; |
904 | s = data; | 899 | s = data; |
905 | if (s->enabled != 0 && s->enabled != 1) | 900 | if (s->enabled != 0 && s->enabled != 1) |
906 | return -EINVAL; | 901 | return -EINVAL; |
907 | read_lock(&tasklist_lock); | 902 | rcu_read_lock(); |
908 | tsk = find_task_by_vpid(pid); | 903 | tsk = find_task_by_vpid(pid); |
909 | if (!tsk) | 904 | if (tsk && lock_task_sighand(tsk, &flags)) { |
910 | err = -ESRCH; | ||
911 | else { | ||
912 | spin_lock_irq(&tsk->sighand->siglock); | ||
913 | tsk->signal->audit_tty = s->enabled != 0; | 905 | tsk->signal->audit_tty = s->enabled != 0; |
914 | spin_unlock_irq(&tsk->sighand->siglock); | 906 | unlock_task_sighand(tsk, &flags); |
915 | } | 907 | } else |
916 | read_unlock(&tasklist_lock); | 908 | err = -ESRCH; |
909 | rcu_read_unlock(); | ||
917 | break; | 910 | break; |
918 | } | 911 | } |
919 | default: | 912 | default: |
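For reference, the rcu_read_lock()-based lookup that replaces read_lock(&tasklist_lock) in audit_prepare_user_tty() boils down to the sketch below. The RCU and task-reference APIs are the real kernel functions used in the hunk; the wrapper and do_something_with() are hypothetical stand-ins.

static int example_with_task(pid_t pid)
{
        struct task_struct *tsk;
        int err;

        rcu_read_lock();
        tsk = find_task_by_vpid(pid);
        if (!tsk) {
                rcu_read_unlock();
                return -ESRCH;
        }
        get_task_struct(tsk);           /* pin the task before leaving RCU */
        rcu_read_unlock();

        err = do_something_with(tsk);   /* hypothetical helper */
        put_task_struct(tsk);
        return err;
}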
diff --git a/kernel/audit.h b/kernel/audit.h
index f7206db4e13d..91e7071c4d2c 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -84,10 +84,7 @@ extern int audit_compare_dname_path(const char *dname, const char *path, | |||
84 | int *dirlen); | 84 | int *dirlen); |
85 | extern struct sk_buff * audit_make_reply(int pid, int seq, int type, | 85 | extern struct sk_buff * audit_make_reply(int pid, int seq, int type, |
86 | int done, int multi, | 86 | int done, int multi, |
87 | void *payload, int size); | 87 | const void *payload, int size); |
88 | extern void audit_send_reply(int pid, int seq, int type, | ||
89 | int done, int multi, | ||
90 | void *payload, int size); | ||
91 | extern void audit_panic(const char *message); | 88 | extern void audit_panic(const char *message); |
92 | 89 | ||
93 | struct audit_netlink_list { | 90 | struct audit_netlink_list { |
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 7f18d3a4527e..37b2bea170c8 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -223,7 +223,7 @@ static void untag_chunk(struct node *p) | |||
223 | { | 223 | { |
224 | struct audit_chunk *chunk = find_chunk(p); | 224 | struct audit_chunk *chunk = find_chunk(p); |
225 | struct fsnotify_mark *entry = &chunk->mark; | 225 | struct fsnotify_mark *entry = &chunk->mark; |
226 | struct audit_chunk *new; | 226 | struct audit_chunk *new = NULL; |
227 | struct audit_tree *owner; | 227 | struct audit_tree *owner; |
228 | int size = chunk->count - 1; | 228 | int size = chunk->count - 1; |
229 | int i, j; | 229 | int i, j; |
@@ -232,9 +232,14 @@ static void untag_chunk(struct node *p) | |||
232 | 232 | ||
233 | spin_unlock(&hash_lock); | 233 | spin_unlock(&hash_lock); |
234 | 234 | ||
235 | if (size) | ||
236 | new = alloc_chunk(size); | ||
237 | |||
235 | spin_lock(&entry->lock); | 238 | spin_lock(&entry->lock); |
236 | if (chunk->dead || !entry->i.inode) { | 239 | if (chunk->dead || !entry->i.inode) { |
237 | spin_unlock(&entry->lock); | 240 | spin_unlock(&entry->lock); |
241 | if (new) | ||
242 | free_chunk(new); | ||
238 | goto out; | 243 | goto out; |
239 | } | 244 | } |
240 | 245 | ||
@@ -255,9 +260,9 @@ static void untag_chunk(struct node *p) | |||
255 | goto out; | 260 | goto out; |
256 | } | 261 | } |
257 | 262 | ||
258 | new = alloc_chunk(size); | ||
259 | if (!new) | 263 | if (!new) |
260 | goto Fallback; | 264 | goto Fallback; |
265 | |||
261 | fsnotify_duplicate_mark(&new->mark, entry); | 266 | fsnotify_duplicate_mark(&new->mark, entry); |
262 | if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.i.inode, NULL, 1)) { | 267 | if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.i.inode, NULL, 1)) { |
263 | free_chunk(new); | 268 | free_chunk(new); |
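The untag_chunk() reshuffle above is the usual pre-allocation idiom: do the possibly-sleeping allocation before taking the spinlock, and discard it on the paths that turn out not to need it. A hedged, standalone sketch of that shape (struct item, alloc_item(), free_item() and item_lock are hypothetical):

static void example_replace(struct item **slot, int size, bool dead)
{
        struct item *new = NULL;

        if (size)
                new = alloc_item(size);         /* may sleep, so done unlocked */

        spin_lock(&item_lock);
        if (dead) {                             /* bailing out: allocation unused */
                spin_unlock(&item_lock);
                free_item(new);                 /* hypothetical; tolerates NULL */
                return;
        }
        if (new)
                *slot = new;                    /* publish under the lock */
        spin_unlock(&item_lock);
}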
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index f0c9b2e7542d..d2e3c7866460 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -60,7 +60,7 @@ struct audit_parent { | |||
60 | }; | 60 | }; |
61 | 61 | ||
62 | /* fsnotify handle. */ | 62 | /* fsnotify handle. */ |
63 | struct fsnotify_group *audit_watch_group; | 63 | static struct fsnotify_group *audit_watch_group; |
64 | 64 | ||
65 | /* fsnotify events we care about. */ | 65 | /* fsnotify events we care about. */ |
66 | #define AUDIT_FS_WATCH (FS_MOVE | FS_CREATE | FS_DELETE | FS_DELETE_SELF |\ | 66 | #define AUDIT_FS_WATCH (FS_MOVE | FS_CREATE | FS_DELETE | FS_DELETE_SELF |\ |
@@ -123,7 +123,7 @@ void audit_put_watch(struct audit_watch *watch) | |||
123 | } | 123 | } |
124 | } | 124 | } |
125 | 125 | ||
126 | void audit_remove_watch(struct audit_watch *watch) | 126 | static void audit_remove_watch(struct audit_watch *watch) |
127 | { | 127 | { |
128 | list_del(&watch->wlist); | 128 | list_del(&watch->wlist); |
129 | audit_put_parent(watch->parent); | 129 | audit_put_parent(watch->parent); |
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index eb7675499fb5..add2819af71b 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1252,6 +1252,18 @@ static int audit_filter_user_rules(struct netlink_skb_parms *cb, | |||
1252 | case AUDIT_LOGINUID: | 1252 | case AUDIT_LOGINUID: |
1253 | result = audit_comparator(cb->loginuid, f->op, f->val); | 1253 | result = audit_comparator(cb->loginuid, f->op, f->val); |
1254 | break; | 1254 | break; |
1255 | case AUDIT_SUBJ_USER: | ||
1256 | case AUDIT_SUBJ_ROLE: | ||
1257 | case AUDIT_SUBJ_TYPE: | ||
1258 | case AUDIT_SUBJ_SEN: | ||
1259 | case AUDIT_SUBJ_CLR: | ||
1260 | if (f->lsm_rule) | ||
1261 | result = security_audit_rule_match(cb->sid, | ||
1262 | f->type, | ||
1263 | f->op, | ||
1264 | f->lsm_rule, | ||
1265 | NULL); | ||
1266 | break; | ||
1255 | } | 1267 | } |
1256 | 1268 | ||
1257 | if (!result) | 1269 | if (!result) |
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 1b31c130d034..f49a0318c2ed 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -241,6 +241,10 @@ struct audit_context { | |||
241 | pid_t pid; | 241 | pid_t pid; |
242 | struct audit_cap_data cap; | 242 | struct audit_cap_data cap; |
243 | } capset; | 243 | } capset; |
244 | struct { | ||
245 | int fd; | ||
246 | int flags; | ||
247 | } mmap; | ||
244 | }; | 248 | }; |
245 | int fds[2]; | 249 | int fds[2]; |
246 | 250 | ||
@@ -1305,6 +1309,10 @@ static void show_special(struct audit_context *context, int *call_panic) | |||
1305 | audit_log_cap(ab, "cap_pp", &context->capset.cap.permitted); | 1309 | audit_log_cap(ab, "cap_pp", &context->capset.cap.permitted); |
1306 | audit_log_cap(ab, "cap_pe", &context->capset.cap.effective); | 1310 | audit_log_cap(ab, "cap_pe", &context->capset.cap.effective); |
1307 | break; } | 1311 | break; } |
1312 | case AUDIT_MMAP: { | ||
1313 | audit_log_format(ab, "fd=%d flags=0x%x", context->mmap.fd, | ||
1314 | context->mmap.flags); | ||
1315 | break; } | ||
1308 | } | 1316 | } |
1309 | audit_log_end(ab); | 1317 | audit_log_end(ab); |
1310 | } | 1318 | } |
@@ -2476,6 +2484,14 @@ void __audit_log_capset(pid_t pid, | |||
2476 | context->type = AUDIT_CAPSET; | 2484 | context->type = AUDIT_CAPSET; |
2477 | } | 2485 | } |
2478 | 2486 | ||
2487 | void __audit_mmap_fd(int fd, int flags) | ||
2488 | { | ||
2489 | struct audit_context *context = current->audit_context; | ||
2490 | context->mmap.fd = fd; | ||
2491 | context->mmap.flags = flags; | ||
2492 | context->type = AUDIT_MMAP; | ||
2493 | } | ||
2494 | |||
2479 | /** | 2495 | /** |
2480 | * audit_core_dumps - record information about processes that end abnormally | 2496 | * audit_core_dumps - record information about processes that end abnormally |
2481 | * @signr: signal value | 2497 | * @signr: signal value |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 9270d532ec3c..66a416b42c18 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -243,6 +243,11 @@ static int notify_on_release(const struct cgroup *cgrp) | |||
243 | return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); | 243 | return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); |
244 | } | 244 | } |
245 | 245 | ||
246 | static int clone_children(const struct cgroup *cgrp) | ||
247 | { | ||
248 | return test_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); | ||
249 | } | ||
250 | |||
246 | /* | 251 | /* |
247 | * for_each_subsys() allows you to iterate on each subsystem attached to | 252 | * for_each_subsys() allows you to iterate on each subsystem attached to |
248 | * an active hierarchy | 253 | * an active hierarchy |
@@ -1040,6 +1045,8 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
1040 | seq_puts(seq, ",noprefix"); | 1045 | seq_puts(seq, ",noprefix"); |
1041 | if (strlen(root->release_agent_path)) | 1046 | if (strlen(root->release_agent_path)) |
1042 | seq_printf(seq, ",release_agent=%s", root->release_agent_path); | 1047 | seq_printf(seq, ",release_agent=%s", root->release_agent_path); |
1048 | if (clone_children(&root->top_cgroup)) | ||
1049 | seq_puts(seq, ",clone_children"); | ||
1043 | if (strlen(root->name)) | 1050 | if (strlen(root->name)) |
1044 | seq_printf(seq, ",name=%s", root->name); | 1051 | seq_printf(seq, ",name=%s", root->name); |
1045 | mutex_unlock(&cgroup_mutex); | 1052 | mutex_unlock(&cgroup_mutex); |
@@ -1050,6 +1057,7 @@ struct cgroup_sb_opts { | |||
1050 | unsigned long subsys_bits; | 1057 | unsigned long subsys_bits; |
1051 | unsigned long flags; | 1058 | unsigned long flags; |
1052 | char *release_agent; | 1059 | char *release_agent; |
1060 | bool clone_children; | ||
1053 | char *name; | 1061 | char *name; |
1054 | /* User explicitly requested empty subsystem */ | 1062 | /* User explicitly requested empty subsystem */ |
1055 | bool none; | 1063 | bool none; |
@@ -1066,7 +1074,8 @@ struct cgroup_sb_opts { | |||
1066 | */ | 1074 | */ |
1067 | static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | 1075 | static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) |
1068 | { | 1076 | { |
1069 | char *token, *o = data ?: "all"; | 1077 | char *token, *o = data; |
1078 | bool all_ss = false, one_ss = false; | ||
1070 | unsigned long mask = (unsigned long)-1; | 1079 | unsigned long mask = (unsigned long)-1; |
1071 | int i; | 1080 | int i; |
1072 | bool module_pin_failed = false; | 1081 | bool module_pin_failed = false; |
@@ -1082,22 +1091,27 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
1082 | while ((token = strsep(&o, ",")) != NULL) { | 1091 | while ((token = strsep(&o, ",")) != NULL) { |
1083 | if (!*token) | 1092 | if (!*token) |
1084 | return -EINVAL; | 1093 | return -EINVAL; |
1085 | if (!strcmp(token, "all")) { | 1094 | if (!strcmp(token, "none")) { |
1086 | /* Add all non-disabled subsystems */ | ||
1087 | opts->subsys_bits = 0; | ||
1088 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | ||
1089 | struct cgroup_subsys *ss = subsys[i]; | ||
1090 | if (ss == NULL) | ||
1091 | continue; | ||
1092 | if (!ss->disabled) | ||
1093 | opts->subsys_bits |= 1ul << i; | ||
1094 | } | ||
1095 | } else if (!strcmp(token, "none")) { | ||
1096 | /* Explicitly have no subsystems */ | 1095 | /* Explicitly have no subsystems */ |
1097 | opts->none = true; | 1096 | opts->none = true; |
1098 | } else if (!strcmp(token, "noprefix")) { | 1097 | continue; |
1098 | } | ||
1099 | if (!strcmp(token, "all")) { | ||
1100 | /* Mutually exclusive option 'all' + subsystem name */ | ||
1101 | if (one_ss) | ||
1102 | return -EINVAL; | ||
1103 | all_ss = true; | ||
1104 | continue; | ||
1105 | } | ||
1106 | if (!strcmp(token, "noprefix")) { | ||
1099 | set_bit(ROOT_NOPREFIX, &opts->flags); | 1107 | set_bit(ROOT_NOPREFIX, &opts->flags); |
1100 | } else if (!strncmp(token, "release_agent=", 14)) { | 1108 | continue; |
1109 | } | ||
1110 | if (!strcmp(token, "clone_children")) { | ||
1111 | opts->clone_children = true; | ||
1112 | continue; | ||
1113 | } | ||
1114 | if (!strncmp(token, "release_agent=", 14)) { | ||
1101 | /* Specifying two release agents is forbidden */ | 1115 | /* Specifying two release agents is forbidden */ |
1102 | if (opts->release_agent) | 1116 | if (opts->release_agent) |
1103 | return -EINVAL; | 1117 | return -EINVAL; |
@@ -1105,7 +1119,9 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
1105 | kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL); | 1119 | kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL); |
1106 | if (!opts->release_agent) | 1120 | if (!opts->release_agent) |
1107 | return -ENOMEM; | 1121 | return -ENOMEM; |
1108 | } else if (!strncmp(token, "name=", 5)) { | 1122 | continue; |
1123 | } | ||
1124 | if (!strncmp(token, "name=", 5)) { | ||
1109 | const char *name = token + 5; | 1125 | const char *name = token + 5; |
1110 | /* Can't specify an empty name */ | 1126 | /* Can't specify an empty name */ |
1111 | if (!strlen(name)) | 1127 | if (!strlen(name)) |
@@ -1127,20 +1143,44 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
1127 | GFP_KERNEL); | 1143 | GFP_KERNEL); |
1128 | if (!opts->name) | 1144 | if (!opts->name) |
1129 | return -ENOMEM; | 1145 | return -ENOMEM; |
1130 | } else { | 1146 | |
1131 | struct cgroup_subsys *ss; | 1147 | continue; |
1132 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 1148 | } |
1133 | ss = subsys[i]; | 1149 | |
1134 | if (ss == NULL) | 1150 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
1135 | continue; | 1151 | struct cgroup_subsys *ss = subsys[i]; |
1136 | if (!strcmp(token, ss->name)) { | 1152 | if (ss == NULL) |
1137 | if (!ss->disabled) | 1153 | continue; |
1138 | set_bit(i, &opts->subsys_bits); | 1154 | if (strcmp(token, ss->name)) |
1139 | break; | 1155 | continue; |
1140 | } | 1156 | if (ss->disabled) |
1141 | } | 1157 | continue; |
1142 | if (i == CGROUP_SUBSYS_COUNT) | 1158 | |
1143 | return -ENOENT; | 1159 | /* Mutually exclusive option 'all' + subsystem name */ |
1160 | if (all_ss) | ||
1161 | return -EINVAL; | ||
1162 | set_bit(i, &opts->subsys_bits); | ||
1163 | one_ss = true; | ||
1164 | |||
1165 | break; | ||
1166 | } | ||
1167 | if (i == CGROUP_SUBSYS_COUNT) | ||
1168 | return -ENOENT; | ||
1169 | } | ||
1170 | |||
1171 | /* | ||
1172 | * If the 'all' option was specified select all the subsystems, | ||
1173 | * otherwise 'all, 'none' and a subsystem name options were not | ||
1174 | * specified, let's default to 'all' | ||
1175 | */ | ||
1176 | if (all_ss || (!all_ss && !one_ss && !opts->none)) { | ||
1177 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | ||
1178 | struct cgroup_subsys *ss = subsys[i]; | ||
1179 | if (ss == NULL) | ||
1180 | continue; | ||
1181 | if (ss->disabled) | ||
1182 | continue; | ||
1183 | set_bit(i, &opts->subsys_bits); | ||
1144 | } | 1184 | } |
1145 | } | 1185 | } |
1146 | 1186 | ||
@@ -1355,6 +1395,8 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts) | |||
1355 | strcpy(root->release_agent_path, opts->release_agent); | 1395 | strcpy(root->release_agent_path, opts->release_agent); |
1356 | if (opts->name) | 1396 | if (opts->name) |
1357 | strcpy(root->name, opts->name); | 1397 | strcpy(root->name, opts->name); |
1398 | if (opts->clone_children) | ||
1399 | set_bit(CGRP_CLONE_CHILDREN, &root->top_cgroup.flags); | ||
1358 | return root; | 1400 | return root; |
1359 | } | 1401 | } |
1360 | 1402 | ||
@@ -1418,9 +1460,9 @@ static int cgroup_get_rootdir(struct super_block *sb) | |||
1418 | return 0; | 1460 | return 0; |
1419 | } | 1461 | } |
1420 | 1462 | ||
1421 | static int cgroup_get_sb(struct file_system_type *fs_type, | 1463 | static struct dentry *cgroup_mount(struct file_system_type *fs_type, |
1422 | int flags, const char *unused_dev_name, | 1464 | int flags, const char *unused_dev_name, |
1423 | void *data, struct vfsmount *mnt) | 1465 | void *data) |
1424 | { | 1466 | { |
1425 | struct cgroup_sb_opts opts; | 1467 | struct cgroup_sb_opts opts; |
1426 | struct cgroupfs_root *root; | 1468 | struct cgroupfs_root *root; |
@@ -1554,10 +1596,9 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
1554 | drop_parsed_module_refcounts(opts.subsys_bits); | 1596 | drop_parsed_module_refcounts(opts.subsys_bits); |
1555 | } | 1597 | } |
1556 | 1598 | ||
1557 | simple_set_mnt(mnt, sb); | ||
1558 | kfree(opts.release_agent); | 1599 | kfree(opts.release_agent); |
1559 | kfree(opts.name); | 1600 | kfree(opts.name); |
1560 | return 0; | 1601 | return dget(sb->s_root); |
1561 | 1602 | ||
1562 | drop_new_super: | 1603 | drop_new_super: |
1563 | deactivate_locked_super(sb); | 1604 | deactivate_locked_super(sb); |
@@ -1566,7 +1607,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
1566 | out_err: | 1607 | out_err: |
1567 | kfree(opts.release_agent); | 1608 | kfree(opts.release_agent); |
1568 | kfree(opts.name); | 1609 | kfree(opts.name); |
1569 | return ret; | 1610 | return ERR_PTR(ret); |
1570 | } | 1611 | } |
1571 | 1612 | ||
1572 | static void cgroup_kill_sb(struct super_block *sb) { | 1613 | static void cgroup_kill_sb(struct super_block *sb) { |
@@ -1616,7 +1657,7 @@ static void cgroup_kill_sb(struct super_block *sb) { | |||
1616 | 1657 | ||
1617 | static struct file_system_type cgroup_fs_type = { | 1658 | static struct file_system_type cgroup_fs_type = { |
1618 | .name = "cgroup", | 1659 | .name = "cgroup", |
1619 | .get_sb = cgroup_get_sb, | 1660 | .mount = cgroup_mount, |
1620 | .kill_sb = cgroup_kill_sb, | 1661 | .kill_sb = cgroup_kill_sb, |
1621 | }; | 1662 | }; |
1622 | 1663 | ||
@@ -1880,6 +1921,8 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft, | |||
1880 | const char *buffer) | 1921 | const char *buffer) |
1881 | { | 1922 | { |
1882 | BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); | 1923 | BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); |
1924 | if (strlen(buffer) >= PATH_MAX) | ||
1925 | return -EINVAL; | ||
1883 | if (!cgroup_lock_live_group(cgrp)) | 1926 | if (!cgroup_lock_live_group(cgrp)) |
1884 | return -ENODEV; | 1927 | return -ENODEV; |
1885 | strcpy(cgrp->root->release_agent_path, buffer); | 1928 | strcpy(cgrp->root->release_agent_path, buffer); |
@@ -3173,6 +3216,23 @@ fail: | |||
3173 | return ret; | 3216 | return ret; |
3174 | } | 3217 | } |
3175 | 3218 | ||
3219 | static u64 cgroup_clone_children_read(struct cgroup *cgrp, | ||
3220 | struct cftype *cft) | ||
3221 | { | ||
3222 | return clone_children(cgrp); | ||
3223 | } | ||
3224 | |||
3225 | static int cgroup_clone_children_write(struct cgroup *cgrp, | ||
3226 | struct cftype *cft, | ||
3227 | u64 val) | ||
3228 | { | ||
3229 | if (val) | ||
3230 | set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); | ||
3231 | else | ||
3232 | clear_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); | ||
3233 | return 0; | ||
3234 | } | ||
3235 | |||
3176 | /* | 3236 | /* |
3177 | * for the common functions, 'private' gives the type of file | 3237 | * for the common functions, 'private' gives the type of file |
3178 | */ | 3238 | */ |
@@ -3203,6 +3263,11 @@ static struct cftype files[] = { | |||
3203 | .write_string = cgroup_write_event_control, | 3263 | .write_string = cgroup_write_event_control, |
3204 | .mode = S_IWUGO, | 3264 | .mode = S_IWUGO, |
3205 | }, | 3265 | }, |
3266 | { | ||
3267 | .name = "cgroup.clone_children", | ||
3268 | .read_u64 = cgroup_clone_children_read, | ||
3269 | .write_u64 = cgroup_clone_children_write, | ||
3270 | }, | ||
3206 | }; | 3271 | }; |
3207 | 3272 | ||
3208 | static struct cftype cft_release_agent = { | 3273 | static struct cftype cft_release_agent = { |
@@ -3332,6 +3397,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
3332 | if (notify_on_release(parent)) | 3397 | if (notify_on_release(parent)) |
3333 | set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); | 3398 | set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); |
3334 | 3399 | ||
3400 | if (clone_children(parent)) | ||
3401 | set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); | ||
3402 | |||
3335 | for_each_subsys(root, ss) { | 3403 | for_each_subsys(root, ss) { |
3336 | struct cgroup_subsys_state *css = ss->create(ss, cgrp); | 3404 | struct cgroup_subsys_state *css = ss->create(ss, cgrp); |
3337 | 3405 | ||
@@ -3346,6 +3414,8 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
3346 | goto err_destroy; | 3414 | goto err_destroy; |
3347 | } | 3415 | } |
3348 | /* At error, ->destroy() callback has to free assigned ID. */ | 3416 | /* At error, ->destroy() callback has to free assigned ID. */ |
3417 | if (clone_children(parent) && ss->post_clone) | ||
3418 | ss->post_clone(ss, cgrp); | ||
3349 | } | 3419 | } |
3350 | 3420 | ||
3351 | cgroup_lock_hierarchy(root); | 3421 | cgroup_lock_hierarchy(root); |
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index ce71ed53e88f..e7bebb7c6c38 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -48,20 +48,19 @@ static inline struct freezer *task_freezer(struct task_struct *task) | |||
48 | struct freezer, css); | 48 | struct freezer, css); |
49 | } | 49 | } |
50 | 50 | ||
51 | int cgroup_freezing_or_frozen(struct task_struct *task) | 51 | static inline int __cgroup_freezing_or_frozen(struct task_struct *task) |
52 | { | 52 | { |
53 | struct freezer *freezer; | 53 | enum freezer_state state = task_freezer(task)->state; |
54 | enum freezer_state state; | 54 | return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN); |
55 | } | ||
55 | 56 | ||
57 | int cgroup_freezing_or_frozen(struct task_struct *task) | ||
58 | { | ||
59 | int result; | ||
56 | task_lock(task); | 60 | task_lock(task); |
57 | freezer = task_freezer(task); | 61 | result = __cgroup_freezing_or_frozen(task); |
58 | if (!freezer->css.cgroup->parent) | ||
59 | state = CGROUP_THAWED; /* root cgroup can't be frozen */ | ||
60 | else | ||
61 | state = freezer->state; | ||
62 | task_unlock(task); | 62 | task_unlock(task); |
63 | 63 | return result; | |
64 | return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN); | ||
65 | } | 64 | } |
66 | 65 | ||
67 | /* | 66 | /* |
@@ -154,13 +153,6 @@ static void freezer_destroy(struct cgroup_subsys *ss, | |||
154 | kfree(cgroup_freezer(cgroup)); | 153 | kfree(cgroup_freezer(cgroup)); |
155 | } | 154 | } |
156 | 155 | ||
157 | /* Task is frozen or will freeze immediately when next it gets woken */ | ||
158 | static bool is_task_frozen_enough(struct task_struct *task) | ||
159 | { | ||
160 | return frozen(task) || | ||
161 | (task_is_stopped_or_traced(task) && freezing(task)); | ||
162 | } | ||
163 | |||
164 | /* | 156 | /* |
165 | * The call to cgroup_lock() in the freezer.state write method prevents | 157 | * The call to cgroup_lock() in the freezer.state write method prevents |
166 | * a write to that file racing against an attach, and hence the | 158 | * a write to that file racing against an attach, and hence the |
@@ -174,24 +166,25 @@ static int freezer_can_attach(struct cgroup_subsys *ss, | |||
174 | 166 | ||
175 | /* | 167 | /* |
176 | * Anything frozen can't move or be moved to/from. | 168 | * Anything frozen can't move or be moved to/from. |
177 | * | ||
178 | * Since orig_freezer->state == FROZEN means that @task has been | ||
179 | * frozen, so it's sufficient to check the latter condition. | ||
180 | */ | 169 | */ |
181 | 170 | ||
182 | if (is_task_frozen_enough(task)) | 171 | freezer = cgroup_freezer(new_cgroup); |
172 | if (freezer->state != CGROUP_THAWED) | ||
183 | return -EBUSY; | 173 | return -EBUSY; |
184 | 174 | ||
185 | freezer = cgroup_freezer(new_cgroup); | 175 | rcu_read_lock(); |
186 | if (freezer->state == CGROUP_FROZEN) | 176 | if (__cgroup_freezing_or_frozen(task)) { |
177 | rcu_read_unlock(); | ||
187 | return -EBUSY; | 178 | return -EBUSY; |
179 | } | ||
180 | rcu_read_unlock(); | ||
188 | 181 | ||
189 | if (threadgroup) { | 182 | if (threadgroup) { |
190 | struct task_struct *c; | 183 | struct task_struct *c; |
191 | 184 | ||
192 | rcu_read_lock(); | 185 | rcu_read_lock(); |
193 | list_for_each_entry_rcu(c, &task->thread_group, thread_group) { | 186 | list_for_each_entry_rcu(c, &task->thread_group, thread_group) { |
194 | if (is_task_frozen_enough(c)) { | 187 | if (__cgroup_freezing_or_frozen(c)) { |
195 | rcu_read_unlock(); | 188 | rcu_read_unlock(); |
196 | return -EBUSY; | 189 | return -EBUSY; |
197 | } | 190 | } |
@@ -236,31 +229,30 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) | |||
236 | /* | 229 | /* |
237 | * caller must hold freezer->lock | 230 | * caller must hold freezer->lock |
238 | */ | 231 | */ |
239 | static void update_freezer_state(struct cgroup *cgroup, | 232 | static void update_if_frozen(struct cgroup *cgroup, |
240 | struct freezer *freezer) | 233 | struct freezer *freezer) |
241 | { | 234 | { |
242 | struct cgroup_iter it; | 235 | struct cgroup_iter it; |
243 | struct task_struct *task; | 236 | struct task_struct *task; |
244 | unsigned int nfrozen = 0, ntotal = 0; | 237 | unsigned int nfrozen = 0, ntotal = 0; |
238 | enum freezer_state old_state = freezer->state; | ||
245 | 239 | ||
246 | cgroup_iter_start(cgroup, &it); | 240 | cgroup_iter_start(cgroup, &it); |
247 | while ((task = cgroup_iter_next(cgroup, &it))) { | 241 | while ((task = cgroup_iter_next(cgroup, &it))) { |
248 | ntotal++; | 242 | ntotal++; |
249 | if (is_task_frozen_enough(task)) | 243 | if (frozen(task)) |
250 | nfrozen++; | 244 | nfrozen++; |
251 | } | 245 | } |
252 | 246 | ||
253 | /* | 247 | if (old_state == CGROUP_THAWED) { |
254 | * Transition to FROZEN when no new tasks can be added ensures | 248 | BUG_ON(nfrozen > 0); |
255 | * that we never exist in the FROZEN state while there are unfrozen | 249 | } else if (old_state == CGROUP_FREEZING) { |
256 | * tasks. | 250 | if (nfrozen == ntotal) |
257 | */ | 251 | freezer->state = CGROUP_FROZEN; |
258 | if (nfrozen == ntotal) | 252 | } else { /* old_state == CGROUP_FROZEN */ |
259 | freezer->state = CGROUP_FROZEN; | 253 | BUG_ON(nfrozen != ntotal); |
260 | else if (nfrozen > 0) | 254 | } |
261 | freezer->state = CGROUP_FREEZING; | 255 | |
262 | else | ||
263 | freezer->state = CGROUP_THAWED; | ||
264 | cgroup_iter_end(cgroup, &it); | 256 | cgroup_iter_end(cgroup, &it); |
265 | } | 257 | } |
266 | 258 | ||
@@ -279,7 +271,7 @@ static int freezer_read(struct cgroup *cgroup, struct cftype *cft, | |||
279 | if (state == CGROUP_FREEZING) { | 271 | if (state == CGROUP_FREEZING) { |
280 | /* We change from FREEZING to FROZEN lazily if the cgroup was | 272 | /* We change from FREEZING to FROZEN lazily if the cgroup was |
281 | * only partially frozen when we exitted write. */ | 273 | * only partially frozen when we exitted write. */ |
282 | update_freezer_state(cgroup, freezer); | 274 | update_if_frozen(cgroup, freezer); |
283 | state = freezer->state; | 275 | state = freezer->state; |
284 | } | 276 | } |
285 | spin_unlock_irq(&freezer->lock); | 277 | spin_unlock_irq(&freezer->lock); |
@@ -301,7 +293,7 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) | |||
301 | while ((task = cgroup_iter_next(cgroup, &it))) { | 293 | while ((task = cgroup_iter_next(cgroup, &it))) { |
302 | if (!freeze_task(task, true)) | 294 | if (!freeze_task(task, true)) |
303 | continue; | 295 | continue; |
304 | if (is_task_frozen_enough(task)) | 296 | if (frozen(task)) |
305 | continue; | 297 | continue; |
306 | if (!freezing(task) && !freezer_should_skip(task)) | 298 | if (!freezing(task) && !freezer_should_skip(task)) |
307 | num_cant_freeze_now++; | 299 | num_cant_freeze_now++; |
@@ -335,7 +327,7 @@ static int freezer_change_state(struct cgroup *cgroup, | |||
335 | 327 | ||
336 | spin_lock_irq(&freezer->lock); | 328 | spin_lock_irq(&freezer->lock); |
337 | 329 | ||
338 | update_freezer_state(cgroup, freezer); | 330 | update_if_frozen(cgroup, freezer); |
339 | if (goal_state == freezer->state) | 331 | if (goal_state == freezer->state) |
340 | goto out; | 332 | goto out; |
341 | 333 | ||
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 51b143e2a07a..4349935c2ad8 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -231,18 +231,17 @@ static DEFINE_SPINLOCK(cpuset_buffer_lock); | |||
231 | * users. If someone tries to mount the "cpuset" filesystem, we | 231 | * users. If someone tries to mount the "cpuset" filesystem, we |
232 | * silently switch it to mount "cgroup" instead | 232 | * silently switch it to mount "cgroup" instead |
233 | */ | 233 | */ |
234 | static int cpuset_get_sb(struct file_system_type *fs_type, | 234 | static struct dentry *cpuset_mount(struct file_system_type *fs_type, |
235 | int flags, const char *unused_dev_name, | 235 | int flags, const char *unused_dev_name, void *data) |
236 | void *data, struct vfsmount *mnt) | ||
237 | { | 236 | { |
238 | struct file_system_type *cgroup_fs = get_fs_type("cgroup"); | 237 | struct file_system_type *cgroup_fs = get_fs_type("cgroup"); |
239 | int ret = -ENODEV; | 238 | struct dentry *ret = ERR_PTR(-ENODEV); |
240 | if (cgroup_fs) { | 239 | if (cgroup_fs) { |
241 | char mountopts[] = | 240 | char mountopts[] = |
242 | "cpuset,noprefix," | 241 | "cpuset,noprefix," |
243 | "release_agent=/sbin/cpuset_release_agent"; | 242 | "release_agent=/sbin/cpuset_release_agent"; |
244 | ret = cgroup_fs->get_sb(cgroup_fs, flags, | 243 | ret = cgroup_fs->mount(cgroup_fs, flags, |
245 | unused_dev_name, mountopts, mnt); | 244 | unused_dev_name, mountopts); |
246 | put_filesystem(cgroup_fs); | 245 | put_filesystem(cgroup_fs); |
247 | } | 246 | } |
248 | return ret; | 247 | return ret; |
@@ -250,7 +249,7 @@ static int cpuset_get_sb(struct file_system_type *fs_type, | |||
250 | 249 | ||
251 | static struct file_system_type cpuset_fs_type = { | 250 | static struct file_system_type cpuset_fs_type = { |
252 | .name = "cpuset", | 251 | .name = "cpuset", |
253 | .get_sb = cpuset_get_sb, | 252 | .mount = cpuset_mount, |
254 | }; | 253 | }; |
255 | 254 | ||
256 | /* | 255 | /* |
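Both the cgroup and cpuset conversions above follow the same get_sb-to-mount transition: instead of filling in a vfsmount and returning 0, the handler now returns the root dentry on success or an ERR_PTR on failure. A hedged sketch of the resulting shape, where the examplefs_* names and the setup helper are placeholders rather than real kernel symbols:

static struct dentry *examplefs_mount(struct file_system_type *fs_type,
                                      int flags, const char *dev_name,
                                      void *data)
{
        struct super_block *sb;

        sb = examplefs_get_super(data);         /* hypothetical setup helper */
        if (IS_ERR(sb))
                return ERR_CAST(sb);            /* propagate the error */
        return dget(sb->s_root);                /* success: hand back the root */
}

static struct file_system_type examplefs_type = {
        .name    = "examplefs",
        .mount   = examplefs_mount,
        .kill_sb = kill_anon_super,
};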
diff --git a/kernel/cred.c b/kernel/cred.c
index 9a3e22641fe7..6a1aa004e376 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -325,7 +325,7 @@ EXPORT_SYMBOL(prepare_creds); | |||
325 | 325 | ||
326 | /* | 326 | /* |
327 | * Prepare credentials for current to perform an execve() | 327 | * Prepare credentials for current to perform an execve() |
328 | * - The caller must hold current->cred_guard_mutex | 328 | * - The caller must hold ->cred_guard_mutex |
329 | */ | 329 | */ |
330 | struct cred *prepare_exec_creds(void) | 330 | struct cred *prepare_exec_creds(void) |
331 | { | 331 | { |
@@ -384,8 +384,6 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) | |||
384 | struct cred *new; | 384 | struct cred *new; |
385 | int ret; | 385 | int ret; |
386 | 386 | ||
387 | mutex_init(&p->cred_guard_mutex); | ||
388 | |||
389 | if ( | 387 | if ( |
390 | #ifdef CONFIG_KEYS | 388 | #ifdef CONFIG_KEYS |
391 | !p->cred->thread_keyring && | 389 | !p->cred->thread_keyring && |
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index fec596da9bd0..cefd4a11f6d9 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -209,18 +209,6 @@ int __weak kgdb_skipexception(int exception, struct pt_regs *regs) | |||
209 | return 0; | 209 | return 0; |
210 | } | 210 | } |
211 | 211 | ||
212 | /** | ||
213 | * kgdb_disable_hw_debug - Disable hardware debugging while we in kgdb. | ||
214 | * @regs: Current &struct pt_regs. | ||
215 | * | ||
216 | * This function will be called if the particular architecture must | ||
217 | * disable hardware debugging while it is processing gdb packets or | ||
218 | * handling exception. | ||
219 | */ | ||
220 | void __weak kgdb_disable_hw_debug(struct pt_regs *regs) | ||
221 | { | ||
222 | } | ||
223 | |||
224 | /* | 212 | /* |
225 | * Some architectures need cache flushes when we set/clear a | 213 | * Some architectures need cache flushes when we set/clear a |
226 | * breakpoint: | 214 | * breakpoint: |
@@ -484,7 +472,9 @@ static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs, | |||
484 | atomic_inc(&masters_in_kgdb); | 472 | atomic_inc(&masters_in_kgdb); |
485 | else | 473 | else |
486 | atomic_inc(&slaves_in_kgdb); | 474 | atomic_inc(&slaves_in_kgdb); |
487 | kgdb_disable_hw_debug(ks->linux_regs); | 475 | |
476 | if (arch_kgdb_ops.disable_hw_break) | ||
477 | arch_kgdb_ops.disable_hw_break(regs); | ||
488 | 478 | ||
489 | acquirelock: | 479 | acquirelock: |
490 | /* | 480 | /* |
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index d7bda21a106b..37755d621924 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1127,7 +1127,7 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, | |||
1127 | /* special case below */ | 1127 | /* special case below */ |
1128 | } else { | 1128 | } else { |
1129 | kdb_printf("\nEntering kdb (current=0x%p, pid %d) ", | 1129 | kdb_printf("\nEntering kdb (current=0x%p, pid %d) ", |
1130 | kdb_current, kdb_current->pid); | 1130 | kdb_current, kdb_current ? kdb_current->pid : 0); |
1131 | #if defined(CONFIG_SMP) | 1131 | #if defined(CONFIG_SMP) |
1132 | kdb_printf("on processor %d ", raw_smp_processor_id()); | 1132 | kdb_printf("on processor %d ", raw_smp_processor_id()); |
1133 | #endif | 1133 | #endif |
@@ -2603,20 +2603,17 @@ static int kdb_summary(int argc, const char **argv) | |||
2603 | */ | 2603 | */ |
2604 | static int kdb_per_cpu(int argc, const char **argv) | 2604 | static int kdb_per_cpu(int argc, const char **argv) |
2605 | { | 2605 | { |
2606 | char buf[256], fmtstr[64]; | 2606 | char fmtstr[64]; |
2607 | kdb_symtab_t symtab; | 2607 | int cpu, diag, nextarg = 1; |
2608 | cpumask_t suppress = CPU_MASK_NONE; | 2608 | unsigned long addr, symaddr, val, bytesperword = 0, whichcpu = ~0UL; |
2609 | int cpu, diag; | ||
2610 | unsigned long addr, val, bytesperword = 0, whichcpu = ~0UL; | ||
2611 | 2609 | ||
2612 | if (argc < 1 || argc > 3) | 2610 | if (argc < 1 || argc > 3) |
2613 | return KDB_ARGCOUNT; | 2611 | return KDB_ARGCOUNT; |
2614 | 2612 | ||
2615 | snprintf(buf, sizeof(buf), "per_cpu__%s", argv[1]); | 2613 | diag = kdbgetaddrarg(argc, argv, &nextarg, &symaddr, NULL, NULL); |
2616 | if (!kdbgetsymval(buf, &symtab)) { | 2614 | if (diag) |
2617 | kdb_printf("%s is not a per_cpu variable\n", argv[1]); | 2615 | return diag; |
2618 | return KDB_BADADDR; | 2616 | |
2619 | } | ||
2620 | if (argc >= 2) { | 2617 | if (argc >= 2) { |
2621 | diag = kdbgetularg(argv[2], &bytesperword); | 2618 | diag = kdbgetularg(argv[2], &bytesperword); |
2622 | if (diag) | 2619 | if (diag) |
@@ -2649,46 +2646,25 @@ static int kdb_per_cpu(int argc, const char **argv) | |||
2649 | #define KDB_PCU(cpu) 0 | 2646 | #define KDB_PCU(cpu) 0 |
2650 | #endif | 2647 | #endif |
2651 | #endif | 2648 | #endif |
2652 | |||
2653 | for_each_online_cpu(cpu) { | 2649 | for_each_online_cpu(cpu) { |
2650 | if (KDB_FLAG(CMD_INTERRUPT)) | ||
2651 | return 0; | ||
2652 | |||
2654 | if (whichcpu != ~0UL && whichcpu != cpu) | 2653 | if (whichcpu != ~0UL && whichcpu != cpu) |
2655 | continue; | 2654 | continue; |
2656 | addr = symtab.sym_start + KDB_PCU(cpu); | 2655 | addr = symaddr + KDB_PCU(cpu); |
2657 | diag = kdb_getword(&val, addr, bytesperword); | 2656 | diag = kdb_getword(&val, addr, bytesperword); |
2658 | if (diag) { | 2657 | if (diag) { |
2659 | kdb_printf("%5d " kdb_bfd_vma_fmt0 " - unable to " | 2658 | kdb_printf("%5d " kdb_bfd_vma_fmt0 " - unable to " |
2660 | "read, diag=%d\n", cpu, addr, diag); | 2659 | "read, diag=%d\n", cpu, addr, diag); |
2661 | continue; | 2660 | continue; |
2662 | } | 2661 | } |
2663 | #ifdef CONFIG_SMP | ||
2664 | if (!val) { | ||
2665 | cpu_set(cpu, suppress); | ||
2666 | continue; | ||
2667 | } | ||
2668 | #endif /* CONFIG_SMP */ | ||
2669 | kdb_printf("%5d ", cpu); | 2662 | kdb_printf("%5d ", cpu); |
2670 | kdb_md_line(fmtstr, addr, | 2663 | kdb_md_line(fmtstr, addr, |
2671 | bytesperword == KDB_WORD_SIZE, | 2664 | bytesperword == KDB_WORD_SIZE, |
2672 | 1, bytesperword, 1, 1, 0); | 2665 | 1, bytesperword, 1, 1, 0); |
2673 | } | 2666 | } |
2674 | if (cpus_weight(suppress) == 0) | ||
2675 | return 0; | ||
2676 | kdb_printf("Zero suppressed cpu(s):"); | ||
2677 | for (cpu = first_cpu(suppress); cpu < num_possible_cpus(); | ||
2678 | cpu = next_cpu(cpu, suppress)) { | ||
2679 | kdb_printf(" %d", cpu); | ||
2680 | if (cpu == num_possible_cpus() - 1 || | ||
2681 | next_cpu(cpu, suppress) != cpu + 1) | ||
2682 | continue; | ||
2683 | while (cpu < num_possible_cpus() && | ||
2684 | next_cpu(cpu, suppress) == cpu + 1) | ||
2685 | ++cpu; | ||
2686 | kdb_printf("-%d", cpu); | ||
2687 | } | ||
2688 | kdb_printf("\n"); | ||
2689 | |||
2690 | #undef KDB_PCU | 2667 | #undef KDB_PCU |
2691 | |||
2692 | return 0; | 2668 | return 0; |
2693 | } | 2669 | } |
2694 | 2670 | ||
diff --git a/kernel/exit.c b/kernel/exit.c
index 894179a32ec1..21aa7b3001fb 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -96,6 +96,14 @@ static void __exit_signal(struct task_struct *tsk) | |||
96 | sig->tty = NULL; | 96 | sig->tty = NULL; |
97 | } else { | 97 | } else { |
98 | /* | 98 | /* |
99 | * This can only happen if the caller is de_thread(). | ||
100 | * FIXME: this is the temporary hack, we should teach | ||
101 | * posix-cpu-timers to handle this case correctly. | ||
102 | */ | ||
103 | if (unlikely(has_group_leader_pid(tsk))) | ||
104 | posix_cpu_timers_exit_group(tsk); | ||
105 | |||
106 | /* | ||
99 | * If there is any task waiting for the group exit | 107 | * If there is any task waiting for the group exit |
100 | * then notify it: | 108 | * then notify it: |
101 | */ | 109 | */ |
@@ -703,6 +711,8 @@ static void exit_mm(struct task_struct * tsk) | |||
703 | * space. | 711 | * space. |
704 | */ | 712 | */ |
705 | static struct task_struct *find_new_reaper(struct task_struct *father) | 713 | static struct task_struct *find_new_reaper(struct task_struct *father) |
714 | __releases(&tasklist_lock) | ||
715 | __acquires(&tasklist_lock) | ||
706 | { | 716 | { |
707 | struct pid_namespace *pid_ns = task_active_pid_ns(father); | 717 | struct pid_namespace *pid_ns = task_active_pid_ns(father); |
708 | struct task_struct *thread; | 718 | struct task_struct *thread; |
diff --git a/kernel/fork.c b/kernel/fork.c
index e87aaaaf5131..3b159c5991b7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -908,6 +908,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
908 | sig->oom_adj = current->signal->oom_adj; | 908 | sig->oom_adj = current->signal->oom_adj; |
909 | sig->oom_score_adj = current->signal->oom_score_adj; | 909 | sig->oom_score_adj = current->signal->oom_score_adj; |
910 | 910 | ||
911 | mutex_init(&sig->cred_guard_mutex); | ||
912 | |||
911 | return 0; | 913 | return 0; |
912 | } | 914 | } |
913 | 915 | ||
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 9d917ff72675..9988d03797f5 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -393,3 +393,18 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) | |||
393 | struct irq_desc *desc = irq_to_desc(irq); | 393 | struct irq_desc *desc = irq_to_desc(irq); |
394 | return desc ? desc->kstat_irqs[cpu] : 0; | 394 | return desc ? desc->kstat_irqs[cpu] : 0; |
395 | } | 395 | } |
396 | |||
397 | #ifdef CONFIG_GENERIC_HARDIRQS | ||
398 | unsigned int kstat_irqs(unsigned int irq) | ||
399 | { | ||
400 | struct irq_desc *desc = irq_to_desc(irq); | ||
401 | int cpu; | ||
402 | int sum = 0; | ||
403 | |||
404 | if (!desc) | ||
405 | return 0; | ||
406 | for_each_possible_cpu(cpu) | ||
407 | sum += desc->kstat_irqs[cpu]; | ||
408 | return sum; | ||
409 | } | ||
410 | #endif /* CONFIG_GENERIC_HARDIRQS */ | ||
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 644e8d5fa367..5f92acc5f952 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -324,6 +324,10 @@ void enable_irq(unsigned int irq) | |||
324 | if (!desc) | 324 | if (!desc) |
325 | return; | 325 | return; |
326 | 326 | ||
327 | if (WARN(!desc->irq_data.chip || !desc->irq_data.chip->irq_enable, | ||
328 | KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq)) | ||
329 | return; | ||
330 | |||
327 | chip_bus_lock(desc); | 331 | chip_bus_lock(desc); |
328 | raw_spin_lock_irqsave(&desc->lock, flags); | 332 | raw_spin_lock_irqsave(&desc->lock, flags); |
329 | __enable_irq(desc, irq, false); | 333 | __enable_irq(desc, irq, false); |
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 7be868bf25c6..3b79bd938330 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -39,6 +39,16 @@ struct jump_label_module_entry { | |||
39 | struct module *mod; | 39 | struct module *mod; |
40 | }; | 40 | }; |
41 | 41 | ||
42 | void jump_label_lock(void) | ||
43 | { | ||
44 | mutex_lock(&jump_label_mutex); | ||
45 | } | ||
46 | |||
47 | void jump_label_unlock(void) | ||
48 | { | ||
49 | mutex_unlock(&jump_label_mutex); | ||
50 | } | ||
51 | |||
42 | static int jump_label_cmp(const void *a, const void *b) | 52 | static int jump_label_cmp(const void *a, const void *b) |
43 | { | 53 | { |
44 | const struct jump_entry *jea = a; | 54 | const struct jump_entry *jea = a; |
@@ -152,7 +162,7 @@ void jump_label_update(unsigned long key, enum jump_label_type type) | |||
152 | struct jump_label_module_entry *e_module; | 162 | struct jump_label_module_entry *e_module; |
153 | int count; | 163 | int count; |
154 | 164 | ||
155 | mutex_lock(&jump_label_mutex); | 165 | jump_label_lock(); |
156 | entry = get_jump_label_entry((jump_label_t)key); | 166 | entry = get_jump_label_entry((jump_label_t)key); |
157 | if (entry) { | 167 | if (entry) { |
158 | count = entry->nr_entries; | 168 | count = entry->nr_entries; |
@@ -168,13 +178,14 @@ void jump_label_update(unsigned long key, enum jump_label_type type) | |||
168 | count = e_module->nr_entries; | 178 | count = e_module->nr_entries; |
169 | iter = e_module->table; | 179 | iter = e_module->table; |
170 | while (count--) { | 180 | while (count--) { |
171 | if (kernel_text_address(iter->code)) | 181 | if (iter->key && |
182 | kernel_text_address(iter->code)) | ||
172 | arch_jump_label_transform(iter, type); | 183 | arch_jump_label_transform(iter, type); |
173 | iter++; | 184 | iter++; |
174 | } | 185 | } |
175 | } | 186 | } |
176 | } | 187 | } |
177 | mutex_unlock(&jump_label_mutex); | 188 | jump_label_unlock(); |
178 | } | 189 | } |
179 | 190 | ||
180 | static int addr_conflict(struct jump_entry *entry, void *start, void *end) | 191 | static int addr_conflict(struct jump_entry *entry, void *start, void *end) |
@@ -231,6 +242,7 @@ out: | |||
231 | * overlaps with any of the jump label patch addresses. Code | 242 | * overlaps with any of the jump label patch addresses. Code |
232 | * that wants to modify kernel text should first verify that | 243 | * that wants to modify kernel text should first verify that |
233 | * it does not overlap with any of the jump label addresses. | 244 | * it does not overlap with any of the jump label addresses. |
245 | * Caller must hold jump_label_mutex. | ||
234 | * | 246 | * |
235 | * returns 1 if there is an overlap, 0 otherwise | 247 | * returns 1 if there is an overlap, 0 otherwise |
236 | */ | 248 | */ |
@@ -241,7 +253,6 @@ int jump_label_text_reserved(void *start, void *end) | |||
241 | struct jump_entry *iter_stop = __start___jump_table; | 253 | struct jump_entry *iter_stop = __start___jump_table; |
242 | int conflict = 0; | 254 | int conflict = 0; |
243 | 255 | ||
244 | mutex_lock(&jump_label_mutex); | ||
245 | iter = iter_start; | 256 | iter = iter_start; |
246 | while (iter < iter_stop) { | 257 | while (iter < iter_stop) { |
247 | if (addr_conflict(iter, start, end)) { | 258 | if (addr_conflict(iter, start, end)) { |
@@ -256,10 +267,16 @@ int jump_label_text_reserved(void *start, void *end) | |||
256 | conflict = module_conflict(start, end); | 267 | conflict = module_conflict(start, end); |
257 | #endif | 268 | #endif |
258 | out: | 269 | out: |
259 | mutex_unlock(&jump_label_mutex); | ||
260 | return conflict; | 270 | return conflict; |
261 | } | 271 | } |
262 | 272 | ||
273 | /* | ||
274 | * Not all archs need this. | ||
275 | */ | ||
276 | void __weak arch_jump_label_text_poke_early(jump_label_t addr) | ||
277 | { | ||
278 | } | ||
279 | |||
263 | static __init int init_jump_label(void) | 280 | static __init int init_jump_label(void) |
264 | { | 281 | { |
265 | int ret; | 282 | int ret; |
@@ -267,7 +284,7 @@ static __init int init_jump_label(void) | |||
267 | struct jump_entry *iter_stop = __stop___jump_table; | 284 | struct jump_entry *iter_stop = __stop___jump_table; |
268 | struct jump_entry *iter; | 285 | struct jump_entry *iter; |
269 | 286 | ||
270 | mutex_lock(&jump_label_mutex); | 287 | jump_label_lock(); |
271 | ret = build_jump_label_hashtable(__start___jump_table, | 288 | ret = build_jump_label_hashtable(__start___jump_table, |
272 | __stop___jump_table); | 289 | __stop___jump_table); |
273 | iter = iter_start; | 290 | iter = iter_start; |
@@ -275,7 +292,7 @@ static __init int init_jump_label(void) | |||
275 | arch_jump_label_text_poke_early(iter->code); | 292 | arch_jump_label_text_poke_early(iter->code); |
276 | iter++; | 293 | iter++; |
277 | } | 294 | } |
278 | mutex_unlock(&jump_label_mutex); | 295 | jump_label_unlock(); |
279 | return ret; | 296 | return ret; |
280 | } | 297 | } |
281 | early_initcall(init_jump_label); | 298 | early_initcall(init_jump_label); |
@@ -366,6 +383,39 @@ static void remove_jump_label_module(struct module *mod) | |||
366 | } | 383 | } |
367 | } | 384 | } |
368 | 385 | ||
386 | static void remove_jump_label_module_init(struct module *mod) | ||
387 | { | ||
388 | struct hlist_head *head; | ||
389 | struct hlist_node *node, *node_next, *module_node, *module_node_next; | ||
390 | struct jump_label_entry *e; | ||
391 | struct jump_label_module_entry *e_module; | ||
392 | struct jump_entry *iter; | ||
393 | int i, count; | ||
394 | |||
395 | /* if the module doesn't have jump label entries, just return */ | ||
396 | if (!mod->num_jump_entries) | ||
397 | return; | ||
398 | |||
399 | for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) { | ||
400 | head = &jump_label_table[i]; | ||
401 | hlist_for_each_entry_safe(e, node, node_next, head, hlist) { | ||
402 | hlist_for_each_entry_safe(e_module, module_node, | ||
403 | module_node_next, | ||
404 | &(e->modules), hlist) { | ||
405 | if (e_module->mod != mod) | ||
406 | continue; | ||
407 | count = e_module->nr_entries; | ||
408 | iter = e_module->table; | ||
409 | while (count--) { | ||
410 | if (within_module_init(iter->code, mod)) | ||
411 | iter->key = 0; | ||
412 | iter++; | ||
413 | } | ||
414 | } | ||
415 | } | ||
416 | } | ||
417 | } | ||
418 | |||
369 | static int | 419 | static int |
370 | jump_label_module_notify(struct notifier_block *self, unsigned long val, | 420 | jump_label_module_notify(struct notifier_block *self, unsigned long val, |
371 | void *data) | 421 | void *data) |
@@ -375,16 +425,21 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val, | |||
375 | 425 | ||
376 | switch (val) { | 426 | switch (val) { |
377 | case MODULE_STATE_COMING: | 427 | case MODULE_STATE_COMING: |
378 | mutex_lock(&jump_label_mutex); | 428 | jump_label_lock(); |
379 | ret = add_jump_label_module(mod); | 429 | ret = add_jump_label_module(mod); |
380 | if (ret) | 430 | if (ret) |
381 | remove_jump_label_module(mod); | 431 | remove_jump_label_module(mod); |
382 | mutex_unlock(&jump_label_mutex); | 432 | jump_label_unlock(); |
383 | break; | 433 | break; |
384 | case MODULE_STATE_GOING: | 434 | case MODULE_STATE_GOING: |
385 | mutex_lock(&jump_label_mutex); | 435 | jump_label_lock(); |
386 | remove_jump_label_module(mod); | 436 | remove_jump_label_module(mod); |
387 | mutex_unlock(&jump_label_mutex); | 437 | jump_label_unlock(); |
438 | break; | ||
439 | case MODULE_STATE_LIVE: | ||
440 | jump_label_lock(); | ||
441 | remove_jump_label_module_init(mod); | ||
442 | jump_label_unlock(); | ||
388 | break; | 443 | break; |
389 | } | 444 | } |
390 | return ret; | 445 | return ret; |
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 56a891914273..9737a76e106f 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -74,7 +74,8 @@ static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; | |||
74 | /* NOTE: change this value only with kprobe_mutex held */ | 74 | /* NOTE: change this value only with kprobe_mutex held */ |
75 | static bool kprobes_all_disarmed; | 75 | static bool kprobes_all_disarmed; |
76 | 76 | ||
77 | static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ | 77 | /* This protects kprobe_table and optimizing_list */ |
78 | static DEFINE_MUTEX(kprobe_mutex); | ||
78 | static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; | 79 | static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; |
79 | static struct { | 80 | static struct { |
80 | spinlock_t lock ____cacheline_aligned_in_smp; | 81 | spinlock_t lock ____cacheline_aligned_in_smp; |
@@ -595,6 +596,7 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p) | |||
595 | } | 596 | } |
596 | 597 | ||
597 | #ifdef CONFIG_SYSCTL | 598 | #ifdef CONFIG_SYSCTL |
599 | /* This should be called with kprobe_mutex locked */ | ||
598 | static void __kprobes optimize_all_kprobes(void) | 600 | static void __kprobes optimize_all_kprobes(void) |
599 | { | 601 | { |
600 | struct hlist_head *head; | 602 | struct hlist_head *head; |
@@ -607,17 +609,16 @@ static void __kprobes optimize_all_kprobes(void) | |||
607 | return; | 609 | return; |
608 | 610 | ||
609 | kprobes_allow_optimization = true; | 611 | kprobes_allow_optimization = true; |
610 | mutex_lock(&text_mutex); | ||
611 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { | 612 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { |
612 | head = &kprobe_table[i]; | 613 | head = &kprobe_table[i]; |
613 | hlist_for_each_entry_rcu(p, node, head, hlist) | 614 | hlist_for_each_entry_rcu(p, node, head, hlist) |
614 | if (!kprobe_disabled(p)) | 615 | if (!kprobe_disabled(p)) |
615 | optimize_kprobe(p); | 616 | optimize_kprobe(p); |
616 | } | 617 | } |
617 | mutex_unlock(&text_mutex); | ||
618 | printk(KERN_INFO "Kprobes globally optimized\n"); | 618 | printk(KERN_INFO "Kprobes globally optimized\n"); |
619 | } | 619 | } |
620 | 620 | ||
621 | /* This should be called with kprobe_mutex locked */ | ||
621 | static void __kprobes unoptimize_all_kprobes(void) | 622 | static void __kprobes unoptimize_all_kprobes(void) |
622 | { | 623 | { |
623 | struct hlist_head *head; | 624 | struct hlist_head *head; |
@@ -1144,14 +1145,13 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
1144 | if (ret) | 1145 | if (ret) |
1145 | return ret; | 1146 | return ret; |
1146 | 1147 | ||
1148 | jump_label_lock(); | ||
1147 | preempt_disable(); | 1149 | preempt_disable(); |
1148 | if (!kernel_text_address((unsigned long) p->addr) || | 1150 | if (!kernel_text_address((unsigned long) p->addr) || |
1149 | in_kprobes_functions((unsigned long) p->addr) || | 1151 | in_kprobes_functions((unsigned long) p->addr) || |
1150 | ftrace_text_reserved(p->addr, p->addr) || | 1152 | ftrace_text_reserved(p->addr, p->addr) || |
1151 | jump_label_text_reserved(p->addr, p->addr)) { | 1153 | jump_label_text_reserved(p->addr, p->addr)) |
1152 | preempt_enable(); | 1154 | goto fail_with_jump_label; |
1153 | return -EINVAL; | ||
1154 | } | ||
1155 | 1155 | ||
1156 | /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */ | 1156 | /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */ |
1157 | p->flags &= KPROBE_FLAG_DISABLED; | 1157 | p->flags &= KPROBE_FLAG_DISABLED; |
@@ -1165,10 +1165,9 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
1165 | * We must hold a refcount of the probed module while updating | 1165 | * We must hold a refcount of the probed module while updating |
1166 | * its code to prohibit unexpected unloading. | 1166 | * its code to prohibit unexpected unloading. |
1167 | */ | 1167 | */ |
1168 | if (unlikely(!try_module_get(probed_mod))) { | 1168 | if (unlikely(!try_module_get(probed_mod))) |
1169 | preempt_enable(); | 1169 | goto fail_with_jump_label; |
1170 | return -EINVAL; | 1170 | |
1171 | } | ||
1172 | /* | 1171 | /* |
1173 | * If the module freed .init.text, we couldn't insert | 1172 | * If the module freed .init.text, we couldn't insert |
1174 | * kprobes in there. | 1173 | * kprobes in there. |
@@ -1176,16 +1175,18 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
1176 | if (within_module_init((unsigned long)p->addr, probed_mod) && | 1175 | if (within_module_init((unsigned long)p->addr, probed_mod) && |
1177 | probed_mod->state != MODULE_STATE_COMING) { | 1176 | probed_mod->state != MODULE_STATE_COMING) { |
1178 | module_put(probed_mod); | 1177 | module_put(probed_mod); |
1179 | preempt_enable(); | 1178 | goto fail_with_jump_label; |
1180 | return -EINVAL; | ||
1181 | } | 1179 | } |
1182 | } | 1180 | } |
1183 | preempt_enable(); | 1181 | preempt_enable(); |
1182 | jump_label_unlock(); | ||
1184 | 1183 | ||
1185 | p->nmissed = 0; | 1184 | p->nmissed = 0; |
1186 | INIT_LIST_HEAD(&p->list); | 1185 | INIT_LIST_HEAD(&p->list); |
1187 | mutex_lock(&kprobe_mutex); | 1186 | mutex_lock(&kprobe_mutex); |
1188 | 1187 | ||
1188 | jump_label_lock(); /* needed to call jump_label_text_reserved() */ | ||
1189 | |||
1189 | get_online_cpus(); /* For avoiding text_mutex deadlock. */ | 1190 | get_online_cpus(); /* For avoiding text_mutex deadlock. */ |
1190 | mutex_lock(&text_mutex); | 1191 | mutex_lock(&text_mutex); |
1191 | 1192 | ||
@@ -1213,12 +1214,18 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
1213 | out: | 1214 | out: |
1214 | mutex_unlock(&text_mutex); | 1215 | mutex_unlock(&text_mutex); |
1215 | put_online_cpus(); | 1216 | put_online_cpus(); |
1217 | jump_label_unlock(); | ||
1216 | mutex_unlock(&kprobe_mutex); | 1218 | mutex_unlock(&kprobe_mutex); |
1217 | 1219 | ||
1218 | if (probed_mod) | 1220 | if (probed_mod) |
1219 | module_put(probed_mod); | 1221 | module_put(probed_mod); |
1220 | 1222 | ||
1221 | return ret; | 1223 | return ret; |
1224 | |||
1225 | fail_with_jump_label: | ||
1226 | preempt_enable(); | ||
1227 | jump_label_unlock(); | ||
1228 | return -EINVAL; | ||
1222 | } | 1229 | } |
1223 | EXPORT_SYMBOL_GPL(register_kprobe); | 1230 | EXPORT_SYMBOL_GPL(register_kprobe); |
1224 | 1231 | ||
diff --git a/kernel/latencytop.c b/kernel/latencytop.c index 877fb306d415..17110a4a4fc2 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c | |||
@@ -194,14 +194,7 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter) | |||
194 | 194 | ||
195 | account_global_scheduler_latency(tsk, &lat); | 195 | account_global_scheduler_latency(tsk, &lat); |
196 | 196 | ||
197 | /* | 197 | for (i = 0; i < tsk->latency_record_count; i++) { |
198 | * short term hack; if we're > 32 we stop; future we recycle: | ||
199 | */ | ||
200 | tsk->latency_record_count++; | ||
201 | if (tsk->latency_record_count >= LT_SAVECOUNT) | ||
202 | goto out_unlock; | ||
203 | |||
204 | for (i = 0; i < LT_SAVECOUNT; i++) { | ||
205 | struct latency_record *mylat; | 198 | struct latency_record *mylat; |
206 | int same = 1; | 199 | int same = 1; |
207 | 200 | ||
@@ -227,8 +220,14 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter) | |||
227 | } | 220 | } |
228 | } | 221 | } |
229 | 222 | ||
223 | /* | ||
224 | * short term hack; if we're > 32 we stop; future we recycle: | ||
225 | */ | ||
226 | if (tsk->latency_record_count >= LT_SAVECOUNT) | ||
227 | goto out_unlock; | ||
228 | |||
230 | /* Allocated a new one: */ | 229 | /* Allocated a new one: */ |
231 | i = tsk->latency_record_count; | 230 | i = tsk->latency_record_count++; |
232 | memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record)); | 231 | memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record)); |
233 | 232 | ||
234 | out_unlock: | 233 | out_unlock: |
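The latencytop hunk above makes __account_scheduler_latency() scan only the records actually in use and consume a new slot (post-incrementing latency_record_count) only when nothing matches and the table still has room. A small userspace sketch of that merge-or-append pattern, with an assumed 32-entry table standing in for LT_SAVECOUNT:

#include <stdio.h>
#include <string.h>

#define SAVECOUNT 32    /* stand-in for LT_SAVECOUNT */

struct record {
        char name[32];
        unsigned int count;
};

static struct record table[SAVECOUNT];
static int nr_records;

/* Merge into an existing slot when possible; only consume a new slot (and
 * bump the count) when nothing matches and there is still room. */
static void account(const char *name)
{
        int i;

        for (i = 0; i < nr_records; i++) {
                if (!strcmp(table[i].name, name)) {
                        table[i].count++;
                        return;
                }
        }
        if (nr_records >= SAVECOUNT)
                return;                 /* table full: drop the sample */
        i = nr_records++;
        snprintf(table[i].name, sizeof(table[i].name), "%s", name);
        table[i].count = 1;
}

int main(void)
{
        account("fsync");
        account("fsync");
        account("poll");
        printf("%d distinct records\n", nr_records);
        return 0;
}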
diff --git a/kernel/module.c b/kernel/module.c index 2df46301a7a4..437a74a7524a 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -2037,7 +2037,7 @@ static inline void layout_symtab(struct module *mod, struct load_info *info) | |||
2037 | { | 2037 | { |
2038 | } | 2038 | } |
2039 | 2039 | ||
2040 | static void add_kallsyms(struct module *mod, struct load_info *info) | 2040 | static void add_kallsyms(struct module *mod, const struct load_info *info) |
2041 | { | 2041 | { |
2042 | } | 2042 | } |
2043 | #endif /* CONFIG_KALLSYMS */ | 2043 | #endif /* CONFIG_KALLSYMS */ |
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c index 2a5dfec8efe0..2c98ad94ba0e 100644 --- a/kernel/ns_cgroup.c +++ b/kernel/ns_cgroup.c | |||
@@ -85,6 +85,14 @@ static struct cgroup_subsys_state *ns_create(struct cgroup_subsys *ss, | |||
85 | return ERR_PTR(-EPERM); | 85 | return ERR_PTR(-EPERM); |
86 | if (!cgroup_is_descendant(cgroup, current)) | 86 | if (!cgroup_is_descendant(cgroup, current)) |
87 | return ERR_PTR(-EPERM); | 87 | return ERR_PTR(-EPERM); |
88 | if (test_bit(CGRP_CLONE_CHILDREN, &cgroup->flags)) { | ||
89 | printk("ns_cgroup can't be created with parent " | ||
90 | "'clone_children' set.\n"); | ||
91 | return ERR_PTR(-EINVAL); | ||
92 | } | ||
93 | |||
94 | printk_once("ns_cgroup deprecated: consider using the " | ||
95 | "'clone_children' flag without the ns_cgroup.\n"); | ||
88 | 96 | ||
89 | ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL); | 97 | ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL); |
90 | if (!ns_cgroup) | 98 | if (!ns_cgroup) |
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index f309e8014c78..cb6c0d2af68f 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c | |||
@@ -417,8 +417,8 @@ event_filter_match(struct perf_event *event) | |||
417 | return event->cpu == -1 || event->cpu == smp_processor_id(); | 417 | return event->cpu == -1 || event->cpu == smp_processor_id(); |
418 | } | 418 | } |
419 | 419 | ||
420 | static int | 420 | static void |
421 | __event_sched_out(struct perf_event *event, | 421 | event_sched_out(struct perf_event *event, |
422 | struct perf_cpu_context *cpuctx, | 422 | struct perf_cpu_context *cpuctx, |
423 | struct perf_event_context *ctx) | 423 | struct perf_event_context *ctx) |
424 | { | 424 | { |
@@ -437,13 +437,14 @@ __event_sched_out(struct perf_event *event, | |||
437 | } | 437 | } |
438 | 438 | ||
439 | if (event->state != PERF_EVENT_STATE_ACTIVE) | 439 | if (event->state != PERF_EVENT_STATE_ACTIVE) |
440 | return 0; | 440 | return; |
441 | 441 | ||
442 | event->state = PERF_EVENT_STATE_INACTIVE; | 442 | event->state = PERF_EVENT_STATE_INACTIVE; |
443 | if (event->pending_disable) { | 443 | if (event->pending_disable) { |
444 | event->pending_disable = 0; | 444 | event->pending_disable = 0; |
445 | event->state = PERF_EVENT_STATE_OFF; | 445 | event->state = PERF_EVENT_STATE_OFF; |
446 | } | 446 | } |
447 | event->tstamp_stopped = ctx->time; | ||
447 | event->pmu->del(event, 0); | 448 | event->pmu->del(event, 0); |
448 | event->oncpu = -1; | 449 | event->oncpu = -1; |
449 | 450 | ||
@@ -452,19 +453,6 @@ __event_sched_out(struct perf_event *event, | |||
452 | ctx->nr_active--; | 453 | ctx->nr_active--; |
453 | if (event->attr.exclusive || !cpuctx->active_oncpu) | 454 | if (event->attr.exclusive || !cpuctx->active_oncpu) |
454 | cpuctx->exclusive = 0; | 455 | cpuctx->exclusive = 0; |
455 | return 1; | ||
456 | } | ||
457 | |||
458 | static void | ||
459 | event_sched_out(struct perf_event *event, | ||
460 | struct perf_cpu_context *cpuctx, | ||
461 | struct perf_event_context *ctx) | ||
462 | { | ||
463 | int ret; | ||
464 | |||
465 | ret = __event_sched_out(event, cpuctx, ctx); | ||
466 | if (ret) | ||
467 | event->tstamp_stopped = ctx->time; | ||
468 | } | 456 | } |
469 | 457 | ||
470 | static void | 458 | static void |
@@ -664,7 +652,7 @@ retry: | |||
664 | } | 652 | } |
665 | 653 | ||
666 | static int | 654 | static int |
667 | __event_sched_in(struct perf_event *event, | 655 | event_sched_in(struct perf_event *event, |
668 | struct perf_cpu_context *cpuctx, | 656 | struct perf_cpu_context *cpuctx, |
669 | struct perf_event_context *ctx) | 657 | struct perf_event_context *ctx) |
670 | { | 658 | { |
@@ -684,6 +672,10 @@ __event_sched_in(struct perf_event *event, | |||
684 | return -EAGAIN; | 672 | return -EAGAIN; |
685 | } | 673 | } |
686 | 674 | ||
675 | event->tstamp_running += ctx->time - event->tstamp_stopped; | ||
676 | |||
677 | event->shadow_ctx_time = ctx->time - ctx->timestamp; | ||
678 | |||
687 | if (!is_software_event(event)) | 679 | if (!is_software_event(event)) |
688 | cpuctx->active_oncpu++; | 680 | cpuctx->active_oncpu++; |
689 | ctx->nr_active++; | 681 | ctx->nr_active++; |
@@ -694,35 +686,6 @@ __event_sched_in(struct perf_event *event, | |||
694 | return 0; | 686 | return 0; |
695 | } | 687 | } |
696 | 688 | ||
697 | static inline int | ||
698 | event_sched_in(struct perf_event *event, | ||
699 | struct perf_cpu_context *cpuctx, | ||
700 | struct perf_event_context *ctx) | ||
701 | { | ||
702 | int ret = __event_sched_in(event, cpuctx, ctx); | ||
703 | if (ret) | ||
704 | return ret; | ||
705 | event->tstamp_running += ctx->time - event->tstamp_stopped; | ||
706 | return 0; | ||
707 | } | ||
708 | |||
709 | static void | ||
710 | group_commit_event_sched_in(struct perf_event *group_event, | ||
711 | struct perf_cpu_context *cpuctx, | ||
712 | struct perf_event_context *ctx) | ||
713 | { | ||
714 | struct perf_event *event; | ||
715 | u64 now = ctx->time; | ||
716 | |||
717 | group_event->tstamp_running += now - group_event->tstamp_stopped; | ||
718 | /* | ||
719 | * Schedule in siblings as one group (if any): | ||
720 | */ | ||
721 | list_for_each_entry(event, &group_event->sibling_list, group_entry) { | ||
722 | event->tstamp_running += now - event->tstamp_stopped; | ||
723 | } | ||
724 | } | ||
725 | |||
726 | static int | 689 | static int |
727 | group_sched_in(struct perf_event *group_event, | 690 | group_sched_in(struct perf_event *group_event, |
728 | struct perf_cpu_context *cpuctx, | 691 | struct perf_cpu_context *cpuctx, |
@@ -730,19 +693,15 @@ group_sched_in(struct perf_event *group_event, | |||
730 | { | 693 | { |
731 | struct perf_event *event, *partial_group = NULL; | 694 | struct perf_event *event, *partial_group = NULL; |
732 | struct pmu *pmu = group_event->pmu; | 695 | struct pmu *pmu = group_event->pmu; |
696 | u64 now = ctx->time; | ||
697 | bool simulate = false; | ||
733 | 698 | ||
734 | if (group_event->state == PERF_EVENT_STATE_OFF) | 699 | if (group_event->state == PERF_EVENT_STATE_OFF) |
735 | return 0; | 700 | return 0; |
736 | 701 | ||
737 | pmu->start_txn(pmu); | 702 | pmu->start_txn(pmu); |
738 | 703 | ||
739 | /* | 704 | if (event_sched_in(group_event, cpuctx, ctx)) { |
740 | * use __event_sched_in() to delay updating tstamp_running | ||
741 | * until the transaction is committed. In case of failure | ||
742 | * we will keep an unmodified tstamp_running which is a | ||
743 | * requirement to get correct timing information | ||
744 | */ | ||
745 | if (__event_sched_in(group_event, cpuctx, ctx)) { | ||
746 | pmu->cancel_txn(pmu); | 705 | pmu->cancel_txn(pmu); |
747 | return -EAGAIN; | 706 | return -EAGAIN; |
748 | } | 707 | } |
@@ -751,31 +710,42 @@ group_sched_in(struct perf_event *group_event, | |||
751 | * Schedule in siblings as one group (if any): | 710 | * Schedule in siblings as one group (if any): |
752 | */ | 711 | */ |
753 | list_for_each_entry(event, &group_event->sibling_list, group_entry) { | 712 | list_for_each_entry(event, &group_event->sibling_list, group_entry) { |
754 | if (__event_sched_in(event, cpuctx, ctx)) { | 713 | if (event_sched_in(event, cpuctx, ctx)) { |
755 | partial_group = event; | 714 | partial_group = event; |
756 | goto group_error; | 715 | goto group_error; |
757 | } | 716 | } |
758 | } | 717 | } |
759 | 718 | ||
760 | if (!pmu->commit_txn(pmu)) { | 719 | if (!pmu->commit_txn(pmu)) |
761 | /* commit tstamp_running */ | ||
762 | group_commit_event_sched_in(group_event, cpuctx, ctx); | ||
763 | return 0; | 720 | return 0; |
764 | } | 721 | |
765 | group_error: | 722 | group_error: |
766 | /* | 723 | /* |
767 | * Groups can be scheduled in as one unit only, so undo any | 724 | * Groups can be scheduled in as one unit only, so undo any |
768 | * partial group before returning: | 725 | * partial group before returning: |
726 | * The events up to the failed event are scheduled out normally, | ||
727 | * tstamp_stopped will be updated. | ||
769 | * | 728 | * |
770 | * use __event_sched_out() to avoid updating tstamp_stopped | 729 | * The failed events and the remaining siblings need to have |
771 | * because the event never actually ran | 730 | * their timings updated as if they had gone thru event_sched_in() |
731 | * and event_sched_out(). This is required to get consistent timings | ||
732 | * across the group. This also takes care of the case where the group | ||
733 | * could never be scheduled by ensuring tstamp_stopped is set to mark | ||
734 | * the time the event was actually stopped, such that time delta | ||
735 | * calculation in update_event_times() is correct. | ||
772 | */ | 736 | */ |
773 | list_for_each_entry(event, &group_event->sibling_list, group_entry) { | 737 | list_for_each_entry(event, &group_event->sibling_list, group_entry) { |
774 | if (event == partial_group) | 738 | if (event == partial_group) |
775 | break; | 739 | simulate = true; |
776 | __event_sched_out(event, cpuctx, ctx); | 740 | |
741 | if (simulate) { | ||
742 | event->tstamp_running += now - event->tstamp_stopped; | ||
743 | event->tstamp_stopped = now; | ||
744 | } else { | ||
745 | event_sched_out(event, cpuctx, ctx); | ||
746 | } | ||
777 | } | 747 | } |
778 | __event_sched_out(group_event, cpuctx, ctx); | 748 | event_sched_out(group_event, cpuctx, ctx); |
779 | 749 | ||
780 | pmu->cancel_txn(pmu); | 750 | pmu->cancel_txn(pmu); |
781 | 751 | ||
@@ -3428,7 +3398,8 @@ static u32 perf_event_tid(struct perf_event *event, struct task_struct *p) | |||
3428 | } | 3398 | } |
3429 | 3399 | ||
3430 | static void perf_output_read_one(struct perf_output_handle *handle, | 3400 | static void perf_output_read_one(struct perf_output_handle *handle, |
3431 | struct perf_event *event) | 3401 | struct perf_event *event, |
3402 | u64 enabled, u64 running) | ||
3432 | { | 3403 | { |
3433 | u64 read_format = event->attr.read_format; | 3404 | u64 read_format = event->attr.read_format; |
3434 | u64 values[4]; | 3405 | u64 values[4]; |
@@ -3436,11 +3407,11 @@ static void perf_output_read_one(struct perf_output_handle *handle, | |||
3436 | 3407 | ||
3437 | values[n++] = perf_event_count(event); | 3408 | values[n++] = perf_event_count(event); |
3438 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { | 3409 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { |
3439 | values[n++] = event->total_time_enabled + | 3410 | values[n++] = enabled + |
3440 | atomic64_read(&event->child_total_time_enabled); | 3411 | atomic64_read(&event->child_total_time_enabled); |
3441 | } | 3412 | } |
3442 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { | 3413 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { |
3443 | values[n++] = event->total_time_running + | 3414 | values[n++] = running + |
3444 | atomic64_read(&event->child_total_time_running); | 3415 | atomic64_read(&event->child_total_time_running); |
3445 | } | 3416 | } |
3446 | if (read_format & PERF_FORMAT_ID) | 3417 | if (read_format & PERF_FORMAT_ID) |
@@ -3453,7 +3424,8 @@ static void perf_output_read_one(struct perf_output_handle *handle, | |||
3453 | * XXX PERF_FORMAT_GROUP vs inherited events seems difficult. | 3424 | * XXX PERF_FORMAT_GROUP vs inherited events seems difficult. |
3454 | */ | 3425 | */ |
3455 | static void perf_output_read_group(struct perf_output_handle *handle, | 3426 | static void perf_output_read_group(struct perf_output_handle *handle, |
3456 | struct perf_event *event) | 3427 | struct perf_event *event, |
3428 | u64 enabled, u64 running) | ||
3457 | { | 3429 | { |
3458 | struct perf_event *leader = event->group_leader, *sub; | 3430 | struct perf_event *leader = event->group_leader, *sub; |
3459 | u64 read_format = event->attr.read_format; | 3431 | u64 read_format = event->attr.read_format; |
@@ -3463,10 +3435,10 @@ static void perf_output_read_group(struct perf_output_handle *handle, | |||
3463 | values[n++] = 1 + leader->nr_siblings; | 3435 | values[n++] = 1 + leader->nr_siblings; |
3464 | 3436 | ||
3465 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) | 3437 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) |
3466 | values[n++] = leader->total_time_enabled; | 3438 | values[n++] = enabled; |
3467 | 3439 | ||
3468 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) | 3440 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) |
3469 | values[n++] = leader->total_time_running; | 3441 | values[n++] = running; |
3470 | 3442 | ||
3471 | if (leader != event) | 3443 | if (leader != event) |
3472 | leader->pmu->read(leader); | 3444 | leader->pmu->read(leader); |
@@ -3491,13 +3463,35 @@ static void perf_output_read_group(struct perf_output_handle *handle, | |||
3491 | } | 3463 | } |
3492 | } | 3464 | } |
3493 | 3465 | ||
3466 | #define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\ | ||
3467 | PERF_FORMAT_TOTAL_TIME_RUNNING) | ||
3468 | |||
3494 | static void perf_output_read(struct perf_output_handle *handle, | 3469 | static void perf_output_read(struct perf_output_handle *handle, |
3495 | struct perf_event *event) | 3470 | struct perf_event *event) |
3496 | { | 3471 | { |
3472 | u64 enabled = 0, running = 0, now, ctx_time; | ||
3473 | u64 read_format = event->attr.read_format; | ||
3474 | |||
3475 | /* | ||
3476 | * compute total_time_enabled, total_time_running | ||
3477 | * based on snapshot values taken when the event | ||
3478 | * was last scheduled in. | ||
3479 | * | ||
3480 | * we cannot simply call update_context_time() | ||
3481 | * because of locking issue as we are called in | ||
3482 | * NMI context | ||
3483 | */ | ||
3484 | if (read_format & PERF_FORMAT_TOTAL_TIMES) { | ||
3485 | now = perf_clock(); | ||
3486 | ctx_time = event->shadow_ctx_time + now; | ||
3487 | enabled = ctx_time - event->tstamp_enabled; | ||
3488 | running = ctx_time - event->tstamp_running; | ||
3489 | } | ||
3490 | |||
3497 | if (event->attr.read_format & PERF_FORMAT_GROUP) | 3491 | if (event->attr.read_format & PERF_FORMAT_GROUP) |
3498 | perf_output_read_group(handle, event); | 3492 | perf_output_read_group(handle, event, enabled, running); |
3499 | else | 3493 | else |
3500 | perf_output_read_one(handle, event); | 3494 | perf_output_read_one(handle, event, enabled, running); |
3501 | } | 3495 | } |
3502 | 3496 | ||
3503 | void perf_output_sample(struct perf_output_handle *handle, | 3497 | void perf_output_sample(struct perf_output_handle *handle, |
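The perf_output_read() change above reconstructs enabled/running times from per-event snapshots (shadow_ctx_time, tstamp_enabled, tstamp_running) because update_context_time() cannot be called with its locking from NMI context. A stripped-down userspace sketch of the same arithmetic; the struct and field names below are illustrative, not the perf_event definitions:

#include <stdio.h>

/* Illustrative snapshot state; the real event keeps these fields in
 * struct perf_event and takes 'now' from perf_clock(). */
struct ev {
        unsigned long long shadow_ctx_time;     /* ctx->time - ctx->timestamp at sched-in */
        unsigned long long tstamp_enabled;
        unsigned long long tstamp_running;
};

/* Rebuild context time from the shadow offset, then derive both totals
 * without touching any context lock. */
static void read_times(const struct ev *e, unsigned long long now,
                       unsigned long long *enabled, unsigned long long *running)
{
        unsigned long long ctx_time = e->shadow_ctx_time + now;

        *enabled = ctx_time - e->tstamp_enabled;
        *running = ctx_time - e->tstamp_running;
}

int main(void)
{
        struct ev e = { .shadow_ctx_time = 100, .tstamp_enabled = 250,
                        .tstamp_running = 300 };
        unsigned long long enabled, running;

        read_times(&e, 1000, &enabled, &running);
        printf("enabled=%llu running=%llu\n", enabled, running);
        return 0;
}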
diff --git a/kernel/printk.c b/kernel/printk.c index b2ebaee8c377..38e7d5868d60 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -261,6 +261,12 @@ static inline void boot_delay_msec(void) | |||
261 | } | 261 | } |
262 | #endif | 262 | #endif |
263 | 263 | ||
264 | #ifdef CONFIG_SECURITY_DMESG_RESTRICT | ||
265 | int dmesg_restrict = 1; | ||
266 | #else | ||
267 | int dmesg_restrict; | ||
268 | #endif | ||
269 | |||
264 | int do_syslog(int type, char __user *buf, int len, bool from_file) | 270 | int do_syslog(int type, char __user *buf, int len, bool from_file) |
265 | { | 271 | { |
266 | unsigned i, j, limit, count; | 272 | unsigned i, j, limit, count; |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index f34d798ef4a2..99bbaa3e5b0d 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -181,7 +181,7 @@ int ptrace_attach(struct task_struct *task) | |||
181 | * under ptrace. | 181 | * under ptrace. |
182 | */ | 182 | */ |
183 | retval = -ERESTARTNOINTR; | 183 | retval = -ERESTARTNOINTR; |
184 | if (mutex_lock_interruptible(&task->cred_guard_mutex)) | 184 | if (mutex_lock_interruptible(&task->signal->cred_guard_mutex)) |
185 | goto out; | 185 | goto out; |
186 | 186 | ||
187 | task_lock(task); | 187 | task_lock(task); |
@@ -208,7 +208,7 @@ int ptrace_attach(struct task_struct *task) | |||
208 | unlock_tasklist: | 208 | unlock_tasklist: |
209 | write_unlock_irq(&tasklist_lock); | 209 | write_unlock_irq(&tasklist_lock); |
210 | unlock_creds: | 210 | unlock_creds: |
211 | mutex_unlock(&task->cred_guard_mutex); | 211 | mutex_unlock(&task->signal->cred_guard_mutex); |
212 | out: | 212 | out: |
213 | return retval; | 213 | return retval; |
214 | } | 214 | } |
@@ -329,6 +329,8 @@ int ptrace_detach(struct task_struct *child, unsigned int data) | |||
329 | * and reacquire the lock. | 329 | * and reacquire the lock. |
330 | */ | 330 | */ |
331 | void exit_ptrace(struct task_struct *tracer) | 331 | void exit_ptrace(struct task_struct *tracer) |
332 | __releases(&tasklist_lock) | ||
333 | __acquires(&tasklist_lock) | ||
332 | { | 334 | { |
333 | struct task_struct *p, *n; | 335 | struct task_struct *p, *n; |
334 | LIST_HEAD(ptrace_dead); | 336 | LIST_HEAD(ptrace_dead); |
@@ -402,7 +404,7 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds | |||
402 | return copied; | 404 | return copied; |
403 | } | 405 | } |
404 | 406 | ||
405 | static int ptrace_setoptions(struct task_struct *child, long data) | 407 | static int ptrace_setoptions(struct task_struct *child, unsigned long data) |
406 | { | 408 | { |
407 | child->ptrace &= ~PT_TRACE_MASK; | 409 | child->ptrace &= ~PT_TRACE_MASK; |
408 | 410 | ||
@@ -481,7 +483,8 @@ static int ptrace_setsiginfo(struct task_struct *child, const siginfo_t *info) | |||
481 | #define is_sysemu_singlestep(request) 0 | 483 | #define is_sysemu_singlestep(request) 0 |
482 | #endif | 484 | #endif |
483 | 485 | ||
484 | static int ptrace_resume(struct task_struct *child, long request, long data) | 486 | static int ptrace_resume(struct task_struct *child, long request, |
487 | unsigned long data) | ||
485 | { | 488 | { |
486 | if (!valid_signal(data)) | 489 | if (!valid_signal(data)) |
487 | return -EIO; | 490 | return -EIO; |
@@ -558,10 +561,12 @@ static int ptrace_regset(struct task_struct *task, int req, unsigned int type, | |||
558 | #endif | 561 | #endif |
559 | 562 | ||
560 | int ptrace_request(struct task_struct *child, long request, | 563 | int ptrace_request(struct task_struct *child, long request, |
561 | long addr, long data) | 564 | unsigned long addr, unsigned long data) |
562 | { | 565 | { |
563 | int ret = -EIO; | 566 | int ret = -EIO; |
564 | siginfo_t siginfo; | 567 | siginfo_t siginfo; |
568 | void __user *datavp = (void __user *) data; | ||
569 | unsigned long __user *datalp = datavp; | ||
565 | 570 | ||
566 | switch (request) { | 571 | switch (request) { |
567 | case PTRACE_PEEKTEXT: | 572 | case PTRACE_PEEKTEXT: |
@@ -578,19 +583,17 @@ int ptrace_request(struct task_struct *child, long request, | |||
578 | ret = ptrace_setoptions(child, data); | 583 | ret = ptrace_setoptions(child, data); |
579 | break; | 584 | break; |
580 | case PTRACE_GETEVENTMSG: | 585 | case PTRACE_GETEVENTMSG: |
581 | ret = put_user(child->ptrace_message, (unsigned long __user *) data); | 586 | ret = put_user(child->ptrace_message, datalp); |
582 | break; | 587 | break; |
583 | 588 | ||
584 | case PTRACE_GETSIGINFO: | 589 | case PTRACE_GETSIGINFO: |
585 | ret = ptrace_getsiginfo(child, &siginfo); | 590 | ret = ptrace_getsiginfo(child, &siginfo); |
586 | if (!ret) | 591 | if (!ret) |
587 | ret = copy_siginfo_to_user((siginfo_t __user *) data, | 592 | ret = copy_siginfo_to_user(datavp, &siginfo); |
588 | &siginfo); | ||
589 | break; | 593 | break; |
590 | 594 | ||
591 | case PTRACE_SETSIGINFO: | 595 | case PTRACE_SETSIGINFO: |
592 | if (copy_from_user(&siginfo, (siginfo_t __user *) data, | 596 | if (copy_from_user(&siginfo, datavp, sizeof siginfo)) |
593 | sizeof siginfo)) | ||
594 | ret = -EFAULT; | 597 | ret = -EFAULT; |
595 | else | 598 | else |
596 | ret = ptrace_setsiginfo(child, &siginfo); | 599 | ret = ptrace_setsiginfo(child, &siginfo); |
@@ -621,7 +624,7 @@ int ptrace_request(struct task_struct *child, long request, | |||
621 | } | 624 | } |
622 | mmput(mm); | 625 | mmput(mm); |
623 | 626 | ||
624 | ret = put_user(tmp, (unsigned long __user *) data); | 627 | ret = put_user(tmp, datalp); |
625 | break; | 628 | break; |
626 | } | 629 | } |
627 | #endif | 630 | #endif |
@@ -650,7 +653,7 @@ int ptrace_request(struct task_struct *child, long request, | |||
650 | case PTRACE_SETREGSET: | 653 | case PTRACE_SETREGSET: |
651 | { | 654 | { |
652 | struct iovec kiov; | 655 | struct iovec kiov; |
653 | struct iovec __user *uiov = (struct iovec __user *) data; | 656 | struct iovec __user *uiov = datavp; |
654 | 657 | ||
655 | if (!access_ok(VERIFY_WRITE, uiov, sizeof(*uiov))) | 658 | if (!access_ok(VERIFY_WRITE, uiov, sizeof(*uiov))) |
656 | return -EFAULT; | 659 | return -EFAULT; |
@@ -691,7 +694,8 @@ static struct task_struct *ptrace_get_task_struct(pid_t pid) | |||
691 | #define arch_ptrace_attach(child) do { } while (0) | 694 | #define arch_ptrace_attach(child) do { } while (0) |
692 | #endif | 695 | #endif |
693 | 696 | ||
694 | SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data) | 697 | SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, |
698 | unsigned long, data) | ||
695 | { | 699 | { |
696 | struct task_struct *child; | 700 | struct task_struct *child; |
697 | long ret; | 701 | long ret; |
@@ -732,7 +736,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data) | |||
732 | return ret; | 736 | return ret; |
733 | } | 737 | } |
734 | 738 | ||
735 | int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data) | 739 | int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr, |
740 | unsigned long data) | ||
736 | { | 741 | { |
737 | unsigned long tmp; | 742 | unsigned long tmp; |
738 | int copied; | 743 | int copied; |
@@ -743,7 +748,8 @@ int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data) | |||
743 | return put_user(tmp, (unsigned long __user *)data); | 748 | return put_user(tmp, (unsigned long __user *)data); |
744 | } | 749 | } |
745 | 750 | ||
746 | int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data) | 751 | int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr, |
752 | unsigned long data) | ||
747 | { | 753 | { |
748 | int copied; | 754 | int copied; |
749 | 755 | ||
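The ptrace hunk above widens addr/data to unsigned long and casts data once into datavp/datalp instead of repeating the casts at every request. A userspace sketch of that cast-once dispatch pattern; the request names and message variable are invented for the example:

#include <stdio.h>

enum req { REQ_GET_MSG, REQ_SET_MSG };

static unsigned long message = 42;

/* Cast the opaque argument to typed pointers once, up front. */
static int handle(enum req request, unsigned long data)
{
        void *datavp = (void *)data;
        unsigned long *datalp = datavp;

        switch (request) {
        case REQ_GET_MSG:
                *datalp = message;
                return 0;
        case REQ_SET_MSG:
                message = *datalp;
                return 0;
        }
        return -1;
}

int main(void)
{
        unsigned long out = 0;

        handle(REQ_GET_MSG, (unsigned long)&out);
        printf("message=%lu\n", out);
        return 0;
}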
diff --git a/kernel/range.c b/kernel/range.c index 471b66acabb5..37fa9b99ad58 100644 --- a/kernel/range.c +++ b/kernel/range.c | |||
@@ -119,7 +119,7 @@ static int cmp_range(const void *x1, const void *x2) | |||
119 | 119 | ||
120 | int clean_sort_range(struct range *range, int az) | 120 | int clean_sort_range(struct range *range, int az) |
121 | { | 121 | { |
122 | int i, j, k = az - 1, nr_range = 0; | 122 | int i, j, k = az - 1, nr_range = az; |
123 | 123 | ||
124 | for (i = 0; i < k; i++) { | 124 | for (i = 0; i < k; i++) { |
125 | if (range[i].end) | 125 | if (range[i].end) |
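The one-line range.c fix above matters because clean_sort_range() only overwrites nr_range when it finds an empty slot; starting from 0 made a completely full array report zero usable ranges. A userspace sketch of the corrected counting rule, using the same "end == 0 means unused" convention:

#include <stdio.h>

struct range {
        unsigned long start, end;
};

/* Count the used entries in a front-packed array where unused slots have
 * end == 0. Start from the full size so a completely full array is not
 * reported as empty. */
static int count_used(struct range *r, int az)
{
        int i, nr = az;         /* was 0 before the fix in the hunk above */

        for (i = 0; i < az; i++) {
                if (!r[i].end) {
                        nr = i;
                        break;
                }
        }
        return nr;
}

int main(void)
{
        struct range full[2] = { { 0, 10 }, { 20, 30 } };
        struct range part[2] = { { 0, 10 }, { 0, 0 } };

        printf("full=%d part=%d\n", count_used(full, 2), count_used(part, 2));
        return 0;
}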
diff --git a/kernel/relay.c b/kernel/relay.c index c7cf397fb929..859ea5a9605f 100644 --- a/kernel/relay.c +++ b/kernel/relay.c | |||
@@ -70,17 +70,10 @@ static const struct vm_operations_struct relay_file_mmap_ops = { | |||
70 | */ | 70 | */ |
71 | static struct page **relay_alloc_page_array(unsigned int n_pages) | 71 | static struct page **relay_alloc_page_array(unsigned int n_pages) |
72 | { | 72 | { |
73 | struct page **array; | 73 | const size_t pa_size = n_pages * sizeof(struct page *); |
74 | size_t pa_size = n_pages * sizeof(struct page *); | 74 | if (pa_size > PAGE_SIZE) |
75 | 75 | return vzalloc(pa_size); | |
76 | if (pa_size > PAGE_SIZE) { | 76 | return kzalloc(pa_size, GFP_KERNEL); |
77 | array = vmalloc(pa_size); | ||
78 | if (array) | ||
79 | memset(array, 0, pa_size); | ||
80 | } else { | ||
81 | array = kzalloc(pa_size, GFP_KERNEL); | ||
82 | } | ||
83 | return array; | ||
84 | } | 77 | } |
85 | 78 | ||
86 | /* | 79 | /* |
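The relay hunk above picks the allocator by size and relies on vzalloc()/kzalloc() for zeroing instead of an explicit memset(). A kernel-style sketch of that pattern together with a matching size-agnostic free; this is an illustrative helper, not the relay code, and it will not build as a standalone userspace program:

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

/* Zeroed allocation that spills to vmalloc space once the array no longer
 * fits in a single page, as in the hunk above. */
static void *alloc_ptr_array(size_t n, size_t elem_size)
{
        size_t bytes = n * elem_size;

        if (bytes > PAGE_SIZE)
                return vzalloc(bytes);
        return kzalloc(bytes, GFP_KERNEL);
}

/* Matching free: is_vmalloc_addr() tells us which allocator was used. */
static void free_ptr_array(void *array)
{
        if (is_vmalloc_addr(array))
                vfree(array);
        else
                kfree(array);
}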
diff --git a/kernel/resource.c b/kernel/resource.c index 7b36976e5dea..9fad33efd0db 100644 --- a/kernel/resource.c +++ b/kernel/resource.c | |||
@@ -40,6 +40,23 @@ EXPORT_SYMBOL(iomem_resource); | |||
40 | 40 | ||
41 | static DEFINE_RWLOCK(resource_lock); | 41 | static DEFINE_RWLOCK(resource_lock); |
42 | 42 | ||
43 | /* | ||
44 | * By default, we allocate free space bottom-up. The architecture can request | ||
45 | * top-down by clearing this flag. The user can override the architecture's | ||
46 | * choice with the "resource_alloc_from_bottom" kernel boot option, but that | ||
47 | * should only be a debugging tool. | ||
48 | */ | ||
49 | int resource_alloc_from_bottom = 1; | ||
50 | |||
51 | static __init int setup_alloc_from_bottom(char *s) | ||
52 | { | ||
53 | printk(KERN_INFO | ||
54 | "resource: allocating from bottom-up; please report a bug\n"); | ||
55 | resource_alloc_from_bottom = 1; | ||
56 | return 0; | ||
57 | } | ||
58 | early_param("resource_alloc_from_bottom", setup_alloc_from_bottom); | ||
59 | |||
43 | static void *r_next(struct seq_file *m, void *v, loff_t *pos) | 60 | static void *r_next(struct seq_file *m, void *v, loff_t *pos) |
44 | { | 61 | { |
45 | struct resource *p = v; | 62 | struct resource *p = v; |
@@ -357,8 +374,97 @@ int __weak page_is_ram(unsigned long pfn) | |||
357 | return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1; | 374 | return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1; |
358 | } | 375 | } |
359 | 376 | ||
377 | static resource_size_t simple_align_resource(void *data, | ||
378 | const struct resource *avail, | ||
379 | resource_size_t size, | ||
380 | resource_size_t align) | ||
381 | { | ||
382 | return avail->start; | ||
383 | } | ||
384 | |||
385 | static void resource_clip(struct resource *res, resource_size_t min, | ||
386 | resource_size_t max) | ||
387 | { | ||
388 | if (res->start < min) | ||
389 | res->start = min; | ||
390 | if (res->end > max) | ||
391 | res->end = max; | ||
392 | } | ||
393 | |||
394 | static bool resource_contains(struct resource *res1, struct resource *res2) | ||
395 | { | ||
396 | return res1->start <= res2->start && res1->end >= res2->end; | ||
397 | } | ||
398 | |||
399 | /* | ||
400 | * Find the resource before "child" in the sibling list of "root" children. | ||
401 | */ | ||
402 | static struct resource *find_sibling_prev(struct resource *root, struct resource *child) | ||
403 | { | ||
404 | struct resource *this; | ||
405 | |||
406 | for (this = root->child; this; this = this->sibling) | ||
407 | if (this->sibling == child) | ||
408 | return this; | ||
409 | |||
410 | return NULL; | ||
411 | } | ||
412 | |||
413 | /* | ||
414 | * Find empty slot in the resource tree given range and alignment. | ||
415 | * This version allocates from the end of the root resource first. | ||
416 | */ | ||
417 | static int find_resource_from_top(struct resource *root, struct resource *new, | ||
418 | resource_size_t size, resource_size_t min, | ||
419 | resource_size_t max, resource_size_t align, | ||
420 | resource_size_t (*alignf)(void *, | ||
421 | const struct resource *, | ||
422 | resource_size_t, | ||
423 | resource_size_t), | ||
424 | void *alignf_data) | ||
425 | { | ||
426 | struct resource *this; | ||
427 | struct resource tmp, avail, alloc; | ||
428 | |||
429 | tmp.start = root->end; | ||
430 | tmp.end = root->end; | ||
431 | |||
432 | this = find_sibling_prev(root, NULL); | ||
433 | for (;;) { | ||
434 | if (this) { | ||
435 | if (this->end < root->end) | ||
436 | tmp.start = this->end + 1; | ||
437 | } else | ||
438 | tmp.start = root->start; | ||
439 | |||
440 | resource_clip(&tmp, min, max); | ||
441 | |||
442 | /* Check for overflow after ALIGN() */ | ||
443 | avail = *new; | ||
444 | avail.start = ALIGN(tmp.start, align); | ||
445 | avail.end = tmp.end; | ||
446 | if (avail.start >= tmp.start) { | ||
447 | alloc.start = alignf(alignf_data, &avail, size, align); | ||
448 | alloc.end = alloc.start + size - 1; | ||
449 | if (resource_contains(&avail, &alloc)) { | ||
450 | new->start = alloc.start; | ||
451 | new->end = alloc.end; | ||
452 | return 0; | ||
453 | } | ||
454 | } | ||
455 | |||
456 | if (!this || this->start == root->start) | ||
457 | break; | ||
458 | |||
459 | tmp.end = this->start - 1; | ||
460 | this = find_sibling_prev(root, this); | ||
461 | } | ||
462 | return -EBUSY; | ||
463 | } | ||
464 | |||
360 | /* | 465 | /* |
361 | * Find empty slot in the resource tree given range and alignment. | 466 | * Find empty slot in the resource tree given range and alignment. |
467 | * This version allocates from the beginning of the root resource first. | ||
362 | */ | 468 | */ |
363 | static int find_resource(struct resource *root, struct resource *new, | 469 | static int find_resource(struct resource *root, struct resource *new, |
364 | resource_size_t size, resource_size_t min, | 470 | resource_size_t size, resource_size_t min, |
@@ -370,36 +476,43 @@ static int find_resource(struct resource *root, struct resource *new, | |||
370 | void *alignf_data) | 476 | void *alignf_data) |
371 | { | 477 | { |
372 | struct resource *this = root->child; | 478 | struct resource *this = root->child; |
373 | struct resource tmp = *new; | 479 | struct resource tmp = *new, avail, alloc; |
374 | 480 | ||
375 | tmp.start = root->start; | 481 | tmp.start = root->start; |
376 | /* | 482 | /* |
377 | * Skip past an allocated resource that starts at 0, since the assignment | 483 | * Skip past an allocated resource that starts at 0, since the |
378 | * of this->start - 1 to tmp->end below would cause an underflow. | 484 | * assignment of this->start - 1 to tmp->end below would cause an |
485 | * underflow. | ||
379 | */ | 486 | */ |
380 | if (this && this->start == 0) { | 487 | if (this && this->start == 0) { |
381 | tmp.start = this->end + 1; | 488 | tmp.start = this->end + 1; |
382 | this = this->sibling; | 489 | this = this->sibling; |
383 | } | 490 | } |
384 | for(;;) { | 491 | for (;;) { |
385 | if (this) | 492 | if (this) |
386 | tmp.end = this->start - 1; | 493 | tmp.end = this->start - 1; |
387 | else | 494 | else |
388 | tmp.end = root->end; | 495 | tmp.end = root->end; |
389 | if (tmp.start < min) | 496 | |
390 | tmp.start = min; | 497 | resource_clip(&tmp, min, max); |
391 | if (tmp.end > max) | 498 | |
392 | tmp.end = max; | 499 | /* Check for overflow after ALIGN() */ |
393 | tmp.start = ALIGN(tmp.start, align); | 500 | avail = *new; |
394 | if (alignf) | 501 | avail.start = ALIGN(tmp.start, align); |
395 | tmp.start = alignf(alignf_data, &tmp, size, align); | 502 | avail.end = tmp.end; |
396 | if (tmp.start < tmp.end && tmp.end - tmp.start >= size - 1) { | 503 | if (avail.start >= tmp.start) { |
397 | new->start = tmp.start; | 504 | alloc.start = alignf(alignf_data, &avail, size, align); |
398 | new->end = tmp.start + size - 1; | 505 | alloc.end = alloc.start + size - 1; |
399 | return 0; | 506 | if (resource_contains(&avail, &alloc)) { |
507 | new->start = alloc.start; | ||
508 | new->end = alloc.end; | ||
509 | return 0; | ||
510 | } | ||
400 | } | 511 | } |
512 | |||
401 | if (!this) | 513 | if (!this) |
402 | break; | 514 | break; |
515 | |||
403 | tmp.start = this->end + 1; | 516 | tmp.start = this->end + 1; |
404 | this = this->sibling; | 517 | this = this->sibling; |
405 | } | 518 | } |
@@ -428,8 +541,14 @@ int allocate_resource(struct resource *root, struct resource *new, | |||
428 | { | 541 | { |
429 | int err; | 542 | int err; |
430 | 543 | ||
544 | if (!alignf) | ||
545 | alignf = simple_align_resource; | ||
546 | |||
431 | write_lock(&resource_lock); | 547 | write_lock(&resource_lock); |
432 | err = find_resource(root, new, size, min, max, align, alignf, alignf_data); | 548 | if (resource_alloc_from_bottom) |
549 | err = find_resource(root, new, size, min, max, align, alignf, alignf_data); | ||
550 | else | ||
551 | err = find_resource_from_top(root, new, size, min, max, align, alignf, alignf_data); | ||
433 | if (err >= 0 && __request_resource(root, new)) | 552 | if (err >= 0 && __request_resource(root, new)) |
434 | err = -EBUSY; | 553 | err = -EBUSY; |
435 | write_unlock(&resource_lock); | 554 | write_unlock(&resource_lock); |
@@ -453,6 +572,8 @@ static struct resource * __insert_resource(struct resource *parent, struct resou | |||
453 | 572 | ||
454 | if (first == parent) | 573 | if (first == parent) |
455 | return first; | 574 | return first; |
575 | if (WARN_ON(first == new)) /* duplicated insertion */ | ||
576 | return first; | ||
456 | 577 | ||
457 | if ((first->start > new->start) || (first->end < new->end)) | 578 | if ((first->start > new->start) || (first->end < new->end)) |
458 | break; | 579 | break; |
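The resource.c rework above introduces the small helpers resource_clip() and resource_contains() and accepts a candidate allocation only if it fits inside the clipped, aligned window. A userspace sketch of that accept test; the kernel version additionally guards against ALIGN() overflow and walks the sibling list top-down, and the types and ALIGN_UP macro below are illustrative:

#include <stdio.h>

/* Illustrative window type; the kernel works on struct resource. */
struct win {
        unsigned long start, end;
};

#define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((a) - 1))

static void clip(struct win *w, unsigned long min, unsigned long max)
{
        if (w->start < min)
                w->start = min;
        if (w->end > max)
                w->end = max;
}

static int contains(const struct win *a, const struct win *b)
{
        return a->start <= b->start && a->end >= b->end;
}

/* Accept the allocation only if the aligned candidate still fits entirely
 * inside the clipped window. */
static int fit(struct win avail, unsigned long size, unsigned long align,
               unsigned long min, unsigned long max, struct win *out)
{
        struct win alloc;

        clip(&avail, min, max);
        avail.start = ALIGN_UP(avail.start, align);
        alloc.start = avail.start;
        alloc.end = alloc.start + size - 1;
        if (!contains(&avail, &alloc))
                return -1;
        *out = alloc;
        return 0;
}

int main(void)
{
        struct win gap = { 0x1003, 0x1fff }, got;

        if (!fit(gap, 0x100, 0x10, 0, ~0UL, &got))
                printf("allocated [%#lx-%#lx]\n", got.start, got.end);
        return 0;
}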
diff --git a/kernel/sched.c b/kernel/sched.c index d42992bccdfa..aa14a56f9d03 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -8510,12 +8510,12 @@ void sched_move_task(struct task_struct *tsk) | |||
8510 | if (unlikely(running)) | 8510 | if (unlikely(running)) |
8511 | tsk->sched_class->put_prev_task(rq, tsk); | 8511 | tsk->sched_class->put_prev_task(rq, tsk); |
8512 | 8512 | ||
8513 | set_task_rq(tsk, task_cpu(tsk)); | ||
8514 | |||
8515 | #ifdef CONFIG_FAIR_GROUP_SCHED | 8513 | #ifdef CONFIG_FAIR_GROUP_SCHED |
8516 | if (tsk->sched_class->moved_group) | 8514 | if (tsk->sched_class->task_move_group) |
8517 | tsk->sched_class->moved_group(tsk, on_rq); | 8515 | tsk->sched_class->task_move_group(tsk, on_rq); |
8516 | else | ||
8518 | #endif | 8517 | #endif |
8518 | set_task_rq(tsk, task_cpu(tsk)); | ||
8519 | 8519 | ||
8520 | if (unlikely(running)) | 8520 | if (unlikely(running)) |
8521 | tsk->sched_class->set_curr_task(rq); | 8521 | tsk->sched_class->set_curr_task(rq); |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 933f3d1b62ea..f4f6a8326dd0 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -3869,13 +3869,26 @@ static void set_curr_task_fair(struct rq *rq) | |||
3869 | } | 3869 | } |
3870 | 3870 | ||
3871 | #ifdef CONFIG_FAIR_GROUP_SCHED | 3871 | #ifdef CONFIG_FAIR_GROUP_SCHED |
3872 | static void moved_group_fair(struct task_struct *p, int on_rq) | 3872 | static void task_move_group_fair(struct task_struct *p, int on_rq) |
3873 | { | 3873 | { |
3874 | struct cfs_rq *cfs_rq = task_cfs_rq(p); | 3874 | /* |
3875 | 3875 | * If the task was not on the rq at the time of this cgroup movement | |
3876 | update_curr(cfs_rq); | 3876 | * it must have been asleep, sleeping tasks keep their ->vruntime |
3877 | * absolute on their old rq until wakeup (needed for the fair sleeper | ||
3878 | * bonus in place_entity()). | ||
3879 | * | ||
3880 | * If it was on the rq, we've just 'preempted' it, which does convert | ||
3881 | * ->vruntime to a relative base. | ||
3882 | * | ||
3883 | * Make sure both cases convert their relative position when migrating | ||
3884 | * to another cgroup's rq. This does somewhat interfere with the | ||
3885 | * fair sleeper stuff for the first placement, but who cares. | ||
3886 | */ | ||
3887 | if (!on_rq) | ||
3888 | p->se.vruntime -= cfs_rq_of(&p->se)->min_vruntime; | ||
3889 | set_task_rq(p, task_cpu(p)); | ||
3877 | if (!on_rq) | 3890 | if (!on_rq) |
3878 | place_entity(cfs_rq, &p->se, 1); | 3891 | p->se.vruntime += cfs_rq_of(&p->se)->min_vruntime; |
3879 | } | 3892 | } |
3880 | #endif | 3893 | #endif |
3881 | 3894 | ||
@@ -3927,7 +3940,7 @@ static const struct sched_class fair_sched_class = { | |||
3927 | .get_rr_interval = get_rr_interval_fair, | 3940 | .get_rr_interval = get_rr_interval_fair, |
3928 | 3941 | ||
3929 | #ifdef CONFIG_FAIR_GROUP_SCHED | 3942 | #ifdef CONFIG_FAIR_GROUP_SCHED |
3930 | .moved_group = moved_group_fair, | 3943 | .task_move_group = task_move_group_fair, |
3931 | #endif | 3944 | #endif |
3932 | }; | 3945 | }; |
3933 | 3946 | ||
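task_move_group_fair() above re-bases a sleeping task's vruntime: subtract the old cfs_rq's min_vruntime before switching runqueues and add the new one afterwards, so the task keeps its relative position. A toy userspace sketch of that re-anchoring with made-up numbers:

#include <stdio.h>

struct queue {
        unsigned long long min_vruntime;
};

/* Make the value relative to the old queue, then absolute on the new one. */
static void move_sleeping_task(unsigned long long *vruntime,
                               const struct queue *from, const struct queue *to)
{
        *vruntime -= from->min_vruntime;
        *vruntime += to->min_vruntime;
}

int main(void)
{
        struct queue oldq = { .min_vruntime = 5000 };
        struct queue newq = { .min_vruntime = 9000 };
        unsigned long long v = 5200;            /* 200 ahead of the old minimum */

        move_sleeping_task(&v, &oldq, &newq);
        printf("vruntime=%llu\n", v);           /* 9200: still 200 ahead */
        return 0;
}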
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index 25c2f962f6fc..48ddf431db0e 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h | |||
@@ -157,15 +157,7 @@ static inline void sched_info_reset_dequeued(struct task_struct *t) | |||
157 | } | 157 | } |
158 | 158 | ||
159 | /* | 159 | /* |
160 | * Called when a process is dequeued from the active array and given | 160 | * We are interested in knowing how long it was from the *first* time a |
161 | * the cpu. We should note that with the exception of interactive | ||
162 | * tasks, the expired queue will become the active queue after the active | ||
163 | * queue is empty, without explicitly dequeuing and requeuing tasks in the | ||
164 | * expired queue. (Interactive tasks may be requeued directly to the | ||
165 | * active queue, thus delaying tasks in the expired queue from running; | ||
166 | * see scheduler_tick()). | ||
167 | * | ||
168 | * Though we are interested in knowing how long it was from the *first* time a | ||
169 | * task was queued to the time that it finally hit a cpu, we call this routine | 161 | * task was queued to the time that it finally hit a cpu, we call this routine |
170 | * from dequeue_task() to account for possible rq->clock skew across cpus. The | 162 | * from dequeue_task() to account for possible rq->clock skew across cpus. The |
171 | * delta taken on each cpu would annul the skew. | 163 | * delta taken on each cpu would annul the skew. |
@@ -203,16 +195,6 @@ static void sched_info_arrive(struct task_struct *t) | |||
203 | } | 195 | } |
204 | 196 | ||
205 | /* | 197 | /* |
206 | * Called when a process is queued into either the active or expired | ||
207 | * array. The time is noted and later used to determine how long we | ||
208 | * had to wait for us to reach the cpu. Since the expired queue will | ||
209 | * become the active queue after active queue is empty, without dequeuing | ||
210 | * and requeuing any tasks, we are interested in queuing to either. It | ||
211 | * is unusual but not impossible for tasks to be dequeued and immediately | ||
212 | * requeued in the same or another array: this can happen in sched_yield(), | ||
213 | * set_user_nice(), and even load_balance() as it moves tasks from runqueue | ||
214 | * to runqueue. | ||
215 | * | ||
216 | * This function is only called from enqueue_task(), but also only updates | 198 | * This function is only called from enqueue_task(), but also only updates |
217 | * the timestamp if it is already not set. It's assumed that | 199 | * the timestamp if it is already not set. It's assumed that |
218 | * sched_info_dequeued() will clear that stamp when appropriate. | 200 | * sched_info_dequeued() will clear that stamp when appropriate. |
diff --git a/kernel/signal.c b/kernel/signal.c index 919562c3d6b7..4e3cff10fdce 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -1105,7 +1105,8 @@ int zap_other_threads(struct task_struct *p) | |||
1105 | return count; | 1105 | return count; |
1106 | } | 1106 | } |
1107 | 1107 | ||
1108 | struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags) | 1108 | struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, |
1109 | unsigned long *flags) | ||
1109 | { | 1110 | { |
1110 | struct sighand_struct *sighand; | 1111 | struct sighand_struct *sighand; |
1111 | 1112 | ||
@@ -1617,6 +1618,8 @@ static int sigkill_pending(struct task_struct *tsk) | |||
1617 | * is gone, we keep current->exit_code unless clear_code. | 1618 | * is gone, we keep current->exit_code unless clear_code. |
1618 | */ | 1619 | */ |
1619 | static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) | 1620 | static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) |
1621 | __releases(¤t->sighand->siglock) | ||
1622 | __acquires(¤t->sighand->siglock) | ||
1620 | { | 1623 | { |
1621 | if (arch_ptrace_stop_needed(exit_code, info)) { | 1624 | if (arch_ptrace_stop_needed(exit_code, info)) { |
1622 | /* | 1625 | /* |
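The __releases()/__acquires() annotations added to exit_ptrace() and ptrace_stop() above are sparse context markers documenting that the function drops and retakes a lock its caller holds. A kernel-style sketch of the same annotation on a hypothetical helper; it is illustrative only and expands to nothing unless built with sparse (__CHECKER__):

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);

/* Hypothetical helper: tells sparse that the caller-held lock is dropped
 * and retaken inside, exactly like the annotations added above. */
static void drop_and_retake(void)
        __releases(&demo_lock)
        __acquires(&demo_lock)
{
        spin_unlock(&demo_lock);
        /* ... do work that must run without the lock held ... */
        spin_lock(&demo_lock);
}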
diff --git a/kernel/smp.c b/kernel/smp.c index ed6aacfcb7ef..12ed8b013e2d 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
@@ -267,7 +267,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data); | |||
267 | * | 267 | * |
268 | * Returns 0 on success, else a negative status code. | 268 | * Returns 0 on success, else a negative status code. |
269 | */ | 269 | */ |
270 | int smp_call_function_single(int cpu, void (*func) (void *info), void *info, | 270 | int smp_call_function_single(int cpu, smp_call_func_t func, void *info, |
271 | int wait) | 271 | int wait) |
272 | { | 272 | { |
273 | struct call_single_data d = { | 273 | struct call_single_data d = { |
@@ -336,7 +336,7 @@ EXPORT_SYMBOL(smp_call_function_single); | |||
336 | * 3) any other online cpu in @mask | 336 | * 3) any other online cpu in @mask |
337 | */ | 337 | */ |
338 | int smp_call_function_any(const struct cpumask *mask, | 338 | int smp_call_function_any(const struct cpumask *mask, |
339 | void (*func)(void *info), void *info, int wait) | 339 | smp_call_func_t func, void *info, int wait) |
340 | { | 340 | { |
341 | unsigned int cpu; | 341 | unsigned int cpu; |
342 | const struct cpumask *nodemask; | 342 | const struct cpumask *nodemask; |
@@ -416,7 +416,7 @@ void __smp_call_function_single(int cpu, struct call_single_data *data, | |||
416 | * must be disabled when calling this function. | 416 | * must be disabled when calling this function. |
417 | */ | 417 | */ |
418 | void smp_call_function_many(const struct cpumask *mask, | 418 | void smp_call_function_many(const struct cpumask *mask, |
419 | void (*func)(void *), void *info, bool wait) | 419 | smp_call_func_t func, void *info, bool wait) |
420 | { | 420 | { |
421 | struct call_function_data *data; | 421 | struct call_function_data *data; |
422 | unsigned long flags; | 422 | unsigned long flags; |
@@ -500,7 +500,7 @@ EXPORT_SYMBOL(smp_call_function_many); | |||
500 | * You must not call this function with disabled interrupts or from a | 500 | * You must not call this function with disabled interrupts or from a |
501 | * hardware interrupt handler or from a bottom half handler. | 501 | * hardware interrupt handler or from a bottom half handler. |
502 | */ | 502 | */ |
503 | int smp_call_function(void (*func)(void *), void *info, int wait) | 503 | int smp_call_function(smp_call_func_t func, void *info, int wait) |
504 | { | 504 | { |
505 | preempt_disable(); | 505 | preempt_disable(); |
506 | smp_call_function_many(cpu_online_mask, func, info, wait); | 506 | smp_call_function_many(cpu_online_mask, func, info, wait); |
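The smp.c hunks above switch the cross-call APIs to the smp_call_func_t typedef rather than spelling out the function-pointer type in every prototype. A userspace sketch of the same cleanup with an invented call_func_t and a trivial loop standing in for per-CPU dispatch:

#include <stdio.h>

/* Name the callback type once; every prototype that takes it stays short.
 * In the kernel, smp_call_func_t is void (*)(void *info). */
typedef void (*call_func_t)(void *info);

static void run_on_each(call_func_t func, void *info, int n)
{
        int i;

        for (i = 0; i < n; i++)         /* stand-in for per-CPU IPIs */
                func(info);
}

static void bump(void *info)
{
        (*(int *)info)++;
}

int main(void)
{
        int counter = 0;

        run_on_each(bump, &counter, 4);
        printf("counter=%d\n", counter);
        return 0;
}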
diff --git a/kernel/softirq.c b/kernel/softirq.c index f02a9dfa19bc..18f4be0d5fe0 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -229,18 +229,20 @@ restart: | |||
229 | 229 | ||
230 | do { | 230 | do { |
231 | if (pending & 1) { | 231 | if (pending & 1) { |
232 | unsigned int vec_nr = h - softirq_vec; | ||
232 | int prev_count = preempt_count(); | 233 | int prev_count = preempt_count(); |
233 | kstat_incr_softirqs_this_cpu(h - softirq_vec); | ||
234 | 234 | ||
235 | trace_softirq_entry(h, softirq_vec); | 235 | kstat_incr_softirqs_this_cpu(vec_nr); |
236 | |||
237 | trace_softirq_entry(vec_nr); | ||
236 | h->action(h); | 238 | h->action(h); |
237 | trace_softirq_exit(h, softirq_vec); | 239 | trace_softirq_exit(vec_nr); |
238 | if (unlikely(prev_count != preempt_count())) { | 240 | if (unlikely(prev_count != preempt_count())) { |
239 | printk(KERN_ERR "huh, entered softirq %td %s %p" | 241 | printk(KERN_ERR "huh, entered softirq %u %s %p" |
240 | "with preempt_count %08x," | 242 | "with preempt_count %08x," |
241 | " exited with %08x?\n", h - softirq_vec, | 243 | " exited with %08x?\n", vec_nr, |
242 | softirq_to_name[h - softirq_vec], | 244 | softirq_to_name[vec_nr], h->action, |
243 | h->action, prev_count, preempt_count()); | 245 | prev_count, preempt_count()); |
244 | preempt_count() = prev_count; | 246 | preempt_count() = prev_count; |
245 | } | 247 | } |
246 | 248 | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c33a1edb799f..b65bf634035e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -704,6 +704,15 @@ static struct ctl_table kern_table[] = { | |||
704 | }, | 704 | }, |
705 | #endif | 705 | #endif |
706 | { | 706 | { |
707 | .procname = "dmesg_restrict", | ||
708 | .data = &dmesg_restrict, | ||
709 | .maxlen = sizeof(int), | ||
710 | .mode = 0644, | ||
711 | .proc_handler = proc_dointvec_minmax, | ||
712 | .extra1 = &zero, | ||
713 | .extra2 = &one, | ||
714 | }, | ||
715 | { | ||
707 | .procname = "ngroups_max", | 716 | .procname = "ngroups_max", |
708 | .data = &ngroups_max, | 717 | .data = &ngroups_max, |
709 | .maxlen = sizeof (int), | 718 | .maxlen = sizeof (int), |
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 11281d5792bd..c8231fb15708 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c | |||
@@ -175,22 +175,8 @@ static void send_cpu_listeners(struct sk_buff *skb, | |||
175 | up_write(&listeners->sem); | 175 | up_write(&listeners->sem); |
176 | } | 176 | } |
177 | 177 | ||
178 | static int fill_pid(pid_t pid, struct task_struct *tsk, | 178 | static void fill_stats(struct task_struct *tsk, struct taskstats *stats) |
179 | struct taskstats *stats) | ||
180 | { | 179 | { |
181 | int rc = 0; | ||
182 | |||
183 | if (!tsk) { | ||
184 | rcu_read_lock(); | ||
185 | tsk = find_task_by_vpid(pid); | ||
186 | if (tsk) | ||
187 | get_task_struct(tsk); | ||
188 | rcu_read_unlock(); | ||
189 | if (!tsk) | ||
190 | return -ESRCH; | ||
191 | } else | ||
192 | get_task_struct(tsk); | ||
193 | |||
194 | memset(stats, 0, sizeof(*stats)); | 180 | memset(stats, 0, sizeof(*stats)); |
195 | /* | 181 | /* |
196 | * Each accounting subsystem adds calls to its functions to | 182 | * Each accounting subsystem adds calls to its functions to |
@@ -209,17 +195,27 @@ static int fill_pid(pid_t pid, struct task_struct *tsk, | |||
209 | 195 | ||
210 | /* fill in extended acct fields */ | 196 | /* fill in extended acct fields */ |
211 | xacct_add_tsk(stats, tsk); | 197 | xacct_add_tsk(stats, tsk); |
198 | } | ||
212 | 199 | ||
213 | /* Define err: label here if needed */ | 200 | static int fill_stats_for_pid(pid_t pid, struct taskstats *stats) |
214 | put_task_struct(tsk); | 201 | { |
215 | return rc; | 202 | struct task_struct *tsk; |
216 | 203 | ||
204 | rcu_read_lock(); | ||
205 | tsk = find_task_by_vpid(pid); | ||
206 | if (tsk) | ||
207 | get_task_struct(tsk); | ||
208 | rcu_read_unlock(); | ||
209 | if (!tsk) | ||
210 | return -ESRCH; | ||
211 | fill_stats(tsk, stats); | ||
212 | put_task_struct(tsk); | ||
213 | return 0; | ||
217 | } | 214 | } |
218 | 215 | ||
219 | static int fill_tgid(pid_t tgid, struct task_struct *first, | 216 | static int fill_stats_for_tgid(pid_t tgid, struct taskstats *stats) |
220 | struct taskstats *stats) | ||
221 | { | 217 | { |
222 | struct task_struct *tsk; | 218 | struct task_struct *tsk, *first; |
223 | unsigned long flags; | 219 | unsigned long flags; |
224 | int rc = -ESRCH; | 220 | int rc = -ESRCH; |
225 | 221 | ||
@@ -228,8 +224,7 @@ static int fill_tgid(pid_t tgid, struct task_struct *first, | |||
228 | * leaders who are already counted with the dead tasks | 224 | * leaders who are already counted with the dead tasks |
229 | */ | 225 | */ |
230 | rcu_read_lock(); | 226 | rcu_read_lock(); |
231 | if (!first) | 227 | first = find_task_by_vpid(tgid); |
232 | first = find_task_by_vpid(tgid); | ||
233 | 228 | ||
234 | if (!first || !lock_task_sighand(first, &flags)) | 229 | if (!first || !lock_task_sighand(first, &flags)) |
235 | goto out; | 230 | goto out; |
@@ -268,7 +263,6 @@ out: | |||
268 | return rc; | 263 | return rc; |
269 | } | 264 | } |
270 | 265 | ||
271 | |||
272 | static void fill_tgid_exit(struct task_struct *tsk) | 266 | static void fill_tgid_exit(struct task_struct *tsk) |
273 | { | 267 | { |
274 | unsigned long flags; | 268 | unsigned long flags; |
@@ -360,6 +354,12 @@ static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) | |||
360 | struct nlattr *na, *ret; | 354 | struct nlattr *na, *ret; |
361 | int aggr; | 355 | int aggr; |
362 | 356 | ||
357 | /* If we don't pad, we end up with alignment on a 4 byte boundary. | ||
358 | * This causes lots of runtime warnings on systems requiring 8 byte | ||
359 | * alignment */ | ||
360 | u32 pids[2] = { pid, 0 }; | ||
361 | int pid_size = ALIGN(sizeof(pid), sizeof(long)); | ||
362 | |||
363 | aggr = (type == TASKSTATS_TYPE_PID) | 363 | aggr = (type == TASKSTATS_TYPE_PID) |
364 | ? TASKSTATS_TYPE_AGGR_PID | 364 | ? TASKSTATS_TYPE_AGGR_PID |
365 | : TASKSTATS_TYPE_AGGR_TGID; | 365 | : TASKSTATS_TYPE_AGGR_TGID; |
@@ -367,7 +367,7 @@ static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) | |||
367 | na = nla_nest_start(skb, aggr); | 367 | na = nla_nest_start(skb, aggr); |
368 | if (!na) | 368 | if (!na) |
369 | goto err; | 369 | goto err; |
370 | if (nla_put(skb, type, sizeof(pid), &pid) < 0) | 370 | if (nla_put(skb, type, pid_size, pids) < 0) |
371 | goto err; | 371 | goto err; |
372 | ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); | 372 | ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); |
373 | if (!ret) | 373 | if (!ret) |
@@ -424,39 +424,46 @@ err: | |||
424 | return rc; | 424 | return rc; |
425 | } | 425 | } |
426 | 426 | ||
427 | static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) | 427 | static int cmd_attr_register_cpumask(struct genl_info *info) |
428 | { | 428 | { |
429 | int rc; | ||
430 | struct sk_buff *rep_skb; | ||
431 | struct taskstats *stats; | ||
432 | size_t size; | ||
433 | cpumask_var_t mask; | 429 | cpumask_var_t mask; |
430 | int rc; | ||
434 | 431 | ||
435 | if (!alloc_cpumask_var(&mask, GFP_KERNEL)) | 432 | if (!alloc_cpumask_var(&mask, GFP_KERNEL)) |
436 | return -ENOMEM; | 433 | return -ENOMEM; |
437 | |||
438 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask); | 434 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask); |
439 | if (rc < 0) | 435 | if (rc < 0) |
440 | goto free_return_rc; | 436 | goto out; |
441 | if (rc == 0) { | 437 | rc = add_del_listener(info->snd_pid, mask, REGISTER); |
442 | rc = add_del_listener(info->snd_pid, mask, REGISTER); | 438 | out: |
443 | goto free_return_rc; | 439 | free_cpumask_var(mask); |
444 | } | 440 | return rc; |
441 | } | ||
442 | |||
443 | static int cmd_attr_deregister_cpumask(struct genl_info *info) | ||
444 | { | ||
445 | cpumask_var_t mask; | ||
446 | int rc; | ||
445 | 447 | ||
448 | if (!alloc_cpumask_var(&mask, GFP_KERNEL)) | ||
449 | return -ENOMEM; | ||
446 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask); | 450 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask); |
447 | if (rc < 0) | 451 | if (rc < 0) |
448 | goto free_return_rc; | 452 | goto out; |
449 | if (rc == 0) { | 453 | rc = add_del_listener(info->snd_pid, mask, DEREGISTER); |
450 | rc = add_del_listener(info->snd_pid, mask, DEREGISTER); | 454 | out: |
451 | free_return_rc: | ||
452 | free_cpumask_var(mask); | ||
453 | return rc; | ||
454 | } | ||
455 | free_cpumask_var(mask); | 455 | free_cpumask_var(mask); |
456 | return rc; | ||
457 | } | ||
458 | |||
459 | static int cmd_attr_pid(struct genl_info *info) | ||
460 | { | ||
461 | struct taskstats *stats; | ||
462 | struct sk_buff *rep_skb; | ||
463 | size_t size; | ||
464 | u32 pid; | ||
465 | int rc; | ||
456 | 466 | ||
457 | /* | ||
458 | * Size includes space for nested attributes | ||
459 | */ | ||
460 | size = nla_total_size(sizeof(u32)) + | 467 | size = nla_total_size(sizeof(u32)) + |
461 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); | 468 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); |
462 | 469 | ||
@@ -465,33 +472,64 @@ free_return_rc: | |||
465 | return rc; | 472 | return rc; |
466 | 473 | ||
467 | rc = -EINVAL; | 474 | rc = -EINVAL; |
468 | if (info->attrs[TASKSTATS_CMD_ATTR_PID]) { | 475 | pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); |
469 | u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); | 476 | stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid); |
470 | stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid); | 477 | if (!stats) |
471 | if (!stats) | 478 | goto err; |
472 | goto err; | 479 | |
473 | 480 | rc = fill_stats_for_pid(pid, stats); | |
474 | rc = fill_pid(pid, NULL, stats); | 481 | if (rc < 0) |
475 | if (rc < 0) | 482 | goto err; |
476 | goto err; | 483 | return send_reply(rep_skb, info); |
477 | } else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) { | 484 | err: |
478 | u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); | 485 | nlmsg_free(rep_skb); |
479 | stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid); | 486 | return rc; |
480 | if (!stats) | 487 | } |
481 | goto err; | 488 | |
482 | 489 | static int cmd_attr_tgid(struct genl_info *info) | |
483 | rc = fill_tgid(tgid, NULL, stats); | 490 | { |
484 | if (rc < 0) | 491 | struct taskstats *stats; |
485 | goto err; | 492 | struct sk_buff *rep_skb; |
486 | } else | 493 | size_t size; |
494 | u32 tgid; | ||
495 | int rc; | ||
496 | |||
497 | size = nla_total_size(sizeof(u32)) + | ||
498 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); | ||
499 | |||
500 | rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); | ||
501 | if (rc < 0) | ||
502 | return rc; | ||
503 | |||
504 | rc = -EINVAL; | ||
505 | tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); | ||
506 | stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid); | ||
507 | if (!stats) | ||
487 | goto err; | 508 | goto err; |
488 | 509 | ||
510 | rc = fill_stats_for_tgid(tgid, stats); | ||
511 | if (rc < 0) | ||
512 | goto err; | ||
489 | return send_reply(rep_skb, info); | 513 | return send_reply(rep_skb, info); |
490 | err: | 514 | err: |
491 | nlmsg_free(rep_skb); | 515 | nlmsg_free(rep_skb); |
492 | return rc; | 516 | return rc; |
493 | } | 517 | } |
494 | 518 | ||
519 | static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) | ||
520 | { | ||
521 | if (info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK]) | ||
522 | return cmd_attr_register_cpumask(info); | ||
523 | else if (info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK]) | ||
524 | return cmd_attr_deregister_cpumask(info); | ||
525 | else if (info->attrs[TASKSTATS_CMD_ATTR_PID]) | ||
526 | return cmd_attr_pid(info); | ||
527 | else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) | ||
528 | return cmd_attr_tgid(info); | ||
529 | else | ||
530 | return -EINVAL; | ||
531 | } | ||
532 | |||
495 | static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk) | 533 | static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk) |
496 | { | 534 | { |
497 | struct signal_struct *sig = tsk->signal; | 535 | struct signal_struct *sig = tsk->signal; |
@@ -555,9 +593,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) | |||
555 | if (!stats) | 593 | if (!stats) |
556 | goto err; | 594 | goto err; |
557 | 595 | ||
558 | rc = fill_pid(-1, tsk, stats); | 596 | fill_stats(tsk, stats); |
559 | if (rc < 0) | ||
560 | goto err; | ||
561 | 597 | ||
562 | /* | 598 | /* |
563 | * Doesn't matter if tsk is the leader or the last group member leaving | 599 | * Doesn't matter if tsk is the leader or the last group member leaving |
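The taskstats.c refactor above splits the old catch-all command handler into one small function per netlink attribute, with taskstats_user_cmd() reduced to a dispatcher. Below is a minimal user-space sketch of that dispatch shape, assuming toy attribute slots and handler names; it is not the kernel code, just plain C arranged the same way.

#include <stdio.h>

enum { ATTR_REGISTER_CPUMASK, ATTR_DEREGISTER_CPUMASK, ATTR_PID, ATTR_TGID, ATTR_MAX };

struct fake_info {
	const char *attrs[ATTR_MAX];    /* non-NULL slot == attribute present */
};

static int cmd_register(struct fake_info *info)   { (void)info; puts("register");   return 0; }
static int cmd_deregister(struct fake_info *info) { (void)info; puts("deregister"); return 0; }
static int cmd_pid(struct fake_info *info)        { (void)info; puts("pid");        return 0; }
static int cmd_tgid(struct fake_info *info)       { (void)info; puts("tgid");       return 0; }

/* Same shape as taskstats_user_cmd(): exactly one attribute picks one handler. */
static int user_cmd(struct fake_info *info)
{
	if (info->attrs[ATTR_REGISTER_CPUMASK])
		return cmd_register(info);
	else if (info->attrs[ATTR_DEREGISTER_CPUMASK])
		return cmd_deregister(info);
	else if (info->attrs[ATTR_PID])
		return cmd_pid(info);
	else if (info->attrs[ATTR_TGID])
		return cmd_tgid(info);
	return -22;   /* stands in for -EINVAL */
}

int main(void)
{
	struct fake_info info = { .attrs = { [ATTR_PID] = "123" } };

	return user_cmd(&info) ? 1 : 0;
}

The payoff of the split is that each handler now owns its own reply allocation and error path, so the cpumask paths can use a simple out: label instead of the old free_return_rc jump.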
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index bc251ed66724..7b8ec0281548 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
@@ -168,7 +168,6 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, | |||
168 | static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), | 168 | static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), |
169 | BLK_TC_ACT(BLK_TC_WRITE) }; | 169 | BLK_TC_ACT(BLK_TC_WRITE) }; |
170 | 170 | ||
171 | #define BLK_TC_HARDBARRIER BLK_TC_BARRIER | ||
172 | #define BLK_TC_RAHEAD BLK_TC_AHEAD | 171 | #define BLK_TC_RAHEAD BLK_TC_AHEAD |
173 | 172 | ||
174 | /* The ilog2() calls fall out because they're constant */ | 173 | /* The ilog2() calls fall out because they're constant */ |
@@ -196,7 +195,6 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, | |||
196 | return; | 195 | return; |
197 | 196 | ||
198 | what |= ddir_act[rw & WRITE]; | 197 | what |= ddir_act[rw & WRITE]; |
199 | what |= MASK_TC_BIT(rw, HARDBARRIER); | ||
200 | what |= MASK_TC_BIT(rw, SYNC); | 198 | what |= MASK_TC_BIT(rw, SYNC); |
201 | what |= MASK_TC_BIT(rw, RAHEAD); | 199 | what |= MASK_TC_BIT(rw, RAHEAD); |
202 | what |= MASK_TC_BIT(rw, META); | 200 | what |= MASK_TC_BIT(rw, META); |
@@ -1807,8 +1805,6 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) | |||
1807 | 1805 | ||
1808 | if (rw & REQ_RAHEAD) | 1806 | if (rw & REQ_RAHEAD) |
1809 | rwbs[i++] = 'A'; | 1807 | rwbs[i++] = 'A'; |
1810 | if (rw & REQ_HARDBARRIER) | ||
1811 | rwbs[i++] = 'B'; | ||
1812 | if (rw & REQ_SYNC) | 1808 | if (rw & REQ_SYNC) |
1813 | rwbs[i++] = 'S'; | 1809 | rwbs[i++] = 'S'; |
1814 | if (rw & REQ_META) | 1810 | if (rw & REQ_META) |
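With REQ_HARDBARRIER gone, blk_fill_rwbs() no longer emits a 'B' character. A rough sketch of that flag-to-letter mapping, with made-up flag values standing in for the real REQ_* bits:

#include <stdio.h>

/* illustrative flag bits for the demo, not the real REQ_* values */
#define REQ_WRITE  (1u << 0)
#define REQ_RAHEAD (1u << 1)
#define REQ_SYNC   (1u << 2)
#define REQ_META   (1u << 3)

static void fill_rwbs(char *rwbs, unsigned int rw, int bytes)
{
	int i = 0;

	if (bytes)
		rwbs[i++] = (rw & REQ_WRITE) ? 'W' : 'R';
	else
		rwbs[i++] = 'N';
	if (rw & REQ_RAHEAD)
		rwbs[i++] = 'A';
	if (rw & REQ_SYNC)
		rwbs[i++] = 'S';
	if (rw & REQ_META)
		rwbs[i++] = 'M';
	rwbs[i] = '\0';
}

int main(void)
{
	char rwbs[8];

	fill_rwbs(rwbs, REQ_SYNC | REQ_META, 4096);
	printf("%s\n", rwbs);   /* prints "RSM" -- no 'B' slot anymore */
	return 0;
}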
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index c3dab054d18e..9ed509a015d8 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -224,6 +224,9 @@ enum { | |||
224 | RB_LEN_TIME_STAMP = 16, | 224 | RB_LEN_TIME_STAMP = 16, |
225 | }; | 225 | }; |
226 | 226 | ||
227 | #define skip_time_extend(event) \ | ||
228 | ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND)) | ||
229 | |||
227 | static inline int rb_null_event(struct ring_buffer_event *event) | 230 | static inline int rb_null_event(struct ring_buffer_event *event) |
228 | { | 231 | { |
229 | return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; | 232 | return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; |
@@ -248,8 +251,12 @@ rb_event_data_length(struct ring_buffer_event *event) | |||
248 | return length + RB_EVNT_HDR_SIZE; | 251 | return length + RB_EVNT_HDR_SIZE; |
249 | } | 252 | } |
250 | 253 | ||
251 | /* inline for ring buffer fast paths */ | 254 | /* |
252 | static unsigned | 255 | * Return the length of the given event. Will return |
256 | * the length of the time extend if the event is a | ||
257 | * time extend. | ||
258 | */ | ||
259 | static inline unsigned | ||
253 | rb_event_length(struct ring_buffer_event *event) | 260 | rb_event_length(struct ring_buffer_event *event) |
254 | { | 261 | { |
255 | switch (event->type_len) { | 262 | switch (event->type_len) { |
@@ -274,13 +281,41 @@ rb_event_length(struct ring_buffer_event *event) | |||
274 | return 0; | 281 | return 0; |
275 | } | 282 | } |
276 | 283 | ||
284 | /* | ||
285 | * Return total length of time extend and data, | ||
286 | * or just the event length for all other events. | ||
287 | */ | ||
288 | static inline unsigned | ||
289 | rb_event_ts_length(struct ring_buffer_event *event) | ||
290 | { | ||
291 | unsigned len = 0; | ||
292 | |||
293 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { | ||
294 | /* time extends include the data event after it */ | ||
295 | len = RB_LEN_TIME_EXTEND; | ||
296 | event = skip_time_extend(event); | ||
297 | } | ||
298 | return len + rb_event_length(event); | ||
299 | } | ||
300 | |||
277 | /** | 301 | /** |
278 | * ring_buffer_event_length - return the length of the event | 302 | * ring_buffer_event_length - return the length of the event |
279 | * @event: the event to get the length of | 303 | * @event: the event to get the length of |
304 | * | ||
305 | * Returns the size of the data load of a data event. | ||
306 | * If the event is something other than a data event, it | ||
307 | * returns the size of the event itself. With the exception | ||
308 | * of a TIME EXTEND, where it still returns the size of the | ||
309 | * data load of the data event after it. | ||
280 | */ | 310 | */ |
281 | unsigned ring_buffer_event_length(struct ring_buffer_event *event) | 311 | unsigned ring_buffer_event_length(struct ring_buffer_event *event) |
282 | { | 312 | { |
283 | unsigned length = rb_event_length(event); | 313 | unsigned length; |
314 | |||
315 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) | ||
316 | event = skip_time_extend(event); | ||
317 | |||
318 | length = rb_event_length(event); | ||
284 | if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) | 319 | if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) |
285 | return length; | 320 | return length; |
286 | length -= RB_EVNT_HDR_SIZE; | 321 | length -= RB_EVNT_HDR_SIZE; |
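The new rb_event_ts_length() helper reports a time extend and the data event glued to it as a single length, which is what the read paths below switch to. A toy model of that idea, with an invented record layout and an 8-byte prefix standing in for RB_LEN_TIME_EXTEND:

#include <stdio.h>
#include <stddef.h>

#define LEN_TIME_EXTEND 8

enum { TYPE_DATA = 0, TYPE_TIME_EXTEND = 1 };

struct toy_event {
	unsigned char type;
	unsigned char len;     /* payload length for data events */
	/* payload follows */
};

static const struct toy_event *skip_extend(const struct toy_event *ev)
{
	return (const struct toy_event *)((const char *)ev + LEN_TIME_EXTEND);
}

static size_t event_ts_length(const struct toy_event *ev)
{
	size_t extra = 0;

	if (ev->type == TYPE_TIME_EXTEND) {
		extra = LEN_TIME_EXTEND;   /* the extend travels with its data */
		ev = skip_extend(ev);
	}
	return extra + sizeof(*ev) + ev->len;
}

int main(void)
{
	/* one 8-byte time-extend prefix followed by a 12-byte data payload */
	unsigned char buf[LEN_TIME_EXTEND + sizeof(struct toy_event) + 12] = {0};
	struct toy_event *ext = (struct toy_event *)buf;
	struct toy_event *data = (struct toy_event *)(buf + LEN_TIME_EXTEND);

	ext->type = TYPE_TIME_EXTEND;
	data->type = TYPE_DATA;
	data->len = 12;

	printf("%zu\n", event_ts_length(ext)); /* prefix + header + payload = 22 */
	return 0;
}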
@@ -294,6 +329,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length); | |||
294 | static void * | 329 | static void * |
295 | rb_event_data(struct ring_buffer_event *event) | 330 | rb_event_data(struct ring_buffer_event *event) |
296 | { | 331 | { |
332 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) | ||
333 | event = skip_time_extend(event); | ||
297 | BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); | 334 | BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); |
298 | /* If length is in len field, then array[0] has the data */ | 335 | /* If length is in len field, then array[0] has the data */ |
299 | if (event->type_len) | 336 | if (event->type_len) |
@@ -404,9 +441,6 @@ static inline int test_time_stamp(u64 delta) | |||
404 | /* Max payload is BUF_PAGE_SIZE - header (8bytes) */ | 441 | /* Max payload is BUF_PAGE_SIZE - header (8bytes) */ |
405 | #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) | 442 | #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) |
406 | 443 | ||
407 | /* Max number of timestamps that can fit on a page */ | ||
408 | #define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_EXTEND) | ||
409 | |||
410 | int ring_buffer_print_page_header(struct trace_seq *s) | 444 | int ring_buffer_print_page_header(struct trace_seq *s) |
411 | { | 445 | { |
412 | struct buffer_data_page field; | 446 | struct buffer_data_page field; |
@@ -1546,6 +1580,25 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) | |||
1546 | iter->head = 0; | 1580 | iter->head = 0; |
1547 | } | 1581 | } |
1548 | 1582 | ||
1583 | /* Slow path, do not inline */ | ||
1584 | static noinline struct ring_buffer_event * | ||
1585 | rb_add_time_stamp(struct ring_buffer_event *event, u64 delta) | ||
1586 | { | ||
1587 | event->type_len = RINGBUF_TYPE_TIME_EXTEND; | ||
1588 | |||
1589 | /* Not the first event on the page? */ | ||
1590 | if (rb_event_index(event)) { | ||
1591 | event->time_delta = delta & TS_MASK; | ||
1592 | event->array[0] = delta >> TS_SHIFT; | ||
1593 | } else { | ||
1594 | /* nope, just zero it */ | ||
1595 | event->time_delta = 0; | ||
1596 | event->array[0] = 0; | ||
1597 | } | ||
1598 | |||
1599 | return skip_time_extend(event); | ||
1600 | } | ||
1601 | |||
1549 | /** | 1602 | /** |
1550 | * ring_buffer_update_event - update event type and data | 1603 | * ring_buffer_update_event - update event type and data |
1551 | * @event: the event to update | 1604 | * @event: the event to update |
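rb_add_time_stamp() above stores an oversized delta by splitting it across the event's time_delta field (low bits) and array[0] (high bits). A small arithmetic sketch of that split and the matching reassembly; TS_SHIFT is 27 in the kernel, but treat the exact width here as an assumption of the demo:

#include <stdio.h>
#include <stdint.h>

#define TS_SHIFT 27
#define TS_MASK  ((1ULL << TS_SHIFT) - 1)

int main(void)
{
	uint64_t delta = (123ULL << TS_SHIFT) + 456;   /* too big for 27 bits */
	uint32_t time_delta = delta & TS_MASK;         /* low 27 bits */
	uint32_t array0     = delta >> TS_SHIFT;       /* remaining high bits */

	/* read side: rb_update_write_stamp() style reassembly */
	uint64_t rebuilt = ((uint64_t)array0 << TS_SHIFT) + time_delta;

	printf("%d\n", rebuilt == delta);              /* prints 1 */
	return 0;
}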
@@ -1558,28 +1611,31 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) | |||
1558 | * data field. | 1611 | * data field. |
1559 | */ | 1612 | */ |
1560 | static void | 1613 | static void |
1561 | rb_update_event(struct ring_buffer_event *event, | 1614 | rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, |
1562 | unsigned type, unsigned length) | 1615 | struct ring_buffer_event *event, unsigned length, |
1616 | int add_timestamp, u64 delta) | ||
1563 | { | 1617 | { |
1564 | event->type_len = type; | 1618 | /* Only a commit updates the timestamp */ |
1565 | 1619 | if (unlikely(!rb_event_is_commit(cpu_buffer, event))) | |
1566 | switch (type) { | 1620 | delta = 0; |
1567 | |||
1568 | case RINGBUF_TYPE_PADDING: | ||
1569 | case RINGBUF_TYPE_TIME_EXTEND: | ||
1570 | case RINGBUF_TYPE_TIME_STAMP: | ||
1571 | break; | ||
1572 | 1621 | ||
1573 | case 0: | 1622 | /* |
1574 | length -= RB_EVNT_HDR_SIZE; | 1623 | * If we need to add a timestamp, then we |
1575 | if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) | 1624 | * add it to the start of the reserved space. |
1576 | event->array[0] = length; | 1625 | */ |
1577 | else | 1626 | if (unlikely(add_timestamp)) { |
1578 | event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); | 1627 | event = rb_add_time_stamp(event, delta); |
1579 | break; | 1628 | length -= RB_LEN_TIME_EXTEND; |
1580 | default: | 1629 | delta = 0; |
1581 | BUG(); | ||
1582 | } | 1630 | } |
1631 | |||
1632 | event->time_delta = delta; | ||
1633 | length -= RB_EVNT_HDR_SIZE; | ||
1634 | if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) { | ||
1635 | event->type_len = 0; | ||
1636 | event->array[0] = length; | ||
1637 | } else | ||
1638 | event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); | ||
1583 | } | 1639 | } |
1584 | 1640 | ||
1585 | /* | 1641 | /* |
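The tail of rb_update_event() keeps the existing data-length encoding: small payloads fold their size into type_len in RB_ALIGNMENT units, larger ones zero type_len and put the byte count in array[0]. A sketch of that encode/decode pair, ignoring the RB_FORCE_8BYTE_ALIGNMENT case and treating the constants (4-byte units, 28 encodable steps) as assumptions carried over from the kernel:

#include <stdio.h>

#define RB_ALIGNMENT        4
#define TYPE_LEN_MAX        28            /* RINGBUF_TYPE_DATA_TYPE_LEN_MAX */
#define RB_MAX_SMALL_DATA   (RB_ALIGNMENT * TYPE_LEN_MAX)
#define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))

struct toy_event {
	unsigned int type_len;
	unsigned int array0;
};

static void encode(struct toy_event *ev, unsigned int length)
{
	if (length > RB_MAX_SMALL_DATA) {
		ev->type_len = 0;          /* length lives in array0 */
		ev->array0 = length;
	} else {
		/* small form rounds the length up to the alignment */
		ev->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
	}
}

static unsigned int decode(const struct toy_event *ev)
{
	return ev->type_len ? ev->type_len * RB_ALIGNMENT : ev->array0;
}

int main(void)
{
	struct toy_event small = {0}, big = {0};

	encode(&small, 20);
	encode(&big, 200);
	printf("%u %u\n", decode(&small), decode(&big)); /* 20 200 */
	return 0;
}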
@@ -1823,10 +1879,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1823 | local_sub(length, &tail_page->write); | 1879 | local_sub(length, &tail_page->write); |
1824 | } | 1880 | } |
1825 | 1881 | ||
1826 | static struct ring_buffer_event * | 1882 | /* |
1883 | * This is the slow path, force gcc not to inline it. | ||
1884 | */ | ||
1885 | static noinline struct ring_buffer_event * | ||
1827 | rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | 1886 | rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, |
1828 | unsigned long length, unsigned long tail, | 1887 | unsigned long length, unsigned long tail, |
1829 | struct buffer_page *tail_page, u64 *ts) | 1888 | struct buffer_page *tail_page, u64 ts) |
1830 | { | 1889 | { |
1831 | struct buffer_page *commit_page = cpu_buffer->commit_page; | 1890 | struct buffer_page *commit_page = cpu_buffer->commit_page; |
1832 | struct ring_buffer *buffer = cpu_buffer->buffer; | 1891 | struct ring_buffer *buffer = cpu_buffer->buffer; |
@@ -1909,8 +1968,8 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1909 | * Nested commits always have zero deltas, so | 1968 | * Nested commits always have zero deltas, so |
1910 | * just reread the time stamp | 1969 | * just reread the time stamp |
1911 | */ | 1970 | */ |
1912 | *ts = rb_time_stamp(buffer); | 1971 | ts = rb_time_stamp(buffer); |
1913 | next_page->page->time_stamp = *ts; | 1972 | next_page->page->time_stamp = ts; |
1914 | } | 1973 | } |
1915 | 1974 | ||
1916 | out_again: | 1975 | out_again: |
@@ -1929,12 +1988,21 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1929 | 1988 | ||
1930 | static struct ring_buffer_event * | 1989 | static struct ring_buffer_event * |
1931 | __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | 1990 | __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, |
1932 | unsigned type, unsigned long length, u64 *ts) | 1991 | unsigned long length, u64 ts, |
1992 | u64 delta, int add_timestamp) | ||
1933 | { | 1993 | { |
1934 | struct buffer_page *tail_page; | 1994 | struct buffer_page *tail_page; |
1935 | struct ring_buffer_event *event; | 1995 | struct ring_buffer_event *event; |
1936 | unsigned long tail, write; | 1996 | unsigned long tail, write; |
1937 | 1997 | ||
1998 | /* | ||
1999 | * If the time delta since the last event is too big to | ||
2000 | * hold in the time field of the event, then we append a | ||
2001 | * TIME EXTEND event ahead of the data event. | ||
2002 | */ | ||
2003 | if (unlikely(add_timestamp)) | ||
2004 | length += RB_LEN_TIME_EXTEND; | ||
2005 | |||
1938 | tail_page = cpu_buffer->tail_page; | 2006 | tail_page = cpu_buffer->tail_page; |
1939 | write = local_add_return(length, &tail_page->write); | 2007 | write = local_add_return(length, &tail_page->write); |
1940 | 2008 | ||
@@ -1943,7 +2011,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1943 | tail = write - length; | 2011 | tail = write - length; |
1944 | 2012 | ||
1945 | /* See if we shot past the end of this buffer page */ | 2013 | /* See if we shot past the end of this buffer page */ |
1946 | if (write > BUF_PAGE_SIZE) | 2014 | if (unlikely(write > BUF_PAGE_SIZE)) |
1947 | return rb_move_tail(cpu_buffer, length, tail, | 2015 | return rb_move_tail(cpu_buffer, length, tail, |
1948 | tail_page, ts); | 2016 | tail_page, ts); |
1949 | 2017 | ||
@@ -1951,18 +2019,16 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1951 | 2019 | ||
1952 | event = __rb_page_index(tail_page, tail); | 2020 | event = __rb_page_index(tail_page, tail); |
1953 | kmemcheck_annotate_bitfield(event, bitfield); | 2021 | kmemcheck_annotate_bitfield(event, bitfield); |
1954 | rb_update_event(event, type, length); | 2022 | rb_update_event(cpu_buffer, event, length, add_timestamp, delta); |
1955 | 2023 | ||
1956 | /* The passed in type is zero for DATA */ | 2024 | local_inc(&tail_page->entries); |
1957 | if (likely(!type)) | ||
1958 | local_inc(&tail_page->entries); | ||
1959 | 2025 | ||
1960 | /* | 2026 | /* |
1961 | * If this is the first commit on the page, then update | 2027 | * If this is the first commit on the page, then update |
1962 | * its timestamp. | 2028 | * its timestamp. |
1963 | */ | 2029 | */ |
1964 | if (!tail) | 2030 | if (!tail) |
1965 | tail_page->page->time_stamp = *ts; | 2031 | tail_page->page->time_stamp = ts; |
1966 | 2032 | ||
1967 | return event; | 2033 | return event; |
1968 | } | 2034 | } |
@@ -1977,7 +2043,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, | |||
1977 | unsigned long addr; | 2043 | unsigned long addr; |
1978 | 2044 | ||
1979 | new_index = rb_event_index(event); | 2045 | new_index = rb_event_index(event); |
1980 | old_index = new_index + rb_event_length(event); | 2046 | old_index = new_index + rb_event_ts_length(event); |
1981 | addr = (unsigned long)event; | 2047 | addr = (unsigned long)event; |
1982 | addr &= PAGE_MASK; | 2048 | addr &= PAGE_MASK; |
1983 | 2049 | ||
@@ -2003,76 +2069,13 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, | |||
2003 | return 0; | 2069 | return 0; |
2004 | } | 2070 | } |
2005 | 2071 | ||
2006 | static int | ||
2007 | rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, | ||
2008 | u64 *ts, u64 *delta) | ||
2009 | { | ||
2010 | struct ring_buffer_event *event; | ||
2011 | int ret; | ||
2012 | |||
2013 | WARN_ONCE(*delta > (1ULL << 59), | ||
2014 | KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n", | ||
2015 | (unsigned long long)*delta, | ||
2016 | (unsigned long long)*ts, | ||
2017 | (unsigned long long)cpu_buffer->write_stamp); | ||
2018 | |||
2019 | /* | ||
2020 | * The delta is too big, we to add a | ||
2021 | * new timestamp. | ||
2022 | */ | ||
2023 | event = __rb_reserve_next(cpu_buffer, | ||
2024 | RINGBUF_TYPE_TIME_EXTEND, | ||
2025 | RB_LEN_TIME_EXTEND, | ||
2026 | ts); | ||
2027 | if (!event) | ||
2028 | return -EBUSY; | ||
2029 | |||
2030 | if (PTR_ERR(event) == -EAGAIN) | ||
2031 | return -EAGAIN; | ||
2032 | |||
2033 | /* Only a commited time event can update the write stamp */ | ||
2034 | if (rb_event_is_commit(cpu_buffer, event)) { | ||
2035 | /* | ||
2036 | * If this is the first on the page, then it was | ||
2037 | * updated with the page itself. Try to discard it | ||
2038 | * and if we can't just make it zero. | ||
2039 | */ | ||
2040 | if (rb_event_index(event)) { | ||
2041 | event->time_delta = *delta & TS_MASK; | ||
2042 | event->array[0] = *delta >> TS_SHIFT; | ||
2043 | } else { | ||
2044 | /* try to discard, since we do not need this */ | ||
2045 | if (!rb_try_to_discard(cpu_buffer, event)) { | ||
2046 | /* nope, just zero it */ | ||
2047 | event->time_delta = 0; | ||
2048 | event->array[0] = 0; | ||
2049 | } | ||
2050 | } | ||
2051 | cpu_buffer->write_stamp = *ts; | ||
2052 | /* let the caller know this was the commit */ | ||
2053 | ret = 1; | ||
2054 | } else { | ||
2055 | /* Try to discard the event */ | ||
2056 | if (!rb_try_to_discard(cpu_buffer, event)) { | ||
2057 | /* Darn, this is just wasted space */ | ||
2058 | event->time_delta = 0; | ||
2059 | event->array[0] = 0; | ||
2060 | } | ||
2061 | ret = 0; | ||
2062 | } | ||
2063 | |||
2064 | *delta = 0; | ||
2065 | |||
2066 | return ret; | ||
2067 | } | ||
2068 | |||
2069 | static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) | 2072 | static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) |
2070 | { | 2073 | { |
2071 | local_inc(&cpu_buffer->committing); | 2074 | local_inc(&cpu_buffer->committing); |
2072 | local_inc(&cpu_buffer->commits); | 2075 | local_inc(&cpu_buffer->commits); |
2073 | } | 2076 | } |
2074 | 2077 | ||
2075 | static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) | 2078 | static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) |
2076 | { | 2079 | { |
2077 | unsigned long commits; | 2080 | unsigned long commits; |
2078 | 2081 | ||
@@ -2110,9 +2113,10 @@ rb_reserve_next_event(struct ring_buffer *buffer, | |||
2110 | unsigned long length) | 2113 | unsigned long length) |
2111 | { | 2114 | { |
2112 | struct ring_buffer_event *event; | 2115 | struct ring_buffer_event *event; |
2113 | u64 ts, delta = 0; | 2116 | u64 ts, delta; |
2114 | int commit = 0; | ||
2115 | int nr_loops = 0; | 2117 | int nr_loops = 0; |
2118 | int add_timestamp; | ||
2119 | u64 diff; | ||
2116 | 2120 | ||
2117 | rb_start_commit(cpu_buffer); | 2121 | rb_start_commit(cpu_buffer); |
2118 | 2122 | ||
@@ -2133,6 +2137,9 @@ rb_reserve_next_event(struct ring_buffer *buffer, | |||
2133 | 2137 | ||
2134 | length = rb_calculate_event_length(length); | 2138 | length = rb_calculate_event_length(length); |
2135 | again: | 2139 | again: |
2140 | add_timestamp = 0; | ||
2141 | delta = 0; | ||
2142 | |||
2136 | /* | 2143 | /* |
2137 | * We allow for interrupts to reenter here and do a trace. | 2144 | * We allow for interrupts to reenter here and do a trace. |
2138 | * If one does, it will cause this original code to loop | 2145 | * If one does, it will cause this original code to loop |
@@ -2146,56 +2153,32 @@ rb_reserve_next_event(struct ring_buffer *buffer, | |||
2146 | goto out_fail; | 2153 | goto out_fail; |
2147 | 2154 | ||
2148 | ts = rb_time_stamp(cpu_buffer->buffer); | 2155 | ts = rb_time_stamp(cpu_buffer->buffer); |
2156 | diff = ts - cpu_buffer->write_stamp; | ||
2149 | 2157 | ||
2150 | /* | 2158 | /* make sure this diff is calculated here */ |
2151 | * Only the first commit can update the timestamp. | 2159 | barrier(); |
2152 | * Yes there is a race here. If an interrupt comes in | ||
2153 | * just after the conditional and it traces too, then it | ||
2154 | * will also check the deltas. More than one timestamp may | ||
2155 | * also be made. But only the entry that did the actual | ||
2156 | * commit will be something other than zero. | ||
2157 | */ | ||
2158 | if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page && | ||
2159 | rb_page_write(cpu_buffer->tail_page) == | ||
2160 | rb_commit_index(cpu_buffer))) { | ||
2161 | u64 diff; | ||
2162 | |||
2163 | diff = ts - cpu_buffer->write_stamp; | ||
2164 | |||
2165 | /* make sure this diff is calculated here */ | ||
2166 | barrier(); | ||
2167 | |||
2168 | /* Did the write stamp get updated already? */ | ||
2169 | if (unlikely(ts < cpu_buffer->write_stamp)) | ||
2170 | goto get_event; | ||
2171 | 2160 | ||
2161 | /* Did the write stamp get updated already? */ | ||
2162 | if (likely(ts >= cpu_buffer->write_stamp)) { | ||
2172 | delta = diff; | 2163 | delta = diff; |
2173 | if (unlikely(test_time_stamp(delta))) { | 2164 | if (unlikely(test_time_stamp(delta))) { |
2174 | 2165 | WARN_ONCE(delta > (1ULL << 59), | |
2175 | commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); | 2166 | KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n", |
2176 | if (commit == -EBUSY) | 2167 | (unsigned long long)delta, |
2177 | goto out_fail; | 2168 | (unsigned long long)ts, |
2178 | 2169 | (unsigned long long)cpu_buffer->write_stamp); | |
2179 | if (commit == -EAGAIN) | 2170 | add_timestamp = 1; |
2180 | goto again; | ||
2181 | |||
2182 | RB_WARN_ON(cpu_buffer, commit < 0); | ||
2183 | } | 2171 | } |
2184 | } | 2172 | } |
2185 | 2173 | ||
2186 | get_event: | 2174 | event = __rb_reserve_next(cpu_buffer, length, ts, |
2187 | event = __rb_reserve_next(cpu_buffer, 0, length, &ts); | 2175 | delta, add_timestamp); |
2188 | if (unlikely(PTR_ERR(event) == -EAGAIN)) | 2176 | if (unlikely(PTR_ERR(event) == -EAGAIN)) |
2189 | goto again; | 2177 | goto again; |
2190 | 2178 | ||
2191 | if (!event) | 2179 | if (!event) |
2192 | goto out_fail; | 2180 | goto out_fail; |
2193 | 2181 | ||
2194 | if (!rb_event_is_commit(cpu_buffer, event)) | ||
2195 | delta = 0; | ||
2196 | |||
2197 | event->time_delta = delta; | ||
2198 | |||
2199 | return event; | 2182 | return event; |
2200 | 2183 | ||
2201 | out_fail: | 2184 | out_fail: |
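The reworked rb_reserve_next_event() now makes one decision up front: if the delta since write_stamp fits the 27-bit field it rides in the event itself, otherwise add_timestamp is set and the reservation grows by RB_LEN_TIME_EXTEND. A condensed sketch of that decision, with the constants taken as assumptions:

#include <stdio.h>
#include <stdint.h>

#define TS_SHIFT            27
#define RB_LEN_TIME_EXTEND  8

static int test_time_stamp(uint64_t delta)
{
	return delta >= (1ULL << TS_SHIFT);    /* does it need a time extend? */
}

int main(void)
{
	uint64_t write_stamp = 1000, ts = 1000 + (1ULL << 30);
	uint64_t delta = ts - write_stamp;
	unsigned long length = 64;
	int add_timestamp = 0;

	if (test_time_stamp(delta))
		add_timestamp = 1;
	if (add_timestamp)
		length += RB_LEN_TIME_EXTEND;  /* reserve room for the prefix */

	printf("add_timestamp=%d length=%lu\n", add_timestamp, length);
	return 0;
}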
@@ -2207,13 +2190,9 @@ rb_reserve_next_event(struct ring_buffer *buffer, | |||
2207 | 2190 | ||
2208 | #define TRACE_RECURSIVE_DEPTH 16 | 2191 | #define TRACE_RECURSIVE_DEPTH 16 |
2209 | 2192 | ||
2210 | static int trace_recursive_lock(void) | 2193 | /* Keep this code out of the fast path cache */ |
2194 | static noinline void trace_recursive_fail(void) | ||
2211 | { | 2195 | { |
2212 | current->trace_recursion++; | ||
2213 | |||
2214 | if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH)) | ||
2215 | return 0; | ||
2216 | |||
2217 | /* Disable all tracing before we do anything else */ | 2196 | /* Disable all tracing before we do anything else */ |
2218 | tracing_off_permanent(); | 2197 | tracing_off_permanent(); |
2219 | 2198 | ||
@@ -2225,10 +2204,21 @@ static int trace_recursive_lock(void) | |||
2225 | in_nmi()); | 2204 | in_nmi()); |
2226 | 2205 | ||
2227 | WARN_ON_ONCE(1); | 2206 | WARN_ON_ONCE(1); |
2207 | } | ||
2208 | |||
2209 | static inline int trace_recursive_lock(void) | ||
2210 | { | ||
2211 | current->trace_recursion++; | ||
2212 | |||
2213 | if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH)) | ||
2214 | return 0; | ||
2215 | |||
2216 | trace_recursive_fail(); | ||
2217 | |||
2228 | return -1; | 2218 | return -1; |
2229 | } | 2219 | } |
2230 | 2220 | ||
2231 | static void trace_recursive_unlock(void) | 2221 | static inline void trace_recursive_unlock(void) |
2232 | { | 2222 | { |
2233 | WARN_ON_ONCE(!current->trace_recursion); | 2223 | WARN_ON_ONCE(!current->trace_recursion); |
2234 | 2224 | ||
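The recursion guard keeps its fast path tiny (increment, compare) and pushes the reporting into a separate noinline function so it stays out of the hot cache lines. A stand-alone sketch of that pattern, using a plain static counter in place of current->trace_recursion:

#include <stdio.h>

#define RECURSIVE_DEPTH 16

static int trace_recursion;   /* stands in for the per-task field */

static void __attribute__((noinline)) recursive_fail(void)
{
	/* slow path: report and disable further tracing */
	fprintf(stderr, "tracing recursion too deep (%d)\n", trace_recursion);
}

static inline int recursive_lock(void)
{
	if (++trace_recursion < RECURSIVE_DEPTH)
		return 0;
	recursive_fail();
	return -1;
}

static inline void recursive_unlock(void)
{
	trace_recursion--;
}

int main(void)
{
	if (!recursive_lock()) {
		/* ... trace work goes here ... */
		recursive_unlock();
	}
	return 0;
}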
@@ -2308,12 +2298,28 @@ static void | |||
2308 | rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, | 2298 | rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, |
2309 | struct ring_buffer_event *event) | 2299 | struct ring_buffer_event *event) |
2310 | { | 2300 | { |
2301 | u64 delta; | ||
2302 | |||
2311 | /* | 2303 | /* |
2312 | * The event first in the commit queue updates the | 2304 | * The event first in the commit queue updates the |
2313 | * time stamp. | 2305 | * time stamp. |
2314 | */ | 2306 | */ |
2315 | if (rb_event_is_commit(cpu_buffer, event)) | 2307 | if (rb_event_is_commit(cpu_buffer, event)) { |
2316 | cpu_buffer->write_stamp += event->time_delta; | 2308 | /* |
2309 | * A commit event that is first on a page | ||
2310 | * updates the write timestamp with the page stamp | ||
2311 | */ | ||
2312 | if (!rb_event_index(event)) | ||
2313 | cpu_buffer->write_stamp = | ||
2314 | cpu_buffer->commit_page->page->time_stamp; | ||
2315 | else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { | ||
2316 | delta = event->array[0]; | ||
2317 | delta <<= TS_SHIFT; | ||
2318 | delta += event->time_delta; | ||
2319 | cpu_buffer->write_stamp += delta; | ||
2320 | } else | ||
2321 | cpu_buffer->write_stamp += event->time_delta; | ||
2322 | } | ||
2317 | } | 2323 | } |
2318 | 2324 | ||
2319 | static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, | 2325 | static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, |
@@ -2353,6 +2359,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); | |||
2353 | 2359 | ||
2354 | static inline void rb_event_discard(struct ring_buffer_event *event) | 2360 | static inline void rb_event_discard(struct ring_buffer_event *event) |
2355 | { | 2361 | { |
2362 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) | ||
2363 | event = skip_time_extend(event); | ||
2364 | |||
2356 | /* array[0] holds the actual length for the discarded event */ | 2365 | /* array[0] holds the actual length for the discarded event */ |
2357 | event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; | 2366 | event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; |
2358 | event->type_len = RINGBUF_TYPE_PADDING; | 2367 | event->type_len = RINGBUF_TYPE_PADDING; |
@@ -3049,12 +3058,12 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts, | |||
3049 | 3058 | ||
3050 | again: | 3059 | again: |
3051 | /* | 3060 | /* |
3052 | * We repeat when a timestamp is encountered. It is possible | 3061 | * We repeat when a time extend is encountered. |
3053 | * to get multiple timestamps from an interrupt entering just | 3062 | * Since the time extend is always attached to a data event, |
3054 | * as one timestamp is about to be written, or from discarded | 3063 | * we should never loop more than once. |
3055 | * commits. The most that we can have is the number on a single page. | 3064 | * (We never hit the following condition more than twice). |
3056 | */ | 3065 | */ |
3057 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) | 3066 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2)) |
3058 | return NULL; | 3067 | return NULL; |
3059 | 3068 | ||
3060 | reader = rb_get_reader_page(cpu_buffer); | 3069 | reader = rb_get_reader_page(cpu_buffer); |
@@ -3130,14 +3139,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
3130 | return NULL; | 3139 | return NULL; |
3131 | 3140 | ||
3132 | /* | 3141 | /* |
3133 | * We repeat when a timestamp is encountered. | 3142 | * We repeat when a time extend is encountered. |
3134 | * We can get multiple timestamps by nested interrupts or also | 3143 | * Since the time extend is always attached to a data event, |
3135 | * if filtering is on (discarding commits). Since discarding | 3144 | * we should never loop more than once. |
3136 | * commits can be frequent we can get a lot of timestamps. | 3145 | * (We never hit the following condition more than twice). |
3137 | * But we limit them by not adding timestamps if they begin | ||
3138 | * at the start of a page. | ||
3139 | */ | 3146 | */ |
3140 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) | 3147 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2)) |
3141 | return NULL; | 3148 | return NULL; |
3142 | 3149 | ||
3143 | if (rb_per_cpu_empty(cpu_buffer)) | 3150 | if (rb_per_cpu_empty(cpu_buffer)) |
@@ -3835,7 +3842,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3835 | if (len > (commit - read)) | 3842 | if (len > (commit - read)) |
3836 | len = (commit - read); | 3843 | len = (commit - read); |
3837 | 3844 | ||
3838 | size = rb_event_length(event); | 3845 | /* Always keep the time extend and data together */ |
3846 | size = rb_event_ts_length(event); | ||
3839 | 3847 | ||
3840 | if (len < size) | 3848 | if (len < size) |
3841 | goto out_unlock; | 3849 | goto out_unlock; |
@@ -3857,7 +3865,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3857 | break; | 3865 | break; |
3858 | 3866 | ||
3859 | event = rb_reader_event(cpu_buffer); | 3867 | event = rb_reader_event(cpu_buffer); |
3860 | size = rb_event_length(event); | 3868 | /* Always keep the time extend and data together */ |
3869 | size = rb_event_ts_length(event); | ||
3861 | } while (len > size); | 3870 | } while (len > size); |
3862 | 3871 | ||
3863 | /* update bpage */ | 3872 | /* update bpage */ |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 001bcd2ccf4a..82d9b8106cd0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -3996,13 +3996,9 @@ static void tracing_init_debugfs_percpu(long cpu) | |||
3996 | { | 3996 | { |
3997 | struct dentry *d_percpu = tracing_dentry_percpu(); | 3997 | struct dentry *d_percpu = tracing_dentry_percpu(); |
3998 | struct dentry *d_cpu; | 3998 | struct dentry *d_cpu; |
3999 | /* strlen(cpu) + MAX(log10(cpu)) + '\0' */ | 3999 | char cpu_dir[30]; /* 30 characters should be more than enough */ |
4000 | char cpu_dir[7]; | ||
4001 | 4000 | ||
4002 | if (cpu > 999 || cpu < 0) | 4001 | snprintf(cpu_dir, 30, "cpu%ld", cpu); |
4003 | return; | ||
4004 | |||
4005 | sprintf(cpu_dir, "cpu%ld", cpu); | ||
4006 | d_cpu = debugfs_create_dir(cpu_dir, d_percpu); | 4002 | d_cpu = debugfs_create_dir(cpu_dir, d_percpu); |
4007 | if (!d_cpu) { | 4003 | if (!d_cpu) { |
4008 | pr_warning("Could not create debugfs '%s' entry\n", cpu_dir); | 4004 | pr_warning("Could not create debugfs '%s' entry\n", cpu_dir); |
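The trace.c hunk trades a tightly sized 7-byte buffer plus a cpu <= 999 guard for a generous 30-byte buffer and snprintf(), which bounds the write for any cpu number. A trivial illustration:

#include <stdio.h>

int main(void)
{
	char cpu_dir[30];
	long cpu = 123456;   /* would have overflowed the old 7-byte buffer */

	snprintf(cpu_dir, sizeof(cpu_dir), "cpu%ld", cpu);
	printf("%s\n", cpu_dir);   /* cpu123456 */
	return 0;
}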
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index b8d2852baa4a..2dec9bcde8b4 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
@@ -31,7 +31,6 @@ | |||
31 | #include <linux/perf_event.h> | 31 | #include <linux/perf_event.h> |
32 | #include <linux/stringify.h> | 32 | #include <linux/stringify.h> |
33 | #include <linux/limits.h> | 33 | #include <linux/limits.h> |
34 | #include <linux/uaccess.h> | ||
35 | #include <asm/bitsperlong.h> | 34 | #include <asm/bitsperlong.h> |
36 | 35 | ||
37 | #include "trace.h" | 36 | #include "trace.h" |
diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 0a67e041edf8..24dc60d9fa1f 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c | |||
@@ -63,12 +63,10 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) | |||
63 | stats->ac_ppid = pid_alive(tsk) ? | 63 | stats->ac_ppid = pid_alive(tsk) ? |
64 | rcu_dereference(tsk->real_parent)->tgid : 0; | 64 | rcu_dereference(tsk->real_parent)->tgid : 0; |
65 | rcu_read_unlock(); | 65 | rcu_read_unlock(); |
66 | stats->ac_utime = cputime_to_msecs(tsk->utime) * USEC_PER_MSEC; | 66 | stats->ac_utime = cputime_to_usecs(tsk->utime); |
67 | stats->ac_stime = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC; | 67 | stats->ac_stime = cputime_to_usecs(tsk->stime); |
68 | stats->ac_utimescaled = | 68 | stats->ac_utimescaled = cputime_to_usecs(tsk->utimescaled); |
69 | cputime_to_msecs(tsk->utimescaled) * USEC_PER_MSEC; | 69 | stats->ac_stimescaled = cputime_to_usecs(tsk->stimescaled); |
70 | stats->ac_stimescaled = | ||
71 | cputime_to_msecs(tsk->stimescaled) * USEC_PER_MSEC; | ||
72 | stats->ac_minflt = tsk->min_flt; | 70 | stats->ac_minflt = tsk->min_flt; |
73 | stats->ac_majflt = tsk->maj_flt; | 71 | stats->ac_majflt = tsk->maj_flt; |
74 | 72 | ||
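The tsacct.c change converts cputime straight to microseconds instead of going through milliseconds and multiplying back up, which preserves sub-millisecond detail. A tiny numeric sketch, modelling cputime as nanoseconds purely for the demo (the real unit is architecture dependent):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t cputime_ns = 1234567;                      /* ~1.23 ms */
	uint64_t via_msecs = (cputime_ns / 1000000) * 1000; /* old path: 1000 us */
	uint64_t usecs     = cputime_ns / 1000;             /* new path: 1234 us */

	printf("%llu %llu\n",
	       (unsigned long long)via_msecs,
	       (unsigned long long)usecs);
	return 0;
}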
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index bafba687a6d8..6e3c41a4024c 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -43,7 +43,7 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); | |||
43 | static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); | 43 | static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); |
44 | #endif | 44 | #endif |
45 | 45 | ||
46 | static int __initdata no_watchdog; | 46 | static int no_watchdog; |
47 | 47 | ||
48 | 48 | ||
49 | /* boot commands */ | 49 | /* boot commands */ |