author    Ingo Molnar <mingo@elte.hu>  2010-11-18 07:22:14 -0500
committer Ingo Molnar <mingo@elte.hu>  2010-11-18 07:22:26 -0500
commit    92fd4d4d67b945c0766416284d4ab236b31542c4
tree      00b8b5f90748f752ccaba3dddbe271091d93543a /kernel
parent    fe7de49f9d4e53f24ec9ef762a503f70b562341c
parent    e53beacd23d9cb47590da6a7a7f6d417b941a994
Merge commit 'v2.6.37-rc2' into sched/core
Merge reason: Move to a .37-rc base.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
62 files changed, 1301 insertions, 911 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index d96045789b54..77770a034d59 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -467,23 +467,16 @@ static int audit_prepare_user_tty(pid_t pid, uid_t loginuid, u32 sessionid) | |||
467 | struct task_struct *tsk; | 467 | struct task_struct *tsk; |
468 | int err; | 468 | int err; |
469 | 469 | ||
470 | read_lock(&tasklist_lock); | 470 | rcu_read_lock(); |
471 | tsk = find_task_by_vpid(pid); | 471 | tsk = find_task_by_vpid(pid); |
472 | err = -ESRCH; | 472 | if (!tsk) { |
473 | if (!tsk) | 473 | rcu_read_unlock(); |
474 | goto out; | 474 | return -ESRCH; |
475 | err = 0; | 475 | } |
476 | 476 | get_task_struct(tsk); | |
477 | spin_lock_irq(&tsk->sighand->siglock); | 477 | rcu_read_unlock(); |
478 | if (!tsk->signal->audit_tty) | 478 | err = tty_audit_push_task(tsk, loginuid, sessionid); |
479 | err = -EPERM; | 479 | put_task_struct(tsk); |
480 | spin_unlock_irq(&tsk->sighand->siglock); | ||
481 | if (err) | ||
482 | goto out; | ||
483 | |||
484 | tty_audit_push_task(tsk, loginuid, sessionid); | ||
485 | out: | ||
486 | read_unlock(&tasklist_lock); | ||
487 | return err; | 480 | return err; |
488 | } | 481 | } |
489 | 482 | ||
@@ -506,7 +499,7 @@ int audit_send_list(void *_dest) | |||
506 | } | 499 | } |
507 | 500 | ||
508 | struct sk_buff *audit_make_reply(int pid, int seq, int type, int done, | 501 | struct sk_buff *audit_make_reply(int pid, int seq, int type, int done, |
509 | int multi, void *payload, int size) | 502 | int multi, const void *payload, int size) |
510 | { | 503 | { |
511 | struct sk_buff *skb; | 504 | struct sk_buff *skb; |
512 | struct nlmsghdr *nlh; | 505 | struct nlmsghdr *nlh; |
@@ -555,8 +548,8 @@ static int audit_send_reply_thread(void *arg) | |||
555 | * Allocates an skb, builds the netlink message, and sends it to the pid. | 548 | * Allocates an skb, builds the netlink message, and sends it to the pid. |
556 | * No failure notifications. | 549 | * No failure notifications. |
557 | */ | 550 | */ |
558 | void audit_send_reply(int pid, int seq, int type, int done, int multi, | 551 | static void audit_send_reply(int pid, int seq, int type, int done, int multi, |
559 | void *payload, int size) | 552 | const void *payload, int size) |
560 | { | 553 | { |
561 | struct sk_buff *skb; | 554 | struct sk_buff *skb; |
562 | struct task_struct *tsk; | 555 | struct task_struct *tsk; |
@@ -880,40 +873,40 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
880 | case AUDIT_TTY_GET: { | 873 | case AUDIT_TTY_GET: { |
881 | struct audit_tty_status s; | 874 | struct audit_tty_status s; |
882 | struct task_struct *tsk; | 875 | struct task_struct *tsk; |
876 | unsigned long flags; | ||
883 | 877 | ||
884 | read_lock(&tasklist_lock); | 878 | rcu_read_lock(); |
885 | tsk = find_task_by_vpid(pid); | 879 | tsk = find_task_by_vpid(pid); |
886 | if (!tsk) | 880 | if (tsk && lock_task_sighand(tsk, &flags)) { |
887 | err = -ESRCH; | ||
888 | else { | ||
889 | spin_lock_irq(&tsk->sighand->siglock); | ||
890 | s.enabled = tsk->signal->audit_tty != 0; | 881 | s.enabled = tsk->signal->audit_tty != 0; |
891 | spin_unlock_irq(&tsk->sighand->siglock); | 882 | unlock_task_sighand(tsk, &flags); |
892 | } | 883 | } else |
893 | read_unlock(&tasklist_lock); | 884 | err = -ESRCH; |
894 | audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_TTY_GET, 0, 0, | 885 | rcu_read_unlock(); |
895 | &s, sizeof(s)); | 886 | |
887 | if (!err) | ||
888 | audit_send_reply(NETLINK_CB(skb).pid, seq, | ||
889 | AUDIT_TTY_GET, 0, 0, &s, sizeof(s)); | ||
896 | break; | 890 | break; |
897 | } | 891 | } |
898 | case AUDIT_TTY_SET: { | 892 | case AUDIT_TTY_SET: { |
899 | struct audit_tty_status *s; | 893 | struct audit_tty_status *s; |
900 | struct task_struct *tsk; | 894 | struct task_struct *tsk; |
895 | unsigned long flags; | ||
901 | 896 | ||
902 | if (nlh->nlmsg_len < sizeof(struct audit_tty_status)) | 897 | if (nlh->nlmsg_len < sizeof(struct audit_tty_status)) |
903 | return -EINVAL; | 898 | return -EINVAL; |
904 | s = data; | 899 | s = data; |
905 | if (s->enabled != 0 && s->enabled != 1) | 900 | if (s->enabled != 0 && s->enabled != 1) |
906 | return -EINVAL; | 901 | return -EINVAL; |
907 | read_lock(&tasklist_lock); | 902 | rcu_read_lock(); |
908 | tsk = find_task_by_vpid(pid); | 903 | tsk = find_task_by_vpid(pid); |
909 | if (!tsk) | 904 | if (tsk && lock_task_sighand(tsk, &flags)) { |
910 | err = -ESRCH; | ||
911 | else { | ||
912 | spin_lock_irq(&tsk->sighand->siglock); | ||
913 | tsk->signal->audit_tty = s->enabled != 0; | 905 | tsk->signal->audit_tty = s->enabled != 0; |
914 | spin_unlock_irq(&tsk->sighand->siglock); | 906 | unlock_task_sighand(tsk, &flags); |
915 | } | 907 | } else |
916 | read_unlock(&tasklist_lock); | 908 | err = -ESRCH; |
909 | rcu_read_unlock(); | ||
917 | break; | 910 | break; |
918 | } | 911 | } |
919 | default: | 912 | default: |
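The audit.c hunks above drop read_lock(&tasklist_lock) in favour of an RCU-protected task lookup, pinning the task with a reference count before leaving the RCU read-side section. A minimal sketch of that idiom, assuming a hypothetical helper name (the kernel APIs themselves are real):

```c
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/errno.h>

/* Hypothetical helper illustrating the lookup pattern adopted above. */
static int act_on_task_by_vpid(pid_t pid)
{
	struct task_struct *tsk;

	rcu_read_lock();
	tsk = find_task_by_vpid(pid);	/* RCU-safe pid -> task lookup */
	if (!tsk) {
		rcu_read_unlock();
		return -ESRCH;
	}
	get_task_struct(tsk);		/* pin the task beyond the RCU section */
	rcu_read_unlock();

	/* ... operate on tsk without tasklist_lock held ... */

	put_task_struct(tsk);
	return 0;
}
```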
diff --git a/kernel/audit.h b/kernel/audit.h
index f7206db4e13d..91e7071c4d2c 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -84,10 +84,7 @@ extern int audit_compare_dname_path(const char *dname, const char *path, | |||
84 | int *dirlen); | 84 | int *dirlen); |
85 | extern struct sk_buff * audit_make_reply(int pid, int seq, int type, | 85 | extern struct sk_buff * audit_make_reply(int pid, int seq, int type, |
86 | int done, int multi, | 86 | int done, int multi, |
87 | void *payload, int size); | 87 | const void *payload, int size); |
88 | extern void audit_send_reply(int pid, int seq, int type, | ||
89 | int done, int multi, | ||
90 | void *payload, int size); | ||
91 | extern void audit_panic(const char *message); | 88 | extern void audit_panic(const char *message); |
92 | 89 | ||
93 | struct audit_netlink_list { | 90 | struct audit_netlink_list { |
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 7f18d3a4527e..37b2bea170c8 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -223,7 +223,7 @@ static void untag_chunk(struct node *p) | |||
223 | { | 223 | { |
224 | struct audit_chunk *chunk = find_chunk(p); | 224 | struct audit_chunk *chunk = find_chunk(p); |
225 | struct fsnotify_mark *entry = &chunk->mark; | 225 | struct fsnotify_mark *entry = &chunk->mark; |
226 | struct audit_chunk *new; | 226 | struct audit_chunk *new = NULL; |
227 | struct audit_tree *owner; | 227 | struct audit_tree *owner; |
228 | int size = chunk->count - 1; | 228 | int size = chunk->count - 1; |
229 | int i, j; | 229 | int i, j; |
@@ -232,9 +232,14 @@ static void untag_chunk(struct node *p) | |||
232 | 232 | ||
233 | spin_unlock(&hash_lock); | 233 | spin_unlock(&hash_lock); |
234 | 234 | ||
235 | if (size) | ||
236 | new = alloc_chunk(size); | ||
237 | |||
235 | spin_lock(&entry->lock); | 238 | spin_lock(&entry->lock); |
236 | if (chunk->dead || !entry->i.inode) { | 239 | if (chunk->dead || !entry->i.inode) { |
237 | spin_unlock(&entry->lock); | 240 | spin_unlock(&entry->lock); |
241 | if (new) | ||
242 | free_chunk(new); | ||
238 | goto out; | 243 | goto out; |
239 | } | 244 | } |
240 | 245 | ||
@@ -255,9 +260,9 @@ static void untag_chunk(struct node *p) | |||
255 | goto out; | 260 | goto out; |
256 | } | 261 | } |
257 | 262 | ||
258 | new = alloc_chunk(size); | ||
259 | if (!new) | 263 | if (!new) |
260 | goto Fallback; | 264 | goto Fallback; |
265 | |||
261 | fsnotify_duplicate_mark(&new->mark, entry); | 266 | fsnotify_duplicate_mark(&new->mark, entry); |
262 | if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.i.inode, NULL, 1)) { | 267 | if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.i.inode, NULL, 1)) { |
263 | free_chunk(new); | 268 | free_chunk(new); |
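The audit_tree.c change moves alloc_chunk() in untag_chunk() ahead of spin_lock(&entry->lock), so the potentially sleeping allocation no longer happens under a spinlock; an unneeded allocation is simply freed on the early-exit path. A rough sketch of the same pre-allocation pattern with placeholder names (replace_item, example_lock and still_valid are illustrative):

```c
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/errno.h>
#include <linux/types.h>

static DEFINE_SPINLOCK(example_lock);

/* Illustrative only: allocate while sleeping is allowed, decide under the lock. */
static int replace_item(size_t size, bool still_valid)
{
	void *new = NULL;

	if (size)
		new = kmalloc(size, GFP_KERNEL);	/* may sleep, so do it before locking */

	spin_lock(&example_lock);
	if (!still_valid) {
		spin_unlock(&example_lock);
		kfree(new);	/* the preallocation turned out to be unneeded */
		return 0;
	}
	if (!new) {
		spin_unlock(&example_lock);
		return -ENOMEM;
	}
	/* ... publish 'new' while holding the lock ... */
	spin_unlock(&example_lock);
	return 0;
}
```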
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index f0c9b2e7542d..d2e3c7866460 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -60,7 +60,7 @@ struct audit_parent { | |||
60 | }; | 60 | }; |
61 | 61 | ||
62 | /* fsnotify handle. */ | 62 | /* fsnotify handle. */ |
63 | struct fsnotify_group *audit_watch_group; | 63 | static struct fsnotify_group *audit_watch_group; |
64 | 64 | ||
65 | /* fsnotify events we care about. */ | 65 | /* fsnotify events we care about. */ |
66 | #define AUDIT_FS_WATCH (FS_MOVE | FS_CREATE | FS_DELETE | FS_DELETE_SELF |\ | 66 | #define AUDIT_FS_WATCH (FS_MOVE | FS_CREATE | FS_DELETE | FS_DELETE_SELF |\ |
@@ -123,7 +123,7 @@ void audit_put_watch(struct audit_watch *watch) | |||
123 | } | 123 | } |
124 | } | 124 | } |
125 | 125 | ||
126 | void audit_remove_watch(struct audit_watch *watch) | 126 | static void audit_remove_watch(struct audit_watch *watch) |
127 | { | 127 | { |
128 | list_del(&watch->wlist); | 128 | list_del(&watch->wlist); |
129 | audit_put_parent(watch->parent); | 129 | audit_put_parent(watch->parent); |
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index eb7675499fb5..add2819af71b 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1252,6 +1252,18 @@ static int audit_filter_user_rules(struct netlink_skb_parms *cb, | |||
1252 | case AUDIT_LOGINUID: | 1252 | case AUDIT_LOGINUID: |
1253 | result = audit_comparator(cb->loginuid, f->op, f->val); | 1253 | result = audit_comparator(cb->loginuid, f->op, f->val); |
1254 | break; | 1254 | break; |
1255 | case AUDIT_SUBJ_USER: | ||
1256 | case AUDIT_SUBJ_ROLE: | ||
1257 | case AUDIT_SUBJ_TYPE: | ||
1258 | case AUDIT_SUBJ_SEN: | ||
1259 | case AUDIT_SUBJ_CLR: | ||
1260 | if (f->lsm_rule) | ||
1261 | result = security_audit_rule_match(cb->sid, | ||
1262 | f->type, | ||
1263 | f->op, | ||
1264 | f->lsm_rule, | ||
1265 | NULL); | ||
1266 | break; | ||
1255 | } | 1267 | } |
1256 | 1268 | ||
1257 | if (!result) | 1269 | if (!result) |
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 1b31c130d034..f49a0318c2ed 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -241,6 +241,10 @@ struct audit_context { | |||
241 | pid_t pid; | 241 | pid_t pid; |
242 | struct audit_cap_data cap; | 242 | struct audit_cap_data cap; |
243 | } capset; | 243 | } capset; |
244 | struct { | ||
245 | int fd; | ||
246 | int flags; | ||
247 | } mmap; | ||
244 | }; | 248 | }; |
245 | int fds[2]; | 249 | int fds[2]; |
246 | 250 | ||
@@ -1305,6 +1309,10 @@ static void show_special(struct audit_context *context, int *call_panic) | |||
1305 | audit_log_cap(ab, "cap_pp", &context->capset.cap.permitted); | 1309 | audit_log_cap(ab, "cap_pp", &context->capset.cap.permitted); |
1306 | audit_log_cap(ab, "cap_pe", &context->capset.cap.effective); | 1310 | audit_log_cap(ab, "cap_pe", &context->capset.cap.effective); |
1307 | break; } | 1311 | break; } |
1312 | case AUDIT_MMAP: { | ||
1313 | audit_log_format(ab, "fd=%d flags=0x%x", context->mmap.fd, | ||
1314 | context->mmap.flags); | ||
1315 | break; } | ||
1308 | } | 1316 | } |
1309 | audit_log_end(ab); | 1317 | audit_log_end(ab); |
1310 | } | 1318 | } |
@@ -2476,6 +2484,14 @@ void __audit_log_capset(pid_t pid, | |||
2476 | context->type = AUDIT_CAPSET; | 2484 | context->type = AUDIT_CAPSET; |
2477 | } | 2485 | } |
2478 | 2486 | ||
2487 | void __audit_mmap_fd(int fd, int flags) | ||
2488 | { | ||
2489 | struct audit_context *context = current->audit_context; | ||
2490 | context->mmap.fd = fd; | ||
2491 | context->mmap.flags = flags; | ||
2492 | context->type = AUDIT_MMAP; | ||
2493 | } | ||
2494 | |||
2479 | /** | 2495 | /** |
2480 | * audit_core_dumps - record information about processes that end abnormally | 2496 | * audit_core_dumps - record information about processes that end abnormally |
2481 | * @signr: signal value | 2497 | * @signr: signal value |
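The new __audit_mmap_fd() hook above stores the fd and flags of an mmap() call in the current audit context, which show_special() then emits as an AUDIT_MMAP record. Hooks of this kind are conventionally reached through an inline wrapper in <linux/audit.h> that stays a no-op unless syscall auditing is active; the wrapper below is an illustrative sketch of that convention, not copied from this patch:

```c
/* Sketch of the usual <linux/audit.h>-style wrapper around the new hook. */
static inline void audit_mmap_fd(int fd, int flags)
{
	if (unlikely(!audit_dummy_context()))
		__audit_mmap_fd(fd, flags);
}
```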
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 291ba3d04bea..66a416b42c18 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -52,7 +52,6 @@ | |||
52 | #include <linux/cgroupstats.h> | 52 | #include <linux/cgroupstats.h> |
53 | #include <linux/hash.h> | 53 | #include <linux/hash.h> |
54 | #include <linux/namei.h> | 54 | #include <linux/namei.h> |
55 | #include <linux/smp_lock.h> | ||
56 | #include <linux/pid_namespace.h> | 55 | #include <linux/pid_namespace.h> |
57 | #include <linux/idr.h> | 56 | #include <linux/idr.h> |
58 | #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ | 57 | #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ |
@@ -244,6 +243,11 @@ static int notify_on_release(const struct cgroup *cgrp) | |||
244 | return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); | 243 | return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); |
245 | } | 244 | } |
246 | 245 | ||
246 | static int clone_children(const struct cgroup *cgrp) | ||
247 | { | ||
248 | return test_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); | ||
249 | } | ||
250 | |||
247 | /* | 251 | /* |
248 | * for_each_subsys() allows you to iterate on each subsystem attached to | 252 | * for_each_subsys() allows you to iterate on each subsystem attached to |
249 | * an active hierarchy | 253 | * an active hierarchy |
@@ -778,6 +782,7 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb) | |||
778 | struct inode *inode = new_inode(sb); | 782 | struct inode *inode = new_inode(sb); |
779 | 783 | ||
780 | if (inode) { | 784 | if (inode) { |
785 | inode->i_ino = get_next_ino(); | ||
781 | inode->i_mode = mode; | 786 | inode->i_mode = mode; |
782 | inode->i_uid = current_fsuid(); | 787 | inode->i_uid = current_fsuid(); |
783 | inode->i_gid = current_fsgid(); | 788 | inode->i_gid = current_fsgid(); |
@@ -1040,6 +1045,8 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
1040 | seq_puts(seq, ",noprefix"); | 1045 | seq_puts(seq, ",noprefix"); |
1041 | if (strlen(root->release_agent_path)) | 1046 | if (strlen(root->release_agent_path)) |
1042 | seq_printf(seq, ",release_agent=%s", root->release_agent_path); | 1047 | seq_printf(seq, ",release_agent=%s", root->release_agent_path); |
1048 | if (clone_children(&root->top_cgroup)) | ||
1049 | seq_puts(seq, ",clone_children"); | ||
1043 | if (strlen(root->name)) | 1050 | if (strlen(root->name)) |
1044 | seq_printf(seq, ",name=%s", root->name); | 1051 | seq_printf(seq, ",name=%s", root->name); |
1045 | mutex_unlock(&cgroup_mutex); | 1052 | mutex_unlock(&cgroup_mutex); |
@@ -1050,6 +1057,7 @@ struct cgroup_sb_opts { | |||
1050 | unsigned long subsys_bits; | 1057 | unsigned long subsys_bits; |
1051 | unsigned long flags; | 1058 | unsigned long flags; |
1052 | char *release_agent; | 1059 | char *release_agent; |
1060 | bool clone_children; | ||
1053 | char *name; | 1061 | char *name; |
1054 | /* User explicitly requested empty subsystem */ | 1062 | /* User explicitly requested empty subsystem */ |
1055 | bool none; | 1063 | bool none; |
@@ -1066,7 +1074,8 @@ struct cgroup_sb_opts { | |||
1066 | */ | 1074 | */ |
1067 | static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | 1075 | static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) |
1068 | { | 1076 | { |
1069 | char *token, *o = data ?: "all"; | 1077 | char *token, *o = data; |
1078 | bool all_ss = false, one_ss = false; | ||
1070 | unsigned long mask = (unsigned long)-1; | 1079 | unsigned long mask = (unsigned long)-1; |
1071 | int i; | 1080 | int i; |
1072 | bool module_pin_failed = false; | 1081 | bool module_pin_failed = false; |
@@ -1082,22 +1091,27 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
1082 | while ((token = strsep(&o, ",")) != NULL) { | 1091 | while ((token = strsep(&o, ",")) != NULL) { |
1083 | if (!*token) | 1092 | if (!*token) |
1084 | return -EINVAL; | 1093 | return -EINVAL; |
1085 | if (!strcmp(token, "all")) { | 1094 | if (!strcmp(token, "none")) { |
1086 | /* Add all non-disabled subsystems */ | ||
1087 | opts->subsys_bits = 0; | ||
1088 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | ||
1089 | struct cgroup_subsys *ss = subsys[i]; | ||
1090 | if (ss == NULL) | ||
1091 | continue; | ||
1092 | if (!ss->disabled) | ||
1093 | opts->subsys_bits |= 1ul << i; | ||
1094 | } | ||
1095 | } else if (!strcmp(token, "none")) { | ||
1096 | /* Explicitly have no subsystems */ | 1095 | /* Explicitly have no subsystems */ |
1097 | opts->none = true; | 1096 | opts->none = true; |
1098 | } else if (!strcmp(token, "noprefix")) { | 1097 | continue; |
1098 | } | ||
1099 | if (!strcmp(token, "all")) { | ||
1100 | /* Mutually exclusive option 'all' + subsystem name */ | ||
1101 | if (one_ss) | ||
1102 | return -EINVAL; | ||
1103 | all_ss = true; | ||
1104 | continue; | ||
1105 | } | ||
1106 | if (!strcmp(token, "noprefix")) { | ||
1099 | set_bit(ROOT_NOPREFIX, &opts->flags); | 1107 | set_bit(ROOT_NOPREFIX, &opts->flags); |
1100 | } else if (!strncmp(token, "release_agent=", 14)) { | 1108 | continue; |
1109 | } | ||
1110 | if (!strcmp(token, "clone_children")) { | ||
1111 | opts->clone_children = true; | ||
1112 | continue; | ||
1113 | } | ||
1114 | if (!strncmp(token, "release_agent=", 14)) { | ||
1101 | /* Specifying two release agents is forbidden */ | 1115 | /* Specifying two release agents is forbidden */ |
1102 | if (opts->release_agent) | 1116 | if (opts->release_agent) |
1103 | return -EINVAL; | 1117 | return -EINVAL; |
@@ -1105,7 +1119,9 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
1105 | kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL); | 1119 | kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL); |
1106 | if (!opts->release_agent) | 1120 | if (!opts->release_agent) |
1107 | return -ENOMEM; | 1121 | return -ENOMEM; |
1108 | } else if (!strncmp(token, "name=", 5)) { | 1122 | continue; |
1123 | } | ||
1124 | if (!strncmp(token, "name=", 5)) { | ||
1109 | const char *name = token + 5; | 1125 | const char *name = token + 5; |
1110 | /* Can't specify an empty name */ | 1126 | /* Can't specify an empty name */ |
1111 | if (!strlen(name)) | 1127 | if (!strlen(name)) |
@@ -1127,20 +1143,44 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
1127 | GFP_KERNEL); | 1143 | GFP_KERNEL); |
1128 | if (!opts->name) | 1144 | if (!opts->name) |
1129 | return -ENOMEM; | 1145 | return -ENOMEM; |
1130 | } else { | 1146 | |
1131 | struct cgroup_subsys *ss; | 1147 | continue; |
1132 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 1148 | } |
1133 | ss = subsys[i]; | 1149 | |
1134 | if (ss == NULL) | 1150 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
1135 | continue; | 1151 | struct cgroup_subsys *ss = subsys[i]; |
1136 | if (!strcmp(token, ss->name)) { | 1152 | if (ss == NULL) |
1137 | if (!ss->disabled) | 1153 | continue; |
1138 | set_bit(i, &opts->subsys_bits); | 1154 | if (strcmp(token, ss->name)) |
1139 | break; | 1155 | continue; |
1140 | } | 1156 | if (ss->disabled) |
1141 | } | 1157 | continue; |
1142 | if (i == CGROUP_SUBSYS_COUNT) | 1158 | |
1143 | return -ENOENT; | 1159 | /* Mutually exclusive option 'all' + subsystem name */ |
1160 | if (all_ss) | ||
1161 | return -EINVAL; | ||
1162 | set_bit(i, &opts->subsys_bits); | ||
1163 | one_ss = true; | ||
1164 | |||
1165 | break; | ||
1166 | } | ||
1167 | if (i == CGROUP_SUBSYS_COUNT) | ||
1168 | return -ENOENT; | ||
1169 | } | ||
1170 | |||
1171 | /* | ||
1172 | * If the 'all' option was specified select all the subsystems, | ||
1173 | * otherwise 'all, 'none' and a subsystem name options were not | ||
1174 | * specified, let's default to 'all' | ||
1175 | */ | ||
1176 | if (all_ss || (!all_ss && !one_ss && !opts->none)) { | ||
1177 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | ||
1178 | struct cgroup_subsys *ss = subsys[i]; | ||
1179 | if (ss == NULL) | ||
1180 | continue; | ||
1181 | if (ss->disabled) | ||
1182 | continue; | ||
1183 | set_bit(i, &opts->subsys_bits); | ||
1144 | } | 1184 | } |
1145 | } | 1185 | } |
1146 | 1186 | ||
@@ -1222,7 +1262,6 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
1222 | struct cgroup *cgrp = &root->top_cgroup; | 1262 | struct cgroup *cgrp = &root->top_cgroup; |
1223 | struct cgroup_sb_opts opts; | 1263 | struct cgroup_sb_opts opts; |
1224 | 1264 | ||
1225 | lock_kernel(); | ||
1226 | mutex_lock(&cgrp->dentry->d_inode->i_mutex); | 1265 | mutex_lock(&cgrp->dentry->d_inode->i_mutex); |
1227 | mutex_lock(&cgroup_mutex); | 1266 | mutex_lock(&cgroup_mutex); |
1228 | 1267 | ||
@@ -1255,7 +1294,6 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
1255 | kfree(opts.name); | 1294 | kfree(opts.name); |
1256 | mutex_unlock(&cgroup_mutex); | 1295 | mutex_unlock(&cgroup_mutex); |
1257 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); | 1296 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); |
1258 | unlock_kernel(); | ||
1259 | return ret; | 1297 | return ret; |
1260 | } | 1298 | } |
1261 | 1299 | ||
@@ -1357,6 +1395,8 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts) | |||
1357 | strcpy(root->release_agent_path, opts->release_agent); | 1395 | strcpy(root->release_agent_path, opts->release_agent); |
1358 | if (opts->name) | 1396 | if (opts->name) |
1359 | strcpy(root->name, opts->name); | 1397 | strcpy(root->name, opts->name); |
1398 | if (opts->clone_children) | ||
1399 | set_bit(CGRP_CLONE_CHILDREN, &root->top_cgroup.flags); | ||
1360 | return root; | 1400 | return root; |
1361 | } | 1401 | } |
1362 | 1402 | ||
@@ -1420,9 +1460,9 @@ static int cgroup_get_rootdir(struct super_block *sb) | |||
1420 | return 0; | 1460 | return 0; |
1421 | } | 1461 | } |
1422 | 1462 | ||
1423 | static int cgroup_get_sb(struct file_system_type *fs_type, | 1463 | static struct dentry *cgroup_mount(struct file_system_type *fs_type, |
1424 | int flags, const char *unused_dev_name, | 1464 | int flags, const char *unused_dev_name, |
1425 | void *data, struct vfsmount *mnt) | 1465 | void *data) |
1426 | { | 1466 | { |
1427 | struct cgroup_sb_opts opts; | 1467 | struct cgroup_sb_opts opts; |
1428 | struct cgroupfs_root *root; | 1468 | struct cgroupfs_root *root; |
@@ -1556,10 +1596,9 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
1556 | drop_parsed_module_refcounts(opts.subsys_bits); | 1596 | drop_parsed_module_refcounts(opts.subsys_bits); |
1557 | } | 1597 | } |
1558 | 1598 | ||
1559 | simple_set_mnt(mnt, sb); | ||
1560 | kfree(opts.release_agent); | 1599 | kfree(opts.release_agent); |
1561 | kfree(opts.name); | 1600 | kfree(opts.name); |
1562 | return 0; | 1601 | return dget(sb->s_root); |
1563 | 1602 | ||
1564 | drop_new_super: | 1603 | drop_new_super: |
1565 | deactivate_locked_super(sb); | 1604 | deactivate_locked_super(sb); |
@@ -1568,8 +1607,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
1568 | out_err: | 1607 | out_err: |
1569 | kfree(opts.release_agent); | 1608 | kfree(opts.release_agent); |
1570 | kfree(opts.name); | 1609 | kfree(opts.name); |
1571 | 1610 | return ERR_PTR(ret); | |
1572 | return ret; | ||
1573 | } | 1611 | } |
1574 | 1612 | ||
1575 | static void cgroup_kill_sb(struct super_block *sb) { | 1613 | static void cgroup_kill_sb(struct super_block *sb) { |
@@ -1619,7 +1657,7 @@ static void cgroup_kill_sb(struct super_block *sb) { | |||
1619 | 1657 | ||
1620 | static struct file_system_type cgroup_fs_type = { | 1658 | static struct file_system_type cgroup_fs_type = { |
1621 | .name = "cgroup", | 1659 | .name = "cgroup", |
1622 | .get_sb = cgroup_get_sb, | 1660 | .mount = cgroup_mount, |
1623 | .kill_sb = cgroup_kill_sb, | 1661 | .kill_sb = cgroup_kill_sb, |
1624 | }; | 1662 | }; |
1625 | 1663 | ||
@@ -1883,6 +1921,8 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft, | |||
1883 | const char *buffer) | 1921 | const char *buffer) |
1884 | { | 1922 | { |
1885 | BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); | 1923 | BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); |
1924 | if (strlen(buffer) >= PATH_MAX) | ||
1925 | return -EINVAL; | ||
1886 | if (!cgroup_lock_live_group(cgrp)) | 1926 | if (!cgroup_lock_live_group(cgrp)) |
1887 | return -ENODEV; | 1927 | return -ENODEV; |
1888 | strcpy(cgrp->root->release_agent_path, buffer); | 1928 | strcpy(cgrp->root->release_agent_path, buffer); |
@@ -3176,6 +3216,23 @@ fail: | |||
3176 | return ret; | 3216 | return ret; |
3177 | } | 3217 | } |
3178 | 3218 | ||
3219 | static u64 cgroup_clone_children_read(struct cgroup *cgrp, | ||
3220 | struct cftype *cft) | ||
3221 | { | ||
3222 | return clone_children(cgrp); | ||
3223 | } | ||
3224 | |||
3225 | static int cgroup_clone_children_write(struct cgroup *cgrp, | ||
3226 | struct cftype *cft, | ||
3227 | u64 val) | ||
3228 | { | ||
3229 | if (val) | ||
3230 | set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); | ||
3231 | else | ||
3232 | clear_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); | ||
3233 | return 0; | ||
3234 | } | ||
3235 | |||
3179 | /* | 3236 | /* |
3180 | * for the common functions, 'private' gives the type of file | 3237 | * for the common functions, 'private' gives the type of file |
3181 | */ | 3238 | */ |
@@ -3206,6 +3263,11 @@ static struct cftype files[] = { | |||
3206 | .write_string = cgroup_write_event_control, | 3263 | .write_string = cgroup_write_event_control, |
3207 | .mode = S_IWUGO, | 3264 | .mode = S_IWUGO, |
3208 | }, | 3265 | }, |
3266 | { | ||
3267 | .name = "cgroup.clone_children", | ||
3268 | .read_u64 = cgroup_clone_children_read, | ||
3269 | .write_u64 = cgroup_clone_children_write, | ||
3270 | }, | ||
3209 | }; | 3271 | }; |
3210 | 3272 | ||
3211 | static struct cftype cft_release_agent = { | 3273 | static struct cftype cft_release_agent = { |
@@ -3335,6 +3397,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
3335 | if (notify_on_release(parent)) | 3397 | if (notify_on_release(parent)) |
3336 | set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); | 3398 | set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); |
3337 | 3399 | ||
3400 | if (clone_children(parent)) | ||
3401 | set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); | ||
3402 | |||
3338 | for_each_subsys(root, ss) { | 3403 | for_each_subsys(root, ss) { |
3339 | struct cgroup_subsys_state *css = ss->create(ss, cgrp); | 3404 | struct cgroup_subsys_state *css = ss->create(ss, cgrp); |
3340 | 3405 | ||
@@ -3349,6 +3414,8 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
3349 | goto err_destroy; | 3414 | goto err_destroy; |
3350 | } | 3415 | } |
3351 | /* At error, ->destroy() callback has to free assigned ID. */ | 3416 | /* At error, ->destroy() callback has to free assigned ID. */ |
3417 | if (clone_children(parent) && ss->post_clone) | ||
3418 | ss->post_clone(ss, cgrp); | ||
3352 | } | 3419 | } |
3353 | 3420 | ||
3354 | cgroup_lock_hierarchy(root); | 3421 | cgroup_lock_hierarchy(root); |
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index ce71ed53e88f..e7bebb7c6c38 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -48,20 +48,19 @@ static inline struct freezer *task_freezer(struct task_struct *task) | |||
48 | struct freezer, css); | 48 | struct freezer, css); |
49 | } | 49 | } |
50 | 50 | ||
51 | int cgroup_freezing_or_frozen(struct task_struct *task) | 51 | static inline int __cgroup_freezing_or_frozen(struct task_struct *task) |
52 | { | 52 | { |
53 | struct freezer *freezer; | 53 | enum freezer_state state = task_freezer(task)->state; |
54 | enum freezer_state state; | 54 | return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN); |
55 | } | ||
55 | 56 | ||
57 | int cgroup_freezing_or_frozen(struct task_struct *task) | ||
58 | { | ||
59 | int result; | ||
56 | task_lock(task); | 60 | task_lock(task); |
57 | freezer = task_freezer(task); | 61 | result = __cgroup_freezing_or_frozen(task); |
58 | if (!freezer->css.cgroup->parent) | ||
59 | state = CGROUP_THAWED; /* root cgroup can't be frozen */ | ||
60 | else | ||
61 | state = freezer->state; | ||
62 | task_unlock(task); | 62 | task_unlock(task); |
63 | 63 | return result; | |
64 | return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN); | ||
65 | } | 64 | } |
66 | 65 | ||
67 | /* | 66 | /* |
@@ -154,13 +153,6 @@ static void freezer_destroy(struct cgroup_subsys *ss, | |||
154 | kfree(cgroup_freezer(cgroup)); | 153 | kfree(cgroup_freezer(cgroup)); |
155 | } | 154 | } |
156 | 155 | ||
157 | /* Task is frozen or will freeze immediately when next it gets woken */ | ||
158 | static bool is_task_frozen_enough(struct task_struct *task) | ||
159 | { | ||
160 | return frozen(task) || | ||
161 | (task_is_stopped_or_traced(task) && freezing(task)); | ||
162 | } | ||
163 | |||
164 | /* | 156 | /* |
165 | * The call to cgroup_lock() in the freezer.state write method prevents | 157 | * The call to cgroup_lock() in the freezer.state write method prevents |
166 | * a write to that file racing against an attach, and hence the | 158 | * a write to that file racing against an attach, and hence the |
@@ -174,24 +166,25 @@ static int freezer_can_attach(struct cgroup_subsys *ss, | |||
174 | 166 | ||
175 | /* | 167 | /* |
176 | * Anything frozen can't move or be moved to/from. | 168 | * Anything frozen can't move or be moved to/from. |
177 | * | ||
178 | * Since orig_freezer->state == FROZEN means that @task has been | ||
179 | * frozen, so it's sufficient to check the latter condition. | ||
180 | */ | 169 | */ |
181 | 170 | ||
182 | if (is_task_frozen_enough(task)) | 171 | freezer = cgroup_freezer(new_cgroup); |
172 | if (freezer->state != CGROUP_THAWED) | ||
183 | return -EBUSY; | 173 | return -EBUSY; |
184 | 174 | ||
185 | freezer = cgroup_freezer(new_cgroup); | 175 | rcu_read_lock(); |
186 | if (freezer->state == CGROUP_FROZEN) | 176 | if (__cgroup_freezing_or_frozen(task)) { |
177 | rcu_read_unlock(); | ||
187 | return -EBUSY; | 178 | return -EBUSY; |
179 | } | ||
180 | rcu_read_unlock(); | ||
188 | 181 | ||
189 | if (threadgroup) { | 182 | if (threadgroup) { |
190 | struct task_struct *c; | 183 | struct task_struct *c; |
191 | 184 | ||
192 | rcu_read_lock(); | 185 | rcu_read_lock(); |
193 | list_for_each_entry_rcu(c, &task->thread_group, thread_group) { | 186 | list_for_each_entry_rcu(c, &task->thread_group, thread_group) { |
194 | if (is_task_frozen_enough(c)) { | 187 | if (__cgroup_freezing_or_frozen(c)) { |
195 | rcu_read_unlock(); | 188 | rcu_read_unlock(); |
196 | return -EBUSY; | 189 | return -EBUSY; |
197 | } | 190 | } |
@@ -236,31 +229,30 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) | |||
236 | /* | 229 | /* |
237 | * caller must hold freezer->lock | 230 | * caller must hold freezer->lock |
238 | */ | 231 | */ |
239 | static void update_freezer_state(struct cgroup *cgroup, | 232 | static void update_if_frozen(struct cgroup *cgroup, |
240 | struct freezer *freezer) | 233 | struct freezer *freezer) |
241 | { | 234 | { |
242 | struct cgroup_iter it; | 235 | struct cgroup_iter it; |
243 | struct task_struct *task; | 236 | struct task_struct *task; |
244 | unsigned int nfrozen = 0, ntotal = 0; | 237 | unsigned int nfrozen = 0, ntotal = 0; |
238 | enum freezer_state old_state = freezer->state; | ||
245 | 239 | ||
246 | cgroup_iter_start(cgroup, &it); | 240 | cgroup_iter_start(cgroup, &it); |
247 | while ((task = cgroup_iter_next(cgroup, &it))) { | 241 | while ((task = cgroup_iter_next(cgroup, &it))) { |
248 | ntotal++; | 242 | ntotal++; |
249 | if (is_task_frozen_enough(task)) | 243 | if (frozen(task)) |
250 | nfrozen++; | 244 | nfrozen++; |
251 | } | 245 | } |
252 | 246 | ||
253 | /* | 247 | if (old_state == CGROUP_THAWED) { |
254 | * Transition to FROZEN when no new tasks can be added ensures | 248 | BUG_ON(nfrozen > 0); |
255 | * that we never exist in the FROZEN state while there are unfrozen | 249 | } else if (old_state == CGROUP_FREEZING) { |
256 | * tasks. | 250 | if (nfrozen == ntotal) |
257 | */ | 251 | freezer->state = CGROUP_FROZEN; |
258 | if (nfrozen == ntotal) | 252 | } else { /* old_state == CGROUP_FROZEN */ |
259 | freezer->state = CGROUP_FROZEN; | 253 | BUG_ON(nfrozen != ntotal); |
260 | else if (nfrozen > 0) | 254 | } |
261 | freezer->state = CGROUP_FREEZING; | 255 | |
262 | else | ||
263 | freezer->state = CGROUP_THAWED; | ||
264 | cgroup_iter_end(cgroup, &it); | 256 | cgroup_iter_end(cgroup, &it); |
265 | } | 257 | } |
266 | 258 | ||
@@ -279,7 +271,7 @@ static int freezer_read(struct cgroup *cgroup, struct cftype *cft, | |||
279 | if (state == CGROUP_FREEZING) { | 271 | if (state == CGROUP_FREEZING) { |
280 | /* We change from FREEZING to FROZEN lazily if the cgroup was | 272 | /* We change from FREEZING to FROZEN lazily if the cgroup was |
281 | * only partially frozen when we exitted write. */ | 273 | * only partially frozen when we exitted write. */ |
282 | update_freezer_state(cgroup, freezer); | 274 | update_if_frozen(cgroup, freezer); |
283 | state = freezer->state; | 275 | state = freezer->state; |
284 | } | 276 | } |
285 | spin_unlock_irq(&freezer->lock); | 277 | spin_unlock_irq(&freezer->lock); |
@@ -301,7 +293,7 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) | |||
301 | while ((task = cgroup_iter_next(cgroup, &it))) { | 293 | while ((task = cgroup_iter_next(cgroup, &it))) { |
302 | if (!freeze_task(task, true)) | 294 | if (!freeze_task(task, true)) |
303 | continue; | 295 | continue; |
304 | if (is_task_frozen_enough(task)) | 296 | if (frozen(task)) |
305 | continue; | 297 | continue; |
306 | if (!freezing(task) && !freezer_should_skip(task)) | 298 | if (!freezing(task) && !freezer_should_skip(task)) |
307 | num_cant_freeze_now++; | 299 | num_cant_freeze_now++; |
@@ -335,7 +327,7 @@ static int freezer_change_state(struct cgroup *cgroup, | |||
335 | 327 | ||
336 | spin_lock_irq(&freezer->lock); | 328 | spin_lock_irq(&freezer->lock); |
337 | 329 | ||
338 | update_freezer_state(cgroup, freezer); | 330 | update_if_frozen(cgroup, freezer); |
339 | if (goal_state == freezer->state) | 331 | if (goal_state == freezer->state) |
340 | goto out; | 332 | goto out; |
341 | 333 | ||
diff --git a/kernel/configs.c b/kernel/configs.c
index abaee684ecbf..b4066b44a99d 100644
--- a/kernel/configs.c
+++ b/kernel/configs.c
@@ -66,6 +66,7 @@ ikconfig_read_current(struct file *file, char __user *buf, | |||
66 | static const struct file_operations ikconfig_file_ops = { | 66 | static const struct file_operations ikconfig_file_ops = { |
67 | .owner = THIS_MODULE, | 67 | .owner = THIS_MODULE, |
68 | .read = ikconfig_read_current, | 68 | .read = ikconfig_read_current, |
69 | .llseek = default_llseek, | ||
69 | }; | 70 | }; |
70 | 71 | ||
71 | static int __init ikconfig_init(void) | 72 | static int __init ikconfig_init(void) |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 51b143e2a07a..4349935c2ad8 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -231,18 +231,17 @@ static DEFINE_SPINLOCK(cpuset_buffer_lock); | |||
231 | * users. If someone tries to mount the "cpuset" filesystem, we | 231 | * users. If someone tries to mount the "cpuset" filesystem, we |
232 | * silently switch it to mount "cgroup" instead | 232 | * silently switch it to mount "cgroup" instead |
233 | */ | 233 | */ |
234 | static int cpuset_get_sb(struct file_system_type *fs_type, | 234 | static struct dentry *cpuset_mount(struct file_system_type *fs_type, |
235 | int flags, const char *unused_dev_name, | 235 | int flags, const char *unused_dev_name, void *data) |
236 | void *data, struct vfsmount *mnt) | ||
237 | { | 236 | { |
238 | struct file_system_type *cgroup_fs = get_fs_type("cgroup"); | 237 | struct file_system_type *cgroup_fs = get_fs_type("cgroup"); |
239 | int ret = -ENODEV; | 238 | struct dentry *ret = ERR_PTR(-ENODEV); |
240 | if (cgroup_fs) { | 239 | if (cgroup_fs) { |
241 | char mountopts[] = | 240 | char mountopts[] = |
242 | "cpuset,noprefix," | 241 | "cpuset,noprefix," |
243 | "release_agent=/sbin/cpuset_release_agent"; | 242 | "release_agent=/sbin/cpuset_release_agent"; |
244 | ret = cgroup_fs->get_sb(cgroup_fs, flags, | 243 | ret = cgroup_fs->mount(cgroup_fs, flags, |
245 | unused_dev_name, mountopts, mnt); | 244 | unused_dev_name, mountopts); |
246 | put_filesystem(cgroup_fs); | 245 | put_filesystem(cgroup_fs); |
247 | } | 246 | } |
248 | return ret; | 247 | return ret; |
@@ -250,7 +249,7 @@ static int cpuset_get_sb(struct file_system_type *fs_type, | |||
250 | 249 | ||
251 | static struct file_system_type cpuset_fs_type = { | 250 | static struct file_system_type cpuset_fs_type = { |
252 | .name = "cpuset", | 251 | .name = "cpuset", |
253 | .get_sb = cpuset_get_sb, | 252 | .mount = cpuset_mount, |
254 | }; | 253 | }; |
255 | 254 | ||
256 | /* | 255 | /* |
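Both cgroup.c and cpuset.c above are converted from the old ->get_sb()/vfsmount interface to the ->mount() interface, which returns the root dentry (or an ERR_PTR) directly. A minimal sketch of a filesystem using the new hook, with placeholder example_* names; mount_nodev() and kill_anon_super() are the stock VFS helpers:

```c
#include <linux/fs.h>
#include <linux/module.h>

static int example_fill_super(struct super_block *sb, void *data, int silent)
{
	/* a real implementation would allocate a root inode and set sb->s_root */
	return 0;
}

static struct dentry *example_mount(struct file_system_type *fs_type,
				    int flags, const char *dev_name,
				    void *data)
{
	/* mount_nodev() returns the root dentry or an ERR_PTR on failure */
	return mount_nodev(fs_type, flags, data, example_fill_super);
}

static struct file_system_type example_fs_type = {
	.owner   = THIS_MODULE,
	.name    = "example",
	.mount   = example_mount,
	.kill_sb = kill_anon_super,
};
```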
diff --git a/kernel/cred.c b/kernel/cred.c
index 9a3e22641fe7..6a1aa004e376 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -325,7 +325,7 @@ EXPORT_SYMBOL(prepare_creds); | |||
325 | 325 | ||
326 | /* | 326 | /* |
327 | * Prepare credentials for current to perform an execve() | 327 | * Prepare credentials for current to perform an execve() |
328 | * - The caller must hold current->cred_guard_mutex | 328 | * - The caller must hold ->cred_guard_mutex |
329 | */ | 329 | */ |
330 | struct cred *prepare_exec_creds(void) | 330 | struct cred *prepare_exec_creds(void) |
331 | { | 331 | { |
@@ -384,8 +384,6 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) | |||
384 | struct cred *new; | 384 | struct cred *new; |
385 | int ret; | 385 | int ret; |
386 | 386 | ||
387 | mutex_init(&p->cred_guard_mutex); | ||
388 | |||
389 | if ( | 387 | if ( |
390 | #ifdef CONFIG_KEYS | 388 | #ifdef CONFIG_KEYS |
391 | !p->cred->thread_keyring && | 389 | !p->cred->thread_keyring && |
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index de407c78178d..cefd4a11f6d9 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -47,6 +47,7 @@ | |||
47 | #include <linux/pid.h> | 47 | #include <linux/pid.h> |
48 | #include <linux/smp.h> | 48 | #include <linux/smp.h> |
49 | #include <linux/mm.h> | 49 | #include <linux/mm.h> |
50 | #include <linux/rcupdate.h> | ||
50 | 51 | ||
51 | #include <asm/cacheflush.h> | 52 | #include <asm/cacheflush.h> |
52 | #include <asm/byteorder.h> | 53 | #include <asm/byteorder.h> |
@@ -109,13 +110,15 @@ static struct kgdb_bkpt kgdb_break[KGDB_MAX_BREAKPOINTS] = { | |||
109 | */ | 110 | */ |
110 | atomic_t kgdb_active = ATOMIC_INIT(-1); | 111 | atomic_t kgdb_active = ATOMIC_INIT(-1); |
111 | EXPORT_SYMBOL_GPL(kgdb_active); | 112 | EXPORT_SYMBOL_GPL(kgdb_active); |
113 | static DEFINE_RAW_SPINLOCK(dbg_master_lock); | ||
114 | static DEFINE_RAW_SPINLOCK(dbg_slave_lock); | ||
112 | 115 | ||
113 | /* | 116 | /* |
114 | * We use NR_CPUs not PERCPU, in case kgdb is used to debug early | 117 | * We use NR_CPUs not PERCPU, in case kgdb is used to debug early |
115 | * bootup code (which might not have percpu set up yet): | 118 | * bootup code (which might not have percpu set up yet): |
116 | */ | 119 | */ |
117 | static atomic_t passive_cpu_wait[NR_CPUS]; | 120 | static atomic_t masters_in_kgdb; |
118 | static atomic_t cpu_in_kgdb[NR_CPUS]; | 121 | static atomic_t slaves_in_kgdb; |
119 | static atomic_t kgdb_break_tasklet_var; | 122 | static atomic_t kgdb_break_tasklet_var; |
120 | atomic_t kgdb_setting_breakpoint; | 123 | atomic_t kgdb_setting_breakpoint; |
121 | 124 | ||
@@ -206,18 +209,6 @@ int __weak kgdb_skipexception(int exception, struct pt_regs *regs) | |||
206 | return 0; | 209 | return 0; |
207 | } | 210 | } |
208 | 211 | ||
209 | /** | ||
210 | * kgdb_disable_hw_debug - Disable hardware debugging while we in kgdb. | ||
211 | * @regs: Current &struct pt_regs. | ||
212 | * | ||
213 | * This function will be called if the particular architecture must | ||
214 | * disable hardware debugging while it is processing gdb packets or | ||
215 | * handling exception. | ||
216 | */ | ||
217 | void __weak kgdb_disable_hw_debug(struct pt_regs *regs) | ||
218 | { | ||
219 | } | ||
220 | |||
221 | /* | 212 | /* |
222 | * Some architectures need cache flushes when we set/clear a | 213 | * Some architectures need cache flushes when we set/clear a |
223 | * breakpoint: | 214 | * breakpoint: |
@@ -457,26 +448,34 @@ static int kgdb_reenter_check(struct kgdb_state *ks) | |||
457 | return 1; | 448 | return 1; |
458 | } | 449 | } |
459 | 450 | ||
460 | static void dbg_cpu_switch(int cpu, int next_cpu) | 451 | static void dbg_touch_watchdogs(void) |
461 | { | 452 | { |
462 | /* Mark the cpu we are switching away from as a slave when it | 453 | touch_softlockup_watchdog_sync(); |
463 | * holds the kgdb_active token. This must be done so that the | 454 | clocksource_touch_watchdog(); |
464 | * that all the cpus wait in for the debug core will not enter | 455 | rcu_cpu_stall_reset(); |
465 | * again as the master. */ | ||
466 | if (cpu == atomic_read(&kgdb_active)) { | ||
467 | kgdb_info[cpu].exception_state |= DCPU_IS_SLAVE; | ||
468 | kgdb_info[cpu].exception_state &= ~DCPU_WANT_MASTER; | ||
469 | } | ||
470 | kgdb_info[next_cpu].exception_state |= DCPU_NEXT_MASTER; | ||
471 | } | 456 | } |
472 | 457 | ||
473 | static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs) | 458 | static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs, |
459 | int exception_state) | ||
474 | { | 460 | { |
475 | unsigned long flags; | 461 | unsigned long flags; |
476 | int sstep_tries = 100; | 462 | int sstep_tries = 100; |
477 | int error; | 463 | int error; |
478 | int i, cpu; | 464 | int cpu; |
479 | int trace_on = 0; | 465 | int trace_on = 0; |
466 | int online_cpus = num_online_cpus(); | ||
467 | |||
468 | kgdb_info[ks->cpu].enter_kgdb++; | ||
469 | kgdb_info[ks->cpu].exception_state |= exception_state; | ||
470 | |||
471 | if (exception_state == DCPU_WANT_MASTER) | ||
472 | atomic_inc(&masters_in_kgdb); | ||
473 | else | ||
474 | atomic_inc(&slaves_in_kgdb); | ||
475 | |||
476 | if (arch_kgdb_ops.disable_hw_break) | ||
477 | arch_kgdb_ops.disable_hw_break(regs); | ||
478 | |||
480 | acquirelock: | 479 | acquirelock: |
481 | /* | 480 | /* |
482 | * Interrupts will be restored by the 'trap return' code, except when | 481 | * Interrupts will be restored by the 'trap return' code, except when |
@@ -489,14 +488,15 @@ acquirelock: | |||
489 | kgdb_info[cpu].task = current; | 488 | kgdb_info[cpu].task = current; |
490 | kgdb_info[cpu].ret_state = 0; | 489 | kgdb_info[cpu].ret_state = 0; |
491 | kgdb_info[cpu].irq_depth = hardirq_count() >> HARDIRQ_SHIFT; | 490 | kgdb_info[cpu].irq_depth = hardirq_count() >> HARDIRQ_SHIFT; |
492 | /* | ||
493 | * Make sure the above info reaches the primary CPU before | ||
494 | * our cpu_in_kgdb[] flag setting does: | ||
495 | */ | ||
496 | atomic_inc(&cpu_in_kgdb[cpu]); | ||
497 | 491 | ||
498 | if (exception_level == 1) | 492 | /* Make sure the above info reaches the primary CPU */ |
493 | smp_mb(); | ||
494 | |||
495 | if (exception_level == 1) { | ||
496 | if (raw_spin_trylock(&dbg_master_lock)) | ||
497 | atomic_xchg(&kgdb_active, cpu); | ||
499 | goto cpu_master_loop; | 498 | goto cpu_master_loop; |
499 | } | ||
500 | 500 | ||
501 | /* | 501 | /* |
502 | * CPU will loop if it is a slave or request to become a kgdb | 502 | * CPU will loop if it is a slave or request to become a kgdb |
@@ -508,10 +508,12 @@ cpu_loop: | |||
508 | kgdb_info[cpu].exception_state &= ~DCPU_NEXT_MASTER; | 508 | kgdb_info[cpu].exception_state &= ~DCPU_NEXT_MASTER; |
509 | goto cpu_master_loop; | 509 | goto cpu_master_loop; |
510 | } else if (kgdb_info[cpu].exception_state & DCPU_WANT_MASTER) { | 510 | } else if (kgdb_info[cpu].exception_state & DCPU_WANT_MASTER) { |
511 | if (atomic_cmpxchg(&kgdb_active, -1, cpu) == cpu) | 511 | if (raw_spin_trylock(&dbg_master_lock)) { |
512 | atomic_xchg(&kgdb_active, cpu); | ||
512 | break; | 513 | break; |
514 | } | ||
513 | } else if (kgdb_info[cpu].exception_state & DCPU_IS_SLAVE) { | 515 | } else if (kgdb_info[cpu].exception_state & DCPU_IS_SLAVE) { |
514 | if (!atomic_read(&passive_cpu_wait[cpu])) | 516 | if (!raw_spin_is_locked(&dbg_slave_lock)) |
515 | goto return_normal; | 517 | goto return_normal; |
516 | } else { | 518 | } else { |
517 | return_normal: | 519 | return_normal: |
@@ -522,9 +524,12 @@ return_normal: | |||
522 | arch_kgdb_ops.correct_hw_break(); | 524 | arch_kgdb_ops.correct_hw_break(); |
523 | if (trace_on) | 525 | if (trace_on) |
524 | tracing_on(); | 526 | tracing_on(); |
525 | atomic_dec(&cpu_in_kgdb[cpu]); | 527 | kgdb_info[cpu].exception_state &= |
526 | touch_softlockup_watchdog_sync(); | 528 | ~(DCPU_WANT_MASTER | DCPU_IS_SLAVE); |
527 | clocksource_touch_watchdog(); | 529 | kgdb_info[cpu].enter_kgdb--; |
530 | smp_mb__before_atomic_dec(); | ||
531 | atomic_dec(&slaves_in_kgdb); | ||
532 | dbg_touch_watchdogs(); | ||
528 | local_irq_restore(flags); | 533 | local_irq_restore(flags); |
529 | return 0; | 534 | return 0; |
530 | } | 535 | } |
@@ -541,8 +546,8 @@ return_normal: | |||
541 | (kgdb_info[cpu].task && | 546 | (kgdb_info[cpu].task && |
542 | kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) { | 547 | kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) { |
543 | atomic_set(&kgdb_active, -1); | 548 | atomic_set(&kgdb_active, -1); |
544 | touch_softlockup_watchdog_sync(); | 549 | raw_spin_unlock(&dbg_master_lock); |
545 | clocksource_touch_watchdog(); | 550 | dbg_touch_watchdogs(); |
546 | local_irq_restore(flags); | 551 | local_irq_restore(flags); |
547 | 552 | ||
548 | goto acquirelock; | 553 | goto acquirelock; |
@@ -563,16 +568,12 @@ return_normal: | |||
563 | if (dbg_io_ops->pre_exception) | 568 | if (dbg_io_ops->pre_exception) |
564 | dbg_io_ops->pre_exception(); | 569 | dbg_io_ops->pre_exception(); |
565 | 570 | ||
566 | kgdb_disable_hw_debug(ks->linux_regs); | ||
567 | |||
568 | /* | 571 | /* |
569 | * Get the passive CPU lock which will hold all the non-primary | 572 | * Get the passive CPU lock which will hold all the non-primary |
570 | * CPU in a spin state while the debugger is active | 573 | * CPU in a spin state while the debugger is active |
571 | */ | 574 | */ |
572 | if (!kgdb_single_step) { | 575 | if (!kgdb_single_step) |
573 | for (i = 0; i < NR_CPUS; i++) | 576 | raw_spin_lock(&dbg_slave_lock); |
574 | atomic_inc(&passive_cpu_wait[i]); | ||
575 | } | ||
576 | 577 | ||
577 | #ifdef CONFIG_SMP | 578 | #ifdef CONFIG_SMP |
578 | /* Signal the other CPUs to enter kgdb_wait() */ | 579 | /* Signal the other CPUs to enter kgdb_wait() */ |
@@ -583,10 +584,9 @@ return_normal: | |||
583 | /* | 584 | /* |
584 | * Wait for the other CPUs to be notified and be waiting for us: | 585 | * Wait for the other CPUs to be notified and be waiting for us: |
585 | */ | 586 | */ |
586 | for_each_online_cpu(i) { | 587 | while (kgdb_do_roundup && (atomic_read(&masters_in_kgdb) + |
587 | while (kgdb_do_roundup && !atomic_read(&cpu_in_kgdb[i])) | 588 | atomic_read(&slaves_in_kgdb)) != online_cpus) |
588 | cpu_relax(); | 589 | cpu_relax(); |
589 | } | ||
590 | 590 | ||
591 | /* | 591 | /* |
592 | * At this point the primary processor is completely | 592 | * At this point the primary processor is completely |
@@ -615,7 +615,8 @@ cpu_master_loop: | |||
615 | if (error == DBG_PASS_EVENT) { | 615 | if (error == DBG_PASS_EVENT) { |
616 | dbg_kdb_mode = !dbg_kdb_mode; | 616 | dbg_kdb_mode = !dbg_kdb_mode; |
617 | } else if (error == DBG_SWITCH_CPU_EVENT) { | 617 | } else if (error == DBG_SWITCH_CPU_EVENT) { |
618 | dbg_cpu_switch(cpu, dbg_switch_cpu); | 618 | kgdb_info[dbg_switch_cpu].exception_state |= |
619 | DCPU_NEXT_MASTER; | ||
619 | goto cpu_loop; | 620 | goto cpu_loop; |
620 | } else { | 621 | } else { |
621 | kgdb_info[cpu].ret_state = error; | 622 | kgdb_info[cpu].ret_state = error; |
@@ -627,24 +628,11 @@ cpu_master_loop: | |||
627 | if (dbg_io_ops->post_exception) | 628 | if (dbg_io_ops->post_exception) |
628 | dbg_io_ops->post_exception(); | 629 | dbg_io_ops->post_exception(); |
629 | 630 | ||
630 | atomic_dec(&cpu_in_kgdb[ks->cpu]); | ||
631 | |||
632 | if (!kgdb_single_step) { | 631 | if (!kgdb_single_step) { |
633 | for (i = NR_CPUS-1; i >= 0; i--) | 632 | raw_spin_unlock(&dbg_slave_lock); |
634 | atomic_dec(&passive_cpu_wait[i]); | 633 | /* Wait till all the CPUs have quit from the debugger. */ |
635 | /* | 634 | while (kgdb_do_roundup && atomic_read(&slaves_in_kgdb)) |
636 | * Wait till all the CPUs have quit from the debugger, | 635 | cpu_relax(); |
637 | * but allow a CPU that hit an exception and is | ||
638 | * waiting to become the master to remain in the debug | ||
639 | * core. | ||
640 | */ | ||
641 | for_each_online_cpu(i) { | ||
642 | while (kgdb_do_roundup && | ||
643 | atomic_read(&cpu_in_kgdb[i]) && | ||
644 | !(kgdb_info[i].exception_state & | ||
645 | DCPU_WANT_MASTER)) | ||
646 | cpu_relax(); | ||
647 | } | ||
648 | } | 636 | } |
649 | 637 | ||
650 | kgdb_restore: | 638 | kgdb_restore: |
@@ -655,12 +643,20 @@ kgdb_restore: | |||
655 | else | 643 | else |
656 | kgdb_sstep_pid = 0; | 644 | kgdb_sstep_pid = 0; |
657 | } | 645 | } |
646 | if (arch_kgdb_ops.correct_hw_break) | ||
647 | arch_kgdb_ops.correct_hw_break(); | ||
658 | if (trace_on) | 648 | if (trace_on) |
659 | tracing_on(); | 649 | tracing_on(); |
650 | |||
651 | kgdb_info[cpu].exception_state &= | ||
652 | ~(DCPU_WANT_MASTER | DCPU_IS_SLAVE); | ||
653 | kgdb_info[cpu].enter_kgdb--; | ||
654 | smp_mb__before_atomic_dec(); | ||
655 | atomic_dec(&masters_in_kgdb); | ||
660 | /* Free kgdb_active */ | 656 | /* Free kgdb_active */ |
661 | atomic_set(&kgdb_active, -1); | 657 | atomic_set(&kgdb_active, -1); |
662 | touch_softlockup_watchdog_sync(); | 658 | raw_spin_unlock(&dbg_master_lock); |
663 | clocksource_touch_watchdog(); | 659 | dbg_touch_watchdogs(); |
664 | local_irq_restore(flags); | 660 | local_irq_restore(flags); |
665 | 661 | ||
666 | return kgdb_info[cpu].ret_state; | 662 | return kgdb_info[cpu].ret_state; |
@@ -678,7 +674,6 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs) | |||
678 | { | 674 | { |
679 | struct kgdb_state kgdb_var; | 675 | struct kgdb_state kgdb_var; |
680 | struct kgdb_state *ks = &kgdb_var; | 676 | struct kgdb_state *ks = &kgdb_var; |
681 | int ret; | ||
682 | 677 | ||
683 | ks->cpu = raw_smp_processor_id(); | 678 | ks->cpu = raw_smp_processor_id(); |
684 | ks->ex_vector = evector; | 679 | ks->ex_vector = evector; |
@@ -689,11 +684,10 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs) | |||
689 | 684 | ||
690 | if (kgdb_reenter_check(ks)) | 685 | if (kgdb_reenter_check(ks)) |
691 | return 0; /* Ouch, double exception ! */ | 686 | return 0; /* Ouch, double exception ! */ |
692 | kgdb_info[ks->cpu].exception_state |= DCPU_WANT_MASTER; | 687 | if (kgdb_info[ks->cpu].enter_kgdb != 0) |
693 | ret = kgdb_cpu_enter(ks, regs); | 688 | return 0; |
694 | kgdb_info[ks->cpu].exception_state &= ~(DCPU_WANT_MASTER | | 689 | |
695 | DCPU_IS_SLAVE); | 690 | return kgdb_cpu_enter(ks, regs, DCPU_WANT_MASTER); |
696 | return ret; | ||
697 | } | 691 | } |
698 | 692 | ||
699 | int kgdb_nmicallback(int cpu, void *regs) | 693 | int kgdb_nmicallback(int cpu, void *regs) |
@@ -706,12 +700,9 @@ int kgdb_nmicallback(int cpu, void *regs) | |||
706 | ks->cpu = cpu; | 700 | ks->cpu = cpu; |
707 | ks->linux_regs = regs; | 701 | ks->linux_regs = regs; |
708 | 702 | ||
709 | if (!atomic_read(&cpu_in_kgdb[cpu]) && | 703 | if (kgdb_info[ks->cpu].enter_kgdb == 0 && |
710 | atomic_read(&kgdb_active) != -1 && | 704 | raw_spin_is_locked(&dbg_master_lock)) { |
711 | atomic_read(&kgdb_active) != cpu) { | 705 | kgdb_cpu_enter(ks, regs, DCPU_IS_SLAVE); |
712 | kgdb_info[cpu].exception_state |= DCPU_IS_SLAVE; | ||
713 | kgdb_cpu_enter(ks, regs); | ||
714 | kgdb_info[cpu].exception_state &= ~DCPU_IS_SLAVE; | ||
715 | return 0; | 706 | return 0; |
716 | } | 707 | } |
717 | #endif | 708 | #endif |
diff --git a/kernel/debug/debug_core.h b/kernel/debug/debug_core.h
index c5d753d80f67..3494c28a7e7a 100644
--- a/kernel/debug/debug_core.h
+++ b/kernel/debug/debug_core.h
@@ -40,6 +40,7 @@ struct debuggerinfo_struct { | |||
40 | int exception_state; | 40 | int exception_state; |
41 | int ret_state; | 41 | int ret_state; |
42 | int irq_depth; | 42 | int irq_depth; |
43 | int enter_kgdb; | ||
43 | }; | 44 | }; |
44 | 45 | ||
45 | extern struct debuggerinfo_struct kgdb_info[]; | 46 | extern struct debuggerinfo_struct kgdb_info[]; |
diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c
index bf6e8270e957..dd0b1b7dd02c 100644
--- a/kernel/debug/kdb/kdb_debugger.c
+++ b/kernel/debug/kdb/kdb_debugger.c
@@ -86,7 +86,7 @@ int kdb_stub(struct kgdb_state *ks) | |||
86 | } | 86 | } |
87 | /* Set initial kdb state variables */ | 87 | /* Set initial kdb state variables */ |
88 | KDB_STATE_CLEAR(KGDB_TRANS); | 88 | KDB_STATE_CLEAR(KGDB_TRANS); |
89 | kdb_initial_cpu = ks->cpu; | 89 | kdb_initial_cpu = atomic_read(&kgdb_active); |
90 | kdb_current_task = kgdb_info[ks->cpu].task; | 90 | kdb_current_task = kgdb_info[ks->cpu].task; |
91 | kdb_current_regs = kgdb_info[ks->cpu].debuggerinfo; | 91 | kdb_current_regs = kgdb_info[ks->cpu].debuggerinfo; |
92 | /* Remove any breakpoints as needed by kdb and clear single step */ | 92 | /* Remove any breakpoints as needed by kdb and clear single step */ |
@@ -105,7 +105,6 @@ int kdb_stub(struct kgdb_state *ks) | |||
105 | ks->pass_exception = 1; | 105 | ks->pass_exception = 1; |
106 | KDB_FLAG_SET(CATASTROPHIC); | 106 | KDB_FLAG_SET(CATASTROPHIC); |
107 | } | 107 | } |
108 | kdb_initial_cpu = ks->cpu; | ||
109 | if (KDB_STATE(SSBPT) && reason == KDB_REASON_SSTEP) { | 108 | if (KDB_STATE(SSBPT) && reason == KDB_REASON_SSTEP) { |
110 | KDB_STATE_CLEAR(SSBPT); | 109 | KDB_STATE_CLEAR(SSBPT); |
111 | KDB_STATE_CLEAR(DOING_SS); | 110 | KDB_STATE_CLEAR(DOING_SS); |
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index c9b7f4f90bba..96fdaac46a80 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -823,4 +823,4 @@ int kdb_printf(const char *fmt, ...) | |||
823 | 823 | ||
824 | return r; | 824 | return r; |
825 | } | 825 | } |
826 | 826 | EXPORT_SYMBOL_GPL(kdb_printf); | |
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index caf057a3de0e..37755d621924 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1127,7 +1127,7 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, | |||
1127 | /* special case below */ | 1127 | /* special case below */ |
1128 | } else { | 1128 | } else { |
1129 | kdb_printf("\nEntering kdb (current=0x%p, pid %d) ", | 1129 | kdb_printf("\nEntering kdb (current=0x%p, pid %d) ", |
1130 | kdb_current, kdb_current->pid); | 1130 | kdb_current, kdb_current ? kdb_current->pid : 0); |
1131 | #if defined(CONFIG_SMP) | 1131 | #if defined(CONFIG_SMP) |
1132 | kdb_printf("on processor %d ", raw_smp_processor_id()); | 1132 | kdb_printf("on processor %d ", raw_smp_processor_id()); |
1133 | #endif | 1133 | #endif |
@@ -1749,13 +1749,13 @@ static int kdb_go(int argc, const char **argv) | |||
1749 | int nextarg; | 1749 | int nextarg; |
1750 | long offset; | 1750 | long offset; |
1751 | 1751 | ||
1752 | if (raw_smp_processor_id() != kdb_initial_cpu) { | ||
1753 | kdb_printf("go must execute on the entry cpu, " | ||
1754 | "please use \"cpu %d\" and then execute go\n", | ||
1755 | kdb_initial_cpu); | ||
1756 | return KDB_BADCPUNUM; | ||
1757 | } | ||
1752 | if (argc == 1) { | 1758 | if (argc == 1) { |
1753 | if (raw_smp_processor_id() != kdb_initial_cpu) { | ||
1754 | kdb_printf("go <address> must be issued from the " | ||
1755 | "initial cpu, do cpu %d first\n", | ||
1756 | kdb_initial_cpu); | ||
1757 | return KDB_ARGCOUNT; | ||
1758 | } | ||
1759 | nextarg = 1; | 1759 | nextarg = 1; |
1760 | diag = kdbgetaddrarg(argc, argv, &nextarg, | 1760 | diag = kdbgetaddrarg(argc, argv, &nextarg, |
1761 | &addr, &offset, NULL); | 1761 | &addr, &offset, NULL); |
@@ -2603,20 +2603,17 @@ static int kdb_summary(int argc, const char **argv) | |||
2603 | */ | 2603 | */ |
2604 | static int kdb_per_cpu(int argc, const char **argv) | 2604 | static int kdb_per_cpu(int argc, const char **argv) |
2605 | { | 2605 | { |
2606 | char buf[256], fmtstr[64]; | 2606 | char fmtstr[64]; |
2607 | kdb_symtab_t symtab; | 2607 | int cpu, diag, nextarg = 1; |
2608 | cpumask_t suppress = CPU_MASK_NONE; | 2608 | unsigned long addr, symaddr, val, bytesperword = 0, whichcpu = ~0UL; |
2609 | int cpu, diag; | ||
2610 | unsigned long addr, val, bytesperword = 0, whichcpu = ~0UL; | ||
2611 | 2609 | ||
2612 | if (argc < 1 || argc > 3) | 2610 | if (argc < 1 || argc > 3) |
2613 | return KDB_ARGCOUNT; | 2611 | return KDB_ARGCOUNT; |
2614 | 2612 | ||
2615 | snprintf(buf, sizeof(buf), "per_cpu__%s", argv[1]); | 2613 | diag = kdbgetaddrarg(argc, argv, &nextarg, &symaddr, NULL, NULL); |
2616 | if (!kdbgetsymval(buf, &symtab)) { | 2614 | if (diag) |
2617 | kdb_printf("%s is not a per_cpu variable\n", argv[1]); | 2615 | return diag; |
2618 | return KDB_BADADDR; | 2616 | |
2619 | } | ||
2620 | if (argc >= 2) { | 2617 | if (argc >= 2) { |
2621 | diag = kdbgetularg(argv[2], &bytesperword); | 2618 | diag = kdbgetularg(argv[2], &bytesperword); |
2622 | if (diag) | 2619 | if (diag) |
@@ -2649,46 +2646,25 @@ static int kdb_per_cpu(int argc, const char **argv) | |||
2649 | #define KDB_PCU(cpu) 0 | 2646 | #define KDB_PCU(cpu) 0 |
2650 | #endif | 2647 | #endif |
2651 | #endif | 2648 | #endif |
2652 | |||
2653 | for_each_online_cpu(cpu) { | 2649 | for_each_online_cpu(cpu) { |
2650 | if (KDB_FLAG(CMD_INTERRUPT)) | ||
2651 | return 0; | ||
2652 | |||
2654 | if (whichcpu != ~0UL && whichcpu != cpu) | 2653 | if (whichcpu != ~0UL && whichcpu != cpu) |
2655 | continue; | 2654 | continue; |
2656 | addr = symtab.sym_start + KDB_PCU(cpu); | 2655 | addr = symaddr + KDB_PCU(cpu); |
2657 | diag = kdb_getword(&val, addr, bytesperword); | 2656 | diag = kdb_getword(&val, addr, bytesperword); |
2658 | if (diag) { | 2657 | if (diag) { |
2659 | kdb_printf("%5d " kdb_bfd_vma_fmt0 " - unable to " | 2658 | kdb_printf("%5d " kdb_bfd_vma_fmt0 " - unable to " |
2660 | "read, diag=%d\n", cpu, addr, diag); | 2659 | "read, diag=%d\n", cpu, addr, diag); |
2661 | continue; | 2660 | continue; |
2662 | } | 2661 | } |
2663 | #ifdef CONFIG_SMP | ||
2664 | if (!val) { | ||
2665 | cpu_set(cpu, suppress); | ||
2666 | continue; | ||
2667 | } | ||
2668 | #endif /* CONFIG_SMP */ | ||
2669 | kdb_printf("%5d ", cpu); | 2662 | kdb_printf("%5d ", cpu); |
2670 | kdb_md_line(fmtstr, addr, | 2663 | kdb_md_line(fmtstr, addr, |
2671 | bytesperword == KDB_WORD_SIZE, | 2664 | bytesperword == KDB_WORD_SIZE, |
2672 | 1, bytesperword, 1, 1, 0); | 2665 | 1, bytesperword, 1, 1, 0); |
2673 | } | 2666 | } |
2674 | if (cpus_weight(suppress) == 0) | ||
2675 | return 0; | ||
2676 | kdb_printf("Zero suppressed cpu(s):"); | ||
2677 | for (cpu = first_cpu(suppress); cpu < num_possible_cpus(); | ||
2678 | cpu = next_cpu(cpu, suppress)) { | ||
2679 | kdb_printf(" %d", cpu); | ||
2680 | if (cpu == num_possible_cpus() - 1 || | ||
2681 | next_cpu(cpu, suppress) != cpu + 1) | ||
2682 | continue; | ||
2683 | while (cpu < num_possible_cpus() && | ||
2684 | next_cpu(cpu, suppress) == cpu + 1) | ||
2685 | ++cpu; | ||
2686 | kdb_printf("-%d", cpu); | ||
2687 | } | ||
2688 | kdb_printf("\n"); | ||
2689 | |||
2690 | #undef KDB_PCU | 2667 | #undef KDB_PCU |
2691 | |||
2692 | return 0; | 2668 | return 0; |
2693 | } | 2669 | } |
2694 | 2670 | ||
@@ -2783,6 +2759,8 @@ int kdb_register_repeat(char *cmd, | |||
2783 | 2759 | ||
2784 | return 0; | 2760 | return 0; |
2785 | } | 2761 | } |
2762 | EXPORT_SYMBOL_GPL(kdb_register_repeat); | ||
2763 | |||
2786 | 2764 | ||
2787 | /* | 2765 | /* |
2788 | * kdb_register - Compatibility register function for commands that do | 2766 | * kdb_register - Compatibility register function for commands that do |
@@ -2805,6 +2783,7 @@ int kdb_register(char *cmd, | |||
2805 | return kdb_register_repeat(cmd, func, usage, help, minlen, | 2783 | return kdb_register_repeat(cmd, func, usage, help, minlen, |
2806 | KDB_REPEAT_NONE); | 2784 | KDB_REPEAT_NONE); |
2807 | } | 2785 | } |
2786 | EXPORT_SYMBOL_GPL(kdb_register); | ||
2808 | 2787 | ||
2809 | /* | 2788 | /* |
2810 | * kdb_unregister - This function is used to unregister a kernel | 2789 | * kdb_unregister - This function is used to unregister a kernel |
@@ -2823,7 +2802,7 @@ int kdb_unregister(char *cmd) | |||
2823 | /* | 2802 | /* |
2824 | * find the command. | 2803 | * find the command. |
2825 | */ | 2804 | */ |
2826 | for (i = 0, kp = kdb_commands; i < kdb_max_commands; i++, kp++) { | 2805 | for_each_kdbcmd(kp, i) { |
2827 | if (kp->cmd_name && (strcmp(kp->cmd_name, cmd) == 0)) { | 2806 | if (kp->cmd_name && (strcmp(kp->cmd_name, cmd) == 0)) { |
2828 | kp->cmd_name = NULL; | 2807 | kp->cmd_name = NULL; |
2829 | return 0; | 2808 | return 0; |
@@ -2833,6 +2812,7 @@ int kdb_unregister(char *cmd) | |||
2833 | /* Couldn't find it. */ | 2812 | /* Couldn't find it. */ |
2834 | return 1; | 2813 | return 1; |
2835 | } | 2814 | } |
2815 | EXPORT_SYMBOL_GPL(kdb_unregister); | ||
2836 | 2816 | ||
2837 | /* Initialize the kdb command table. */ | 2817 | /* Initialize the kdb command table. */ |
2838 | static void __init kdb_inittab(void) | 2818 | static void __init kdb_inittab(void) |
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index be775f7e81e0..35d69ed1dfb5 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h | |||
@@ -15,29 +15,6 @@ | |||
15 | #include <linux/kgdb.h> | 15 | #include <linux/kgdb.h> |
16 | #include "../debug_core.h" | 16 | #include "../debug_core.h" |
17 | 17 | ||
18 | /* Kernel Debugger Error codes. Must not overlap with command codes. */ | ||
19 | #define KDB_NOTFOUND (-1) | ||
20 | #define KDB_ARGCOUNT (-2) | ||
21 | #define KDB_BADWIDTH (-3) | ||
22 | #define KDB_BADRADIX (-4) | ||
23 | #define KDB_NOTENV (-5) | ||
24 | #define KDB_NOENVVALUE (-6) | ||
25 | #define KDB_NOTIMP (-7) | ||
26 | #define KDB_ENVFULL (-8) | ||
27 | #define KDB_ENVBUFFULL (-9) | ||
28 | #define KDB_TOOMANYBPT (-10) | ||
29 | #define KDB_TOOMANYDBREGS (-11) | ||
30 | #define KDB_DUPBPT (-12) | ||
31 | #define KDB_BPTNOTFOUND (-13) | ||
32 | #define KDB_BADMODE (-14) | ||
33 | #define KDB_BADINT (-15) | ||
34 | #define KDB_INVADDRFMT (-16) | ||
35 | #define KDB_BADREG (-17) | ||
36 | #define KDB_BADCPUNUM (-18) | ||
37 | #define KDB_BADLENGTH (-19) | ||
38 | #define KDB_NOBP (-20) | ||
39 | #define KDB_BADADDR (-21) | ||
40 | |||
41 | /* Kernel Debugger Command codes. Must not overlap with error codes. */ | 18 | /* Kernel Debugger Command codes. Must not overlap with error codes. */ |
42 | #define KDB_CMD_GO (-1001) | 19 | #define KDB_CMD_GO (-1001) |
43 | #define KDB_CMD_CPU (-1002) | 20 | #define KDB_CMD_CPU (-1002) |
@@ -93,17 +70,6 @@ | |||
93 | */ | 70 | */ |
94 | #define KDB_MAXBPT 16 | 71 | #define KDB_MAXBPT 16 |
95 | 72 | ||
96 | /* Maximum number of arguments to a function */ | ||
97 | #define KDB_MAXARGS 16 | ||
98 | |||
99 | typedef enum { | ||
100 | KDB_REPEAT_NONE = 0, /* Do not repeat this command */ | ||
101 | KDB_REPEAT_NO_ARGS, /* Repeat the command without arguments */ | ||
102 | KDB_REPEAT_WITH_ARGS, /* Repeat the command including its arguments */ | ||
103 | } kdb_repeat_t; | ||
104 | |||
105 | typedef int (*kdb_func_t)(int, const char **); | ||
106 | |||
107 | /* Symbol table format returned by kallsyms. */ | 73 | /* Symbol table format returned by kallsyms. */ |
108 | typedef struct __ksymtab { | 74 | typedef struct __ksymtab { |
109 | unsigned long value; /* Address of symbol */ | 75 | unsigned long value; /* Address of symbol */ |
@@ -123,11 +89,6 @@ extern int kallsyms_symbol_next(char *prefix_name, int flag); | |||
123 | extern int kallsyms_symbol_complete(char *prefix_name, int max_len); | 89 | extern int kallsyms_symbol_complete(char *prefix_name, int max_len); |
124 | 90 | ||
125 | /* Exported Symbols for kernel loadable modules to use. */ | 91 | /* Exported Symbols for kernel loadable modules to use. */ |
126 | extern int kdb_register(char *, kdb_func_t, char *, char *, short); | ||
127 | extern int kdb_register_repeat(char *, kdb_func_t, char *, char *, | ||
128 | short, kdb_repeat_t); | ||
129 | extern int kdb_unregister(char *); | ||
130 | |||
131 | extern int kdb_getarea_size(void *, unsigned long, size_t); | 92 | extern int kdb_getarea_size(void *, unsigned long, size_t); |
132 | extern int kdb_putarea_size(unsigned long, void *, size_t); | 93 | extern int kdb_putarea_size(unsigned long, void *, size_t); |
133 | 94 | ||
@@ -144,6 +105,7 @@ extern int kdb_getword(unsigned long *, unsigned long, size_t); | |||
144 | extern int kdb_putword(unsigned long, unsigned long, size_t); | 105 | extern int kdb_putword(unsigned long, unsigned long, size_t); |
145 | 106 | ||
146 | extern int kdbgetularg(const char *, unsigned long *); | 107 | extern int kdbgetularg(const char *, unsigned long *); |
108 | extern int kdbgetu64arg(const char *, u64 *); | ||
147 | extern char *kdbgetenv(const char *); | 109 | extern char *kdbgetenv(const char *); |
148 | extern int kdbgetaddrarg(int, const char **, int*, unsigned long *, | 110 | extern int kdbgetaddrarg(int, const char **, int*, unsigned long *, |
149 | long *, char **); | 111 | long *, char **); |
@@ -255,14 +217,6 @@ extern void kdb_ps1(const struct task_struct *p); | |||
255 | extern void kdb_print_nameval(const char *name, unsigned long val); | 217 | extern void kdb_print_nameval(const char *name, unsigned long val); |
256 | extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info); | 218 | extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info); |
257 | extern void kdb_meminfo_proc_show(void); | 219 | extern void kdb_meminfo_proc_show(void); |
258 | #ifdef CONFIG_KALLSYMS | ||
259 | extern const char *kdb_walk_kallsyms(loff_t *pos); | ||
260 | #else /* ! CONFIG_KALLSYMS */ | ||
261 | static inline const char *kdb_walk_kallsyms(loff_t *pos) | ||
262 | { | ||
263 | return NULL; | ||
264 | } | ||
265 | #endif /* ! CONFIG_KALLSYMS */ | ||
266 | extern char *kdb_getstr(char *, size_t, char *); | 220 | extern char *kdb_getstr(char *, size_t, char *); |
267 | 221 | ||
268 | /* Defines for kdb_symbol_print */ | 222 | /* Defines for kdb_symbol_print */ |
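With kdb_register(), kdb_register_repeat(), kdb_unregister() and kdb_printf() now exported GPL-only and their prototypes dropped from this private header, loadable modules can add their own kdb shell commands. A minimal illustrative module sketch, assuming the public prototypes are reachable through <linux/kdb.h>; the "hello" command name, handler and module names are invented here and are not part of the commit:

#include <linux/module.h>
#include <linux/kdb.h>

/* kdb command handler: argc/argv follow the kdb_func_t convention above */
static int kdb_hello_cmd(int argc, const char **argv)
{
        kdb_printf("hello from a module (argc=%d)\n", argc);
        return 0;
}

static int __init kdb_hello_init(void)
{
        /* cmd, handler, usage string, help text, minimum match length */
        return kdb_register("hello", kdb_hello_cmd, "", "print a test greeting", 0);
}

static void __exit kdb_hello_exit(void)
{
        kdb_unregister("hello");
}

module_init(kdb_hello_init);
module_exit(kdb_hello_exit);
MODULE_LICENSE("GPL");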
diff --git a/kernel/exit.c b/kernel/exit.c index e2bdf37f9fde..21aa7b3001fb 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -50,6 +50,7 @@ | |||
50 | #include <linux/perf_event.h> | 50 | #include <linux/perf_event.h> |
51 | #include <trace/events/sched.h> | 51 | #include <trace/events/sched.h> |
52 | #include <linux/hw_breakpoint.h> | 52 | #include <linux/hw_breakpoint.h> |
53 | #include <linux/oom.h> | ||
53 | 54 | ||
54 | #include <asm/uaccess.h> | 55 | #include <asm/uaccess.h> |
55 | #include <asm/unistd.h> | 56 | #include <asm/unistd.h> |
@@ -95,6 +96,14 @@ static void __exit_signal(struct task_struct *tsk) | |||
95 | sig->tty = NULL; | 96 | sig->tty = NULL; |
96 | } else { | 97 | } else { |
97 | /* | 98 | /* |
99 | * This can only happen if the caller is de_thread(). | ||
100 | * FIXME: this is the temporary hack, we should teach | ||
101 | * posix-cpu-timers to handle this case correctly. | ||
102 | */ | ||
103 | if (unlikely(has_group_leader_pid(tsk))) | ||
104 | posix_cpu_timers_exit_group(tsk); | ||
105 | |||
106 | /* | ||
98 | * If there is any task waiting for the group exit | 107 | * If there is any task waiting for the group exit |
99 | * then notify it: | 108 | * then notify it: |
100 | */ | 109 | */ |
@@ -687,6 +696,8 @@ static void exit_mm(struct task_struct * tsk) | |||
687 | enter_lazy_tlb(mm, current); | 696 | enter_lazy_tlb(mm, current); |
688 | /* We don't want this task to be frozen prematurely */ | 697 | /* We don't want this task to be frozen prematurely */ |
689 | clear_freeze_flag(tsk); | 698 | clear_freeze_flag(tsk); |
699 | if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) | ||
700 | atomic_dec(&mm->oom_disable_count); | ||
690 | task_unlock(tsk); | 701 | task_unlock(tsk); |
691 | mm_update_next_owner(mm); | 702 | mm_update_next_owner(mm); |
692 | mmput(mm); | 703 | mmput(mm); |
@@ -700,6 +711,8 @@ static void exit_mm(struct task_struct * tsk) | |||
700 | * space. | 711 | * space. |
701 | */ | 712 | */ |
702 | static struct task_struct *find_new_reaper(struct task_struct *father) | 713 | static struct task_struct *find_new_reaper(struct task_struct *father) |
714 | __releases(&tasklist_lock) | ||
715 | __acquires(&tasklist_lock) | ||
703 | { | 716 | { |
704 | struct pid_namespace *pid_ns = task_active_pid_ns(father); | 717 | struct pid_namespace *pid_ns = task_active_pid_ns(father); |
705 | struct task_struct *thread; | 718 | struct task_struct *thread; |
diff --git a/kernel/fork.c b/kernel/fork.c index c445f8cc408d..3b159c5991b7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -65,6 +65,7 @@ | |||
65 | #include <linux/perf_event.h> | 65 | #include <linux/perf_event.h> |
66 | #include <linux/posix-timers.h> | 66 | #include <linux/posix-timers.h> |
67 | #include <linux/user-return-notifier.h> | 67 | #include <linux/user-return-notifier.h> |
68 | #include <linux/oom.h> | ||
68 | 69 | ||
69 | #include <asm/pgtable.h> | 70 | #include <asm/pgtable.h> |
70 | #include <asm/pgalloc.h> | 71 | #include <asm/pgalloc.h> |
@@ -488,6 +489,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) | |||
488 | mm->cached_hole_size = ~0UL; | 489 | mm->cached_hole_size = ~0UL; |
489 | mm_init_aio(mm); | 490 | mm_init_aio(mm); |
490 | mm_init_owner(mm, p); | 491 | mm_init_owner(mm, p); |
492 | atomic_set(&mm->oom_disable_count, 0); | ||
491 | 493 | ||
492 | if (likely(!mm_alloc_pgd(mm))) { | 494 | if (likely(!mm_alloc_pgd(mm))) { |
493 | mm->def_flags = 0; | 495 | mm->def_flags = 0; |
@@ -741,6 +743,8 @@ good_mm: | |||
741 | /* Initializing for Swap token stuff */ | 743 | /* Initializing for Swap token stuff */ |
742 | mm->token_priority = 0; | 744 | mm->token_priority = 0; |
743 | mm->last_interval = 0; | 745 | mm->last_interval = 0; |
746 | if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) | ||
747 | atomic_inc(&mm->oom_disable_count); | ||
744 | 748 | ||
745 | tsk->mm = mm; | 749 | tsk->mm = mm; |
746 | tsk->active_mm = mm; | 750 | tsk->active_mm = mm; |
@@ -904,6 +908,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
904 | sig->oom_adj = current->signal->oom_adj; | 908 | sig->oom_adj = current->signal->oom_adj; |
905 | sig->oom_score_adj = current->signal->oom_score_adj; | 909 | sig->oom_score_adj = current->signal->oom_score_adj; |
906 | 910 | ||
911 | mutex_init(&sig->cred_guard_mutex); | ||
912 | |||
907 | return 0; | 913 | return 0; |
908 | } | 914 | } |
909 | 915 | ||
@@ -1299,8 +1305,13 @@ bad_fork_cleanup_io: | |||
1299 | bad_fork_cleanup_namespaces: | 1305 | bad_fork_cleanup_namespaces: |
1300 | exit_task_namespaces(p); | 1306 | exit_task_namespaces(p); |
1301 | bad_fork_cleanup_mm: | 1307 | bad_fork_cleanup_mm: |
1302 | if (p->mm) | 1308 | if (p->mm) { |
1309 | task_lock(p); | ||
1310 | if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) | ||
1311 | atomic_dec(&p->mm->oom_disable_count); | ||
1312 | task_unlock(p); | ||
1303 | mmput(p->mm); | 1313 | mmput(p->mm); |
1314 | } | ||
1304 | bad_fork_cleanup_signal: | 1315 | bad_fork_cleanup_signal: |
1305 | if (!(clone_flags & CLONE_THREAD)) | 1316 | if (!(clone_flags & CLONE_THREAD)) |
1306 | free_signal_struct(p->signal); | 1317 | free_signal_struct(p->signal); |
@@ -1693,6 +1704,10 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) | |||
1693 | active_mm = current->active_mm; | 1704 | active_mm = current->active_mm; |
1694 | current->mm = new_mm; | 1705 | current->mm = new_mm; |
1695 | current->active_mm = new_mm; | 1706 | current->active_mm = new_mm; |
1707 | if (current->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) { | ||
1708 | atomic_dec(&mm->oom_disable_count); | ||
1709 | atomic_inc(&new_mm->oom_disable_count); | ||
1710 | } | ||
1696 | activate_mm(active_mm, new_mm); | 1711 | activate_mm(active_mm, new_mm); |
1697 | new_mm = mm; | 1712 | new_mm = mm; |
1698 | } | 1713 | } |
diff --git a/kernel/futex.c b/kernel/futex.c index a118bf160e0b..6c683b37f2ce 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -169,7 +169,7 @@ static void get_futex_key_refs(union futex_key *key) | |||
169 | 169 | ||
170 | switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { | 170 | switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { |
171 | case FUT_OFF_INODE: | 171 | case FUT_OFF_INODE: |
172 | atomic_inc(&key->shared.inode->i_count); | 172 | ihold(key->shared.inode); |
173 | break; | 173 | break; |
174 | case FUT_OFF_MMSHARED: | 174 | case FUT_OFF_MMSHARED: |
175 | atomic_inc(&key->private.mm->mm_count); | 175 | atomic_inc(&key->private.mm->mm_count); |
diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c index f83972b16564..9bd0934f6c33 100644 --- a/kernel/gcov/fs.c +++ b/kernel/gcov/fs.c | |||
@@ -561,6 +561,7 @@ static ssize_t reset_read(struct file *file, char __user *addr, size_t len, | |||
561 | static const struct file_operations gcov_reset_fops = { | 561 | static const struct file_operations gcov_reset_fops = { |
562 | .write = reset_write, | 562 | .write = reset_write, |
563 | .read = reset_read, | 563 | .read = reset_read, |
564 | .llseek = noop_llseek, | ||
564 | }; | 565 | }; |
565 | 566 | ||
566 | /* | 567 | /* |
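gcov's reset file is one of several file_operations in this merge that now name their .llseek method explicitly (noop_llseek here, default_llseek for the kprobes and profile files further down) instead of relying on the old implicit fallback. A minimal illustrative pattern with a hypothetical read-only file; noop_llseek is the stock helper from <linux/fs.h>:

#include <linux/fs.h>

static ssize_t example_read(struct file *file, char __user *buf,
                            size_t len, loff_t *ppos)
{
        return 0;       /* nothing to hand back in this sketch */
}

static const struct file_operations example_fops = {
        .read   = example_read,
        /* seeking carries no meaning for this virtual file */
        .llseek = noop_llseek,
};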
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 9d917ff72675..9988d03797f5 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c | |||
@@ -393,3 +393,18 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) | |||
393 | struct irq_desc *desc = irq_to_desc(irq); | 393 | struct irq_desc *desc = irq_to_desc(irq); |
394 | return desc ? desc->kstat_irqs[cpu] : 0; | 394 | return desc ? desc->kstat_irqs[cpu] : 0; |
395 | } | 395 | } |
396 | |||
397 | #ifdef CONFIG_GENERIC_HARDIRQS | ||
398 | unsigned int kstat_irqs(unsigned int irq) | ||
399 | { | ||
400 | struct irq_desc *desc = irq_to_desc(irq); | ||
401 | int cpu; | ||
402 | int sum = 0; | ||
403 | |||
404 | if (!desc) | ||
405 | return 0; | ||
406 | for_each_possible_cpu(cpu) | ||
407 | sum += desc->kstat_irqs[cpu]; | ||
408 | return sum; | ||
409 | } | ||
410 | #endif /* CONFIG_GENERIC_HARDIRQS */ | ||
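The new kstat_irqs() returns the per-IRQ total by summing desc->kstat_irqs over every possible CPU, and 0 for an IRQ without a descriptor. A hypothetical caller (the helper name and message are invented), assuming the prototype is exposed via <linux/kernel_stat.h>:

#include <linux/kernel.h>
#include <linux/kernel_stat.h>

/* Print how often a given IRQ has fired, summed across all possible CPUs. */
static void report_irq_total(unsigned int irq)
{
        pr_info("irq %u: %u interrupts so far\n", irq, kstat_irqs(irq));
}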
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 850f030fa0c2..91a5fa25054e 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
@@ -324,6 +324,10 @@ void enable_irq(unsigned int irq) | |||
324 | if (!desc) | 324 | if (!desc) |
325 | return; | 325 | return; |
326 | 326 | ||
327 | if (WARN(!desc->irq_data.chip || !desc->irq_data.chip->irq_enable, | ||
328 | KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq)) | ||
329 | return; | ||
330 | |||
327 | chip_bus_lock(desc); | 331 | chip_bus_lock(desc); |
328 | raw_spin_lock_irqsave(&desc->lock, flags); | 332 | raw_spin_lock_irqsave(&desc->lock, flags); |
329 | __enable_irq(desc, irq, false); | 333 | __enable_irq(desc, irq, false); |
diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 7be868bf25c6..3b79bd938330 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c | |||
@@ -39,6 +39,16 @@ struct jump_label_module_entry { | |||
39 | struct module *mod; | 39 | struct module *mod; |
40 | }; | 40 | }; |
41 | 41 | ||
42 | void jump_label_lock(void) | ||
43 | { | ||
44 | mutex_lock(&jump_label_mutex); | ||
45 | } | ||
46 | |||
47 | void jump_label_unlock(void) | ||
48 | { | ||
49 | mutex_unlock(&jump_label_mutex); | ||
50 | } | ||
51 | |||
42 | static int jump_label_cmp(const void *a, const void *b) | 52 | static int jump_label_cmp(const void *a, const void *b) |
43 | { | 53 | { |
44 | const struct jump_entry *jea = a; | 54 | const struct jump_entry *jea = a; |
@@ -152,7 +162,7 @@ void jump_label_update(unsigned long key, enum jump_label_type type) | |||
152 | struct jump_label_module_entry *e_module; | 162 | struct jump_label_module_entry *e_module; |
153 | int count; | 163 | int count; |
154 | 164 | ||
155 | mutex_lock(&jump_label_mutex); | 165 | jump_label_lock(); |
156 | entry = get_jump_label_entry((jump_label_t)key); | 166 | entry = get_jump_label_entry((jump_label_t)key); |
157 | if (entry) { | 167 | if (entry) { |
158 | count = entry->nr_entries; | 168 | count = entry->nr_entries; |
@@ -168,13 +178,14 @@ void jump_label_update(unsigned long key, enum jump_label_type type) | |||
168 | count = e_module->nr_entries; | 178 | count = e_module->nr_entries; |
169 | iter = e_module->table; | 179 | iter = e_module->table; |
170 | while (count--) { | 180 | while (count--) { |
171 | if (kernel_text_address(iter->code)) | 181 | if (iter->key && |
182 | kernel_text_address(iter->code)) | ||
172 | arch_jump_label_transform(iter, type); | 183 | arch_jump_label_transform(iter, type); |
173 | iter++; | 184 | iter++; |
174 | } | 185 | } |
175 | } | 186 | } |
176 | } | 187 | } |
177 | mutex_unlock(&jump_label_mutex); | 188 | jump_label_unlock(); |
178 | } | 189 | } |
179 | 190 | ||
180 | static int addr_conflict(struct jump_entry *entry, void *start, void *end) | 191 | static int addr_conflict(struct jump_entry *entry, void *start, void *end) |
@@ -231,6 +242,7 @@ out: | |||
231 | * overlaps with any of the jump label patch addresses. Code | 242 | * overlaps with any of the jump label patch addresses. Code |
232 | * that wants to modify kernel text should first verify that | 243 | * that wants to modify kernel text should first verify that |
233 | * it does not overlap with any of the jump label addresses. | 244 | * it does not overlap with any of the jump label addresses. |
245 | * Caller must hold jump_label_mutex. | ||
234 | * | 246 | * |
235 | * returns 1 if there is an overlap, 0 otherwise | 247 | * returns 1 if there is an overlap, 0 otherwise |
236 | */ | 248 | */ |
@@ -241,7 +253,6 @@ int jump_label_text_reserved(void *start, void *end) | |||
241 | struct jump_entry *iter_stop = __start___jump_table; | 253 | struct jump_entry *iter_stop = __start___jump_table; |
242 | int conflict = 0; | 254 | int conflict = 0; |
243 | 255 | ||
244 | mutex_lock(&jump_label_mutex); | ||
245 | iter = iter_start; | 256 | iter = iter_start; |
246 | while (iter < iter_stop) { | 257 | while (iter < iter_stop) { |
247 | if (addr_conflict(iter, start, end)) { | 258 | if (addr_conflict(iter, start, end)) { |
@@ -256,10 +267,16 @@ int jump_label_text_reserved(void *start, void *end) | |||
256 | conflict = module_conflict(start, end); | 267 | conflict = module_conflict(start, end); |
257 | #endif | 268 | #endif |
258 | out: | 269 | out: |
259 | mutex_unlock(&jump_label_mutex); | ||
260 | return conflict; | 270 | return conflict; |
261 | } | 271 | } |
262 | 272 | ||
273 | /* | ||
274 | * Not all archs need this. | ||
275 | */ | ||
276 | void __weak arch_jump_label_text_poke_early(jump_label_t addr) | ||
277 | { | ||
278 | } | ||
279 | |||
263 | static __init int init_jump_label(void) | 280 | static __init int init_jump_label(void) |
264 | { | 281 | { |
265 | int ret; | 282 | int ret; |
@@ -267,7 +284,7 @@ static __init int init_jump_label(void) | |||
267 | struct jump_entry *iter_stop = __stop___jump_table; | 284 | struct jump_entry *iter_stop = __stop___jump_table; |
268 | struct jump_entry *iter; | 285 | struct jump_entry *iter; |
269 | 286 | ||
270 | mutex_lock(&jump_label_mutex); | 287 | jump_label_lock(); |
271 | ret = build_jump_label_hashtable(__start___jump_table, | 288 | ret = build_jump_label_hashtable(__start___jump_table, |
272 | __stop___jump_table); | 289 | __stop___jump_table); |
273 | iter = iter_start; | 290 | iter = iter_start; |
@@ -275,7 +292,7 @@ static __init int init_jump_label(void) | |||
275 | arch_jump_label_text_poke_early(iter->code); | 292 | arch_jump_label_text_poke_early(iter->code); |
276 | iter++; | 293 | iter++; |
277 | } | 294 | } |
278 | mutex_unlock(&jump_label_mutex); | 295 | jump_label_unlock(); |
279 | return ret; | 296 | return ret; |
280 | } | 297 | } |
281 | early_initcall(init_jump_label); | 298 | early_initcall(init_jump_label); |
@@ -366,6 +383,39 @@ static void remove_jump_label_module(struct module *mod) | |||
366 | } | 383 | } |
367 | } | 384 | } |
368 | 385 | ||
386 | static void remove_jump_label_module_init(struct module *mod) | ||
387 | { | ||
388 | struct hlist_head *head; | ||
389 | struct hlist_node *node, *node_next, *module_node, *module_node_next; | ||
390 | struct jump_label_entry *e; | ||
391 | struct jump_label_module_entry *e_module; | ||
392 | struct jump_entry *iter; | ||
393 | int i, count; | ||
394 | |||
395 | /* if the module doesn't have jump label entries, just return */ | ||
396 | if (!mod->num_jump_entries) | ||
397 | return; | ||
398 | |||
399 | for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) { | ||
400 | head = &jump_label_table[i]; | ||
401 | hlist_for_each_entry_safe(e, node, node_next, head, hlist) { | ||
402 | hlist_for_each_entry_safe(e_module, module_node, | ||
403 | module_node_next, | ||
404 | &(e->modules), hlist) { | ||
405 | if (e_module->mod != mod) | ||
406 | continue; | ||
407 | count = e_module->nr_entries; | ||
408 | iter = e_module->table; | ||
409 | while (count--) { | ||
410 | if (within_module_init(iter->code, mod)) | ||
411 | iter->key = 0; | ||
412 | iter++; | ||
413 | } | ||
414 | } | ||
415 | } | ||
416 | } | ||
417 | } | ||
418 | |||
369 | static int | 419 | static int |
370 | jump_label_module_notify(struct notifier_block *self, unsigned long val, | 420 | jump_label_module_notify(struct notifier_block *self, unsigned long val, |
371 | void *data) | 421 | void *data) |
@@ -375,16 +425,21 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val, | |||
375 | 425 | ||
376 | switch (val) { | 426 | switch (val) { |
377 | case MODULE_STATE_COMING: | 427 | case MODULE_STATE_COMING: |
378 | mutex_lock(&jump_label_mutex); | 428 | jump_label_lock(); |
379 | ret = add_jump_label_module(mod); | 429 | ret = add_jump_label_module(mod); |
380 | if (ret) | 430 | if (ret) |
381 | remove_jump_label_module(mod); | 431 | remove_jump_label_module(mod); |
382 | mutex_unlock(&jump_label_mutex); | 432 | jump_label_unlock(); |
383 | break; | 433 | break; |
384 | case MODULE_STATE_GOING: | 434 | case MODULE_STATE_GOING: |
385 | mutex_lock(&jump_label_mutex); | 435 | jump_label_lock(); |
386 | remove_jump_label_module(mod); | 436 | remove_jump_label_module(mod); |
387 | mutex_unlock(&jump_label_mutex); | 437 | jump_label_unlock(); |
438 | break; | ||
439 | case MODULE_STATE_LIVE: | ||
440 | jump_label_lock(); | ||
441 | remove_jump_label_module_init(mod); | ||
442 | jump_label_unlock(); | ||
388 | break; | 443 | break; |
389 | } | 444 | } |
390 | return ret; | 445 | return ret; |
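jump_label_text_reserved() loses its internal mutex_lock(); the new jump_label_lock()/jump_label_unlock() wrappers make the caller responsible for the mutex, which is how register_kprobe() uses it further down. A sketch of the resulting contract (the function name is invented, and it assumes the wrappers and jump_label_text_reserved() are visible via <linux/jump_label.h>):

#include <linux/jump_label.h>

/* Illustrative check: is a text range free of jump label patch sites?
 * Real callers such as register_kprobe() keep jump_label_lock() held
 * across the text modification itself, not just across the check.
 */
static int text_range_is_free(void *start, void *end)
{
        int reserved;

        jump_label_lock();
        reserved = jump_label_text_reserved(start, end);
        jump_label_unlock();

        return !reserved;
}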
diff --git a/kernel/kexec.c b/kernel/kexec.c index c0613f7d6730..b55045bc7563 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c | |||
@@ -816,7 +816,7 @@ static int kimage_load_normal_segment(struct kimage *image, | |||
816 | 816 | ||
817 | ptr = kmap(page); | 817 | ptr = kmap(page); |
818 | /* Start with a clear page */ | 818 | /* Start with a clear page */ |
819 | memset(ptr, 0, PAGE_SIZE); | 819 | clear_page(ptr); |
820 | ptr += maddr & ~PAGE_MASK; | 820 | ptr += maddr & ~PAGE_MASK; |
821 | mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK); | 821 | mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK); |
822 | if (mchunk > mbytes) | 822 | if (mchunk > mbytes) |
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index ec4210c6501e..9737a76e106f 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
@@ -74,7 +74,8 @@ static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; | |||
74 | /* NOTE: change this value only with kprobe_mutex held */ | 74 | /* NOTE: change this value only with kprobe_mutex held */ |
75 | static bool kprobes_all_disarmed; | 75 | static bool kprobes_all_disarmed; |
76 | 76 | ||
77 | static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ | 77 | /* This protects kprobe_table and optimizing_list */ |
78 | static DEFINE_MUTEX(kprobe_mutex); | ||
78 | static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; | 79 | static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; |
79 | static struct { | 80 | static struct { |
80 | spinlock_t lock ____cacheline_aligned_in_smp; | 81 | spinlock_t lock ____cacheline_aligned_in_smp; |
@@ -595,6 +596,7 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p) | |||
595 | } | 596 | } |
596 | 597 | ||
597 | #ifdef CONFIG_SYSCTL | 598 | #ifdef CONFIG_SYSCTL |
599 | /* This should be called with kprobe_mutex locked */ | ||
598 | static void __kprobes optimize_all_kprobes(void) | 600 | static void __kprobes optimize_all_kprobes(void) |
599 | { | 601 | { |
600 | struct hlist_head *head; | 602 | struct hlist_head *head; |
@@ -607,17 +609,16 @@ static void __kprobes optimize_all_kprobes(void) | |||
607 | return; | 609 | return; |
608 | 610 | ||
609 | kprobes_allow_optimization = true; | 611 | kprobes_allow_optimization = true; |
610 | mutex_lock(&text_mutex); | ||
611 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { | 612 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { |
612 | head = &kprobe_table[i]; | 613 | head = &kprobe_table[i]; |
613 | hlist_for_each_entry_rcu(p, node, head, hlist) | 614 | hlist_for_each_entry_rcu(p, node, head, hlist) |
614 | if (!kprobe_disabled(p)) | 615 | if (!kprobe_disabled(p)) |
615 | optimize_kprobe(p); | 616 | optimize_kprobe(p); |
616 | } | 617 | } |
617 | mutex_unlock(&text_mutex); | ||
618 | printk(KERN_INFO "Kprobes globally optimized\n"); | 618 | printk(KERN_INFO "Kprobes globally optimized\n"); |
619 | } | 619 | } |
620 | 620 | ||
621 | /* This should be called with kprobe_mutex locked */ | ||
621 | static void __kprobes unoptimize_all_kprobes(void) | 622 | static void __kprobes unoptimize_all_kprobes(void) |
622 | { | 623 | { |
623 | struct hlist_head *head; | 624 | struct hlist_head *head; |
@@ -1144,14 +1145,13 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
1144 | if (ret) | 1145 | if (ret) |
1145 | return ret; | 1146 | return ret; |
1146 | 1147 | ||
1148 | jump_label_lock(); | ||
1147 | preempt_disable(); | 1149 | preempt_disable(); |
1148 | if (!kernel_text_address((unsigned long) p->addr) || | 1150 | if (!kernel_text_address((unsigned long) p->addr) || |
1149 | in_kprobes_functions((unsigned long) p->addr) || | 1151 | in_kprobes_functions((unsigned long) p->addr) || |
1150 | ftrace_text_reserved(p->addr, p->addr) || | 1152 | ftrace_text_reserved(p->addr, p->addr) || |
1151 | jump_label_text_reserved(p->addr, p->addr)) { | 1153 | jump_label_text_reserved(p->addr, p->addr)) |
1152 | preempt_enable(); | 1154 | goto fail_with_jump_label; |
1153 | return -EINVAL; | ||
1154 | } | ||
1155 | 1155 | ||
1156 | /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */ | 1156 | /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */ |
1157 | p->flags &= KPROBE_FLAG_DISABLED; | 1157 | p->flags &= KPROBE_FLAG_DISABLED; |
@@ -1165,10 +1165,9 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
1165 | * We must hold a refcount of the probed module while updating | 1165 | * We must hold a refcount of the probed module while updating |
1166 | * its code to prohibit unexpected unloading. | 1166 | * its code to prohibit unexpected unloading. |
1167 | */ | 1167 | */ |
1168 | if (unlikely(!try_module_get(probed_mod))) { | 1168 | if (unlikely(!try_module_get(probed_mod))) |
1169 | preempt_enable(); | 1169 | goto fail_with_jump_label; |
1170 | return -EINVAL; | 1170 | |
1171 | } | ||
1172 | /* | 1171 | /* |
1173 | * If the module freed .init.text, we couldn't insert | 1172 | * If the module freed .init.text, we couldn't insert |
1174 | * kprobes in there. | 1173 | * kprobes in there. |
@@ -1176,16 +1175,18 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
1176 | if (within_module_init((unsigned long)p->addr, probed_mod) && | 1175 | if (within_module_init((unsigned long)p->addr, probed_mod) && |
1177 | probed_mod->state != MODULE_STATE_COMING) { | 1176 | probed_mod->state != MODULE_STATE_COMING) { |
1178 | module_put(probed_mod); | 1177 | module_put(probed_mod); |
1179 | preempt_enable(); | 1178 | goto fail_with_jump_label; |
1180 | return -EINVAL; | ||
1181 | } | 1179 | } |
1182 | } | 1180 | } |
1183 | preempt_enable(); | 1181 | preempt_enable(); |
1182 | jump_label_unlock(); | ||
1184 | 1183 | ||
1185 | p->nmissed = 0; | 1184 | p->nmissed = 0; |
1186 | INIT_LIST_HEAD(&p->list); | 1185 | INIT_LIST_HEAD(&p->list); |
1187 | mutex_lock(&kprobe_mutex); | 1186 | mutex_lock(&kprobe_mutex); |
1188 | 1187 | ||
1188 | jump_label_lock(); /* needed to call jump_label_text_reserved() */ | ||
1189 | |||
1189 | get_online_cpus(); /* For avoiding text_mutex deadlock. */ | 1190 | get_online_cpus(); /* For avoiding text_mutex deadlock. */ |
1190 | mutex_lock(&text_mutex); | 1191 | mutex_lock(&text_mutex); |
1191 | 1192 | ||
@@ -1213,12 +1214,18 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
1213 | out: | 1214 | out: |
1214 | mutex_unlock(&text_mutex); | 1215 | mutex_unlock(&text_mutex); |
1215 | put_online_cpus(); | 1216 | put_online_cpus(); |
1217 | jump_label_unlock(); | ||
1216 | mutex_unlock(&kprobe_mutex); | 1218 | mutex_unlock(&kprobe_mutex); |
1217 | 1219 | ||
1218 | if (probed_mod) | 1220 | if (probed_mod) |
1219 | module_put(probed_mod); | 1221 | module_put(probed_mod); |
1220 | 1222 | ||
1221 | return ret; | 1223 | return ret; |
1224 | |||
1225 | fail_with_jump_label: | ||
1226 | preempt_enable(); | ||
1227 | jump_label_unlock(); | ||
1228 | return -EINVAL; | ||
1222 | } | 1229 | } |
1223 | EXPORT_SYMBOL_GPL(register_kprobe); | 1230 | EXPORT_SYMBOL_GPL(register_kprobe); |
1224 | 1231 | ||
@@ -2000,6 +2007,7 @@ static ssize_t write_enabled_file_bool(struct file *file, | |||
2000 | static const struct file_operations fops_kp = { | 2007 | static const struct file_operations fops_kp = { |
2001 | .read = read_enabled_file_bool, | 2008 | .read = read_enabled_file_bool, |
2002 | .write = write_enabled_file_bool, | 2009 | .write = write_enabled_file_bool, |
2010 | .llseek = default_llseek, | ||
2003 | }; | 2011 | }; |
2004 | 2012 | ||
2005 | static int __kprobes debugfs_kprobe_init(void) | 2013 | static int __kprobes debugfs_kprobe_init(void) |
diff --git a/kernel/latencytop.c b/kernel/latencytop.c index 877fb306d415..17110a4a4fc2 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c | |||
@@ -194,14 +194,7 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter) | |||
194 | 194 | ||
195 | account_global_scheduler_latency(tsk, &lat); | 195 | account_global_scheduler_latency(tsk, &lat); |
196 | 196 | ||
197 | /* | 197 | for (i = 0; i < tsk->latency_record_count; i++) { |
198 | * short term hack; if we're > 32 we stop; future we recycle: | ||
199 | */ | ||
200 | tsk->latency_record_count++; | ||
201 | if (tsk->latency_record_count >= LT_SAVECOUNT) | ||
202 | goto out_unlock; | ||
203 | |||
204 | for (i = 0; i < LT_SAVECOUNT; i++) { | ||
205 | struct latency_record *mylat; | 198 | struct latency_record *mylat; |
206 | int same = 1; | 199 | int same = 1; |
207 | 200 | ||
@@ -227,8 +220,14 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter) | |||
227 | } | 220 | } |
228 | } | 221 | } |
229 | 222 | ||
223 | /* | ||
224 | * short term hack; if we're > 32 we stop; future we recycle: | ||
225 | */ | ||
226 | if (tsk->latency_record_count >= LT_SAVECOUNT) | ||
227 | goto out_unlock; | ||
228 | |||
230 | /* Allocated a new one: */ | 229 | /* Allocated a new one: */ |
231 | i = tsk->latency_record_count; | 230 | i = tsk->latency_record_count++; |
232 | memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record)); | 231 | memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record)); |
233 | 232 | ||
234 | out_unlock: | 233 | out_unlock: |
diff --git a/kernel/module.c b/kernel/module.c index 2df46301a7a4..437a74a7524a 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -2037,7 +2037,7 @@ static inline void layout_symtab(struct module *mod, struct load_info *info) | |||
2037 | { | 2037 | { |
2038 | } | 2038 | } |
2039 | 2039 | ||
2040 | static void add_kallsyms(struct module *mod, struct load_info *info) | 2040 | static void add_kallsyms(struct module *mod, const struct load_info *info) |
2041 | { | 2041 | { |
2042 | } | 2042 | } |
2043 | #endif /* CONFIG_KALLSYMS */ | 2043 | #endif /* CONFIG_KALLSYMS */ |
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c index 2a5dfec8efe0..2c98ad94ba0e 100644 --- a/kernel/ns_cgroup.c +++ b/kernel/ns_cgroup.c | |||
@@ -85,6 +85,14 @@ static struct cgroup_subsys_state *ns_create(struct cgroup_subsys *ss, | |||
85 | return ERR_PTR(-EPERM); | 85 | return ERR_PTR(-EPERM); |
86 | if (!cgroup_is_descendant(cgroup, current)) | 86 | if (!cgroup_is_descendant(cgroup, current)) |
87 | return ERR_PTR(-EPERM); | 87 | return ERR_PTR(-EPERM); |
88 | if (test_bit(CGRP_CLONE_CHILDREN, &cgroup->flags)) { | ||
89 | printk("ns_cgroup can't be created with parent " | ||
90 | "'clone_children' set.\n"); | ||
91 | return ERR_PTR(-EINVAL); | ||
92 | } | ||
93 | |||
94 | printk_once("ns_cgroup deprecated: consider using the " | ||
95 | "'clone_children' flag without the ns_cgroup.\n"); | ||
88 | 96 | ||
89 | ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL); | 97 | ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL); |
90 | if (!ns_cgroup) | 98 | if (!ns_cgroup) |
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index f309e8014c78..cb6c0d2af68f 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c | |||
@@ -417,8 +417,8 @@ event_filter_match(struct perf_event *event) | |||
417 | return event->cpu == -1 || event->cpu == smp_processor_id(); | 417 | return event->cpu == -1 || event->cpu == smp_processor_id(); |
418 | } | 418 | } |
419 | 419 | ||
420 | static int | 420 | static void |
421 | __event_sched_out(struct perf_event *event, | 421 | event_sched_out(struct perf_event *event, |
422 | struct perf_cpu_context *cpuctx, | 422 | struct perf_cpu_context *cpuctx, |
423 | struct perf_event_context *ctx) | 423 | struct perf_event_context *ctx) |
424 | { | 424 | { |
@@ -437,13 +437,14 @@ __event_sched_out(struct perf_event *event, | |||
437 | } | 437 | } |
438 | 438 | ||
439 | if (event->state != PERF_EVENT_STATE_ACTIVE) | 439 | if (event->state != PERF_EVENT_STATE_ACTIVE) |
440 | return 0; | 440 | return; |
441 | 441 | ||
442 | event->state = PERF_EVENT_STATE_INACTIVE; | 442 | event->state = PERF_EVENT_STATE_INACTIVE; |
443 | if (event->pending_disable) { | 443 | if (event->pending_disable) { |
444 | event->pending_disable = 0; | 444 | event->pending_disable = 0; |
445 | event->state = PERF_EVENT_STATE_OFF; | 445 | event->state = PERF_EVENT_STATE_OFF; |
446 | } | 446 | } |
447 | event->tstamp_stopped = ctx->time; | ||
447 | event->pmu->del(event, 0); | 448 | event->pmu->del(event, 0); |
448 | event->oncpu = -1; | 449 | event->oncpu = -1; |
449 | 450 | ||
@@ -452,19 +453,6 @@ __event_sched_out(struct perf_event *event, | |||
452 | ctx->nr_active--; | 453 | ctx->nr_active--; |
453 | if (event->attr.exclusive || !cpuctx->active_oncpu) | 454 | if (event->attr.exclusive || !cpuctx->active_oncpu) |
454 | cpuctx->exclusive = 0; | 455 | cpuctx->exclusive = 0; |
455 | return 1; | ||
456 | } | ||
457 | |||
458 | static void | ||
459 | event_sched_out(struct perf_event *event, | ||
460 | struct perf_cpu_context *cpuctx, | ||
461 | struct perf_event_context *ctx) | ||
462 | { | ||
463 | int ret; | ||
464 | |||
465 | ret = __event_sched_out(event, cpuctx, ctx); | ||
466 | if (ret) | ||
467 | event->tstamp_stopped = ctx->time; | ||
468 | } | 456 | } |
469 | 457 | ||
470 | static void | 458 | static void |
@@ -664,7 +652,7 @@ retry: | |||
664 | } | 652 | } |
665 | 653 | ||
666 | static int | 654 | static int |
667 | __event_sched_in(struct perf_event *event, | 655 | event_sched_in(struct perf_event *event, |
668 | struct perf_cpu_context *cpuctx, | 656 | struct perf_cpu_context *cpuctx, |
669 | struct perf_event_context *ctx) | 657 | struct perf_event_context *ctx) |
670 | { | 658 | { |
@@ -684,6 +672,10 @@ __event_sched_in(struct perf_event *event, | |||
684 | return -EAGAIN; | 672 | return -EAGAIN; |
685 | } | 673 | } |
686 | 674 | ||
675 | event->tstamp_running += ctx->time - event->tstamp_stopped; | ||
676 | |||
677 | event->shadow_ctx_time = ctx->time - ctx->timestamp; | ||
678 | |||
687 | if (!is_software_event(event)) | 679 | if (!is_software_event(event)) |
688 | cpuctx->active_oncpu++; | 680 | cpuctx->active_oncpu++; |
689 | ctx->nr_active++; | 681 | ctx->nr_active++; |
@@ -694,35 +686,6 @@ __event_sched_in(struct perf_event *event, | |||
694 | return 0; | 686 | return 0; |
695 | } | 687 | } |
696 | 688 | ||
697 | static inline int | ||
698 | event_sched_in(struct perf_event *event, | ||
699 | struct perf_cpu_context *cpuctx, | ||
700 | struct perf_event_context *ctx) | ||
701 | { | ||
702 | int ret = __event_sched_in(event, cpuctx, ctx); | ||
703 | if (ret) | ||
704 | return ret; | ||
705 | event->tstamp_running += ctx->time - event->tstamp_stopped; | ||
706 | return 0; | ||
707 | } | ||
708 | |||
709 | static void | ||
710 | group_commit_event_sched_in(struct perf_event *group_event, | ||
711 | struct perf_cpu_context *cpuctx, | ||
712 | struct perf_event_context *ctx) | ||
713 | { | ||
714 | struct perf_event *event; | ||
715 | u64 now = ctx->time; | ||
716 | |||
717 | group_event->tstamp_running += now - group_event->tstamp_stopped; | ||
718 | /* | ||
719 | * Schedule in siblings as one group (if any): | ||
720 | */ | ||
721 | list_for_each_entry(event, &group_event->sibling_list, group_entry) { | ||
722 | event->tstamp_running += now - event->tstamp_stopped; | ||
723 | } | ||
724 | } | ||
725 | |||
726 | static int | 689 | static int |
727 | group_sched_in(struct perf_event *group_event, | 690 | group_sched_in(struct perf_event *group_event, |
728 | struct perf_cpu_context *cpuctx, | 691 | struct perf_cpu_context *cpuctx, |
@@ -730,19 +693,15 @@ group_sched_in(struct perf_event *group_event, | |||
730 | { | 693 | { |
731 | struct perf_event *event, *partial_group = NULL; | 694 | struct perf_event *event, *partial_group = NULL; |
732 | struct pmu *pmu = group_event->pmu; | 695 | struct pmu *pmu = group_event->pmu; |
696 | u64 now = ctx->time; | ||
697 | bool simulate = false; | ||
733 | 698 | ||
734 | if (group_event->state == PERF_EVENT_STATE_OFF) | 699 | if (group_event->state == PERF_EVENT_STATE_OFF) |
735 | return 0; | 700 | return 0; |
736 | 701 | ||
737 | pmu->start_txn(pmu); | 702 | pmu->start_txn(pmu); |
738 | 703 | ||
739 | /* | 704 | if (event_sched_in(group_event, cpuctx, ctx)) { |
740 | * use __event_sched_in() to delay updating tstamp_running | ||
741 | * until the transaction is committed. In case of failure | ||
742 | * we will keep an unmodified tstamp_running which is a | ||
743 | * requirement to get correct timing information | ||
744 | */ | ||
745 | if (__event_sched_in(group_event, cpuctx, ctx)) { | ||
746 | pmu->cancel_txn(pmu); | 705 | pmu->cancel_txn(pmu); |
747 | return -EAGAIN; | 706 | return -EAGAIN; |
748 | } | 707 | } |
@@ -751,31 +710,42 @@ group_sched_in(struct perf_event *group_event, | |||
751 | * Schedule in siblings as one group (if any): | 710 | * Schedule in siblings as one group (if any): |
752 | */ | 711 | */ |
753 | list_for_each_entry(event, &group_event->sibling_list, group_entry) { | 712 | list_for_each_entry(event, &group_event->sibling_list, group_entry) { |
754 | if (__event_sched_in(event, cpuctx, ctx)) { | 713 | if (event_sched_in(event, cpuctx, ctx)) { |
755 | partial_group = event; | 714 | partial_group = event; |
756 | goto group_error; | 715 | goto group_error; |
757 | } | 716 | } |
758 | } | 717 | } |
759 | 718 | ||
760 | if (!pmu->commit_txn(pmu)) { | 719 | if (!pmu->commit_txn(pmu)) |
761 | /* commit tstamp_running */ | ||
762 | group_commit_event_sched_in(group_event, cpuctx, ctx); | ||
763 | return 0; | 720 | return 0; |
764 | } | 721 | |
765 | group_error: | 722 | group_error: |
766 | /* | 723 | /* |
767 | * Groups can be scheduled in as one unit only, so undo any | 724 | * Groups can be scheduled in as one unit only, so undo any |
768 | * partial group before returning: | 725 | * partial group before returning: |
726 | * The events up to the failed event are scheduled out normally, | ||
727 | * tstamp_stopped will be updated. | ||
769 | * | 728 | * |
770 | * use __event_sched_out() to avoid updating tstamp_stopped | 729 | * The failed events and the remaining siblings need to have |
771 | * because the event never actually ran | 730 | * their timings updated as if they had gone thru event_sched_in() |
731 | * and event_sched_out(). This is required to get consistent timings | ||
732 | * across the group. This also takes care of the case where the group | ||
733 | * could never be scheduled by ensuring tstamp_stopped is set to mark | ||
734 | * the time the event was actually stopped, such that time delta | ||
735 | * calculation in update_event_times() is correct. | ||
772 | */ | 736 | */ |
773 | list_for_each_entry(event, &group_event->sibling_list, group_entry) { | 737 | list_for_each_entry(event, &group_event->sibling_list, group_entry) { |
774 | if (event == partial_group) | 738 | if (event == partial_group) |
775 | break; | 739 | simulate = true; |
776 | __event_sched_out(event, cpuctx, ctx); | 740 | |
741 | if (simulate) { | ||
742 | event->tstamp_running += now - event->tstamp_stopped; | ||
743 | event->tstamp_stopped = now; | ||
744 | } else { | ||
745 | event_sched_out(event, cpuctx, ctx); | ||
746 | } | ||
777 | } | 747 | } |
778 | __event_sched_out(group_event, cpuctx, ctx); | 748 | event_sched_out(group_event, cpuctx, ctx); |
779 | 749 | ||
780 | pmu->cancel_txn(pmu); | 750 | pmu->cancel_txn(pmu); |
781 | 751 | ||
@@ -3428,7 +3398,8 @@ static u32 perf_event_tid(struct perf_event *event, struct task_struct *p) | |||
3428 | } | 3398 | } |
3429 | 3399 | ||
3430 | static void perf_output_read_one(struct perf_output_handle *handle, | 3400 | static void perf_output_read_one(struct perf_output_handle *handle, |
3431 | struct perf_event *event) | 3401 | struct perf_event *event, |
3402 | u64 enabled, u64 running) | ||
3432 | { | 3403 | { |
3433 | u64 read_format = event->attr.read_format; | 3404 | u64 read_format = event->attr.read_format; |
3434 | u64 values[4]; | 3405 | u64 values[4]; |
@@ -3436,11 +3407,11 @@ static void perf_output_read_one(struct perf_output_handle *handle, | |||
3436 | 3407 | ||
3437 | values[n++] = perf_event_count(event); | 3408 | values[n++] = perf_event_count(event); |
3438 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { | 3409 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { |
3439 | values[n++] = event->total_time_enabled + | 3410 | values[n++] = enabled + |
3440 | atomic64_read(&event->child_total_time_enabled); | 3411 | atomic64_read(&event->child_total_time_enabled); |
3441 | } | 3412 | } |
3442 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { | 3413 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { |
3443 | values[n++] = event->total_time_running + | 3414 | values[n++] = running + |
3444 | atomic64_read(&event->child_total_time_running); | 3415 | atomic64_read(&event->child_total_time_running); |
3445 | } | 3416 | } |
3446 | if (read_format & PERF_FORMAT_ID) | 3417 | if (read_format & PERF_FORMAT_ID) |
@@ -3453,7 +3424,8 @@ static void perf_output_read_one(struct perf_output_handle *handle, | |||
3453 | * XXX PERF_FORMAT_GROUP vs inherited events seems difficult. | 3424 | * XXX PERF_FORMAT_GROUP vs inherited events seems difficult. |
3454 | */ | 3425 | */ |
3455 | static void perf_output_read_group(struct perf_output_handle *handle, | 3426 | static void perf_output_read_group(struct perf_output_handle *handle, |
3456 | struct perf_event *event) | 3427 | struct perf_event *event, |
3428 | u64 enabled, u64 running) | ||
3457 | { | 3429 | { |
3458 | struct perf_event *leader = event->group_leader, *sub; | 3430 | struct perf_event *leader = event->group_leader, *sub; |
3459 | u64 read_format = event->attr.read_format; | 3431 | u64 read_format = event->attr.read_format; |
@@ -3463,10 +3435,10 @@ static void perf_output_read_group(struct perf_output_handle *handle, | |||
3463 | values[n++] = 1 + leader->nr_siblings; | 3435 | values[n++] = 1 + leader->nr_siblings; |
3464 | 3436 | ||
3465 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) | 3437 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) |
3466 | values[n++] = leader->total_time_enabled; | 3438 | values[n++] = enabled; |
3467 | 3439 | ||
3468 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) | 3440 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) |
3469 | values[n++] = leader->total_time_running; | 3441 | values[n++] = running; |
3470 | 3442 | ||
3471 | if (leader != event) | 3443 | if (leader != event) |
3472 | leader->pmu->read(leader); | 3444 | leader->pmu->read(leader); |
@@ -3491,13 +3463,35 @@ static void perf_output_read_group(struct perf_output_handle *handle, | |||
3491 | } | 3463 | } |
3492 | } | 3464 | } |
3493 | 3465 | ||
3466 | #define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\ | ||
3467 | PERF_FORMAT_TOTAL_TIME_RUNNING) | ||
3468 | |||
3494 | static void perf_output_read(struct perf_output_handle *handle, | 3469 | static void perf_output_read(struct perf_output_handle *handle, |
3495 | struct perf_event *event) | 3470 | struct perf_event *event) |
3496 | { | 3471 | { |
3472 | u64 enabled = 0, running = 0, now, ctx_time; | ||
3473 | u64 read_format = event->attr.read_format; | ||
3474 | |||
3475 | /* | ||
3476 | * compute total_time_enabled, total_time_running | ||
3477 | * based on snapshot values taken when the event | ||
3478 | * was last scheduled in. | ||
3479 | * | ||
3480 | * we cannot simply called update_context_time() | ||
3481 | * because of locking issue as we are called in | ||
3482 | * NMI context | ||
3483 | */ | ||
3484 | if (read_format & PERF_FORMAT_TOTAL_TIMES) { | ||
3485 | now = perf_clock(); | ||
3486 | ctx_time = event->shadow_ctx_time + now; | ||
3487 | enabled = ctx_time - event->tstamp_enabled; | ||
3488 | running = ctx_time - event->tstamp_running; | ||
3489 | } | ||
3490 | |||
3497 | if (event->attr.read_format & PERF_FORMAT_GROUP) | 3491 | if (event->attr.read_format & PERF_FORMAT_GROUP) |
3498 | perf_output_read_group(handle, event); | 3492 | perf_output_read_group(handle, event, enabled, running); |
3499 | else | 3493 | else |
3500 | perf_output_read_one(handle, event); | 3494 | perf_output_read_one(handle, event, enabled, running); |
3501 | } | 3495 | } |
3502 | 3496 | ||
3503 | void perf_output_sample(struct perf_output_handle *handle, | 3497 | void perf_output_sample(struct perf_output_handle *handle, |
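Because perf_output_read() can run from NMI context it can no longer call update_context_time() under ctx->lock; instead each event records shadow_ctx_time when it is scheduled in, and the readout extrapolates from that snapshot. The arithmetic spelled out, using only the names that appear in the hunks above:

/* at sched-in (event_sched_in):                                        */
shadow_ctx_time = ctx->time - ctx->timestamp;

/* at readout (perf_output_read), NMI-safe, without taking ctx->lock:  */
now      = perf_clock();
ctx_time = shadow_ctx_time + now;       /* extrapolated ctx->time       */
enabled  = ctx_time - tstamp_enabled;
running  = ctx_time - tstamp_running;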
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index 645e541a45f6..c7a8f453919e 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c | |||
@@ -110,6 +110,7 @@ static const struct file_operations pm_qos_power_fops = { | |||
110 | .write = pm_qos_power_write, | 110 | .write = pm_qos_power_write, |
111 | .open = pm_qos_power_open, | 111 | .open = pm_qos_power_open, |
112 | .release = pm_qos_power_release, | 112 | .release = pm_qos_power_release, |
113 | .llseek = noop_llseek, | ||
113 | }; | 114 | }; |
114 | 115 | ||
115 | /* unlocked internal variant */ | 116 | /* unlocked internal variant */ |
@@ -398,7 +399,7 @@ static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, | |||
398 | } else | 399 | } else |
399 | return -EINVAL; | 400 | return -EINVAL; |
400 | 401 | ||
401 | pm_qos_req = (struct pm_qos_request_list *)filp->private_data; | 402 | pm_qos_req = filp->private_data; |
402 | pm_qos_update_request(pm_qos_req, value); | 403 | pm_qos_update_request(pm_qos_req, value); |
403 | 404 | ||
404 | return count; | 405 | return count; |
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index ac7eb109f196..0dac75ea4456 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
@@ -984,8 +984,8 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) | |||
984 | src = kmap_atomic(s_page, KM_USER0); | 984 | src = kmap_atomic(s_page, KM_USER0); |
985 | dst = kmap_atomic(d_page, KM_USER1); | 985 | dst = kmap_atomic(d_page, KM_USER1); |
986 | do_copy_page(dst, src); | 986 | do_copy_page(dst, src); |
987 | kunmap_atomic(src, KM_USER0); | ||
988 | kunmap_atomic(dst, KM_USER1); | 987 | kunmap_atomic(dst, KM_USER1); |
988 | kunmap_atomic(src, KM_USER0); | ||
989 | } else { | 989 | } else { |
990 | if (PageHighMem(d_page)) { | 990 | if (PageHighMem(d_page)) { |
991 | /* Page pointed to by src may contain some kernel | 991 | /* Page pointed to by src may contain some kernel |
@@ -993,7 +993,7 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) | |||
993 | */ | 993 | */ |
994 | safe_copy_page(buffer, s_page); | 994 | safe_copy_page(buffer, s_page); |
995 | dst = kmap_atomic(d_page, KM_USER0); | 995 | dst = kmap_atomic(d_page, KM_USER0); |
996 | memcpy(dst, buffer, PAGE_SIZE); | 996 | copy_page(dst, buffer); |
997 | kunmap_atomic(dst, KM_USER0); | 997 | kunmap_atomic(dst, KM_USER0); |
998 | } else { | 998 | } else { |
999 | safe_copy_page(page_address(d_page), s_page); | 999 | safe_copy_page(page_address(d_page), s_page); |
@@ -1687,7 +1687,7 @@ int snapshot_read_next(struct snapshot_handle *handle) | |||
1687 | memory_bm_position_reset(&orig_bm); | 1687 | memory_bm_position_reset(&orig_bm); |
1688 | memory_bm_position_reset(©_bm); | 1688 | memory_bm_position_reset(©_bm); |
1689 | } else if (handle->cur <= nr_meta_pages) { | 1689 | } else if (handle->cur <= nr_meta_pages) { |
1690 | memset(buffer, 0, PAGE_SIZE); | 1690 | clear_page(buffer); |
1691 | pack_pfns(buffer, &orig_bm); | 1691 | pack_pfns(buffer, &orig_bm); |
1692 | } else { | 1692 | } else { |
1693 | struct page *page; | 1693 | struct page *page; |
@@ -1701,7 +1701,7 @@ int snapshot_read_next(struct snapshot_handle *handle) | |||
1701 | void *kaddr; | 1701 | void *kaddr; |
1702 | 1702 | ||
1703 | kaddr = kmap_atomic(page, KM_USER0); | 1703 | kaddr = kmap_atomic(page, KM_USER0); |
1704 | memcpy(buffer, kaddr, PAGE_SIZE); | 1704 | copy_page(buffer, kaddr); |
1705 | kunmap_atomic(kaddr, KM_USER0); | 1705 | kunmap_atomic(kaddr, KM_USER0); |
1706 | handle->buffer = buffer; | 1706 | handle->buffer = buffer; |
1707 | } else { | 1707 | } else { |
@@ -1984,7 +1984,7 @@ static void copy_last_highmem_page(void) | |||
1984 | void *dst; | 1984 | void *dst; |
1985 | 1985 | ||
1986 | dst = kmap_atomic(last_highmem_page, KM_USER0); | 1986 | dst = kmap_atomic(last_highmem_page, KM_USER0); |
1987 | memcpy(dst, buffer, PAGE_SIZE); | 1987 | copy_page(dst, buffer); |
1988 | kunmap_atomic(dst, KM_USER0); | 1988 | kunmap_atomic(dst, KM_USER0); |
1989 | last_highmem_page = NULL; | 1989 | last_highmem_page = NULL; |
1990 | } | 1990 | } |
@@ -2270,11 +2270,11 @@ swap_two_pages_data(struct page *p1, struct page *p2, void *buf) | |||
2270 | 2270 | ||
2271 | kaddr1 = kmap_atomic(p1, KM_USER0); | 2271 | kaddr1 = kmap_atomic(p1, KM_USER0); |
2272 | kaddr2 = kmap_atomic(p2, KM_USER1); | 2272 | kaddr2 = kmap_atomic(p2, KM_USER1); |
2273 | memcpy(buf, kaddr1, PAGE_SIZE); | 2273 | copy_page(buf, kaddr1); |
2274 | memcpy(kaddr1, kaddr2, PAGE_SIZE); | 2274 | copy_page(kaddr1, kaddr2); |
2275 | memcpy(kaddr2, buf, PAGE_SIZE); | 2275 | copy_page(kaddr2, buf); |
2276 | kunmap_atomic(kaddr1, KM_USER0); | ||
2277 | kunmap_atomic(kaddr2, KM_USER1); | 2276 | kunmap_atomic(kaddr2, KM_USER1); |
2277 | kunmap_atomic(kaddr1, KM_USER0); | ||
2278 | } | 2278 | } |
2279 | 2279 | ||
2280 | /** | 2280 | /** |
diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 916eaa790399..a0e4a86ccf94 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c | |||
@@ -251,7 +251,7 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain) | |||
251 | if (bio_chain) { | 251 | if (bio_chain) { |
252 | src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); | 252 | src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); |
253 | if (src) { | 253 | if (src) { |
254 | memcpy(src, buf, PAGE_SIZE); | 254 | copy_page(src, buf); |
255 | } else { | 255 | } else { |
256 | WARN_ON_ONCE(1); | 256 | WARN_ON_ONCE(1); |
257 | bio_chain = NULL; /* Go synchronous */ | 257 | bio_chain = NULL; /* Go synchronous */ |
@@ -325,7 +325,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, | |||
325 | error = write_page(handle->cur, handle->cur_swap, NULL); | 325 | error = write_page(handle->cur, handle->cur_swap, NULL); |
326 | if (error) | 326 | if (error) |
327 | goto out; | 327 | goto out; |
328 | memset(handle->cur, 0, PAGE_SIZE); | 328 | clear_page(handle->cur); |
329 | handle->cur_swap = offset; | 329 | handle->cur_swap = offset; |
330 | handle->k = 0; | 330 | handle->k = 0; |
331 | } | 331 | } |
@@ -910,7 +910,7 @@ int swsusp_check(void) | |||
910 | hib_resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); | 910 | hib_resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); |
911 | if (!IS_ERR(hib_resume_bdev)) { | 911 | if (!IS_ERR(hib_resume_bdev)) { |
912 | set_blocksize(hib_resume_bdev, PAGE_SIZE); | 912 | set_blocksize(hib_resume_bdev, PAGE_SIZE); |
913 | memset(swsusp_header, 0, PAGE_SIZE); | 913 | clear_page(swsusp_header); |
914 | error = hib_bio_read_page(swsusp_resume_block, | 914 | error = hib_bio_read_page(swsusp_resume_block, |
915 | swsusp_header, NULL); | 915 | swsusp_header, NULL); |
916 | if (error) | 916 | if (error) |
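The hibernation code switches whole-page memset()/memcpy() calls to clear_page()/copy_page(), the arch-optimized helpers for exactly PAGE_SIZE, page-aligned buffers, and also reorders the kunmap_atomic() calls so mappings are undone in reverse. An illustrative restatement of the pattern (this helper is hypothetical, not code from the patch):

#include <linux/mm.h>
#include <linux/highmem.h>

static void duplicate_page(struct page *dst, struct page *src)
{
        void *s = kmap_atomic(src, KM_USER0);
        void *d = kmap_atomic(dst, KM_USER1);

        copy_page(d, s);                /* instead of memcpy(d, s, PAGE_SIZE) */

        kunmap_atomic(d, KM_USER1);     /* unmap in reverse order of mapping */
        kunmap_atomic(s, KM_USER0);
}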
diff --git a/kernel/printk.c b/kernel/printk.c index 2531017795f6..9a2264fc42ca 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -210,7 +210,7 @@ __setup("log_buf_len=", log_buf_len_setup); | |||
210 | 210 | ||
211 | #ifdef CONFIG_BOOT_PRINTK_DELAY | 211 | #ifdef CONFIG_BOOT_PRINTK_DELAY |
212 | 212 | ||
213 | static unsigned int boot_delay; /* msecs delay after each printk during bootup */ | 213 | static int boot_delay; /* msecs delay after each printk during bootup */ |
214 | static unsigned long long loops_per_msec; /* based on boot_delay */ | 214 | static unsigned long long loops_per_msec; /* based on boot_delay */ |
215 | 215 | ||
216 | static int __init boot_delay_setup(char *str) | 216 | static int __init boot_delay_setup(char *str) |
@@ -261,6 +261,12 @@ static inline void boot_delay_msec(void) | |||
261 | } | 261 | } |
262 | #endif | 262 | #endif |
263 | 263 | ||
264 | #ifdef CONFIG_SECURITY_DMESG_RESTRICT | ||
265 | int dmesg_restrict = 1; | ||
266 | #else | ||
267 | int dmesg_restrict; | ||
268 | #endif | ||
269 | |||
264 | int do_syslog(int type, char __user *buf, int len, bool from_file) | 270 | int do_syslog(int type, char __user *buf, int len, bool from_file) |
265 | { | 271 | { |
266 | unsigned i, j, limit, count; | 272 | unsigned i, j, limit, count; |
@@ -268,7 +274,20 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) | |||
268 | char c; | 274 | char c; |
269 | int error = 0; | 275 | int error = 0; |
270 | 276 | ||
271 | error = security_syslog(type, from_file); | 277 | /* |
278 | * If this is from /proc/kmsg we only do the capabilities checks | ||
279 | * at open time. | ||
280 | */ | ||
281 | if (type == SYSLOG_ACTION_OPEN || !from_file) { | ||
282 | if (dmesg_restrict && !capable(CAP_SYS_ADMIN)) | ||
283 | return -EPERM; | ||
284 | if ((type != SYSLOG_ACTION_READ_ALL && | ||
285 | type != SYSLOG_ACTION_SIZE_BUFFER) && | ||
286 | !capable(CAP_SYS_ADMIN)) | ||
287 | return -EPERM; | ||
288 | } | ||
289 | |||
290 | error = security_syslog(type); | ||
272 | if (error) | 291 | if (error) |
273 | return error; | 292 | return error; |
274 | 293 | ||
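
The hunk above gates every syslog(2) path except already-open /proc/kmsg reads behind the new dmesg_restrict flag. A minimal userspace sketch of what an unprivileged caller now sees when the flag is set; the SYSLOG_ACTION_* values are simply the kernel's numeric action codes (3 and 10), redefined locally because they are not exported to userspace headers:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/klog.h>

#define SYSLOG_ACTION_READ_ALL     3    /* kernel action codes, not exported */
#define SYSLOG_ACTION_SIZE_BUFFER 10

int main(void)
{
        int len = klogctl(SYSLOG_ACTION_SIZE_BUFFER, NULL, 0);
        char *buf;

        if (len < 0) {
                perror("klogctl(SIZE_BUFFER)");    /* EPERM when restricted */
                return 1;
        }
        buf = malloc(len);
        if (!buf)
                return 1;
        len = klogctl(SYSLOG_ACTION_READ_ALL, buf, len);
        if (len < 0)
                perror("klogctl(READ_ALL)");       /* EPERM when restricted */
        else
                printf("read %d bytes of kernel log\n", len);
        free(buf);
        return 0;
}

With dmesg_restrict set and no CAP_SYS_ADMIN both calls fail with EPERM; with it clear, the read-all and size-buffer actions remain open to unprivileged readers while the other actions still require the capability.
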
@@ -647,6 +666,7 @@ static inline int can_use_console(unsigned int cpu) | |||
647 | * released but interrupts still disabled. | 666 | * released but interrupts still disabled. |
648 | */ | 667 | */ |
649 | static int acquire_console_semaphore_for_printk(unsigned int cpu) | 668 | static int acquire_console_semaphore_for_printk(unsigned int cpu) |
669 | __releases(&logbuf_lock) | ||
650 | { | 670 | { |
651 | int retval = 0; | 671 | int retval = 0; |
652 | 672 | ||
@@ -1511,7 +1531,7 @@ int kmsg_dump_unregister(struct kmsg_dumper *dumper) | |||
1511 | } | 1531 | } |
1512 | EXPORT_SYMBOL_GPL(kmsg_dump_unregister); | 1532 | EXPORT_SYMBOL_GPL(kmsg_dump_unregister); |
1513 | 1533 | ||
1514 | static const char const *kmsg_reasons[] = { | 1534 | static const char * const kmsg_reasons[] = { |
1515 | [KMSG_DUMP_OOPS] = "oops", | 1535 | [KMSG_DUMP_OOPS] = "oops", |
1516 | [KMSG_DUMP_PANIC] = "panic", | 1536 | [KMSG_DUMP_PANIC] = "panic", |
1517 | [KMSG_DUMP_KEXEC] = "kexec", | 1537 | [KMSG_DUMP_KEXEC] = "kexec", |
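
The kmsg_reasons fix is purely about where the second const lands. A small standalone illustration of why the original declaration left the pointer array writable (some compilers warn about the duplicate qualifier in the first form):

#include <stdio.h>

/* What the code had: "const" applied twice to the pointed-to char,
 * so the pointers themselves stay writable and the table cannot be
 * placed in read-only data. */
static const char const *before[] = { "oops", "panic", "kexec" };

/* What the fix means: the pointers are const as well. */
static const char * const after[] = { "oops", "panic", "kexec" };

int main(void)
{
        before[0] = "overwritten";      /* compiles: only the chars are const */
        /* after[0] = "overwritten"; */ /* would not compile */
        printf("%s %s\n", before[0], after[0]);
        return 0;
}
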
diff --git a/kernel/profile.c b/kernel/profile.c index b22a899934cc..66f841b7fbd3 100644 --- a/kernel/profile.c +++ b/kernel/profile.c | |||
@@ -555,6 +555,7 @@ static ssize_t write_profile(struct file *file, const char __user *buf, | |||
555 | static const struct file_operations proc_profile_operations = { | 555 | static const struct file_operations proc_profile_operations = { |
556 | .read = read_profile, | 556 | .read = read_profile, |
557 | .write = write_profile, | 557 | .write = write_profile, |
558 | .llseek = default_llseek, | ||
558 | }; | 559 | }; |
559 | 560 | ||
560 | #ifdef CONFIG_SMP | 561 | #ifdef CONFIG_SMP |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index f34d798ef4a2..99bbaa3e5b0d 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -181,7 +181,7 @@ int ptrace_attach(struct task_struct *task) | |||
181 | * under ptrace. | 181 | * under ptrace. |
182 | */ | 182 | */ |
183 | retval = -ERESTARTNOINTR; | 183 | retval = -ERESTARTNOINTR; |
184 | if (mutex_lock_interruptible(&task->cred_guard_mutex)) | 184 | if (mutex_lock_interruptible(&task->signal->cred_guard_mutex)) |
185 | goto out; | 185 | goto out; |
186 | 186 | ||
187 | task_lock(task); | 187 | task_lock(task); |
@@ -208,7 +208,7 @@ int ptrace_attach(struct task_struct *task) | |||
208 | unlock_tasklist: | 208 | unlock_tasklist: |
209 | write_unlock_irq(&tasklist_lock); | 209 | write_unlock_irq(&tasklist_lock); |
210 | unlock_creds: | 210 | unlock_creds: |
211 | mutex_unlock(&task->cred_guard_mutex); | 211 | mutex_unlock(&task->signal->cred_guard_mutex); |
212 | out: | 212 | out: |
213 | return retval; | 213 | return retval; |
214 | } | 214 | } |
@@ -329,6 +329,8 @@ int ptrace_detach(struct task_struct *child, unsigned int data) | |||
329 | * and reacquire the lock. | 329 | * and reacquire the lock. |
330 | */ | 330 | */ |
331 | void exit_ptrace(struct task_struct *tracer) | 331 | void exit_ptrace(struct task_struct *tracer) |
332 | __releases(&tasklist_lock) | ||
333 | __acquires(&tasklist_lock) | ||
332 | { | 334 | { |
333 | struct task_struct *p, *n; | 335 | struct task_struct *p, *n; |
334 | LIST_HEAD(ptrace_dead); | 336 | LIST_HEAD(ptrace_dead); |
@@ -402,7 +404,7 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds | |||
402 | return copied; | 404 | return copied; |
403 | } | 405 | } |
404 | 406 | ||
405 | static int ptrace_setoptions(struct task_struct *child, long data) | 407 | static int ptrace_setoptions(struct task_struct *child, unsigned long data) |
406 | { | 408 | { |
407 | child->ptrace &= ~PT_TRACE_MASK; | 409 | child->ptrace &= ~PT_TRACE_MASK; |
408 | 410 | ||
@@ -481,7 +483,8 @@ static int ptrace_setsiginfo(struct task_struct *child, const siginfo_t *info) | |||
481 | #define is_sysemu_singlestep(request) 0 | 483 | #define is_sysemu_singlestep(request) 0 |
482 | #endif | 484 | #endif |
483 | 485 | ||
484 | static int ptrace_resume(struct task_struct *child, long request, long data) | 486 | static int ptrace_resume(struct task_struct *child, long request, |
487 | unsigned long data) | ||
485 | { | 488 | { |
486 | if (!valid_signal(data)) | 489 | if (!valid_signal(data)) |
487 | return -EIO; | 490 | return -EIO; |
@@ -558,10 +561,12 @@ static int ptrace_regset(struct task_struct *task, int req, unsigned int type, | |||
558 | #endif | 561 | #endif |
559 | 562 | ||
560 | int ptrace_request(struct task_struct *child, long request, | 563 | int ptrace_request(struct task_struct *child, long request, |
561 | long addr, long data) | 564 | unsigned long addr, unsigned long data) |
562 | { | 565 | { |
563 | int ret = -EIO; | 566 | int ret = -EIO; |
564 | siginfo_t siginfo; | 567 | siginfo_t siginfo; |
568 | void __user *datavp = (void __user *) data; | ||
569 | unsigned long __user *datalp = datavp; | ||
565 | 570 | ||
566 | switch (request) { | 571 | switch (request) { |
567 | case PTRACE_PEEKTEXT: | 572 | case PTRACE_PEEKTEXT: |
@@ -578,19 +583,17 @@ int ptrace_request(struct task_struct *child, long request, | |||
578 | ret = ptrace_setoptions(child, data); | 583 | ret = ptrace_setoptions(child, data); |
579 | break; | 584 | break; |
580 | case PTRACE_GETEVENTMSG: | 585 | case PTRACE_GETEVENTMSG: |
581 | ret = put_user(child->ptrace_message, (unsigned long __user *) data); | 586 | ret = put_user(child->ptrace_message, datalp); |
582 | break; | 587 | break; |
583 | 588 | ||
584 | case PTRACE_GETSIGINFO: | 589 | case PTRACE_GETSIGINFO: |
585 | ret = ptrace_getsiginfo(child, &siginfo); | 590 | ret = ptrace_getsiginfo(child, &siginfo); |
586 | if (!ret) | 591 | if (!ret) |
587 | ret = copy_siginfo_to_user((siginfo_t __user *) data, | 592 | ret = copy_siginfo_to_user(datavp, &siginfo); |
588 | &siginfo); | ||
589 | break; | 593 | break; |
590 | 594 | ||
591 | case PTRACE_SETSIGINFO: | 595 | case PTRACE_SETSIGINFO: |
592 | if (copy_from_user(&siginfo, (siginfo_t __user *) data, | 596 | if (copy_from_user(&siginfo, datavp, sizeof siginfo)) |
593 | sizeof siginfo)) | ||
594 | ret = -EFAULT; | 597 | ret = -EFAULT; |
595 | else | 598 | else |
596 | ret = ptrace_setsiginfo(child, &siginfo); | 599 | ret = ptrace_setsiginfo(child, &siginfo); |
@@ -621,7 +624,7 @@ int ptrace_request(struct task_struct *child, long request, | |||
621 | } | 624 | } |
622 | mmput(mm); | 625 | mmput(mm); |
623 | 626 | ||
624 | ret = put_user(tmp, (unsigned long __user *) data); | 627 | ret = put_user(tmp, datalp); |
625 | break; | 628 | break; |
626 | } | 629 | } |
627 | #endif | 630 | #endif |
@@ -650,7 +653,7 @@ int ptrace_request(struct task_struct *child, long request, | |||
650 | case PTRACE_SETREGSET: | 653 | case PTRACE_SETREGSET: |
651 | { | 654 | { |
652 | struct iovec kiov; | 655 | struct iovec kiov; |
653 | struct iovec __user *uiov = (struct iovec __user *) data; | 656 | struct iovec __user *uiov = datavp; |
654 | 657 | ||
655 | if (!access_ok(VERIFY_WRITE, uiov, sizeof(*uiov))) | 658 | if (!access_ok(VERIFY_WRITE, uiov, sizeof(*uiov))) |
656 | return -EFAULT; | 659 | return -EFAULT; |
@@ -691,7 +694,8 @@ static struct task_struct *ptrace_get_task_struct(pid_t pid) | |||
691 | #define arch_ptrace_attach(child) do { } while (0) | 694 | #define arch_ptrace_attach(child) do { } while (0) |
692 | #endif | 695 | #endif |
693 | 696 | ||
694 | SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data) | 697 | SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, |
698 | unsigned long, data) | ||
695 | { | 699 | { |
696 | struct task_struct *child; | 700 | struct task_struct *child; |
697 | long ret; | 701 | long ret; |
@@ -732,7 +736,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data) | |||
732 | return ret; | 736 | return ret; |
733 | } | 737 | } |
734 | 738 | ||
735 | int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data) | 739 | int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr, |
740 | unsigned long data) | ||
736 | { | 741 | { |
737 | unsigned long tmp; | 742 | unsigned long tmp; |
738 | int copied; | 743 | int copied; |
@@ -743,7 +748,8 @@ int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data) | |||
743 | return put_user(tmp, (unsigned long __user *)data); | 748 | return put_user(tmp, (unsigned long __user *)data); |
744 | } | 749 | } |
745 | 750 | ||
746 | int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data) | 751 | int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr, |
752 | unsigned long data) | ||
747 | { | 753 | { |
748 | int copied; | 754 | int copied; |
749 | 755 | ||
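
The ptrace prototypes above now carry addr and data as unsigned long, matching how the values arrive from the syscall. A minimal userspace sketch of the PTRACE_PEEKDATA path that generic_ptrace_peekdata() services; the child is a plain fork, so the parent's &word is a valid address in the child as well:

#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
        long word = 0x1234abcd;
        pid_t child = fork();

        if (child < 0)
                return 1;
        if (child == 0) {
                ptrace(PTRACE_TRACEME, 0, NULL, NULL);
                raise(SIGSTOP);                /* let the parent inspect us */
                _exit(0);
        }

        waitpid(child, NULL, 0);               /* child is now stopped */

        errno = 0;
        long val = ptrace(PTRACE_PEEKDATA, child, &word, NULL);
        if (val == -1 && errno)
                perror("PTRACE_PEEKDATA");
        else
                printf("peeked 0x%lx from the child\n", val);

        kill(child, SIGKILL);
        waitpid(child, NULL, 0);
        return 0;
}
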
diff --git a/kernel/range.c b/kernel/range.c index 471b66acabb5..37fa9b99ad58 100644 --- a/kernel/range.c +++ b/kernel/range.c | |||
@@ -119,7 +119,7 @@ static int cmp_range(const void *x1, const void *x2) | |||
119 | 119 | ||
120 | int clean_sort_range(struct range *range, int az) | 120 | int clean_sort_range(struct range *range, int az) |
121 | { | 121 | { |
122 | int i, j, k = az - 1, nr_range = 0; | 122 | int i, j, k = az - 1, nr_range = az; |
123 | 123 | ||
124 | for (i = 0; i < k; i++) { | 124 | for (i = 0; i < k; i++) { |
125 | if (range[i].end) | 125 | if (range[i].end) |
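
The one-line change above (nr_range now starts at az instead of 0) matters when no entry is empty: the counting pass at the end of clean_sort_range(), which is not visible in this hunk, only lowers nr_range when it hits an empty slot, so a completely full array used to be reported as holding zero ranges. A small standalone model of that counting step, reconstructed as an approximation of the code outside the hunk:

#include <stdio.h>

struct range { unsigned long start, end; };

/* Model of the final counting loop: the count must default to the
 * array size, otherwise a full array is reported as empty. */
static int count_ranges(struct range *range, int az)
{
        int i, nr_range = az;           /* was 0 before the fix */

        for (i = 0; i < az; i++) {
                if (!range[i].end) {
                        nr_range = i;
                        break;
                }
        }
        return nr_range;
}

int main(void)
{
        struct range full[2]    = { { 0, 10 }, { 20, 30 } };
        struct range partial[2] = { { 0, 10 }, { 0, 0 } };

        printf("full array    -> %d ranges\n", count_ranges(full, 2));
        printf("partial array -> %d ranges\n", count_ranges(partial, 2));
        return 0;
}
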
diff --git a/kernel/relay.c b/kernel/relay.c index c7cf397fb929..859ea5a9605f 100644 --- a/kernel/relay.c +++ b/kernel/relay.c | |||
@@ -70,17 +70,10 @@ static const struct vm_operations_struct relay_file_mmap_ops = { | |||
70 | */ | 70 | */ |
71 | static struct page **relay_alloc_page_array(unsigned int n_pages) | 71 | static struct page **relay_alloc_page_array(unsigned int n_pages) |
72 | { | 72 | { |
73 | struct page **array; | 73 | const size_t pa_size = n_pages * sizeof(struct page *); |
74 | size_t pa_size = n_pages * sizeof(struct page *); | 74 | if (pa_size > PAGE_SIZE) |
75 | 75 | return vzalloc(pa_size); | |
76 | if (pa_size > PAGE_SIZE) { | 76 | return kzalloc(pa_size, GFP_KERNEL); |
77 | array = vmalloc(pa_size); | ||
78 | if (array) | ||
79 | memset(array, 0, pa_size); | ||
80 | } else { | ||
81 | array = kzalloc(pa_size, GFP_KERNEL); | ||
82 | } | ||
83 | return array; | ||
84 | } | 77 | } |
85 | 78 | ||
86 | /* | 79 | /* |
diff --git a/kernel/resource.c b/kernel/resource.c index 7b36976e5dea..9fad33efd0db 100644 --- a/kernel/resource.c +++ b/kernel/resource.c | |||
@@ -40,6 +40,23 @@ EXPORT_SYMBOL(iomem_resource); | |||
40 | 40 | ||
41 | static DEFINE_RWLOCK(resource_lock); | 41 | static DEFINE_RWLOCK(resource_lock); |
42 | 42 | ||
43 | /* | ||
44 | * By default, we allocate free space bottom-up. The architecture can request | ||
45 | * top-down by clearing this flag. The user can override the architecture's | ||
46 | * choice with the "resource_alloc_from_bottom" kernel boot option, but that | ||
47 | * should only be a debugging tool. | ||
48 | */ | ||
49 | int resource_alloc_from_bottom = 1; | ||
50 | |||
51 | static __init int setup_alloc_from_bottom(char *s) | ||
52 | { | ||
53 | printk(KERN_INFO | ||
54 | "resource: allocating from bottom-up; please report a bug\n"); | ||
55 | resource_alloc_from_bottom = 1; | ||
56 | return 0; | ||
57 | } | ||
58 | early_param("resource_alloc_from_bottom", setup_alloc_from_bottom); | ||
59 | |||
43 | static void *r_next(struct seq_file *m, void *v, loff_t *pos) | 60 | static void *r_next(struct seq_file *m, void *v, loff_t *pos) |
44 | { | 61 | { |
45 | struct resource *p = v; | 62 | struct resource *p = v; |
@@ -357,8 +374,97 @@ int __weak page_is_ram(unsigned long pfn) | |||
357 | return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1; | 374 | return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1; |
358 | } | 375 | } |
359 | 376 | ||
377 | static resource_size_t simple_align_resource(void *data, | ||
378 | const struct resource *avail, | ||
379 | resource_size_t size, | ||
380 | resource_size_t align) | ||
381 | { | ||
382 | return avail->start; | ||
383 | } | ||
384 | |||
385 | static void resource_clip(struct resource *res, resource_size_t min, | ||
386 | resource_size_t max) | ||
387 | { | ||
388 | if (res->start < min) | ||
389 | res->start = min; | ||
390 | if (res->end > max) | ||
391 | res->end = max; | ||
392 | } | ||
393 | |||
394 | static bool resource_contains(struct resource *res1, struct resource *res2) | ||
395 | { | ||
396 | return res1->start <= res2->start && res1->end >= res2->end; | ||
397 | } | ||
398 | |||
399 | /* | ||
400 | * Find the resource before "child" in the sibling list of "root" children. | ||
401 | */ | ||
402 | static struct resource *find_sibling_prev(struct resource *root, struct resource *child) | ||
403 | { | ||
404 | struct resource *this; | ||
405 | |||
406 | for (this = root->child; this; this = this->sibling) | ||
407 | if (this->sibling == child) | ||
408 | return this; | ||
409 | |||
410 | return NULL; | ||
411 | } | ||
412 | |||
413 | /* | ||
414 | * Find empty slot in the resource tree given range and alignment. | ||
415 | * This version allocates from the end of the root resource first. | ||
416 | */ | ||
417 | static int find_resource_from_top(struct resource *root, struct resource *new, | ||
418 | resource_size_t size, resource_size_t min, | ||
419 | resource_size_t max, resource_size_t align, | ||
420 | resource_size_t (*alignf)(void *, | ||
421 | const struct resource *, | ||
422 | resource_size_t, | ||
423 | resource_size_t), | ||
424 | void *alignf_data) | ||
425 | { | ||
426 | struct resource *this; | ||
427 | struct resource tmp, avail, alloc; | ||
428 | |||
429 | tmp.start = root->end; | ||
430 | tmp.end = root->end; | ||
431 | |||
432 | this = find_sibling_prev(root, NULL); | ||
433 | for (;;) { | ||
434 | if (this) { | ||
435 | if (this->end < root->end) | ||
436 | tmp.start = this->end + 1; | ||
437 | } else | ||
438 | tmp.start = root->start; | ||
439 | |||
440 | resource_clip(&tmp, min, max); | ||
441 | |||
442 | /* Check for overflow after ALIGN() */ | ||
443 | avail = *new; | ||
444 | avail.start = ALIGN(tmp.start, align); | ||
445 | avail.end = tmp.end; | ||
446 | if (avail.start >= tmp.start) { | ||
447 | alloc.start = alignf(alignf_data, &avail, size, align); | ||
448 | alloc.end = alloc.start + size - 1; | ||
449 | if (resource_contains(&avail, &alloc)) { | ||
450 | new->start = alloc.start; | ||
451 | new->end = alloc.end; | ||
452 | return 0; | ||
453 | } | ||
454 | } | ||
455 | |||
456 | if (!this || this->start == root->start) | ||
457 | break; | ||
458 | |||
459 | tmp.end = this->start - 1; | ||
460 | this = find_sibling_prev(root, this); | ||
461 | } | ||
462 | return -EBUSY; | ||
463 | } | ||
464 | |||
360 | /* | 465 | /* |
361 | * Find empty slot in the resource tree given range and alignment. | 466 | * Find empty slot in the resource tree given range and alignment. |
467 | * This version allocates from the beginning of the root resource first. | ||
362 | */ | 468 | */ |
363 | static int find_resource(struct resource *root, struct resource *new, | 469 | static int find_resource(struct resource *root, struct resource *new, |
364 | resource_size_t size, resource_size_t min, | 470 | resource_size_t size, resource_size_t min, |
@@ -370,36 +476,43 @@ static int find_resource(struct resource *root, struct resource *new, | |||
370 | void *alignf_data) | 476 | void *alignf_data) |
371 | { | 477 | { |
372 | struct resource *this = root->child; | 478 | struct resource *this = root->child; |
373 | struct resource tmp = *new; | 479 | struct resource tmp = *new, avail, alloc; |
374 | 480 | ||
375 | tmp.start = root->start; | 481 | tmp.start = root->start; |
376 | /* | 482 | /* |
377 | * Skip past an allocated resource that starts at 0, since the assignment | 483 | * Skip past an allocated resource that starts at 0, since the |
378 | * of this->start - 1 to tmp->end below would cause an underflow. | 484 | * assignment of this->start - 1 to tmp->end below would cause an |
485 | * underflow. | ||
379 | */ | 486 | */ |
380 | if (this && this->start == 0) { | 487 | if (this && this->start == 0) { |
381 | tmp.start = this->end + 1; | 488 | tmp.start = this->end + 1; |
382 | this = this->sibling; | 489 | this = this->sibling; |
383 | } | 490 | } |
384 | for(;;) { | 491 | for (;;) { |
385 | if (this) | 492 | if (this) |
386 | tmp.end = this->start - 1; | 493 | tmp.end = this->start - 1; |
387 | else | 494 | else |
388 | tmp.end = root->end; | 495 | tmp.end = root->end; |
389 | if (tmp.start < min) | 496 | |
390 | tmp.start = min; | 497 | resource_clip(&tmp, min, max); |
391 | if (tmp.end > max) | 498 | |
392 | tmp.end = max; | 499 | /* Check for overflow after ALIGN() */ |
393 | tmp.start = ALIGN(tmp.start, align); | 500 | avail = *new; |
394 | if (alignf) | 501 | avail.start = ALIGN(tmp.start, align); |
395 | tmp.start = alignf(alignf_data, &tmp, size, align); | 502 | avail.end = tmp.end; |
396 | if (tmp.start < tmp.end && tmp.end - tmp.start >= size - 1) { | 503 | if (avail.start >= tmp.start) { |
397 | new->start = tmp.start; | 504 | alloc.start = alignf(alignf_data, &avail, size, align); |
398 | new->end = tmp.start + size - 1; | 505 | alloc.end = alloc.start + size - 1; |
399 | return 0; | 506 | if (resource_contains(&avail, &alloc)) { |
507 | new->start = alloc.start; | ||
508 | new->end = alloc.end; | ||
509 | return 0; | ||
510 | } | ||
400 | } | 511 | } |
512 | |||
401 | if (!this) | 513 | if (!this) |
402 | break; | 514 | break; |
515 | |||
403 | tmp.start = this->end + 1; | 516 | tmp.start = this->end + 1; |
404 | this = this->sibling; | 517 | this = this->sibling; |
405 | } | 518 | } |
@@ -428,8 +541,14 @@ int allocate_resource(struct resource *root, struct resource *new, | |||
428 | { | 541 | { |
429 | int err; | 542 | int err; |
430 | 543 | ||
544 | if (!alignf) | ||
545 | alignf = simple_align_resource; | ||
546 | |||
431 | write_lock(&resource_lock); | 547 | write_lock(&resource_lock); |
432 | err = find_resource(root, new, size, min, max, align, alignf, alignf_data); | 548 | if (resource_alloc_from_bottom) |
549 | err = find_resource(root, new, size, min, max, align, alignf, alignf_data); | ||
550 | else | ||
551 | err = find_resource_from_top(root, new, size, min, max, align, alignf, alignf_data); | ||
433 | if (err >= 0 && __request_resource(root, new)) | 552 | if (err >= 0 && __request_resource(root, new)) |
434 | err = -EBUSY; | 553 | err = -EBUSY; |
435 | write_unlock(&resource_lock); | 554 | write_unlock(&resource_lock); |
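
allocate_resource() now picks the search direction at run time, and both searches share the same overflow-checked placement test: align the start of the candidate gap, make sure ALIGN() did not wrap, then confirm the aligned allocation still fits inside the gap. A small standalone model of that placement test, with plain uint64_t ranges standing in for struct resource:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

struct window { uint64_t start, end; };         /* inclusive, like struct resource */

#define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

static bool contains(const struct window *outer, const struct window *inner)
{
        return outer->start <= inner->start && outer->end >= inner->end;
}

/* Try to place "size" bytes in "gap", aligned to "align" (a power of two). */
static bool place(const struct window *gap, uint64_t size, uint64_t align,
                  struct window *out)
{
        struct window avail = { ALIGN_UP(gap->start, align), gap->end };

        /* mirrors the "avail.start >= tmp.start" wrap check above */
        if (avail.start < gap->start)
                return false;

        struct window alloc = { avail.start, avail.start + size - 1 };
        if (!contains(&avail, &alloc))
                return false;

        *out = alloc;
        return true;
}

int main(void)
{
        struct window gap = { 0xa0001, 0xfffff }, got;

        if (place(&gap, 0x1000, 0x1000, &got))
                printf("allocated [%#llx-%#llx]\n",
                       (unsigned long long)got.start,
                       (unsigned long long)got.end);
        return 0;
}
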
@@ -453,6 +572,8 @@ static struct resource * __insert_resource(struct resource *parent, struct resou | |||
453 | 572 | ||
454 | if (first == parent) | 573 | if (first == parent) |
455 | return first; | 574 | return first; |
575 | if (WARN_ON(first == new)) /* duplicated insertion */ | ||
576 | return first; | ||
456 | 577 | ||
457 | if ((first->start > new->start) || (first->end < new->end)) | 578 | if ((first->start > new->start) || (first->end < new->end)) |
458 | break; | 579 | break; |
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c index a56f629b057a..66cb89bc5ef1 100644 --- a/kernel/rtmutex-tester.c +++ b/kernel/rtmutex-tester.c | |||
@@ -76,7 +76,9 @@ static int handle_op(struct test_thread_data *td, int lockwakeup) | |||
76 | } | 76 | } |
77 | 77 | ||
78 | if (!lockwakeup && td->bkl == 4) { | 78 | if (!lockwakeup && td->bkl == 4) { |
79 | #ifdef CONFIG_LOCK_KERNEL | ||
79 | unlock_kernel(); | 80 | unlock_kernel(); |
81 | #endif | ||
80 | td->bkl = 0; | 82 | td->bkl = 0; |
81 | } | 83 | } |
82 | return 0; | 84 | return 0; |
@@ -133,14 +135,18 @@ static int handle_op(struct test_thread_data *td, int lockwakeup) | |||
133 | if (td->bkl) | 135 | if (td->bkl) |
134 | return 0; | 136 | return 0; |
135 | td->bkl = 1; | 137 | td->bkl = 1; |
138 | #ifdef CONFIG_LOCK_KERNEL | ||
136 | lock_kernel(); | 139 | lock_kernel(); |
140 | #endif | ||
137 | td->bkl = 4; | 141 | td->bkl = 4; |
138 | return 0; | 142 | return 0; |
139 | 143 | ||
140 | case RTTEST_UNLOCKBKL: | 144 | case RTTEST_UNLOCKBKL: |
141 | if (td->bkl != 4) | 145 | if (td->bkl != 4) |
142 | break; | 146 | break; |
147 | #ifdef CONFIG_LOCK_KERNEL | ||
143 | unlock_kernel(); | 148 | unlock_kernel(); |
149 | #endif | ||
144 | td->bkl = 0; | 150 | td->bkl = 0; |
145 | return 0; | 151 | return 0; |
146 | 152 | ||
diff --git a/kernel/sched.c b/kernel/sched.c index 51944e8c38a8..41f18695b730 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -8510,12 +8510,12 @@ void sched_move_task(struct task_struct *tsk) | |||
8510 | if (unlikely(running)) | 8510 | if (unlikely(running)) |
8511 | tsk->sched_class->put_prev_task(rq, tsk); | 8511 | tsk->sched_class->put_prev_task(rq, tsk); |
8512 | 8512 | ||
8513 | set_task_rq(tsk, task_cpu(tsk)); | ||
8514 | |||
8515 | #ifdef CONFIG_FAIR_GROUP_SCHED | 8513 | #ifdef CONFIG_FAIR_GROUP_SCHED |
8516 | if (tsk->sched_class->moved_group) | 8514 | if (tsk->sched_class->task_move_group) |
8517 | tsk->sched_class->moved_group(tsk, on_rq); | 8515 | tsk->sched_class->task_move_group(tsk, on_rq); |
8516 | else | ||
8518 | #endif | 8517 | #endif |
8518 | set_task_rq(tsk, task_cpu(tsk)); | ||
8519 | 8519 | ||
8520 | if (unlikely(running)) | 8520 | if (unlikely(running)) |
8521 | tsk->sched_class->set_curr_task(rq); | 8521 | tsk->sched_class->set_curr_task(rq); |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 933f3d1b62ea..f4f6a8326dd0 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -3869,13 +3869,26 @@ static void set_curr_task_fair(struct rq *rq) | |||
3869 | } | 3869 | } |
3870 | 3870 | ||
3871 | #ifdef CONFIG_FAIR_GROUP_SCHED | 3871 | #ifdef CONFIG_FAIR_GROUP_SCHED |
3872 | static void moved_group_fair(struct task_struct *p, int on_rq) | 3872 | static void task_move_group_fair(struct task_struct *p, int on_rq) |
3873 | { | 3873 | { |
3874 | struct cfs_rq *cfs_rq = task_cfs_rq(p); | 3874 | /* |
3875 | 3875 | * If the task was not on the rq at the time of this cgroup movement | |
3876 | update_curr(cfs_rq); | 3876 | * it must have been asleep, sleeping tasks keep their ->vruntime |
3877 | * absolute on their old rq until wakeup (needed for the fair sleeper | ||
3878 | * bonus in place_entity()). | ||
3879 | * | ||
3880 | * If it was on the rq, we've just 'preempted' it, which does convert | ||
3881 | * ->vruntime to a relative base. | ||
3882 | * | ||
3883 | * Make sure both cases convert their relative position when migrating | ||
3884 | * to another cgroup's rq. This does somewhat interfere with the | ||
3885 | * fair sleeper stuff for the first placement, but who cares. | ||
3886 | */ | ||
3887 | if (!on_rq) | ||
3888 | p->se.vruntime -= cfs_rq_of(&p->se)->min_vruntime; | ||
3889 | set_task_rq(p, task_cpu(p)); | ||
3877 | if (!on_rq) | 3890 | if (!on_rq) |
3878 | place_entity(cfs_rq, &p->se, 1); | 3891 | p->se.vruntime += cfs_rq_of(&p->se)->min_vruntime; |
3879 | } | 3892 | } |
3880 | #endif | 3893 | #endif |
3881 | 3894 | ||
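
The comment in task_move_group_fair() is easiest to see with numbers: a sleeping task's vruntime is absolute on its old cfs_rq, so it is re-based by subtracting the old queue's min_vruntime before the move and adding the new queue's afterwards. A tiny arithmetic sketch with made-up values:

#include <stdio.h>

int main(void)
{
        unsigned long long vruntime = 1000500;  /* task, absolute on the old rq */
        unsigned long long old_min  = 1000000;  /* old cfs_rq->min_vruntime */
        unsigned long long new_min  = 5000000;  /* new cfs_rq->min_vruntime */

        vruntime -= old_min;    /* relative: 500 */
        /* ... set_task_rq() switches the task to the new group's cfs_rq ... */
        vruntime += new_min;    /* absolute again on the new rq: 5000500 */

        printf("%llu\n", vruntime);
        return 0;
}
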
@@ -3927,7 +3940,7 @@ static const struct sched_class fair_sched_class = { | |||
3927 | .get_rr_interval = get_rr_interval_fair, | 3940 | .get_rr_interval = get_rr_interval_fair, |
3928 | 3941 | ||
3929 | #ifdef CONFIG_FAIR_GROUP_SCHED | 3942 | #ifdef CONFIG_FAIR_GROUP_SCHED |
3930 | .moved_group = moved_group_fair, | 3943 | .task_move_group = task_move_group_fair, |
3931 | #endif | 3944 | #endif |
3932 | }; | 3945 | }; |
3933 | 3946 | ||
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index 25c2f962f6fc..48ddf431db0e 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h | |||
@@ -157,15 +157,7 @@ static inline void sched_info_reset_dequeued(struct task_struct *t) | |||
157 | } | 157 | } |
158 | 158 | ||
159 | /* | 159 | /* |
160 | * Called when a process is dequeued from the active array and given | 160 | * We are interested in knowing how long it was from the *first* time a |
161 | * the cpu. We should note that with the exception of interactive | ||
162 | * tasks, the expired queue will become the active queue after the active | ||
163 | * queue is empty, without explicitly dequeuing and requeuing tasks in the | ||
164 | * expired queue. (Interactive tasks may be requeued directly to the | ||
165 | * active queue, thus delaying tasks in the expired queue from running; | ||
166 | * see scheduler_tick()). | ||
167 | * | ||
168 | * Though we are interested in knowing how long it was from the *first* time a | ||
169 | * task was queued to the time that it finally hit a cpu, we call this routine | 161 | * task was queued to the time that it finally hit a cpu, we call this routine |
170 | * from dequeue_task() to account for possible rq->clock skew across cpus. The | 162 | * from dequeue_task() to account for possible rq->clock skew across cpus. The |
171 | * delta taken on each cpu would annul the skew. | 163 | * delta taken on each cpu would annul the skew. |
@@ -203,16 +195,6 @@ static void sched_info_arrive(struct task_struct *t) | |||
203 | } | 195 | } |
204 | 196 | ||
205 | /* | 197 | /* |
206 | * Called when a process is queued into either the active or expired | ||
207 | * array. The time is noted and later used to determine how long we | ||
208 | * had to wait for us to reach the cpu. Since the expired queue will | ||
209 | * become the active queue after active queue is empty, without dequeuing | ||
210 | * and requeuing any tasks, we are interested in queuing to either. It | ||
211 | * is unusual but not impossible for tasks to be dequeued and immediately | ||
212 | * requeued in the same or another array: this can happen in sched_yield(), | ||
213 | * set_user_nice(), and even load_balance() as it moves tasks from runqueue | ||
214 | * to runqueue. | ||
215 | * | ||
216 | * This function is only called from enqueue_task(), but also only updates | 198 | * This function is only called from enqueue_task(), but also only updates |
217 | * the timestamp if it is already not set. It's assumed that | 199 | * the timestamp if it is already not set. It's assumed that |
218 | * sched_info_dequeued() will clear that stamp when appropriate. | 200 | * sched_info_dequeued() will clear that stamp when appropriate. |
diff --git a/kernel/signal.c b/kernel/signal.c index 919562c3d6b7..4e3cff10fdce 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -1105,7 +1105,8 @@ int zap_other_threads(struct task_struct *p) | |||
1105 | return count; | 1105 | return count; |
1106 | } | 1106 | } |
1107 | 1107 | ||
1108 | struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags) | 1108 | struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, |
1109 | unsigned long *flags) | ||
1109 | { | 1110 | { |
1110 | struct sighand_struct *sighand; | 1111 | struct sighand_struct *sighand; |
1111 | 1112 | ||
@@ -1617,6 +1618,8 @@ static int sigkill_pending(struct task_struct *tsk) | |||
1617 | * is gone, we keep current->exit_code unless clear_code. | 1618 | * is gone, we keep current->exit_code unless clear_code. |
1618 | */ | 1619 | */ |
1619 | static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) | 1620 | static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) |
1621 | __releases(¤t->sighand->siglock) | ||
1622 | __acquires(¤t->sighand->siglock) | ||
1620 | { | 1623 | { |
1621 | if (arch_ptrace_stop_needed(exit_code, info)) { | 1624 | if (arch_ptrace_stop_needed(exit_code, info)) { |
1622 | /* | 1625 | /* |
diff --git a/kernel/smp.c b/kernel/smp.c index ed6aacfcb7ef..12ed8b013e2d 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
@@ -267,7 +267,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data); | |||
267 | * | 267 | * |
268 | * Returns 0 on success, else a negative status code. | 268 | * Returns 0 on success, else a negative status code. |
269 | */ | 269 | */ |
270 | int smp_call_function_single(int cpu, void (*func) (void *info), void *info, | 270 | int smp_call_function_single(int cpu, smp_call_func_t func, void *info, |
271 | int wait) | 271 | int wait) |
272 | { | 272 | { |
273 | struct call_single_data d = { | 273 | struct call_single_data d = { |
@@ -336,7 +336,7 @@ EXPORT_SYMBOL(smp_call_function_single); | |||
336 | * 3) any other online cpu in @mask | 336 | * 3) any other online cpu in @mask |
337 | */ | 337 | */ |
338 | int smp_call_function_any(const struct cpumask *mask, | 338 | int smp_call_function_any(const struct cpumask *mask, |
339 | void (*func)(void *info), void *info, int wait) | 339 | smp_call_func_t func, void *info, int wait) |
340 | { | 340 | { |
341 | unsigned int cpu; | 341 | unsigned int cpu; |
342 | const struct cpumask *nodemask; | 342 | const struct cpumask *nodemask; |
@@ -416,7 +416,7 @@ void __smp_call_function_single(int cpu, struct call_single_data *data, | |||
416 | * must be disabled when calling this function. | 416 | * must be disabled when calling this function. |
417 | */ | 417 | */ |
418 | void smp_call_function_many(const struct cpumask *mask, | 418 | void smp_call_function_many(const struct cpumask *mask, |
419 | void (*func)(void *), void *info, bool wait) | 419 | smp_call_func_t func, void *info, bool wait) |
420 | { | 420 | { |
421 | struct call_function_data *data; | 421 | struct call_function_data *data; |
422 | unsigned long flags; | 422 | unsigned long flags; |
@@ -500,7 +500,7 @@ EXPORT_SYMBOL(smp_call_function_many); | |||
500 | * You must not call this function with disabled interrupts or from a | 500 | * You must not call this function with disabled interrupts or from a |
501 | * hardware interrupt handler or from a bottom half handler. | 501 | * hardware interrupt handler or from a bottom half handler. |
502 | */ | 502 | */ |
503 | int smp_call_function(void (*func)(void *), void *info, int wait) | 503 | int smp_call_function(smp_call_func_t func, void *info, int wait) |
504 | { | 504 | { |
505 | preempt_disable(); | 505 | preempt_disable(); |
506 | smp_call_function_many(cpu_online_mask, func, info, wait); | 506 | smp_call_function_many(cpu_online_mask, func, info, wait); |
diff --git a/kernel/softirq.c b/kernel/softirq.c index 081869ed3a9f..d4d918a91881 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -67,7 +67,7 @@ char *softirq_to_name[NR_SOFTIRQS] = { | |||
67 | * to the pending events, so lets the scheduler to balance | 67 | * to the pending events, so lets the scheduler to balance |
68 | * the softirq load for us. | 68 | * the softirq load for us. |
69 | */ | 69 | */ |
70 | void wakeup_softirqd(void) | 70 | static void wakeup_softirqd(void) |
71 | { | 71 | { |
72 | /* Interrupts are disabled: no need to stop preemption */ | 72 | /* Interrupts are disabled: no need to stop preemption */ |
73 | struct task_struct *tsk = __get_cpu_var(ksoftirqd); | 73 | struct task_struct *tsk = __get_cpu_var(ksoftirqd); |
@@ -229,18 +229,20 @@ restart: | |||
229 | 229 | ||
230 | do { | 230 | do { |
231 | if (pending & 1) { | 231 | if (pending & 1) { |
232 | unsigned int vec_nr = h - softirq_vec; | ||
232 | int prev_count = preempt_count(); | 233 | int prev_count = preempt_count(); |
233 | kstat_incr_softirqs_this_cpu(h - softirq_vec); | ||
234 | 234 | ||
235 | trace_softirq_entry(h, softirq_vec); | 235 | kstat_incr_softirqs_this_cpu(vec_nr); |
236 | |||
237 | trace_softirq_entry(vec_nr); | ||
236 | h->action(h); | 238 | h->action(h); |
237 | trace_softirq_exit(h, softirq_vec); | 239 | trace_softirq_exit(vec_nr); |
238 | if (unlikely(prev_count != preempt_count())) { | 240 | if (unlikely(prev_count != preempt_count())) { |
239 | printk(KERN_ERR "huh, entered softirq %td %s %p" | 241 | printk(KERN_ERR "huh, entered softirq %u %s %p" |
240 | "with preempt_count %08x," | 242 | "with preempt_count %08x," |
241 | " exited with %08x?\n", h - softirq_vec, | 243 | " exited with %08x?\n", vec_nr, |
242 | softirq_to_name[h - softirq_vec], | 244 | softirq_to_name[vec_nr], h->action, |
243 | h->action, prev_count, preempt_count()); | 245 | prev_count, preempt_count()); |
244 | preempt_count() = prev_count; | 246 | preempt_count() = prev_count; |
245 | } | 247 | } |
246 | 248 | ||
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 090c28812ce1..2df820b03beb 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
@@ -262,7 +262,7 @@ repeat: | |||
262 | cpu_stop_fn_t fn = work->fn; | 262 | cpu_stop_fn_t fn = work->fn; |
263 | void *arg = work->arg; | 263 | void *arg = work->arg; |
264 | struct cpu_stop_done *done = work->done; | 264 | struct cpu_stop_done *done = work->done; |
265 | char ksym_buf[KSYM_NAME_LEN]; | 265 | char ksym_buf[KSYM_NAME_LEN] __maybe_unused; |
266 | 266 | ||
267 | __set_current_state(TASK_RUNNING); | 267 | __set_current_state(TASK_RUNNING); |
268 | 268 | ||
@@ -304,7 +304,7 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb, | |||
304 | p = kthread_create(cpu_stopper_thread, stopper, "migration/%d", | 304 | p = kthread_create(cpu_stopper_thread, stopper, "migration/%d", |
305 | cpu); | 305 | cpu); |
306 | if (IS_ERR(p)) | 306 | if (IS_ERR(p)) |
307 | return NOTIFY_BAD; | 307 | return notifier_from_errno(PTR_ERR(p)); |
308 | get_task_struct(p); | 308 | get_task_struct(p); |
309 | kthread_bind(p, cpu); | 309 | kthread_bind(p, cpu); |
310 | sched_set_stop_task(cpu, p); | 310 | sched_set_stop_task(cpu, p); |
@@ -372,7 +372,7 @@ static int __init cpu_stop_init(void) | |||
372 | /* start one for the boot cpu */ | 372 | /* start one for the boot cpu */ |
373 | err = cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_UP_PREPARE, | 373 | err = cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_UP_PREPARE, |
374 | bcpu); | 374 | bcpu); |
375 | BUG_ON(err == NOTIFY_BAD); | 375 | BUG_ON(err != NOTIFY_OK); |
376 | cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_ONLINE, bcpu); | 376 | cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_ONLINE, bcpu); |
377 | register_cpu_notifier(&cpu_stop_cpu_notifier); | 377 | register_cpu_notifier(&cpu_stop_cpu_notifier); |
378 | 378 | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3a45c224770f..b65bf634035e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -161,8 +161,6 @@ extern int no_unaligned_warning; | |||
161 | extern int unaligned_dump_stack; | 161 | extern int unaligned_dump_stack; |
162 | #endif | 162 | #endif |
163 | 163 | ||
164 | extern struct ratelimit_state printk_ratelimit_state; | ||
165 | |||
166 | #ifdef CONFIG_PROC_SYSCTL | 164 | #ifdef CONFIG_PROC_SYSCTL |
167 | static int proc_do_cad_pid(struct ctl_table *table, int write, | 165 | static int proc_do_cad_pid(struct ctl_table *table, int write, |
168 | void __user *buffer, size_t *lenp, loff_t *ppos); | 166 | void __user *buffer, size_t *lenp, loff_t *ppos); |
@@ -706,6 +704,15 @@ static struct ctl_table kern_table[] = { | |||
706 | }, | 704 | }, |
707 | #endif | 705 | #endif |
708 | { | 706 | { |
707 | .procname = "dmesg_restrict", | ||
708 | .data = &dmesg_restrict, | ||
709 | .maxlen = sizeof(int), | ||
710 | .mode = 0644, | ||
711 | .proc_handler = proc_dointvec_minmax, | ||
712 | .extra1 = &zero, | ||
713 | .extra2 = &one, | ||
714 | }, | ||
715 | { | ||
709 | .procname = "ngroups_max", | 716 | .procname = "ngroups_max", |
710 | .data = &ngroups_max, | 717 | .data = &ngroups_max, |
711 | .maxlen = sizeof (int), | 718 | .maxlen = sizeof (int), |
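
The new kern_table entry exposes the flag as kernel.dmesg_restrict; proc_dointvec_minmax with extra1/extra2 pointing at zero and one clamps it to a boolean. A minimal sketch of toggling it from userspace, assuming the usual /proc/sys mount point and a privileged caller:

#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/sys/kernel/dmesg_restrict", "w");

        if (!f) {
                perror("dmesg_restrict");
                return 1;
        }
        /* Only "0" or "1" are accepted; anything outside the range is
         * rejected by proc_dointvec_minmax with EINVAL. */
        if (fputs("1\n", f) == EOF || fclose(f) == EOF) {
                perror("dmesg_restrict");
                return 1;
        }
        return 0;
}
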
@@ -1340,28 +1347,28 @@ static struct ctl_table fs_table[] = { | |||
1340 | .data = &inodes_stat, | 1347 | .data = &inodes_stat, |
1341 | .maxlen = 2*sizeof(int), | 1348 | .maxlen = 2*sizeof(int), |
1342 | .mode = 0444, | 1349 | .mode = 0444, |
1343 | .proc_handler = proc_dointvec, | 1350 | .proc_handler = proc_nr_inodes, |
1344 | }, | 1351 | }, |
1345 | { | 1352 | { |
1346 | .procname = "inode-state", | 1353 | .procname = "inode-state", |
1347 | .data = &inodes_stat, | 1354 | .data = &inodes_stat, |
1348 | .maxlen = 7*sizeof(int), | 1355 | .maxlen = 7*sizeof(int), |
1349 | .mode = 0444, | 1356 | .mode = 0444, |
1350 | .proc_handler = proc_dointvec, | 1357 | .proc_handler = proc_nr_inodes, |
1351 | }, | 1358 | }, |
1352 | { | 1359 | { |
1353 | .procname = "file-nr", | 1360 | .procname = "file-nr", |
1354 | .data = &files_stat, | 1361 | .data = &files_stat, |
1355 | .maxlen = 3*sizeof(int), | 1362 | .maxlen = sizeof(files_stat), |
1356 | .mode = 0444, | 1363 | .mode = 0444, |
1357 | .proc_handler = proc_nr_files, | 1364 | .proc_handler = proc_nr_files, |
1358 | }, | 1365 | }, |
1359 | { | 1366 | { |
1360 | .procname = "file-max", | 1367 | .procname = "file-max", |
1361 | .data = &files_stat.max_files, | 1368 | .data = &files_stat.max_files, |
1362 | .maxlen = sizeof(int), | 1369 | .maxlen = sizeof(files_stat.max_files), |
1363 | .mode = 0644, | 1370 | .mode = 0644, |
1364 | .proc_handler = proc_dointvec, | 1371 | .proc_handler = proc_doulongvec_minmax, |
1365 | }, | 1372 | }, |
1366 | { | 1373 | { |
1367 | .procname = "nr_open", | 1374 | .procname = "nr_open", |
@@ -1377,7 +1384,7 @@ static struct ctl_table fs_table[] = { | |||
1377 | .data = &dentry_stat, | 1384 | .data = &dentry_stat, |
1378 | .maxlen = 6*sizeof(int), | 1385 | .maxlen = 6*sizeof(int), |
1379 | .mode = 0444, | 1386 | .mode = 0444, |
1380 | .proc_handler = proc_dointvec, | 1387 | .proc_handler = proc_nr_dentry, |
1381 | }, | 1388 | }, |
1382 | { | 1389 | { |
1383 | .procname = "overflowuid", | 1390 | .procname = "overflowuid", |
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 11281d5792bd..c8231fb15708 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c | |||
@@ -175,22 +175,8 @@ static void send_cpu_listeners(struct sk_buff *skb, | |||
175 | up_write(&listeners->sem); | 175 | up_write(&listeners->sem); |
176 | } | 176 | } |
177 | 177 | ||
178 | static int fill_pid(pid_t pid, struct task_struct *tsk, | 178 | static void fill_stats(struct task_struct *tsk, struct taskstats *stats) |
179 | struct taskstats *stats) | ||
180 | { | 179 | { |
181 | int rc = 0; | ||
182 | |||
183 | if (!tsk) { | ||
184 | rcu_read_lock(); | ||
185 | tsk = find_task_by_vpid(pid); | ||
186 | if (tsk) | ||
187 | get_task_struct(tsk); | ||
188 | rcu_read_unlock(); | ||
189 | if (!tsk) | ||
190 | return -ESRCH; | ||
191 | } else | ||
192 | get_task_struct(tsk); | ||
193 | |||
194 | memset(stats, 0, sizeof(*stats)); | 180 | memset(stats, 0, sizeof(*stats)); |
195 | /* | 181 | /* |
196 | * Each accounting subsystem adds calls to its functions to | 182 | * Each accounting subsystem adds calls to its functions to |
@@ -209,17 +195,27 @@ static int fill_pid(pid_t pid, struct task_struct *tsk, | |||
209 | 195 | ||
210 | /* fill in extended acct fields */ | 196 | /* fill in extended acct fields */ |
211 | xacct_add_tsk(stats, tsk); | 197 | xacct_add_tsk(stats, tsk); |
198 | } | ||
212 | 199 | ||
213 | /* Define err: label here if needed */ | 200 | static int fill_stats_for_pid(pid_t pid, struct taskstats *stats) |
214 | put_task_struct(tsk); | 201 | { |
215 | return rc; | 202 | struct task_struct *tsk; |
216 | 203 | ||
204 | rcu_read_lock(); | ||
205 | tsk = find_task_by_vpid(pid); | ||
206 | if (tsk) | ||
207 | get_task_struct(tsk); | ||
208 | rcu_read_unlock(); | ||
209 | if (!tsk) | ||
210 | return -ESRCH; | ||
211 | fill_stats(tsk, stats); | ||
212 | put_task_struct(tsk); | ||
213 | return 0; | ||
217 | } | 214 | } |
218 | 215 | ||
219 | static int fill_tgid(pid_t tgid, struct task_struct *first, | 216 | static int fill_stats_for_tgid(pid_t tgid, struct taskstats *stats) |
220 | struct taskstats *stats) | ||
221 | { | 217 | { |
222 | struct task_struct *tsk; | 218 | struct task_struct *tsk, *first; |
223 | unsigned long flags; | 219 | unsigned long flags; |
224 | int rc = -ESRCH; | 220 | int rc = -ESRCH; |
225 | 221 | ||
@@ -228,8 +224,7 @@ static int fill_tgid(pid_t tgid, struct task_struct *first, | |||
228 | * leaders who are already counted with the dead tasks | 224 | * leaders who are already counted with the dead tasks |
229 | */ | 225 | */ |
230 | rcu_read_lock(); | 226 | rcu_read_lock(); |
231 | if (!first) | 227 | first = find_task_by_vpid(tgid); |
232 | first = find_task_by_vpid(tgid); | ||
233 | 228 | ||
234 | if (!first || !lock_task_sighand(first, &flags)) | 229 | if (!first || !lock_task_sighand(first, &flags)) |
235 | goto out; | 230 | goto out; |
@@ -268,7 +263,6 @@ out: | |||
268 | return rc; | 263 | return rc; |
269 | } | 264 | } |
270 | 265 | ||
271 | |||
272 | static void fill_tgid_exit(struct task_struct *tsk) | 266 | static void fill_tgid_exit(struct task_struct *tsk) |
273 | { | 267 | { |
274 | unsigned long flags; | 268 | unsigned long flags; |
@@ -360,6 +354,12 @@ static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) | |||
360 | struct nlattr *na, *ret; | 354 | struct nlattr *na, *ret; |
361 | int aggr; | 355 | int aggr; |
362 | 356 | ||
357 | /* If we don't pad, we end up with alignment on a 4 byte boundary. | ||
358 | * This causes lots of runtime warnings on systems requiring 8 byte | ||
359 | * alignment */ | ||
360 | u32 pids[2] = { pid, 0 }; | ||
361 | int pid_size = ALIGN(sizeof(pid), sizeof(long)); | ||
362 | |||
363 | aggr = (type == TASKSTATS_TYPE_PID) | 363 | aggr = (type == TASKSTATS_TYPE_PID) |
364 | ? TASKSTATS_TYPE_AGGR_PID | 364 | ? TASKSTATS_TYPE_AGGR_PID |
365 | : TASKSTATS_TYPE_AGGR_TGID; | 365 | : TASKSTATS_TYPE_AGGR_TGID; |
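
The padding comment above is about the nla_put() payload size: a bare u32 pid leaves the following TASKSTATS_TYPE_STATS attribute on a 4-byte boundary, which trips unaligned-access warnings on 64-bit architectures that want struct taskstats long-aligned. The arithmetic in standalone form, with ALIGN written out essentially as the kernel defines it for power-of-two alignments:

#include <stdio.h>

#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
        unsigned int pid = 1234;
        unsigned int pids[2] = { pid, 0 };      /* value plus explicit pad */
        size_t pid_size = ALIGN(sizeof(pid), sizeof(long));

        /* On a 64-bit build this prints 4 -> 8: the attribute payload
         * stays long-aligned, so the taskstats blob that follows it in
         * the same netlink message is aligned as well. */
        printf("sizeof(pid)=%zu, padded payload=%zu, pids[0]=%u\n",
               sizeof(pid), pid_size, pids[0]);
        return 0;
}
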
@@ -367,7 +367,7 @@ static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) | |||
367 | na = nla_nest_start(skb, aggr); | 367 | na = nla_nest_start(skb, aggr); |
368 | if (!na) | 368 | if (!na) |
369 | goto err; | 369 | goto err; |
370 | if (nla_put(skb, type, sizeof(pid), &pid) < 0) | 370 | if (nla_put(skb, type, pid_size, pids) < 0) |
371 | goto err; | 371 | goto err; |
372 | ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); | 372 | ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); |
373 | if (!ret) | 373 | if (!ret) |
@@ -424,39 +424,46 @@ err: | |||
424 | return rc; | 424 | return rc; |
425 | } | 425 | } |
426 | 426 | ||
427 | static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) | 427 | static int cmd_attr_register_cpumask(struct genl_info *info) |
428 | { | 428 | { |
429 | int rc; | ||
430 | struct sk_buff *rep_skb; | ||
431 | struct taskstats *stats; | ||
432 | size_t size; | ||
433 | cpumask_var_t mask; | 429 | cpumask_var_t mask; |
430 | int rc; | ||
434 | 431 | ||
435 | if (!alloc_cpumask_var(&mask, GFP_KERNEL)) | 432 | if (!alloc_cpumask_var(&mask, GFP_KERNEL)) |
436 | return -ENOMEM; | 433 | return -ENOMEM; |
437 | |||
438 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask); | 434 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask); |
439 | if (rc < 0) | 435 | if (rc < 0) |
440 | goto free_return_rc; | 436 | goto out; |
441 | if (rc == 0) { | 437 | rc = add_del_listener(info->snd_pid, mask, REGISTER); |
442 | rc = add_del_listener(info->snd_pid, mask, REGISTER); | 438 | out: |
443 | goto free_return_rc; | 439 | free_cpumask_var(mask); |
444 | } | 440 | return rc; |
441 | } | ||
442 | |||
443 | static int cmd_attr_deregister_cpumask(struct genl_info *info) | ||
444 | { | ||
445 | cpumask_var_t mask; | ||
446 | int rc; | ||
445 | 447 | ||
448 | if (!alloc_cpumask_var(&mask, GFP_KERNEL)) | ||
449 | return -ENOMEM; | ||
446 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask); | 450 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask); |
447 | if (rc < 0) | 451 | if (rc < 0) |
448 | goto free_return_rc; | 452 | goto out; |
449 | if (rc == 0) { | 453 | rc = add_del_listener(info->snd_pid, mask, DEREGISTER); |
450 | rc = add_del_listener(info->snd_pid, mask, DEREGISTER); | 454 | out: |
451 | free_return_rc: | ||
452 | free_cpumask_var(mask); | ||
453 | return rc; | ||
454 | } | ||
455 | free_cpumask_var(mask); | 455 | free_cpumask_var(mask); |
456 | return rc; | ||
457 | } | ||
458 | |||
459 | static int cmd_attr_pid(struct genl_info *info) | ||
460 | { | ||
461 | struct taskstats *stats; | ||
462 | struct sk_buff *rep_skb; | ||
463 | size_t size; | ||
464 | u32 pid; | ||
465 | int rc; | ||
456 | 466 | ||
457 | /* | ||
458 | * Size includes space for nested attributes | ||
459 | */ | ||
460 | size = nla_total_size(sizeof(u32)) + | 467 | size = nla_total_size(sizeof(u32)) + |
461 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); | 468 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); |
462 | 469 | ||
@@ -465,33 +472,64 @@ free_return_rc: | |||
465 | return rc; | 472 | return rc; |
466 | 473 | ||
467 | rc = -EINVAL; | 474 | rc = -EINVAL; |
468 | if (info->attrs[TASKSTATS_CMD_ATTR_PID]) { | 475 | pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); |
469 | u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); | 476 | stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid); |
470 | stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid); | 477 | if (!stats) |
471 | if (!stats) | 478 | goto err; |
472 | goto err; | 479 | |
473 | 480 | rc = fill_stats_for_pid(pid, stats); | |
474 | rc = fill_pid(pid, NULL, stats); | 481 | if (rc < 0) |
475 | if (rc < 0) | 482 | goto err; |
476 | goto err; | 483 | return send_reply(rep_skb, info); |
477 | } else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) { | 484 | err: |
478 | u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); | 485 | nlmsg_free(rep_skb); |
479 | stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid); | 486 | return rc; |
480 | if (!stats) | 487 | } |
481 | goto err; | 488 | |
482 | 489 | static int cmd_attr_tgid(struct genl_info *info) | |
483 | rc = fill_tgid(tgid, NULL, stats); | 490 | { |
484 | if (rc < 0) | 491 | struct taskstats *stats; |
485 | goto err; | 492 | struct sk_buff *rep_skb; |
486 | } else | 493 | size_t size; |
494 | u32 tgid; | ||
495 | int rc; | ||
496 | |||
497 | size = nla_total_size(sizeof(u32)) + | ||
498 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); | ||
499 | |||
500 | rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); | ||
501 | if (rc < 0) | ||
502 | return rc; | ||
503 | |||
504 | rc = -EINVAL; | ||
505 | tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); | ||
506 | stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid); | ||
507 | if (!stats) | ||
487 | goto err; | 508 | goto err; |
488 | 509 | ||
510 | rc = fill_stats_for_tgid(tgid, stats); | ||
511 | if (rc < 0) | ||
512 | goto err; | ||
489 | return send_reply(rep_skb, info); | 513 | return send_reply(rep_skb, info); |
490 | err: | 514 | err: |
491 | nlmsg_free(rep_skb); | 515 | nlmsg_free(rep_skb); |
492 | return rc; | 516 | return rc; |
493 | } | 517 | } |
494 | 518 | ||
519 | static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) | ||
520 | { | ||
521 | if (info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK]) | ||
522 | return cmd_attr_register_cpumask(info); | ||
523 | else if (info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK]) | ||
524 | return cmd_attr_deregister_cpumask(info); | ||
525 | else if (info->attrs[TASKSTATS_CMD_ATTR_PID]) | ||
526 | return cmd_attr_pid(info); | ||
527 | else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) | ||
528 | return cmd_attr_tgid(info); | ||
529 | else | ||
530 | return -EINVAL; | ||
531 | } | ||
532 | |||
495 | static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk) | 533 | static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk) |
496 | { | 534 | { |
497 | struct signal_struct *sig = tsk->signal; | 535 | struct signal_struct *sig = tsk->signal; |
@@ -555,9 +593,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) | |||
555 | if (!stats) | 593 | if (!stats) |
556 | goto err; | 594 | goto err; |
557 | 595 | ||
558 | rc = fill_pid(-1, tsk, stats); | 596 | fill_stats(tsk, stats); |
559 | if (rc < 0) | ||
560 | goto err; | ||
561 | 597 | ||
562 | /* | 598 | /* |
563 | * Doesn't matter if tsk is the leader or the last group member leaving | 599 | * Doesn't matter if tsk is the leader or the last group member leaving |
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 959f8d6c8cc1..7b8ec0281548 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
@@ -23,7 +23,6 @@ | |||
23 | #include <linux/mutex.h> | 23 | #include <linux/mutex.h> |
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/debugfs.h> | 25 | #include <linux/debugfs.h> |
26 | #include <linux/smp_lock.h> | ||
27 | #include <linux/time.h> | 26 | #include <linux/time.h> |
28 | #include <linux/uaccess.h> | 27 | #include <linux/uaccess.h> |
29 | 28 | ||
@@ -169,7 +168,6 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, | |||
169 | static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), | 168 | static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), |
170 | BLK_TC_ACT(BLK_TC_WRITE) }; | 169 | BLK_TC_ACT(BLK_TC_WRITE) }; |
171 | 170 | ||
172 | #define BLK_TC_HARDBARRIER BLK_TC_BARRIER | ||
173 | #define BLK_TC_RAHEAD BLK_TC_AHEAD | 171 | #define BLK_TC_RAHEAD BLK_TC_AHEAD |
174 | 172 | ||
175 | /* The ilog2() calls fall out because they're constant */ | 173 | /* The ilog2() calls fall out because they're constant */ |
@@ -197,7 +195,6 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, | |||
197 | return; | 195 | return; |
198 | 196 | ||
199 | what |= ddir_act[rw & WRITE]; | 197 | what |= ddir_act[rw & WRITE]; |
200 | what |= MASK_TC_BIT(rw, HARDBARRIER); | ||
201 | what |= MASK_TC_BIT(rw, SYNC); | 198 | what |= MASK_TC_BIT(rw, SYNC); |
202 | what |= MASK_TC_BIT(rw, RAHEAD); | 199 | what |= MASK_TC_BIT(rw, RAHEAD); |
203 | what |= MASK_TC_BIT(rw, META); | 200 | what |= MASK_TC_BIT(rw, META); |
@@ -326,6 +323,7 @@ static const struct file_operations blk_dropped_fops = { | |||
326 | .owner = THIS_MODULE, | 323 | .owner = THIS_MODULE, |
327 | .open = blk_dropped_open, | 324 | .open = blk_dropped_open, |
328 | .read = blk_dropped_read, | 325 | .read = blk_dropped_read, |
326 | .llseek = default_llseek, | ||
329 | }; | 327 | }; |
330 | 328 | ||
331 | static int blk_msg_open(struct inode *inode, struct file *filp) | 329 | static int blk_msg_open(struct inode *inode, struct file *filp) |
@@ -365,6 +363,7 @@ static const struct file_operations blk_msg_fops = { | |||
365 | .owner = THIS_MODULE, | 363 | .owner = THIS_MODULE, |
366 | .open = blk_msg_open, | 364 | .open = blk_msg_open, |
367 | .write = blk_msg_write, | 365 | .write = blk_msg_write, |
366 | .llseek = noop_llseek, | ||
368 | }; | 367 | }; |
369 | 368 | ||
370 | /* | 369 | /* |
@@ -639,7 +638,6 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) | |||
639 | if (!q) | 638 | if (!q) |
640 | return -ENXIO; | 639 | return -ENXIO; |
641 | 640 | ||
642 | lock_kernel(); | ||
643 | mutex_lock(&bdev->bd_mutex); | 641 | mutex_lock(&bdev->bd_mutex); |
644 | 642 | ||
645 | switch (cmd) { | 643 | switch (cmd) { |
@@ -667,7 +665,6 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) | |||
667 | } | 665 | } |
668 | 666 | ||
669 | mutex_unlock(&bdev->bd_mutex); | 667 | mutex_unlock(&bdev->bd_mutex); |
670 | unlock_kernel(); | ||
671 | return ret; | 668 | return ret; |
672 | } | 669 | } |
673 | 670 | ||
@@ -1652,10 +1649,9 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, | |||
1652 | struct block_device *bdev; | 1649 | struct block_device *bdev; |
1653 | ssize_t ret = -ENXIO; | 1650 | ssize_t ret = -ENXIO; |
1654 | 1651 | ||
1655 | lock_kernel(); | ||
1656 | bdev = bdget(part_devt(p)); | 1652 | bdev = bdget(part_devt(p)); |
1657 | if (bdev == NULL) | 1653 | if (bdev == NULL) |
1658 | goto out_unlock_kernel; | 1654 | goto out; |
1659 | 1655 | ||
1660 | q = blk_trace_get_queue(bdev); | 1656 | q = blk_trace_get_queue(bdev); |
1661 | if (q == NULL) | 1657 | if (q == NULL) |
@@ -1683,8 +1679,7 @@ out_unlock_bdev: | |||
1683 | mutex_unlock(&bdev->bd_mutex); | 1679 | mutex_unlock(&bdev->bd_mutex); |
1684 | out_bdput: | 1680 | out_bdput: |
1685 | bdput(bdev); | 1681 | bdput(bdev); |
1686 | out_unlock_kernel: | 1682 | out: |
1687 | unlock_kernel(); | ||
1688 | return ret; | 1683 | return ret; |
1689 | } | 1684 | } |
1690 | 1685 | ||
@@ -1714,11 +1709,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, | |||
1714 | 1709 | ||
1715 | ret = -ENXIO; | 1710 | ret = -ENXIO; |
1716 | 1711 | ||
1717 | lock_kernel(); | ||
1718 | p = dev_to_part(dev); | 1712 | p = dev_to_part(dev); |
1719 | bdev = bdget(part_devt(p)); | 1713 | bdev = bdget(part_devt(p)); |
1720 | if (bdev == NULL) | 1714 | if (bdev == NULL) |
1721 | goto out_unlock_kernel; | 1715 | goto out; |
1722 | 1716 | ||
1723 | q = blk_trace_get_queue(bdev); | 1717 | q = blk_trace_get_queue(bdev); |
1724 | if (q == NULL) | 1718 | if (q == NULL) |
@@ -1753,8 +1747,6 @@ out_unlock_bdev: | |||
1753 | mutex_unlock(&bdev->bd_mutex); | 1747 | mutex_unlock(&bdev->bd_mutex); |
1754 | out_bdput: | 1748 | out_bdput: |
1755 | bdput(bdev); | 1749 | bdput(bdev); |
1756 | out_unlock_kernel: | ||
1757 | unlock_kernel(); | ||
1758 | out: | 1750 | out: |
1759 | return ret ? ret : count; | 1751 | return ret ? ret : count; |
1760 | } | 1752 | } |
@@ -1813,8 +1805,6 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) | |||
1813 | 1805 | ||
1814 | if (rw & REQ_RAHEAD) | 1806 | if (rw & REQ_RAHEAD) |
1815 | rwbs[i++] = 'A'; | 1807 | rwbs[i++] = 'A'; |
1816 | if (rw & REQ_HARDBARRIER) | ||
1817 | rwbs[i++] = 'B'; | ||
1818 | if (rw & REQ_SYNC) | 1808 | if (rw & REQ_SYNC) |
1819 | rwbs[i++] = 'S'; | 1809 | rwbs[i++] = 'S'; |
1820 | if (rw & REQ_META) | 1810 | if (rw & REQ_META) |
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index ebd80d50c474..f3dadae83883 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -800,6 +800,7 @@ static const struct file_operations ftrace_profile_fops = { | |||
800 | .open = tracing_open_generic, | 800 | .open = tracing_open_generic, |
801 | .read = ftrace_profile_read, | 801 | .read = ftrace_profile_read, |
802 | .write = ftrace_profile_write, | 802 | .write = ftrace_profile_write, |
803 | .llseek = default_llseek, | ||
803 | }; | 804 | }; |
804 | 805 | ||
805 | /* used to initialize the real stat files */ | 806 | /* used to initialize the real stat files */ |
@@ -2669,6 +2670,7 @@ static const struct file_operations ftrace_graph_fops = { | |||
2669 | .read = seq_read, | 2670 | .read = seq_read, |
2670 | .write = ftrace_graph_write, | 2671 | .write = ftrace_graph_write, |
2671 | .release = ftrace_graph_release, | 2672 | .release = ftrace_graph_release, |
2673 | .llseek = seq_lseek, | ||
2672 | }; | 2674 | }; |
2673 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ | 2675 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ |
2674 | 2676 | ||
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index c5a632a669e1..9ed509a015d8 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -224,6 +224,9 @@ enum { | |||
224 | RB_LEN_TIME_STAMP = 16, | 224 | RB_LEN_TIME_STAMP = 16, |
225 | }; | 225 | }; |
226 | 226 | ||
227 | #define skip_time_extend(event) \ | ||
228 | ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND)) | ||
229 | |||
227 | static inline int rb_null_event(struct ring_buffer_event *event) | 230 | static inline int rb_null_event(struct ring_buffer_event *event) |
228 | { | 231 | { |
229 | return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; | 232 | return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; |
@@ -248,8 +251,12 @@ rb_event_data_length(struct ring_buffer_event *event) | |||
248 | return length + RB_EVNT_HDR_SIZE; | 251 | return length + RB_EVNT_HDR_SIZE; |
249 | } | 252 | } |
250 | 253 | ||
251 | /* inline for ring buffer fast paths */ | 254 | /* |
252 | static unsigned | 255 | * Return the length of the given event. Will return |
256 | * the length of the time extend if the event is a | ||
257 | * time extend. | ||
258 | */ | ||
259 | static inline unsigned | ||
253 | rb_event_length(struct ring_buffer_event *event) | 260 | rb_event_length(struct ring_buffer_event *event) |
254 | { | 261 | { |
255 | switch (event->type_len) { | 262 | switch (event->type_len) { |
@@ -274,13 +281,41 @@ rb_event_length(struct ring_buffer_event *event) | |||
274 | return 0; | 281 | return 0; |
275 | } | 282 | } |
276 | 283 | ||
284 | /* | ||
285 | * Return total length of time extend and data, | ||
286 | * or just the event length for all other events. | ||
287 | */ | ||
288 | static inline unsigned | ||
289 | rb_event_ts_length(struct ring_buffer_event *event) | ||
290 | { | ||
291 | unsigned len = 0; | ||
292 | |||
293 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { | ||
294 | /* time extends include the data event after it */ | ||
295 | len = RB_LEN_TIME_EXTEND; | ||
296 | event = skip_time_extend(event); | ||
297 | } | ||
298 | return len + rb_event_length(event); | ||
299 | } | ||
300 | |||
277 | /** | 301 | /** |
278 | * ring_buffer_event_length - return the length of the event | 302 | * ring_buffer_event_length - return the length of the event |
279 | * @event: the event to get the length of | 303 | * @event: the event to get the length of |
304 | * | ||
305 | * Returns the size of the data load of a data event. | ||
306 | * If the event is something other than a data event, it | ||
307 | * returns the size of the event itself. With the exception | ||
308 | * of a TIME EXTEND, where it still returns the size of the | ||
309 | * data load of the data event after it. | ||
280 | */ | 310 | */ |
281 | unsigned ring_buffer_event_length(struct ring_buffer_event *event) | 311 | unsigned ring_buffer_event_length(struct ring_buffer_event *event) |
282 | { | 312 | { |
283 | unsigned length = rb_event_length(event); | 313 | unsigned length; |
314 | |||
315 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) | ||
316 | event = skip_time_extend(event); | ||
317 | |||
318 | length = rb_event_length(event); | ||
284 | if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) | 319 | if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) |
285 | return length; | 320 | return length; |
286 | length -= RB_EVNT_HDR_SIZE; | 321 | length -= RB_EVNT_HDR_SIZE; |
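skip_time_extend() and rb_event_ts_length() encode the central invariant of this series: a TIME_EXTEND record is no longer a free-standing event but an 8-byte prefix attached to the data event that follows it. A reader stepping through a page therefore advances by the combined length, roughly as in this sketch (rb_event_ts_length() is file-local, so this would sit inside ring_buffer.c):

        /* Hedged sketch: advance to the next independent event; a time
         * extend and its data event are skipped as a single unit. */
        static void *demo_next_event(struct ring_buffer_event *event)
        {
                return (char *)event + rb_event_ts_length(event);
        }

Note that ring_buffer_event_length(), by contrast, keeps reporting only the data payload, skipping over the extend.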
@@ -294,6 +329,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length); | |||
294 | static void * | 329 | static void * |
295 | rb_event_data(struct ring_buffer_event *event) | 330 | rb_event_data(struct ring_buffer_event *event) |
296 | { | 331 | { |
332 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) | ||
333 | event = skip_time_extend(event); | ||
297 | BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); | 334 | BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); |
298 | /* If length is in len field, then array[0] has the data */ | 335 | /* If length is in len field, then array[0] has the data */ |
299 | if (event->type_len) | 336 | if (event->type_len) |
@@ -404,9 +441,6 @@ static inline int test_time_stamp(u64 delta) | |||
404 | /* Max payload is BUF_PAGE_SIZE - header (8bytes) */ | 441 | /* Max payload is BUF_PAGE_SIZE - header (8bytes) */ |
405 | #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) | 442 | #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) |
406 | 443 | ||
407 | /* Max number of timestamps that can fit on a page */ | ||
408 | #define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_EXTEND) | ||
409 | |||
410 | int ring_buffer_print_page_header(struct trace_seq *s) | 444 | int ring_buffer_print_page_header(struct trace_seq *s) |
411 | { | 445 | { |
412 | struct buffer_data_page field; | 446 | struct buffer_data_page field; |
@@ -1546,6 +1580,25 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) | |||
1546 | iter->head = 0; | 1580 | iter->head = 0; |
1547 | } | 1581 | } |
1548 | 1582 | ||
1583 | /* Slow path, do not inline */ | ||
1584 | static noinline struct ring_buffer_event * | ||
1585 | rb_add_time_stamp(struct ring_buffer_event *event, u64 delta) | ||
1586 | { | ||
1587 | event->type_len = RINGBUF_TYPE_TIME_EXTEND; | ||
1588 | |||
1589 | /* Not the first event on the page? */ | ||
1590 | if (rb_event_index(event)) { | ||
1591 | event->time_delta = delta & TS_MASK; | ||
1592 | event->array[0] = delta >> TS_SHIFT; | ||
1593 | } else { | ||
1594 | /* nope, just zero it */ | ||
1595 | event->time_delta = 0; | ||
1596 | event->array[0] = 0; | ||
1597 | } | ||
1598 | |||
1599 | return skip_time_extend(event); | ||
1600 | } | ||
1601 | |||
1549 | /** | 1602 | /** |
1550 | * ring_buffer_update_event - update event type and data | 1603 | * ring_buffer_update_event - update event type and data |
1551 | * @event: the even to update | 1604 | * @event: the even to update |
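rb_add_time_stamp() splits the oversized delta across the extend record: the low bits land in time_delta and the remainder, shifted down by TS_SHIFT, in array[0]; the commit path recombines them before advancing write_stamp (see the rb_update_write_stamp() hunk further down). A minimal sketch of that round trip, assuming the TS_SHIFT/TS_MASK values defined in ring_buffer.c:

        #include <linux/types.h>

        #define DEMO_TS_SHIFT   27                      /* assumed == TS_SHIFT */
        #define DEMO_TS_MASK    ((1ULL << DEMO_TS_SHIFT) - 1)

        static u64 demo_rebuild_delta(u32 time_delta, u32 array0)
        {
                u64 delta = array0;

                delta <<= DEMO_TS_SHIFT;        /* high part from array[0] */
                delta += time_delta;            /* low part from time_delta */
                return delta;                   /* added to write_stamp on commit */
        }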
@@ -1558,28 +1611,31 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) | |||
1558 | * data field. | 1611 | * data field. |
1559 | */ | 1612 | */ |
1560 | static void | 1613 | static void |
1561 | rb_update_event(struct ring_buffer_event *event, | 1614 | rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, |
1562 | unsigned type, unsigned length) | 1615 | struct ring_buffer_event *event, unsigned length, |
1616 | int add_timestamp, u64 delta) | ||
1563 | { | 1617 | { |
1564 | event->type_len = type; | 1618 | /* Only a commit updates the timestamp */ |
1565 | 1619 | if (unlikely(!rb_event_is_commit(cpu_buffer, event))) | |
1566 | switch (type) { | 1620 | delta = 0; |
1567 | |||
1568 | case RINGBUF_TYPE_PADDING: | ||
1569 | case RINGBUF_TYPE_TIME_EXTEND: | ||
1570 | case RINGBUF_TYPE_TIME_STAMP: | ||
1571 | break; | ||
1572 | 1621 | ||
1573 | case 0: | 1622 | /* |
1574 | length -= RB_EVNT_HDR_SIZE; | 1623 | * If we need to add a timestamp, then we |
1575 | if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) | 1624 | * add it to the start of the reserved space. |
1576 | event->array[0] = length; | 1625 | */ |
1577 | else | 1626 | if (unlikely(add_timestamp)) { |
1578 | event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); | 1627 | event = rb_add_time_stamp(event, delta); |
1579 | break; | 1628 | length -= RB_LEN_TIME_EXTEND; |
1580 | default: | 1629 | delta = 0; |
1581 | BUG(); | ||
1582 | } | 1630 | } |
1631 | |||
1632 | event->time_delta = delta; | ||
1633 | length -= RB_EVNT_HDR_SIZE; | ||
1634 | if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) { | ||
1635 | event->type_len = 0; | ||
1636 | event->array[0] = length; | ||
1637 | } else | ||
1638 | event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); | ||
1583 | } | 1639 | } |
1584 | 1640 | ||
1585 | /* | 1641 | /* |
@@ -1823,10 +1879,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1823 | local_sub(length, &tail_page->write); | 1879 | local_sub(length, &tail_page->write); |
1824 | } | 1880 | } |
1825 | 1881 | ||
1826 | static struct ring_buffer_event * | 1882 | /* |
1883 | * This is the slow path, force gcc not to inline it. | ||
1884 | */ | ||
1885 | static noinline struct ring_buffer_event * | ||
1827 | rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | 1886 | rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, |
1828 | unsigned long length, unsigned long tail, | 1887 | unsigned long length, unsigned long tail, |
1829 | struct buffer_page *tail_page, u64 *ts) | 1888 | struct buffer_page *tail_page, u64 ts) |
1830 | { | 1889 | { |
1831 | struct buffer_page *commit_page = cpu_buffer->commit_page; | 1890 | struct buffer_page *commit_page = cpu_buffer->commit_page; |
1832 | struct ring_buffer *buffer = cpu_buffer->buffer; | 1891 | struct ring_buffer *buffer = cpu_buffer->buffer; |
@@ -1909,8 +1968,8 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1909 | * Nested commits always have zero deltas, so | 1968 | * Nested commits always have zero deltas, so |
1910 | * just reread the time stamp | 1969 | * just reread the time stamp |
1911 | */ | 1970 | */ |
1912 | *ts = rb_time_stamp(buffer); | 1971 | ts = rb_time_stamp(buffer); |
1913 | next_page->page->time_stamp = *ts; | 1972 | next_page->page->time_stamp = ts; |
1914 | } | 1973 | } |
1915 | 1974 | ||
1916 | out_again: | 1975 | out_again: |
@@ -1929,12 +1988,21 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1929 | 1988 | ||
1930 | static struct ring_buffer_event * | 1989 | static struct ring_buffer_event * |
1931 | __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | 1990 | __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, |
1932 | unsigned type, unsigned long length, u64 *ts) | 1991 | unsigned long length, u64 ts, |
1992 | u64 delta, int add_timestamp) | ||
1933 | { | 1993 | { |
1934 | struct buffer_page *tail_page; | 1994 | struct buffer_page *tail_page; |
1935 | struct ring_buffer_event *event; | 1995 | struct ring_buffer_event *event; |
1936 | unsigned long tail, write; | 1996 | unsigned long tail, write; |
1937 | 1997 | ||
1998 | /* | ||
1999 | * If the time delta since the last event is too big to | ||
2000 | * hold in the time field of the event, then we append a | ||
2001 | * TIME EXTEND event ahead of the data event. | ||
2002 | */ | ||
2003 | if (unlikely(add_timestamp)) | ||
2004 | length += RB_LEN_TIME_EXTEND; | ||
2005 | |||
1938 | tail_page = cpu_buffer->tail_page; | 2006 | tail_page = cpu_buffer->tail_page; |
1939 | write = local_add_return(length, &tail_page->write); | 2007 | write = local_add_return(length, &tail_page->write); |
1940 | 2008 | ||
@@ -1943,7 +2011,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1943 | tail = write - length; | 2011 | tail = write - length; |
1944 | 2012 | ||
1945 | /* See if we shot pass the end of this buffer page */ | 2013 | /* See if we shot pass the end of this buffer page */ |
1946 | if (write > BUF_PAGE_SIZE) | 2014 | if (unlikely(write > BUF_PAGE_SIZE)) |
1947 | return rb_move_tail(cpu_buffer, length, tail, | 2015 | return rb_move_tail(cpu_buffer, length, tail, |
1948 | tail_page, ts); | 2016 | tail_page, ts); |
1949 | 2017 | ||
@@ -1951,18 +2019,16 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1951 | 2019 | ||
1952 | event = __rb_page_index(tail_page, tail); | 2020 | event = __rb_page_index(tail_page, tail); |
1953 | kmemcheck_annotate_bitfield(event, bitfield); | 2021 | kmemcheck_annotate_bitfield(event, bitfield); |
1954 | rb_update_event(event, type, length); | 2022 | rb_update_event(cpu_buffer, event, length, add_timestamp, delta); |
1955 | 2023 | ||
1956 | /* The passed in type is zero for DATA */ | 2024 | local_inc(&tail_page->entries); |
1957 | if (likely(!type)) | ||
1958 | local_inc(&tail_page->entries); | ||
1959 | 2025 | ||
1960 | /* | 2026 | /* |
1961 | * If this is the first commit on the page, then update | 2027 | * If this is the first commit on the page, then update |
1962 | * its timestamp. | 2028 | * its timestamp. |
1963 | */ | 2029 | */ |
1964 | if (!tail) | 2030 | if (!tail) |
1965 | tail_page->page->time_stamp = *ts; | 2031 | tail_page->page->time_stamp = ts; |
1966 | 2032 | ||
1967 | return event; | 2033 | return event; |
1968 | } | 2034 | } |
@@ -1977,7 +2043,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, | |||
1977 | unsigned long addr; | 2043 | unsigned long addr; |
1978 | 2044 | ||
1979 | new_index = rb_event_index(event); | 2045 | new_index = rb_event_index(event); |
1980 | old_index = new_index + rb_event_length(event); | 2046 | old_index = new_index + rb_event_ts_length(event); |
1981 | addr = (unsigned long)event; | 2047 | addr = (unsigned long)event; |
1982 | addr &= PAGE_MASK; | 2048 | addr &= PAGE_MASK; |
1983 | 2049 | ||
@@ -2003,76 +2069,13 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, | |||
2003 | return 0; | 2069 | return 0; |
2004 | } | 2070 | } |
2005 | 2071 | ||
2006 | static int | ||
2007 | rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, | ||
2008 | u64 *ts, u64 *delta) | ||
2009 | { | ||
2010 | struct ring_buffer_event *event; | ||
2011 | int ret; | ||
2012 | |||
2013 | WARN_ONCE(*delta > (1ULL << 59), | ||
2014 | KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n", | ||
2015 | (unsigned long long)*delta, | ||
2016 | (unsigned long long)*ts, | ||
2017 | (unsigned long long)cpu_buffer->write_stamp); | ||
2018 | |||
2019 | /* | ||
2020 | * The delta is too big, we to add a | ||
2021 | * new timestamp. | ||
2022 | */ | ||
2023 | event = __rb_reserve_next(cpu_buffer, | ||
2024 | RINGBUF_TYPE_TIME_EXTEND, | ||
2025 | RB_LEN_TIME_EXTEND, | ||
2026 | ts); | ||
2027 | if (!event) | ||
2028 | return -EBUSY; | ||
2029 | |||
2030 | if (PTR_ERR(event) == -EAGAIN) | ||
2031 | return -EAGAIN; | ||
2032 | |||
2033 | /* Only a commited time event can update the write stamp */ | ||
2034 | if (rb_event_is_commit(cpu_buffer, event)) { | ||
2035 | /* | ||
2036 | * If this is the first on the page, then it was | ||
2037 | * updated with the page itself. Try to discard it | ||
2038 | * and if we can't just make it zero. | ||
2039 | */ | ||
2040 | if (rb_event_index(event)) { | ||
2041 | event->time_delta = *delta & TS_MASK; | ||
2042 | event->array[0] = *delta >> TS_SHIFT; | ||
2043 | } else { | ||
2044 | /* try to discard, since we do not need this */ | ||
2045 | if (!rb_try_to_discard(cpu_buffer, event)) { | ||
2046 | /* nope, just zero it */ | ||
2047 | event->time_delta = 0; | ||
2048 | event->array[0] = 0; | ||
2049 | } | ||
2050 | } | ||
2051 | cpu_buffer->write_stamp = *ts; | ||
2052 | /* let the caller know this was the commit */ | ||
2053 | ret = 1; | ||
2054 | } else { | ||
2055 | /* Try to discard the event */ | ||
2056 | if (!rb_try_to_discard(cpu_buffer, event)) { | ||
2057 | /* Darn, this is just wasted space */ | ||
2058 | event->time_delta = 0; | ||
2059 | event->array[0] = 0; | ||
2060 | } | ||
2061 | ret = 0; | ||
2062 | } | ||
2063 | |||
2064 | *delta = 0; | ||
2065 | |||
2066 | return ret; | ||
2067 | } | ||
2068 | |||
2069 | static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) | 2072 | static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) |
2070 | { | 2073 | { |
2071 | local_inc(&cpu_buffer->committing); | 2074 | local_inc(&cpu_buffer->committing); |
2072 | local_inc(&cpu_buffer->commits); | 2075 | local_inc(&cpu_buffer->commits); |
2073 | } | 2076 | } |
2074 | 2077 | ||
2075 | static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) | 2078 | static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) |
2076 | { | 2079 | { |
2077 | unsigned long commits; | 2080 | unsigned long commits; |
2078 | 2081 | ||
@@ -2110,9 +2113,10 @@ rb_reserve_next_event(struct ring_buffer *buffer, | |||
2110 | unsigned long length) | 2113 | unsigned long length) |
2111 | { | 2114 | { |
2112 | struct ring_buffer_event *event; | 2115 | struct ring_buffer_event *event; |
2113 | u64 ts, delta = 0; | 2116 | u64 ts, delta; |
2114 | int commit = 0; | ||
2115 | int nr_loops = 0; | 2117 | int nr_loops = 0; |
2118 | int add_timestamp; | ||
2119 | u64 diff; | ||
2116 | 2120 | ||
2117 | rb_start_commit(cpu_buffer); | 2121 | rb_start_commit(cpu_buffer); |
2118 | 2122 | ||
@@ -2133,6 +2137,9 @@ rb_reserve_next_event(struct ring_buffer *buffer, | |||
2133 | 2137 | ||
2134 | length = rb_calculate_event_length(length); | 2138 | length = rb_calculate_event_length(length); |
2135 | again: | 2139 | again: |
2140 | add_timestamp = 0; | ||
2141 | delta = 0; | ||
2142 | |||
2136 | /* | 2143 | /* |
2137 | * We allow for interrupts to reenter here and do a trace. | 2144 | * We allow for interrupts to reenter here and do a trace. |
2138 | * If one does, it will cause this original code to loop | 2145 | * If one does, it will cause this original code to loop |
@@ -2146,56 +2153,32 @@ rb_reserve_next_event(struct ring_buffer *buffer, | |||
2146 | goto out_fail; | 2153 | goto out_fail; |
2147 | 2154 | ||
2148 | ts = rb_time_stamp(cpu_buffer->buffer); | 2155 | ts = rb_time_stamp(cpu_buffer->buffer); |
2156 | diff = ts - cpu_buffer->write_stamp; | ||
2149 | 2157 | ||
2150 | /* | 2158 | /* make sure this diff is calculated here */ |
2151 | * Only the first commit can update the timestamp. | 2159 | barrier(); |
2152 | * Yes there is a race here. If an interrupt comes in | ||
2153 | * just after the conditional and it traces too, then it | ||
2154 | * will also check the deltas. More than one timestamp may | ||
2155 | * also be made. But only the entry that did the actual | ||
2156 | * commit will be something other than zero. | ||
2157 | */ | ||
2158 | if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page && | ||
2159 | rb_page_write(cpu_buffer->tail_page) == | ||
2160 | rb_commit_index(cpu_buffer))) { | ||
2161 | u64 diff; | ||
2162 | |||
2163 | diff = ts - cpu_buffer->write_stamp; | ||
2164 | |||
2165 | /* make sure this diff is calculated here */ | ||
2166 | barrier(); | ||
2167 | |||
2168 | /* Did the write stamp get updated already? */ | ||
2169 | if (unlikely(ts < cpu_buffer->write_stamp)) | ||
2170 | goto get_event; | ||
2171 | 2160 | ||
2161 | /* Did the write stamp get updated already? */ | ||
2162 | if (likely(ts >= cpu_buffer->write_stamp)) { | ||
2172 | delta = diff; | 2163 | delta = diff; |
2173 | if (unlikely(test_time_stamp(delta))) { | 2164 | if (unlikely(test_time_stamp(delta))) { |
2174 | 2165 | WARN_ONCE(delta > (1ULL << 59), | |
2175 | commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); | 2166 | KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n", |
2176 | if (commit == -EBUSY) | 2167 | (unsigned long long)delta, |
2177 | goto out_fail; | 2168 | (unsigned long long)ts, |
2178 | 2169 | (unsigned long long)cpu_buffer->write_stamp); | |
2179 | if (commit == -EAGAIN) | 2170 | add_timestamp = 1; |
2180 | goto again; | ||
2181 | |||
2182 | RB_WARN_ON(cpu_buffer, commit < 0); | ||
2183 | } | 2171 | } |
2184 | } | 2172 | } |
2185 | 2173 | ||
2186 | get_event: | 2174 | event = __rb_reserve_next(cpu_buffer, length, ts, |
2187 | event = __rb_reserve_next(cpu_buffer, 0, length, &ts); | 2175 | delta, add_timestamp); |
2188 | if (unlikely(PTR_ERR(event) == -EAGAIN)) | 2176 | if (unlikely(PTR_ERR(event) == -EAGAIN)) |
2189 | goto again; | 2177 | goto again; |
2190 | 2178 | ||
2191 | if (!event) | 2179 | if (!event) |
2192 | goto out_fail; | 2180 | goto out_fail; |
2193 | 2181 | ||
2194 | if (!rb_event_is_commit(cpu_buffer, event)) | ||
2195 | delta = 0; | ||
2196 | |||
2197 | event->time_delta = delta; | ||
2198 | |||
2199 | return event; | 2182 | return event; |
2200 | 2183 | ||
2201 | out_fail: | 2184 | out_fail: |
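All of the reshuffling above stays below the public API: a writer still reserves space, fills it, and commits, and any TIME_EXTEND record that turned out to be necessary is prepended transparently inside __rb_reserve_next(). A hedged usage sketch with a made-up payload:

        #include <linux/ring_buffer.h>

        static void demo_write(struct ring_buffer *buffer)
        {
                struct ring_buffer_event *event;
                u32 *payload;

                event = ring_buffer_lock_reserve(buffer, sizeof(*payload));
                if (!event)
                        return;         /* buffer full or recording disabled */

                payload = ring_buffer_event_data(event);
                *payload = 42;

                ring_buffer_unlock_commit(buffer, event);
        }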
@@ -2207,13 +2190,9 @@ rb_reserve_next_event(struct ring_buffer *buffer, | |||
2207 | 2190 | ||
2208 | #define TRACE_RECURSIVE_DEPTH 16 | 2191 | #define TRACE_RECURSIVE_DEPTH 16 |
2209 | 2192 | ||
2210 | static int trace_recursive_lock(void) | 2193 | /* Keep this code out of the fast path cache */ |
2194 | static noinline void trace_recursive_fail(void) | ||
2211 | { | 2195 | { |
2212 | current->trace_recursion++; | ||
2213 | |||
2214 | if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH)) | ||
2215 | return 0; | ||
2216 | |||
2217 | /* Disable all tracing before we do anything else */ | 2196 | /* Disable all tracing before we do anything else */ |
2218 | tracing_off_permanent(); | 2197 | tracing_off_permanent(); |
2219 | 2198 | ||
@@ -2225,10 +2204,21 @@ static int trace_recursive_lock(void) | |||
2225 | in_nmi()); | 2204 | in_nmi()); |
2226 | 2205 | ||
2227 | WARN_ON_ONCE(1); | 2206 | WARN_ON_ONCE(1); |
2207 | } | ||
2208 | |||
2209 | static inline int trace_recursive_lock(void) | ||
2210 | { | ||
2211 | current->trace_recursion++; | ||
2212 | |||
2213 | if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH)) | ||
2214 | return 0; | ||
2215 | |||
2216 | trace_recursive_fail(); | ||
2217 | |||
2228 | return -1; | 2218 | return -1; |
2229 | } | 2219 | } |
2230 | 2220 | ||
2231 | static void trace_recursive_unlock(void) | 2221 | static inline void trace_recursive_unlock(void) |
2232 | { | 2222 | { |
2233 | WARN_ON_ONCE(!current->trace_recursion); | 2223 | WARN_ON_ONCE(!current->trace_recursion); |
2234 | 2224 | ||
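Splitting the failure reporting out into trace_recursive_fail() keeps the common case, a per-task counter bump plus bound check, small enough to inline; the protocol seen by callers is unchanged. A generic sketch of that protocol (this would live inside ring_buffer.c, where the helpers are defined):

        /* Hedged sketch: take the recursion guard, do the work, release it. */
        static int demo_guarded_write(void)
        {
                if (trace_recursive_lock())
                        return -EBUSY;          /* nested too deep, drop it */

                /* ... reserve, fill and commit the event here ... */

                trace_recursive_unlock();
                return 0;
        }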
@@ -2308,12 +2298,28 @@ static void | |||
2308 | rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, | 2298 | rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, |
2309 | struct ring_buffer_event *event) | 2299 | struct ring_buffer_event *event) |
2310 | { | 2300 | { |
2301 | u64 delta; | ||
2302 | |||
2311 | /* | 2303 | /* |
2312 | * The event first in the commit queue updates the | 2304 | * The event first in the commit queue updates the |
2313 | * time stamp. | 2305 | * time stamp. |
2314 | */ | 2306 | */ |
2315 | if (rb_event_is_commit(cpu_buffer, event)) | 2307 | if (rb_event_is_commit(cpu_buffer, event)) { |
2316 | cpu_buffer->write_stamp += event->time_delta; | 2308 | /* |
2309 | * A commit event that is first on a page | ||
2310 | * updates the write timestamp with the page stamp | ||
2311 | */ | ||
2312 | if (!rb_event_index(event)) | ||
2313 | cpu_buffer->write_stamp = | ||
2314 | cpu_buffer->commit_page->page->time_stamp; | ||
2315 | else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { | ||
2316 | delta = event->array[0]; | ||
2317 | delta <<= TS_SHIFT; | ||
2318 | delta += event->time_delta; | ||
2319 | cpu_buffer->write_stamp += delta; | ||
2320 | } else | ||
2321 | cpu_buffer->write_stamp += event->time_delta; | ||
2322 | } | ||
2317 | } | 2323 | } |
2318 | 2324 | ||
2319 | static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, | 2325 | static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, |
@@ -2353,6 +2359,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); | |||
2353 | 2359 | ||
2354 | static inline void rb_event_discard(struct ring_buffer_event *event) | 2360 | static inline void rb_event_discard(struct ring_buffer_event *event) |
2355 | { | 2361 | { |
2362 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) | ||
2363 | event = skip_time_extend(event); | ||
2364 | |||
2356 | /* array[0] holds the actual length for the discarded event */ | 2365 | /* array[0] holds the actual length for the discarded event */ |
2357 | event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; | 2366 | event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; |
2358 | event->type_len = RINGBUF_TYPE_PADDING; | 2367 | event->type_len = RINGBUF_TYPE_PADDING; |
@@ -3049,12 +3058,12 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts, | |||
3049 | 3058 | ||
3050 | again: | 3059 | again: |
3051 | /* | 3060 | /* |
3052 | * We repeat when a timestamp is encountered. It is possible | 3061 | * We repeat when a time extend is encountered. |
3053 | * to get multiple timestamps from an interrupt entering just | 3062 | * Since the time extend is always attached to a data event, |
3054 | * as one timestamp is about to be written, or from discarded | 3063 | * we should never loop more than once. |
3055 | * commits. The most that we can have is the number on a single page. | 3064 | * (We never hit the following condition more than twice). |
3056 | */ | 3065 | */ |
3057 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) | 3066 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2)) |
3058 | return NULL; | 3067 | return NULL; |
3059 | 3068 | ||
3060 | reader = rb_get_reader_page(cpu_buffer); | 3069 | reader = rb_get_reader_page(cpu_buffer); |
@@ -3130,14 +3139,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
3130 | return NULL; | 3139 | return NULL; |
3131 | 3140 | ||
3132 | /* | 3141 | /* |
3133 | * We repeat when a timestamp is encountered. | 3142 | * We repeat when a time extend is encountered. |
3134 | * We can get multiple timestamps by nested interrupts or also | 3143 | * Since the time extend is always attached to a data event, |
3135 | * if filtering is on (discarding commits). Since discarding | 3144 | * we should never loop more than once. |
3136 | * commits can be frequent we can get a lot of timestamps. | 3145 | * (We never hit the following condition more than twice). |
3137 | * But we limit them by not adding timestamps if they begin | ||
3138 | * at the start of a page. | ||
3139 | */ | 3146 | */ |
3140 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) | 3147 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2)) |
3141 | return NULL; | 3148 | return NULL; |
3142 | 3149 | ||
3143 | if (rb_per_cpu_empty(cpu_buffer)) | 3150 | if (rb_per_cpu_empty(cpu_buffer)) |
@@ -3835,7 +3842,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3835 | if (len > (commit - read)) | 3842 | if (len > (commit - read)) |
3836 | len = (commit - read); | 3843 | len = (commit - read); |
3837 | 3844 | ||
3838 | size = rb_event_length(event); | 3845 | /* Always keep the time extend and data together */ |
3846 | size = rb_event_ts_length(event); | ||
3839 | 3847 | ||
3840 | if (len < size) | 3848 | if (len < size) |
3841 | goto out_unlock; | 3849 | goto out_unlock; |
@@ -3857,7 +3865,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3857 | break; | 3865 | break; |
3858 | 3866 | ||
3859 | event = rb_reader_event(cpu_buffer); | 3867 | event = rb_reader_event(cpu_buffer); |
3860 | size = rb_event_length(event); | 3868 | /* Always keep the time extend and data together */ |
3869 | size = rb_event_ts_length(event); | ||
3861 | } while (len > size); | 3870 | } while (len > size); |
3862 | 3871 | ||
3863 | /* update bpage */ | 3872 | /* update bpage */ |
@@ -3974,6 +3983,7 @@ static const struct file_operations rb_simple_fops = { | |||
3974 | .open = tracing_open_generic, | 3983 | .open = tracing_open_generic, |
3975 | .read = rb_simple_read, | 3984 | .read = rb_simple_read, |
3976 | .write = rb_simple_write, | 3985 | .write = rb_simple_write, |
3986 | .llseek = default_llseek, | ||
3977 | }; | 3987 | }; |
3978 | 3988 | ||
3979 | 3989 | ||
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 001bcd2ccf4a..82d9b8106cd0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -3996,13 +3996,9 @@ static void tracing_init_debugfs_percpu(long cpu) | |||
3996 | { | 3996 | { |
3997 | struct dentry *d_percpu = tracing_dentry_percpu(); | 3997 | struct dentry *d_percpu = tracing_dentry_percpu(); |
3998 | struct dentry *d_cpu; | 3998 | struct dentry *d_cpu; |
3999 | /* strlen(cpu) + MAX(log10(cpu)) + '\0' */ | 3999 | char cpu_dir[30]; /* 30 characters should be more than enough */ |
4000 | char cpu_dir[7]; | ||
4001 | 4000 | ||
4002 | if (cpu > 999 || cpu < 0) | 4001 | snprintf(cpu_dir, 30, "cpu%ld", cpu); |
4003 | return; | ||
4004 | |||
4005 | sprintf(cpu_dir, "cpu%ld", cpu); | ||
4006 | d_cpu = debugfs_create_dir(cpu_dir, d_percpu); | 4002 | d_cpu = debugfs_create_dir(cpu_dir, d_percpu); |
4007 | if (!d_cpu) { | 4003 | if (!d_cpu) { |
4008 | pr_warning("Could not create debugfs '%s' entry\n", cpu_dir); | 4004 | pr_warning("Could not create debugfs '%s' entry\n", cpu_dir); |
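The fixed-size "cpu%ld" buffer no longer caps the per-CPU debugfs directories at cpu999: 30 bytes comfortably holds "cpu" plus every decimal digit a long can produce, and snprintf() bounds the write regardless. Trivial sketch:

        #include <linux/kernel.h>

        static void demo_cpu_dir_name(char *buf, size_t len, long cpu)
        {
                /* "cpu" + at most ~20 digits + NUL fits easily in 30 bytes */
                snprintf(buf, len, "cpu%ld", cpu);
        }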
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 398c0e8b332c..0725eeab1937 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
@@ -932,6 +932,7 @@ static const struct file_operations ftrace_enable_fops = { | |||
932 | .open = tracing_open_generic, | 932 | .open = tracing_open_generic, |
933 | .read = event_enable_read, | 933 | .read = event_enable_read, |
934 | .write = event_enable_write, | 934 | .write = event_enable_write, |
935 | .llseek = default_llseek, | ||
935 | }; | 936 | }; |
936 | 937 | ||
937 | static const struct file_operations ftrace_event_format_fops = { | 938 | static const struct file_operations ftrace_event_format_fops = { |
@@ -944,29 +945,34 @@ static const struct file_operations ftrace_event_format_fops = { | |||
944 | static const struct file_operations ftrace_event_id_fops = { | 945 | static const struct file_operations ftrace_event_id_fops = { |
945 | .open = tracing_open_generic, | 946 | .open = tracing_open_generic, |
946 | .read = event_id_read, | 947 | .read = event_id_read, |
948 | .llseek = default_llseek, | ||
947 | }; | 949 | }; |
948 | 950 | ||
949 | static const struct file_operations ftrace_event_filter_fops = { | 951 | static const struct file_operations ftrace_event_filter_fops = { |
950 | .open = tracing_open_generic, | 952 | .open = tracing_open_generic, |
951 | .read = event_filter_read, | 953 | .read = event_filter_read, |
952 | .write = event_filter_write, | 954 | .write = event_filter_write, |
955 | .llseek = default_llseek, | ||
953 | }; | 956 | }; |
954 | 957 | ||
955 | static const struct file_operations ftrace_subsystem_filter_fops = { | 958 | static const struct file_operations ftrace_subsystem_filter_fops = { |
956 | .open = tracing_open_generic, | 959 | .open = tracing_open_generic, |
957 | .read = subsystem_filter_read, | 960 | .read = subsystem_filter_read, |
958 | .write = subsystem_filter_write, | 961 | .write = subsystem_filter_write, |
962 | .llseek = default_llseek, | ||
959 | }; | 963 | }; |
960 | 964 | ||
961 | static const struct file_operations ftrace_system_enable_fops = { | 965 | static const struct file_operations ftrace_system_enable_fops = { |
962 | .open = tracing_open_generic, | 966 | .open = tracing_open_generic, |
963 | .read = system_enable_read, | 967 | .read = system_enable_read, |
964 | .write = system_enable_write, | 968 | .write = system_enable_write, |
969 | .llseek = default_llseek, | ||
965 | }; | 970 | }; |
966 | 971 | ||
967 | static const struct file_operations ftrace_show_header_fops = { | 972 | static const struct file_operations ftrace_show_header_fops = { |
968 | .open = tracing_open_generic, | 973 | .open = tracing_open_generic, |
969 | .read = show_header, | 974 | .read = show_header, |
975 | .llseek = default_llseek, | ||
970 | }; | 976 | }; |
971 | 977 | ||
972 | static struct dentry *event_trace_events_dir(void) | 978 | static struct dentry *event_trace_events_dir(void) |
diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c index 7b8ecd751d93..3c5c5dfea0b3 100644 --- a/kernel/trace/trace_kdb.c +++ b/kernel/trace/trace_kdb.c | |||
@@ -13,7 +13,6 @@ | |||
13 | #include <linux/kdb.h> | 13 | #include <linux/kdb.h> |
14 | #include <linux/ftrace.h> | 14 | #include <linux/ftrace.h> |
15 | 15 | ||
16 | #include "../debug/kdb/kdb_private.h" | ||
17 | #include "trace.h" | 16 | #include "trace.h" |
18 | #include "trace_output.h" | 17 | #include "trace_output.h" |
19 | 18 | ||
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 544301d29dee..2dec9bcde8b4 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
@@ -31,7 +31,6 @@ | |||
31 | #include <linux/perf_event.h> | 31 | #include <linux/perf_event.h> |
32 | #include <linux/stringify.h> | 32 | #include <linux/stringify.h> |
33 | #include <linux/limits.h> | 33 | #include <linux/limits.h> |
34 | #include <linux/uaccess.h> | ||
35 | #include <asm/bitsperlong.h> | 34 | #include <asm/bitsperlong.h> |
36 | 35 | ||
37 | #include "trace.h" | 36 | #include "trace.h" |
@@ -648,7 +647,7 @@ static int register_trace_probe(struct trace_probe *tp) | |||
648 | } | 647 | } |
649 | ret = register_probe_event(tp); | 648 | ret = register_probe_event(tp); |
650 | if (ret) { | 649 | if (ret) { |
651 | pr_warning("Faild to register probe event(%d)\n", ret); | 650 | pr_warning("Failed to register probe event(%d)\n", ret); |
652 | goto end; | 651 | goto end; |
653 | } | 652 | } |
654 | 653 | ||
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index a6b7e0e0f3eb..4c5dead0c239 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c | |||
@@ -195,6 +195,7 @@ static const struct file_operations stack_max_size_fops = { | |||
195 | .open = tracing_open_generic, | 195 | .open = tracing_open_generic, |
196 | .read = stack_max_size_read, | 196 | .read = stack_max_size_read, |
197 | .write = stack_max_size_write, | 197 | .write = stack_max_size_write, |
198 | .llseek = default_llseek, | ||
198 | }; | 199 | }; |
199 | 200 | ||
200 | static void * | 201 | static void * |
diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 0a67e041edf8..24dc60d9fa1f 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c | |||
@@ -63,12 +63,10 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) | |||
63 | stats->ac_ppid = pid_alive(tsk) ? | 63 | stats->ac_ppid = pid_alive(tsk) ? |
64 | rcu_dereference(tsk->real_parent)->tgid : 0; | 64 | rcu_dereference(tsk->real_parent)->tgid : 0; |
65 | rcu_read_unlock(); | 65 | rcu_read_unlock(); |
66 | stats->ac_utime = cputime_to_msecs(tsk->utime) * USEC_PER_MSEC; | 66 | stats->ac_utime = cputime_to_usecs(tsk->utime); |
67 | stats->ac_stime = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC; | 67 | stats->ac_stime = cputime_to_usecs(tsk->stime); |
68 | stats->ac_utimescaled = | 68 | stats->ac_utimescaled = cputime_to_usecs(tsk->utimescaled); |
69 | cputime_to_msecs(tsk->utimescaled) * USEC_PER_MSEC; | 69 | stats->ac_stimescaled = cputime_to_usecs(tsk->stimescaled); |
70 | stats->ac_stimescaled = | ||
71 | cputime_to_msecs(tsk->stimescaled) * USEC_PER_MSEC; | ||
72 | stats->ac_minflt = tsk->min_flt; | 70 | stats->ac_minflt = tsk->min_flt; |
73 | stats->ac_majflt = tsk->maj_flt; | 71 | stats->ac_majflt = tsk->maj_flt; |
74 | 72 | ||
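Converting straight to microseconds avoids the precision loss of the old millisecond round trip: a task with, say, 1.5 ms of accumulated CPU time used to be reported as cputime_to_msecs() * USEC_PER_MSEC = 1000 us, whereas cputime_to_usecs() yields the full 1500 us. Sketch of the new form, with a hypothetical helper name:

        #include <linux/sched.h>
        #include <linux/taskstats.h>

        static void demo_account(struct taskstats *stats, struct task_struct *tsk)
        {
                /* old: cputime_to_msecs(tsk->utime) * USEC_PER_MSEC,
                 * which truncates everything below one millisecond */
                stats->ac_utime = cputime_to_usecs(tsk->utime);
                stats->ac_stime = cputime_to_usecs(tsk->stime);
        }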
diff --git a/kernel/user.c b/kernel/user.c index 7e72614b736d..2c7d8d5914b1 100644 --- a/kernel/user.c +++ b/kernel/user.c | |||
@@ -91,6 +91,7 @@ static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent) | |||
91 | * upon function exit. | 91 | * upon function exit. |
92 | */ | 92 | */ |
93 | static void free_user(struct user_struct *up, unsigned long flags) | 93 | static void free_user(struct user_struct *up, unsigned long flags) |
94 | __releases(&uidhash_lock) | ||
94 | { | 95 | { |
95 | uid_hash_remove(up); | 96 | uid_hash_remove(up); |
96 | spin_unlock_irqrestore(&uidhash_lock, flags); | 97 | spin_unlock_irqrestore(&uidhash_lock, flags); |
diff --git a/kernel/wait.c b/kernel/wait.c index c4bd3d825f35..b0310eb6cc1e 100644 --- a/kernel/wait.c +++ b/kernel/wait.c | |||
@@ -92,7 +92,7 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state) | |||
92 | } | 92 | } |
93 | EXPORT_SYMBOL(prepare_to_wait_exclusive); | 93 | EXPORT_SYMBOL(prepare_to_wait_exclusive); |
94 | 94 | ||
95 | /* | 95 | /** |
96 | * finish_wait - clean up after waiting in a queue | 96 | * finish_wait - clean up after waiting in a queue |
97 | * @q: waitqueue waited on | 97 | * @q: waitqueue waited on |
98 | * @wait: wait descriptor | 98 | * @wait: wait descriptor |
@@ -127,11 +127,11 @@ void finish_wait(wait_queue_head_t *q, wait_queue_t *wait) | |||
127 | } | 127 | } |
128 | EXPORT_SYMBOL(finish_wait); | 128 | EXPORT_SYMBOL(finish_wait); |
129 | 129 | ||
130 | /* | 130 | /** |
131 | * abort_exclusive_wait - abort exclusive waiting in a queue | 131 | * abort_exclusive_wait - abort exclusive waiting in a queue |
132 | * @q: waitqueue waited on | 132 | * @q: waitqueue waited on |
133 | * @wait: wait descriptor | 133 | * @wait: wait descriptor |
134 | * @state: runstate of the waiter to be woken | 134 | * @mode: runstate of the waiter to be woken |
135 | * @key: key to identify a wait bit queue or %NULL | 135 | * @key: key to identify a wait bit queue or %NULL |
136 | * | 136 | * |
137 | * Sets current thread back to running state and removes | 137 | * Sets current thread back to running state and removes |
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 94ca779aa9c2..14b8120d5232 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -43,7 +43,7 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); | |||
43 | static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); | 43 | static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); |
44 | #endif | 44 | #endif |
45 | 45 | ||
46 | static int __initdata no_watchdog; | 46 | static int no_watchdog; |
47 | 47 | ||
48 | 48 | ||
49 | /* boot commands */ | 49 | /* boot commands */ |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f77afd939229..90db1bd1a978 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -42,9 +42,6 @@ | |||
42 | #include <linux/lockdep.h> | 42 | #include <linux/lockdep.h> |
43 | #include <linux/idr.h> | 43 | #include <linux/idr.h> |
44 | 44 | ||
45 | #define CREATE_TRACE_POINTS | ||
46 | #include <trace/events/workqueue.h> | ||
47 | |||
48 | #include "workqueue_sched.h" | 45 | #include "workqueue_sched.h" |
49 | 46 | ||
50 | enum { | 47 | enum { |
@@ -257,6 +254,9 @@ EXPORT_SYMBOL_GPL(system_long_wq); | |||
257 | EXPORT_SYMBOL_GPL(system_nrt_wq); | 254 | EXPORT_SYMBOL_GPL(system_nrt_wq); |
258 | EXPORT_SYMBOL_GPL(system_unbound_wq); | 255 | EXPORT_SYMBOL_GPL(system_unbound_wq); |
259 | 256 | ||
257 | #define CREATE_TRACE_POINTS | ||
258 | #include <trace/events/workqueue.h> | ||
259 | |||
260 | #define for_each_busy_worker(worker, i, pos, gcwq) \ | 260 | #define for_each_busy_worker(worker, i, pos, gcwq) \ |
261 | for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) \ | 261 | for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) \ |
262 | hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry) | 262 | hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry) |
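CREATE_TRACE_POINTS must be defined in exactly one compilation unit, immediately before including the event header, so that the tracepoint bodies are emitted there; moving the include below the local declarations presumably lets the workqueue event definitions reference symbols private to workqueue.c, which the new trace_workqueue_queue_work()/trace_workqueue_activate_work() call sites further down rely on. The general pattern, sketched:

        /* In the one .c file that owns the tracepoints (here workqueue.c),
         * placed after everything the event templates need to see: */
        #define CREATE_TRACE_POINTS
        #include <trace/events/workqueue.h>

        /* Call sites then simply fire the events, e.g.
         *      trace_workqueue_queue_work(cpu, cwq, work);
         */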
@@ -310,21 +310,6 @@ static inline int __next_wq_cpu(int cpu, const struct cpumask *mask, | |||
310 | (cpu) < WORK_CPU_NONE; \ | 310 | (cpu) < WORK_CPU_NONE; \ |
311 | (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, (wq))) | 311 | (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, (wq))) |
312 | 312 | ||
313 | #ifdef CONFIG_LOCKDEP | ||
314 | /** | ||
315 | * in_workqueue_context() - in context of specified workqueue? | ||
316 | * @wq: the workqueue of interest | ||
317 | * | ||
318 | * Checks lockdep state to see if the current task is executing from | ||
319 | * within a workqueue item. This function exists only if lockdep is | ||
320 | * enabled. | ||
321 | */ | ||
322 | int in_workqueue_context(struct workqueue_struct *wq) | ||
323 | { | ||
324 | return lock_is_held(&wq->lockdep_map); | ||
325 | } | ||
326 | #endif | ||
327 | |||
328 | #ifdef CONFIG_DEBUG_OBJECTS_WORK | 313 | #ifdef CONFIG_DEBUG_OBJECTS_WORK |
329 | 314 | ||
330 | static struct debug_obj_descr work_debug_descr; | 315 | static struct debug_obj_descr work_debug_descr; |
@@ -604,7 +589,9 @@ static bool keep_working(struct global_cwq *gcwq) | |||
604 | { | 589 | { |
605 | atomic_t *nr_running = get_gcwq_nr_running(gcwq->cpu); | 590 | atomic_t *nr_running = get_gcwq_nr_running(gcwq->cpu); |
606 | 591 | ||
607 | return !list_empty(&gcwq->worklist) && atomic_read(nr_running) <= 1; | 592 | return !list_empty(&gcwq->worklist) && |
593 | (atomic_read(nr_running) <= 1 || | ||
594 | gcwq->flags & GCWQ_HIGHPRI_PENDING); | ||
608 | } | 595 | } |
609 | 596 | ||
610 | /* Do we need a new worker? Called from manager. */ | 597 | /* Do we need a new worker? Called from manager. */ |
@@ -997,6 +984,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, | |||
997 | 984 | ||
998 | /* gcwq determined, get cwq and queue */ | 985 | /* gcwq determined, get cwq and queue */ |
999 | cwq = get_cwq(gcwq->cpu, wq); | 986 | cwq = get_cwq(gcwq->cpu, wq); |
987 | trace_workqueue_queue_work(cpu, cwq, work); | ||
1000 | 988 | ||
1001 | BUG_ON(!list_empty(&work->entry)); | 989 | BUG_ON(!list_empty(&work->entry)); |
1002 | 990 | ||
@@ -1004,6 +992,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, | |||
1004 | work_flags = work_color_to_flags(cwq->work_color); | 992 | work_flags = work_color_to_flags(cwq->work_color); |
1005 | 993 | ||
1006 | if (likely(cwq->nr_active < cwq->max_active)) { | 994 | if (likely(cwq->nr_active < cwq->max_active)) { |
995 | trace_workqueue_activate_work(work); | ||
1007 | cwq->nr_active++; | 996 | cwq->nr_active++; |
1008 | worklist = gcwq_determine_ins_pos(gcwq, cwq); | 997 | worklist = gcwq_determine_ins_pos(gcwq, cwq); |
1009 | } else { | 998 | } else { |
@@ -1679,6 +1668,7 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) | |||
1679 | struct work_struct, entry); | 1668 | struct work_struct, entry); |
1680 | struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq); | 1669 | struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq); |
1681 | 1670 | ||
1671 | trace_workqueue_activate_work(work); | ||
1682 | move_linked_works(work, pos, NULL); | 1672 | move_linked_works(work, pos, NULL); |
1683 | __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work)); | 1673 | __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work)); |
1684 | cwq->nr_active++; | 1674 | cwq->nr_active++; |
@@ -2074,7 +2064,7 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, | |||
2074 | * checks and call back into the fixup functions where we | 2064 | * checks and call back into the fixup functions where we |
2075 | * might deadlock. | 2065 | * might deadlock. |
2076 | */ | 2066 | */ |
2077 | INIT_WORK_ON_STACK(&barr->work, wq_barrier_func); | 2067 | INIT_WORK_ONSTACK(&barr->work, wq_barrier_func); |
2078 | __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work)); | 2068 | __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work)); |
2079 | init_completion(&barr->done); | 2069 | init_completion(&barr->done); |
2080 | 2070 | ||
@@ -2326,27 +2316,17 @@ out_unlock: | |||
2326 | } | 2316 | } |
2327 | EXPORT_SYMBOL_GPL(flush_workqueue); | 2317 | EXPORT_SYMBOL_GPL(flush_workqueue); |
2328 | 2318 | ||
2329 | /** | 2319 | static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr, |
2330 | * flush_work - block until a work_struct's callback has terminated | 2320 | bool wait_executing) |
2331 | * @work: the work which is to be flushed | ||
2332 | * | ||
2333 | * Returns false if @work has already terminated. | ||
2334 | * | ||
2335 | * It is expected that, prior to calling flush_work(), the caller has | ||
2336 | * arranged for the work to not be requeued, otherwise it doesn't make | ||
2337 | * sense to use this function. | ||
2338 | */ | ||
2339 | int flush_work(struct work_struct *work) | ||
2340 | { | 2321 | { |
2341 | struct worker *worker = NULL; | 2322 | struct worker *worker = NULL; |
2342 | struct global_cwq *gcwq; | 2323 | struct global_cwq *gcwq; |
2343 | struct cpu_workqueue_struct *cwq; | 2324 | struct cpu_workqueue_struct *cwq; |
2344 | struct wq_barrier barr; | ||
2345 | 2325 | ||
2346 | might_sleep(); | 2326 | might_sleep(); |
2347 | gcwq = get_work_gcwq(work); | 2327 | gcwq = get_work_gcwq(work); |
2348 | if (!gcwq) | 2328 | if (!gcwq) |
2349 | return 0; | 2329 | return false; |
2350 | 2330 | ||
2351 | spin_lock_irq(&gcwq->lock); | 2331 | spin_lock_irq(&gcwq->lock); |
2352 | if (!list_empty(&work->entry)) { | 2332 | if (!list_empty(&work->entry)) { |
@@ -2359,28 +2339,127 @@ int flush_work(struct work_struct *work) | |||
2359 | cwq = get_work_cwq(work); | 2339 | cwq = get_work_cwq(work); |
2360 | if (unlikely(!cwq || gcwq != cwq->gcwq)) | 2340 | if (unlikely(!cwq || gcwq != cwq->gcwq)) |
2361 | goto already_gone; | 2341 | goto already_gone; |
2362 | } else { | 2342 | } else if (wait_executing) { |
2363 | worker = find_worker_executing_work(gcwq, work); | 2343 | worker = find_worker_executing_work(gcwq, work); |
2364 | if (!worker) | 2344 | if (!worker) |
2365 | goto already_gone; | 2345 | goto already_gone; |
2366 | cwq = worker->current_cwq; | 2346 | cwq = worker->current_cwq; |
2367 | } | 2347 | } else |
2348 | goto already_gone; | ||
2368 | 2349 | ||
2369 | insert_wq_barrier(cwq, &barr, work, worker); | 2350 | insert_wq_barrier(cwq, barr, work, worker); |
2370 | spin_unlock_irq(&gcwq->lock); | 2351 | spin_unlock_irq(&gcwq->lock); |
2371 | 2352 | ||
2372 | lock_map_acquire(&cwq->wq->lockdep_map); | 2353 | lock_map_acquire(&cwq->wq->lockdep_map); |
2373 | lock_map_release(&cwq->wq->lockdep_map); | 2354 | lock_map_release(&cwq->wq->lockdep_map); |
2374 | 2355 | return true; | |
2375 | wait_for_completion(&barr.done); | ||
2376 | destroy_work_on_stack(&barr.work); | ||
2377 | return 1; | ||
2378 | already_gone: | 2356 | already_gone: |
2379 | spin_unlock_irq(&gcwq->lock); | 2357 | spin_unlock_irq(&gcwq->lock); |
2380 | return 0; | 2358 | return false; |
2359 | } | ||
2360 | |||
2361 | /** | ||
2362 | * flush_work - wait for a work to finish executing the last queueing instance | ||
2363 | * @work: the work to flush | ||
2364 | * | ||
2365 | * Wait until @work has finished execution. This function considers | ||
2366 | * only the last queueing instance of @work. If @work has been | ||
2367 | * enqueued across different CPUs on a non-reentrant workqueue or on | ||
2368 | * multiple workqueues, @work might still be executing on return on | ||
2369 | * some of the CPUs from earlier queueing. | ||
2370 | * | ||
2371 | * If @work was queued only on a non-reentrant, ordered or unbound | ||
2372 | * workqueue, @work is guaranteed to be idle on return if it hasn't | ||
2373 | * been requeued since flush started. | ||
2374 | * | ||
2375 | * RETURNS: | ||
2376 | * %true if flush_work() waited for the work to finish execution, | ||
2377 | * %false if it was already idle. | ||
2378 | */ | ||
2379 | bool flush_work(struct work_struct *work) | ||
2380 | { | ||
2381 | struct wq_barrier barr; | ||
2382 | |||
2383 | if (start_flush_work(work, &barr, true)) { | ||
2384 | wait_for_completion(&barr.done); | ||
2385 | destroy_work_on_stack(&barr.work); | ||
2386 | return true; | ||
2387 | } else | ||
2388 | return false; | ||
2381 | } | 2389 | } |
2382 | EXPORT_SYMBOL_GPL(flush_work); | 2390 | EXPORT_SYMBOL_GPL(flush_work); |
2383 | 2391 | ||
2392 | static bool wait_on_cpu_work(struct global_cwq *gcwq, struct work_struct *work) | ||
2393 | { | ||
2394 | struct wq_barrier barr; | ||
2395 | struct worker *worker; | ||
2396 | |||
2397 | spin_lock_irq(&gcwq->lock); | ||
2398 | |||
2399 | worker = find_worker_executing_work(gcwq, work); | ||
2400 | if (unlikely(worker)) | ||
2401 | insert_wq_barrier(worker->current_cwq, &barr, work, worker); | ||
2402 | |||
2403 | spin_unlock_irq(&gcwq->lock); | ||
2404 | |||
2405 | if (unlikely(worker)) { | ||
2406 | wait_for_completion(&barr.done); | ||
2407 | destroy_work_on_stack(&barr.work); | ||
2408 | return true; | ||
2409 | } else | ||
2410 | return false; | ||
2411 | } | ||
2412 | |||
2413 | static bool wait_on_work(struct work_struct *work) | ||
2414 | { | ||
2415 | bool ret = false; | ||
2416 | int cpu; | ||
2417 | |||
2418 | might_sleep(); | ||
2419 | |||
2420 | lock_map_acquire(&work->lockdep_map); | ||
2421 | lock_map_release(&work->lockdep_map); | ||
2422 | |||
2423 | for_each_gcwq_cpu(cpu) | ||
2424 | ret |= wait_on_cpu_work(get_gcwq(cpu), work); | ||
2425 | return ret; | ||
2426 | } | ||
2427 | |||
2428 | /** | ||
2429 | * flush_work_sync - wait until a work has finished execution | ||
2430 | * @work: the work to flush | ||
2431 | * | ||
2432 | * Wait until @work has finished execution. On return, it's | ||
2433 | * guaranteed that all queueing instances of @work which happened | ||
2434 | * before this function is called are finished. In other words, if | ||
2435 | * @work hasn't been requeued since this function was called, @work is | ||
2436 | * guaranteed to be idle on return. | ||
2437 | * | ||
2438 | * RETURNS: | ||
2439 | * %true if flush_work_sync() waited for the work to finish execution, | ||
2440 | * %false if it was already idle. | ||
2441 | */ | ||
2442 | bool flush_work_sync(struct work_struct *work) | ||
2443 | { | ||
2444 | struct wq_barrier barr; | ||
2445 | bool pending, waited; | ||
2446 | |||
2447 | /* we'll wait for executions separately, queue barr only if pending */ | ||
2448 | pending = start_flush_work(work, &barr, false); | ||
2449 | |||
2450 | /* wait for executions to finish */ | ||
2451 | waited = wait_on_work(work); | ||
2452 | |||
2453 | /* wait for the pending one */ | ||
2454 | if (pending) { | ||
2455 | wait_for_completion(&barr.done); | ||
2456 | destroy_work_on_stack(&barr.work); | ||
2457 | } | ||
2458 | |||
2459 | return pending || waited; | ||
2460 | } | ||
2461 | EXPORT_SYMBOL_GPL(flush_work_sync); | ||
2462 | |||
2384 | /* | 2463 | /* |
2385 | * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit, | 2464 | * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit, |
2386 | * so this work can't be re-armed in any way. | 2465 | * so this work can't be re-armed in any way. |
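The refactor pulls the barrier setup into start_flush_work() so it can back both flavours: flush_work() waits only for the last queueing instance of a work item, while the new flush_work_sync() also waits out executions started by earlier queueings on other CPUs. A hedged usage sketch with a made-up work item:

        #include <linux/workqueue.h>

        static void demo_func(struct work_struct *work)
        {
                /* may requeue itself or run on several CPUs over time */
        }
        static DECLARE_WORK(demo_work, demo_func);

        static void demo_flush(void)
        {
                schedule_work(&demo_work);

                flush_work(&demo_work);         /* last queueing instance only */
                flush_work_sync(&demo_work);    /* earlier instances as well   */
        }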
@@ -2423,39 +2502,7 @@ static int try_to_grab_pending(struct work_struct *work) | |||
2423 | return ret; | 2502 | return ret; |
2424 | } | 2503 | } |
2425 | 2504 | ||
2426 | static void wait_on_cpu_work(struct global_cwq *gcwq, struct work_struct *work) | 2505 | static bool __cancel_work_timer(struct work_struct *work, |
2427 | { | ||
2428 | struct wq_barrier barr; | ||
2429 | struct worker *worker; | ||
2430 | |||
2431 | spin_lock_irq(&gcwq->lock); | ||
2432 | |||
2433 | worker = find_worker_executing_work(gcwq, work); | ||
2434 | if (unlikely(worker)) | ||
2435 | insert_wq_barrier(worker->current_cwq, &barr, work, worker); | ||
2436 | |||
2437 | spin_unlock_irq(&gcwq->lock); | ||
2438 | |||
2439 | if (unlikely(worker)) { | ||
2440 | wait_for_completion(&barr.done); | ||
2441 | destroy_work_on_stack(&barr.work); | ||
2442 | } | ||
2443 | } | ||
2444 | |||
2445 | static void wait_on_work(struct work_struct *work) | ||
2446 | { | ||
2447 | int cpu; | ||
2448 | |||
2449 | might_sleep(); | ||
2450 | |||
2451 | lock_map_acquire(&work->lockdep_map); | ||
2452 | lock_map_release(&work->lockdep_map); | ||
2453 | |||
2454 | for_each_gcwq_cpu(cpu) | ||
2455 | wait_on_cpu_work(get_gcwq(cpu), work); | ||
2456 | } | ||
2457 | |||
2458 | static int __cancel_work_timer(struct work_struct *work, | ||
2459 | struct timer_list* timer) | 2506 | struct timer_list* timer) |
2460 | { | 2507 | { |
2461 | int ret; | 2508 | int ret; |
@@ -2472,42 +2519,81 @@ static int __cancel_work_timer(struct work_struct *work, | |||
2472 | } | 2519 | } |
2473 | 2520 | ||
2474 | /** | 2521 | /** |
2475 | * cancel_work_sync - block until a work_struct's callback has terminated | 2522 | * cancel_work_sync - cancel a work and wait for it to finish |
2476 | * @work: the work which is to be flushed | 2523 | * @work: the work to cancel |
2477 | * | ||
2478 | * Returns true if @work was pending. | ||
2479 | * | 2524 | * |
2480 | * cancel_work_sync() will cancel the work if it is queued. If the work's | 2525 | * Cancel @work and wait for its execution to finish. This function |
2481 | * callback appears to be running, cancel_work_sync() will block until it | 2526 | * can be used even if the work re-queues itself or migrates to |
2482 | * has completed. | 2527 | * another workqueue. On return from this function, @work is |
2528 | * guaranteed to be not pending or executing on any CPU. | ||
2483 | * | 2529 | * |
2484 | * It is possible to use this function if the work re-queues itself. It can | 2530 | * cancel_work_sync(&delayed_work->work) must not be used for |
2485 | * cancel the work even if it migrates to another workqueue, however in that | 2531 | * delayed_work's. Use cancel_delayed_work_sync() instead. |
2486 | * case it only guarantees that work->func() has completed on the last queued | ||
2487 | * workqueue. | ||
2488 | * | 2532 | * |
2489 | * cancel_work_sync(&delayed_work->work) should be used only if ->timer is not | 2533 | * The caller must ensure that the workqueue on which @work was last |
2490 | * pending, otherwise it goes into a busy-wait loop until the timer expires. | ||
2491 | * | ||
2492 | * The caller must ensure that workqueue_struct on which this work was last | ||
2493 | * queued can't be destroyed before this function returns. | 2534 | * queued can't be destroyed before this function returns. |
2535 | * | ||
2536 | * RETURNS: | ||
2537 | * %true if @work was pending, %false otherwise. | ||
2494 | */ | 2538 | */ |
2495 | int cancel_work_sync(struct work_struct *work) | 2539 | bool cancel_work_sync(struct work_struct *work) |
2496 | { | 2540 | { |
2497 | return __cancel_work_timer(work, NULL); | 2541 | return __cancel_work_timer(work, NULL); |
2498 | } | 2542 | } |
2499 | EXPORT_SYMBOL_GPL(cancel_work_sync); | 2543 | EXPORT_SYMBOL_GPL(cancel_work_sync); |
2500 | 2544 | ||
2501 | /** | 2545 | /** |
2502 | * cancel_delayed_work_sync - reliably kill off a delayed work. | 2546 | * flush_delayed_work - wait for a dwork to finish executing the last queueing |
2503 | * @dwork: the delayed work struct | 2547 | * @dwork: the delayed work to flush |
2548 | * | ||
2549 | * Delayed timer is cancelled and the pending work is queued for | ||
2550 | * immediate execution. Like flush_work(), this function only | ||
2551 | * considers the last queueing instance of @dwork. | ||
2552 | * | ||
2553 | * RETURNS: | ||
2554 | * %true if flush_work() waited for the work to finish execution, | ||
2555 | * %false if it was already idle. | ||
2556 | */ | ||
2557 | bool flush_delayed_work(struct delayed_work *dwork) | ||
2558 | { | ||
2559 | if (del_timer_sync(&dwork->timer)) | ||
2560 | __queue_work(raw_smp_processor_id(), | ||
2561 | get_work_cwq(&dwork->work)->wq, &dwork->work); | ||
2562 | return flush_work(&dwork->work); | ||
2563 | } | ||
2564 | EXPORT_SYMBOL(flush_delayed_work); | ||
2565 | |||
2566 | /** | ||
2567 | * flush_delayed_work_sync - wait for a dwork to finish | ||
2568 | * @dwork: the delayed work to flush | ||
2504 | * | 2569 | * |
2505 | * Returns true if @dwork was pending. | 2570 | * Delayed timer is cancelled and the pending work is queued for |
2571 | * execution immediately. Other than timer handling, its behavior | ||
2572 | * is identical to flush_work_sync(). | ||
2506 | * | 2573 | * |
2507 | * It is possible to use this function if @dwork rearms itself via queue_work() | 2574 | * RETURNS: |
2508 | * or queue_delayed_work(). See also the comment for cancel_work_sync(). | 2575 | * %true if flush_work_sync() waited for the work to finish execution, |
2576 | * %false if it was already idle. | ||
2509 | */ | 2577 | */ |
2510 | int cancel_delayed_work_sync(struct delayed_work *dwork) | 2578 | bool flush_delayed_work_sync(struct delayed_work *dwork) |
2579 | { | ||
2580 | if (del_timer_sync(&dwork->timer)) | ||
2581 | __queue_work(raw_smp_processor_id(), | ||
2582 | get_work_cwq(&dwork->work)->wq, &dwork->work); | ||
2583 | return flush_work_sync(&dwork->work); | ||
2584 | } | ||
2585 | EXPORT_SYMBOL(flush_delayed_work_sync); | ||
2586 | |||
2587 | /** | ||
2588 | * cancel_delayed_work_sync - cancel a delayed work and wait for it to finish | ||
2589 | * @dwork: the delayed work cancel | ||
2590 | * | ||
2591 | * This is cancel_work_sync() for delayed works. | ||
2592 | * | ||
2593 | * RETURNS: | ||
2594 | * %true if @dwork was pending, %false otherwise. | ||
2595 | */ | ||
2596 | bool cancel_delayed_work_sync(struct delayed_work *dwork) | ||
2511 | { | 2597 | { |
2512 | return __cancel_work_timer(&dwork->work, &dwork->timer); | 2598 | return __cancel_work_timer(&dwork->work, &dwork->timer); |
2513 | } | 2599 | } |
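flush_delayed_work() and the new flush_delayed_work_sync() first fire a still-pending timer via del_timer_sync() and queue the work immediately, then fall through to the matching flush_work variant. Typical use, with hypothetical names:

        #include <linux/workqueue.h>
        #include <linux/jiffies.h>

        static void demo_dwork_func(struct work_struct *work)
        {
        }
        static DECLARE_DELAYED_WORK(demo_dwork, demo_dwork_func);

        static void demo_delayed(void)
        {
                schedule_delayed_work(&demo_dwork, HZ);

                /* run the pending work now instead of waiting out the timer,
                 * then wait for that execution to finish */
                flush_delayed_work(&demo_dwork);

                /* or: make sure it is neither pending nor running anywhere */
                cancel_delayed_work_sync(&demo_dwork);
        }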
@@ -2559,23 +2645,6 @@ int schedule_delayed_work(struct delayed_work *dwork, | |||
2559 | EXPORT_SYMBOL(schedule_delayed_work); | 2645 | EXPORT_SYMBOL(schedule_delayed_work); |
2560 | 2646 | ||
2561 | /** | 2647 | /** |
2562 | * flush_delayed_work - block until a dwork_struct's callback has terminated | ||
2563 | * @dwork: the delayed work which is to be flushed | ||
2564 | * | ||
2565 | * Any timeout is cancelled, and any pending work is run immediately. | ||
2566 | */ | ||
2567 | void flush_delayed_work(struct delayed_work *dwork) | ||
2568 | { | ||
2569 | if (del_timer_sync(&dwork->timer)) { | ||
2570 | __queue_work(get_cpu(), get_work_cwq(&dwork->work)->wq, | ||
2571 | &dwork->work); | ||
2572 | put_cpu(); | ||
2573 | } | ||
2574 | flush_work(&dwork->work); | ||
2575 | } | ||
2576 | EXPORT_SYMBOL(flush_delayed_work); | ||
2577 | |||
2578 | /** | ||
2579 | * schedule_delayed_work_on - queue work in global workqueue on CPU after delay | 2648 | * schedule_delayed_work_on - queue work in global workqueue on CPU after delay |
2580 | * @cpu: cpu to use | 2649 | * @cpu: cpu to use |
2581 | * @dwork: job to be done | 2650 | * @dwork: job to be done |
@@ -2592,13 +2661,15 @@ int schedule_delayed_work_on(int cpu, | |||
2592 | EXPORT_SYMBOL(schedule_delayed_work_on); | 2661 | EXPORT_SYMBOL(schedule_delayed_work_on); |
2593 | 2662 | ||
2594 | /** | 2663 | /** |
2595 | * schedule_on_each_cpu - call a function on each online CPU from keventd | 2664 | * schedule_on_each_cpu - execute a function synchronously on each online CPU |
2596 | * @func: the function to call | 2665 | * @func: the function to call |
2597 | * | 2666 | * |
2598 | * Returns zero on success. | 2667 | * schedule_on_each_cpu() executes @func on each online CPU using the |
2599 | * Returns -ve errno on failure. | 2668 | * system workqueue and blocks until all CPUs have completed. |
2600 | * | ||
2601 | * schedule_on_each_cpu() is very slow. | 2669 | * schedule_on_each_cpu() is very slow. |
2670 | * | ||
2671 | * RETURNS: | ||
2672 | * 0 on success, -errno on failure. | ||
2602 | */ | 2673 | */ |
2603 | int schedule_on_each_cpu(work_func_t func) | 2674 | int schedule_on_each_cpu(work_func_t func) |
2604 | { | 2675 | { |
@@ -2720,7 +2791,9 @@ static int alloc_cwqs(struct workqueue_struct *wq) | |||
2720 | } | 2791 | } |
2721 | } | 2792 | } |
2722 | 2793 | ||
2723 | /* just in case, make sure it's actually aligned */ | 2794 | /* just in case, make sure it's actually aligned |
2795 | * - this is affected by PERCPU() alignment in vmlinux.lds.S | ||
2796 | */ | ||
2724 | BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align)); | 2797 | BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align)); |
2725 | return wq->cpu_wq.v ? 0 : -ENOMEM; | 2798 | return wq->cpu_wq.v ? 0 : -ENOMEM; |
2726 | } | 2799 | } |
@@ -2764,6 +2837,13 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name, | |||
2764 | unsigned int cpu; | 2837 | unsigned int cpu; |
2765 | 2838 | ||
2766 | /* | 2839 | /* |
2840 | * Workqueues which may be used during memory reclaim should | ||
2841 | * have a rescuer to guarantee forward progress. | ||
2842 | */ | ||
2843 | if (flags & WQ_MEM_RECLAIM) | ||
2844 | flags |= WQ_RESCUER; | ||
2845 | |||
2846 | /* | ||
2767 | * Unbound workqueues aren't concurrency managed and should be | 2847 | * Unbound workqueues aren't concurrency managed and should be |
2768 | * dispatched to workers immediately. | 2848 | * dispatched to workers immediately. |
2769 | */ | 2849 | */ |
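With this final hunk, asking for WQ_MEM_RECLAIM implicitly sets WQ_RESCUER, so any workqueue that may be used on the memory-reclaim path gets a rescuer thread and can make forward progress even when creating a new worker would itself require memory. A hedged sketch of creating such a workqueue; the name is made up:

        #include <linux/workqueue.h>

        static struct workqueue_struct *demo_wq;

        static int __init demo_init(void)
        {
                /* WQ_MEM_RECLAIM now guarantees a rescuer, so work queued
                 * here cannot deadlock behind worker creation under pressure */
                demo_wq = alloc_workqueue("demo_reclaim", WQ_MEM_RECLAIM, 1);
                if (!demo_wq)
                        return -ENOMEM;
                return 0;
        }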