diff options
author | Ingo Molnar <mingo@elte.hu> | 2011-01-04 03:43:42 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-01-04 03:43:42 -0500 |
commit | bc030d6cb9532877c1c5a3f5e7123344fa24a285 (patch) | |
tree | d223d410b868b80d4c0deec192d354a5d06b201a /kernel | |
parent | d3bd058826aa8b79590cca6c8e6d1557bf576ada (diff) | |
parent | 387c31c7e5c9805b0aef8833d1731a5fe7bdea14 (diff) |
Merge commit 'v2.6.37-rc8' into x86/apic
Conflicts:
arch/x86/include/asm/io_apic.h
Merge reason: move to a fresh -rc, resolve the conflict.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
75 files changed, 1823 insertions, 1071 deletions
diff --git a/kernel/audit.c b/kernel/audit.c index d96045789b54..77770a034d59 100644 --- a/kernel/audit.c +++ b/kernel/audit.c | |||
@@ -467,23 +467,16 @@ static int audit_prepare_user_tty(pid_t pid, uid_t loginuid, u32 sessionid) | |||
467 | struct task_struct *tsk; | 467 | struct task_struct *tsk; |
468 | int err; | 468 | int err; |
469 | 469 | ||
470 | read_lock(&tasklist_lock); | 470 | rcu_read_lock(); |
471 | tsk = find_task_by_vpid(pid); | 471 | tsk = find_task_by_vpid(pid); |
472 | err = -ESRCH; | 472 | if (!tsk) { |
473 | if (!tsk) | 473 | rcu_read_unlock(); |
474 | goto out; | 474 | return -ESRCH; |
475 | err = 0; | 475 | } |
476 | 476 | get_task_struct(tsk); | |
477 | spin_lock_irq(&tsk->sighand->siglock); | 477 | rcu_read_unlock(); |
478 | if (!tsk->signal->audit_tty) | 478 | err = tty_audit_push_task(tsk, loginuid, sessionid); |
479 | err = -EPERM; | 479 | put_task_struct(tsk); |
480 | spin_unlock_irq(&tsk->sighand->siglock); | ||
481 | if (err) | ||
482 | goto out; | ||
483 | |||
484 | tty_audit_push_task(tsk, loginuid, sessionid); | ||
485 | out: | ||
486 | read_unlock(&tasklist_lock); | ||
487 | return err; | 480 | return err; |
488 | } | 481 | } |
489 | 482 | ||
@@ -506,7 +499,7 @@ int audit_send_list(void *_dest) | |||
506 | } | 499 | } |
507 | 500 | ||
508 | struct sk_buff *audit_make_reply(int pid, int seq, int type, int done, | 501 | struct sk_buff *audit_make_reply(int pid, int seq, int type, int done, |
509 | int multi, void *payload, int size) | 502 | int multi, const void *payload, int size) |
510 | { | 503 | { |
511 | struct sk_buff *skb; | 504 | struct sk_buff *skb; |
512 | struct nlmsghdr *nlh; | 505 | struct nlmsghdr *nlh; |
@@ -555,8 +548,8 @@ static int audit_send_reply_thread(void *arg) | |||
555 | * Allocates an skb, builds the netlink message, and sends it to the pid. | 548 | * Allocates an skb, builds the netlink message, and sends it to the pid. |
556 | * No failure notifications. | 549 | * No failure notifications. |
557 | */ | 550 | */ |
558 | void audit_send_reply(int pid, int seq, int type, int done, int multi, | 551 | static void audit_send_reply(int pid, int seq, int type, int done, int multi, |
559 | void *payload, int size) | 552 | const void *payload, int size) |
560 | { | 553 | { |
561 | struct sk_buff *skb; | 554 | struct sk_buff *skb; |
562 | struct task_struct *tsk; | 555 | struct task_struct *tsk; |
@@ -880,40 +873,40 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
880 | case AUDIT_TTY_GET: { | 873 | case AUDIT_TTY_GET: { |
881 | struct audit_tty_status s; | 874 | struct audit_tty_status s; |
882 | struct task_struct *tsk; | 875 | struct task_struct *tsk; |
876 | unsigned long flags; | ||
883 | 877 | ||
884 | read_lock(&tasklist_lock); | 878 | rcu_read_lock(); |
885 | tsk = find_task_by_vpid(pid); | 879 | tsk = find_task_by_vpid(pid); |
886 | if (!tsk) | 880 | if (tsk && lock_task_sighand(tsk, &flags)) { |
887 | err = -ESRCH; | ||
888 | else { | ||
889 | spin_lock_irq(&tsk->sighand->siglock); | ||
890 | s.enabled = tsk->signal->audit_tty != 0; | 881 | s.enabled = tsk->signal->audit_tty != 0; |
891 | spin_unlock_irq(&tsk->sighand->siglock); | 882 | unlock_task_sighand(tsk, &flags); |
892 | } | 883 | } else |
893 | read_unlock(&tasklist_lock); | 884 | err = -ESRCH; |
894 | audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_TTY_GET, 0, 0, | 885 | rcu_read_unlock(); |
895 | &s, sizeof(s)); | 886 | |
887 | if (!err) | ||
888 | audit_send_reply(NETLINK_CB(skb).pid, seq, | ||
889 | AUDIT_TTY_GET, 0, 0, &s, sizeof(s)); | ||
896 | break; | 890 | break; |
897 | } | 891 | } |
898 | case AUDIT_TTY_SET: { | 892 | case AUDIT_TTY_SET: { |
899 | struct audit_tty_status *s; | 893 | struct audit_tty_status *s; |
900 | struct task_struct *tsk; | 894 | struct task_struct *tsk; |
895 | unsigned long flags; | ||
901 | 896 | ||
902 | if (nlh->nlmsg_len < sizeof(struct audit_tty_status)) | 897 | if (nlh->nlmsg_len < sizeof(struct audit_tty_status)) |
903 | return -EINVAL; | 898 | return -EINVAL; |
904 | s = data; | 899 | s = data; |
905 | if (s->enabled != 0 && s->enabled != 1) | 900 | if (s->enabled != 0 && s->enabled != 1) |
906 | return -EINVAL; | 901 | return -EINVAL; |
907 | read_lock(&tasklist_lock); | 902 | rcu_read_lock(); |
908 | tsk = find_task_by_vpid(pid); | 903 | tsk = find_task_by_vpid(pid); |
909 | if (!tsk) | 904 | if (tsk && lock_task_sighand(tsk, &flags)) { |
910 | err = -ESRCH; | ||
911 | else { | ||
912 | spin_lock_irq(&tsk->sighand->siglock); | ||
913 | tsk->signal->audit_tty = s->enabled != 0; | 905 | tsk->signal->audit_tty = s->enabled != 0; |
914 | spin_unlock_irq(&tsk->sighand->siglock); | 906 | unlock_task_sighand(tsk, &flags); |
915 | } | 907 | } else |
916 | read_unlock(&tasklist_lock); | 908 | err = -ESRCH; |
909 | rcu_read_unlock(); | ||
917 | break; | 910 | break; |
918 | } | 911 | } |
919 | default: | 912 | default: |
diff --git a/kernel/audit.h b/kernel/audit.h index f7206db4e13d..91e7071c4d2c 100644 --- a/kernel/audit.h +++ b/kernel/audit.h | |||
@@ -84,10 +84,7 @@ extern int audit_compare_dname_path(const char *dname, const char *path, | |||
84 | int *dirlen); | 84 | int *dirlen); |
85 | extern struct sk_buff * audit_make_reply(int pid, int seq, int type, | 85 | extern struct sk_buff * audit_make_reply(int pid, int seq, int type, |
86 | int done, int multi, | 86 | int done, int multi, |
87 | void *payload, int size); | 87 | const void *payload, int size); |
88 | extern void audit_send_reply(int pid, int seq, int type, | ||
89 | int done, int multi, | ||
90 | void *payload, int size); | ||
91 | extern void audit_panic(const char *message); | 88 | extern void audit_panic(const char *message); |
92 | 89 | ||
93 | struct audit_netlink_list { | 90 | struct audit_netlink_list { |
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 7f18d3a4527e..37b2bea170c8 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c | |||
@@ -223,7 +223,7 @@ static void untag_chunk(struct node *p) | |||
223 | { | 223 | { |
224 | struct audit_chunk *chunk = find_chunk(p); | 224 | struct audit_chunk *chunk = find_chunk(p); |
225 | struct fsnotify_mark *entry = &chunk->mark; | 225 | struct fsnotify_mark *entry = &chunk->mark; |
226 | struct audit_chunk *new; | 226 | struct audit_chunk *new = NULL; |
227 | struct audit_tree *owner; | 227 | struct audit_tree *owner; |
228 | int size = chunk->count - 1; | 228 | int size = chunk->count - 1; |
229 | int i, j; | 229 | int i, j; |
@@ -232,9 +232,14 @@ static void untag_chunk(struct node *p) | |||
232 | 232 | ||
233 | spin_unlock(&hash_lock); | 233 | spin_unlock(&hash_lock); |
234 | 234 | ||
235 | if (size) | ||
236 | new = alloc_chunk(size); | ||
237 | |||
235 | spin_lock(&entry->lock); | 238 | spin_lock(&entry->lock); |
236 | if (chunk->dead || !entry->i.inode) { | 239 | if (chunk->dead || !entry->i.inode) { |
237 | spin_unlock(&entry->lock); | 240 | spin_unlock(&entry->lock); |
241 | if (new) | ||
242 | free_chunk(new); | ||
238 | goto out; | 243 | goto out; |
239 | } | 244 | } |
240 | 245 | ||
@@ -255,9 +260,9 @@ static void untag_chunk(struct node *p) | |||
255 | goto out; | 260 | goto out; |
256 | } | 261 | } |
257 | 262 | ||
258 | new = alloc_chunk(size); | ||
259 | if (!new) | 263 | if (!new) |
260 | goto Fallback; | 264 | goto Fallback; |
265 | |||
261 | fsnotify_duplicate_mark(&new->mark, entry); | 266 | fsnotify_duplicate_mark(&new->mark, entry); |
262 | if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.i.inode, NULL, 1)) { | 267 | if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.i.inode, NULL, 1)) { |
263 | free_chunk(new); | 268 | free_chunk(new); |
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index f0c9b2e7542d..d2e3c7866460 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c | |||
@@ -60,7 +60,7 @@ struct audit_parent { | |||
60 | }; | 60 | }; |
61 | 61 | ||
62 | /* fsnotify handle. */ | 62 | /* fsnotify handle. */ |
63 | struct fsnotify_group *audit_watch_group; | 63 | static struct fsnotify_group *audit_watch_group; |
64 | 64 | ||
65 | /* fsnotify events we care about. */ | 65 | /* fsnotify events we care about. */ |
66 | #define AUDIT_FS_WATCH (FS_MOVE | FS_CREATE | FS_DELETE | FS_DELETE_SELF |\ | 66 | #define AUDIT_FS_WATCH (FS_MOVE | FS_CREATE | FS_DELETE | FS_DELETE_SELF |\ |
@@ -123,7 +123,7 @@ void audit_put_watch(struct audit_watch *watch) | |||
123 | } | 123 | } |
124 | } | 124 | } |
125 | 125 | ||
126 | void audit_remove_watch(struct audit_watch *watch) | 126 | static void audit_remove_watch(struct audit_watch *watch) |
127 | { | 127 | { |
128 | list_del(&watch->wlist); | 128 | list_del(&watch->wlist); |
129 | audit_put_parent(watch->parent); | 129 | audit_put_parent(watch->parent); |
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index eb7675499fb5..add2819af71b 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c | |||
@@ -1252,6 +1252,18 @@ static int audit_filter_user_rules(struct netlink_skb_parms *cb, | |||
1252 | case AUDIT_LOGINUID: | 1252 | case AUDIT_LOGINUID: |
1253 | result = audit_comparator(cb->loginuid, f->op, f->val); | 1253 | result = audit_comparator(cb->loginuid, f->op, f->val); |
1254 | break; | 1254 | break; |
1255 | case AUDIT_SUBJ_USER: | ||
1256 | case AUDIT_SUBJ_ROLE: | ||
1257 | case AUDIT_SUBJ_TYPE: | ||
1258 | case AUDIT_SUBJ_SEN: | ||
1259 | case AUDIT_SUBJ_CLR: | ||
1260 | if (f->lsm_rule) | ||
1261 | result = security_audit_rule_match(cb->sid, | ||
1262 | f->type, | ||
1263 | f->op, | ||
1264 | f->lsm_rule, | ||
1265 | NULL); | ||
1266 | break; | ||
1255 | } | 1267 | } |
1256 | 1268 | ||
1257 | if (!result) | 1269 | if (!result) |
diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 1b31c130d034..f49a0318c2ed 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c | |||
@@ -241,6 +241,10 @@ struct audit_context { | |||
241 | pid_t pid; | 241 | pid_t pid; |
242 | struct audit_cap_data cap; | 242 | struct audit_cap_data cap; |
243 | } capset; | 243 | } capset; |
244 | struct { | ||
245 | int fd; | ||
246 | int flags; | ||
247 | } mmap; | ||
244 | }; | 248 | }; |
245 | int fds[2]; | 249 | int fds[2]; |
246 | 250 | ||
@@ -1305,6 +1309,10 @@ static void show_special(struct audit_context *context, int *call_panic) | |||
1305 | audit_log_cap(ab, "cap_pp", &context->capset.cap.permitted); | 1309 | audit_log_cap(ab, "cap_pp", &context->capset.cap.permitted); |
1306 | audit_log_cap(ab, "cap_pe", &context->capset.cap.effective); | 1310 | audit_log_cap(ab, "cap_pe", &context->capset.cap.effective); |
1307 | break; } | 1311 | break; } |
1312 | case AUDIT_MMAP: { | ||
1313 | audit_log_format(ab, "fd=%d flags=0x%x", context->mmap.fd, | ||
1314 | context->mmap.flags); | ||
1315 | break; } | ||
1308 | } | 1316 | } |
1309 | audit_log_end(ab); | 1317 | audit_log_end(ab); |
1310 | } | 1318 | } |
@@ -2476,6 +2484,14 @@ void __audit_log_capset(pid_t pid, | |||
2476 | context->type = AUDIT_CAPSET; | 2484 | context->type = AUDIT_CAPSET; |
2477 | } | 2485 | } |
2478 | 2486 | ||
2487 | void __audit_mmap_fd(int fd, int flags) | ||
2488 | { | ||
2489 | struct audit_context *context = current->audit_context; | ||
2490 | context->mmap.fd = fd; | ||
2491 | context->mmap.flags = flags; | ||
2492 | context->type = AUDIT_MMAP; | ||
2493 | } | ||
2494 | |||
2479 | /** | 2495 | /** |
2480 | * audit_core_dumps - record information about processes that end abnormally | 2496 | * audit_core_dumps - record information about processes that end abnormally |
2481 | * @signr: signal value | 2497 | * @signr: signal value |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 291ba3d04bea..66a416b42c18 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -52,7 +52,6 @@ | |||
52 | #include <linux/cgroupstats.h> | 52 | #include <linux/cgroupstats.h> |
53 | #include <linux/hash.h> | 53 | #include <linux/hash.h> |
54 | #include <linux/namei.h> | 54 | #include <linux/namei.h> |
55 | #include <linux/smp_lock.h> | ||
56 | #include <linux/pid_namespace.h> | 55 | #include <linux/pid_namespace.h> |
57 | #include <linux/idr.h> | 56 | #include <linux/idr.h> |
58 | #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ | 57 | #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ |
@@ -244,6 +243,11 @@ static int notify_on_release(const struct cgroup *cgrp) | |||
244 | return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); | 243 | return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); |
245 | } | 244 | } |
246 | 245 | ||
246 | static int clone_children(const struct cgroup *cgrp) | ||
247 | { | ||
248 | return test_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); | ||
249 | } | ||
250 | |||
247 | /* | 251 | /* |
248 | * for_each_subsys() allows you to iterate on each subsystem attached to | 252 | * for_each_subsys() allows you to iterate on each subsystem attached to |
249 | * an active hierarchy | 253 | * an active hierarchy |
@@ -778,6 +782,7 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb) | |||
778 | struct inode *inode = new_inode(sb); | 782 | struct inode *inode = new_inode(sb); |
779 | 783 | ||
780 | if (inode) { | 784 | if (inode) { |
785 | inode->i_ino = get_next_ino(); | ||
781 | inode->i_mode = mode; | 786 | inode->i_mode = mode; |
782 | inode->i_uid = current_fsuid(); | 787 | inode->i_uid = current_fsuid(); |
783 | inode->i_gid = current_fsgid(); | 788 | inode->i_gid = current_fsgid(); |
@@ -1040,6 +1045,8 @@ static int cgroup_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
1040 | seq_puts(seq, ",noprefix"); | 1045 | seq_puts(seq, ",noprefix"); |
1041 | if (strlen(root->release_agent_path)) | 1046 | if (strlen(root->release_agent_path)) |
1042 | seq_printf(seq, ",release_agent=%s", root->release_agent_path); | 1047 | seq_printf(seq, ",release_agent=%s", root->release_agent_path); |
1048 | if (clone_children(&root->top_cgroup)) | ||
1049 | seq_puts(seq, ",clone_children"); | ||
1043 | if (strlen(root->name)) | 1050 | if (strlen(root->name)) |
1044 | seq_printf(seq, ",name=%s", root->name); | 1051 | seq_printf(seq, ",name=%s", root->name); |
1045 | mutex_unlock(&cgroup_mutex); | 1052 | mutex_unlock(&cgroup_mutex); |
@@ -1050,6 +1057,7 @@ struct cgroup_sb_opts { | |||
1050 | unsigned long subsys_bits; | 1057 | unsigned long subsys_bits; |
1051 | unsigned long flags; | 1058 | unsigned long flags; |
1052 | char *release_agent; | 1059 | char *release_agent; |
1060 | bool clone_children; | ||
1053 | char *name; | 1061 | char *name; |
1054 | /* User explicitly requested empty subsystem */ | 1062 | /* User explicitly requested empty subsystem */ |
1055 | bool none; | 1063 | bool none; |
@@ -1066,7 +1074,8 @@ struct cgroup_sb_opts { | |||
1066 | */ | 1074 | */ |
1067 | static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | 1075 | static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) |
1068 | { | 1076 | { |
1069 | char *token, *o = data ?: "all"; | 1077 | char *token, *o = data; |
1078 | bool all_ss = false, one_ss = false; | ||
1070 | unsigned long mask = (unsigned long)-1; | 1079 | unsigned long mask = (unsigned long)-1; |
1071 | int i; | 1080 | int i; |
1072 | bool module_pin_failed = false; | 1081 | bool module_pin_failed = false; |
@@ -1082,22 +1091,27 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
1082 | while ((token = strsep(&o, ",")) != NULL) { | 1091 | while ((token = strsep(&o, ",")) != NULL) { |
1083 | if (!*token) | 1092 | if (!*token) |
1084 | return -EINVAL; | 1093 | return -EINVAL; |
1085 | if (!strcmp(token, "all")) { | 1094 | if (!strcmp(token, "none")) { |
1086 | /* Add all non-disabled subsystems */ | ||
1087 | opts->subsys_bits = 0; | ||
1088 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | ||
1089 | struct cgroup_subsys *ss = subsys[i]; | ||
1090 | if (ss == NULL) | ||
1091 | continue; | ||
1092 | if (!ss->disabled) | ||
1093 | opts->subsys_bits |= 1ul << i; | ||
1094 | } | ||
1095 | } else if (!strcmp(token, "none")) { | ||
1096 | /* Explicitly have no subsystems */ | 1095 | /* Explicitly have no subsystems */ |
1097 | opts->none = true; | 1096 | opts->none = true; |
1098 | } else if (!strcmp(token, "noprefix")) { | 1097 | continue; |
1098 | } | ||
1099 | if (!strcmp(token, "all")) { | ||
1100 | /* Mutually exclusive option 'all' + subsystem name */ | ||
1101 | if (one_ss) | ||
1102 | return -EINVAL; | ||
1103 | all_ss = true; | ||
1104 | continue; | ||
1105 | } | ||
1106 | if (!strcmp(token, "noprefix")) { | ||
1099 | set_bit(ROOT_NOPREFIX, &opts->flags); | 1107 | set_bit(ROOT_NOPREFIX, &opts->flags); |
1100 | } else if (!strncmp(token, "release_agent=", 14)) { | 1108 | continue; |
1109 | } | ||
1110 | if (!strcmp(token, "clone_children")) { | ||
1111 | opts->clone_children = true; | ||
1112 | continue; | ||
1113 | } | ||
1114 | if (!strncmp(token, "release_agent=", 14)) { | ||
1101 | /* Specifying two release agents is forbidden */ | 1115 | /* Specifying two release agents is forbidden */ |
1102 | if (opts->release_agent) | 1116 | if (opts->release_agent) |
1103 | return -EINVAL; | 1117 | return -EINVAL; |
@@ -1105,7 +1119,9 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
1105 | kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL); | 1119 | kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL); |
1106 | if (!opts->release_agent) | 1120 | if (!opts->release_agent) |
1107 | return -ENOMEM; | 1121 | return -ENOMEM; |
1108 | } else if (!strncmp(token, "name=", 5)) { | 1122 | continue; |
1123 | } | ||
1124 | if (!strncmp(token, "name=", 5)) { | ||
1109 | const char *name = token + 5; | 1125 | const char *name = token + 5; |
1110 | /* Can't specify an empty name */ | 1126 | /* Can't specify an empty name */ |
1111 | if (!strlen(name)) | 1127 | if (!strlen(name)) |
@@ -1127,20 +1143,44 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
1127 | GFP_KERNEL); | 1143 | GFP_KERNEL); |
1128 | if (!opts->name) | 1144 | if (!opts->name) |
1129 | return -ENOMEM; | 1145 | return -ENOMEM; |
1130 | } else { | 1146 | |
1131 | struct cgroup_subsys *ss; | 1147 | continue; |
1132 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 1148 | } |
1133 | ss = subsys[i]; | 1149 | |
1134 | if (ss == NULL) | 1150 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
1135 | continue; | 1151 | struct cgroup_subsys *ss = subsys[i]; |
1136 | if (!strcmp(token, ss->name)) { | 1152 | if (ss == NULL) |
1137 | if (!ss->disabled) | 1153 | continue; |
1138 | set_bit(i, &opts->subsys_bits); | 1154 | if (strcmp(token, ss->name)) |
1139 | break; | 1155 | continue; |
1140 | } | 1156 | if (ss->disabled) |
1141 | } | 1157 | continue; |
1142 | if (i == CGROUP_SUBSYS_COUNT) | 1158 | |
1143 | return -ENOENT; | 1159 | /* Mutually exclusive option 'all' + subsystem name */ |
1160 | if (all_ss) | ||
1161 | return -EINVAL; | ||
1162 | set_bit(i, &opts->subsys_bits); | ||
1163 | one_ss = true; | ||
1164 | |||
1165 | break; | ||
1166 | } | ||
1167 | if (i == CGROUP_SUBSYS_COUNT) | ||
1168 | return -ENOENT; | ||
1169 | } | ||
1170 | |||
1171 | /* | ||
1172 | * If the 'all' option was specified, select all the subsystems; | |
1173 | * otherwise, if none of 'all', 'none' or a subsystem name was | |
1174 | * specified, default to 'all'. | |
1175 | */ | ||
1176 | if (all_ss || (!all_ss && !one_ss && !opts->none)) { | ||
1177 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | ||
1178 | struct cgroup_subsys *ss = subsys[i]; | ||
1179 | if (ss == NULL) | ||
1180 | continue; | ||
1181 | if (ss->disabled) | ||
1182 | continue; | ||
1183 | set_bit(i, &opts->subsys_bits); | ||
1144 | } | 1184 | } |
1145 | } | 1185 | } |
1146 | 1186 | ||
@@ -1222,7 +1262,6 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
1222 | struct cgroup *cgrp = &root->top_cgroup; | 1262 | struct cgroup *cgrp = &root->top_cgroup; |
1223 | struct cgroup_sb_opts opts; | 1263 | struct cgroup_sb_opts opts; |
1224 | 1264 | ||
1225 | lock_kernel(); | ||
1226 | mutex_lock(&cgrp->dentry->d_inode->i_mutex); | 1265 | mutex_lock(&cgrp->dentry->d_inode->i_mutex); |
1227 | mutex_lock(&cgroup_mutex); | 1266 | mutex_lock(&cgroup_mutex); |
1228 | 1267 | ||
@@ -1255,7 +1294,6 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
1255 | kfree(opts.name); | 1294 | kfree(opts.name); |
1256 | mutex_unlock(&cgroup_mutex); | 1295 | mutex_unlock(&cgroup_mutex); |
1257 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); | 1296 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); |
1258 | unlock_kernel(); | ||
1259 | return ret; | 1297 | return ret; |
1260 | } | 1298 | } |
1261 | 1299 | ||
@@ -1357,6 +1395,8 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts) | |||
1357 | strcpy(root->release_agent_path, opts->release_agent); | 1395 | strcpy(root->release_agent_path, opts->release_agent); |
1358 | if (opts->name) | 1396 | if (opts->name) |
1359 | strcpy(root->name, opts->name); | 1397 | strcpy(root->name, opts->name); |
1398 | if (opts->clone_children) | ||
1399 | set_bit(CGRP_CLONE_CHILDREN, &root->top_cgroup.flags); | ||
1360 | return root; | 1400 | return root; |
1361 | } | 1401 | } |
1362 | 1402 | ||
@@ -1420,9 +1460,9 @@ static int cgroup_get_rootdir(struct super_block *sb) | |||
1420 | return 0; | 1460 | return 0; |
1421 | } | 1461 | } |
1422 | 1462 | ||
1423 | static int cgroup_get_sb(struct file_system_type *fs_type, | 1463 | static struct dentry *cgroup_mount(struct file_system_type *fs_type, |
1424 | int flags, const char *unused_dev_name, | 1464 | int flags, const char *unused_dev_name, |
1425 | void *data, struct vfsmount *mnt) | 1465 | void *data) |
1426 | { | 1466 | { |
1427 | struct cgroup_sb_opts opts; | 1467 | struct cgroup_sb_opts opts; |
1428 | struct cgroupfs_root *root; | 1468 | struct cgroupfs_root *root; |
@@ -1556,10 +1596,9 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
1556 | drop_parsed_module_refcounts(opts.subsys_bits); | 1596 | drop_parsed_module_refcounts(opts.subsys_bits); |
1557 | } | 1597 | } |
1558 | 1598 | ||
1559 | simple_set_mnt(mnt, sb); | ||
1560 | kfree(opts.release_agent); | 1599 | kfree(opts.release_agent); |
1561 | kfree(opts.name); | 1600 | kfree(opts.name); |
1562 | return 0; | 1601 | return dget(sb->s_root); |
1563 | 1602 | ||
1564 | drop_new_super: | 1603 | drop_new_super: |
1565 | deactivate_locked_super(sb); | 1604 | deactivate_locked_super(sb); |
@@ -1568,8 +1607,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
1568 | out_err: | 1607 | out_err: |
1569 | kfree(opts.release_agent); | 1608 | kfree(opts.release_agent); |
1570 | kfree(opts.name); | 1609 | kfree(opts.name); |
1571 | 1610 | return ERR_PTR(ret); | |
1572 | return ret; | ||
1573 | } | 1611 | } |
1574 | 1612 | ||
1575 | static void cgroup_kill_sb(struct super_block *sb) { | 1613 | static void cgroup_kill_sb(struct super_block *sb) { |
@@ -1619,7 +1657,7 @@ static void cgroup_kill_sb(struct super_block *sb) { | |||
1619 | 1657 | ||
1620 | static struct file_system_type cgroup_fs_type = { | 1658 | static struct file_system_type cgroup_fs_type = { |
1621 | .name = "cgroup", | 1659 | .name = "cgroup", |
1622 | .get_sb = cgroup_get_sb, | 1660 | .mount = cgroup_mount, |
1623 | .kill_sb = cgroup_kill_sb, | 1661 | .kill_sb = cgroup_kill_sb, |
1624 | }; | 1662 | }; |
1625 | 1663 | ||
@@ -1883,6 +1921,8 @@ static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft, | |||
1883 | const char *buffer) | 1921 | const char *buffer) |
1884 | { | 1922 | { |
1885 | BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); | 1923 | BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); |
1924 | if (strlen(buffer) >= PATH_MAX) | ||
1925 | return -EINVAL; | ||
1886 | if (!cgroup_lock_live_group(cgrp)) | 1926 | if (!cgroup_lock_live_group(cgrp)) |
1887 | return -ENODEV; | 1927 | return -ENODEV; |
1888 | strcpy(cgrp->root->release_agent_path, buffer); | 1928 | strcpy(cgrp->root->release_agent_path, buffer); |
@@ -3176,6 +3216,23 @@ fail: | |||
3176 | return ret; | 3216 | return ret; |
3177 | } | 3217 | } |
3178 | 3218 | ||
3219 | static u64 cgroup_clone_children_read(struct cgroup *cgrp, | ||
3220 | struct cftype *cft) | ||
3221 | { | ||
3222 | return clone_children(cgrp); | ||
3223 | } | ||
3224 | |||
3225 | static int cgroup_clone_children_write(struct cgroup *cgrp, | ||
3226 | struct cftype *cft, | ||
3227 | u64 val) | ||
3228 | { | ||
3229 | if (val) | ||
3230 | set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); | ||
3231 | else | ||
3232 | clear_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); | ||
3233 | return 0; | ||
3234 | } | ||
3235 | |||
3179 | /* | 3236 | /* |
3180 | * for the common functions, 'private' gives the type of file | 3237 | * for the common functions, 'private' gives the type of file |
3181 | */ | 3238 | */ |
@@ -3206,6 +3263,11 @@ static struct cftype files[] = { | |||
3206 | .write_string = cgroup_write_event_control, | 3263 | .write_string = cgroup_write_event_control, |
3207 | .mode = S_IWUGO, | 3264 | .mode = S_IWUGO, |
3208 | }, | 3265 | }, |
3266 | { | ||
3267 | .name = "cgroup.clone_children", | ||
3268 | .read_u64 = cgroup_clone_children_read, | ||
3269 | .write_u64 = cgroup_clone_children_write, | ||
3270 | }, | ||
3209 | }; | 3271 | }; |
3210 | 3272 | ||
3211 | static struct cftype cft_release_agent = { | 3273 | static struct cftype cft_release_agent = { |
@@ -3335,6 +3397,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
3335 | if (notify_on_release(parent)) | 3397 | if (notify_on_release(parent)) |
3336 | set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); | 3398 | set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); |
3337 | 3399 | ||
3400 | if (clone_children(parent)) | ||
3401 | set_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); | ||
3402 | |||
3338 | for_each_subsys(root, ss) { | 3403 | for_each_subsys(root, ss) { |
3339 | struct cgroup_subsys_state *css = ss->create(ss, cgrp); | 3404 | struct cgroup_subsys_state *css = ss->create(ss, cgrp); |
3340 | 3405 | ||
@@ -3349,6 +3414,8 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
3349 | goto err_destroy; | 3414 | goto err_destroy; |
3350 | } | 3415 | } |
3351 | /* At error, ->destroy() callback has to free assigned ID. */ | 3416 | /* At error, ->destroy() callback has to free assigned ID. */ |
3417 | if (clone_children(parent) && ss->post_clone) | ||
3418 | ss->post_clone(ss, cgrp); | ||
3352 | } | 3419 | } |
3353 | 3420 | ||
3354 | cgroup_lock_hierarchy(root); | 3421 | cgroup_lock_hierarchy(root); |
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index ce71ed53e88f..e7bebb7c6c38 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c | |||
@@ -48,20 +48,19 @@ static inline struct freezer *task_freezer(struct task_struct *task) | |||
48 | struct freezer, css); | 48 | struct freezer, css); |
49 | } | 49 | } |
50 | 50 | ||
51 | int cgroup_freezing_or_frozen(struct task_struct *task) | 51 | static inline int __cgroup_freezing_or_frozen(struct task_struct *task) |
52 | { | 52 | { |
53 | struct freezer *freezer; | 53 | enum freezer_state state = task_freezer(task)->state; |
54 | enum freezer_state state; | 54 | return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN); |
55 | } | ||
55 | 56 | ||
57 | int cgroup_freezing_or_frozen(struct task_struct *task) | ||
58 | { | ||
59 | int result; | ||
56 | task_lock(task); | 60 | task_lock(task); |
57 | freezer = task_freezer(task); | 61 | result = __cgroup_freezing_or_frozen(task); |
58 | if (!freezer->css.cgroup->parent) | ||
59 | state = CGROUP_THAWED; /* root cgroup can't be frozen */ | ||
60 | else | ||
61 | state = freezer->state; | ||
62 | task_unlock(task); | 62 | task_unlock(task); |
63 | 63 | return result; | |
64 | return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN); | ||
65 | } | 64 | } |
66 | 65 | ||
67 | /* | 66 | /* |
@@ -154,13 +153,6 @@ static void freezer_destroy(struct cgroup_subsys *ss, | |||
154 | kfree(cgroup_freezer(cgroup)); | 153 | kfree(cgroup_freezer(cgroup)); |
155 | } | 154 | } |
156 | 155 | ||
157 | /* Task is frozen or will freeze immediately when next it gets woken */ | ||
158 | static bool is_task_frozen_enough(struct task_struct *task) | ||
159 | { | ||
160 | return frozen(task) || | ||
161 | (task_is_stopped_or_traced(task) && freezing(task)); | ||
162 | } | ||
163 | |||
164 | /* | 156 | /* |
165 | * The call to cgroup_lock() in the freezer.state write method prevents | 157 | * The call to cgroup_lock() in the freezer.state write method prevents |
166 | * a write to that file racing against an attach, and hence the | 158 | * a write to that file racing against an attach, and hence the |
@@ -174,24 +166,25 @@ static int freezer_can_attach(struct cgroup_subsys *ss, | |||
174 | 166 | ||
175 | /* | 167 | /* |
176 | * Anything frozen can't move or be moved to/from. | 168 | * Anything frozen can't move or be moved to/from. |
177 | * | ||
178 | * Since orig_freezer->state == FROZEN means that @task has been | ||
179 | * frozen, so it's sufficient to check the latter condition. | ||
180 | */ | 169 | */ |
181 | 170 | ||
182 | if (is_task_frozen_enough(task)) | 171 | freezer = cgroup_freezer(new_cgroup); |
172 | if (freezer->state != CGROUP_THAWED) | ||
183 | return -EBUSY; | 173 | return -EBUSY; |
184 | 174 | ||
185 | freezer = cgroup_freezer(new_cgroup); | 175 | rcu_read_lock(); |
186 | if (freezer->state == CGROUP_FROZEN) | 176 | if (__cgroup_freezing_or_frozen(task)) { |
177 | rcu_read_unlock(); | ||
187 | return -EBUSY; | 178 | return -EBUSY; |
179 | } | ||
180 | rcu_read_unlock(); | ||
188 | 181 | ||
189 | if (threadgroup) { | 182 | if (threadgroup) { |
190 | struct task_struct *c; | 183 | struct task_struct *c; |
191 | 184 | ||
192 | rcu_read_lock(); | 185 | rcu_read_lock(); |
193 | list_for_each_entry_rcu(c, &task->thread_group, thread_group) { | 186 | list_for_each_entry_rcu(c, &task->thread_group, thread_group) { |
194 | if (is_task_frozen_enough(c)) { | 187 | if (__cgroup_freezing_or_frozen(c)) { |
195 | rcu_read_unlock(); | 188 | rcu_read_unlock(); |
196 | return -EBUSY; | 189 | return -EBUSY; |
197 | } | 190 | } |
@@ -236,31 +229,30 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) | |||
236 | /* | 229 | /* |
237 | * caller must hold freezer->lock | 230 | * caller must hold freezer->lock |
238 | */ | 231 | */ |
239 | static void update_freezer_state(struct cgroup *cgroup, | 232 | static void update_if_frozen(struct cgroup *cgroup, |
240 | struct freezer *freezer) | 233 | struct freezer *freezer) |
241 | { | 234 | { |
242 | struct cgroup_iter it; | 235 | struct cgroup_iter it; |
243 | struct task_struct *task; | 236 | struct task_struct *task; |
244 | unsigned int nfrozen = 0, ntotal = 0; | 237 | unsigned int nfrozen = 0, ntotal = 0; |
238 | enum freezer_state old_state = freezer->state; | ||
245 | 239 | ||
246 | cgroup_iter_start(cgroup, &it); | 240 | cgroup_iter_start(cgroup, &it); |
247 | while ((task = cgroup_iter_next(cgroup, &it))) { | 241 | while ((task = cgroup_iter_next(cgroup, &it))) { |
248 | ntotal++; | 242 | ntotal++; |
249 | if (is_task_frozen_enough(task)) | 243 | if (frozen(task)) |
250 | nfrozen++; | 244 | nfrozen++; |
251 | } | 245 | } |
252 | 246 | ||
253 | /* | 247 | if (old_state == CGROUP_THAWED) { |
254 | * Transition to FROZEN when no new tasks can be added ensures | 248 | BUG_ON(nfrozen > 0); |
255 | * that we never exist in the FROZEN state while there are unfrozen | 249 | } else if (old_state == CGROUP_FREEZING) { |
256 | * tasks. | 250 | if (nfrozen == ntotal) |
257 | */ | 251 | freezer->state = CGROUP_FROZEN; |
258 | if (nfrozen == ntotal) | 252 | } else { /* old_state == CGROUP_FROZEN */ |
259 | freezer->state = CGROUP_FROZEN; | 253 | BUG_ON(nfrozen != ntotal); |
260 | else if (nfrozen > 0) | 254 | } |
261 | freezer->state = CGROUP_FREEZING; | 255 | |
262 | else | ||
263 | freezer->state = CGROUP_THAWED; | ||
264 | cgroup_iter_end(cgroup, &it); | 256 | cgroup_iter_end(cgroup, &it); |
265 | } | 257 | } |
266 | 258 | ||
@@ -279,7 +271,7 @@ static int freezer_read(struct cgroup *cgroup, struct cftype *cft, | |||
279 | if (state == CGROUP_FREEZING) { | 271 | if (state == CGROUP_FREEZING) { |
280 | /* We change from FREEZING to FROZEN lazily if the cgroup was | 272 | /* We change from FREEZING to FROZEN lazily if the cgroup was |
281 | * only partially frozen when we exitted write. */ | 273 | * only partially frozen when we exitted write. */ |
282 | update_freezer_state(cgroup, freezer); | 274 | update_if_frozen(cgroup, freezer); |
283 | state = freezer->state; | 275 | state = freezer->state; |
284 | } | 276 | } |
285 | spin_unlock_irq(&freezer->lock); | 277 | spin_unlock_irq(&freezer->lock); |
@@ -301,7 +293,7 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) | |||
301 | while ((task = cgroup_iter_next(cgroup, &it))) { | 293 | while ((task = cgroup_iter_next(cgroup, &it))) { |
302 | if (!freeze_task(task, true)) | 294 | if (!freeze_task(task, true)) |
303 | continue; | 295 | continue; |
304 | if (is_task_frozen_enough(task)) | 296 | if (frozen(task)) |
305 | continue; | 297 | continue; |
306 | if (!freezing(task) && !freezer_should_skip(task)) | 298 | if (!freezing(task) && !freezer_should_skip(task)) |
307 | num_cant_freeze_now++; | 299 | num_cant_freeze_now++; |
@@ -335,7 +327,7 @@ static int freezer_change_state(struct cgroup *cgroup, | |||
335 | 327 | ||
336 | spin_lock_irq(&freezer->lock); | 328 | spin_lock_irq(&freezer->lock); |
337 | 329 | ||
338 | update_freezer_state(cgroup, freezer); | 330 | update_if_frozen(cgroup, freezer); |
339 | if (goal_state == freezer->state) | 331 | if (goal_state == freezer->state) |
340 | goto out; | 332 | goto out; |
341 | 333 | ||
diff --git a/kernel/configs.c b/kernel/configs.c index abaee684ecbf..b4066b44a99d 100644 --- a/kernel/configs.c +++ b/kernel/configs.c | |||
@@ -66,6 +66,7 @@ ikconfig_read_current(struct file *file, char __user *buf, | |||
66 | static const struct file_operations ikconfig_file_ops = { | 66 | static const struct file_operations ikconfig_file_ops = { |
67 | .owner = THIS_MODULE, | 67 | .owner = THIS_MODULE, |
68 | .read = ikconfig_read_current, | 68 | .read = ikconfig_read_current, |
69 | .llseek = default_llseek, | ||
69 | }; | 70 | }; |
70 | 71 | ||
71 | static int __init ikconfig_init(void) | 72 | static int __init ikconfig_init(void) |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 51b143e2a07a..4349935c2ad8 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -231,18 +231,17 @@ static DEFINE_SPINLOCK(cpuset_buffer_lock); | |||
231 | * users. If someone tries to mount the "cpuset" filesystem, we | 231 | * users. If someone tries to mount the "cpuset" filesystem, we |
232 | * silently switch it to mount "cgroup" instead | 232 | * silently switch it to mount "cgroup" instead |
233 | */ | 233 | */ |
234 | static int cpuset_get_sb(struct file_system_type *fs_type, | 234 | static struct dentry *cpuset_mount(struct file_system_type *fs_type, |
235 | int flags, const char *unused_dev_name, | 235 | int flags, const char *unused_dev_name, void *data) |
236 | void *data, struct vfsmount *mnt) | ||
237 | { | 236 | { |
238 | struct file_system_type *cgroup_fs = get_fs_type("cgroup"); | 237 | struct file_system_type *cgroup_fs = get_fs_type("cgroup"); |
239 | int ret = -ENODEV; | 238 | struct dentry *ret = ERR_PTR(-ENODEV); |
240 | if (cgroup_fs) { | 239 | if (cgroup_fs) { |
241 | char mountopts[] = | 240 | char mountopts[] = |
242 | "cpuset,noprefix," | 241 | "cpuset,noprefix," |
243 | "release_agent=/sbin/cpuset_release_agent"; | 242 | "release_agent=/sbin/cpuset_release_agent"; |
244 | ret = cgroup_fs->get_sb(cgroup_fs, flags, | 243 | ret = cgroup_fs->mount(cgroup_fs, flags, |
245 | unused_dev_name, mountopts, mnt); | 244 | unused_dev_name, mountopts); |
246 | put_filesystem(cgroup_fs); | 245 | put_filesystem(cgroup_fs); |
247 | } | 246 | } |
248 | return ret; | 247 | return ret; |
@@ -250,7 +249,7 @@ static int cpuset_get_sb(struct file_system_type *fs_type, | |||
250 | 249 | ||
251 | static struct file_system_type cpuset_fs_type = { | 250 | static struct file_system_type cpuset_fs_type = { |
252 | .name = "cpuset", | 251 | .name = "cpuset", |
253 | .get_sb = cpuset_get_sb, | 252 | .mount = cpuset_mount, |
254 | }; | 253 | }; |
255 | 254 | ||
256 | /* | 255 | /* |
diff --git a/kernel/cred.c b/kernel/cred.c index 9a3e22641fe7..6a1aa004e376 100644 --- a/kernel/cred.c +++ b/kernel/cred.c | |||
@@ -325,7 +325,7 @@ EXPORT_SYMBOL(prepare_creds); | |||
325 | 325 | ||
326 | /* | 326 | /* |
327 | * Prepare credentials for current to perform an execve() | 327 | * Prepare credentials for current to perform an execve() |
328 | * - The caller must hold current->cred_guard_mutex | 328 | * - The caller must hold ->cred_guard_mutex |
329 | */ | 329 | */ |
330 | struct cred *prepare_exec_creds(void) | 330 | struct cred *prepare_exec_creds(void) |
331 | { | 331 | { |
@@ -384,8 +384,6 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) | |||
384 | struct cred *new; | 384 | struct cred *new; |
385 | int ret; | 385 | int ret; |
386 | 386 | ||
387 | mutex_init(&p->cred_guard_mutex); | ||
388 | |||
389 | if ( | 387 | if ( |
390 | #ifdef CONFIG_KEYS | 388 | #ifdef CONFIG_KEYS |
391 | !p->cred->thread_keyring && | 389 | !p->cred->thread_keyring && |
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index de407c78178d..cefd4a11f6d9 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c | |||
@@ -47,6 +47,7 @@ | |||
47 | #include <linux/pid.h> | 47 | #include <linux/pid.h> |
48 | #include <linux/smp.h> | 48 | #include <linux/smp.h> |
49 | #include <linux/mm.h> | 49 | #include <linux/mm.h> |
50 | #include <linux/rcupdate.h> | ||
50 | 51 | ||
51 | #include <asm/cacheflush.h> | 52 | #include <asm/cacheflush.h> |
52 | #include <asm/byteorder.h> | 53 | #include <asm/byteorder.h> |
@@ -109,13 +110,15 @@ static struct kgdb_bkpt kgdb_break[KGDB_MAX_BREAKPOINTS] = { | |||
109 | */ | 110 | */ |
110 | atomic_t kgdb_active = ATOMIC_INIT(-1); | 111 | atomic_t kgdb_active = ATOMIC_INIT(-1); |
111 | EXPORT_SYMBOL_GPL(kgdb_active); | 112 | EXPORT_SYMBOL_GPL(kgdb_active); |
113 | static DEFINE_RAW_SPINLOCK(dbg_master_lock); | ||
114 | static DEFINE_RAW_SPINLOCK(dbg_slave_lock); | ||
112 | 115 | ||
113 | /* | 116 | /* |
114 | * We use NR_CPUs not PERCPU, in case kgdb is used to debug early | 117 | * We use NR_CPUs not PERCPU, in case kgdb is used to debug early |
115 | * bootup code (which might not have percpu set up yet): | 118 | * bootup code (which might not have percpu set up yet): |
116 | */ | 119 | */ |
117 | static atomic_t passive_cpu_wait[NR_CPUS]; | 120 | static atomic_t masters_in_kgdb; |
118 | static atomic_t cpu_in_kgdb[NR_CPUS]; | 121 | static atomic_t slaves_in_kgdb; |
119 | static atomic_t kgdb_break_tasklet_var; | 122 | static atomic_t kgdb_break_tasklet_var; |
120 | atomic_t kgdb_setting_breakpoint; | 123 | atomic_t kgdb_setting_breakpoint; |
121 | 124 | ||
@@ -206,18 +209,6 @@ int __weak kgdb_skipexception(int exception, struct pt_regs *regs) | |||
206 | return 0; | 209 | return 0; |
207 | } | 210 | } |
208 | 211 | ||
209 | /** | ||
210 | * kgdb_disable_hw_debug - Disable hardware debugging while we in kgdb. | ||
211 | * @regs: Current &struct pt_regs. | ||
212 | * | ||
213 | * This function will be called if the particular architecture must | ||
214 | * disable hardware debugging while it is processing gdb packets or | ||
215 | * handling exception. | ||
216 | */ | ||
217 | void __weak kgdb_disable_hw_debug(struct pt_regs *regs) | ||
218 | { | ||
219 | } | ||
220 | |||
221 | /* | 212 | /* |
222 | * Some architectures need cache flushes when we set/clear a | 213 | * Some architectures need cache flushes when we set/clear a |
223 | * breakpoint: | 214 | * breakpoint: |
@@ -457,26 +448,34 @@ static int kgdb_reenter_check(struct kgdb_state *ks) | |||
457 | return 1; | 448 | return 1; |
458 | } | 449 | } |
459 | 450 | ||
460 | static void dbg_cpu_switch(int cpu, int next_cpu) | 451 | static void dbg_touch_watchdogs(void) |
461 | { | 452 | { |
462 | /* Mark the cpu we are switching away from as a slave when it | 453 | touch_softlockup_watchdog_sync(); |
463 | * holds the kgdb_active token. This must be done so that the | 454 | clocksource_touch_watchdog(); |
464 | * that all the cpus wait in for the debug core will not enter | 455 | rcu_cpu_stall_reset(); |
465 | * again as the master. */ | ||
466 | if (cpu == atomic_read(&kgdb_active)) { | ||
467 | kgdb_info[cpu].exception_state |= DCPU_IS_SLAVE; | ||
468 | kgdb_info[cpu].exception_state &= ~DCPU_WANT_MASTER; | ||
469 | } | ||
470 | kgdb_info[next_cpu].exception_state |= DCPU_NEXT_MASTER; | ||
471 | } | 456 | } |
472 | 457 | ||
473 | static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs) | 458 | static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs, |
459 | int exception_state) | ||
474 | { | 460 | { |
475 | unsigned long flags; | 461 | unsigned long flags; |
476 | int sstep_tries = 100; | 462 | int sstep_tries = 100; |
477 | int error; | 463 | int error; |
478 | int i, cpu; | 464 | int cpu; |
479 | int trace_on = 0; | 465 | int trace_on = 0; |
466 | int online_cpus = num_online_cpus(); | ||
467 | |||
468 | kgdb_info[ks->cpu].enter_kgdb++; | ||
469 | kgdb_info[ks->cpu].exception_state |= exception_state; | ||
470 | |||
471 | if (exception_state == DCPU_WANT_MASTER) | ||
472 | atomic_inc(&masters_in_kgdb); | ||
473 | else | ||
474 | atomic_inc(&slaves_in_kgdb); | ||
475 | |||
476 | if (arch_kgdb_ops.disable_hw_break) | ||
477 | arch_kgdb_ops.disable_hw_break(regs); | ||
478 | |||
480 | acquirelock: | 479 | acquirelock: |
481 | /* | 480 | /* |
482 | * Interrupts will be restored by the 'trap return' code, except when | 481 | * Interrupts will be restored by the 'trap return' code, except when |
@@ -489,14 +488,15 @@ acquirelock: | |||
489 | kgdb_info[cpu].task = current; | 488 | kgdb_info[cpu].task = current; |
490 | kgdb_info[cpu].ret_state = 0; | 489 | kgdb_info[cpu].ret_state = 0; |
491 | kgdb_info[cpu].irq_depth = hardirq_count() >> HARDIRQ_SHIFT; | 490 | kgdb_info[cpu].irq_depth = hardirq_count() >> HARDIRQ_SHIFT; |
492 | /* | ||
493 | * Make sure the above info reaches the primary CPU before | ||
494 | * our cpu_in_kgdb[] flag setting does: | ||
495 | */ | ||
496 | atomic_inc(&cpu_in_kgdb[cpu]); | ||
497 | 491 | ||
498 | if (exception_level == 1) | 492 | /* Make sure the above info reaches the primary CPU */ |
493 | smp_mb(); | ||
494 | |||
495 | if (exception_level == 1) { | ||
496 | if (raw_spin_trylock(&dbg_master_lock)) | ||
497 | atomic_xchg(&kgdb_active, cpu); | ||
499 | goto cpu_master_loop; | 498 | goto cpu_master_loop; |
499 | } | ||
500 | 500 | ||
501 | /* | 501 | /* |
502 | * CPU will loop if it is a slave or request to become a kgdb | 502 | * CPU will loop if it is a slave or request to become a kgdb |
@@ -508,10 +508,12 @@ cpu_loop: | |||
508 | kgdb_info[cpu].exception_state &= ~DCPU_NEXT_MASTER; | 508 | kgdb_info[cpu].exception_state &= ~DCPU_NEXT_MASTER; |
509 | goto cpu_master_loop; | 509 | goto cpu_master_loop; |
510 | } else if (kgdb_info[cpu].exception_state & DCPU_WANT_MASTER) { | 510 | } else if (kgdb_info[cpu].exception_state & DCPU_WANT_MASTER) { |
511 | if (atomic_cmpxchg(&kgdb_active, -1, cpu) == cpu) | 511 | if (raw_spin_trylock(&dbg_master_lock)) { |
512 | atomic_xchg(&kgdb_active, cpu); | ||
512 | break; | 513 | break; |
514 | } | ||
513 | } else if (kgdb_info[cpu].exception_state & DCPU_IS_SLAVE) { | 515 | } else if (kgdb_info[cpu].exception_state & DCPU_IS_SLAVE) { |
514 | if (!atomic_read(&passive_cpu_wait[cpu])) | 516 | if (!raw_spin_is_locked(&dbg_slave_lock)) |
515 | goto return_normal; | 517 | goto return_normal; |
516 | } else { | 518 | } else { |
517 | return_normal: | 519 | return_normal: |
@@ -522,9 +524,12 @@ return_normal: | |||
522 | arch_kgdb_ops.correct_hw_break(); | 524 | arch_kgdb_ops.correct_hw_break(); |
523 | if (trace_on) | 525 | if (trace_on) |
524 | tracing_on(); | 526 | tracing_on(); |
525 | atomic_dec(&cpu_in_kgdb[cpu]); | 527 | kgdb_info[cpu].exception_state &= |
526 | touch_softlockup_watchdog_sync(); | 528 | ~(DCPU_WANT_MASTER | DCPU_IS_SLAVE); |
527 | clocksource_touch_watchdog(); | 529 | kgdb_info[cpu].enter_kgdb--; |
530 | smp_mb__before_atomic_dec(); | ||
531 | atomic_dec(&slaves_in_kgdb); | ||
532 | dbg_touch_watchdogs(); | ||
528 | local_irq_restore(flags); | 533 | local_irq_restore(flags); |
529 | return 0; | 534 | return 0; |
530 | } | 535 | } |
@@ -541,8 +546,8 @@ return_normal: | |||
541 | (kgdb_info[cpu].task && | 546 | (kgdb_info[cpu].task && |
542 | kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) { | 547 | kgdb_info[cpu].task->pid != kgdb_sstep_pid) && --sstep_tries) { |
543 | atomic_set(&kgdb_active, -1); | 548 | atomic_set(&kgdb_active, -1); |
544 | touch_softlockup_watchdog_sync(); | 549 | raw_spin_unlock(&dbg_master_lock); |
545 | clocksource_touch_watchdog(); | 550 | dbg_touch_watchdogs(); |
546 | local_irq_restore(flags); | 551 | local_irq_restore(flags); |
547 | 552 | ||
548 | goto acquirelock; | 553 | goto acquirelock; |
@@ -563,16 +568,12 @@ return_normal: | |||
563 | if (dbg_io_ops->pre_exception) | 568 | if (dbg_io_ops->pre_exception) |
564 | dbg_io_ops->pre_exception(); | 569 | dbg_io_ops->pre_exception(); |
565 | 570 | ||
566 | kgdb_disable_hw_debug(ks->linux_regs); | ||
567 | |||
568 | /* | 571 | /* |
569 | * Get the passive CPU lock which will hold all the non-primary | 572 | * Get the passive CPU lock which will hold all the non-primary |
570 | * CPU in a spin state while the debugger is active | 573 | * CPU in a spin state while the debugger is active |
571 | */ | 574 | */ |
572 | if (!kgdb_single_step) { | 575 | if (!kgdb_single_step) |
573 | for (i = 0; i < NR_CPUS; i++) | 576 | raw_spin_lock(&dbg_slave_lock); |
574 | atomic_inc(&passive_cpu_wait[i]); | ||
575 | } | ||
576 | 577 | ||
577 | #ifdef CONFIG_SMP | 578 | #ifdef CONFIG_SMP |
578 | /* Signal the other CPUs to enter kgdb_wait() */ | 579 | /* Signal the other CPUs to enter kgdb_wait() */ |
@@ -583,10 +584,9 @@ return_normal: | |||
583 | /* | 584 | /* |
584 | * Wait for the other CPUs to be notified and be waiting for us: | 585 | * Wait for the other CPUs to be notified and be waiting for us: |
585 | */ | 586 | */ |
586 | for_each_online_cpu(i) { | 587 | while (kgdb_do_roundup && (atomic_read(&masters_in_kgdb) + |
587 | while (kgdb_do_roundup && !atomic_read(&cpu_in_kgdb[i])) | 588 | atomic_read(&slaves_in_kgdb)) != online_cpus) |
588 | cpu_relax(); | 589 | cpu_relax(); |
589 | } | ||
590 | 590 | ||
591 | /* | 591 | /* |
592 | * At this point the primary processor is completely | 592 | * At this point the primary processor is completely |
@@ -615,7 +615,8 @@ cpu_master_loop: | |||
615 | if (error == DBG_PASS_EVENT) { | 615 | if (error == DBG_PASS_EVENT) { |
616 | dbg_kdb_mode = !dbg_kdb_mode; | 616 | dbg_kdb_mode = !dbg_kdb_mode; |
617 | } else if (error == DBG_SWITCH_CPU_EVENT) { | 617 | } else if (error == DBG_SWITCH_CPU_EVENT) { |
618 | dbg_cpu_switch(cpu, dbg_switch_cpu); | 618 | kgdb_info[dbg_switch_cpu].exception_state |= |
619 | DCPU_NEXT_MASTER; | ||
619 | goto cpu_loop; | 620 | goto cpu_loop; |
620 | } else { | 621 | } else { |
621 | kgdb_info[cpu].ret_state = error; | 622 | kgdb_info[cpu].ret_state = error; |
@@ -627,24 +628,11 @@ cpu_master_loop: | |||
627 | if (dbg_io_ops->post_exception) | 628 | if (dbg_io_ops->post_exception) |
628 | dbg_io_ops->post_exception(); | 629 | dbg_io_ops->post_exception(); |
629 | 630 | ||
630 | atomic_dec(&cpu_in_kgdb[ks->cpu]); | ||
631 | |||
632 | if (!kgdb_single_step) { | 631 | if (!kgdb_single_step) { |
633 | for (i = NR_CPUS-1; i >= 0; i--) | 632 | raw_spin_unlock(&dbg_slave_lock); |
634 | atomic_dec(&passive_cpu_wait[i]); | 633 | /* Wait till all the CPUs have quit from the debugger. */ |
635 | /* | 634 | while (kgdb_do_roundup && atomic_read(&slaves_in_kgdb)) |
636 | * Wait till all the CPUs have quit from the debugger, | 635 | cpu_relax(); |
637 | * but allow a CPU that hit an exception and is | ||
638 | * waiting to become the master to remain in the debug | ||
639 | * core. | ||
640 | */ | ||
641 | for_each_online_cpu(i) { | ||
642 | while (kgdb_do_roundup && | ||
643 | atomic_read(&cpu_in_kgdb[i]) && | ||
644 | !(kgdb_info[i].exception_state & | ||
645 | DCPU_WANT_MASTER)) | ||
646 | cpu_relax(); | ||
647 | } | ||
648 | } | 636 | } |
649 | 637 | ||
650 | kgdb_restore: | 638 | kgdb_restore: |
@@ -655,12 +643,20 @@ kgdb_restore: | |||
655 | else | 643 | else |
656 | kgdb_sstep_pid = 0; | 644 | kgdb_sstep_pid = 0; |
657 | } | 645 | } |
646 | if (arch_kgdb_ops.correct_hw_break) | ||
647 | arch_kgdb_ops.correct_hw_break(); | ||
658 | if (trace_on) | 648 | if (trace_on) |
659 | tracing_on(); | 649 | tracing_on(); |
650 | |||
651 | kgdb_info[cpu].exception_state &= | ||
652 | ~(DCPU_WANT_MASTER | DCPU_IS_SLAVE); | ||
653 | kgdb_info[cpu].enter_kgdb--; | ||
654 | smp_mb__before_atomic_dec(); | ||
655 | atomic_dec(&masters_in_kgdb); | ||
660 | /* Free kgdb_active */ | 656 | /* Free kgdb_active */ |
661 | atomic_set(&kgdb_active, -1); | 657 | atomic_set(&kgdb_active, -1); |
662 | touch_softlockup_watchdog_sync(); | 658 | raw_spin_unlock(&dbg_master_lock); |
663 | clocksource_touch_watchdog(); | 659 | dbg_touch_watchdogs(); |
664 | local_irq_restore(flags); | 660 | local_irq_restore(flags); |
665 | 661 | ||
666 | return kgdb_info[cpu].ret_state; | 662 | return kgdb_info[cpu].ret_state; |
@@ -678,7 +674,6 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs) | |||
678 | { | 674 | { |
679 | struct kgdb_state kgdb_var; | 675 | struct kgdb_state kgdb_var; |
680 | struct kgdb_state *ks = &kgdb_var; | 676 | struct kgdb_state *ks = &kgdb_var; |
681 | int ret; | ||
682 | 677 | ||
683 | ks->cpu = raw_smp_processor_id(); | 678 | ks->cpu = raw_smp_processor_id(); |
684 | ks->ex_vector = evector; | 679 | ks->ex_vector = evector; |
@@ -689,11 +684,10 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs) | |||
689 | 684 | ||
690 | if (kgdb_reenter_check(ks)) | 685 | if (kgdb_reenter_check(ks)) |
691 | return 0; /* Ouch, double exception ! */ | 686 | return 0; /* Ouch, double exception ! */ |
692 | kgdb_info[ks->cpu].exception_state |= DCPU_WANT_MASTER; | 687 | if (kgdb_info[ks->cpu].enter_kgdb != 0) |
693 | ret = kgdb_cpu_enter(ks, regs); | 688 | return 0; |
694 | kgdb_info[ks->cpu].exception_state &= ~(DCPU_WANT_MASTER | | 689 | |
695 | DCPU_IS_SLAVE); | 690 | return kgdb_cpu_enter(ks, regs, DCPU_WANT_MASTER); |
696 | return ret; | ||
697 | } | 691 | } |
698 | 692 | ||
699 | int kgdb_nmicallback(int cpu, void *regs) | 693 | int kgdb_nmicallback(int cpu, void *regs) |
@@ -706,12 +700,9 @@ int kgdb_nmicallback(int cpu, void *regs) | |||
706 | ks->cpu = cpu; | 700 | ks->cpu = cpu; |
707 | ks->linux_regs = regs; | 701 | ks->linux_regs = regs; |
708 | 702 | ||
709 | if (!atomic_read(&cpu_in_kgdb[cpu]) && | 703 | if (kgdb_info[ks->cpu].enter_kgdb == 0 && |
710 | atomic_read(&kgdb_active) != -1 && | 704 | raw_spin_is_locked(&dbg_master_lock)) { |
711 | atomic_read(&kgdb_active) != cpu) { | 705 | kgdb_cpu_enter(ks, regs, DCPU_IS_SLAVE); |
712 | kgdb_info[cpu].exception_state |= DCPU_IS_SLAVE; | ||
713 | kgdb_cpu_enter(ks, regs); | ||
714 | kgdb_info[cpu].exception_state &= ~DCPU_IS_SLAVE; | ||
715 | return 0; | 706 | return 0; |
716 | } | 707 | } |
717 | #endif | 708 | #endif |
diff --git a/kernel/debug/debug_core.h b/kernel/debug/debug_core.h index c5d753d80f67..3494c28a7e7a 100644 --- a/kernel/debug/debug_core.h +++ b/kernel/debug/debug_core.h | |||
@@ -40,6 +40,7 @@ struct debuggerinfo_struct { | |||
40 | int exception_state; | 40 | int exception_state; |
41 | int ret_state; | 41 | int ret_state; |
42 | int irq_depth; | 42 | int irq_depth; |
43 | int enter_kgdb; | ||
43 | }; | 44 | }; |
44 | 45 | ||
45 | extern struct debuggerinfo_struct kgdb_info[]; | 46 | extern struct debuggerinfo_struct kgdb_info[]; |
diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c index bf6e8270e957..dd0b1b7dd02c 100644 --- a/kernel/debug/kdb/kdb_debugger.c +++ b/kernel/debug/kdb/kdb_debugger.c | |||
@@ -86,7 +86,7 @@ int kdb_stub(struct kgdb_state *ks) | |||
86 | } | 86 | } |
87 | /* Set initial kdb state variables */ | 87 | /* Set initial kdb state variables */ |
88 | KDB_STATE_CLEAR(KGDB_TRANS); | 88 | KDB_STATE_CLEAR(KGDB_TRANS); |
89 | kdb_initial_cpu = ks->cpu; | 89 | kdb_initial_cpu = atomic_read(&kgdb_active); |
90 | kdb_current_task = kgdb_info[ks->cpu].task; | 90 | kdb_current_task = kgdb_info[ks->cpu].task; |
91 | kdb_current_regs = kgdb_info[ks->cpu].debuggerinfo; | 91 | kdb_current_regs = kgdb_info[ks->cpu].debuggerinfo; |
92 | /* Remove any breakpoints as needed by kdb and clear single step */ | 92 | /* Remove any breakpoints as needed by kdb and clear single step */ |
@@ -105,7 +105,6 @@ int kdb_stub(struct kgdb_state *ks) | |||
105 | ks->pass_exception = 1; | 105 | ks->pass_exception = 1; |
106 | KDB_FLAG_SET(CATASTROPHIC); | 106 | KDB_FLAG_SET(CATASTROPHIC); |
107 | } | 107 | } |
108 | kdb_initial_cpu = ks->cpu; | ||
109 | if (KDB_STATE(SSBPT) && reason == KDB_REASON_SSTEP) { | 108 | if (KDB_STATE(SSBPT) && reason == KDB_REASON_SSTEP) { |
110 | KDB_STATE_CLEAR(SSBPT); | 109 | KDB_STATE_CLEAR(SSBPT); |
111 | KDB_STATE_CLEAR(DOING_SS); | 110 | KDB_STATE_CLEAR(DOING_SS); |
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c index c9b7f4f90bba..96fdaac46a80 100644 --- a/kernel/debug/kdb/kdb_io.c +++ b/kernel/debug/kdb/kdb_io.c | |||
@@ -823,4 +823,4 @@ int kdb_printf(const char *fmt, ...) | |||
823 | 823 | ||
824 | return r; | 824 | return r; |
825 | } | 825 | } |
826 | 826 | EXPORT_SYMBOL_GPL(kdb_printf); | |
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index caf057a3de0e..a6e729766821 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c | |||
@@ -82,7 +82,7 @@ static kdbtab_t kdb_base_commands[50]; | |||
82 | #define for_each_kdbcmd(cmd, num) \ | 82 | #define for_each_kdbcmd(cmd, num) \ |
83 | for ((cmd) = kdb_base_commands, (num) = 0; \ | 83 | for ((cmd) = kdb_base_commands, (num) = 0; \ |
84 | num < kdb_max_commands; \ | 84 | num < kdb_max_commands; \ |
85 | num == KDB_BASE_CMD_MAX ? cmd = kdb_commands : cmd++, num++) | 85 | num++, num == KDB_BASE_CMD_MAX ? cmd = kdb_commands : cmd++) |
86 | 86 | ||
87 | typedef struct _kdbmsg { | 87 | typedef struct _kdbmsg { |
88 | int km_diag; /* kdb diagnostic */ | 88 | int km_diag; /* kdb diagnostic */ |
@@ -646,7 +646,7 @@ static int kdb_defcmd2(const char *cmdstr, const char *argv0) | |||
646 | } | 646 | } |
647 | if (!s->usable) | 647 | if (!s->usable) |
648 | return KDB_NOTIMP; | 648 | return KDB_NOTIMP; |
649 | s->command = kmalloc((s->count + 1) * sizeof(*(s->command)), GFP_KDB); | 649 | s->command = kzalloc((s->count + 1) * sizeof(*(s->command)), GFP_KDB); |
650 | if (!s->command) { | 650 | if (!s->command) { |
651 | kdb_printf("Could not allocate new kdb_defcmd table for %s\n", | 651 | kdb_printf("Could not allocate new kdb_defcmd table for %s\n", |
652 | cmdstr); | 652 | cmdstr); |
@@ -1127,7 +1127,7 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, | |||
1127 | /* special case below */ | 1127 | /* special case below */ |
1128 | } else { | 1128 | } else { |
1129 | kdb_printf("\nEntering kdb (current=0x%p, pid %d) ", | 1129 | kdb_printf("\nEntering kdb (current=0x%p, pid %d) ", |
1130 | kdb_current, kdb_current->pid); | 1130 | kdb_current, kdb_current ? kdb_current->pid : 0); |
1131 | #if defined(CONFIG_SMP) | 1131 | #if defined(CONFIG_SMP) |
1132 | kdb_printf("on processor %d ", raw_smp_processor_id()); | 1132 | kdb_printf("on processor %d ", raw_smp_processor_id()); |
1133 | #endif | 1133 | #endif |
@@ -1749,13 +1749,13 @@ static int kdb_go(int argc, const char **argv) | |||
1749 | int nextarg; | 1749 | int nextarg; |
1750 | long offset; | 1750 | long offset; |
1751 | 1751 | ||
1752 | if (raw_smp_processor_id() != kdb_initial_cpu) { | ||
1753 | kdb_printf("go must execute on the entry cpu, " | ||
1754 | "please use \"cpu %d\" and then execute go\n", | ||
1755 | kdb_initial_cpu); | ||
1756 | return KDB_BADCPUNUM; | ||
1757 | } | ||
1752 | if (argc == 1) { | 1758 | if (argc == 1) { |
1753 | if (raw_smp_processor_id() != kdb_initial_cpu) { | ||
1754 | kdb_printf("go <address> must be issued from the " | ||
1755 | "initial cpu, do cpu %d first\n", | ||
1756 | kdb_initial_cpu); | ||
1757 | return KDB_ARGCOUNT; | ||
1758 | } | ||
1759 | nextarg = 1; | 1759 | nextarg = 1; |
1760 | diag = kdbgetaddrarg(argc, argv, &nextarg, | 1760 | diag = kdbgetaddrarg(argc, argv, &nextarg, |
1761 | &addr, &offset, NULL); | 1761 | &addr, &offset, NULL); |
@@ -2361,7 +2361,7 @@ static int kdb_pid(int argc, const char **argv) | |||
2361 | */ | 2361 | */ |
2362 | static int kdb_ll(int argc, const char **argv) | 2362 | static int kdb_ll(int argc, const char **argv) |
2363 | { | 2363 | { |
2364 | int diag; | 2364 | int diag = 0; |
2365 | unsigned long addr; | 2365 | unsigned long addr; |
2366 | long offset = 0; | 2366 | long offset = 0; |
2367 | unsigned long va; | 2367 | unsigned long va; |
@@ -2400,20 +2400,21 @@ static int kdb_ll(int argc, const char **argv) | |||
2400 | char buf[80]; | 2400 | char buf[80]; |
2401 | 2401 | ||
2402 | if (KDB_FLAG(CMD_INTERRUPT)) | 2402 | if (KDB_FLAG(CMD_INTERRUPT)) |
2403 | return 0; | 2403 | goto out; |
2404 | 2404 | ||
2405 | sprintf(buf, "%s " kdb_machreg_fmt "\n", command, va); | 2405 | sprintf(buf, "%s " kdb_machreg_fmt "\n", command, va); |
2406 | diag = kdb_parse(buf); | 2406 | diag = kdb_parse(buf); |
2407 | if (diag) | 2407 | if (diag) |
2408 | return diag; | 2408 | goto out; |
2409 | 2409 | ||
2410 | addr = va + linkoffset; | 2410 | addr = va + linkoffset; |
2411 | if (kdb_getword(&va, addr, sizeof(va))) | 2411 | if (kdb_getword(&va, addr, sizeof(va))) |
2412 | return 0; | 2412 | goto out; |
2413 | } | 2413 | } |
2414 | kfree(command); | ||
2415 | 2414 | ||
2416 | return 0; | 2415 | out: |
2416 | kfree(command); | ||
2417 | return diag; | ||
2417 | } | 2418 | } |
2418 | 2419 | ||
2419 | static int kdb_kgdb(int argc, const char **argv) | 2420 | static int kdb_kgdb(int argc, const char **argv) |
@@ -2603,20 +2604,17 @@ static int kdb_summary(int argc, const char **argv) | |||
2603 | */ | 2604 | */ |
2604 | static int kdb_per_cpu(int argc, const char **argv) | 2605 | static int kdb_per_cpu(int argc, const char **argv) |
2605 | { | 2606 | { |
2606 | char buf[256], fmtstr[64]; | 2607 | char fmtstr[64]; |
2607 | kdb_symtab_t symtab; | 2608 | int cpu, diag, nextarg = 1; |
2608 | cpumask_t suppress = CPU_MASK_NONE; | 2609 | unsigned long addr, symaddr, val, bytesperword = 0, whichcpu = ~0UL; |
2609 | int cpu, diag; | ||
2610 | unsigned long addr, val, bytesperword = 0, whichcpu = ~0UL; | ||
2611 | 2610 | ||
2612 | if (argc < 1 || argc > 3) | 2611 | if (argc < 1 || argc > 3) |
2613 | return KDB_ARGCOUNT; | 2612 | return KDB_ARGCOUNT; |
2614 | 2613 | ||
2615 | snprintf(buf, sizeof(buf), "per_cpu__%s", argv[1]); | 2614 | diag = kdbgetaddrarg(argc, argv, &nextarg, &symaddr, NULL, NULL); |
2616 | if (!kdbgetsymval(buf, &symtab)) { | 2615 | if (diag) |
2617 | kdb_printf("%s is not a per_cpu variable\n", argv[1]); | 2616 | return diag; |
2618 | return KDB_BADADDR; | 2617 | |
2619 | } | ||
2620 | if (argc >= 2) { | 2618 | if (argc >= 2) { |
2621 | diag = kdbgetularg(argv[2], &bytesperword); | 2619 | diag = kdbgetularg(argv[2], &bytesperword); |
2622 | if (diag) | 2620 | if (diag) |
@@ -2649,46 +2647,25 @@ static int kdb_per_cpu(int argc, const char **argv) | |||
2649 | #define KDB_PCU(cpu) 0 | 2647 | #define KDB_PCU(cpu) 0 |
2650 | #endif | 2648 | #endif |
2651 | #endif | 2649 | #endif |
2652 | |||
2653 | for_each_online_cpu(cpu) { | 2650 | for_each_online_cpu(cpu) { |
2651 | if (KDB_FLAG(CMD_INTERRUPT)) | ||
2652 | return 0; | ||
2653 | |||
2654 | if (whichcpu != ~0UL && whichcpu != cpu) | 2654 | if (whichcpu != ~0UL && whichcpu != cpu) |
2655 | continue; | 2655 | continue; |
2656 | addr = symtab.sym_start + KDB_PCU(cpu); | 2656 | addr = symaddr + KDB_PCU(cpu); |
2657 | diag = kdb_getword(&val, addr, bytesperword); | 2657 | diag = kdb_getword(&val, addr, bytesperword); |
2658 | if (diag) { | 2658 | if (diag) { |
2659 | kdb_printf("%5d " kdb_bfd_vma_fmt0 " - unable to " | 2659 | kdb_printf("%5d " kdb_bfd_vma_fmt0 " - unable to " |
2660 | "read, diag=%d\n", cpu, addr, diag); | 2660 | "read, diag=%d\n", cpu, addr, diag); |
2661 | continue; | 2661 | continue; |
2662 | } | 2662 | } |
2663 | #ifdef CONFIG_SMP | ||
2664 | if (!val) { | ||
2665 | cpu_set(cpu, suppress); | ||
2666 | continue; | ||
2667 | } | ||
2668 | #endif /* CONFIG_SMP */ | ||
2669 | kdb_printf("%5d ", cpu); | 2663 | kdb_printf("%5d ", cpu); |
2670 | kdb_md_line(fmtstr, addr, | 2664 | kdb_md_line(fmtstr, addr, |
2671 | bytesperword == KDB_WORD_SIZE, | 2665 | bytesperword == KDB_WORD_SIZE, |
2672 | 1, bytesperword, 1, 1, 0); | 2666 | 1, bytesperword, 1, 1, 0); |
2673 | } | 2667 | } |
2674 | if (cpus_weight(suppress) == 0) | ||
2675 | return 0; | ||
2676 | kdb_printf("Zero suppressed cpu(s):"); | ||
2677 | for (cpu = first_cpu(suppress); cpu < num_possible_cpus(); | ||
2678 | cpu = next_cpu(cpu, suppress)) { | ||
2679 | kdb_printf(" %d", cpu); | ||
2680 | if (cpu == num_possible_cpus() - 1 || | ||
2681 | next_cpu(cpu, suppress) != cpu + 1) | ||
2682 | continue; | ||
2683 | while (cpu < num_possible_cpus() && | ||
2684 | next_cpu(cpu, suppress) == cpu + 1) | ||
2685 | ++cpu; | ||
2686 | kdb_printf("-%d", cpu); | ||
2687 | } | ||
2688 | kdb_printf("\n"); | ||
2689 | |||
2690 | #undef KDB_PCU | 2668 | #undef KDB_PCU |
2691 | |||
2692 | return 0; | 2669 | return 0; |
2693 | } | 2670 | } |
2694 | 2671 | ||
@@ -2763,13 +2740,13 @@ int kdb_register_repeat(char *cmd, | |||
2763 | } | 2740 | } |
2764 | if (kdb_commands) { | 2741 | if (kdb_commands) { |
2765 | memcpy(new, kdb_commands, | 2742 | memcpy(new, kdb_commands, |
2766 | kdb_max_commands * sizeof(*new)); | 2743 | (kdb_max_commands - KDB_BASE_CMD_MAX) * sizeof(*new)); |
2767 | kfree(kdb_commands); | 2744 | kfree(kdb_commands); |
2768 | } | 2745 | } |
2769 | memset(new + kdb_max_commands, 0, | 2746 | memset(new + kdb_max_commands, 0, |
2770 | kdb_command_extend * sizeof(*new)); | 2747 | kdb_command_extend * sizeof(*new)); |
2771 | kdb_commands = new; | 2748 | kdb_commands = new; |
2772 | kp = kdb_commands + kdb_max_commands; | 2749 | kp = kdb_commands + kdb_max_commands - KDB_BASE_CMD_MAX; |
2773 | kdb_max_commands += kdb_command_extend; | 2750 | kdb_max_commands += kdb_command_extend; |
2774 | } | 2751 | } |
2775 | 2752 | ||
@@ -2783,6 +2760,8 @@ int kdb_register_repeat(char *cmd, | |||
2783 | 2760 | ||
2784 | return 0; | 2761 | return 0; |
2785 | } | 2762 | } |
2763 | EXPORT_SYMBOL_GPL(kdb_register_repeat); | ||
2764 | |||
2786 | 2765 | ||
2787 | /* | 2766 | /* |
2788 | * kdb_register - Compatibility register function for commands that do | 2767 | * kdb_register - Compatibility register function for commands that do |
@@ -2805,6 +2784,7 @@ int kdb_register(char *cmd, | |||
2805 | return kdb_register_repeat(cmd, func, usage, help, minlen, | 2784 | return kdb_register_repeat(cmd, func, usage, help, minlen, |
2806 | KDB_REPEAT_NONE); | 2785 | KDB_REPEAT_NONE); |
2807 | } | 2786 | } |
2787 | EXPORT_SYMBOL_GPL(kdb_register); | ||
2808 | 2788 | ||
2809 | /* | 2789 | /* |
2810 | * kdb_unregister - This function is used to unregister a kernel | 2790 | * kdb_unregister - This function is used to unregister a kernel |
@@ -2823,7 +2803,7 @@ int kdb_unregister(char *cmd) | |||
2823 | /* | 2803 | /* |
2824 | * find the command. | 2804 | * find the command. |
2825 | */ | 2805 | */ |
2826 | for (i = 0, kp = kdb_commands; i < kdb_max_commands; i++, kp++) { | 2806 | for_each_kdbcmd(kp, i) { |
2827 | if (kp->cmd_name && (strcmp(kp->cmd_name, cmd) == 0)) { | 2807 | if (kp->cmd_name && (strcmp(kp->cmd_name, cmd) == 0)) { |
2828 | kp->cmd_name = NULL; | 2808 | kp->cmd_name = NULL; |
2829 | return 0; | 2809 | return 0; |
@@ -2833,6 +2813,7 @@ int kdb_unregister(char *cmd) | |||
2833 | /* Couldn't find it. */ | 2813 | /* Couldn't find it. */ |
2834 | return 1; | 2814 | return 1; |
2835 | } | 2815 | } |
2816 | EXPORT_SYMBOL_GPL(kdb_unregister); | ||
2836 | 2817 | ||
2837 | /* Initialize the kdb command table. */ | 2818 | /* Initialize the kdb command table. */ |
2838 | static void __init kdb_inittab(void) | 2819 | static void __init kdb_inittab(void) |
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index be775f7e81e0..35d69ed1dfb5 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h | |||
@@ -15,29 +15,6 @@ | |||
15 | #include <linux/kgdb.h> | 15 | #include <linux/kgdb.h> |
16 | #include "../debug_core.h" | 16 | #include "../debug_core.h" |
17 | 17 | ||
18 | /* Kernel Debugger Error codes. Must not overlap with command codes. */ | ||
19 | #define KDB_NOTFOUND (-1) | ||
20 | #define KDB_ARGCOUNT (-2) | ||
21 | #define KDB_BADWIDTH (-3) | ||
22 | #define KDB_BADRADIX (-4) | ||
23 | #define KDB_NOTENV (-5) | ||
24 | #define KDB_NOENVVALUE (-6) | ||
25 | #define KDB_NOTIMP (-7) | ||
26 | #define KDB_ENVFULL (-8) | ||
27 | #define KDB_ENVBUFFULL (-9) | ||
28 | #define KDB_TOOMANYBPT (-10) | ||
29 | #define KDB_TOOMANYDBREGS (-11) | ||
30 | #define KDB_DUPBPT (-12) | ||
31 | #define KDB_BPTNOTFOUND (-13) | ||
32 | #define KDB_BADMODE (-14) | ||
33 | #define KDB_BADINT (-15) | ||
34 | #define KDB_INVADDRFMT (-16) | ||
35 | #define KDB_BADREG (-17) | ||
36 | #define KDB_BADCPUNUM (-18) | ||
37 | #define KDB_BADLENGTH (-19) | ||
38 | #define KDB_NOBP (-20) | ||
39 | #define KDB_BADADDR (-21) | ||
40 | |||
41 | /* Kernel Debugger Command codes. Must not overlap with error codes. */ | 18 | /* Kernel Debugger Command codes. Must not overlap with error codes. */ |
42 | #define KDB_CMD_GO (-1001) | 19 | #define KDB_CMD_GO (-1001) |
43 | #define KDB_CMD_CPU (-1002) | 20 | #define KDB_CMD_CPU (-1002) |
@@ -93,17 +70,6 @@ | |||
93 | */ | 70 | */ |
94 | #define KDB_MAXBPT 16 | 71 | #define KDB_MAXBPT 16 |
95 | 72 | ||
96 | /* Maximum number of arguments to a function */ | ||
97 | #define KDB_MAXARGS 16 | ||
98 | |||
99 | typedef enum { | ||
100 | KDB_REPEAT_NONE = 0, /* Do not repeat this command */ | ||
101 | KDB_REPEAT_NO_ARGS, /* Repeat the command without arguments */ | ||
102 | KDB_REPEAT_WITH_ARGS, /* Repeat the command including its arguments */ | ||
103 | } kdb_repeat_t; | ||
104 | |||
105 | typedef int (*kdb_func_t)(int, const char **); | ||
106 | |||
107 | /* Symbol table format returned by kallsyms. */ | 73 | /* Symbol table format returned by kallsyms. */ |
108 | typedef struct __ksymtab { | 74 | typedef struct __ksymtab { |
109 | unsigned long value; /* Address of symbol */ | 75 | unsigned long value; /* Address of symbol */ |
@@ -123,11 +89,6 @@ extern int kallsyms_symbol_next(char *prefix_name, int flag); | |||
123 | extern int kallsyms_symbol_complete(char *prefix_name, int max_len); | 89 | extern int kallsyms_symbol_complete(char *prefix_name, int max_len); |
124 | 90 | ||
125 | /* Exported Symbols for kernel loadable modules to use. */ | 91 | /* Exported Symbols for kernel loadable modules to use. */ |
126 | extern int kdb_register(char *, kdb_func_t, char *, char *, short); | ||
127 | extern int kdb_register_repeat(char *, kdb_func_t, char *, char *, | ||
128 | short, kdb_repeat_t); | ||
129 | extern int kdb_unregister(char *); | ||
130 | |||
131 | extern int kdb_getarea_size(void *, unsigned long, size_t); | 92 | extern int kdb_getarea_size(void *, unsigned long, size_t); |
132 | extern int kdb_putarea_size(unsigned long, void *, size_t); | 93 | extern int kdb_putarea_size(unsigned long, void *, size_t); |
133 | 94 | ||
@@ -144,6 +105,7 @@ extern int kdb_getword(unsigned long *, unsigned long, size_t); | |||
144 | extern int kdb_putword(unsigned long, unsigned long, size_t); | 105 | extern int kdb_putword(unsigned long, unsigned long, size_t); |
145 | 106 | ||
146 | extern int kdbgetularg(const char *, unsigned long *); | 107 | extern int kdbgetularg(const char *, unsigned long *); |
108 | extern int kdbgetu64arg(const char *, u64 *); | ||
147 | extern char *kdbgetenv(const char *); | 109 | extern char *kdbgetenv(const char *); |
148 | extern int kdbgetaddrarg(int, const char **, int*, unsigned long *, | 110 | extern int kdbgetaddrarg(int, const char **, int*, unsigned long *, |
149 | long *, char **); | 111 | long *, char **); |
@@ -255,14 +217,6 @@ extern void kdb_ps1(const struct task_struct *p); | |||
255 | extern void kdb_print_nameval(const char *name, unsigned long val); | 217 | extern void kdb_print_nameval(const char *name, unsigned long val); |
256 | extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info); | 218 | extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info); |
257 | extern void kdb_meminfo_proc_show(void); | 219 | extern void kdb_meminfo_proc_show(void); |
258 | #ifdef CONFIG_KALLSYMS | ||
259 | extern const char *kdb_walk_kallsyms(loff_t *pos); | ||
260 | #else /* ! CONFIG_KALLSYMS */ | ||
261 | static inline const char *kdb_walk_kallsyms(loff_t *pos) | ||
262 | { | ||
263 | return NULL; | ||
264 | } | ||
265 | #endif /* ! CONFIG_KALLSYMS */ | ||
266 | extern char *kdb_getstr(char *, size_t, char *); | 220 | extern char *kdb_getstr(char *, size_t, char *); |
267 | 221 | ||
268 | /* Defines for kdb_symbol_print */ | 222 | /* Defines for kdb_symbol_print */ |
diff --git a/kernel/exit.c b/kernel/exit.c index e2bdf37f9fde..676149a4ac5f 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -50,6 +50,7 @@ | |||
50 | #include <linux/perf_event.h> | 50 | #include <linux/perf_event.h> |
51 | #include <trace/events/sched.h> | 51 | #include <trace/events/sched.h> |
52 | #include <linux/hw_breakpoint.h> | 52 | #include <linux/hw_breakpoint.h> |
53 | #include <linux/oom.h> | ||
53 | 54 | ||
54 | #include <asm/uaccess.h> | 55 | #include <asm/uaccess.h> |
55 | #include <asm/unistd.h> | 56 | #include <asm/unistd.h> |
@@ -95,6 +96,14 @@ static void __exit_signal(struct task_struct *tsk) | |||
95 | sig->tty = NULL; | 96 | sig->tty = NULL; |
96 | } else { | 97 | } else { |
97 | /* | 98 | /* |
99 | * This can only happen if the caller is de_thread(). | ||
100 | * FIXME: this is a temporary hack, we should teach | ||
101 | * posix-cpu-timers to handle this case correctly. | ||
102 | */ | ||
103 | if (unlikely(has_group_leader_pid(tsk))) | ||
104 | posix_cpu_timers_exit_group(tsk); | ||
105 | |||
106 | /* | ||
98 | * If there is any task waiting for the group exit | 107 | * If there is any task waiting for the group exit |
99 | * then notify it: | 108 | * then notify it: |
100 | */ | 109 | */ |
@@ -687,6 +696,8 @@ static void exit_mm(struct task_struct * tsk) | |||
687 | enter_lazy_tlb(mm, current); | 696 | enter_lazy_tlb(mm, current); |
688 | /* We don't want this task to be frozen prematurely */ | 697 | /* We don't want this task to be frozen prematurely */ |
689 | clear_freeze_flag(tsk); | 698 | clear_freeze_flag(tsk); |
699 | if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) | ||
700 | atomic_dec(&mm->oom_disable_count); | ||
690 | task_unlock(tsk); | 701 | task_unlock(tsk); |
691 | mm_update_next_owner(mm); | 702 | mm_update_next_owner(mm); |
692 | mmput(mm); | 703 | mmput(mm); |
@@ -700,6 +711,8 @@ static void exit_mm(struct task_struct * tsk) | |||
700 | * space. | 711 | * space. |
701 | */ | 712 | */ |
702 | static struct task_struct *find_new_reaper(struct task_struct *father) | 713 | static struct task_struct *find_new_reaper(struct task_struct *father) |
714 | __releases(&tasklist_lock) | ||
715 | __acquires(&tasklist_lock) | ||
703 | { | 716 | { |
704 | struct pid_namespace *pid_ns = task_active_pid_ns(father); | 717 | struct pid_namespace *pid_ns = task_active_pid_ns(father); |
705 | struct task_struct *thread; | 718 | struct task_struct *thread; |
@@ -901,6 +914,15 @@ NORET_TYPE void do_exit(long code) | |||
901 | if (unlikely(!tsk->pid)) | 914 | if (unlikely(!tsk->pid)) |
902 | panic("Attempted to kill the idle task!"); | 915 | panic("Attempted to kill the idle task!"); |
903 | 916 | ||
917 | /* | ||
918 | * If do_exit is called because this process oopsed, it's possible | ||
919 | * that get_fs() was left as KERNEL_DS, so reset it to USER_DS before | ||
920 | * continuing. Amongst other possible reasons, this is to prevent | ||
921 | * mm_release()->clear_child_tid() from writing to a user-controlled | ||
922 | * kernel address. | ||
923 | */ | ||
924 | set_fs(USER_DS); | ||
925 | |||
904 | tracehook_report_exit(&code); | 926 | tracehook_report_exit(&code); |
905 | 927 | ||
906 | validate_creds_for_do_exit(tsk); | 928 | validate_creds_for_do_exit(tsk); |
diff --git a/kernel/fork.c b/kernel/fork.c index c445f8cc408d..5447dc7defa9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -65,6 +65,7 @@ | |||
65 | #include <linux/perf_event.h> | 65 | #include <linux/perf_event.h> |
66 | #include <linux/posix-timers.h> | 66 | #include <linux/posix-timers.h> |
67 | #include <linux/user-return-notifier.h> | 67 | #include <linux/user-return-notifier.h> |
68 | #include <linux/oom.h> | ||
68 | 69 | ||
69 | #include <asm/pgtable.h> | 70 | #include <asm/pgtable.h> |
70 | #include <asm/pgalloc.h> | 71 | #include <asm/pgalloc.h> |
@@ -272,6 +273,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) | |||
272 | 273 | ||
273 | setup_thread_stack(tsk, orig); | 274 | setup_thread_stack(tsk, orig); |
274 | clear_user_return_notifier(tsk); | 275 | clear_user_return_notifier(tsk); |
276 | clear_tsk_need_resched(tsk); | ||
275 | stackend = end_of_stack(tsk); | 277 | stackend = end_of_stack(tsk); |
276 | *stackend = STACK_END_MAGIC; /* for overflow detection */ | 278 | *stackend = STACK_END_MAGIC; /* for overflow detection */ |
277 | 279 | ||
@@ -488,6 +490,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) | |||
488 | mm->cached_hole_size = ~0UL; | 490 | mm->cached_hole_size = ~0UL; |
489 | mm_init_aio(mm); | 491 | mm_init_aio(mm); |
490 | mm_init_owner(mm, p); | 492 | mm_init_owner(mm, p); |
493 | atomic_set(&mm->oom_disable_count, 0); | ||
491 | 494 | ||
492 | if (likely(!mm_alloc_pgd(mm))) { | 495 | if (likely(!mm_alloc_pgd(mm))) { |
493 | mm->def_flags = 0; | 496 | mm->def_flags = 0; |
@@ -741,6 +744,8 @@ good_mm: | |||
741 | /* Initializing for Swap token stuff */ | 744 | /* Initializing for Swap token stuff */ |
742 | mm->token_priority = 0; | 745 | mm->token_priority = 0; |
743 | mm->last_interval = 0; | 746 | mm->last_interval = 0; |
747 | if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) | ||
748 | atomic_inc(&mm->oom_disable_count); | ||
744 | 749 | ||
745 | tsk->mm = mm; | 750 | tsk->mm = mm; |
746 | tsk->active_mm = mm; | 751 | tsk->active_mm = mm; |
@@ -904,6 +909,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
904 | sig->oom_adj = current->signal->oom_adj; | 909 | sig->oom_adj = current->signal->oom_adj; |
905 | sig->oom_score_adj = current->signal->oom_score_adj; | 910 | sig->oom_score_adj = current->signal->oom_score_adj; |
906 | 911 | ||
912 | mutex_init(&sig->cred_guard_mutex); | ||
913 | |||
907 | return 0; | 914 | return 0; |
908 | } | 915 | } |
909 | 916 | ||
@@ -1299,8 +1306,13 @@ bad_fork_cleanup_io: | |||
1299 | bad_fork_cleanup_namespaces: | 1306 | bad_fork_cleanup_namespaces: |
1300 | exit_task_namespaces(p); | 1307 | exit_task_namespaces(p); |
1301 | bad_fork_cleanup_mm: | 1308 | bad_fork_cleanup_mm: |
1302 | if (p->mm) | 1309 | if (p->mm) { |
1310 | task_lock(p); | ||
1311 | if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) | ||
1312 | atomic_dec(&p->mm->oom_disable_count); | ||
1313 | task_unlock(p); | ||
1303 | mmput(p->mm); | 1314 | mmput(p->mm); |
1315 | } | ||
1304 | bad_fork_cleanup_signal: | 1316 | bad_fork_cleanup_signal: |
1305 | if (!(clone_flags & CLONE_THREAD)) | 1317 | if (!(clone_flags & CLONE_THREAD)) |
1306 | free_signal_struct(p->signal); | 1318 | free_signal_struct(p->signal); |
@@ -1693,6 +1705,10 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) | |||
1693 | active_mm = current->active_mm; | 1705 | active_mm = current->active_mm; |
1694 | current->mm = new_mm; | 1706 | current->mm = new_mm; |
1695 | current->active_mm = new_mm; | 1707 | current->active_mm = new_mm; |
1708 | if (current->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) { | ||
1709 | atomic_dec(&mm->oom_disable_count); | ||
1710 | atomic_inc(&new_mm->oom_disable_count); | ||
1711 | } | ||
1696 | activate_mm(active_mm, new_mm); | 1712 | activate_mm(active_mm, new_mm); |
1697 | new_mm = mm; | 1713 | new_mm = mm; |
1698 | } | 1714 | } |
diff --git a/kernel/futex.c b/kernel/futex.c index a118bf160e0b..40a8777a27d0 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -169,7 +169,7 @@ static void get_futex_key_refs(union futex_key *key) | |||
169 | 169 | ||
170 | switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { | 170 | switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { |
171 | case FUT_OFF_INODE: | 171 | case FUT_OFF_INODE: |
172 | atomic_inc(&key->shared.inode->i_count); | 172 | ihold(key->shared.inode); |
173 | break; | 173 | break; |
174 | case FUT_OFF_MMSHARED: | 174 | case FUT_OFF_MMSHARED: |
175 | atomic_inc(&key->private.mm->mm_count); | 175 | atomic_inc(&key->private.mm->mm_count); |
@@ -2489,7 +2489,8 @@ void exit_robust_list(struct task_struct *curr) | |||
2489 | { | 2489 | { |
2490 | struct robust_list_head __user *head = curr->robust_list; | 2490 | struct robust_list_head __user *head = curr->robust_list; |
2491 | struct robust_list __user *entry, *next_entry, *pending; | 2491 | struct robust_list __user *entry, *next_entry, *pending; |
2492 | unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip; | 2492 | unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; |
2493 | unsigned int uninitialized_var(next_pi); | ||
2493 | unsigned long futex_offset; | 2494 | unsigned long futex_offset; |
2494 | int rc; | 2495 | int rc; |
2495 | 2496 | ||
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index 06da4dfc339b..a7934ac75e5b 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c | |||
@@ -49,7 +49,8 @@ void compat_exit_robust_list(struct task_struct *curr) | |||
49 | { | 49 | { |
50 | struct compat_robust_list_head __user *head = curr->compat_robust_list; | 50 | struct compat_robust_list_head __user *head = curr->compat_robust_list; |
51 | struct robust_list __user *entry, *next_entry, *pending; | 51 | struct robust_list __user *entry, *next_entry, *pending; |
52 | unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip; | 52 | unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; |
53 | unsigned int uninitialized_var(next_pi); | ||
53 | compat_uptr_t uentry, next_uentry, upending; | 54 | compat_uptr_t uentry, next_uentry, upending; |
54 | compat_long_t futex_offset; | 55 | compat_long_t futex_offset; |
55 | int rc; | 56 | int rc; |
diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c index f83972b16564..9bd0934f6c33 100644 --- a/kernel/gcov/fs.c +++ b/kernel/gcov/fs.c | |||
@@ -561,6 +561,7 @@ static ssize_t reset_read(struct file *file, char __user *addr, size_t len, | |||
561 | static const struct file_operations gcov_reset_fops = { | 561 | static const struct file_operations gcov_reset_fops = { |
562 | .write = reset_write, | 562 | .write = reset_write, |
563 | .read = reset_read, | 563 | .read = reset_read, |
564 | .llseek = noop_llseek, | ||
564 | }; | 565 | }; |
565 | 566 | ||
566 | /* | 567 | /* |
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 2c9120f0afca..e5325825aeb6 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c | |||
@@ -620,7 +620,7 @@ static struct pmu perf_breakpoint = { | |||
620 | .read = hw_breakpoint_pmu_read, | 620 | .read = hw_breakpoint_pmu_read, |
621 | }; | 621 | }; |
622 | 622 | ||
623 | static int __init init_hw_breakpoint(void) | 623 | int __init init_hw_breakpoint(void) |
624 | { | 624 | { |
625 | unsigned int **task_bp_pinned; | 625 | unsigned int **task_bp_pinned; |
626 | int cpu, err_cpu; | 626 | int cpu, err_cpu; |
@@ -655,6 +655,5 @@ static int __init init_hw_breakpoint(void) | |||
655 | 655 | ||
656 | return -ENOMEM; | 656 | return -ENOMEM; |
657 | } | 657 | } |
658 | core_initcall(init_hw_breakpoint); | ||
659 | 658 | ||
660 | 659 | ||
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 9d917ff72675..9988d03797f5 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c | |||
@@ -393,3 +393,18 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) | |||
393 | struct irq_desc *desc = irq_to_desc(irq); | 393 | struct irq_desc *desc = irq_to_desc(irq); |
394 | return desc ? desc->kstat_irqs[cpu] : 0; | 394 | return desc ? desc->kstat_irqs[cpu] : 0; |
395 | } | 395 | } |
396 | |||
397 | #ifdef CONFIG_GENERIC_HARDIRQS | ||
398 | unsigned int kstat_irqs(unsigned int irq) | ||
399 | { | ||
400 | struct irq_desc *desc = irq_to_desc(irq); | ||
401 | int cpu; | ||
402 | int sum = 0; | ||
403 | |||
404 | if (!desc) | ||
405 | return 0; | ||
406 | for_each_possible_cpu(cpu) | ||
407 | sum += desc->kstat_irqs[cpu]; | ||
408 | return sum; | ||
409 | } | ||
410 | #endif /* CONFIG_GENERIC_HARDIRQS */ | ||
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 644e8d5fa367..5f92acc5f952 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
@@ -324,6 +324,10 @@ void enable_irq(unsigned int irq) | |||
324 | if (!desc) | 324 | if (!desc) |
325 | return; | 325 | return; |
326 | 326 | ||
327 | if (WARN(!desc->irq_data.chip || !desc->irq_data.chip->irq_enable, | ||
328 | KERN_ERR "enable_irq before setup/request_irq: irq %u\n", irq)) | ||
329 | return; | ||
330 | |||
327 | chip_bus_lock(desc); | 331 | chip_bus_lock(desc); |
328 | raw_spin_lock_irqsave(&desc->lock, flags); | 332 | raw_spin_lock_irqsave(&desc->lock, flags); |
329 | __enable_irq(desc, irq, false); | 333 | __enable_irq(desc, irq, false); |
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 01b1d3a88983..6c8a2a9f8a7b 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c | |||
@@ -214,7 +214,7 @@ static int irq_spurious_proc_show(struct seq_file *m, void *v) | |||
214 | 214 | ||
215 | static int irq_spurious_proc_open(struct inode *inode, struct file *file) | 215 | static int irq_spurious_proc_open(struct inode *inode, struct file *file) |
216 | { | 216 | { |
217 | return single_open(file, irq_spurious_proc_show, NULL); | 217 | return single_open(file, irq_spurious_proc_show, PDE(inode)->data); |
218 | } | 218 | } |
219 | 219 | ||
220 | static const struct file_operations irq_spurious_proc_fops = { | 220 | static const struct file_operations irq_spurious_proc_fops = { |
diff --git a/kernel/irq_work.c b/kernel/irq_work.c index f16763ff8481..90f881904bb1 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c | |||
@@ -145,7 +145,9 @@ void irq_work_run(void) | |||
145 | * Clear the BUSY bit and return to the free state if | 145 | * Clear the BUSY bit and return to the free state if |
146 | * no-one else claimed it meanwhile. | 146 | * no-one else claimed it meanwhile. |
147 | */ | 147 | */ |
148 | cmpxchg(&entry->next, next_flags(NULL, IRQ_WORK_BUSY), NULL); | 148 | (void)cmpxchg(&entry->next, |
149 | next_flags(NULL, IRQ_WORK_BUSY), | ||
150 | NULL); | ||
149 | } | 151 | } |
150 | } | 152 | } |
151 | EXPORT_SYMBOL_GPL(irq_work_run); | 153 | EXPORT_SYMBOL_GPL(irq_work_run); |
diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 7be868bf25c6..3b79bd938330 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c | |||
@@ -39,6 +39,16 @@ struct jump_label_module_entry { | |||
39 | struct module *mod; | 39 | struct module *mod; |
40 | }; | 40 | }; |
41 | 41 | ||
42 | void jump_label_lock(void) | ||
43 | { | ||
44 | mutex_lock(&jump_label_mutex); | ||
45 | } | ||
46 | |||
47 | void jump_label_unlock(void) | ||
48 | { | ||
49 | mutex_unlock(&jump_label_mutex); | ||
50 | } | ||
51 | |||
42 | static int jump_label_cmp(const void *a, const void *b) | 52 | static int jump_label_cmp(const void *a, const void *b) |
43 | { | 53 | { |
44 | const struct jump_entry *jea = a; | 54 | const struct jump_entry *jea = a; |
@@ -152,7 +162,7 @@ void jump_label_update(unsigned long key, enum jump_label_type type) | |||
152 | struct jump_label_module_entry *e_module; | 162 | struct jump_label_module_entry *e_module; |
153 | int count; | 163 | int count; |
154 | 164 | ||
155 | mutex_lock(&jump_label_mutex); | 165 | jump_label_lock(); |
156 | entry = get_jump_label_entry((jump_label_t)key); | 166 | entry = get_jump_label_entry((jump_label_t)key); |
157 | if (entry) { | 167 | if (entry) { |
158 | count = entry->nr_entries; | 168 | count = entry->nr_entries; |
@@ -168,13 +178,14 @@ void jump_label_update(unsigned long key, enum jump_label_type type) | |||
168 | count = e_module->nr_entries; | 178 | count = e_module->nr_entries; |
169 | iter = e_module->table; | 179 | iter = e_module->table; |
170 | while (count--) { | 180 | while (count--) { |
171 | if (kernel_text_address(iter->code)) | 181 | if (iter->key && |
182 | kernel_text_address(iter->code)) | ||
172 | arch_jump_label_transform(iter, type); | 183 | arch_jump_label_transform(iter, type); |
173 | iter++; | 184 | iter++; |
174 | } | 185 | } |
175 | } | 186 | } |
176 | } | 187 | } |
177 | mutex_unlock(&jump_label_mutex); | 188 | jump_label_unlock(); |
178 | } | 189 | } |
179 | 190 | ||
180 | static int addr_conflict(struct jump_entry *entry, void *start, void *end) | 191 | static int addr_conflict(struct jump_entry *entry, void *start, void *end) |
@@ -231,6 +242,7 @@ out: | |||
231 | * overlaps with any of the jump label patch addresses. Code | 242 | * overlaps with any of the jump label patch addresses. Code |
232 | * that wants to modify kernel text should first verify that | 243 | * that wants to modify kernel text should first verify that |
233 | * it does not overlap with any of the jump label addresses. | 244 | * it does not overlap with any of the jump label addresses. |
245 | * Caller must hold jump_label_mutex. | ||
234 | * | 246 | * |
235 | * returns 1 if there is an overlap, 0 otherwise | 247 | * returns 1 if there is an overlap, 0 otherwise |
236 | */ | 248 | */ |
@@ -241,7 +253,6 @@ int jump_label_text_reserved(void *start, void *end) | |||
241 | struct jump_entry *iter_stop = __start___jump_table; | 253 | struct jump_entry *iter_stop = __start___jump_table; |
242 | int conflict = 0; | 254 | int conflict = 0; |
243 | 255 | ||
244 | mutex_lock(&jump_label_mutex); | ||
245 | iter = iter_start; | 256 | iter = iter_start; |
246 | while (iter < iter_stop) { | 257 | while (iter < iter_stop) { |
247 | if (addr_conflict(iter, start, end)) { | 258 | if (addr_conflict(iter, start, end)) { |
@@ -256,10 +267,16 @@ int jump_label_text_reserved(void *start, void *end) | |||
256 | conflict = module_conflict(start, end); | 267 | conflict = module_conflict(start, end); |
257 | #endif | 268 | #endif |
258 | out: | 269 | out: |
259 | mutex_unlock(&jump_label_mutex); | ||
260 | return conflict; | 270 | return conflict; |
261 | } | 271 | } |
262 | 272 | ||
273 | /* | ||
274 | * Not all archs need this. | ||
275 | */ | ||
276 | void __weak arch_jump_label_text_poke_early(jump_label_t addr) | ||
277 | { | ||
278 | } | ||
279 | |||
263 | static __init int init_jump_label(void) | 280 | static __init int init_jump_label(void) |
264 | { | 281 | { |
265 | int ret; | 282 | int ret; |
@@ -267,7 +284,7 @@ static __init int init_jump_label(void) | |||
267 | struct jump_entry *iter_stop = __stop___jump_table; | 284 | struct jump_entry *iter_stop = __stop___jump_table; |
268 | struct jump_entry *iter; | 285 | struct jump_entry *iter; |
269 | 286 | ||
270 | mutex_lock(&jump_label_mutex); | 287 | jump_label_lock(); |
271 | ret = build_jump_label_hashtable(__start___jump_table, | 288 | ret = build_jump_label_hashtable(__start___jump_table, |
272 | __stop___jump_table); | 289 | __stop___jump_table); |
273 | iter = iter_start; | 290 | iter = iter_start; |
@@ -275,7 +292,7 @@ static __init int init_jump_label(void) | |||
275 | arch_jump_label_text_poke_early(iter->code); | 292 | arch_jump_label_text_poke_early(iter->code); |
276 | iter++; | 293 | iter++; |
277 | } | 294 | } |
278 | mutex_unlock(&jump_label_mutex); | 295 | jump_label_unlock(); |
279 | return ret; | 296 | return ret; |
280 | } | 297 | } |
281 | early_initcall(init_jump_label); | 298 | early_initcall(init_jump_label); |
@@ -366,6 +383,39 @@ static void remove_jump_label_module(struct module *mod) | |||
366 | } | 383 | } |
367 | } | 384 | } |
368 | 385 | ||
386 | static void remove_jump_label_module_init(struct module *mod) | ||
387 | { | ||
388 | struct hlist_head *head; | ||
389 | struct hlist_node *node, *node_next, *module_node, *module_node_next; | ||
390 | struct jump_label_entry *e; | ||
391 | struct jump_label_module_entry *e_module; | ||
392 | struct jump_entry *iter; | ||
393 | int i, count; | ||
394 | |||
395 | /* if the module doesn't have jump label entries, just return */ | ||
396 | if (!mod->num_jump_entries) | ||
397 | return; | ||
398 | |||
399 | for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) { | ||
400 | head = &jump_label_table[i]; | ||
401 | hlist_for_each_entry_safe(e, node, node_next, head, hlist) { | ||
402 | hlist_for_each_entry_safe(e_module, module_node, | ||
403 | module_node_next, | ||
404 | &(e->modules), hlist) { | ||
405 | if (e_module->mod != mod) | ||
406 | continue; | ||
407 | count = e_module->nr_entries; | ||
408 | iter = e_module->table; | ||
409 | while (count--) { | ||
410 | if (within_module_init(iter->code, mod)) | ||
411 | iter->key = 0; | ||
412 | iter++; | ||
413 | } | ||
414 | } | ||
415 | } | ||
416 | } | ||
417 | } | ||
418 | |||
369 | static int | 419 | static int |
370 | jump_label_module_notify(struct notifier_block *self, unsigned long val, | 420 | jump_label_module_notify(struct notifier_block *self, unsigned long val, |
371 | void *data) | 421 | void *data) |
@@ -375,16 +425,21 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val, | |||
375 | 425 | ||
376 | switch (val) { | 426 | switch (val) { |
377 | case MODULE_STATE_COMING: | 427 | case MODULE_STATE_COMING: |
378 | mutex_lock(&jump_label_mutex); | 428 | jump_label_lock(); |
379 | ret = add_jump_label_module(mod); | 429 | ret = add_jump_label_module(mod); |
380 | if (ret) | 430 | if (ret) |
381 | remove_jump_label_module(mod); | 431 | remove_jump_label_module(mod); |
382 | mutex_unlock(&jump_label_mutex); | 432 | jump_label_unlock(); |
383 | break; | 433 | break; |
384 | case MODULE_STATE_GOING: | 434 | case MODULE_STATE_GOING: |
385 | mutex_lock(&jump_label_mutex); | 435 | jump_label_lock(); |
386 | remove_jump_label_module(mod); | 436 | remove_jump_label_module(mod); |
387 | mutex_unlock(&jump_label_mutex); | 437 | jump_label_unlock(); |
438 | break; | ||
439 | case MODULE_STATE_LIVE: | ||
440 | jump_label_lock(); | ||
441 | remove_jump_label_module_init(mod); | ||
442 | jump_label_unlock(); | ||
388 | break; | 443 | break; |
389 | } | 444 | } |
390 | return ret; | 445 | return ret; |
diff --git a/kernel/kexec.c b/kernel/kexec.c index c0613f7d6730..b55045bc7563 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c | |||
@@ -816,7 +816,7 @@ static int kimage_load_normal_segment(struct kimage *image, | |||
816 | 816 | ||
817 | ptr = kmap(page); | 817 | ptr = kmap(page); |
818 | /* Start with a clear page */ | 818 | /* Start with a clear page */ |
819 | memset(ptr, 0, PAGE_SIZE); | 819 | clear_page(ptr); |
820 | ptr += maddr & ~PAGE_MASK; | 820 | ptr += maddr & ~PAGE_MASK; |
821 | mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK); | 821 | mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK); |
822 | if (mchunk > mbytes) | 822 | if (mchunk > mbytes) |
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index ec4210c6501e..9737a76e106f 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
@@ -74,7 +74,8 @@ static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; | |||
74 | /* NOTE: change this value only with kprobe_mutex held */ | 74 | /* NOTE: change this value only with kprobe_mutex held */ |
75 | static bool kprobes_all_disarmed; | 75 | static bool kprobes_all_disarmed; |
76 | 76 | ||
77 | static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ | 77 | /* This protects kprobe_table and optimizing_list */ |
78 | static DEFINE_MUTEX(kprobe_mutex); | ||
78 | static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; | 79 | static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; |
79 | static struct { | 80 | static struct { |
80 | spinlock_t lock ____cacheline_aligned_in_smp; | 81 | spinlock_t lock ____cacheline_aligned_in_smp; |
@@ -595,6 +596,7 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p) | |||
595 | } | 596 | } |
596 | 597 | ||
597 | #ifdef CONFIG_SYSCTL | 598 | #ifdef CONFIG_SYSCTL |
599 | /* This should be called with kprobe_mutex locked */ | ||
598 | static void __kprobes optimize_all_kprobes(void) | 600 | static void __kprobes optimize_all_kprobes(void) |
599 | { | 601 | { |
600 | struct hlist_head *head; | 602 | struct hlist_head *head; |
@@ -607,17 +609,16 @@ static void __kprobes optimize_all_kprobes(void) | |||
607 | return; | 609 | return; |
608 | 610 | ||
609 | kprobes_allow_optimization = true; | 611 | kprobes_allow_optimization = true; |
610 | mutex_lock(&text_mutex); | ||
611 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { | 612 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { |
612 | head = &kprobe_table[i]; | 613 | head = &kprobe_table[i]; |
613 | hlist_for_each_entry_rcu(p, node, head, hlist) | 614 | hlist_for_each_entry_rcu(p, node, head, hlist) |
614 | if (!kprobe_disabled(p)) | 615 | if (!kprobe_disabled(p)) |
615 | optimize_kprobe(p); | 616 | optimize_kprobe(p); |
616 | } | 617 | } |
617 | mutex_unlock(&text_mutex); | ||
618 | printk(KERN_INFO "Kprobes globally optimized\n"); | 618 | printk(KERN_INFO "Kprobes globally optimized\n"); |
619 | } | 619 | } |
620 | 620 | ||
621 | /* This should be called with kprobe_mutex locked */ | ||
621 | static void __kprobes unoptimize_all_kprobes(void) | 622 | static void __kprobes unoptimize_all_kprobes(void) |
622 | { | 623 | { |
623 | struct hlist_head *head; | 624 | struct hlist_head *head; |
@@ -1144,14 +1145,13 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
1144 | if (ret) | 1145 | if (ret) |
1145 | return ret; | 1146 | return ret; |
1146 | 1147 | ||
1148 | jump_label_lock(); | ||
1147 | preempt_disable(); | 1149 | preempt_disable(); |
1148 | if (!kernel_text_address((unsigned long) p->addr) || | 1150 | if (!kernel_text_address((unsigned long) p->addr) || |
1149 | in_kprobes_functions((unsigned long) p->addr) || | 1151 | in_kprobes_functions((unsigned long) p->addr) || |
1150 | ftrace_text_reserved(p->addr, p->addr) || | 1152 | ftrace_text_reserved(p->addr, p->addr) || |
1151 | jump_label_text_reserved(p->addr, p->addr)) { | 1153 | jump_label_text_reserved(p->addr, p->addr)) |
1152 | preempt_enable(); | 1154 | goto fail_with_jump_label; |
1153 | return -EINVAL; | ||
1154 | } | ||
1155 | 1155 | ||
1156 | /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */ | 1156 | /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */ |
1157 | p->flags &= KPROBE_FLAG_DISABLED; | 1157 | p->flags &= KPROBE_FLAG_DISABLED; |
@@ -1165,10 +1165,9 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
1165 | * We must hold a refcount of the probed module while updating | 1165 | * We must hold a refcount of the probed module while updating |
1166 | * its code to prohibit unexpected unloading. | 1166 | * its code to prohibit unexpected unloading. |
1167 | */ | 1167 | */ |
1168 | if (unlikely(!try_module_get(probed_mod))) { | 1168 | if (unlikely(!try_module_get(probed_mod))) |
1169 | preempt_enable(); | 1169 | goto fail_with_jump_label; |
1170 | return -EINVAL; | 1170 | |
1171 | } | ||
1172 | /* | 1171 | /* |
1173 | * If the module freed .init.text, we couldn't insert | 1172 | * If the module freed .init.text, we couldn't insert |
1174 | * kprobes in there. | 1173 | * kprobes in there. |
@@ -1176,16 +1175,18 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
1176 | if (within_module_init((unsigned long)p->addr, probed_mod) && | 1175 | if (within_module_init((unsigned long)p->addr, probed_mod) && |
1177 | probed_mod->state != MODULE_STATE_COMING) { | 1176 | probed_mod->state != MODULE_STATE_COMING) { |
1178 | module_put(probed_mod); | 1177 | module_put(probed_mod); |
1179 | preempt_enable(); | 1178 | goto fail_with_jump_label; |
1180 | return -EINVAL; | ||
1181 | } | 1179 | } |
1182 | } | 1180 | } |
1183 | preempt_enable(); | 1181 | preempt_enable(); |
1182 | jump_label_unlock(); | ||
1184 | 1183 | ||
1185 | p->nmissed = 0; | 1184 | p->nmissed = 0; |
1186 | INIT_LIST_HEAD(&p->list); | 1185 | INIT_LIST_HEAD(&p->list); |
1187 | mutex_lock(&kprobe_mutex); | 1186 | mutex_lock(&kprobe_mutex); |
1188 | 1187 | ||
1188 | jump_label_lock(); /* needed to call jump_label_text_reserved() */ | ||
1189 | |||
1189 | get_online_cpus(); /* For avoiding text_mutex deadlock. */ | 1190 | get_online_cpus(); /* For avoiding text_mutex deadlock. */ |
1190 | mutex_lock(&text_mutex); | 1191 | mutex_lock(&text_mutex); |
1191 | 1192 | ||
@@ -1213,12 +1214,18 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
1213 | out: | 1214 | out: |
1214 | mutex_unlock(&text_mutex); | 1215 | mutex_unlock(&text_mutex); |
1215 | put_online_cpus(); | 1216 | put_online_cpus(); |
1217 | jump_label_unlock(); | ||
1216 | mutex_unlock(&kprobe_mutex); | 1218 | mutex_unlock(&kprobe_mutex); |
1217 | 1219 | ||
1218 | if (probed_mod) | 1220 | if (probed_mod) |
1219 | module_put(probed_mod); | 1221 | module_put(probed_mod); |
1220 | 1222 | ||
1221 | return ret; | 1223 | return ret; |
1224 | |||
1225 | fail_with_jump_label: | ||
1226 | preempt_enable(); | ||
1227 | jump_label_unlock(); | ||
1228 | return -EINVAL; | ||
1222 | } | 1229 | } |
1223 | EXPORT_SYMBOL_GPL(register_kprobe); | 1230 | EXPORT_SYMBOL_GPL(register_kprobe); |
1224 | 1231 | ||
@@ -2000,6 +2007,7 @@ static ssize_t write_enabled_file_bool(struct file *file, | |||
2000 | static const struct file_operations fops_kp = { | 2007 | static const struct file_operations fops_kp = { |
2001 | .read = read_enabled_file_bool, | 2008 | .read = read_enabled_file_bool, |
2002 | .write = write_enabled_file_bool, | 2009 | .write = write_enabled_file_bool, |
2010 | .llseek = default_llseek, | ||
2003 | }; | 2011 | }; |
2004 | 2012 | ||
2005 | static int __kprobes debugfs_kprobe_init(void) | 2013 | static int __kprobes debugfs_kprobe_init(void) |
diff --git a/kernel/kthread.c b/kernel/kthread.c index 2dc3786349d1..ca61bbdd44b2 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c | |||
@@ -265,6 +265,17 @@ int kthreadd(void *unused) | |||
265 | return 0; | 265 | return 0; |
266 | } | 266 | } |
267 | 267 | ||
268 | void __init_kthread_worker(struct kthread_worker *worker, | ||
269 | const char *name, | ||
270 | struct lock_class_key *key) | ||
271 | { | ||
272 | spin_lock_init(&worker->lock); | ||
273 | lockdep_set_class_and_name(&worker->lock, key, name); | ||
274 | INIT_LIST_HEAD(&worker->work_list); | ||
275 | worker->task = NULL; | ||
276 | } | ||
277 | EXPORT_SYMBOL_GPL(__init_kthread_worker); | ||
278 | |||
268 | /** | 279 | /** |
269 | * kthread_worker_fn - kthread function to process kthread_worker | 280 | * kthread_worker_fn - kthread function to process kthread_worker |
270 | * @worker_ptr: pointer to initialized kthread_worker | 281 | * @worker_ptr: pointer to initialized kthread_worker |
diff --git a/kernel/latencytop.c b/kernel/latencytop.c index 877fb306d415..17110a4a4fc2 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c | |||
@@ -194,14 +194,7 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter) | |||
194 | 194 | ||
195 | account_global_scheduler_latency(tsk, &lat); | 195 | account_global_scheduler_latency(tsk, &lat); |
196 | 196 | ||
197 | /* | 197 | for (i = 0; i < tsk->latency_record_count; i++) { |
198 | * short term hack; if we're > 32 we stop; future we recycle: | ||
199 | */ | ||
200 | tsk->latency_record_count++; | ||
201 | if (tsk->latency_record_count >= LT_SAVECOUNT) | ||
202 | goto out_unlock; | ||
203 | |||
204 | for (i = 0; i < LT_SAVECOUNT; i++) { | ||
205 | struct latency_record *mylat; | 198 | struct latency_record *mylat; |
206 | int same = 1; | 199 | int same = 1; |
207 | 200 | ||
@@ -227,8 +220,14 @@ __account_scheduler_latency(struct task_struct *tsk, int usecs, int inter) | |||
227 | } | 220 | } |
228 | } | 221 | } |
229 | 222 | ||
223 | /* | ||
224 | * short term hack; if we're > 32 we stop; future we recycle: | ||
225 | */ | ||
226 | if (tsk->latency_record_count >= LT_SAVECOUNT) | ||
227 | goto out_unlock; | ||
228 | |||
230 | /* Allocated a new one: */ | 229 | /* Allocated a new one: */ |
231 | i = tsk->latency_record_count; | 230 | i = tsk->latency_record_count++; |
232 | memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record)); | 231 | memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record)); |
233 | 232 | ||
234 | out_unlock: | 233 | out_unlock: |
diff --git a/kernel/module.c b/kernel/module.c index 2df46301a7a4..d190664f25ff 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -2037,7 +2037,7 @@ static inline void layout_symtab(struct module *mod, struct load_info *info) | |||
2037 | { | 2037 | { |
2038 | } | 2038 | } |
2039 | 2039 | ||
2040 | static void add_kallsyms(struct module *mod, struct load_info *info) | 2040 | static void add_kallsyms(struct module *mod, const struct load_info *info) |
2041 | { | 2041 | { |
2042 | } | 2042 | } |
2043 | #endif /* CONFIG_KALLSYMS */ | 2043 | #endif /* CONFIG_KALLSYMS */ |
@@ -2326,6 +2326,18 @@ static void find_module_sections(struct module *mod, struct load_info *info) | |||
2326 | kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) * | 2326 | kmemleak_scan_area(mod->trace_events, sizeof(*mod->trace_events) * |
2327 | mod->num_trace_events, GFP_KERNEL); | 2327 | mod->num_trace_events, GFP_KERNEL); |
2328 | #endif | 2328 | #endif |
2329 | #ifdef CONFIG_TRACING | ||
2330 | mod->trace_bprintk_fmt_start = section_objs(info, "__trace_printk_fmt", | ||
2331 | sizeof(*mod->trace_bprintk_fmt_start), | ||
2332 | &mod->num_trace_bprintk_fmt); | ||
2333 | /* | ||
2334 | * This section contains pointers to allocated objects in the trace | ||
2335 | * code and not scanning it leads to false positives. | ||
2336 | */ | ||
2337 | kmemleak_scan_area(mod->trace_bprintk_fmt_start, | ||
2338 | sizeof(*mod->trace_bprintk_fmt_start) * | ||
2339 | mod->num_trace_bprintk_fmt, GFP_KERNEL); | ||
2340 | #endif | ||
2329 | #ifdef CONFIG_FTRACE_MCOUNT_RECORD | 2341 | #ifdef CONFIG_FTRACE_MCOUNT_RECORD |
2330 | /* sechdrs[0].sh_size is always zero */ | 2342 | /* sechdrs[0].sh_size is always zero */ |
2331 | mod->ftrace_callsites = section_objs(info, "__mcount_loc", | 2343 | mod->ftrace_callsites = section_objs(info, "__mcount_loc", |
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c index 2a5dfec8efe0..2c98ad94ba0e 100644 --- a/kernel/ns_cgroup.c +++ b/kernel/ns_cgroup.c | |||
@@ -85,6 +85,14 @@ static struct cgroup_subsys_state *ns_create(struct cgroup_subsys *ss, | |||
85 | return ERR_PTR(-EPERM); | 85 | return ERR_PTR(-EPERM); |
86 | if (!cgroup_is_descendant(cgroup, current)) | 86 | if (!cgroup_is_descendant(cgroup, current)) |
87 | return ERR_PTR(-EPERM); | 87 | return ERR_PTR(-EPERM); |
88 | if (test_bit(CGRP_CLONE_CHILDREN, &cgroup->flags)) { | ||
89 | printk("ns_cgroup can't be created with parent " | ||
90 | "'clone_children' set.\n"); | ||
91 | return ERR_PTR(-EINVAL); | ||
92 | } | ||
93 | |||
94 | printk_once("ns_cgroup deprecated: consider using the " | ||
95 | "'clone_children' flag without the ns_cgroup.\n"); | ||
88 | 96 | ||
89 | ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL); | 97 | ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL); |
90 | if (!ns_cgroup) | 98 | if (!ns_cgroup) |
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index f309e8014c78..2870feee81dd 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/kernel_stat.h> | 31 | #include <linux/kernel_stat.h> |
32 | #include <linux/perf_event.h> | 32 | #include <linux/perf_event.h> |
33 | #include <linux/ftrace_event.h> | 33 | #include <linux/ftrace_event.h> |
34 | #include <linux/hw_breakpoint.h> | ||
34 | 35 | ||
35 | #include <asm/irq_regs.h> | 36 | #include <asm/irq_regs.h> |
36 | 37 | ||
@@ -417,8 +418,8 @@ event_filter_match(struct perf_event *event) | |||
417 | return event->cpu == -1 || event->cpu == smp_processor_id(); | 418 | return event->cpu == -1 || event->cpu == smp_processor_id(); |
418 | } | 419 | } |
419 | 420 | ||
420 | static int | 421 | static void |
421 | __event_sched_out(struct perf_event *event, | 422 | event_sched_out(struct perf_event *event, |
422 | struct perf_cpu_context *cpuctx, | 423 | struct perf_cpu_context *cpuctx, |
423 | struct perf_event_context *ctx) | 424 | struct perf_event_context *ctx) |
424 | { | 425 | { |
@@ -437,13 +438,14 @@ __event_sched_out(struct perf_event *event, | |||
437 | } | 438 | } |
438 | 439 | ||
439 | if (event->state != PERF_EVENT_STATE_ACTIVE) | 440 | if (event->state != PERF_EVENT_STATE_ACTIVE) |
440 | return 0; | 441 | return; |
441 | 442 | ||
442 | event->state = PERF_EVENT_STATE_INACTIVE; | 443 | event->state = PERF_EVENT_STATE_INACTIVE; |
443 | if (event->pending_disable) { | 444 | if (event->pending_disable) { |
444 | event->pending_disable = 0; | 445 | event->pending_disable = 0; |
445 | event->state = PERF_EVENT_STATE_OFF; | 446 | event->state = PERF_EVENT_STATE_OFF; |
446 | } | 447 | } |
448 | event->tstamp_stopped = ctx->time; | ||
447 | event->pmu->del(event, 0); | 449 | event->pmu->del(event, 0); |
448 | event->oncpu = -1; | 450 | event->oncpu = -1; |
449 | 451 | ||
@@ -452,19 +454,6 @@ __event_sched_out(struct perf_event *event, | |||
452 | ctx->nr_active--; | 454 | ctx->nr_active--; |
453 | if (event->attr.exclusive || !cpuctx->active_oncpu) | 455 | if (event->attr.exclusive || !cpuctx->active_oncpu) |
454 | cpuctx->exclusive = 0; | 456 | cpuctx->exclusive = 0; |
455 | return 1; | ||
456 | } | ||
457 | |||
458 | static void | ||
459 | event_sched_out(struct perf_event *event, | ||
460 | struct perf_cpu_context *cpuctx, | ||
461 | struct perf_event_context *ctx) | ||
462 | { | ||
463 | int ret; | ||
464 | |||
465 | ret = __event_sched_out(event, cpuctx, ctx); | ||
466 | if (ret) | ||
467 | event->tstamp_stopped = ctx->time; | ||
468 | } | 457 | } |
469 | 458 | ||
470 | static void | 459 | static void |
@@ -664,7 +653,7 @@ retry: | |||
664 | } | 653 | } |
665 | 654 | ||
666 | static int | 655 | static int |
667 | __event_sched_in(struct perf_event *event, | 656 | event_sched_in(struct perf_event *event, |
668 | struct perf_cpu_context *cpuctx, | 657 | struct perf_cpu_context *cpuctx, |
669 | struct perf_event_context *ctx) | 658 | struct perf_event_context *ctx) |
670 | { | 659 | { |
@@ -684,6 +673,10 @@ __event_sched_in(struct perf_event *event, | |||
684 | return -EAGAIN; | 673 | return -EAGAIN; |
685 | } | 674 | } |
686 | 675 | ||
676 | event->tstamp_running += ctx->time - event->tstamp_stopped; | ||
677 | |||
678 | event->shadow_ctx_time = ctx->time - ctx->timestamp; | ||
679 | |||
687 | if (!is_software_event(event)) | 680 | if (!is_software_event(event)) |
688 | cpuctx->active_oncpu++; | 681 | cpuctx->active_oncpu++; |
689 | ctx->nr_active++; | 682 | ctx->nr_active++; |
@@ -694,35 +687,6 @@ __event_sched_in(struct perf_event *event, | |||
694 | return 0; | 687 | return 0; |
695 | } | 688 | } |
696 | 689 | ||
697 | static inline int | ||
698 | event_sched_in(struct perf_event *event, | ||
699 | struct perf_cpu_context *cpuctx, | ||
700 | struct perf_event_context *ctx) | ||
701 | { | ||
702 | int ret = __event_sched_in(event, cpuctx, ctx); | ||
703 | if (ret) | ||
704 | return ret; | ||
705 | event->tstamp_running += ctx->time - event->tstamp_stopped; | ||
706 | return 0; | ||
707 | } | ||
708 | |||
709 | static void | ||
710 | group_commit_event_sched_in(struct perf_event *group_event, | ||
711 | struct perf_cpu_context *cpuctx, | ||
712 | struct perf_event_context *ctx) | ||
713 | { | ||
714 | struct perf_event *event; | ||
715 | u64 now = ctx->time; | ||
716 | |||
717 | group_event->tstamp_running += now - group_event->tstamp_stopped; | ||
718 | /* | ||
719 | * Schedule in siblings as one group (if any): | ||
720 | */ | ||
721 | list_for_each_entry(event, &group_event->sibling_list, group_entry) { | ||
722 | event->tstamp_running += now - event->tstamp_stopped; | ||
723 | } | ||
724 | } | ||
725 | |||
726 | static int | 690 | static int |
727 | group_sched_in(struct perf_event *group_event, | 691 | group_sched_in(struct perf_event *group_event, |
728 | struct perf_cpu_context *cpuctx, | 692 | struct perf_cpu_context *cpuctx, |
@@ -730,19 +694,15 @@ group_sched_in(struct perf_event *group_event, | |||
730 | { | 694 | { |
731 | struct perf_event *event, *partial_group = NULL; | 695 | struct perf_event *event, *partial_group = NULL; |
732 | struct pmu *pmu = group_event->pmu; | 696 | struct pmu *pmu = group_event->pmu; |
697 | u64 now = ctx->time; | ||
698 | bool simulate = false; | ||
733 | 699 | ||
734 | if (group_event->state == PERF_EVENT_STATE_OFF) | 700 | if (group_event->state == PERF_EVENT_STATE_OFF) |
735 | return 0; | 701 | return 0; |
736 | 702 | ||
737 | pmu->start_txn(pmu); | 703 | pmu->start_txn(pmu); |
738 | 704 | ||
739 | /* | 705 | if (event_sched_in(group_event, cpuctx, ctx)) { |
740 | * use __event_sched_in() to delay updating tstamp_running | ||
741 | * until the transaction is committed. In case of failure | ||
742 | * we will keep an unmodified tstamp_running which is a | ||
743 | * requirement to get correct timing information | ||
744 | */ | ||
745 | if (__event_sched_in(group_event, cpuctx, ctx)) { | ||
746 | pmu->cancel_txn(pmu); | 706 | pmu->cancel_txn(pmu); |
747 | return -EAGAIN; | 707 | return -EAGAIN; |
748 | } | 708 | } |
@@ -751,31 +711,42 @@ group_sched_in(struct perf_event *group_event, | |||
751 | * Schedule in siblings as one group (if any): | 711 | * Schedule in siblings as one group (if any): |
752 | */ | 712 | */ |
753 | list_for_each_entry(event, &group_event->sibling_list, group_entry) { | 713 | list_for_each_entry(event, &group_event->sibling_list, group_entry) { |
754 | if (__event_sched_in(event, cpuctx, ctx)) { | 714 | if (event_sched_in(event, cpuctx, ctx)) { |
755 | partial_group = event; | 715 | partial_group = event; |
756 | goto group_error; | 716 | goto group_error; |
757 | } | 717 | } |
758 | } | 718 | } |
759 | 719 | ||
760 | if (!pmu->commit_txn(pmu)) { | 720 | if (!pmu->commit_txn(pmu)) |
761 | /* commit tstamp_running */ | ||
762 | group_commit_event_sched_in(group_event, cpuctx, ctx); | ||
763 | return 0; | 721 | return 0; |
764 | } | 722 | |
765 | group_error: | 723 | group_error: |
766 | /* | 724 | /* |
767 | * Groups can be scheduled in as one unit only, so undo any | 725 | * Groups can be scheduled in as one unit only, so undo any |
768 | * partial group before returning: | 726 | * partial group before returning: |
727 | * The events up to the failed event are scheduled out normally, | ||
728 | * tstamp_stopped will be updated. | ||
769 | * | 729 | * |
770 | * use __event_sched_out() to avoid updating tstamp_stopped | 730 | * The failed events and the remaining siblings need to have |
771 | * because the event never actually ran | 731 | * their timings updated as if they had gone thru event_sched_in() |
732 | * and event_sched_out(). This is required to get consistent timings | ||
733 | * across the group. This also takes care of the case where the group | ||
734 | * could never be scheduled by ensuring tstamp_stopped is set to mark | ||
735 | * the time the event was actually stopped, such that time delta | ||
736 | * calculation in update_event_times() is correct. | ||
772 | */ | 737 | */ |
773 | list_for_each_entry(event, &group_event->sibling_list, group_entry) { | 738 | list_for_each_entry(event, &group_event->sibling_list, group_entry) { |
774 | if (event == partial_group) | 739 | if (event == partial_group) |
775 | break; | 740 | simulate = true; |
776 | __event_sched_out(event, cpuctx, ctx); | 741 | |
742 | if (simulate) { | ||
743 | event->tstamp_running += now - event->tstamp_stopped; | ||
744 | event->tstamp_stopped = now; | ||
745 | } else { | ||
746 | event_sched_out(event, cpuctx, ctx); | ||
747 | } | ||
777 | } | 748 | } |
778 | __event_sched_out(group_event, cpuctx, ctx); | 749 | event_sched_out(group_event, cpuctx, ctx); |
779 | 750 | ||
780 | pmu->cancel_txn(pmu); | 751 | pmu->cancel_txn(pmu); |
781 | 752 | ||
@@ -1316,8 +1287,6 @@ void __perf_event_task_sched_out(struct task_struct *task, | |||
1316 | { | 1287 | { |
1317 | int ctxn; | 1288 | int ctxn; |
1318 | 1289 | ||
1319 | perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); | ||
1320 | |||
1321 | for_each_task_context_nr(ctxn) | 1290 | for_each_task_context_nr(ctxn) |
1322 | perf_event_context_sched_out(task, ctxn, next); | 1291 | perf_event_context_sched_out(task, ctxn, next); |
1323 | } | 1292 | } |
@@ -1651,8 +1620,12 @@ static void rotate_ctx(struct perf_event_context *ctx) | |||
1651 | { | 1620 | { |
1652 | raw_spin_lock(&ctx->lock); | 1621 | raw_spin_lock(&ctx->lock); |
1653 | 1622 | ||
1654 | /* Rotate the first entry last of non-pinned groups */ | 1623 | /* |
1655 | list_rotate_left(&ctx->flexible_groups); | 1624 | * Rotate the first entry last of non-pinned groups. Rotation might be |
1625 | * disabled by the inheritance code. | ||
1626 | */ | ||
1627 | if (!ctx->rotate_disable) | ||
1628 | list_rotate_left(&ctx->flexible_groups); | ||
1656 | 1629 | ||
1657 | raw_spin_unlock(&ctx->lock); | 1630 | raw_spin_unlock(&ctx->lock); |
1658 | } | 1631 | } |
@@ -2264,11 +2237,6 @@ int perf_event_release_kernel(struct perf_event *event) | |||
2264 | raw_spin_unlock_irq(&ctx->lock); | 2237 | raw_spin_unlock_irq(&ctx->lock); |
2265 | mutex_unlock(&ctx->mutex); | 2238 | mutex_unlock(&ctx->mutex); |
2266 | 2239 | ||
2267 | mutex_lock(&event->owner->perf_event_mutex); | ||
2268 | list_del_init(&event->owner_entry); | ||
2269 | mutex_unlock(&event->owner->perf_event_mutex); | ||
2270 | put_task_struct(event->owner); | ||
2271 | |||
2272 | free_event(event); | 2240 | free_event(event); |
2273 | 2241 | ||
2274 | return 0; | 2242 | return 0; |
@@ -2281,9 +2249,43 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel); | |||
2281 | static int perf_release(struct inode *inode, struct file *file) | 2249 | static int perf_release(struct inode *inode, struct file *file) |
2282 | { | 2250 | { |
2283 | struct perf_event *event = file->private_data; | 2251 | struct perf_event *event = file->private_data; |
2252 | struct task_struct *owner; | ||
2284 | 2253 | ||
2285 | file->private_data = NULL; | 2254 | file->private_data = NULL; |
2286 | 2255 | ||
2256 | rcu_read_lock(); | ||
2257 | owner = ACCESS_ONCE(event->owner); | ||
2258 | /* | ||
2259 | * Matches the smp_wmb() in perf_event_exit_task(). If we observe | ||
2260 | * !owner it means the list deletion is complete and we can indeed | ||
2261 | * free this event, otherwise we need to serialize on | ||
2262 | * owner->perf_event_mutex. | ||
2263 | */ | ||
2264 | smp_read_barrier_depends(); | ||
2265 | if (owner) { | ||
2266 | /* | ||
2267 | * Since delayed_put_task_struct() also drops the last | ||
2268 | * task reference we can safely take a new reference | ||
2269 | * while holding the rcu_read_lock(). | ||
2270 | */ | ||
2271 | get_task_struct(owner); | ||
2272 | } | ||
2273 | rcu_read_unlock(); | ||
2274 | |||
2275 | if (owner) { | ||
2276 | mutex_lock(&owner->perf_event_mutex); | ||
2277 | /* | ||
2278 | * We have to re-check the event->owner field, if it is cleared | ||
2279 | * we raced with perf_event_exit_task(), acquiring the mutex | ||
2280 | * ensured they're done, and we can proceed with freeing the | ||
2281 | * event. | ||
2282 | */ | ||
2283 | if (event->owner) | ||
2284 | list_del_init(&event->owner_entry); | ||
2285 | mutex_unlock(&owner->perf_event_mutex); | ||
2286 | put_task_struct(owner); | ||
2287 | } | ||
2288 | |||
2287 | return perf_event_release_kernel(event); | 2289 | return perf_event_release_kernel(event); |
2288 | } | 2290 | } |
2289 | 2291 | ||
@@ -3428,7 +3430,8 @@ static u32 perf_event_tid(struct perf_event *event, struct task_struct *p) | |||
3428 | } | 3430 | } |
3429 | 3431 | ||
3430 | static void perf_output_read_one(struct perf_output_handle *handle, | 3432 | static void perf_output_read_one(struct perf_output_handle *handle, |
3431 | struct perf_event *event) | 3433 | struct perf_event *event, |
3434 | u64 enabled, u64 running) | ||
3432 | { | 3435 | { |
3433 | u64 read_format = event->attr.read_format; | 3436 | u64 read_format = event->attr.read_format; |
3434 | u64 values[4]; | 3437 | u64 values[4]; |
@@ -3436,11 +3439,11 @@ static void perf_output_read_one(struct perf_output_handle *handle, | |||
3436 | 3439 | ||
3437 | values[n++] = perf_event_count(event); | 3440 | values[n++] = perf_event_count(event); |
3438 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { | 3441 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { |
3439 | values[n++] = event->total_time_enabled + | 3442 | values[n++] = enabled + |
3440 | atomic64_read(&event->child_total_time_enabled); | 3443 | atomic64_read(&event->child_total_time_enabled); |
3441 | } | 3444 | } |
3442 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { | 3445 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { |
3443 | values[n++] = event->total_time_running + | 3446 | values[n++] = running + |
3444 | atomic64_read(&event->child_total_time_running); | 3447 | atomic64_read(&event->child_total_time_running); |
3445 | } | 3448 | } |
3446 | if (read_format & PERF_FORMAT_ID) | 3449 | if (read_format & PERF_FORMAT_ID) |
@@ -3453,7 +3456,8 @@ static void perf_output_read_one(struct perf_output_handle *handle, | |||
3453 | * XXX PERF_FORMAT_GROUP vs inherited events seems difficult. | 3456 | * XXX PERF_FORMAT_GROUP vs inherited events seems difficult. |
3454 | */ | 3457 | */ |
3455 | static void perf_output_read_group(struct perf_output_handle *handle, | 3458 | static void perf_output_read_group(struct perf_output_handle *handle, |
3456 | struct perf_event *event) | 3459 | struct perf_event *event, |
3460 | u64 enabled, u64 running) | ||
3457 | { | 3461 | { |
3458 | struct perf_event *leader = event->group_leader, *sub; | 3462 | struct perf_event *leader = event->group_leader, *sub; |
3459 | u64 read_format = event->attr.read_format; | 3463 | u64 read_format = event->attr.read_format; |
@@ -3463,10 +3467,10 @@ static void perf_output_read_group(struct perf_output_handle *handle, | |||
3463 | values[n++] = 1 + leader->nr_siblings; | 3467 | values[n++] = 1 + leader->nr_siblings; |
3464 | 3468 | ||
3465 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) | 3469 | if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) |
3466 | values[n++] = leader->total_time_enabled; | 3470 | values[n++] = enabled; |
3467 | 3471 | ||
3468 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) | 3472 | if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) |
3469 | values[n++] = leader->total_time_running; | 3473 | values[n++] = running; |
3470 | 3474 | ||
3471 | if (leader != event) | 3475 | if (leader != event) |
3472 | leader->pmu->read(leader); | 3476 | leader->pmu->read(leader); |
@@ -3491,13 +3495,35 @@ static void perf_output_read_group(struct perf_output_handle *handle, | |||
3491 | } | 3495 | } |
3492 | } | 3496 | } |
3493 | 3497 | ||
3498 | #define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\ | ||
3499 | PERF_FORMAT_TOTAL_TIME_RUNNING) | ||
3500 | |||
3494 | static void perf_output_read(struct perf_output_handle *handle, | 3501 | static void perf_output_read(struct perf_output_handle *handle, |
3495 | struct perf_event *event) | 3502 | struct perf_event *event) |
3496 | { | 3503 | { |
3504 | u64 enabled = 0, running = 0, now, ctx_time; | ||
3505 | u64 read_format = event->attr.read_format; | ||
3506 | |||
3507 | /* | ||
3508 | * compute total_time_enabled, total_time_running | ||
3509 | * based on snapshot values taken when the event | ||
3510 | * was last scheduled in. | ||
3511 | * | ||
3512 | * we cannot simply called update_context_time() | ||
3513 | * because of locking issue as we are called in | ||
3514 | * NMI context | ||
3515 | */ | ||
3516 | if (read_format & PERF_FORMAT_TOTAL_TIMES) { | ||
3517 | now = perf_clock(); | ||
3518 | ctx_time = event->shadow_ctx_time + now; | ||
3519 | enabled = ctx_time - event->tstamp_enabled; | ||
3520 | running = ctx_time - event->tstamp_running; | ||
3521 | } | ||
3522 | |||
3497 | if (event->attr.read_format & PERF_FORMAT_GROUP) | 3523 | if (event->attr.read_format & PERF_FORMAT_GROUP) |
3498 | perf_output_read_group(handle, event); | 3524 | perf_output_read_group(handle, event, enabled, running); |
3499 | else | 3525 | else |
3500 | perf_output_read_one(handle, event); | 3526 | perf_output_read_one(handle, event, enabled, running); |
3501 | } | 3527 | } |
3502 | 3528 | ||
3503 | void perf_output_sample(struct perf_output_handle *handle, | 3529 | void perf_output_sample(struct perf_output_handle *handle, |
@@ -3798,6 +3824,8 @@ static void perf_event_task_event(struct perf_task_event *task_event) | |||
3798 | rcu_read_lock(); | 3824 | rcu_read_lock(); |
3799 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 3825 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
3800 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); | 3826 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
3827 | if (cpuctx->active_pmu != pmu) | ||
3828 | goto next; | ||
3801 | perf_event_task_ctx(&cpuctx->ctx, task_event); | 3829 | perf_event_task_ctx(&cpuctx->ctx, task_event); |
3802 | 3830 | ||
3803 | ctx = task_event->task_ctx; | 3831 | ctx = task_event->task_ctx; |
@@ -3933,6 +3961,8 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) | |||
3933 | rcu_read_lock(); | 3961 | rcu_read_lock(); |
3934 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 3962 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
3935 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); | 3963 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
3964 | if (cpuctx->active_pmu != pmu) | ||
3965 | goto next; | ||
3936 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); | 3966 | perf_event_comm_ctx(&cpuctx->ctx, comm_event); |
3937 | 3967 | ||
3938 | ctxn = pmu->task_ctx_nr; | 3968 | ctxn = pmu->task_ctx_nr; |
@@ -4118,6 +4148,8 @@ got_name: | |||
4118 | rcu_read_lock(); | 4148 | rcu_read_lock(); |
4119 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 4149 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
4120 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); | 4150 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
4151 | if (cpuctx->active_pmu != pmu) | ||
4152 | goto next; | ||
4121 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, | 4153 | perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, |
4122 | vma->vm_flags & VM_EXEC); | 4154 | vma->vm_flags & VM_EXEC); |
4123 | 4155 | ||
@@ -4687,7 +4719,7 @@ static int perf_swevent_init(struct perf_event *event) | |||
4687 | break; | 4719 | break; |
4688 | } | 4720 | } |
4689 | 4721 | ||
4690 | if (event_id > PERF_COUNT_SW_MAX) | 4722 | if (event_id >= PERF_COUNT_SW_MAX) |
4691 | return -ENOENT; | 4723 | return -ENOENT; |
4692 | 4724 | ||
4693 | if (!event->parent) { | 4725 | if (!event->parent) { |
@@ -5119,20 +5151,36 @@ static void *find_pmu_context(int ctxn) | |||
5119 | return NULL; | 5151 | return NULL; |
5120 | } | 5152 | } |
5121 | 5153 | ||
5122 | static void free_pmu_context(void * __percpu cpu_context) | 5154 | static void update_pmu_context(struct pmu *pmu, struct pmu *old_pmu) |
5123 | { | 5155 | { |
5124 | struct pmu *pmu; | 5156 | int cpu; |
5157 | |||
5158 | for_each_possible_cpu(cpu) { | ||
5159 | struct perf_cpu_context *cpuctx; | ||
5160 | |||
5161 | cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); | ||
5162 | |||
5163 | if (cpuctx->active_pmu == old_pmu) | ||
5164 | cpuctx->active_pmu = pmu; | ||
5165 | } | ||
5166 | } | ||
5167 | |||
5168 | static void free_pmu_context(struct pmu *pmu) | ||
5169 | { | ||
5170 | struct pmu *i; | ||
5125 | 5171 | ||
5126 | mutex_lock(&pmus_lock); | 5172 | mutex_lock(&pmus_lock); |
5127 | /* | 5173 | /* |
5128 | * Like a real lame refcount. | 5174 | * Like a real lame refcount. |
5129 | */ | 5175 | */ |
5130 | list_for_each_entry(pmu, &pmus, entry) { | 5176 | list_for_each_entry(i, &pmus, entry) { |
5131 | if (pmu->pmu_cpu_context == cpu_context) | 5177 | if (i->pmu_cpu_context == pmu->pmu_cpu_context) { |
5178 | update_pmu_context(i, pmu); | ||
5132 | goto out; | 5179 | goto out; |
5180 | } | ||
5133 | } | 5181 | } |
5134 | 5182 | ||
5135 | free_percpu(cpu_context); | 5183 | free_percpu(pmu->pmu_cpu_context); |
5136 | out: | 5184 | out: |
5137 | mutex_unlock(&pmus_lock); | 5185 | mutex_unlock(&pmus_lock); |
5138 | } | 5186 | } |
@@ -5164,6 +5212,7 @@ int perf_pmu_register(struct pmu *pmu) | |||
5164 | cpuctx->ctx.pmu = pmu; | 5212 | cpuctx->ctx.pmu = pmu; |
5165 | cpuctx->jiffies_interval = 1; | 5213 | cpuctx->jiffies_interval = 1; |
5166 | INIT_LIST_HEAD(&cpuctx->rotation_list); | 5214 | INIT_LIST_HEAD(&cpuctx->rotation_list); |
5215 | cpuctx->active_pmu = pmu; | ||
5167 | } | 5216 | } |
5168 | 5217 | ||
5169 | got_cpu_context: | 5218 | got_cpu_context: |
@@ -5215,7 +5264,7 @@ void perf_pmu_unregister(struct pmu *pmu) | |||
5215 | synchronize_rcu(); | 5264 | synchronize_rcu(); |
5216 | 5265 | ||
5217 | free_percpu(pmu->pmu_disable_count); | 5266 | free_percpu(pmu->pmu_disable_count); |
5218 | free_pmu_context(pmu->pmu_cpu_context); | 5267 | free_pmu_context(pmu); |
5219 | } | 5268 | } |
5220 | 5269 | ||
5221 | struct pmu *perf_init_event(struct perf_event *event) | 5270 | struct pmu *perf_init_event(struct perf_event *event) |
@@ -5683,7 +5732,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
5683 | mutex_unlock(&ctx->mutex); | 5732 | mutex_unlock(&ctx->mutex); |
5684 | 5733 | ||
5685 | event->owner = current; | 5734 | event->owner = current; |
5686 | get_task_struct(current); | 5735 | |
5687 | mutex_lock(¤t->perf_event_mutex); | 5736 | mutex_lock(¤t->perf_event_mutex); |
5688 | list_add_tail(&event->owner_entry, ¤t->perf_event_list); | 5737 | list_add_tail(&event->owner_entry, ¤t->perf_event_list); |
5689 | mutex_unlock(¤t->perf_event_mutex); | 5738 | mutex_unlock(¤t->perf_event_mutex); |
@@ -5751,12 +5800,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, | |||
5751 | ++ctx->generation; | 5800 | ++ctx->generation; |
5752 | mutex_unlock(&ctx->mutex); | 5801 | mutex_unlock(&ctx->mutex); |
5753 | 5802 | ||
5754 | event->owner = current; | ||
5755 | get_task_struct(current); | ||
5756 | mutex_lock(¤t->perf_event_mutex); | ||
5757 | list_add_tail(&event->owner_entry, ¤t->perf_event_list); | ||
5758 | mutex_unlock(¤t->perf_event_mutex); | ||
5759 | |||
5760 | return event; | 5803 | return event; |
5761 | 5804 | ||
5762 | err_free: | 5805 | err_free: |
@@ -5907,8 +5950,24 @@ again: | |||
5907 | */ | 5950 | */ |
5908 | void perf_event_exit_task(struct task_struct *child) | 5951 | void perf_event_exit_task(struct task_struct *child) |
5909 | { | 5952 | { |
5953 | struct perf_event *event, *tmp; | ||
5910 | int ctxn; | 5954 | int ctxn; |
5911 | 5955 | ||
5956 | mutex_lock(&child->perf_event_mutex); | ||
5957 | list_for_each_entry_safe(event, tmp, &child->perf_event_list, | ||
5958 | owner_entry) { | ||
5959 | list_del_init(&event->owner_entry); | ||
5960 | |||
5961 | /* | ||
5962 | * Ensure the list deletion is visible before we clear | ||
5963 | * the owner, closes a race against perf_release() where | ||
5964 | * we need to serialize on the owner->perf_event_mutex. | ||
5965 | */ | ||
5966 | smp_wmb(); | ||
5967 | event->owner = NULL; | ||
5968 | } | ||
5969 | mutex_unlock(&child->perf_event_mutex); | ||
5970 | |||
5912 | for_each_task_context_nr(ctxn) | 5971 | for_each_task_context_nr(ctxn) |
5913 | perf_event_exit_task_context(child, ctxn); | 5972 | perf_event_exit_task_context(child, ctxn); |
5914 | } | 5973 | } |
@@ -6128,6 +6187,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
6128 | struct perf_event *event; | 6187 | struct perf_event *event; |
6129 | struct task_struct *parent = current; | 6188 | struct task_struct *parent = current; |
6130 | int inherited_all = 1; | 6189 | int inherited_all = 1; |
6190 | unsigned long flags; | ||
6131 | int ret = 0; | 6191 | int ret = 0; |
6132 | 6192 | ||
6133 | child->perf_event_ctxp[ctxn] = NULL; | 6193 | child->perf_event_ctxp[ctxn] = NULL; |
@@ -6168,6 +6228,15 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
6168 | break; | 6228 | break; |
6169 | } | 6229 | } |
6170 | 6230 | ||
6231 | /* | ||
6232 | * We can't hold ctx->lock when iterating the ->flexible_group list due | ||
6233 | * to allocations, but we need to prevent rotation because | ||
6234 | * rotate_ctx() will change the list from interrupt context. | ||
6235 | */ | ||
6236 | raw_spin_lock_irqsave(&parent_ctx->lock, flags); | ||
6237 | parent_ctx->rotate_disable = 1; | ||
6238 | raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); | ||
6239 | |||
6171 | list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { | 6240 | list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { |
6172 | ret = inherit_task_group(event, parent, parent_ctx, | 6241 | ret = inherit_task_group(event, parent, parent_ctx, |
6173 | child, ctxn, &inherited_all); | 6242 | child, ctxn, &inherited_all); |
@@ -6175,6 +6244,10 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
6175 | break; | 6244 | break; |
6176 | } | 6245 | } |
6177 | 6246 | ||
6247 | raw_spin_lock_irqsave(&parent_ctx->lock, flags); | ||
6248 | parent_ctx->rotate_disable = 0; | ||
6249 | raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); | ||
6250 | |||
6178 | child_ctx = child->perf_event_ctxp[ctxn]; | 6251 | child_ctx = child->perf_event_ctxp[ctxn]; |
6179 | 6252 | ||
6180 | if (child_ctx && inherited_all) { | 6253 | if (child_ctx && inherited_all) { |
@@ -6327,6 +6400,8 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) | |||
6327 | 6400 | ||
6328 | void __init perf_event_init(void) | 6401 | void __init perf_event_init(void) |
6329 | { | 6402 | { |
6403 | int ret; | ||
6404 | |||
6330 | perf_event_init_all_cpus(); | 6405 | perf_event_init_all_cpus(); |
6331 | init_srcu_struct(&pmus_srcu); | 6406 | init_srcu_struct(&pmus_srcu); |
6332 | perf_pmu_register(&perf_swevent); | 6407 | perf_pmu_register(&perf_swevent); |
@@ -6334,4 +6409,7 @@ void __init perf_event_init(void) | |||
6334 | perf_pmu_register(&perf_task_clock); | 6409 | perf_pmu_register(&perf_task_clock); |
6335 | perf_tp_register(); | 6410 | perf_tp_register(); |
6336 | perf_cpu_notifier(perf_cpu_notify); | 6411 | perf_cpu_notifier(perf_cpu_notify); |
6412 | |||
6413 | ret = init_hw_breakpoint(); | ||
6414 | WARN(ret, "hw_breakpoint initialization failed with: %d", ret); | ||
6337 | } | 6415 | } |
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index 645e541a45f6..aeaa7f846821 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c | |||
@@ -110,6 +110,7 @@ static const struct file_operations pm_qos_power_fops = { | |||
110 | .write = pm_qos_power_write, | 110 | .write = pm_qos_power_write, |
111 | .open = pm_qos_power_open, | 111 | .open = pm_qos_power_open, |
112 | .release = pm_qos_power_release, | 112 | .release = pm_qos_power_release, |
113 | .llseek = noop_llseek, | ||
113 | }; | 114 | }; |
114 | 115 | ||
115 | /* unlocked internal variant */ | 116 | /* unlocked internal variant */ |
@@ -120,10 +121,10 @@ static inline int pm_qos_get_value(struct pm_qos_object *o) | |||
120 | 121 | ||
121 | switch (o->type) { | 122 | switch (o->type) { |
122 | case PM_QOS_MIN: | 123 | case PM_QOS_MIN: |
123 | return plist_last(&o->requests)->prio; | 124 | return plist_first(&o->requests)->prio; |
124 | 125 | ||
125 | case PM_QOS_MAX: | 126 | case PM_QOS_MAX: |
126 | return plist_first(&o->requests)->prio; | 127 | return plist_last(&o->requests)->prio; |
127 | 128 | ||
128 | default: | 129 | default: |
129 | /* runtime check for not using enum */ | 130 | /* runtime check for not using enum */ |
@@ -398,7 +399,7 @@ static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf, | |||
398 | } else | 399 | } else |
399 | return -EINVAL; | 400 | return -EINVAL; |
400 | 401 | ||
401 | pm_qos_req = (struct pm_qos_request_list *)filp->private_data; | 402 | pm_qos_req = filp->private_data; |
402 | pm_qos_update_request(pm_qos_req, value); | 403 | pm_qos_update_request(pm_qos_req, value); |
403 | 404 | ||
404 | return count; | 405 | return count; |
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 6842eeba5879..05bb7173850e 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
@@ -37,13 +37,13 @@ static int check_clock(const clockid_t which_clock) | |||
37 | if (pid == 0) | 37 | if (pid == 0) |
38 | return 0; | 38 | return 0; |
39 | 39 | ||
40 | read_lock(&tasklist_lock); | 40 | rcu_read_lock(); |
41 | p = find_task_by_vpid(pid); | 41 | p = find_task_by_vpid(pid); |
42 | if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ? | 42 | if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ? |
43 | same_thread_group(p, current) : thread_group_leader(p))) { | 43 | same_thread_group(p, current) : has_group_leader_pid(p))) { |
44 | error = -EINVAL; | 44 | error = -EINVAL; |
45 | } | 45 | } |
46 | read_unlock(&tasklist_lock); | 46 | rcu_read_unlock(); |
47 | 47 | ||
48 | return error; | 48 | return error; |
49 | } | 49 | } |
@@ -390,7 +390,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer) | |||
390 | 390 | ||
391 | INIT_LIST_HEAD(&new_timer->it.cpu.entry); | 391 | INIT_LIST_HEAD(&new_timer->it.cpu.entry); |
392 | 392 | ||
393 | read_lock(&tasklist_lock); | 393 | rcu_read_lock(); |
394 | if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) { | 394 | if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) { |
395 | if (pid == 0) { | 395 | if (pid == 0) { |
396 | p = current; | 396 | p = current; |
@@ -404,7 +404,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer) | |||
404 | p = current->group_leader; | 404 | p = current->group_leader; |
405 | } else { | 405 | } else { |
406 | p = find_task_by_vpid(pid); | 406 | p = find_task_by_vpid(pid); |
407 | if (p && !thread_group_leader(p)) | 407 | if (p && !has_group_leader_pid(p)) |
408 | p = NULL; | 408 | p = NULL; |
409 | } | 409 | } |
410 | } | 410 | } |
@@ -414,7 +414,7 @@ int posix_cpu_timer_create(struct k_itimer *new_timer) | |||
414 | } else { | 414 | } else { |
415 | ret = -EINVAL; | 415 | ret = -EINVAL; |
416 | } | 416 | } |
417 | read_unlock(&tasklist_lock); | 417 | rcu_read_unlock(); |
418 | 418 | ||
419 | return ret; | 419 | return ret; |
420 | } | 420 | } |
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 29bff6117abc..a5aff3ebad38 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig | |||
@@ -246,9 +246,13 @@ config PM_OPS | |||
246 | depends on PM_SLEEP || PM_RUNTIME | 246 | depends on PM_SLEEP || PM_RUNTIME |
247 | default y | 247 | default y |
248 | 248 | ||
249 | config ARCH_HAS_OPP | ||
250 | bool | ||
251 | |||
249 | config PM_OPP | 252 | config PM_OPP |
250 | bool "Operating Performance Point (OPP) Layer library" | 253 | bool "Operating Performance Point (OPP) Layer library" |
251 | depends on PM | 254 | depends on PM |
255 | depends on ARCH_HAS_OPP | ||
252 | ---help--- | 256 | ---help--- |
253 | SOCs have a standard set of tuples consisting of frequency and | 257 | SOCs have a standard set of tuples consisting of frequency and |
254 | voltage pairs that the device will support per voltage domain. This | 258 | voltage pairs that the device will support per voltage domain. This |
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 657272e91d0a..048d0b514831 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c | |||
@@ -327,7 +327,6 @@ static int create_image(int platform_mode) | |||
327 | int hibernation_snapshot(int platform_mode) | 327 | int hibernation_snapshot(int platform_mode) |
328 | { | 328 | { |
329 | int error; | 329 | int error; |
330 | gfp_t saved_mask; | ||
331 | 330 | ||
332 | error = platform_begin(platform_mode); | 331 | error = platform_begin(platform_mode); |
333 | if (error) | 332 | if (error) |
@@ -339,7 +338,7 @@ int hibernation_snapshot(int platform_mode) | |||
339 | goto Close; | 338 | goto Close; |
340 | 339 | ||
341 | suspend_console(); | 340 | suspend_console(); |
342 | saved_mask = clear_gfp_allowed_mask(GFP_IOFS); | 341 | pm_restrict_gfp_mask(); |
343 | error = dpm_suspend_start(PMSG_FREEZE); | 342 | error = dpm_suspend_start(PMSG_FREEZE); |
344 | if (error) | 343 | if (error) |
345 | goto Recover_platform; | 344 | goto Recover_platform; |
@@ -348,7 +347,10 @@ int hibernation_snapshot(int platform_mode) | |||
348 | goto Recover_platform; | 347 | goto Recover_platform; |
349 | 348 | ||
350 | error = create_image(platform_mode); | 349 | error = create_image(platform_mode); |
351 | /* Control returns here after successful restore */ | 350 | /* |
351 | * Control returns here (1) after the image has been created or the | ||
352 | * image creation has failed and (2) after a successful restore. | ||
353 | */ | ||
352 | 354 | ||
353 | Resume_devices: | 355 | Resume_devices: |
354 | /* We may need to release the preallocated image pages here. */ | 356 | /* We may need to release the preallocated image pages here. */ |
@@ -357,7 +359,10 @@ int hibernation_snapshot(int platform_mode) | |||
357 | 359 | ||
358 | dpm_resume_end(in_suspend ? | 360 | dpm_resume_end(in_suspend ? |
359 | (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); | 361 | (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); |
360 | set_gfp_allowed_mask(saved_mask); | 362 | |
363 | if (error || !in_suspend) | ||
364 | pm_restore_gfp_mask(); | ||
365 | |||
361 | resume_console(); | 366 | resume_console(); |
362 | Close: | 367 | Close: |
363 | platform_end(platform_mode); | 368 | platform_end(platform_mode); |
@@ -452,17 +457,16 @@ static int resume_target_kernel(bool platform_mode) | |||
452 | int hibernation_restore(int platform_mode) | 457 | int hibernation_restore(int platform_mode) |
453 | { | 458 | { |
454 | int error; | 459 | int error; |
455 | gfp_t saved_mask; | ||
456 | 460 | ||
457 | pm_prepare_console(); | 461 | pm_prepare_console(); |
458 | suspend_console(); | 462 | suspend_console(); |
459 | saved_mask = clear_gfp_allowed_mask(GFP_IOFS); | 463 | pm_restrict_gfp_mask(); |
460 | error = dpm_suspend_start(PMSG_QUIESCE); | 464 | error = dpm_suspend_start(PMSG_QUIESCE); |
461 | if (!error) { | 465 | if (!error) { |
462 | error = resume_target_kernel(platform_mode); | 466 | error = resume_target_kernel(platform_mode); |
463 | dpm_resume_end(PMSG_RECOVER); | 467 | dpm_resume_end(PMSG_RECOVER); |
464 | } | 468 | } |
465 | set_gfp_allowed_mask(saved_mask); | 469 | pm_restore_gfp_mask(); |
466 | resume_console(); | 470 | resume_console(); |
467 | pm_restore_console(); | 471 | pm_restore_console(); |
468 | return error; | 472 | return error; |
@@ -476,7 +480,6 @@ int hibernation_restore(int platform_mode) | |||
476 | int hibernation_platform_enter(void) | 480 | int hibernation_platform_enter(void) |
477 | { | 481 | { |
478 | int error; | 482 | int error; |
479 | gfp_t saved_mask; | ||
480 | 483 | ||
481 | if (!hibernation_ops) | 484 | if (!hibernation_ops) |
482 | return -ENOSYS; | 485 | return -ENOSYS; |
@@ -492,7 +495,6 @@ int hibernation_platform_enter(void) | |||
492 | 495 | ||
493 | entering_platform_hibernation = true; | 496 | entering_platform_hibernation = true; |
494 | suspend_console(); | 497 | suspend_console(); |
495 | saved_mask = clear_gfp_allowed_mask(GFP_IOFS); | ||
496 | error = dpm_suspend_start(PMSG_HIBERNATE); | 498 | error = dpm_suspend_start(PMSG_HIBERNATE); |
497 | if (error) { | 499 | if (error) { |
498 | if (hibernation_ops->recover) | 500 | if (hibernation_ops->recover) |
@@ -536,7 +538,6 @@ int hibernation_platform_enter(void) | |||
536 | Resume_devices: | 538 | Resume_devices: |
537 | entering_platform_hibernation = false; | 539 | entering_platform_hibernation = false; |
538 | dpm_resume_end(PMSG_RESTORE); | 540 | dpm_resume_end(PMSG_RESTORE); |
539 | set_gfp_allowed_mask(saved_mask); | ||
540 | resume_console(); | 541 | resume_console(); |
541 | 542 | ||
542 | Close: | 543 | Close: |
@@ -646,6 +647,7 @@ int hibernate(void) | |||
646 | swsusp_free(); | 647 | swsusp_free(); |
647 | if (!error) | 648 | if (!error) |
648 | power_down(); | 649 | power_down(); |
650 | pm_restore_gfp_mask(); | ||
649 | } else { | 651 | } else { |
650 | pr_debug("PM: Image restored successfully.\n"); | 652 | pr_debug("PM: Image restored successfully.\n"); |
651 | } | 653 | } |
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index ac7eb109f196..0dac75ea4456 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
@@ -984,8 +984,8 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) | |||
984 | src = kmap_atomic(s_page, KM_USER0); | 984 | src = kmap_atomic(s_page, KM_USER0); |
985 | dst = kmap_atomic(d_page, KM_USER1); | 985 | dst = kmap_atomic(d_page, KM_USER1); |
986 | do_copy_page(dst, src); | 986 | do_copy_page(dst, src); |
987 | kunmap_atomic(src, KM_USER0); | ||
988 | kunmap_atomic(dst, KM_USER1); | 987 | kunmap_atomic(dst, KM_USER1); |
988 | kunmap_atomic(src, KM_USER0); | ||
989 | } else { | 989 | } else { |
990 | if (PageHighMem(d_page)) { | 990 | if (PageHighMem(d_page)) { |
991 | /* Page pointed to by src may contain some kernel | 991 | /* Page pointed to by src may contain some kernel |
@@ -993,7 +993,7 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) | |||
993 | */ | 993 | */ |
994 | safe_copy_page(buffer, s_page); | 994 | safe_copy_page(buffer, s_page); |
995 | dst = kmap_atomic(d_page, KM_USER0); | 995 | dst = kmap_atomic(d_page, KM_USER0); |
996 | memcpy(dst, buffer, PAGE_SIZE); | 996 | copy_page(dst, buffer); |
997 | kunmap_atomic(dst, KM_USER0); | 997 | kunmap_atomic(dst, KM_USER0); |
998 | } else { | 998 | } else { |
999 | safe_copy_page(page_address(d_page), s_page); | 999 | safe_copy_page(page_address(d_page), s_page); |
@@ -1687,7 +1687,7 @@ int snapshot_read_next(struct snapshot_handle *handle) | |||
1687 | memory_bm_position_reset(&orig_bm); | 1687 | memory_bm_position_reset(&orig_bm); |
1688 | memory_bm_position_reset(&copy_bm); | 1688 | memory_bm_position_reset(&copy_bm); |
1689 | } else if (handle->cur <= nr_meta_pages) { | 1689 | } else if (handle->cur <= nr_meta_pages) { |
1690 | memset(buffer, 0, PAGE_SIZE); | 1690 | clear_page(buffer); |
1691 | pack_pfns(buffer, &orig_bm); | 1691 | pack_pfns(buffer, &orig_bm); |
1692 | } else { | 1692 | } else { |
1693 | struct page *page; | 1693 | struct page *page; |
@@ -1701,7 +1701,7 @@ int snapshot_read_next(struct snapshot_handle *handle) | |||
1701 | void *kaddr; | 1701 | void *kaddr; |
1702 | 1702 | ||
1703 | kaddr = kmap_atomic(page, KM_USER0); | 1703 | kaddr = kmap_atomic(page, KM_USER0); |
1704 | memcpy(buffer, kaddr, PAGE_SIZE); | 1704 | copy_page(buffer, kaddr); |
1705 | kunmap_atomic(kaddr, KM_USER0); | 1705 | kunmap_atomic(kaddr, KM_USER0); |
1706 | handle->buffer = buffer; | 1706 | handle->buffer = buffer; |
1707 | } else { | 1707 | } else { |
@@ -1984,7 +1984,7 @@ static void copy_last_highmem_page(void) | |||
1984 | void *dst; | 1984 | void *dst; |
1985 | 1985 | ||
1986 | dst = kmap_atomic(last_highmem_page, KM_USER0); | 1986 | dst = kmap_atomic(last_highmem_page, KM_USER0); |
1987 | memcpy(dst, buffer, PAGE_SIZE); | 1987 | copy_page(dst, buffer); |
1988 | kunmap_atomic(dst, KM_USER0); | 1988 | kunmap_atomic(dst, KM_USER0); |
1989 | last_highmem_page = NULL; | 1989 | last_highmem_page = NULL; |
1990 | } | 1990 | } |
@@ -2270,11 +2270,11 @@ swap_two_pages_data(struct page *p1, struct page *p2, void *buf) | |||
2270 | 2270 | ||
2271 | kaddr1 = kmap_atomic(p1, KM_USER0); | 2271 | kaddr1 = kmap_atomic(p1, KM_USER0); |
2272 | kaddr2 = kmap_atomic(p2, KM_USER1); | 2272 | kaddr2 = kmap_atomic(p2, KM_USER1); |
2273 | memcpy(buf, kaddr1, PAGE_SIZE); | 2273 | copy_page(buf, kaddr1); |
2274 | memcpy(kaddr1, kaddr2, PAGE_SIZE); | 2274 | copy_page(kaddr1, kaddr2); |
2275 | memcpy(kaddr2, buf, PAGE_SIZE); | 2275 | copy_page(kaddr2, buf); |
2276 | kunmap_atomic(kaddr1, KM_USER0); | ||
2277 | kunmap_atomic(kaddr2, KM_USER1); | 2276 | kunmap_atomic(kaddr2, KM_USER1); |
2277 | kunmap_atomic(kaddr1, KM_USER0); | ||
2278 | } | 2278 | } |
2279 | 2279 | ||
2280 | /** | 2280 | /** |
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 7335952ee473..ecf770509d0d 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c | |||
@@ -197,7 +197,6 @@ static int suspend_enter(suspend_state_t state) | |||
197 | int suspend_devices_and_enter(suspend_state_t state) | 197 | int suspend_devices_and_enter(suspend_state_t state) |
198 | { | 198 | { |
199 | int error; | 199 | int error; |
200 | gfp_t saved_mask; | ||
201 | 200 | ||
202 | if (!suspend_ops) | 201 | if (!suspend_ops) |
203 | return -ENOSYS; | 202 | return -ENOSYS; |
@@ -208,7 +207,7 @@ int suspend_devices_and_enter(suspend_state_t state) | |||
208 | goto Close; | 207 | goto Close; |
209 | } | 208 | } |
210 | suspend_console(); | 209 | suspend_console(); |
211 | saved_mask = clear_gfp_allowed_mask(GFP_IOFS); | 210 | pm_restrict_gfp_mask(); |
212 | suspend_test_start(); | 211 | suspend_test_start(); |
213 | error = dpm_suspend_start(PMSG_SUSPEND); | 212 | error = dpm_suspend_start(PMSG_SUSPEND); |
214 | if (error) { | 213 | if (error) { |
@@ -225,7 +224,7 @@ int suspend_devices_and_enter(suspend_state_t state) | |||
225 | suspend_test_start(); | 224 | suspend_test_start(); |
226 | dpm_resume_end(PMSG_RESUME); | 225 | dpm_resume_end(PMSG_RESUME); |
227 | suspend_test_finish("resume devices"); | 226 | suspend_test_finish("resume devices"); |
228 | set_gfp_allowed_mask(saved_mask); | 227 | pm_restore_gfp_mask(); |
229 | resume_console(); | 228 | resume_console(); |
230 | Close: | 229 | Close: |
231 | if (suspend_ops->end) | 230 | if (suspend_ops->end) |
diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 916eaa790399..8c7e4832b9be 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c | |||
@@ -6,6 +6,7 @@ | |||
6 | * | 6 | * |
7 | * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz> | 7 | * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz> |
8 | * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> | 8 | * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> |
9 | * Copyright (C) 2010 Bojan Smojver <bojan@rexursive.com> | ||
9 | * | 10 | * |
10 | * This file is released under the GPLv2. | 11 | * This file is released under the GPLv2. |
11 | * | 12 | * |
@@ -29,7 +30,7 @@ | |||
29 | 30 | ||
30 | #include "power.h" | 31 | #include "power.h" |
31 | 32 | ||
32 | #define HIBERNATE_SIG "LINHIB0001" | 33 | #define HIBERNATE_SIG "S1SUSPEND" |
33 | 34 | ||
34 | /* | 35 | /* |
35 | * The swap map is a data structure used for keeping track of each page | 36 | * The swap map is a data structure used for keeping track of each page |
@@ -251,7 +252,7 @@ static int write_page(void *buf, sector_t offset, struct bio **bio_chain) | |||
251 | if (bio_chain) { | 252 | if (bio_chain) { |
252 | src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); | 253 | src = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); |
253 | if (src) { | 254 | if (src) { |
254 | memcpy(src, buf, PAGE_SIZE); | 255 | copy_page(src, buf); |
255 | } else { | 256 | } else { |
256 | WARN_ON_ONCE(1); | 257 | WARN_ON_ONCE(1); |
257 | bio_chain = NULL; /* Go synchronous */ | 258 | bio_chain = NULL; /* Go synchronous */ |
@@ -325,7 +326,7 @@ static int swap_write_page(struct swap_map_handle *handle, void *buf, | |||
325 | error = write_page(handle->cur, handle->cur_swap, NULL); | 326 | error = write_page(handle->cur, handle->cur_swap, NULL); |
326 | if (error) | 327 | if (error) |
327 | goto out; | 328 | goto out; |
328 | memset(handle->cur, 0, PAGE_SIZE); | 329 | clear_page(handle->cur); |
329 | handle->cur_swap = offset; | 330 | handle->cur_swap = offset; |
330 | handle->k = 0; | 331 | handle->k = 0; |
331 | } | 332 | } |
@@ -753,30 +754,43 @@ static int load_image_lzo(struct swap_map_handle *handle, | |||
753 | { | 754 | { |
754 | unsigned int m; | 755 | unsigned int m; |
755 | int error = 0; | 756 | int error = 0; |
757 | struct bio *bio; | ||
756 | struct timeval start; | 758 | struct timeval start; |
757 | struct timeval stop; | 759 | struct timeval stop; |
758 | unsigned nr_pages; | 760 | unsigned nr_pages; |
759 | size_t off, unc_len, cmp_len; | 761 | size_t i, off, unc_len, cmp_len; |
760 | unsigned char *unc, *cmp, *page; | 762 | unsigned char *unc, *cmp, *page[LZO_CMP_PAGES]; |
761 | 763 | ||
762 | page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); | 764 | for (i = 0; i < LZO_CMP_PAGES; i++) { |
763 | if (!page) { | 765 | page[i] = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); |
764 | printk(KERN_ERR "PM: Failed to allocate LZO page\n"); | 766 | if (!page[i]) { |
765 | return -ENOMEM; | 767 | printk(KERN_ERR "PM: Failed to allocate LZO page\n"); |
768 | |||
769 | while (i) | ||
770 | free_page((unsigned long)page[--i]); | ||
771 | |||
772 | return -ENOMEM; | ||
773 | } | ||
766 | } | 774 | } |
767 | 775 | ||
768 | unc = vmalloc(LZO_UNC_SIZE); | 776 | unc = vmalloc(LZO_UNC_SIZE); |
769 | if (!unc) { | 777 | if (!unc) { |
770 | printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n"); | 778 | printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n"); |
771 | free_page((unsigned long)page); | 779 | |
780 | for (i = 0; i < LZO_CMP_PAGES; i++) | ||
781 | free_page((unsigned long)page[i]); | ||
782 | |||
772 | return -ENOMEM; | 783 | return -ENOMEM; |
773 | } | 784 | } |
774 | 785 | ||
775 | cmp = vmalloc(LZO_CMP_SIZE); | 786 | cmp = vmalloc(LZO_CMP_SIZE); |
776 | if (!cmp) { | 787 | if (!cmp) { |
777 | printk(KERN_ERR "PM: Failed to allocate LZO compressed\n"); | 788 | printk(KERN_ERR "PM: Failed to allocate LZO compressed\n"); |
789 | |||
778 | vfree(unc); | 790 | vfree(unc); |
779 | free_page((unsigned long)page); | 791 | for (i = 0; i < LZO_CMP_PAGES; i++) |
792 | free_page((unsigned long)page[i]); | ||
793 | |||
780 | return -ENOMEM; | 794 | return -ENOMEM; |
781 | } | 795 | } |
782 | 796 | ||
@@ -787,6 +801,7 @@ static int load_image_lzo(struct swap_map_handle *handle, | |||
787 | if (!m) | 801 | if (!m) |
788 | m = 1; | 802 | m = 1; |
789 | nr_pages = 0; | 803 | nr_pages = 0; |
804 | bio = NULL; | ||
790 | do_gettimeofday(&start); | 805 | do_gettimeofday(&start); |
791 | 806 | ||
792 | error = snapshot_write_next(snapshot); | 807 | error = snapshot_write_next(snapshot); |
@@ -794,11 +809,11 @@ static int load_image_lzo(struct swap_map_handle *handle, | |||
794 | goto out_finish; | 809 | goto out_finish; |
795 | 810 | ||
796 | for (;;) { | 811 | for (;;) { |
797 | error = swap_read_page(handle, page, NULL); /* sync */ | 812 | error = swap_read_page(handle, page[0], NULL); /* sync */ |
798 | if (error) | 813 | if (error) |
799 | break; | 814 | break; |
800 | 815 | ||
801 | cmp_len = *(size_t *)page; | 816 | cmp_len = *(size_t *)page[0]; |
802 | if (unlikely(!cmp_len || | 817 | if (unlikely(!cmp_len || |
803 | cmp_len > lzo1x_worst_compress(LZO_UNC_SIZE))) { | 818 | cmp_len > lzo1x_worst_compress(LZO_UNC_SIZE))) { |
804 | printk(KERN_ERR "PM: Invalid LZO compressed length\n"); | 819 | printk(KERN_ERR "PM: Invalid LZO compressed length\n"); |
@@ -806,13 +821,20 @@ static int load_image_lzo(struct swap_map_handle *handle, | |||
806 | break; | 821 | break; |
807 | } | 822 | } |
808 | 823 | ||
809 | memcpy(cmp, page, PAGE_SIZE); | 824 | for (off = PAGE_SIZE, i = 1; |
810 | for (off = PAGE_SIZE; off < LZO_HEADER + cmp_len; off += PAGE_SIZE) { | 825 | off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) { |
811 | error = swap_read_page(handle, page, NULL); /* sync */ | 826 | error = swap_read_page(handle, page[i], &bio); |
812 | if (error) | 827 | if (error) |
813 | goto out_finish; | 828 | goto out_finish; |
829 | } | ||
814 | 830 | ||
815 | memcpy(cmp + off, page, PAGE_SIZE); | 831 | error = hib_wait_on_bio_chain(&bio); /* need all data now */ |
832 | if (error) | ||
833 | goto out_finish; | ||
834 | |||
835 | for (off = 0, i = 0; | ||
836 | off < LZO_HEADER + cmp_len; off += PAGE_SIZE, i++) { | ||
837 | memcpy(cmp + off, page[i], PAGE_SIZE); | ||
816 | } | 838 | } |
817 | 839 | ||
818 | unc_len = LZO_UNC_SIZE; | 840 | unc_len = LZO_UNC_SIZE; |
@@ -857,7 +879,8 @@ out_finish: | |||
857 | 879 | ||
858 | vfree(cmp); | 880 | vfree(cmp); |
859 | vfree(unc); | 881 | vfree(unc); |
860 | free_page((unsigned long)page); | 882 | for (i = 0; i < LZO_CMP_PAGES; i++) |
883 | free_page((unsigned long)page[i]); | ||
861 | 884 | ||
862 | return error; | 885 | return error; |
863 | } | 886 | } |
@@ -910,7 +933,7 @@ int swsusp_check(void) | |||
910 | hib_resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); | 933 | hib_resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ); |
911 | if (!IS_ERR(hib_resume_bdev)) { | 934 | if (!IS_ERR(hib_resume_bdev)) { |
912 | set_blocksize(hib_resume_bdev, PAGE_SIZE); | 935 | set_blocksize(hib_resume_bdev, PAGE_SIZE); |
913 | memset(swsusp_header, 0, PAGE_SIZE); | 936 | clear_page(swsusp_header); |
914 | error = hib_bio_read_page(swsusp_resume_block, | 937 | error = hib_bio_read_page(swsusp_resume_block, |
915 | swsusp_header, NULL); | 938 | swsusp_header, NULL); |
916 | if (error) | 939 | if (error) |
diff --git a/kernel/power/user.c b/kernel/power/user.c index e819e17877ca..c36c3b9e8a84 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c | |||
@@ -137,7 +137,7 @@ static int snapshot_release(struct inode *inode, struct file *filp) | |||
137 | free_all_swap_pages(data->swap); | 137 | free_all_swap_pages(data->swap); |
138 | if (data->frozen) | 138 | if (data->frozen) |
139 | thaw_processes(); | 139 | thaw_processes(); |
140 | pm_notifier_call_chain(data->mode == O_WRONLY ? | 140 | pm_notifier_call_chain(data->mode == O_RDONLY ? |
141 | PM_POST_HIBERNATION : PM_POST_RESTORE); | 141 | PM_POST_HIBERNATION : PM_POST_RESTORE); |
142 | atomic_inc(&snapshot_device_available); | 142 | atomic_inc(&snapshot_device_available); |
143 | 143 | ||
@@ -263,6 +263,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, | |||
263 | case SNAPSHOT_UNFREEZE: | 263 | case SNAPSHOT_UNFREEZE: |
264 | if (!data->frozen || data->ready) | 264 | if (!data->frozen || data->ready) |
265 | break; | 265 | break; |
266 | pm_restore_gfp_mask(); | ||
266 | thaw_processes(); | 267 | thaw_processes(); |
267 | usermodehelper_enable(); | 268 | usermodehelper_enable(); |
268 | data->frozen = 0; | 269 | data->frozen = 0; |
@@ -275,6 +276,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, | |||
275 | error = -EPERM; | 276 | error = -EPERM; |
276 | break; | 277 | break; |
277 | } | 278 | } |
279 | pm_restore_gfp_mask(); | ||
278 | error = hibernation_snapshot(data->platform_support); | 280 | error = hibernation_snapshot(data->platform_support); |
279 | if (!error) | 281 | if (!error) |
280 | error = put_user(in_suspend, (int __user *)arg); | 282 | error = put_user(in_suspend, (int __user *)arg); |
diff --git a/kernel/printk.c b/kernel/printk.c index 2531017795f6..a23315dc4498 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -210,7 +210,7 @@ __setup("log_buf_len=", log_buf_len_setup); | |||
210 | 210 | ||
211 | #ifdef CONFIG_BOOT_PRINTK_DELAY | 211 | #ifdef CONFIG_BOOT_PRINTK_DELAY |
212 | 212 | ||
213 | static unsigned int boot_delay; /* msecs delay after each printk during bootup */ | 213 | static int boot_delay; /* msecs delay after each printk during bootup */ |
214 | static unsigned long long loops_per_msec; /* based on boot_delay */ | 214 | static unsigned long long loops_per_msec; /* based on boot_delay */ |
215 | 215 | ||
216 | static int __init boot_delay_setup(char *str) | 216 | static int __init boot_delay_setup(char *str) |
@@ -261,6 +261,12 @@ static inline void boot_delay_msec(void) | |||
261 | } | 261 | } |
262 | #endif | 262 | #endif |
263 | 263 | ||
264 | #ifdef CONFIG_SECURITY_DMESG_RESTRICT | ||
265 | int dmesg_restrict = 1; | ||
266 | #else | ||
267 | int dmesg_restrict; | ||
268 | #endif | ||
269 | |||
264 | int do_syslog(int type, char __user *buf, int len, bool from_file) | 270 | int do_syslog(int type, char __user *buf, int len, bool from_file) |
265 | { | 271 | { |
266 | unsigned i, j, limit, count; | 272 | unsigned i, j, limit, count; |
@@ -268,7 +274,20 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) | |||
268 | char c; | 274 | char c; |
269 | int error = 0; | 275 | int error = 0; |
270 | 276 | ||
271 | error = security_syslog(type, from_file); | 277 | /* |
278 | * If this is from /proc/kmsg we only do the capabilities checks | ||
279 | * at open time. | ||
280 | */ | ||
281 | if (type == SYSLOG_ACTION_OPEN || !from_file) { | ||
282 | if (dmesg_restrict && !capable(CAP_SYS_ADMIN)) | ||
283 | return -EPERM; | ||
284 | if ((type != SYSLOG_ACTION_READ_ALL && | ||
285 | type != SYSLOG_ACTION_SIZE_BUFFER) && | ||
286 | !capable(CAP_SYS_ADMIN)) | ||
287 | return -EPERM; | ||
288 | } | ||
289 | |||
290 | error = security_syslog(type); | ||
272 | if (error) | 291 | if (error) |
273 | return error; | 292 | return error; |
274 | 293 | ||
@@ -647,6 +666,7 @@ static inline int can_use_console(unsigned int cpu) | |||
647 | * released but interrupts still disabled. | 666 | * released but interrupts still disabled. |
648 | */ | 667 | */ |
649 | static int acquire_console_semaphore_for_printk(unsigned int cpu) | 668 | static int acquire_console_semaphore_for_printk(unsigned int cpu) |
669 | __releases(&logbuf_lock) | ||
650 | { | 670 | { |
651 | int retval = 0; | 671 | int retval = 0; |
652 | 672 | ||
@@ -1062,13 +1082,15 @@ void printk_tick(void) | |||
1062 | 1082 | ||
1063 | int printk_needs_cpu(int cpu) | 1083 | int printk_needs_cpu(int cpu) |
1064 | { | 1084 | { |
1085 | if (unlikely(cpu_is_offline(cpu))) | ||
1086 | printk_tick(); | ||
1065 | return per_cpu(printk_pending, cpu); | 1087 | return per_cpu(printk_pending, cpu); |
1066 | } | 1088 | } |
1067 | 1089 | ||
1068 | void wake_up_klogd(void) | 1090 | void wake_up_klogd(void) |
1069 | { | 1091 | { |
1070 | if (waitqueue_active(&log_wait)) | 1092 | if (waitqueue_active(&log_wait)) |
1071 | __raw_get_cpu_var(printk_pending) = 1; | 1093 | this_cpu_write(printk_pending, 1); |
1072 | } | 1094 | } |
1073 | 1095 | ||
1074 | /** | 1096 | /** |
@@ -1511,7 +1533,7 @@ int kmsg_dump_unregister(struct kmsg_dumper *dumper) | |||
1511 | } | 1533 | } |
1512 | EXPORT_SYMBOL_GPL(kmsg_dump_unregister); | 1534 | EXPORT_SYMBOL_GPL(kmsg_dump_unregister); |
1513 | 1535 | ||
1514 | static const char const *kmsg_reasons[] = { | 1536 | static const char * const kmsg_reasons[] = { |
1515 | [KMSG_DUMP_OOPS] = "oops", | 1537 | [KMSG_DUMP_OOPS] = "oops", |
1516 | [KMSG_DUMP_PANIC] = "panic", | 1538 | [KMSG_DUMP_PANIC] = "panic", |
1517 | [KMSG_DUMP_KEXEC] = "kexec", | 1539 | [KMSG_DUMP_KEXEC] = "kexec", |
diff --git a/kernel/profile.c b/kernel/profile.c index b22a899934cc..66f841b7fbd3 100644 --- a/kernel/profile.c +++ b/kernel/profile.c | |||
@@ -555,6 +555,7 @@ static ssize_t write_profile(struct file *file, const char __user *buf, | |||
555 | static const struct file_operations proc_profile_operations = { | 555 | static const struct file_operations proc_profile_operations = { |
556 | .read = read_profile, | 556 | .read = read_profile, |
557 | .write = write_profile, | 557 | .write = write_profile, |
558 | .llseek = default_llseek, | ||
558 | }; | 559 | }; |
559 | 560 | ||
560 | #ifdef CONFIG_SMP | 561 | #ifdef CONFIG_SMP |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index f34d798ef4a2..99bbaa3e5b0d 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -181,7 +181,7 @@ int ptrace_attach(struct task_struct *task) | |||
181 | * under ptrace. | 181 | * under ptrace. |
182 | */ | 182 | */ |
183 | retval = -ERESTARTNOINTR; | 183 | retval = -ERESTARTNOINTR; |
184 | if (mutex_lock_interruptible(&task->cred_guard_mutex)) | 184 | if (mutex_lock_interruptible(&task->signal->cred_guard_mutex)) |
185 | goto out; | 185 | goto out; |
186 | 186 | ||
187 | task_lock(task); | 187 | task_lock(task); |
@@ -208,7 +208,7 @@ int ptrace_attach(struct task_struct *task) | |||
208 | unlock_tasklist: | 208 | unlock_tasklist: |
209 | write_unlock_irq(&tasklist_lock); | 209 | write_unlock_irq(&tasklist_lock); |
210 | unlock_creds: | 210 | unlock_creds: |
211 | mutex_unlock(&task->cred_guard_mutex); | 211 | mutex_unlock(&task->signal->cred_guard_mutex); |
212 | out: | 212 | out: |
213 | return retval; | 213 | return retval; |
214 | } | 214 | } |
@@ -329,6 +329,8 @@ int ptrace_detach(struct task_struct *child, unsigned int data) | |||
329 | * and reacquire the lock. | 329 | * and reacquire the lock. |
330 | */ | 330 | */ |
331 | void exit_ptrace(struct task_struct *tracer) | 331 | void exit_ptrace(struct task_struct *tracer) |
332 | __releases(&tasklist_lock) | ||
333 | __acquires(&tasklist_lock) | ||
332 | { | 334 | { |
333 | struct task_struct *p, *n; | 335 | struct task_struct *p, *n; |
334 | LIST_HEAD(ptrace_dead); | 336 | LIST_HEAD(ptrace_dead); |
@@ -402,7 +404,7 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds | |||
402 | return copied; | 404 | return copied; |
403 | } | 405 | } |
404 | 406 | ||
405 | static int ptrace_setoptions(struct task_struct *child, long data) | 407 | static int ptrace_setoptions(struct task_struct *child, unsigned long data) |
406 | { | 408 | { |
407 | child->ptrace &= ~PT_TRACE_MASK; | 409 | child->ptrace &= ~PT_TRACE_MASK; |
408 | 410 | ||
@@ -481,7 +483,8 @@ static int ptrace_setsiginfo(struct task_struct *child, const siginfo_t *info) | |||
481 | #define is_sysemu_singlestep(request) 0 | 483 | #define is_sysemu_singlestep(request) 0 |
482 | #endif | 484 | #endif |
483 | 485 | ||
484 | static int ptrace_resume(struct task_struct *child, long request, long data) | 486 | static int ptrace_resume(struct task_struct *child, long request, |
487 | unsigned long data) | ||
485 | { | 488 | { |
486 | if (!valid_signal(data)) | 489 | if (!valid_signal(data)) |
487 | return -EIO; | 490 | return -EIO; |
@@ -558,10 +561,12 @@ static int ptrace_regset(struct task_struct *task, int req, unsigned int type, | |||
558 | #endif | 561 | #endif |
559 | 562 | ||
560 | int ptrace_request(struct task_struct *child, long request, | 563 | int ptrace_request(struct task_struct *child, long request, |
561 | long addr, long data) | 564 | unsigned long addr, unsigned long data) |
562 | { | 565 | { |
563 | int ret = -EIO; | 566 | int ret = -EIO; |
564 | siginfo_t siginfo; | 567 | siginfo_t siginfo; |
568 | void __user *datavp = (void __user *) data; | ||
569 | unsigned long __user *datalp = datavp; | ||
565 | 570 | ||
566 | switch (request) { | 571 | switch (request) { |
567 | case PTRACE_PEEKTEXT: | 572 | case PTRACE_PEEKTEXT: |
@@ -578,19 +583,17 @@ int ptrace_request(struct task_struct *child, long request, | |||
578 | ret = ptrace_setoptions(child, data); | 583 | ret = ptrace_setoptions(child, data); |
579 | break; | 584 | break; |
580 | case PTRACE_GETEVENTMSG: | 585 | case PTRACE_GETEVENTMSG: |
581 | ret = put_user(child->ptrace_message, (unsigned long __user *) data); | 586 | ret = put_user(child->ptrace_message, datalp); |
582 | break; | 587 | break; |
583 | 588 | ||
584 | case PTRACE_GETSIGINFO: | 589 | case PTRACE_GETSIGINFO: |
585 | ret = ptrace_getsiginfo(child, &siginfo); | 590 | ret = ptrace_getsiginfo(child, &siginfo); |
586 | if (!ret) | 591 | if (!ret) |
587 | ret = copy_siginfo_to_user((siginfo_t __user *) data, | 592 | ret = copy_siginfo_to_user(datavp, &siginfo); |
588 | &siginfo); | ||
589 | break; | 593 | break; |
590 | 594 | ||
591 | case PTRACE_SETSIGINFO: | 595 | case PTRACE_SETSIGINFO: |
592 | if (copy_from_user(&siginfo, (siginfo_t __user *) data, | 596 | if (copy_from_user(&siginfo, datavp, sizeof siginfo)) |
593 | sizeof siginfo)) | ||
594 | ret = -EFAULT; | 597 | ret = -EFAULT; |
595 | else | 598 | else |
596 | ret = ptrace_setsiginfo(child, &siginfo); | 599 | ret = ptrace_setsiginfo(child, &siginfo); |
@@ -621,7 +624,7 @@ int ptrace_request(struct task_struct *child, long request, | |||
621 | } | 624 | } |
622 | mmput(mm); | 625 | mmput(mm); |
623 | 626 | ||
624 | ret = put_user(tmp, (unsigned long __user *) data); | 627 | ret = put_user(tmp, datalp); |
625 | break; | 628 | break; |
626 | } | 629 | } |
627 | #endif | 630 | #endif |
@@ -650,7 +653,7 @@ int ptrace_request(struct task_struct *child, long request, | |||
650 | case PTRACE_SETREGSET: | 653 | case PTRACE_SETREGSET: |
651 | { | 654 | { |
652 | struct iovec kiov; | 655 | struct iovec kiov; |
653 | struct iovec __user *uiov = (struct iovec __user *) data; | 656 | struct iovec __user *uiov = datavp; |
654 | 657 | ||
655 | if (!access_ok(VERIFY_WRITE, uiov, sizeof(*uiov))) | 658 | if (!access_ok(VERIFY_WRITE, uiov, sizeof(*uiov))) |
656 | return -EFAULT; | 659 | return -EFAULT; |
@@ -691,7 +694,8 @@ static struct task_struct *ptrace_get_task_struct(pid_t pid) | |||
691 | #define arch_ptrace_attach(child) do { } while (0) | 694 | #define arch_ptrace_attach(child) do { } while (0) |
692 | #endif | 695 | #endif |
693 | 696 | ||
694 | SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data) | 697 | SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, |
698 | unsigned long, data) | ||
695 | { | 699 | { |
696 | struct task_struct *child; | 700 | struct task_struct *child; |
697 | long ret; | 701 | long ret; |
@@ -732,7 +736,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data) | |||
732 | return ret; | 736 | return ret; |
733 | } | 737 | } |
734 | 738 | ||
735 | int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data) | 739 | int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr, |
740 | unsigned long data) | ||
736 | { | 741 | { |
737 | unsigned long tmp; | 742 | unsigned long tmp; |
738 | int copied; | 743 | int copied; |
@@ -743,7 +748,8 @@ int generic_ptrace_peekdata(struct task_struct *tsk, long addr, long data) | |||
743 | return put_user(tmp, (unsigned long __user *)data); | 748 | return put_user(tmp, (unsigned long __user *)data); |
744 | } | 749 | } |
745 | 750 | ||
746 | int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data) | 751 | int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr, |
752 | unsigned long data) | ||
747 | { | 753 | { |
748 | int copied; | 754 | int copied; |
749 | 755 | ||
diff --git a/kernel/range.c b/kernel/range.c index 471b66acabb5..37fa9b99ad58 100644 --- a/kernel/range.c +++ b/kernel/range.c | |||
@@ -119,7 +119,7 @@ static int cmp_range(const void *x1, const void *x2) | |||
119 | 119 | ||
120 | int clean_sort_range(struct range *range, int az) | 120 | int clean_sort_range(struct range *range, int az) |
121 | { | 121 | { |
122 | int i, j, k = az - 1, nr_range = 0; | 122 | int i, j, k = az - 1, nr_range = az; |
123 | 123 | ||
124 | for (i = 0; i < k; i++) { | 124 | for (i = 0; i < k; i++) { |
125 | if (range[i].end) | 125 | if (range[i].end) |
diff --git a/kernel/relay.c b/kernel/relay.c index c7cf397fb929..859ea5a9605f 100644 --- a/kernel/relay.c +++ b/kernel/relay.c | |||
@@ -70,17 +70,10 @@ static const struct vm_operations_struct relay_file_mmap_ops = { | |||
70 | */ | 70 | */ |
71 | static struct page **relay_alloc_page_array(unsigned int n_pages) | 71 | static struct page **relay_alloc_page_array(unsigned int n_pages) |
72 | { | 72 | { |
73 | struct page **array; | 73 | const size_t pa_size = n_pages * sizeof(struct page *); |
74 | size_t pa_size = n_pages * sizeof(struct page *); | 74 | if (pa_size > PAGE_SIZE) |
75 | 75 | return vzalloc(pa_size); | |
76 | if (pa_size > PAGE_SIZE) { | 76 | return kzalloc(pa_size, GFP_KERNEL); |
77 | array = vmalloc(pa_size); | ||
78 | if (array) | ||
79 | memset(array, 0, pa_size); | ||
80 | } else { | ||
81 | array = kzalloc(pa_size, GFP_KERNEL); | ||
82 | } | ||
83 | return array; | ||
84 | } | 77 | } |
85 | 78 | ||
86 | /* | 79 | /* |
diff --git a/kernel/resource.c b/kernel/resource.c index 7b36976e5dea..798e2fae2a06 100644 --- a/kernel/resource.c +++ b/kernel/resource.c | |||
@@ -357,6 +357,32 @@ int __weak page_is_ram(unsigned long pfn) | |||
357 | return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1; | 357 | return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1; |
358 | } | 358 | } |
359 | 359 | ||
360 | void __weak arch_remove_reservations(struct resource *avail) | ||
361 | { | ||
362 | } | ||
363 | |||
364 | static resource_size_t simple_align_resource(void *data, | ||
365 | const struct resource *avail, | ||
366 | resource_size_t size, | ||
367 | resource_size_t align) | ||
368 | { | ||
369 | return avail->start; | ||
370 | } | ||
371 | |||
372 | static void resource_clip(struct resource *res, resource_size_t min, | ||
373 | resource_size_t max) | ||
374 | { | ||
375 | if (res->start < min) | ||
376 | res->start = min; | ||
377 | if (res->end > max) | ||
378 | res->end = max; | ||
379 | } | ||
380 | |||
381 | static bool resource_contains(struct resource *res1, struct resource *res2) | ||
382 | { | ||
383 | return res1->start <= res2->start && res1->end >= res2->end; | ||
384 | } | ||
385 | |||
360 | /* | 386 | /* |
361 | * Find empty slot in the resource tree given range and alignment. | 387 | * Find empty slot in the resource tree given range and alignment. |
362 | */ | 388 | */ |
@@ -370,8 +396,9 @@ static int find_resource(struct resource *root, struct resource *new, | |||
370 | void *alignf_data) | 396 | void *alignf_data) |
371 | { | 397 | { |
372 | struct resource *this = root->child; | 398 | struct resource *this = root->child; |
373 | struct resource tmp = *new; | 399 | struct resource tmp = *new, avail, alloc; |
374 | 400 | ||
401 | tmp.flags = new->flags; | ||
375 | tmp.start = root->start; | 402 | tmp.start = root->start; |
376 | /* | 403 | /* |
377 | * Skip past an allocated resource that starts at 0, since the assignment | 404 | * Skip past an allocated resource that starts at 0, since the assignment |
@@ -386,17 +413,22 @@ static int find_resource(struct resource *root, struct resource *new, | |||
386 | tmp.end = this->start - 1; | 413 | tmp.end = this->start - 1; |
387 | else | 414 | else |
388 | tmp.end = root->end; | 415 | tmp.end = root->end; |
389 | if (tmp.start < min) | 416 | |
390 | tmp.start = min; | 417 | resource_clip(&tmp, min, max); |
391 | if (tmp.end > max) | 418 | arch_remove_reservations(&tmp); |
392 | tmp.end = max; | 419 | |
393 | tmp.start = ALIGN(tmp.start, align); | 420 | /* Check for overflow after ALIGN() */ |
394 | if (alignf) | 421 | avail = *new; |
395 | tmp.start = alignf(alignf_data, &tmp, size, align); | 422 | avail.start = ALIGN(tmp.start, align); |
396 | if (tmp.start < tmp.end && tmp.end - tmp.start >= size - 1) { | 423 | avail.end = tmp.end; |
397 | new->start = tmp.start; | 424 | if (avail.start >= tmp.start) { |
398 | new->end = tmp.start + size - 1; | 425 | alloc.start = alignf(alignf_data, &avail, size, align); |
399 | return 0; | 426 | alloc.end = alloc.start + size - 1; |
427 | if (resource_contains(&avail, &alloc)) { | ||
428 | new->start = alloc.start; | ||
429 | new->end = alloc.end; | ||
430 | return 0; | ||
431 | } | ||
400 | } | 432 | } |
401 | if (!this) | 433 | if (!this) |
402 | break; | 434 | break; |
@@ -428,6 +460,9 @@ int allocate_resource(struct resource *root, struct resource *new, | |||
428 | { | 460 | { |
429 | int err; | 461 | int err; |
430 | 462 | ||
463 | if (!alignf) | ||
464 | alignf = simple_align_resource; | ||
465 | |||
431 | write_lock(&resource_lock); | 466 | write_lock(&resource_lock); |
432 | err = find_resource(root, new, size, min, max, align, alignf, alignf_data); | 467 | err = find_resource(root, new, size, min, max, align, alignf, alignf_data); |
433 | if (err >= 0 && __request_resource(root, new)) | 468 | if (err >= 0 && __request_resource(root, new)) |
@@ -453,6 +488,8 @@ static struct resource * __insert_resource(struct resource *parent, struct resou | |||
453 | 488 | ||
454 | if (first == parent) | 489 | if (first == parent) |
455 | return first; | 490 | return first; |
491 | if (WARN_ON(first == new)) /* duplicated insertion */ | ||
492 | return first; | ||
456 | 493 | ||
457 | if ((first->start > new->start) || (first->end < new->end)) | 494 | if ((first->start > new->start) || (first->end < new->end)) |
458 | break; | 495 | break; |
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c index a56f629b057a..66cb89bc5ef1 100644 --- a/kernel/rtmutex-tester.c +++ b/kernel/rtmutex-tester.c | |||
@@ -76,7 +76,9 @@ static int handle_op(struct test_thread_data *td, int lockwakeup) | |||
76 | } | 76 | } |
77 | 77 | ||
78 | if (!lockwakeup && td->bkl == 4) { | 78 | if (!lockwakeup && td->bkl == 4) { |
79 | #ifdef CONFIG_LOCK_KERNEL | ||
79 | unlock_kernel(); | 80 | unlock_kernel(); |
81 | #endif | ||
80 | td->bkl = 0; | 82 | td->bkl = 0; |
81 | } | 83 | } |
82 | return 0; | 84 | return 0; |
@@ -133,14 +135,18 @@ static int handle_op(struct test_thread_data *td, int lockwakeup) | |||
133 | if (td->bkl) | 135 | if (td->bkl) |
134 | return 0; | 136 | return 0; |
135 | td->bkl = 1; | 137 | td->bkl = 1; |
138 | #ifdef CONFIG_LOCK_KERNEL | ||
136 | lock_kernel(); | 139 | lock_kernel(); |
140 | #endif | ||
137 | td->bkl = 4; | 141 | td->bkl = 4; |
138 | return 0; | 142 | return 0; |
139 | 143 | ||
140 | case RTTEST_UNLOCKBKL: | 144 | case RTTEST_UNLOCKBKL: |
141 | if (td->bkl != 4) | 145 | if (td->bkl != 4) |
142 | break; | 146 | break; |
147 | #ifdef CONFIG_LOCK_KERNEL | ||
143 | unlock_kernel(); | 148 | unlock_kernel(); |
149 | #endif | ||
144 | td->bkl = 0; | 150 | td->bkl = 0; |
145 | return 0; | 151 | return 0; |
146 | 152 | ||
diff --git a/kernel/sched.c b/kernel/sched.c index d42992bccdfa..297d1a0eedb0 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -560,18 +560,8 @@ struct rq { | |||
560 | 560 | ||
561 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); | 561 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); |
562 | 562 | ||
563 | static inline | ||
564 | void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | ||
565 | { | ||
566 | rq->curr->sched_class->check_preempt_curr(rq, p, flags); | ||
567 | 563 | ||
568 | /* | 564 | static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags); |
569 | * A queue event has occurred, and we're going to schedule. In | ||
570 | * this case, we can save a useless back to back clock update. | ||
571 | */ | ||
572 | if (test_tsk_need_resched(p)) | ||
573 | rq->skip_clock_update = 1; | ||
574 | } | ||
575 | 565 | ||
576 | static inline int cpu_of(struct rq *rq) | 566 | static inline int cpu_of(struct rq *rq) |
577 | { | 567 | { |
@@ -646,22 +636,18 @@ static inline struct task_group *task_group(struct task_struct *p) | |||
646 | 636 | ||
647 | #endif /* CONFIG_CGROUP_SCHED */ | 637 | #endif /* CONFIG_CGROUP_SCHED */ |
648 | 638 | ||
649 | static u64 irq_time_cpu(int cpu); | 639 | static void update_rq_clock_task(struct rq *rq, s64 delta); |
650 | static void sched_irq_time_avg_update(struct rq *rq, u64 irq_time); | ||
651 | 640 | ||
652 | inline void update_rq_clock(struct rq *rq) | 641 | static void update_rq_clock(struct rq *rq) |
653 | { | 642 | { |
654 | if (!rq->skip_clock_update) { | 643 | s64 delta; |
655 | int cpu = cpu_of(rq); | ||
656 | u64 irq_time; | ||
657 | 644 | ||
658 | rq->clock = sched_clock_cpu(cpu); | 645 | if (rq->skip_clock_update) |
659 | irq_time = irq_time_cpu(cpu); | 646 | return; |
660 | if (rq->clock - irq_time > rq->clock_task) | ||
661 | rq->clock_task = rq->clock - irq_time; | ||
662 | 647 | ||
663 | sched_irq_time_avg_update(rq, irq_time); | 648 | delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; |
664 | } | 649 | rq->clock += delta; |
650 | update_rq_clock_task(rq, delta); | ||
665 | } | 651 | } |
666 | 652 | ||
667 | /* | 653 | /* |
@@ -1934,10 +1920,9 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags) | |||
1934 | * They are read and saved off onto struct rq in update_rq_clock(). | 1920 | * They are read and saved off onto struct rq in update_rq_clock(). |
1935 | * This may result in other CPU reading this CPU's irq time and can | 1921 | * This may result in other CPU reading this CPU's irq time and can |
1936 | * race with irq/account_system_vtime on this CPU. We would either get old | 1922 | * race with irq/account_system_vtime on this CPU. We would either get old |
1937 | * or new value (or semi updated value on 32 bit) with a side effect of | 1923 | * or new value with a side effect of accounting a slice of irq time to wrong |
1938 | * accounting a slice of irq time to wrong task when irq is in progress | 1924 | * task when irq is in progress while we read rq->clock. That is a worthy |
1939 | * while we read rq->clock. That is a worthy compromise in place of having | 1925 | * compromise in place of having locks on each irq in account_system_time. |
1940 | * locks on each irq in account_system_time. | ||
1941 | */ | 1926 | */ |
1942 | static DEFINE_PER_CPU(u64, cpu_hardirq_time); | 1927 | static DEFINE_PER_CPU(u64, cpu_hardirq_time); |
1943 | static DEFINE_PER_CPU(u64, cpu_softirq_time); | 1928 | static DEFINE_PER_CPU(u64, cpu_softirq_time); |
@@ -1955,19 +1940,58 @@ void disable_sched_clock_irqtime(void) | |||
1955 | sched_clock_irqtime = 0; | 1940 | sched_clock_irqtime = 0; |
1956 | } | 1941 | } |
1957 | 1942 | ||
1958 | static u64 irq_time_cpu(int cpu) | 1943 | #ifndef CONFIG_64BIT |
1944 | static DEFINE_PER_CPU(seqcount_t, irq_time_seq); | ||
1945 | |||
1946 | static inline void irq_time_write_begin(void) | ||
1959 | { | 1947 | { |
1960 | if (!sched_clock_irqtime) | 1948 | __this_cpu_inc(irq_time_seq.sequence); |
1961 | return 0; | 1949 | smp_wmb(); |
1950 | } | ||
1962 | 1951 | ||
1952 | static inline void irq_time_write_end(void) | ||
1953 | { | ||
1954 | smp_wmb(); | ||
1955 | __this_cpu_inc(irq_time_seq.sequence); | ||
1956 | } | ||
1957 | |||
1958 | static inline u64 irq_time_read(int cpu) | ||
1959 | { | ||
1960 | u64 irq_time; | ||
1961 | unsigned seq; | ||
1962 | |||
1963 | do { | ||
1964 | seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu)); | ||
1965 | irq_time = per_cpu(cpu_softirq_time, cpu) + | ||
1966 | per_cpu(cpu_hardirq_time, cpu); | ||
1967 | } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq)); | ||
1968 | |||
1969 | return irq_time; | ||
1970 | } | ||
1971 | #else /* CONFIG_64BIT */ | ||
1972 | static inline void irq_time_write_begin(void) | ||
1973 | { | ||
1974 | } | ||
1975 | |||
1976 | static inline void irq_time_write_end(void) | ||
1977 | { | ||
1978 | } | ||
1979 | |||
1980 | static inline u64 irq_time_read(int cpu) | ||
1981 | { | ||
1963 | return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu); | 1982 | return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu); |
1964 | } | 1983 | } |
1984 | #endif /* CONFIG_64BIT */ | ||
1965 | 1985 | ||
1986 | /* | ||
1987 | * Called before incrementing preempt_count on {soft,}irq_enter | ||
1988 | * and before decrementing preempt_count on {soft,}irq_exit. | ||
1989 | */ | ||
1966 | void account_system_vtime(struct task_struct *curr) | 1990 | void account_system_vtime(struct task_struct *curr) |
1967 | { | 1991 | { |
1968 | unsigned long flags; | 1992 | unsigned long flags; |
1993 | s64 delta; | ||
1969 | int cpu; | 1994 | int cpu; |
1970 | u64 now, delta; | ||
1971 | 1995 | ||
1972 | if (!sched_clock_irqtime) | 1996 | if (!sched_clock_irqtime) |
1973 | return; | 1997 | return; |
@@ -1975,9 +1999,10 @@ void account_system_vtime(struct task_struct *curr) | |||
1975 | local_irq_save(flags); | 1999 | local_irq_save(flags); |
1976 | 2000 | ||
1977 | cpu = smp_processor_id(); | 2001 | cpu = smp_processor_id(); |
1978 | now = sched_clock_cpu(cpu); | 2002 | delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time); |
1979 | delta = now - per_cpu(irq_start_time, cpu); | 2003 | __this_cpu_add(irq_start_time, delta); |
1980 | per_cpu(irq_start_time, cpu) = now; | 2004 | |
2005 | irq_time_write_begin(); | ||
1981 | /* | 2006 | /* |
1982 | * We do not account for softirq time from ksoftirqd here. | 2007 | * We do not account for softirq time from ksoftirqd here. |
1983 | * We want to continue accounting softirq time to ksoftirqd thread | 2008 | * We want to continue accounting softirq time to ksoftirqd thread |
@@ -1985,33 +2010,55 @@ void account_system_vtime(struct task_struct *curr) | |||
1985 | * that do not consume any time, but still wants to run. | 2010 | * that do not consume any time, but still wants to run. |
1986 | */ | 2011 | */ |
1987 | if (hardirq_count()) | 2012 | if (hardirq_count()) |
1988 | per_cpu(cpu_hardirq_time, cpu) += delta; | 2013 | __this_cpu_add(cpu_hardirq_time, delta); |
1989 | else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD)) | 2014 | else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD)) |
1990 | per_cpu(cpu_softirq_time, cpu) += delta; | 2015 | __this_cpu_add(cpu_softirq_time, delta); |
1991 | 2016 | ||
2017 | irq_time_write_end(); | ||
1992 | local_irq_restore(flags); | 2018 | local_irq_restore(flags); |
1993 | } | 2019 | } |
1994 | EXPORT_SYMBOL_GPL(account_system_vtime); | 2020 | EXPORT_SYMBOL_GPL(account_system_vtime); |
1995 | 2021 | ||
1996 | static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) | 2022 | static void update_rq_clock_task(struct rq *rq, s64 delta) |
1997 | { | 2023 | { |
1998 | if (sched_clock_irqtime && sched_feat(NONIRQ_POWER)) { | 2024 | s64 irq_delta; |
1999 | u64 delta_irq = curr_irq_time - rq->prev_irq_time; | 2025 | |
2000 | rq->prev_irq_time = curr_irq_time; | 2026 | irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; |
2001 | sched_rt_avg_update(rq, delta_irq); | 2027 | |
2002 | } | 2028 | /* |
2029 | * Since irq_time is only updated on {soft,}irq_exit, we might run into | ||
2030 | * this case when a previous update_rq_clock() happened inside a | ||
2031 | * {soft,}irq region. | ||
2032 | * | ||
2033 | * When this happens, we stop ->clock_task and only update the | ||
2034 | * prev_irq_time stamp to account for the part that fit, so that a next | ||
2035 | * update will consume the rest. This ensures ->clock_task is | ||
2036 | * monotonic. | ||
2037 | * | ||
2038 | * It does however cause some slight misattribution of {soft,}irq | ||
2039 | * time, a more accurate solution would be to update the irq_time using | ||
2040 | * the current rq->clock timestamp, except that would require using | ||
2041 | * atomic ops. | ||
2042 | */ | ||
2043 | if (irq_delta > delta) | ||
2044 | irq_delta = delta; | ||
2045 | |||
2046 | rq->prev_irq_time += irq_delta; | ||
2047 | delta -= irq_delta; | ||
2048 | rq->clock_task += delta; | ||
2049 | |||
2050 | if (irq_delta && sched_feat(NONIRQ_POWER)) | ||
2051 | sched_rt_avg_update(rq, irq_delta); | ||
2003 | } | 2052 | } |
2004 | 2053 | ||
2005 | #else | 2054 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ |
2006 | 2055 | ||
2007 | static u64 irq_time_cpu(int cpu) | 2056 | static void update_rq_clock_task(struct rq *rq, s64 delta) |
2008 | { | 2057 | { |
2009 | return 0; | 2058 | rq->clock_task += delta; |
2010 | } | 2059 | } |
2011 | 2060 | ||
2012 | static void sched_irq_time_avg_update(struct rq *rq, u64 curr_irq_time) { } | 2061 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ |
2013 | |||
2014 | #endif | ||
2015 | 2062 | ||
2016 | #include "sched_idletask.c" | 2063 | #include "sched_idletask.c" |
2017 | #include "sched_fair.c" | 2064 | #include "sched_fair.c" |
@@ -2118,6 +2165,31 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p, | |||
2118 | p->sched_class->prio_changed(rq, p, oldprio, running); | 2165 | p->sched_class->prio_changed(rq, p, oldprio, running); |
2119 | } | 2166 | } |
2120 | 2167 | ||
2168 | static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | ||
2169 | { | ||
2170 | const struct sched_class *class; | ||
2171 | |||
2172 | if (p->sched_class == rq->curr->sched_class) { | ||
2173 | rq->curr->sched_class->check_preempt_curr(rq, p, flags); | ||
2174 | } else { | ||
2175 | for_each_class(class) { | ||
2176 | if (class == rq->curr->sched_class) | ||
2177 | break; | ||
2178 | if (class == p->sched_class) { | ||
2179 | resched_task(rq->curr); | ||
2180 | break; | ||
2181 | } | ||
2182 | } | ||
2183 | } | ||
2184 | |||
2185 | /* | ||
2186 | * A queue event has occurred, and we're going to schedule. In | ||
2187 | * this case, we can save a useless back to back clock update. | ||
2188 | */ | ||
2189 | if (rq->curr->se.on_rq && test_tsk_need_resched(rq->curr)) | ||
2190 | rq->skip_clock_update = 1; | ||
2191 | } | ||
2192 | |||
2121 | #ifdef CONFIG_SMP | 2193 | #ifdef CONFIG_SMP |
2122 | /* | 2194 | /* |
2123 | * Is this task likely cache-hot: | 2195 | * Is this task likely cache-hot: |
@@ -3104,6 +3176,15 @@ static long calc_load_fold_active(struct rq *this_rq) | |||
3104 | return delta; | 3176 | return delta; |
3105 | } | 3177 | } |
3106 | 3178 | ||
3179 | static unsigned long | ||
3180 | calc_load(unsigned long load, unsigned long exp, unsigned long active) | ||
3181 | { | ||
3182 | load *= exp; | ||
3183 | load += active * (FIXED_1 - exp); | ||
3184 | load += 1UL << (FSHIFT - 1); | ||
3185 | return load >> FSHIFT; | ||
3186 | } | ||
3187 | |||
3107 | #ifdef CONFIG_NO_HZ | 3188 | #ifdef CONFIG_NO_HZ |
3108 | /* | 3189 | /* |
3109 | * For NO_HZ we delay the active fold to the next LOAD_FREQ update. | 3190 | * For NO_HZ we delay the active fold to the next LOAD_FREQ update. |
@@ -3133,6 +3214,128 @@ static long calc_load_fold_idle(void) | |||
3133 | 3214 | ||
3134 | return delta; | 3215 | return delta; |
3135 | } | 3216 | } |
3217 | |||
3218 | /** | ||
3219 | * fixed_power_int - compute: x^n, in O(log n) time | ||
3220 | * | ||
3221 | * @x: base of the power | ||
3222 | * @frac_bits: fractional bits of @x | ||
3223 | * @n: power to raise @x to. | ||
3224 | * | ||
3225 | * By exploiting the relation between the definition of the natural power | ||
3226 | * function: x^n := x*x*...*x (x multiplied by itself for n times), and | ||
3227 | * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i, | ||
3228 | * (where: n_i \elem {0, 1}, the binary vector representing n), | ||
3229 | * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is | ||
3230 | * of course trivially computable in O(log_2 n), the length of our binary | ||
3231 | * vector. | ||
3232 | */ | ||
3233 | static unsigned long | ||
3234 | fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n) | ||
3235 | { | ||
3236 | unsigned long result = 1UL << frac_bits; | ||
3237 | |||
3238 | if (n) for (;;) { | ||
3239 | if (n & 1) { | ||
3240 | result *= x; | ||
3241 | result += 1UL << (frac_bits - 1); | ||
3242 | result >>= frac_bits; | ||
3243 | } | ||
3244 | n >>= 1; | ||
3245 | if (!n) | ||
3246 | break; | ||
3247 | x *= x; | ||
3248 | x += 1UL << (frac_bits - 1); | ||
3249 | x >>= frac_bits; | ||
3250 | } | ||
3251 | |||
3252 | return result; | ||
3253 | } | ||
3254 | |||
3255 | /* | ||
3256 | * a1 = a0 * e + a * (1 - e) | ||
3257 | * | ||
3258 | * a2 = a1 * e + a * (1 - e) | ||
3259 | * = (a0 * e + a * (1 - e)) * e + a * (1 - e) | ||
3260 | * = a0 * e^2 + a * (1 - e) * (1 + e) | ||
3261 | * | ||
3262 | * a3 = a2 * e + a * (1 - e) | ||
3263 | * = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e) | ||
3264 | * = a0 * e^3 + a * (1 - e) * (1 + e + e^2) | ||
3265 | * | ||
3266 | * ... | ||
3267 | * | ||
3268 | * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^(n-1)) [1] | ||
3269 | * = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e) | ||
3270 | * = a0 * e^n + a * (1 - e^n) | ||
3271 | * | ||
3272 | * [1] application of the geometric series: | ||
3273 | * | ||
3274 | * n 1 - x^(n+1) | ||
3275 | * S_n := \Sum x^i = ------------- | ||
3276 | * i=0 1 - x | ||
3277 | */ | ||
3278 | static unsigned long | ||
3279 | calc_load_n(unsigned long load, unsigned long exp, | ||
3280 | unsigned long active, unsigned int n) | ||
3281 | { | ||
3282 | |||
3283 | return calc_load(load, fixed_power_int(exp, FSHIFT, n), active); | ||
3284 | } | ||
3285 | |||
3286 | /* | ||
3287 | * NO_HZ can leave us missing all per-cpu ticks calling | ||
3288 | * calc_load_account_active(), but since an idle CPU folds its delta into | ||
3289 | * calc_load_tasks_idle per calc_load_account_idle(), all we need to do is fold | ||
3290 | * in the pending idle delta if our idle period crossed a load cycle boundary. | ||
3291 | * | ||
3292 | * Once we've updated the global active value, we need to apply the exponential | ||
3293 | * weights adjusted to the number of cycles missed. | ||
3294 | */ | ||
3295 | static void calc_global_nohz(unsigned long ticks) | ||
3296 | { | ||
3297 | long delta, active, n; | ||
3298 | |||
3299 | if (time_before(jiffies, calc_load_update)) | ||
3300 | return; | ||
3301 | |||
3302 | /* | ||
3303 | * If we crossed a calc_load_update boundary, make sure to fold | ||
3304 | * any pending idle changes, the respective CPUs might have | ||
3305 | * missed the tick driven calc_load_account_active() update | ||
3306 | * due to NO_HZ. | ||
3307 | */ | ||
3308 | delta = calc_load_fold_idle(); | ||
3309 | if (delta) | ||
3310 | atomic_long_add(delta, &calc_load_tasks); | ||
3311 | |||
3312 | /* | ||
3313 | * If we were idle for multiple load cycles, apply them. | ||
3314 | */ | ||
3315 | if (ticks >= LOAD_FREQ) { | ||
3316 | n = ticks / LOAD_FREQ; | ||
3317 | |||
3318 | active = atomic_long_read(&calc_load_tasks); | ||
3319 | active = active > 0 ? active * FIXED_1 : 0; | ||
3320 | |||
3321 | avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); | ||
3322 | avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); | ||
3323 | avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); | ||
3324 | |||
3325 | calc_load_update += n * LOAD_FREQ; | ||
3326 | } | ||
3327 | |||
3328 | /* | ||
3329 | * It's possible the remainder of the above division also crosses | ||
3330 | * a LOAD_FREQ period, the regular check in calc_global_load() | ||
3331 | * which comes after this will take care of that. | ||
3332 | * | ||
3333 | * Consider us being 11 ticks before a cycle completion, and us | ||
3334 | * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will | ||
3335 | * age us 4 cycles, and the test in calc_global_load() will | ||
3336 | * pick up the final one. | ||
3337 | */ | ||
3338 | } | ||
3136 | #else | 3339 | #else |
3137 | static void calc_load_account_idle(struct rq *this_rq) | 3340 | static void calc_load_account_idle(struct rq *this_rq) |
3138 | { | 3341 | { |
@@ -3142,6 +3345,10 @@ static inline long calc_load_fold_idle(void) | |||
3142 | { | 3345 | { |
3143 | return 0; | 3346 | return 0; |
3144 | } | 3347 | } |
3348 | |||
3349 | static void calc_global_nohz(unsigned long ticks) | ||
3350 | { | ||
3351 | } | ||
3145 | #endif | 3352 | #endif |
3146 | 3353 | ||
3147 | /** | 3354 | /** |
@@ -3159,24 +3366,17 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift) | |||
3159 | loads[2] = (avenrun[2] + offset) << shift; | 3366 | loads[2] = (avenrun[2] + offset) << shift; |
3160 | } | 3367 | } |
3161 | 3368 | ||
3162 | static unsigned long | ||
3163 | calc_load(unsigned long load, unsigned long exp, unsigned long active) | ||
3164 | { | ||
3165 | load *= exp; | ||
3166 | load += active * (FIXED_1 - exp); | ||
3167 | return load >> FSHIFT; | ||
3168 | } | ||
3169 | |||
3170 | /* | 3369 | /* |
3171 | * calc_load - update the avenrun load estimates 10 ticks after the | 3370 | * calc_load - update the avenrun load estimates 10 ticks after the |
3172 | * CPUs have updated calc_load_tasks. | 3371 | * CPUs have updated calc_load_tasks. |
3173 | */ | 3372 | */ |
3174 | void calc_global_load(void) | 3373 | void calc_global_load(unsigned long ticks) |
3175 | { | 3374 | { |
3176 | unsigned long upd = calc_load_update + 10; | ||
3177 | long active; | 3375 | long active; |
3178 | 3376 | ||
3179 | if (time_before(jiffies, upd)) | 3377 | calc_global_nohz(ticks); |
3378 | |||
3379 | if (time_before(jiffies, calc_load_update + 10)) | ||
3180 | return; | 3380 | return; |
3181 | 3381 | ||
3182 | active = atomic_long_read(&calc_load_tasks); | 3382 | active = atomic_long_read(&calc_load_tasks); |
@@ -3830,7 +4030,6 @@ static void put_prev_task(struct rq *rq, struct task_struct *prev) | |||
3830 | { | 4030 | { |
3831 | if (prev->se.on_rq) | 4031 | if (prev->se.on_rq) |
3832 | update_rq_clock(rq); | 4032 | update_rq_clock(rq); |
3833 | rq->skip_clock_update = 0; | ||
3834 | prev->sched_class->put_prev_task(rq, prev); | 4033 | prev->sched_class->put_prev_task(rq, prev); |
3835 | } | 4034 | } |
3836 | 4035 | ||
@@ -3888,7 +4087,6 @@ need_resched_nonpreemptible: | |||
3888 | hrtick_clear(rq); | 4087 | hrtick_clear(rq); |
3889 | 4088 | ||
3890 | raw_spin_lock_irq(&rq->lock); | 4089 | raw_spin_lock_irq(&rq->lock); |
3891 | clear_tsk_need_resched(prev); | ||
3892 | 4090 | ||
3893 | switch_count = &prev->nivcsw; | 4091 | switch_count = &prev->nivcsw; |
3894 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { | 4092 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { |
@@ -3920,6 +4118,8 @@ need_resched_nonpreemptible: | |||
3920 | 4118 | ||
3921 | put_prev_task(rq, prev); | 4119 | put_prev_task(rq, prev); |
3922 | next = pick_next_task(rq); | 4120 | next = pick_next_task(rq); |
4121 | clear_tsk_need_resched(prev); | ||
4122 | rq->skip_clock_update = 0; | ||
3923 | 4123 | ||
3924 | if (likely(prev != next)) { | 4124 | if (likely(prev != next)) { |
3925 | sched_info_switch(prev, next); | 4125 | sched_info_switch(prev, next); |
@@ -6960,6 +7160,8 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) | |||
6960 | if (cpu != group_first_cpu(sd->groups)) | 7160 | if (cpu != group_first_cpu(sd->groups)) |
6961 | return; | 7161 | return; |
6962 | 7162 | ||
7163 | sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups)); | ||
7164 | |||
6963 | child = sd->child; | 7165 | child = sd->child; |
6964 | 7166 | ||
6965 | sd->groups->cpu_power = 0; | 7167 | sd->groups->cpu_power = 0; |
@@ -8510,12 +8712,12 @@ void sched_move_task(struct task_struct *tsk) | |||
8510 | if (unlikely(running)) | 8712 | if (unlikely(running)) |
8511 | tsk->sched_class->put_prev_task(rq, tsk); | 8713 | tsk->sched_class->put_prev_task(rq, tsk); |
8512 | 8714 | ||
8513 | set_task_rq(tsk, task_cpu(tsk)); | ||
8514 | |||
8515 | #ifdef CONFIG_FAIR_GROUP_SCHED | 8715 | #ifdef CONFIG_FAIR_GROUP_SCHED |
8516 | if (tsk->sched_class->moved_group) | 8716 | if (tsk->sched_class->task_move_group) |
8517 | tsk->sched_class->moved_group(tsk, on_rq); | 8717 | tsk->sched_class->task_move_group(tsk, on_rq); |
8718 | else | ||
8518 | #endif | 8719 | #endif |
8720 | set_task_rq(tsk, task_cpu(tsk)); | ||
8519 | 8721 | ||
8520 | if (unlikely(running)) | 8722 | if (unlikely(running)) |
8521 | tsk->sched_class->set_curr_task(rq); | 8723 | tsk->sched_class->set_curr_task(rq); |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 933f3d1b62ea..00ebd7686676 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -1654,12 +1654,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |||
1654 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); | 1654 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); |
1655 | int scale = cfs_rq->nr_running >= sched_nr_latency; | 1655 | int scale = cfs_rq->nr_running >= sched_nr_latency; |
1656 | 1656 | ||
1657 | if (unlikely(rt_prio(p->prio))) | ||
1658 | goto preempt; | ||
1659 | |||
1660 | if (unlikely(p->sched_class != &fair_sched_class)) | ||
1661 | return; | ||
1662 | |||
1663 | if (unlikely(se == pse)) | 1657 | if (unlikely(se == pse)) |
1664 | return; | 1658 | return; |
1665 | 1659 | ||
@@ -1764,10 +1758,6 @@ static void pull_task(struct rq *src_rq, struct task_struct *p, | |||
1764 | set_task_cpu(p, this_cpu); | 1758 | set_task_cpu(p, this_cpu); |
1765 | activate_task(this_rq, p, 0); | 1759 | activate_task(this_rq, p, 0); |
1766 | check_preempt_curr(this_rq, p, 0); | 1760 | check_preempt_curr(this_rq, p, 0); |
1767 | |||
1768 | /* re-arm NEWIDLE balancing when moving tasks */ | ||
1769 | src_rq->avg_idle = this_rq->avg_idle = 2*sysctl_sched_migration_cost; | ||
1770 | this_rq->idle_stamp = 0; | ||
1771 | } | 1761 | } |
1772 | 1762 | ||
1773 | /* | 1763 | /* |
@@ -2035,13 +2025,16 @@ struct sd_lb_stats { | |||
2035 | unsigned long this_load_per_task; | 2025 | unsigned long this_load_per_task; |
2036 | unsigned long this_nr_running; | 2026 | unsigned long this_nr_running; |
2037 | unsigned long this_has_capacity; | 2027 | unsigned long this_has_capacity; |
2028 | unsigned int this_idle_cpus; | ||
2038 | 2029 | ||
2039 | /* Statistics of the busiest group */ | 2030 | /* Statistics of the busiest group */ |
2031 | unsigned int busiest_idle_cpus; | ||
2040 | unsigned long max_load; | 2032 | unsigned long max_load; |
2041 | unsigned long busiest_load_per_task; | 2033 | unsigned long busiest_load_per_task; |
2042 | unsigned long busiest_nr_running; | 2034 | unsigned long busiest_nr_running; |
2043 | unsigned long busiest_group_capacity; | 2035 | unsigned long busiest_group_capacity; |
2044 | unsigned long busiest_has_capacity; | 2036 | unsigned long busiest_has_capacity; |
2037 | unsigned int busiest_group_weight; | ||
2045 | 2038 | ||
2046 | int group_imb; /* Is there imbalance in this sd */ | 2039 | int group_imb; /* Is there imbalance in this sd */ |
2047 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) | 2040 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) |
@@ -2063,6 +2056,8 @@ struct sg_lb_stats { | |||
2063 | unsigned long sum_nr_running; /* Nr tasks running in the group */ | 2056 | unsigned long sum_nr_running; /* Nr tasks running in the group */ |
2064 | unsigned long sum_weighted_load; /* Weighted load of group's tasks */ | 2057 | unsigned long sum_weighted_load; /* Weighted load of group's tasks */ |
2065 | unsigned long group_capacity; | 2058 | unsigned long group_capacity; |
2059 | unsigned long idle_cpus; | ||
2060 | unsigned long group_weight; | ||
2066 | int group_imb; /* Is there an imbalance in the group ? */ | 2061 | int group_imb; /* Is there an imbalance in the group ? */ |
2067 | int group_has_capacity; /* Is there extra capacity in the group? */ | 2062 | int group_has_capacity; /* Is there extra capacity in the group? */ |
2068 | }; | 2063 | }; |
@@ -2431,7 +2426,8 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
2431 | sgs->group_load += load; | 2426 | sgs->group_load += load; |
2432 | sgs->sum_nr_running += rq->nr_running; | 2427 | sgs->sum_nr_running += rq->nr_running; |
2433 | sgs->sum_weighted_load += weighted_cpuload(i); | 2428 | sgs->sum_weighted_load += weighted_cpuload(i); |
2434 | 2429 | if (idle_cpu(i)) | |
2430 | sgs->idle_cpus++; | ||
2435 | } | 2431 | } |
2436 | 2432 | ||
2437 | /* | 2433 | /* |
@@ -2469,6 +2465,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
2469 | sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); | 2465 | sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); |
2470 | if (!sgs->group_capacity) | 2466 | if (!sgs->group_capacity) |
2471 | sgs->group_capacity = fix_small_capacity(sd, group); | 2467 | sgs->group_capacity = fix_small_capacity(sd, group); |
2468 | sgs->group_weight = group->group_weight; | ||
2472 | 2469 | ||
2473 | if (sgs->group_capacity > sgs->sum_nr_running) | 2470 | if (sgs->group_capacity > sgs->sum_nr_running) |
2474 | sgs->group_has_capacity = 1; | 2471 | sgs->group_has_capacity = 1; |
@@ -2576,13 +2573,16 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, | |||
2576 | sds->this_nr_running = sgs.sum_nr_running; | 2573 | sds->this_nr_running = sgs.sum_nr_running; |
2577 | sds->this_load_per_task = sgs.sum_weighted_load; | 2574 | sds->this_load_per_task = sgs.sum_weighted_load; |
2578 | sds->this_has_capacity = sgs.group_has_capacity; | 2575 | sds->this_has_capacity = sgs.group_has_capacity; |
2576 | sds->this_idle_cpus = sgs.idle_cpus; | ||
2579 | } else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) { | 2577 | } else if (update_sd_pick_busiest(sd, sds, sg, &sgs, this_cpu)) { |
2580 | sds->max_load = sgs.avg_load; | 2578 | sds->max_load = sgs.avg_load; |
2581 | sds->busiest = sg; | 2579 | sds->busiest = sg; |
2582 | sds->busiest_nr_running = sgs.sum_nr_running; | 2580 | sds->busiest_nr_running = sgs.sum_nr_running; |
2581 | sds->busiest_idle_cpus = sgs.idle_cpus; | ||
2583 | sds->busiest_group_capacity = sgs.group_capacity; | 2582 | sds->busiest_group_capacity = sgs.group_capacity; |
2584 | sds->busiest_load_per_task = sgs.sum_weighted_load; | 2583 | sds->busiest_load_per_task = sgs.sum_weighted_load; |
2585 | sds->busiest_has_capacity = sgs.group_has_capacity; | 2584 | sds->busiest_has_capacity = sgs.group_has_capacity; |
2585 | sds->busiest_group_weight = sgs.group_weight; | ||
2586 | sds->group_imb = sgs.group_imb; | 2586 | sds->group_imb = sgs.group_imb; |
2587 | } | 2587 | } |
2588 | 2588 | ||
@@ -2860,8 +2860,26 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
2860 | if (sds.this_load >= sds.avg_load) | 2860 | if (sds.this_load >= sds.avg_load) |
2861 | goto out_balanced; | 2861 | goto out_balanced; |
2862 | 2862 | ||
2863 | if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) | 2863 | /* |
2864 | goto out_balanced; | 2864 | * In the CPU_NEWLY_IDLE, use imbalance_pct to be conservative. |
2865 | * And to check for busy balance use !idle_cpu instead of | ||
2866 | * CPU_NOT_IDLE. This is because HT siblings will use CPU_NOT_IDLE | ||
2867 | * even when they are idle. | ||
2868 | */ | ||
2869 | if (idle == CPU_NEWLY_IDLE || !idle_cpu(this_cpu)) { | ||
2870 | if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) | ||
2871 | goto out_balanced; | ||
2872 | } else { | ||
2873 | /* | ||
2874 | * This cpu is idle. If the busiest group load doesn't | ||
2875 | * have more tasks than the number of available cpu's and | ||
2876 | * there is no imbalance between this and busiest group | ||
2877 | * wrt to idle cpu's, it is balanced. | ||
2878 | */ | ||
2879 | if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) && | ||
2880 | sds.busiest_nr_running <= sds.busiest_group_weight) | ||
2881 | goto out_balanced; | ||
2882 | } | ||
2865 | 2883 | ||
2866 | force_balance: | 2884 | force_balance: |
2867 | /* Looks like there is an imbalance. Compute it */ | 2885 | /* Looks like there is an imbalance. Compute it */ |
@@ -3197,8 +3215,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
3197 | interval = msecs_to_jiffies(sd->balance_interval); | 3215 | interval = msecs_to_jiffies(sd->balance_interval); |
3198 | if (time_after(next_balance, sd->last_balance + interval)) | 3216 | if (time_after(next_balance, sd->last_balance + interval)) |
3199 | next_balance = sd->last_balance + interval; | 3217 | next_balance = sd->last_balance + interval; |
3200 | if (pulled_task) | 3218 | if (pulled_task) { |
3219 | this_rq->idle_stamp = 0; | ||
3201 | break; | 3220 | break; |
3221 | } | ||
3202 | } | 3222 | } |
3203 | 3223 | ||
3204 | raw_spin_lock(&this_rq->lock); | 3224 | raw_spin_lock(&this_rq->lock); |
@@ -3869,13 +3889,26 @@ static void set_curr_task_fair(struct rq *rq) | |||
3869 | } | 3889 | } |
3870 | 3890 | ||
3871 | #ifdef CONFIG_FAIR_GROUP_SCHED | 3891 | #ifdef CONFIG_FAIR_GROUP_SCHED |
3872 | static void moved_group_fair(struct task_struct *p, int on_rq) | 3892 | static void task_move_group_fair(struct task_struct *p, int on_rq) |
3873 | { | 3893 | { |
3874 | struct cfs_rq *cfs_rq = task_cfs_rq(p); | 3894 | /* |
3875 | 3895 | * If the task was not on the rq at the time of this cgroup movement | |
3876 | update_curr(cfs_rq); | 3896 | * it must have been asleep, sleeping tasks keep their ->vruntime |
3897 | * absolute on their old rq until wakeup (needed for the fair sleeper | ||
3898 | * bonus in place_entity()). | ||
3899 | * | ||
3900 | * If it was on the rq, we've just 'preempted' it, which does convert | ||
3901 | * ->vruntime to a relative base. | ||
3902 | * | ||
3903 | * Make sure both cases convert their relative position when migrating | ||
3904 | * to another cgroup's rq. This does somewhat interfere with the | ||
3905 | * fair sleeper stuff for the first placement, but who cares. | ||
3906 | */ | ||
3907 | if (!on_rq) | ||
3908 | p->se.vruntime -= cfs_rq_of(&p->se)->min_vruntime; | ||
3909 | set_task_rq(p, task_cpu(p)); | ||
3877 | if (!on_rq) | 3910 | if (!on_rq) |
3878 | place_entity(cfs_rq, &p->se, 1); | 3911 | p->se.vruntime += cfs_rq_of(&p->se)->min_vruntime; |
3879 | } | 3912 | } |
3880 | #endif | 3913 | #endif |
3881 | 3914 | ||
@@ -3927,7 +3960,7 @@ static const struct sched_class fair_sched_class = { | |||
3927 | .get_rr_interval = get_rr_interval_fair, | 3960 | .get_rr_interval = get_rr_interval_fair, |
3928 | 3961 | ||
3929 | #ifdef CONFIG_FAIR_GROUP_SCHED | 3962 | #ifdef CONFIG_FAIR_GROUP_SCHED |
3930 | .moved_group = moved_group_fair, | 3963 | .task_move_group = task_move_group_fair, |
3931 | #endif | 3964 | #endif |
3932 | }; | 3965 | }; |
3933 | 3966 | ||
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index 25c2f962f6fc..48ddf431db0e 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h | |||
@@ -157,15 +157,7 @@ static inline void sched_info_reset_dequeued(struct task_struct *t) | |||
157 | } | 157 | } |
158 | 158 | ||
159 | /* | 159 | /* |
160 | * Called when a process is dequeued from the active array and given | 160 | * We are interested in knowing how long it was from the *first* time a |
161 | * the cpu. We should note that with the exception of interactive | ||
162 | * tasks, the expired queue will become the active queue after the active | ||
163 | * queue is empty, without explicitly dequeuing and requeuing tasks in the | ||
164 | * expired queue. (Interactive tasks may be requeued directly to the | ||
165 | * active queue, thus delaying tasks in the expired queue from running; | ||
166 | * see scheduler_tick()). | ||
167 | * | ||
168 | * Though we are interested in knowing how long it was from the *first* time a | ||
169 | * task was queued to the time that it finally hit a cpu, we call this routine | 161 | * task was queued to the time that it finally hit a cpu, we call this routine |
170 | * from dequeue_task() to account for possible rq->clock skew across cpus. The | 162 | * from dequeue_task() to account for possible rq->clock skew across cpus. The |
171 | * delta taken on each cpu would annul the skew. | 163 | * delta taken on each cpu would annul the skew. |
@@ -203,16 +195,6 @@ static void sched_info_arrive(struct task_struct *t) | |||
203 | } | 195 | } |
204 | 196 | ||
205 | /* | 197 | /* |
206 | * Called when a process is queued into either the active or expired | ||
207 | * array. The time is noted and later used to determine how long we | ||
208 | * had to wait for us to reach the cpu. Since the expired queue will | ||
209 | * become the active queue after active queue is empty, without dequeuing | ||
210 | * and requeuing any tasks, we are interested in queuing to either. It | ||
211 | * is unusual but not impossible for tasks to be dequeued and immediately | ||
212 | * requeued in the same or another array: this can happen in sched_yield(), | ||
213 | * set_user_nice(), and even load_balance() as it moves tasks from runqueue | ||
214 | * to runqueue. | ||
215 | * | ||
216 | * This function is only called from enqueue_task(), but also only updates | 198 | * This function is only called from enqueue_task(), but also only updates |
217 | * the timestamp if it is already not set. It's assumed that | 199 | * the timestamp if it is already not set. It's assumed that |
218 | * sched_info_dequeued() will clear that stamp when appropriate. | 200 | * sched_info_dequeued() will clear that stamp when appropriate. |
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c index 45bddc0c1048..2bf6b47058c1 100644 --- a/kernel/sched_stoptask.c +++ b/kernel/sched_stoptask.c | |||
@@ -19,14 +19,14 @@ select_task_rq_stop(struct rq *rq, struct task_struct *p, | |||
19 | static void | 19 | static void |
20 | check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags) | 20 | check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags) |
21 | { | 21 | { |
22 | resched_task(rq->curr); /* we preempt everything */ | 22 | /* we're never preempted */ |
23 | } | 23 | } |
24 | 24 | ||
25 | static struct task_struct *pick_next_task_stop(struct rq *rq) | 25 | static struct task_struct *pick_next_task_stop(struct rq *rq) |
26 | { | 26 | { |
27 | struct task_struct *stop = rq->stop; | 27 | struct task_struct *stop = rq->stop; |
28 | 28 | ||
29 | if (stop && stop->state == TASK_RUNNING) | 29 | if (stop && stop->se.on_rq) |
30 | return stop; | 30 | return stop; |
31 | 31 | ||
32 | return NULL; | 32 | return NULL; |
diff --git a/kernel/signal.c b/kernel/signal.c index 919562c3d6b7..4e3cff10fdce 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -1105,7 +1105,8 @@ int zap_other_threads(struct task_struct *p) | |||
1105 | return count; | 1105 | return count; |
1106 | } | 1106 | } |
1107 | 1107 | ||
1108 | struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags) | 1108 | struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, |
1109 | unsigned long *flags) | ||
1109 | { | 1110 | { |
1110 | struct sighand_struct *sighand; | 1111 | struct sighand_struct *sighand; |
1111 | 1112 | ||
@@ -1617,6 +1618,8 @@ static int sigkill_pending(struct task_struct *tsk) | |||
1617 | * is gone, we keep current->exit_code unless clear_code. | 1618 | * is gone, we keep current->exit_code unless clear_code. |
1618 | */ | 1619 | */ |
1619 | static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) | 1620 | static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) |
1621 | __releases(&current->sighand->siglock) | ||
1622 | __acquires(&current->sighand->siglock) | ||
1620 | { | 1623 | { |
1621 | if (arch_ptrace_stop_needed(exit_code, info)) { | 1624 | if (arch_ptrace_stop_needed(exit_code, info)) { |
1622 | /* | 1625 | /* |
diff --git a/kernel/smp.c b/kernel/smp.c index ed6aacfcb7ef..12ed8b013e2d 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
@@ -267,7 +267,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data); | |||
267 | * | 267 | * |
268 | * Returns 0 on success, else a negative status code. | 268 | * Returns 0 on success, else a negative status code. |
269 | */ | 269 | */ |
270 | int smp_call_function_single(int cpu, void (*func) (void *info), void *info, | 270 | int smp_call_function_single(int cpu, smp_call_func_t func, void *info, |
271 | int wait) | 271 | int wait) |
272 | { | 272 | { |
273 | struct call_single_data d = { | 273 | struct call_single_data d = { |
@@ -336,7 +336,7 @@ EXPORT_SYMBOL(smp_call_function_single); | |||
336 | * 3) any other online cpu in @mask | 336 | * 3) any other online cpu in @mask |
337 | */ | 337 | */ |
338 | int smp_call_function_any(const struct cpumask *mask, | 338 | int smp_call_function_any(const struct cpumask *mask, |
339 | void (*func)(void *info), void *info, int wait) | 339 | smp_call_func_t func, void *info, int wait) |
340 | { | 340 | { |
341 | unsigned int cpu; | 341 | unsigned int cpu; |
342 | const struct cpumask *nodemask; | 342 | const struct cpumask *nodemask; |
@@ -416,7 +416,7 @@ void __smp_call_function_single(int cpu, struct call_single_data *data, | |||
416 | * must be disabled when calling this function. | 416 | * must be disabled when calling this function. |
417 | */ | 417 | */ |
418 | void smp_call_function_many(const struct cpumask *mask, | 418 | void smp_call_function_many(const struct cpumask *mask, |
419 | void (*func)(void *), void *info, bool wait) | 419 | smp_call_func_t func, void *info, bool wait) |
420 | { | 420 | { |
421 | struct call_function_data *data; | 421 | struct call_function_data *data; |
422 | unsigned long flags; | 422 | unsigned long flags; |
@@ -500,7 +500,7 @@ EXPORT_SYMBOL(smp_call_function_many); | |||
500 | * You must not call this function with disabled interrupts or from a | 500 | * You must not call this function with disabled interrupts or from a |
501 | * hardware interrupt handler or from a bottom half handler. | 501 | * hardware interrupt handler or from a bottom half handler. |
502 | */ | 502 | */ |
503 | int smp_call_function(void (*func)(void *), void *info, int wait) | 503 | int smp_call_function(smp_call_func_t func, void *info, int wait) |
504 | { | 504 | { |
505 | preempt_disable(); | 505 | preempt_disable(); |
506 | smp_call_function_many(cpu_online_mask, func, info, wait); | 506 | smp_call_function_many(cpu_online_mask, func, info, wait); |
diff --git a/kernel/softirq.c b/kernel/softirq.c index fc978889b194..18f4be0d5fe0 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -67,7 +67,7 @@ char *softirq_to_name[NR_SOFTIRQS] = { | |||
67 | * to the pending events, so lets the scheduler to balance | 67 | * to the pending events, so lets the scheduler to balance |
68 | * the softirq load for us. | 68 | * the softirq load for us. |
69 | */ | 69 | */ |
70 | void wakeup_softirqd(void) | 70 | static void wakeup_softirqd(void) |
71 | { | 71 | { |
72 | /* Interrupts are disabled: no need to stop preemption */ | 72 | /* Interrupts are disabled: no need to stop preemption */ |
73 | struct task_struct *tsk = __get_cpu_var(ksoftirqd); | 73 | struct task_struct *tsk = __get_cpu_var(ksoftirqd); |
@@ -229,18 +229,20 @@ restart: | |||
229 | 229 | ||
230 | do { | 230 | do { |
231 | if (pending & 1) { | 231 | if (pending & 1) { |
232 | unsigned int vec_nr = h - softirq_vec; | ||
232 | int prev_count = preempt_count(); | 233 | int prev_count = preempt_count(); |
233 | kstat_incr_softirqs_this_cpu(h - softirq_vec); | ||
234 | 234 | ||
235 | trace_softirq_entry(h, softirq_vec); | 235 | kstat_incr_softirqs_this_cpu(vec_nr); |
236 | |||
237 | trace_softirq_entry(vec_nr); | ||
236 | h->action(h); | 238 | h->action(h); |
237 | trace_softirq_exit(h, softirq_vec); | 239 | trace_softirq_exit(vec_nr); |
238 | if (unlikely(prev_count != preempt_count())) { | 240 | if (unlikely(prev_count != preempt_count())) { |
239 | printk(KERN_ERR "huh, entered softirq %td %s %p" | 241 | printk(KERN_ERR "huh, entered softirq %u %s %p" |
240 | "with preempt_count %08x," | 242 | "with preempt_count %08x," |
241 | " exited with %08x?\n", h - softirq_vec, | 243 | " exited with %08x?\n", vec_nr, |
242 | softirq_to_name[h - softirq_vec], | 244 | softirq_to_name[vec_nr], h->action, |
243 | h->action, prev_count, preempt_count()); | 245 | prev_count, preempt_count()); |
244 | preempt_count() = prev_count; | 246 | preempt_count() = prev_count; |
245 | } | 247 | } |
246 | 248 | ||
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 090c28812ce1..2df820b03beb 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
@@ -262,7 +262,7 @@ repeat: | |||
262 | cpu_stop_fn_t fn = work->fn; | 262 | cpu_stop_fn_t fn = work->fn; |
263 | void *arg = work->arg; | 263 | void *arg = work->arg; |
264 | struct cpu_stop_done *done = work->done; | 264 | struct cpu_stop_done *done = work->done; |
265 | char ksym_buf[KSYM_NAME_LEN]; | 265 | char ksym_buf[KSYM_NAME_LEN] __maybe_unused; |
266 | 266 | ||
267 | __set_current_state(TASK_RUNNING); | 267 | __set_current_state(TASK_RUNNING); |
268 | 268 | ||
@@ -304,7 +304,7 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb, | |||
304 | p = kthread_create(cpu_stopper_thread, stopper, "migration/%d", | 304 | p = kthread_create(cpu_stopper_thread, stopper, "migration/%d", |
305 | cpu); | 305 | cpu); |
306 | if (IS_ERR(p)) | 306 | if (IS_ERR(p)) |
307 | return NOTIFY_BAD; | 307 | return notifier_from_errno(PTR_ERR(p)); |
308 | get_task_struct(p); | 308 | get_task_struct(p); |
309 | kthread_bind(p, cpu); | 309 | kthread_bind(p, cpu); |
310 | sched_set_stop_task(cpu, p); | 310 | sched_set_stop_task(cpu, p); |
@@ -372,7 +372,7 @@ static int __init cpu_stop_init(void) | |||
372 | /* start one for the boot cpu */ | 372 | /* start one for the boot cpu */ |
373 | err = cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_UP_PREPARE, | 373 | err = cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_UP_PREPARE, |
374 | bcpu); | 374 | bcpu); |
375 | BUG_ON(err == NOTIFY_BAD); | 375 | BUG_ON(err != NOTIFY_OK); |
376 | cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_ONLINE, bcpu); | 376 | cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_ONLINE, bcpu); |
377 | register_cpu_notifier(&cpu_stop_cpu_notifier); | 377 | register_cpu_notifier(&cpu_stop_cpu_notifier); |
378 | 378 | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3a45c224770f..5abfa1518554 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -161,8 +161,6 @@ extern int no_unaligned_warning; | |||
161 | extern int unaligned_dump_stack; | 161 | extern int unaligned_dump_stack; |
162 | #endif | 162 | #endif |
163 | 163 | ||
164 | extern struct ratelimit_state printk_ratelimit_state; | ||
165 | |||
166 | #ifdef CONFIG_PROC_SYSCTL | 164 | #ifdef CONFIG_PROC_SYSCTL |
167 | static int proc_do_cad_pid(struct ctl_table *table, int write, | 165 | static int proc_do_cad_pid(struct ctl_table *table, int write, |
168 | void __user *buffer, size_t *lenp, loff_t *ppos); | 166 | void __user *buffer, size_t *lenp, loff_t *ppos); |
@@ -704,6 +702,15 @@ static struct ctl_table kern_table[] = { | |||
704 | .extra1 = &zero, | 702 | .extra1 = &zero, |
705 | .extra2 = &ten_thousand, | 703 | .extra2 = &ten_thousand, |
706 | }, | 704 | }, |
705 | { | ||
706 | .procname = "dmesg_restrict", | ||
707 | .data = &dmesg_restrict, | ||
708 | .maxlen = sizeof(int), | ||
709 | .mode = 0644, | ||
710 | .proc_handler = proc_dointvec_minmax, | ||
711 | .extra1 = &zero, | ||
712 | .extra2 = &one, | ||
713 | }, | ||
707 | #endif | 714 | #endif |
708 | { | 715 | { |
709 | .procname = "ngroups_max", | 716 | .procname = "ngroups_max", |
@@ -1340,28 +1347,28 @@ static struct ctl_table fs_table[] = { | |||
1340 | .data = &inodes_stat, | 1347 | .data = &inodes_stat, |
1341 | .maxlen = 2*sizeof(int), | 1348 | .maxlen = 2*sizeof(int), |
1342 | .mode = 0444, | 1349 | .mode = 0444, |
1343 | .proc_handler = proc_dointvec, | 1350 | .proc_handler = proc_nr_inodes, |
1344 | }, | 1351 | }, |
1345 | { | 1352 | { |
1346 | .procname = "inode-state", | 1353 | .procname = "inode-state", |
1347 | .data = &inodes_stat, | 1354 | .data = &inodes_stat, |
1348 | .maxlen = 7*sizeof(int), | 1355 | .maxlen = 7*sizeof(int), |
1349 | .mode = 0444, | 1356 | .mode = 0444, |
1350 | .proc_handler = proc_dointvec, | 1357 | .proc_handler = proc_nr_inodes, |
1351 | }, | 1358 | }, |
1352 | { | 1359 | { |
1353 | .procname = "file-nr", | 1360 | .procname = "file-nr", |
1354 | .data = &files_stat, | 1361 | .data = &files_stat, |
1355 | .maxlen = 3*sizeof(int), | 1362 | .maxlen = sizeof(files_stat), |
1356 | .mode = 0444, | 1363 | .mode = 0444, |
1357 | .proc_handler = proc_nr_files, | 1364 | .proc_handler = proc_nr_files, |
1358 | }, | 1365 | }, |
1359 | { | 1366 | { |
1360 | .procname = "file-max", | 1367 | .procname = "file-max", |
1361 | .data = &files_stat.max_files, | 1368 | .data = &files_stat.max_files, |
1362 | .maxlen = sizeof(int), | 1369 | .maxlen = sizeof(files_stat.max_files), |
1363 | .mode = 0644, | 1370 | .mode = 0644, |
1364 | .proc_handler = proc_dointvec, | 1371 | .proc_handler = proc_doulongvec_minmax, |
1365 | }, | 1372 | }, |
1366 | { | 1373 | { |
1367 | .procname = "nr_open", | 1374 | .procname = "nr_open", |
@@ -1377,7 +1384,7 @@ static struct ctl_table fs_table[] = { | |||
1377 | .data = &dentry_stat, | 1384 | .data = &dentry_stat, |
1378 | .maxlen = 6*sizeof(int), | 1385 | .maxlen = 6*sizeof(int), |
1379 | .mode = 0444, | 1386 | .mode = 0444, |
1380 | .proc_handler = proc_dointvec, | 1387 | .proc_handler = proc_nr_dentry, |
1381 | }, | 1388 | }, |
1382 | { | 1389 | { |
1383 | .procname = "overflowuid", | 1390 | .procname = "overflowuid", |
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 11281d5792bd..3308fd7f1b52 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c | |||
@@ -175,22 +175,8 @@ static void send_cpu_listeners(struct sk_buff *skb, | |||
175 | up_write(&listeners->sem); | 175 | up_write(&listeners->sem); |
176 | } | 176 | } |
177 | 177 | ||
178 | static int fill_pid(pid_t pid, struct task_struct *tsk, | 178 | static void fill_stats(struct task_struct *tsk, struct taskstats *stats) |
179 | struct taskstats *stats) | ||
180 | { | 179 | { |
181 | int rc = 0; | ||
182 | |||
183 | if (!tsk) { | ||
184 | rcu_read_lock(); | ||
185 | tsk = find_task_by_vpid(pid); | ||
186 | if (tsk) | ||
187 | get_task_struct(tsk); | ||
188 | rcu_read_unlock(); | ||
189 | if (!tsk) | ||
190 | return -ESRCH; | ||
191 | } else | ||
192 | get_task_struct(tsk); | ||
193 | |||
194 | memset(stats, 0, sizeof(*stats)); | 180 | memset(stats, 0, sizeof(*stats)); |
195 | /* | 181 | /* |
196 | * Each accounting subsystem adds calls to its functions to | 182 | * Each accounting subsystem adds calls to its functions to |
@@ -209,17 +195,27 @@ static int fill_pid(pid_t pid, struct task_struct *tsk, | |||
209 | 195 | ||
210 | /* fill in extended acct fields */ | 196 | /* fill in extended acct fields */ |
211 | xacct_add_tsk(stats, tsk); | 197 | xacct_add_tsk(stats, tsk); |
198 | } | ||
212 | 199 | ||
213 | /* Define err: label here if needed */ | 200 | static int fill_stats_for_pid(pid_t pid, struct taskstats *stats) |
214 | put_task_struct(tsk); | 201 | { |
215 | return rc; | 202 | struct task_struct *tsk; |
216 | 203 | ||
204 | rcu_read_lock(); | ||
205 | tsk = find_task_by_vpid(pid); | ||
206 | if (tsk) | ||
207 | get_task_struct(tsk); | ||
208 | rcu_read_unlock(); | ||
209 | if (!tsk) | ||
210 | return -ESRCH; | ||
211 | fill_stats(tsk, stats); | ||
212 | put_task_struct(tsk); | ||
213 | return 0; | ||
217 | } | 214 | } |
218 | 215 | ||
219 | static int fill_tgid(pid_t tgid, struct task_struct *first, | 216 | static int fill_stats_for_tgid(pid_t tgid, struct taskstats *stats) |
220 | struct taskstats *stats) | ||
221 | { | 217 | { |
222 | struct task_struct *tsk; | 218 | struct task_struct *tsk, *first; |
223 | unsigned long flags; | 219 | unsigned long flags; |
224 | int rc = -ESRCH; | 220 | int rc = -ESRCH; |
225 | 221 | ||
@@ -228,8 +224,7 @@ static int fill_tgid(pid_t tgid, struct task_struct *first, | |||
228 | * leaders who are already counted with the dead tasks | 224 | * leaders who are already counted with the dead tasks |
229 | */ | 225 | */ |
230 | rcu_read_lock(); | 226 | rcu_read_lock(); |
231 | if (!first) | 227 | first = find_task_by_vpid(tgid); |
232 | first = find_task_by_vpid(tgid); | ||
233 | 228 | ||
234 | if (!first || !lock_task_sighand(first, &flags)) | 229 | if (!first || !lock_task_sighand(first, &flags)) |
235 | goto out; | 230 | goto out; |
@@ -268,7 +263,6 @@ out: | |||
268 | return rc; | 263 | return rc; |
269 | } | 264 | } |
270 | 265 | ||
271 | |||
272 | static void fill_tgid_exit(struct task_struct *tsk) | 266 | static void fill_tgid_exit(struct task_struct *tsk) |
273 | { | 267 | { |
274 | unsigned long flags; | 268 | unsigned long flags; |
@@ -355,6 +349,10 @@ static int parse(struct nlattr *na, struct cpumask *mask) | |||
355 | return ret; | 349 | return ret; |
356 | } | 350 | } |
357 | 351 | ||
352 | #ifdef CONFIG_IA64 | ||
353 | #define TASKSTATS_NEEDS_PADDING 1 | ||
354 | #endif | ||
355 | |||
358 | static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) | 356 | static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) |
359 | { | 357 | { |
360 | struct nlattr *na, *ret; | 358 | struct nlattr *na, *ret; |
@@ -364,9 +362,33 @@ static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) | |||
364 | ? TASKSTATS_TYPE_AGGR_PID | 362 | ? TASKSTATS_TYPE_AGGR_PID |
365 | : TASKSTATS_TYPE_AGGR_TGID; | 363 | : TASKSTATS_TYPE_AGGR_TGID; |
366 | 364 | ||
365 | /* | ||
366 | * The taskstats structure is internally aligned on 8 byte | ||
367 | * boundaries but the layout of the aggregate reply, with | ||
368 | * two NLA headers and the pid (each 4 bytes), actually | ||
369 | * forces the entire structure to be unaligned. This causes | ||
370 | * the kernel to issue unaligned access warnings on some | ||
371 | * architectures like ia64. Unfortunately, some software out there | ||
372 | * doesn't properly unroll the NLA packet and assumes that the start | ||
373 | * of the taskstats structure will always be 20 bytes from the start | ||
374 | * of the netlink payload. Aligning the start of the taskstats | ||
375 | * structure breaks this software, which we don't want. So, for now | ||
376 | * the alignment only happens on architectures that require it | ||
377 | * and those users will have to update to fixed versions of those | ||
378 | * packages. Space is reserved in the packet only when needed. | ||
379 | * This ifdef should be removed in several years e.g. 2012 once | ||
380 | * we can be confident that fixed versions are installed on most | ||
381 | * systems. We add the padding before the aggregate since the | ||
382 | * aggregate is already a defined type. | ||
383 | */ | ||
384 | #ifdef TASKSTATS_NEEDS_PADDING | ||
385 | if (nla_put(skb, TASKSTATS_TYPE_NULL, 0, NULL) < 0) | ||
386 | goto err; | ||
387 | #endif | ||
367 | na = nla_nest_start(skb, aggr); | 388 | na = nla_nest_start(skb, aggr); |
368 | if (!na) | 389 | if (!na) |
369 | goto err; | 390 | goto err; |
391 | |||
370 | if (nla_put(skb, type, sizeof(pid), &pid) < 0) | 392 | if (nla_put(skb, type, sizeof(pid), &pid) < 0) |
371 | goto err; | 393 | goto err; |
372 | ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); | 394 | ret = nla_reserve(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats)); |
@@ -424,74 +446,122 @@ err: | |||
424 | return rc; | 446 | return rc; |
425 | } | 447 | } |
426 | 448 | ||
427 | static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) | 449 | static int cmd_attr_register_cpumask(struct genl_info *info) |
428 | { | 450 | { |
429 | int rc; | ||
430 | struct sk_buff *rep_skb; | ||
431 | struct taskstats *stats; | ||
432 | size_t size; | ||
433 | cpumask_var_t mask; | 451 | cpumask_var_t mask; |
452 | int rc; | ||
434 | 453 | ||
435 | if (!alloc_cpumask_var(&mask, GFP_KERNEL)) | 454 | if (!alloc_cpumask_var(&mask, GFP_KERNEL)) |
436 | return -ENOMEM; | 455 | return -ENOMEM; |
437 | |||
438 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask); | 456 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask); |
439 | if (rc < 0) | 457 | if (rc < 0) |
440 | goto free_return_rc; | 458 | goto out; |
441 | if (rc == 0) { | 459 | rc = add_del_listener(info->snd_pid, mask, REGISTER); |
442 | rc = add_del_listener(info->snd_pid, mask, REGISTER); | 460 | out: |
443 | goto free_return_rc; | 461 | free_cpumask_var(mask); |
444 | } | 462 | return rc; |
463 | } | ||
464 | |||
465 | static int cmd_attr_deregister_cpumask(struct genl_info *info) | ||
466 | { | ||
467 | cpumask_var_t mask; | ||
468 | int rc; | ||
445 | 469 | ||
470 | if (!alloc_cpumask_var(&mask, GFP_KERNEL)) | ||
471 | return -ENOMEM; | ||
446 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask); | 472 | rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask); |
447 | if (rc < 0) | 473 | if (rc < 0) |
448 | goto free_return_rc; | 474 | goto out; |
449 | if (rc == 0) { | 475 | rc = add_del_listener(info->snd_pid, mask, DEREGISTER); |
450 | rc = add_del_listener(info->snd_pid, mask, DEREGISTER); | 476 | out: |
451 | free_return_rc: | ||
452 | free_cpumask_var(mask); | ||
453 | return rc; | ||
454 | } | ||
455 | free_cpumask_var(mask); | 477 | free_cpumask_var(mask); |
478 | return rc; | ||
479 | } | ||
480 | |||
481 | static size_t taskstats_packet_size(void) | ||
482 | { | ||
483 | size_t size; | ||
456 | 484 | ||
457 | /* | ||
458 | * Size includes space for nested attributes | ||
459 | */ | ||
460 | size = nla_total_size(sizeof(u32)) + | 485 | size = nla_total_size(sizeof(u32)) + |
461 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); | 486 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); |
487 | #ifdef TASKSTATS_NEEDS_PADDING | ||
488 | size += nla_total_size(0); /* Padding for alignment */ | ||
489 | #endif | ||
490 | return size; | ||
491 | } | ||
492 | |||
493 | static int cmd_attr_pid(struct genl_info *info) | ||
494 | { | ||
495 | struct taskstats *stats; | ||
496 | struct sk_buff *rep_skb; | ||
497 | size_t size; | ||
498 | u32 pid; | ||
499 | int rc; | ||
500 | |||
501 | size = taskstats_packet_size(); | ||
462 | 502 | ||
463 | rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); | 503 | rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); |
464 | if (rc < 0) | 504 | if (rc < 0) |
465 | return rc; | 505 | return rc; |
466 | 506 | ||
467 | rc = -EINVAL; | 507 | rc = -EINVAL; |
468 | if (info->attrs[TASKSTATS_CMD_ATTR_PID]) { | 508 | pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); |
469 | u32 pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); | 509 | stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid); |
470 | stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid); | 510 | if (!stats) |
471 | if (!stats) | ||
472 | goto err; | ||
473 | |||
474 | rc = fill_pid(pid, NULL, stats); | ||
475 | if (rc < 0) | ||
476 | goto err; | ||
477 | } else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) { | ||
478 | u32 tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); | ||
479 | stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid); | ||
480 | if (!stats) | ||
481 | goto err; | ||
482 | |||
483 | rc = fill_tgid(tgid, NULL, stats); | ||
484 | if (rc < 0) | ||
485 | goto err; | ||
486 | } else | ||
487 | goto err; | 511 | goto err; |
488 | 512 | ||
513 | rc = fill_stats_for_pid(pid, stats); | ||
514 | if (rc < 0) | ||
515 | goto err; | ||
489 | return send_reply(rep_skb, info); | 516 | return send_reply(rep_skb, info); |
490 | err: | 517 | err: |
491 | nlmsg_free(rep_skb); | 518 | nlmsg_free(rep_skb); |
492 | return rc; | 519 | return rc; |
493 | } | 520 | } |
494 | 521 | ||
522 | static int cmd_attr_tgid(struct genl_info *info) | ||
523 | { | ||
524 | struct taskstats *stats; | ||
525 | struct sk_buff *rep_skb; | ||
526 | size_t size; | ||
527 | u32 tgid; | ||
528 | int rc; | ||
529 | |||
530 | size = taskstats_packet_size(); | ||
531 | |||
532 | rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); | ||
533 | if (rc < 0) | ||
534 | return rc; | ||
535 | |||
536 | rc = -EINVAL; | ||
537 | tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); | ||
538 | stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid); | ||
539 | if (!stats) | ||
540 | goto err; | ||
541 | |||
542 | rc = fill_stats_for_tgid(tgid, stats); | ||
543 | if (rc < 0) | ||
544 | goto err; | ||
545 | return send_reply(rep_skb, info); | ||
546 | err: | ||
547 | nlmsg_free(rep_skb); | ||
548 | return rc; | ||
549 | } | ||
550 | |||
551 | static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) | ||
552 | { | ||
553 | if (info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK]) | ||
554 | return cmd_attr_register_cpumask(info); | ||
555 | else if (info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK]) | ||
556 | return cmd_attr_deregister_cpumask(info); | ||
557 | else if (info->attrs[TASKSTATS_CMD_ATTR_PID]) | ||
558 | return cmd_attr_pid(info); | ||
559 | else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) | ||
560 | return cmd_attr_tgid(info); | ||
561 | else | ||
562 | return -EINVAL; | ||
563 | } | ||
564 | |||
495 | static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk) | 565 | static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk) |
496 | { | 566 | { |
497 | struct signal_struct *sig = tsk->signal; | 567 | struct signal_struct *sig = tsk->signal; |
@@ -532,8 +602,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) | |||
532 | /* | 602 | /* |
533 | * Size includes space for nested attributes | 603 | * Size includes space for nested attributes |
534 | */ | 604 | */ |
535 | size = nla_total_size(sizeof(u32)) + | 605 | size = taskstats_packet_size(); |
536 | nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); | ||
537 | 606 | ||
538 | is_thread_group = !!taskstats_tgid_alloc(tsk); | 607 | is_thread_group = !!taskstats_tgid_alloc(tsk); |
539 | if (is_thread_group) { | 608 | if (is_thread_group) { |
@@ -555,9 +624,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) | |||
555 | if (!stats) | 624 | if (!stats) |
556 | goto err; | 625 | goto err; |
557 | 626 | ||
558 | rc = fill_pid(-1, tsk, stats); | 627 | fill_stats(tsk, stats); |
559 | if (rc < 0) | ||
560 | goto err; | ||
561 | 628 | ||
562 | /* | 629 | /* |
563 | * Doesn't matter if tsk is the leader or the last group member leaving | 630 | * Doesn't matter if tsk is the leader or the last group member leaving |
diff --git a/kernel/timer.c b/kernel/timer.c index 68a9ae7679b7..353b9227c2ec 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -1252,6 +1252,12 @@ unsigned long get_next_timer_interrupt(unsigned long now) | |||
1252 | struct tvec_base *base = __get_cpu_var(tvec_bases); | 1252 | struct tvec_base *base = __get_cpu_var(tvec_bases); |
1253 | unsigned long expires; | 1253 | unsigned long expires; |
1254 | 1254 | ||
1255 | /* | ||
1256 | * Pretend that there is no timer pending if the cpu is offline. | ||
1257 | * Possible pending timers will be migrated later to an active cpu. | ||
1258 | */ | ||
1259 | if (cpu_is_offline(smp_processor_id())) | ||
1260 | return now + NEXT_TIMER_MAX_DELTA; | ||
1255 | spin_lock(&base->lock); | 1261 | spin_lock(&base->lock); |
1256 | if (time_before_eq(base->next_timer, base->timer_jiffies)) | 1262 | if (time_before_eq(base->next_timer, base->timer_jiffies)) |
1257 | base->next_timer = __next_timer_interrupt(base); | 1263 | base->next_timer = __next_timer_interrupt(base); |
@@ -1319,7 +1325,7 @@ void do_timer(unsigned long ticks) | |||
1319 | { | 1325 | { |
1320 | jiffies_64 += ticks; | 1326 | jiffies_64 += ticks; |
1321 | update_wall_time(); | 1327 | update_wall_time(); |
1322 | calc_global_load(); | 1328 | calc_global_load(ticks); |
1323 | } | 1329 | } |
1324 | 1330 | ||
1325 | #ifdef __ARCH_WANT_SYS_ALARM | 1331 | #ifdef __ARCH_WANT_SYS_ALARM |
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index e04b8bcdef88..ea37e2ff4164 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
@@ -126,7 +126,7 @@ if FTRACE | |||
126 | config FUNCTION_TRACER | 126 | config FUNCTION_TRACER |
127 | bool "Kernel Function Tracer" | 127 | bool "Kernel Function Tracer" |
128 | depends on HAVE_FUNCTION_TRACER | 128 | depends on HAVE_FUNCTION_TRACER |
129 | select FRAME_POINTER if (!ARM_UNWIND) | 129 | select FRAME_POINTER if !ARM_UNWIND && !S390 |
130 | select KALLSYMS | 130 | select KALLSYMS |
131 | select GENERIC_TRACER | 131 | select GENERIC_TRACER |
132 | select CONTEXT_SWITCH_TRACER | 132 | select CONTEXT_SWITCH_TRACER |
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 959f8d6c8cc1..7b8ec0281548 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
@@ -23,7 +23,6 @@ | |||
23 | #include <linux/mutex.h> | 23 | #include <linux/mutex.h> |
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/debugfs.h> | 25 | #include <linux/debugfs.h> |
26 | #include <linux/smp_lock.h> | ||
27 | #include <linux/time.h> | 26 | #include <linux/time.h> |
28 | #include <linux/uaccess.h> | 27 | #include <linux/uaccess.h> |
29 | 28 | ||
@@ -169,7 +168,6 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, | |||
169 | static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), | 168 | static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), |
170 | BLK_TC_ACT(BLK_TC_WRITE) }; | 169 | BLK_TC_ACT(BLK_TC_WRITE) }; |
171 | 170 | ||
172 | #define BLK_TC_HARDBARRIER BLK_TC_BARRIER | ||
173 | #define BLK_TC_RAHEAD BLK_TC_AHEAD | 171 | #define BLK_TC_RAHEAD BLK_TC_AHEAD |
174 | 172 | ||
175 | /* The ilog2() calls fall out because they're constant */ | 173 | /* The ilog2() calls fall out because they're constant */ |
@@ -197,7 +195,6 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, | |||
197 | return; | 195 | return; |
198 | 196 | ||
199 | what |= ddir_act[rw & WRITE]; | 197 | what |= ddir_act[rw & WRITE]; |
200 | what |= MASK_TC_BIT(rw, HARDBARRIER); | ||
201 | what |= MASK_TC_BIT(rw, SYNC); | 198 | what |= MASK_TC_BIT(rw, SYNC); |
202 | what |= MASK_TC_BIT(rw, RAHEAD); | 199 | what |= MASK_TC_BIT(rw, RAHEAD); |
203 | what |= MASK_TC_BIT(rw, META); | 200 | what |= MASK_TC_BIT(rw, META); |
@@ -326,6 +323,7 @@ static const struct file_operations blk_dropped_fops = { | |||
326 | .owner = THIS_MODULE, | 323 | .owner = THIS_MODULE, |
327 | .open = blk_dropped_open, | 324 | .open = blk_dropped_open, |
328 | .read = blk_dropped_read, | 325 | .read = blk_dropped_read, |
326 | .llseek = default_llseek, | ||
329 | }; | 327 | }; |
330 | 328 | ||
331 | static int blk_msg_open(struct inode *inode, struct file *filp) | 329 | static int blk_msg_open(struct inode *inode, struct file *filp) |
@@ -365,6 +363,7 @@ static const struct file_operations blk_msg_fops = { | |||
365 | .owner = THIS_MODULE, | 363 | .owner = THIS_MODULE, |
366 | .open = blk_msg_open, | 364 | .open = blk_msg_open, |
367 | .write = blk_msg_write, | 365 | .write = blk_msg_write, |
366 | .llseek = noop_llseek, | ||
368 | }; | 367 | }; |
369 | 368 | ||
370 | /* | 369 | /* |
@@ -639,7 +638,6 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) | |||
639 | if (!q) | 638 | if (!q) |
640 | return -ENXIO; | 639 | return -ENXIO; |
641 | 640 | ||
642 | lock_kernel(); | ||
643 | mutex_lock(&bdev->bd_mutex); | 641 | mutex_lock(&bdev->bd_mutex); |
644 | 642 | ||
645 | switch (cmd) { | 643 | switch (cmd) { |
@@ -667,7 +665,6 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) | |||
667 | } | 665 | } |
668 | 666 | ||
669 | mutex_unlock(&bdev->bd_mutex); | 667 | mutex_unlock(&bdev->bd_mutex); |
670 | unlock_kernel(); | ||
671 | return ret; | 668 | return ret; |
672 | } | 669 | } |
673 | 670 | ||
@@ -1652,10 +1649,9 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, | |||
1652 | struct block_device *bdev; | 1649 | struct block_device *bdev; |
1653 | ssize_t ret = -ENXIO; | 1650 | ssize_t ret = -ENXIO; |
1654 | 1651 | ||
1655 | lock_kernel(); | ||
1656 | bdev = bdget(part_devt(p)); | 1652 | bdev = bdget(part_devt(p)); |
1657 | if (bdev == NULL) | 1653 | if (bdev == NULL) |
1658 | goto out_unlock_kernel; | 1654 | goto out; |
1659 | 1655 | ||
1660 | q = blk_trace_get_queue(bdev); | 1656 | q = blk_trace_get_queue(bdev); |
1661 | if (q == NULL) | 1657 | if (q == NULL) |
@@ -1683,8 +1679,7 @@ out_unlock_bdev: | |||
1683 | mutex_unlock(&bdev->bd_mutex); | 1679 | mutex_unlock(&bdev->bd_mutex); |
1684 | out_bdput: | 1680 | out_bdput: |
1685 | bdput(bdev); | 1681 | bdput(bdev); |
1686 | out_unlock_kernel: | 1682 | out: |
1687 | unlock_kernel(); | ||
1688 | return ret; | 1683 | return ret; |
1689 | } | 1684 | } |
1690 | 1685 | ||
@@ -1714,11 +1709,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, | |||
1714 | 1709 | ||
1715 | ret = -ENXIO; | 1710 | ret = -ENXIO; |
1716 | 1711 | ||
1717 | lock_kernel(); | ||
1718 | p = dev_to_part(dev); | 1712 | p = dev_to_part(dev); |
1719 | bdev = bdget(part_devt(p)); | 1713 | bdev = bdget(part_devt(p)); |
1720 | if (bdev == NULL) | 1714 | if (bdev == NULL) |
1721 | goto out_unlock_kernel; | 1715 | goto out; |
1722 | 1716 | ||
1723 | q = blk_trace_get_queue(bdev); | 1717 | q = blk_trace_get_queue(bdev); |
1724 | if (q == NULL) | 1718 | if (q == NULL) |
@@ -1753,8 +1747,6 @@ out_unlock_bdev: | |||
1753 | mutex_unlock(&bdev->bd_mutex); | 1747 | mutex_unlock(&bdev->bd_mutex); |
1754 | out_bdput: | 1748 | out_bdput: |
1755 | bdput(bdev); | 1749 | bdput(bdev); |
1756 | out_unlock_kernel: | ||
1757 | unlock_kernel(); | ||
1758 | out: | 1750 | out: |
1759 | return ret ? ret : count; | 1751 | return ret ? ret : count; |
1760 | } | 1752 | } |
@@ -1813,8 +1805,6 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) | |||
1813 | 1805 | ||
1814 | if (rw & REQ_RAHEAD) | 1806 | if (rw & REQ_RAHEAD) |
1815 | rwbs[i++] = 'A'; | 1807 | rwbs[i++] = 'A'; |
1816 | if (rw & REQ_HARDBARRIER) | ||
1817 | rwbs[i++] = 'B'; | ||
1818 | if (rw & REQ_SYNC) | 1808 | if (rw & REQ_SYNC) |
1819 | rwbs[i++] = 'S'; | 1809 | rwbs[i++] = 'S'; |
1820 | if (rw & REQ_META) | 1810 | if (rw & REQ_META) |
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index ebd80d50c474..f3dadae83883 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -800,6 +800,7 @@ static const struct file_operations ftrace_profile_fops = { | |||
800 | .open = tracing_open_generic, | 800 | .open = tracing_open_generic, |
801 | .read = ftrace_profile_read, | 801 | .read = ftrace_profile_read, |
802 | .write = ftrace_profile_write, | 802 | .write = ftrace_profile_write, |
803 | .llseek = default_llseek, | ||
803 | }; | 804 | }; |
804 | 805 | ||
805 | /* used to initialize the real stat files */ | 806 | /* used to initialize the real stat files */ |
@@ -2669,6 +2670,7 @@ static const struct file_operations ftrace_graph_fops = { | |||
2669 | .read = seq_read, | 2670 | .read = seq_read, |
2670 | .write = ftrace_graph_write, | 2671 | .write = ftrace_graph_write, |
2671 | .release = ftrace_graph_release, | 2672 | .release = ftrace_graph_release, |
2673 | .llseek = seq_lseek, | ||
2672 | }; | 2674 | }; |
2673 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ | 2675 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ |
2674 | 2676 | ||
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index c5a632a669e1..bd1c35a4fbcc 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -224,6 +224,9 @@ enum { | |||
224 | RB_LEN_TIME_STAMP = 16, | 224 | RB_LEN_TIME_STAMP = 16, |
225 | }; | 225 | }; |
226 | 226 | ||
227 | #define skip_time_extend(event) \ | ||
228 | ((struct ring_buffer_event *)((char *)event + RB_LEN_TIME_EXTEND)) | ||
229 | |||
227 | static inline int rb_null_event(struct ring_buffer_event *event) | 230 | static inline int rb_null_event(struct ring_buffer_event *event) |
228 | { | 231 | { |
229 | return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; | 232 | return event->type_len == RINGBUF_TYPE_PADDING && !event->time_delta; |
@@ -248,8 +251,12 @@ rb_event_data_length(struct ring_buffer_event *event) | |||
248 | return length + RB_EVNT_HDR_SIZE; | 251 | return length + RB_EVNT_HDR_SIZE; |
249 | } | 252 | } |
250 | 253 | ||
251 | /* inline for ring buffer fast paths */ | 254 | /* |
252 | static unsigned | 255 | * Return the length of the given event. Will return |
256 | * the length of the time extend if the event is a | ||
257 | * time extend. | ||
258 | */ | ||
259 | static inline unsigned | ||
253 | rb_event_length(struct ring_buffer_event *event) | 260 | rb_event_length(struct ring_buffer_event *event) |
254 | { | 261 | { |
255 | switch (event->type_len) { | 262 | switch (event->type_len) { |
@@ -274,13 +281,41 @@ rb_event_length(struct ring_buffer_event *event) | |||
274 | return 0; | 281 | return 0; |
275 | } | 282 | } |
276 | 283 | ||
284 | /* | ||
285 | * Return total length of time extend and data, | ||
286 | * or just the event length for all other events. | ||
287 | */ | ||
288 | static inline unsigned | ||
289 | rb_event_ts_length(struct ring_buffer_event *event) | ||
290 | { | ||
291 | unsigned len = 0; | ||
292 | |||
293 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { | ||
294 | /* time extends include the data event after it */ | ||
295 | len = RB_LEN_TIME_EXTEND; | ||
296 | event = skip_time_extend(event); | ||
297 | } | ||
298 | return len + rb_event_length(event); | ||
299 | } | ||
300 | |||
277 | /** | 301 | /** |
278 | * ring_buffer_event_length - return the length of the event | 302 | * ring_buffer_event_length - return the length of the event |
279 | * @event: the event to get the length of | 303 | * @event: the event to get the length of |
304 | * | ||
305 | * Returns the size of the data load of a data event. | ||
306 | * If the event is something other than a data event, it | ||
307 | * returns the size of the event itself. With the exception | ||
308 | * of a TIME EXTEND, where it still returns the size of the | ||
309 | * data load of the data event after it. | ||
280 | */ | 310 | */ |
281 | unsigned ring_buffer_event_length(struct ring_buffer_event *event) | 311 | unsigned ring_buffer_event_length(struct ring_buffer_event *event) |
282 | { | 312 | { |
283 | unsigned length = rb_event_length(event); | 313 | unsigned length; |
314 | |||
315 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) | ||
316 | event = skip_time_extend(event); | ||
317 | |||
318 | length = rb_event_length(event); | ||
284 | if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) | 319 | if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX) |
285 | return length; | 320 | return length; |
286 | length -= RB_EVNT_HDR_SIZE; | 321 | length -= RB_EVNT_HDR_SIZE; |
@@ -294,6 +329,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length); | |||
294 | static void * | 329 | static void * |
295 | rb_event_data(struct ring_buffer_event *event) | 330 | rb_event_data(struct ring_buffer_event *event) |
296 | { | 331 | { |
332 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) | ||
333 | event = skip_time_extend(event); | ||
297 | BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); | 334 | BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); |
298 | /* If length is in len field, then array[0] has the data */ | 335 | /* If length is in len field, then array[0] has the data */ |
299 | if (event->type_len) | 336 | if (event->type_len) |
@@ -404,9 +441,6 @@ static inline int test_time_stamp(u64 delta) | |||
404 | /* Max payload is BUF_PAGE_SIZE - header (8bytes) */ | 441 | /* Max payload is BUF_PAGE_SIZE - header (8bytes) */ |
405 | #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) | 442 | #define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2)) |
406 | 443 | ||
407 | /* Max number of timestamps that can fit on a page */ | ||
408 | #define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_EXTEND) | ||
409 | |||
410 | int ring_buffer_print_page_header(struct trace_seq *s) | 444 | int ring_buffer_print_page_header(struct trace_seq *s) |
411 | { | 445 | { |
412 | struct buffer_data_page field; | 446 | struct buffer_data_page field; |
@@ -1546,6 +1580,25 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) | |||
1546 | iter->head = 0; | 1580 | iter->head = 0; |
1547 | } | 1581 | } |
1548 | 1582 | ||
1583 | /* Slow path, do not inline */ | ||
1584 | static noinline struct ring_buffer_event * | ||
1585 | rb_add_time_stamp(struct ring_buffer_event *event, u64 delta) | ||
1586 | { | ||
1587 | event->type_len = RINGBUF_TYPE_TIME_EXTEND; | ||
1588 | |||
1589 | /* Not the first event on the page? */ | ||
1590 | if (rb_event_index(event)) { | ||
1591 | event->time_delta = delta & TS_MASK; | ||
1592 | event->array[0] = delta >> TS_SHIFT; | ||
1593 | } else { | ||
1594 | /* nope, just zero it */ | ||
1595 | event->time_delta = 0; | ||
1596 | event->array[0] = 0; | ||
1597 | } | ||
1598 | |||
1599 | return skip_time_extend(event); | ||
1600 | } | ||
1601 | |||
1549 | /** | 1602 | /** |
1550 | * ring_buffer_update_event - update event type and data | 1603 | * ring_buffer_update_event - update event type and data |
1551 | * @event: the event to update | 1604 | * @event: the event to update |
@@ -1558,28 +1611,31 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) | |||
1558 | * data field. | 1611 | * data field. |
1559 | */ | 1612 | */ |
1560 | static void | 1613 | static void |
1561 | rb_update_event(struct ring_buffer_event *event, | 1614 | rb_update_event(struct ring_buffer_per_cpu *cpu_buffer, |
1562 | unsigned type, unsigned length) | 1615 | struct ring_buffer_event *event, unsigned length, |
1616 | int add_timestamp, u64 delta) | ||
1563 | { | 1617 | { |
1564 | event->type_len = type; | 1618 | /* Only a commit updates the timestamp */ |
1565 | 1619 | if (unlikely(!rb_event_is_commit(cpu_buffer, event))) | |
1566 | switch (type) { | 1620 | delta = 0; |
1567 | |||
1568 | case RINGBUF_TYPE_PADDING: | ||
1569 | case RINGBUF_TYPE_TIME_EXTEND: | ||
1570 | case RINGBUF_TYPE_TIME_STAMP: | ||
1571 | break; | ||
1572 | 1621 | ||
1573 | case 0: | 1622 | /* |
1574 | length -= RB_EVNT_HDR_SIZE; | 1623 | * If we need to add a timestamp, then we |
1575 | if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) | 1624 | * add it to the start of the reserved space. |
1576 | event->array[0] = length; | 1625 | */ |
1577 | else | 1626 | if (unlikely(add_timestamp)) { |
1578 | event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); | 1627 | event = rb_add_time_stamp(event, delta); |
1579 | break; | 1628 | length -= RB_LEN_TIME_EXTEND; |
1580 | default: | 1629 | delta = 0; |
1581 | BUG(); | ||
1582 | } | 1630 | } |
1631 | |||
1632 | event->time_delta = delta; | ||
1633 | length -= RB_EVNT_HDR_SIZE; | ||
1634 | if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) { | ||
1635 | event->type_len = 0; | ||
1636 | event->array[0] = length; | ||
1637 | } else | ||
1638 | event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT); | ||
1583 | } | 1639 | } |
1584 | 1640 | ||
1585 | /* | 1641 | /* |
@@ -1823,10 +1879,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1823 | local_sub(length, &tail_page->write); | 1879 | local_sub(length, &tail_page->write); |
1824 | } | 1880 | } |
1825 | 1881 | ||
1826 | static struct ring_buffer_event * | 1882 | /* |
1883 | * This is the slow path, force gcc not to inline it. | ||
1884 | */ | ||
1885 | static noinline struct ring_buffer_event * | ||
1827 | rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | 1886 | rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, |
1828 | unsigned long length, unsigned long tail, | 1887 | unsigned long length, unsigned long tail, |
1829 | struct buffer_page *tail_page, u64 *ts) | 1888 | struct buffer_page *tail_page, u64 ts) |
1830 | { | 1889 | { |
1831 | struct buffer_page *commit_page = cpu_buffer->commit_page; | 1890 | struct buffer_page *commit_page = cpu_buffer->commit_page; |
1832 | struct ring_buffer *buffer = cpu_buffer->buffer; | 1891 | struct ring_buffer *buffer = cpu_buffer->buffer; |
@@ -1909,8 +1968,8 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1909 | * Nested commits always have zero deltas, so | 1968 | * Nested commits always have zero deltas, so |
1910 | * just reread the time stamp | 1969 | * just reread the time stamp |
1911 | */ | 1970 | */ |
1912 | *ts = rb_time_stamp(buffer); | 1971 | ts = rb_time_stamp(buffer); |
1913 | next_page->page->time_stamp = *ts; | 1972 | next_page->page->time_stamp = ts; |
1914 | } | 1973 | } |
1915 | 1974 | ||
1916 | out_again: | 1975 | out_again: |
@@ -1929,12 +1988,21 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, | |||
1929 | 1988 | ||
1930 | static struct ring_buffer_event * | 1989 | static struct ring_buffer_event * |
1931 | __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | 1990 | __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, |
1932 | unsigned type, unsigned long length, u64 *ts) | 1991 | unsigned long length, u64 ts, |
1992 | u64 delta, int add_timestamp) | ||
1933 | { | 1993 | { |
1934 | struct buffer_page *tail_page; | 1994 | struct buffer_page *tail_page; |
1935 | struct ring_buffer_event *event; | 1995 | struct ring_buffer_event *event; |
1936 | unsigned long tail, write; | 1996 | unsigned long tail, write; |
1937 | 1997 | ||
1998 | /* | ||
1999 | * If the time delta since the last event is too big to | ||
2000 | * hold in the time field of the event, then we append a | ||
2001 | * TIME EXTEND event ahead of the data event. | ||
2002 | */ | ||
2003 | if (unlikely(add_timestamp)) | ||
2004 | length += RB_LEN_TIME_EXTEND; | ||
2005 | |||
1938 | tail_page = cpu_buffer->tail_page; | 2006 | tail_page = cpu_buffer->tail_page; |
1939 | write = local_add_return(length, &tail_page->write); | 2007 | write = local_add_return(length, &tail_page->write); |
1940 | 2008 | ||
@@ -1943,7 +2011,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1943 | tail = write - length; | 2011 | tail = write - length; |
1944 | 2012 | ||
1945 | /* See if we shot past the end of this buffer page */ | 2013 | /* See if we shot past the end of this buffer page */ |
1946 | if (write > BUF_PAGE_SIZE) | 2014 | if (unlikely(write > BUF_PAGE_SIZE)) |
1947 | return rb_move_tail(cpu_buffer, length, tail, | 2015 | return rb_move_tail(cpu_buffer, length, tail, |
1948 | tail_page, ts); | 2016 | tail_page, ts); |
1949 | 2017 | ||
@@ -1951,18 +2019,16 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, | |||
1951 | 2019 | ||
1952 | event = __rb_page_index(tail_page, tail); | 2020 | event = __rb_page_index(tail_page, tail); |
1953 | kmemcheck_annotate_bitfield(event, bitfield); | 2021 | kmemcheck_annotate_bitfield(event, bitfield); |
1954 | rb_update_event(event, type, length); | 2022 | rb_update_event(cpu_buffer, event, length, add_timestamp, delta); |
1955 | 2023 | ||
1956 | /* The passed in type is zero for DATA */ | 2024 | local_inc(&tail_page->entries); |
1957 | if (likely(!type)) | ||
1958 | local_inc(&tail_page->entries); | ||
1959 | 2025 | ||
1960 | /* | 2026 | /* |
1961 | * If this is the first commit on the page, then update | 2027 | * If this is the first commit on the page, then update |
1962 | * its timestamp. | 2028 | * its timestamp. |
1963 | */ | 2029 | */ |
1964 | if (!tail) | 2030 | if (!tail) |
1965 | tail_page->page->time_stamp = *ts; | 2031 | tail_page->page->time_stamp = ts; |
1966 | 2032 | ||
1967 | return event; | 2033 | return event; |
1968 | } | 2034 | } |
@@ -1977,7 +2043,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, | |||
1977 | unsigned long addr; | 2043 | unsigned long addr; |
1978 | 2044 | ||
1979 | new_index = rb_event_index(event); | 2045 | new_index = rb_event_index(event); |
1980 | old_index = new_index + rb_event_length(event); | 2046 | old_index = new_index + rb_event_ts_length(event); |
1981 | addr = (unsigned long)event; | 2047 | addr = (unsigned long)event; |
1982 | addr &= PAGE_MASK; | 2048 | addr &= PAGE_MASK; |
1983 | 2049 | ||
@@ -2003,76 +2069,13 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, | |||
2003 | return 0; | 2069 | return 0; |
2004 | } | 2070 | } |
2005 | 2071 | ||
2006 | static int | ||
2007 | rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer, | ||
2008 | u64 *ts, u64 *delta) | ||
2009 | { | ||
2010 | struct ring_buffer_event *event; | ||
2011 | int ret; | ||
2012 | |||
2013 | WARN_ONCE(*delta > (1ULL << 59), | ||
2014 | KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n", | ||
2015 | (unsigned long long)*delta, | ||
2016 | (unsigned long long)*ts, | ||
2017 | (unsigned long long)cpu_buffer->write_stamp); | ||
2018 | |||
2019 | /* | ||
2020 | * The delta is too big, we to add a | ||
2021 | * new timestamp. | ||
2022 | */ | ||
2023 | event = __rb_reserve_next(cpu_buffer, | ||
2024 | RINGBUF_TYPE_TIME_EXTEND, | ||
2025 | RB_LEN_TIME_EXTEND, | ||
2026 | ts); | ||
2027 | if (!event) | ||
2028 | return -EBUSY; | ||
2029 | |||
2030 | if (PTR_ERR(event) == -EAGAIN) | ||
2031 | return -EAGAIN; | ||
2032 | |||
2033 | /* Only a commited time event can update the write stamp */ | ||
2034 | if (rb_event_is_commit(cpu_buffer, event)) { | ||
2035 | /* | ||
2036 | * If this is the first on the page, then it was | ||
2037 | * updated with the page itself. Try to discard it | ||
2038 | * and if we can't just make it zero. | ||
2039 | */ | ||
2040 | if (rb_event_index(event)) { | ||
2041 | event->time_delta = *delta & TS_MASK; | ||
2042 | event->array[0] = *delta >> TS_SHIFT; | ||
2043 | } else { | ||
2044 | /* try to discard, since we do not need this */ | ||
2045 | if (!rb_try_to_discard(cpu_buffer, event)) { | ||
2046 | /* nope, just zero it */ | ||
2047 | event->time_delta = 0; | ||
2048 | event->array[0] = 0; | ||
2049 | } | ||
2050 | } | ||
2051 | cpu_buffer->write_stamp = *ts; | ||
2052 | /* let the caller know this was the commit */ | ||
2053 | ret = 1; | ||
2054 | } else { | ||
2055 | /* Try to discard the event */ | ||
2056 | if (!rb_try_to_discard(cpu_buffer, event)) { | ||
2057 | /* Darn, this is just wasted space */ | ||
2058 | event->time_delta = 0; | ||
2059 | event->array[0] = 0; | ||
2060 | } | ||
2061 | ret = 0; | ||
2062 | } | ||
2063 | |||
2064 | *delta = 0; | ||
2065 | |||
2066 | return ret; | ||
2067 | } | ||
2068 | |||
2069 | static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) | 2072 | static void rb_start_commit(struct ring_buffer_per_cpu *cpu_buffer) |
2070 | { | 2073 | { |
2071 | local_inc(&cpu_buffer->committing); | 2074 | local_inc(&cpu_buffer->committing); |
2072 | local_inc(&cpu_buffer->commits); | 2075 | local_inc(&cpu_buffer->commits); |
2073 | } | 2076 | } |
2074 | 2077 | ||
2075 | static void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) | 2078 | static inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer) |
2076 | { | 2079 | { |
2077 | unsigned long commits; | 2080 | unsigned long commits; |
2078 | 2081 | ||
@@ -2110,9 +2113,10 @@ rb_reserve_next_event(struct ring_buffer *buffer, | |||
2110 | unsigned long length) | 2113 | unsigned long length) |
2111 | { | 2114 | { |
2112 | struct ring_buffer_event *event; | 2115 | struct ring_buffer_event *event; |
2113 | u64 ts, delta = 0; | 2116 | u64 ts, delta; |
2114 | int commit = 0; | ||
2115 | int nr_loops = 0; | 2117 | int nr_loops = 0; |
2118 | int add_timestamp; | ||
2119 | u64 diff; | ||
2116 | 2120 | ||
2117 | rb_start_commit(cpu_buffer); | 2121 | rb_start_commit(cpu_buffer); |
2118 | 2122 | ||
@@ -2133,6 +2137,9 @@ rb_reserve_next_event(struct ring_buffer *buffer, | |||
2133 | 2137 | ||
2134 | length = rb_calculate_event_length(length); | 2138 | length = rb_calculate_event_length(length); |
2135 | again: | 2139 | again: |
2140 | add_timestamp = 0; | ||
2141 | delta = 0; | ||
2142 | |||
2136 | /* | 2143 | /* |
2137 | * We allow for interrupts to reenter here and do a trace. | 2144 | * We allow for interrupts to reenter here and do a trace. |
2138 | * If one does, it will cause this original code to loop | 2145 | * If one does, it will cause this original code to loop |
@@ -2146,56 +2153,32 @@ rb_reserve_next_event(struct ring_buffer *buffer, | |||
2146 | goto out_fail; | 2153 | goto out_fail; |
2147 | 2154 | ||
2148 | ts = rb_time_stamp(cpu_buffer->buffer); | 2155 | ts = rb_time_stamp(cpu_buffer->buffer); |
2156 | diff = ts - cpu_buffer->write_stamp; | ||
2149 | 2157 | ||
2150 | /* | 2158 | /* make sure this diff is calculated here */ |
2151 | * Only the first commit can update the timestamp. | 2159 | barrier(); |
2152 | * Yes there is a race here. If an interrupt comes in | ||
2153 | * just after the conditional and it traces too, then it | ||
2154 | * will also check the deltas. More than one timestamp may | ||
2155 | * also be made. But only the entry that did the actual | ||
2156 | * commit will be something other than zero. | ||
2157 | */ | ||
2158 | if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page && | ||
2159 | rb_page_write(cpu_buffer->tail_page) == | ||
2160 | rb_commit_index(cpu_buffer))) { | ||
2161 | u64 diff; | ||
2162 | |||
2163 | diff = ts - cpu_buffer->write_stamp; | ||
2164 | |||
2165 | /* make sure this diff is calculated here */ | ||
2166 | barrier(); | ||
2167 | |||
2168 | /* Did the write stamp get updated already? */ | ||
2169 | if (unlikely(ts < cpu_buffer->write_stamp)) | ||
2170 | goto get_event; | ||
2171 | 2160 | ||
2161 | /* Did the write stamp get updated already? */ | ||
2162 | if (likely(ts >= cpu_buffer->write_stamp)) { | ||
2172 | delta = diff; | 2163 | delta = diff; |
2173 | if (unlikely(test_time_stamp(delta))) { | 2164 | if (unlikely(test_time_stamp(delta))) { |
2174 | 2165 | WARN_ONCE(delta > (1ULL << 59), | |
2175 | commit = rb_add_time_stamp(cpu_buffer, &ts, &delta); | 2166 | KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n", |
2176 | if (commit == -EBUSY) | 2167 | (unsigned long long)delta, |
2177 | goto out_fail; | 2168 | (unsigned long long)ts, |
2178 | 2169 | (unsigned long long)cpu_buffer->write_stamp); | |
2179 | if (commit == -EAGAIN) | 2170 | add_timestamp = 1; |
2180 | goto again; | ||
2181 | |||
2182 | RB_WARN_ON(cpu_buffer, commit < 0); | ||
2183 | } | 2171 | } |
2184 | } | 2172 | } |
2185 | 2173 | ||
2186 | get_event: | 2174 | event = __rb_reserve_next(cpu_buffer, length, ts, |
2187 | event = __rb_reserve_next(cpu_buffer, 0, length, &ts); | 2175 | delta, add_timestamp); |
2188 | if (unlikely(PTR_ERR(event) == -EAGAIN)) | 2176 | if (unlikely(PTR_ERR(event) == -EAGAIN)) |
2189 | goto again; | 2177 | goto again; |
2190 | 2178 | ||
2191 | if (!event) | 2179 | if (!event) |
2192 | goto out_fail; | 2180 | goto out_fail; |
2193 | 2181 | ||
2194 | if (!rb_event_is_commit(cpu_buffer, event)) | ||
2195 | delta = 0; | ||
2196 | |||
2197 | event->time_delta = delta; | ||
2198 | |||
2199 | return event; | 2182 | return event; |
2200 | 2183 | ||
2201 | out_fail: | 2184 | out_fail: |
@@ -2207,13 +2190,9 @@ rb_reserve_next_event(struct ring_buffer *buffer, | |||
2207 | 2190 | ||
2208 | #define TRACE_RECURSIVE_DEPTH 16 | 2191 | #define TRACE_RECURSIVE_DEPTH 16 |
2209 | 2192 | ||
2210 | static int trace_recursive_lock(void) | 2193 | /* Keep this code out of the fast path cache */ |
2194 | static noinline void trace_recursive_fail(void) | ||
2211 | { | 2195 | { |
2212 | current->trace_recursion++; | ||
2213 | |||
2214 | if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH)) | ||
2215 | return 0; | ||
2216 | |||
2217 | /* Disable all tracing before we do anything else */ | 2196 | /* Disable all tracing before we do anything else */ |
2218 | tracing_off_permanent(); | 2197 | tracing_off_permanent(); |
2219 | 2198 | ||
@@ -2225,10 +2204,21 @@ static int trace_recursive_lock(void) | |||
2225 | in_nmi()); | 2204 | in_nmi()); |
2226 | 2205 | ||
2227 | WARN_ON_ONCE(1); | 2206 | WARN_ON_ONCE(1); |
2207 | } | ||
2208 | |||
2209 | static inline int trace_recursive_lock(void) | ||
2210 | { | ||
2211 | current->trace_recursion++; | ||
2212 | |||
2213 | if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH)) | ||
2214 | return 0; | ||
2215 | |||
2216 | trace_recursive_fail(); | ||
2217 | |||
2228 | return -1; | 2218 | return -1; |
2229 | } | 2219 | } |
2230 | 2220 | ||
2231 | static void trace_recursive_unlock(void) | 2221 | static inline void trace_recursive_unlock(void) |
2232 | { | 2222 | { |
2233 | WARN_ON_ONCE(!current->trace_recursion); | 2223 | WARN_ON_ONCE(!current->trace_recursion); |
2234 | 2224 | ||
@@ -2308,12 +2298,28 @@ static void | |||
2308 | rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, | 2298 | rb_update_write_stamp(struct ring_buffer_per_cpu *cpu_buffer, |
2309 | struct ring_buffer_event *event) | 2299 | struct ring_buffer_event *event) |
2310 | { | 2300 | { |
2301 | u64 delta; | ||
2302 | |||
2311 | /* | 2303 | /* |
2312 | * The event first in the commit queue updates the | 2304 | * The event first in the commit queue updates the |
2313 | * time stamp. | 2305 | * time stamp. |
2314 | */ | 2306 | */ |
2315 | if (rb_event_is_commit(cpu_buffer, event)) | 2307 | if (rb_event_is_commit(cpu_buffer, event)) { |
2316 | cpu_buffer->write_stamp += event->time_delta; | 2308 | /* |
2309 | * A commit event that is first on a page | ||
2310 | * updates the write timestamp with the page stamp | ||
2311 | */ | ||
2312 | if (!rb_event_index(event)) | ||
2313 | cpu_buffer->write_stamp = | ||
2314 | cpu_buffer->commit_page->page->time_stamp; | ||
2315 | else if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { | ||
2316 | delta = event->array[0]; | ||
2317 | delta <<= TS_SHIFT; | ||
2318 | delta += event->time_delta; | ||
2319 | cpu_buffer->write_stamp += delta; | ||
2320 | } else | ||
2321 | cpu_buffer->write_stamp += event->time_delta; | ||
2322 | } | ||
2317 | } | 2323 | } |
2318 | 2324 | ||
2319 | static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, | 2325 | static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, |
@@ -2353,6 +2359,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); | |||
2353 | 2359 | ||
2354 | static inline void rb_event_discard(struct ring_buffer_event *event) | 2360 | static inline void rb_event_discard(struct ring_buffer_event *event) |
2355 | { | 2361 | { |
2362 | if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) | ||
2363 | event = skip_time_extend(event); | ||
2364 | |||
2356 | /* array[0] holds the actual length for the discarded event */ | 2365 | /* array[0] holds the actual length for the discarded event */ |
2357 | event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; | 2366 | event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE; |
2358 | event->type_len = RINGBUF_TYPE_PADDING; | 2367 | event->type_len = RINGBUF_TYPE_PADDING; |
@@ -3049,12 +3058,12 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts, | |||
3049 | 3058 | ||
3050 | again: | 3059 | again: |
3051 | /* | 3060 | /* |
3052 | * We repeat when a timestamp is encountered. It is possible | 3061 | * We repeat when a time extend is encountered. |
3053 | * to get multiple timestamps from an interrupt entering just | 3062 | * Since the time extend is always attached to a data event, |
3054 | * as one timestamp is about to be written, or from discarded | 3063 | * we should never loop more than once. |
3055 | * commits. The most that we can have is the number on a single page. | 3064 | * (We never hit the following condition more than twice). |
3056 | */ | 3065 | */ |
3057 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) | 3066 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2)) |
3058 | return NULL; | 3067 | return NULL; |
3059 | 3068 | ||
3060 | reader = rb_get_reader_page(cpu_buffer); | 3069 | reader = rb_get_reader_page(cpu_buffer); |
@@ -3130,14 +3139,12 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts) | |||
3130 | return NULL; | 3139 | return NULL; |
3131 | 3140 | ||
3132 | /* | 3141 | /* |
3133 | * We repeat when a timestamp is encountered. | 3142 | * We repeat when a time extend is encountered. |
3134 | * We can get multiple timestamps by nested interrupts or also | 3143 | * Since the time extend is always attached to a data event, |
3135 | * if filtering is on (discarding commits). Since discarding | 3144 | * we should never loop more than once. |
3136 | * commits can be frequent we can get a lot of timestamps. | 3145 | * (We never hit the following condition more than twice). |
3137 | * But we limit them by not adding timestamps if they begin | ||
3138 | * at the start of a page. | ||
3139 | */ | 3146 | */ |
3140 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE)) | 3147 | if (RB_WARN_ON(cpu_buffer, ++nr_loops > 2)) |
3141 | return NULL; | 3148 | return NULL; |
3142 | 3149 | ||
3143 | if (rb_per_cpu_empty(cpu_buffer)) | 3150 | if (rb_per_cpu_empty(cpu_buffer)) |
@@ -3835,7 +3842,8 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3835 | if (len > (commit - read)) | 3842 | if (len > (commit - read)) |
3836 | len = (commit - read); | 3843 | len = (commit - read); |
3837 | 3844 | ||
3838 | size = rb_event_length(event); | 3845 | /* Always keep the time extend and data together */ |
3846 | size = rb_event_ts_length(event); | ||
3839 | 3847 | ||
3840 | if (len < size) | 3848 | if (len < size) |
3841 | goto out_unlock; | 3849 | goto out_unlock; |
@@ -3845,6 +3853,13 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3845 | 3853 | ||
3846 | /* Need to copy one event at a time */ | 3854 | /* Need to copy one event at a time */ |
3847 | do { | 3855 | do { |
3856 | /* We need the size of one event, because | ||
3857 | * rb_advance_reader only advances by one event, | ||
3858 | * whereas rb_event_ts_length may include the size of | ||
3859 | * one or two events. | ||
3860 | * We have already ensured there's enough space if this | ||
3861 | * is a time extend. */ | ||
3862 | size = rb_event_length(event); | ||
3848 | memcpy(bpage->data + pos, rpage->data + rpos, size); | 3863 | memcpy(bpage->data + pos, rpage->data + rpos, size); |
3849 | 3864 | ||
3850 | len -= size; | 3865 | len -= size; |
@@ -3857,8 +3872,9 @@ int ring_buffer_read_page(struct ring_buffer *buffer, | |||
3857 | break; | 3872 | break; |
3858 | 3873 | ||
3859 | event = rb_reader_event(cpu_buffer); | 3874 | event = rb_reader_event(cpu_buffer); |
3860 | size = rb_event_length(event); | 3875 | /* Always keep the time extend and data together */ |
3861 | } while (len > size); | 3876 | size = rb_event_ts_length(event); |
3877 | } while (len >= size); | ||
3862 | 3878 | ||
3863 | /* update bpage */ | 3879 | /* update bpage */ |
3864 | local_set(&bpage->commit, pos); | 3880 | local_set(&bpage->commit, pos); |
@@ -3974,6 +3990,7 @@ static const struct file_operations rb_simple_fops = { | |||
3974 | .open = tracing_open_generic, | 3990 | .open = tracing_open_generic, |
3975 | .read = rb_simple_read, | 3991 | .read = rb_simple_read, |
3976 | .write = rb_simple_write, | 3992 | .write = rb_simple_write, |
3993 | .llseek = default_llseek, | ||
3977 | }; | 3994 | }; |
3978 | 3995 | ||
3979 | 3996 | ||
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 001bcd2ccf4a..f8cf959bad45 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -17,7 +17,6 @@ | |||
17 | #include <linux/writeback.h> | 17 | #include <linux/writeback.h> |
18 | #include <linux/kallsyms.h> | 18 | #include <linux/kallsyms.h> |
19 | #include <linux/seq_file.h> | 19 | #include <linux/seq_file.h> |
20 | #include <linux/smp_lock.h> | ||
21 | #include <linux/notifier.h> | 20 | #include <linux/notifier.h> |
22 | #include <linux/irqflags.h> | 21 | #include <linux/irqflags.h> |
23 | #include <linux/debugfs.h> | 22 | #include <linux/debugfs.h> |
@@ -1284,6 +1283,8 @@ void trace_dump_stack(void) | |||
1284 | __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count()); | 1283 | __ftrace_trace_stack(global_trace.buffer, flags, 3, preempt_count()); |
1285 | } | 1284 | } |
1286 | 1285 | ||
1286 | static DEFINE_PER_CPU(int, user_stack_count); | ||
1287 | |||
1287 | void | 1288 | void |
1288 | ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) | 1289 | ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) |
1289 | { | 1290 | { |
@@ -1302,6 +1303,18 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) | |||
1302 | if (unlikely(in_nmi())) | 1303 | if (unlikely(in_nmi())) |
1303 | return; | 1304 | return; |
1304 | 1305 | ||
1306 | /* | ||
1307 | * prevent recursion, since the user stack tracing may | ||
1308 | * trigger other kernel events. | ||
1309 | */ | ||
1310 | preempt_disable(); | ||
1311 | if (__this_cpu_read(user_stack_count)) | ||
1312 | goto out; | ||
1313 | |||
1314 | __this_cpu_inc(user_stack_count); | ||
1315 | |||
1316 | |||
1317 | |||
1305 | event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, | 1318 | event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, |
1306 | sizeof(*entry), flags, pc); | 1319 | sizeof(*entry), flags, pc); |
1307 | if (!event) | 1320 | if (!event) |
@@ -1319,6 +1332,11 @@ ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) | |||
1319 | save_stack_trace_user(&trace); | 1332 | save_stack_trace_user(&trace); |
1320 | if (!filter_check_discard(call, entry, buffer, event)) | 1333 | if (!filter_check_discard(call, entry, buffer, event)) |
1321 | ring_buffer_unlock_commit(buffer, event); | 1334 | ring_buffer_unlock_commit(buffer, event); |
1335 | |||
1336 | __this_cpu_dec(user_stack_count); | ||
1337 | |||
1338 | out: | ||
1339 | preempt_enable(); | ||
1322 | } | 1340 | } |
1323 | 1341 | ||
1324 | #ifdef UNUSED | 1342 | #ifdef UNUSED |
@@ -2320,11 +2338,19 @@ tracing_write_stub(struct file *filp, const char __user *ubuf, | |||
2320 | return count; | 2338 | return count; |
2321 | } | 2339 | } |
2322 | 2340 | ||
2341 | static loff_t tracing_seek(struct file *file, loff_t offset, int origin) | ||
2342 | { | ||
2343 | if (file->f_mode & FMODE_READ) | ||
2344 | return seq_lseek(file, offset, origin); | ||
2345 | else | ||
2346 | return 0; | ||
2347 | } | ||
2348 | |||
2323 | static const struct file_operations tracing_fops = { | 2349 | static const struct file_operations tracing_fops = { |
2324 | .open = tracing_open, | 2350 | .open = tracing_open, |
2325 | .read = seq_read, | 2351 | .read = seq_read, |
2326 | .write = tracing_write_stub, | 2352 | .write = tracing_write_stub, |
2327 | .llseek = seq_lseek, | 2353 | .llseek = tracing_seek, |
2328 | .release = tracing_release, | 2354 | .release = tracing_release, |
2329 | }; | 2355 | }; |
2330 | 2356 | ||
@@ -3996,13 +4022,9 @@ static void tracing_init_debugfs_percpu(long cpu) | |||
3996 | { | 4022 | { |
3997 | struct dentry *d_percpu = tracing_dentry_percpu(); | 4023 | struct dentry *d_percpu = tracing_dentry_percpu(); |
3998 | struct dentry *d_cpu; | 4024 | struct dentry *d_cpu; |
3999 | /* strlen(cpu) + MAX(log10(cpu)) + '\0' */ | 4025 | char cpu_dir[30]; /* 30 characters should be more than enough */ |
4000 | char cpu_dir[7]; | ||
4001 | |||
4002 | if (cpu > 999 || cpu < 0) | ||
4003 | return; | ||
4004 | 4026 | ||
4005 | sprintf(cpu_dir, "cpu%ld", cpu); | 4027 | snprintf(cpu_dir, 30, "cpu%ld", cpu); |
4006 | d_cpu = debugfs_create_dir(cpu_dir, d_percpu); | 4028 | d_cpu = debugfs_create_dir(cpu_dir, d_percpu); |
4007 | if (!d_cpu) { | 4029 | if (!d_cpu) { |
4008 | pr_warning("Could not create debugfs '%s' entry\n", cpu_dir); | 4030 | pr_warning("Could not create debugfs '%s' entry\n", cpu_dir); |
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 398c0e8b332c..0725eeab1937 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
@@ -932,6 +932,7 @@ static const struct file_operations ftrace_enable_fops = { | |||
932 | .open = tracing_open_generic, | 932 | .open = tracing_open_generic, |
933 | .read = event_enable_read, | 933 | .read = event_enable_read, |
934 | .write = event_enable_write, | 934 | .write = event_enable_write, |
935 | .llseek = default_llseek, | ||
935 | }; | 936 | }; |
936 | 937 | ||
937 | static const struct file_operations ftrace_event_format_fops = { | 938 | static const struct file_operations ftrace_event_format_fops = { |
@@ -944,29 +945,34 @@ static const struct file_operations ftrace_event_format_fops = { | |||
944 | static const struct file_operations ftrace_event_id_fops = { | 945 | static const struct file_operations ftrace_event_id_fops = { |
945 | .open = tracing_open_generic, | 946 | .open = tracing_open_generic, |
946 | .read = event_id_read, | 947 | .read = event_id_read, |
948 | .llseek = default_llseek, | ||
947 | }; | 949 | }; |
948 | 950 | ||
949 | static const struct file_operations ftrace_event_filter_fops = { | 951 | static const struct file_operations ftrace_event_filter_fops = { |
950 | .open = tracing_open_generic, | 952 | .open = tracing_open_generic, |
951 | .read = event_filter_read, | 953 | .read = event_filter_read, |
952 | .write = event_filter_write, | 954 | .write = event_filter_write, |
955 | .llseek = default_llseek, | ||
953 | }; | 956 | }; |
954 | 957 | ||
955 | static const struct file_operations ftrace_subsystem_filter_fops = { | 958 | static const struct file_operations ftrace_subsystem_filter_fops = { |
956 | .open = tracing_open_generic, | 959 | .open = tracing_open_generic, |
957 | .read = subsystem_filter_read, | 960 | .read = subsystem_filter_read, |
958 | .write = subsystem_filter_write, | 961 | .write = subsystem_filter_write, |
962 | .llseek = default_llseek, | ||
959 | }; | 963 | }; |
960 | 964 | ||
961 | static const struct file_operations ftrace_system_enable_fops = { | 965 | static const struct file_operations ftrace_system_enable_fops = { |
962 | .open = tracing_open_generic, | 966 | .open = tracing_open_generic, |
963 | .read = system_enable_read, | 967 | .read = system_enable_read, |
964 | .write = system_enable_write, | 968 | .write = system_enable_write, |
969 | .llseek = default_llseek, | ||
965 | }; | 970 | }; |
966 | 971 | ||
967 | static const struct file_operations ftrace_show_header_fops = { | 972 | static const struct file_operations ftrace_show_header_fops = { |
968 | .open = tracing_open_generic, | 973 | .open = tracing_open_generic, |
969 | .read = show_header, | 974 | .read = show_header, |
975 | .llseek = default_llseek, | ||
970 | }; | 976 | }; |
971 | 977 | ||
972 | static struct dentry *event_trace_events_dir(void) | 978 | static struct dentry *event_trace_events_dir(void) |
diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c index 7b8ecd751d93..3c5c5dfea0b3 100644 --- a/kernel/trace/trace_kdb.c +++ b/kernel/trace/trace_kdb.c | |||
@@ -13,7 +13,6 @@ | |||
13 | #include <linux/kdb.h> | 13 | #include <linux/kdb.h> |
14 | #include <linux/ftrace.h> | 14 | #include <linux/ftrace.h> |
15 | 15 | ||
16 | #include "../debug/kdb/kdb_private.h" | ||
17 | #include "trace.h" | 16 | #include "trace.h" |
18 | #include "trace_output.h" | 17 | #include "trace_output.h" |
19 | 18 | ||
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 544301d29dee..2dec9bcde8b4 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
@@ -31,7 +31,6 @@ | |||
31 | #include <linux/perf_event.h> | 31 | #include <linux/perf_event.h> |
32 | #include <linux/stringify.h> | 32 | #include <linux/stringify.h> |
33 | #include <linux/limits.h> | 33 | #include <linux/limits.h> |
34 | #include <linux/uaccess.h> | ||
35 | #include <asm/bitsperlong.h> | 34 | #include <asm/bitsperlong.h> |
36 | 35 | ||
37 | #include "trace.h" | 36 | #include "trace.h" |
@@ -648,7 +647,7 @@ static int register_trace_probe(struct trace_probe *tp) | |||
648 | } | 647 | } |
649 | ret = register_probe_event(tp); | 648 | ret = register_probe_event(tp); |
650 | if (ret) { | 649 | if (ret) { |
651 | pr_warning("Faild to register probe event(%d)\n", ret); | 650 | pr_warning("Failed to register probe event(%d)\n", ret); |
652 | goto end; | 651 | goto end; |
653 | } | 652 | } |
654 | 653 | ||
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index a6b7e0e0f3eb..4c5dead0c239 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c | |||
@@ -195,6 +195,7 @@ static const struct file_operations stack_max_size_fops = { | |||
195 | .open = tracing_open_generic, | 195 | .open = tracing_open_generic, |
196 | .read = stack_max_size_read, | 196 | .read = stack_max_size_read, |
197 | .write = stack_max_size_write, | 197 | .write = stack_max_size_write, |
198 | .llseek = default_llseek, | ||
198 | }; | 199 | }; |
199 | 200 | ||
200 | static void * | 201 | static void * |
diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 0a67e041edf8..24dc60d9fa1f 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c | |||
@@ -63,12 +63,10 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) | |||
63 | stats->ac_ppid = pid_alive(tsk) ? | 63 | stats->ac_ppid = pid_alive(tsk) ? |
64 | rcu_dereference(tsk->real_parent)->tgid : 0; | 64 | rcu_dereference(tsk->real_parent)->tgid : 0; |
65 | rcu_read_unlock(); | 65 | rcu_read_unlock(); |
66 | stats->ac_utime = cputime_to_msecs(tsk->utime) * USEC_PER_MSEC; | 66 | stats->ac_utime = cputime_to_usecs(tsk->utime); |
67 | stats->ac_stime = cputime_to_msecs(tsk->stime) * USEC_PER_MSEC; | 67 | stats->ac_stime = cputime_to_usecs(tsk->stime); |
68 | stats->ac_utimescaled = | 68 | stats->ac_utimescaled = cputime_to_usecs(tsk->utimescaled); |
69 | cputime_to_msecs(tsk->utimescaled) * USEC_PER_MSEC; | 69 | stats->ac_stimescaled = cputime_to_usecs(tsk->stimescaled); |
70 | stats->ac_stimescaled = | ||
71 | cputime_to_msecs(tsk->stimescaled) * USEC_PER_MSEC; | ||
72 | stats->ac_minflt = tsk->min_flt; | 70 | stats->ac_minflt = tsk->min_flt; |
73 | stats->ac_majflt = tsk->maj_flt; | 71 | stats->ac_majflt = tsk->maj_flt; |
74 | 72 | ||
diff --git a/kernel/user.c b/kernel/user.c index 7e72614b736d..2c7d8d5914b1 100644 --- a/kernel/user.c +++ b/kernel/user.c | |||
@@ -91,6 +91,7 @@ static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent) | |||
91 | * upon function exit. | 91 | * upon function exit. |
92 | */ | 92 | */ |
93 | static void free_user(struct user_struct *up, unsigned long flags) | 93 | static void free_user(struct user_struct *up, unsigned long flags) |
94 | __releases(&uidhash_lock) | ||
94 | { | 95 | { |
95 | uid_hash_remove(up); | 96 | uid_hash_remove(up); |
96 | spin_unlock_irqrestore(&uidhash_lock, flags); | 97 | spin_unlock_irqrestore(&uidhash_lock, flags); |
diff --git a/kernel/wait.c b/kernel/wait.c index c4bd3d825f35..b0310eb6cc1e 100644 --- a/kernel/wait.c +++ b/kernel/wait.c | |||
@@ -92,7 +92,7 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state) | |||
92 | } | 92 | } |
93 | EXPORT_SYMBOL(prepare_to_wait_exclusive); | 93 | EXPORT_SYMBOL(prepare_to_wait_exclusive); |
94 | 94 | ||
95 | /* | 95 | /** |
96 | * finish_wait - clean up after waiting in a queue | 96 | * finish_wait - clean up after waiting in a queue |
97 | * @q: waitqueue waited on | 97 | * @q: waitqueue waited on |
98 | * @wait: wait descriptor | 98 | * @wait: wait descriptor |
@@ -127,11 +127,11 @@ void finish_wait(wait_queue_head_t *q, wait_queue_t *wait) | |||
127 | } | 127 | } |
128 | EXPORT_SYMBOL(finish_wait); | 128 | EXPORT_SYMBOL(finish_wait); |
129 | 129 | ||
130 | /* | 130 | /** |
131 | * abort_exclusive_wait - abort exclusive waiting in a queue | 131 | * abort_exclusive_wait - abort exclusive waiting in a queue |
132 | * @q: waitqueue waited on | 132 | * @q: waitqueue waited on |
133 | * @wait: wait descriptor | 133 | * @wait: wait descriptor |
134 | * @state: runstate of the waiter to be woken | 134 | * @mode: runstate of the waiter to be woken |
135 | * @key: key to identify a wait bit queue or %NULL | 135 | * @key: key to identify a wait bit queue or %NULL |
136 | * | 136 | * |
137 | * Sets current thread back to running state and removes | 137 | * Sets current thread back to running state and removes |
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index bafba687a6d8..6e3c41a4024c 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -43,7 +43,7 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); | |||
43 | static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); | 43 | static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); |
44 | #endif | 44 | #endif |
45 | 45 | ||
46 | static int __initdata no_watchdog; | 46 | static int no_watchdog; |
47 | 47 | ||
48 | 48 | ||
49 | /* boot commands */ | 49 | /* boot commands */ |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f77afd939229..e785b0f2aea5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -42,9 +42,6 @@ | |||
42 | #include <linux/lockdep.h> | 42 | #include <linux/lockdep.h> |
43 | #include <linux/idr.h> | 43 | #include <linux/idr.h> |
44 | 44 | ||
45 | #define CREATE_TRACE_POINTS | ||
46 | #include <trace/events/workqueue.h> | ||
47 | |||
48 | #include "workqueue_sched.h" | 45 | #include "workqueue_sched.h" |
49 | 46 | ||
50 | enum { | 47 | enum { |
@@ -257,6 +254,9 @@ EXPORT_SYMBOL_GPL(system_long_wq); | |||
257 | EXPORT_SYMBOL_GPL(system_nrt_wq); | 254 | EXPORT_SYMBOL_GPL(system_nrt_wq); |
258 | EXPORT_SYMBOL_GPL(system_unbound_wq); | 255 | EXPORT_SYMBOL_GPL(system_unbound_wq); |
259 | 256 | ||
257 | #define CREATE_TRACE_POINTS | ||
258 | #include <trace/events/workqueue.h> | ||
259 | |||
260 | #define for_each_busy_worker(worker, i, pos, gcwq) \ | 260 | #define for_each_busy_worker(worker, i, pos, gcwq) \ |
261 | for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) \ | 261 | for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) \ |
262 | hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry) | 262 | hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry) |
@@ -310,21 +310,6 @@ static inline int __next_wq_cpu(int cpu, const struct cpumask *mask, | |||
310 | (cpu) < WORK_CPU_NONE; \ | 310 | (cpu) < WORK_CPU_NONE; \ |
311 | (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, (wq))) | 311 | (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, (wq))) |
312 | 312 | ||
313 | #ifdef CONFIG_LOCKDEP | ||
314 | /** | ||
315 | * in_workqueue_context() - in context of specified workqueue? | ||
316 | * @wq: the workqueue of interest | ||
317 | * | ||
318 | * Checks lockdep state to see if the current task is executing from | ||
319 | * within a workqueue item. This function exists only if lockdep is | ||
320 | * enabled. | ||
321 | */ | ||
322 | int in_workqueue_context(struct workqueue_struct *wq) | ||
323 | { | ||
324 | return lock_is_held(&wq->lockdep_map); | ||
325 | } | ||
326 | #endif | ||
327 | |||
328 | #ifdef CONFIG_DEBUG_OBJECTS_WORK | 313 | #ifdef CONFIG_DEBUG_OBJECTS_WORK |
329 | 314 | ||
330 | static struct debug_obj_descr work_debug_descr; | 315 | static struct debug_obj_descr work_debug_descr; |
@@ -604,7 +589,9 @@ static bool keep_working(struct global_cwq *gcwq) | |||
604 | { | 589 | { |
605 | atomic_t *nr_running = get_gcwq_nr_running(gcwq->cpu); | 590 | atomic_t *nr_running = get_gcwq_nr_running(gcwq->cpu); |
606 | 591 | ||
607 | return !list_empty(&gcwq->worklist) && atomic_read(nr_running) <= 1; | 592 | return !list_empty(&gcwq->worklist) && |
593 | (atomic_read(nr_running) <= 1 || | ||
594 | gcwq->flags & GCWQ_HIGHPRI_PENDING); | ||
608 | } | 595 | } |
609 | 596 | ||
610 | /* Do we need a new worker? Called from manager. */ | 597 | /* Do we need a new worker? Called from manager. */ |
@@ -674,7 +661,7 @@ void wq_worker_waking_up(struct task_struct *task, unsigned int cpu) | |||
674 | { | 661 | { |
675 | struct worker *worker = kthread_data(task); | 662 | struct worker *worker = kthread_data(task); |
676 | 663 | ||
677 | if (likely(!(worker->flags & WORKER_NOT_RUNNING))) | 664 | if (!(worker->flags & WORKER_NOT_RUNNING)) |
678 | atomic_inc(get_gcwq_nr_running(cpu)); | 665 | atomic_inc(get_gcwq_nr_running(cpu)); |
679 | } | 666 | } |
680 | 667 | ||
@@ -700,7 +687,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, | |||
700 | struct global_cwq *gcwq = get_gcwq(cpu); | 687 | struct global_cwq *gcwq = get_gcwq(cpu); |
701 | atomic_t *nr_running = get_gcwq_nr_running(cpu); | 688 | atomic_t *nr_running = get_gcwq_nr_running(cpu); |
702 | 689 | ||
703 | if (unlikely(worker->flags & WORKER_NOT_RUNNING)) | 690 | if (worker->flags & WORKER_NOT_RUNNING) |
704 | return NULL; | 691 | return NULL; |
705 | 692 | ||
706 | /* this can only happen on the local cpu */ | 693 | /* this can only happen on the local cpu */ |
@@ -997,6 +984,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, | |||
997 | 984 | ||
998 | /* gcwq determined, get cwq and queue */ | 985 | /* gcwq determined, get cwq and queue */ |
999 | cwq = get_cwq(gcwq->cpu, wq); | 986 | cwq = get_cwq(gcwq->cpu, wq); |
987 | trace_workqueue_queue_work(cpu, cwq, work); | ||
1000 | 988 | ||
1001 | BUG_ON(!list_empty(&work->entry)); | 989 | BUG_ON(!list_empty(&work->entry)); |
1002 | 990 | ||
@@ -1004,6 +992,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, | |||
1004 | work_flags = work_color_to_flags(cwq->work_color); | 992 | work_flags = work_color_to_flags(cwq->work_color); |
1005 | 993 | ||
1006 | if (likely(cwq->nr_active < cwq->max_active)) { | 994 | if (likely(cwq->nr_active < cwq->max_active)) { |
995 | trace_workqueue_activate_work(work); | ||
1007 | cwq->nr_active++; | 996 | cwq->nr_active++; |
1008 | worklist = gcwq_determine_ins_pos(gcwq, cwq); | 997 | worklist = gcwq_determine_ins_pos(gcwq, cwq); |
1009 | } else { | 998 | } else { |
@@ -1679,6 +1668,7 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) | |||
1679 | struct work_struct, entry); | 1668 | struct work_struct, entry); |
1680 | struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq); | 1669 | struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq); |
1681 | 1670 | ||
1671 | trace_workqueue_activate_work(work); | ||
1682 | move_linked_works(work, pos, NULL); | 1672 | move_linked_works(work, pos, NULL); |
1683 | __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work)); | 1673 | __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work)); |
1684 | cwq->nr_active++; | 1674 | cwq->nr_active++; |
@@ -2074,7 +2064,7 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, | |||
2074 | * checks and call back into the fixup functions where we | 2064 | * checks and call back into the fixup functions where we |
2075 | * might deadlock. | 2065 | * might deadlock. |
2076 | */ | 2066 | */ |
2077 | INIT_WORK_ON_STACK(&barr->work, wq_barrier_func); | 2067 | INIT_WORK_ONSTACK(&barr->work, wq_barrier_func); |
2078 | __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work)); | 2068 | __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work)); |
2079 | init_completion(&barr->done); | 2069 | init_completion(&barr->done); |
2080 | 2070 | ||
@@ -2326,27 +2316,17 @@ out_unlock: | |||
2326 | } | 2316 | } |
2327 | EXPORT_SYMBOL_GPL(flush_workqueue); | 2317 | EXPORT_SYMBOL_GPL(flush_workqueue); |
2328 | 2318 | ||
2329 | /** | 2319 | static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr, |
2330 | * flush_work - block until a work_struct's callback has terminated | 2320 | bool wait_executing) |
2331 | * @work: the work which is to be flushed | ||
2332 | * | ||
2333 | * Returns false if @work has already terminated. | ||
2334 | * | ||
2335 | * It is expected that, prior to calling flush_work(), the caller has | ||
2336 | * arranged for the work to not be requeued, otherwise it doesn't make | ||
2337 | * sense to use this function. | ||
2338 | */ | ||
2339 | int flush_work(struct work_struct *work) | ||
2340 | { | 2321 | { |
2341 | struct worker *worker = NULL; | 2322 | struct worker *worker = NULL; |
2342 | struct global_cwq *gcwq; | 2323 | struct global_cwq *gcwq; |
2343 | struct cpu_workqueue_struct *cwq; | 2324 | struct cpu_workqueue_struct *cwq; |
2344 | struct wq_barrier barr; | ||
2345 | 2325 | ||
2346 | might_sleep(); | 2326 | might_sleep(); |
2347 | gcwq = get_work_gcwq(work); | 2327 | gcwq = get_work_gcwq(work); |
2348 | if (!gcwq) | 2328 | if (!gcwq) |
2349 | return 0; | 2329 | return false; |
2350 | 2330 | ||
2351 | spin_lock_irq(&gcwq->lock); | 2331 | spin_lock_irq(&gcwq->lock); |
2352 | if (!list_empty(&work->entry)) { | 2332 | if (!list_empty(&work->entry)) { |
@@ -2359,28 +2339,127 @@ int flush_work(struct work_struct *work) | |||
2359 | cwq = get_work_cwq(work); | 2339 | cwq = get_work_cwq(work); |
2360 | if (unlikely(!cwq || gcwq != cwq->gcwq)) | 2340 | if (unlikely(!cwq || gcwq != cwq->gcwq)) |
2361 | goto already_gone; | 2341 | goto already_gone; |
2362 | } else { | 2342 | } else if (wait_executing) { |
2363 | worker = find_worker_executing_work(gcwq, work); | 2343 | worker = find_worker_executing_work(gcwq, work); |
2364 | if (!worker) | 2344 | if (!worker) |
2365 | goto already_gone; | 2345 | goto already_gone; |
2366 | cwq = worker->current_cwq; | 2346 | cwq = worker->current_cwq; |
2367 | } | 2347 | } else |
2348 | goto already_gone; | ||
2368 | 2349 | ||
2369 | insert_wq_barrier(cwq, &barr, work, worker); | 2350 | insert_wq_barrier(cwq, barr, work, worker); |
2370 | spin_unlock_irq(&gcwq->lock); | 2351 | spin_unlock_irq(&gcwq->lock); |
2371 | 2352 | ||
2372 | lock_map_acquire(&cwq->wq->lockdep_map); | 2353 | lock_map_acquire(&cwq->wq->lockdep_map); |
2373 | lock_map_release(&cwq->wq->lockdep_map); | 2354 | lock_map_release(&cwq->wq->lockdep_map); |
2374 | 2355 | return true; | |
2375 | wait_for_completion(&barr.done); | ||
2376 | destroy_work_on_stack(&barr.work); | ||
2377 | return 1; | ||
2378 | already_gone: | 2356 | already_gone: |
2379 | spin_unlock_irq(&gcwq->lock); | 2357 | spin_unlock_irq(&gcwq->lock); |
2380 | return 0; | 2358 | return false; |
2359 | } | ||
2360 | |||
2361 | /** | ||
2362 | * flush_work - wait for a work to finish executing the last queueing instance | ||
2363 | * @work: the work to flush | ||
2364 | * | ||
2365 | * Wait until @work has finished execution. This function considers | ||
2366 | * only the last queueing instance of @work. If @work has been | ||
2367 | * enqueued across different CPUs on a non-reentrant workqueue or on | ||
2368 | * multiple workqueues, @work might still be executing on return on | ||
2369 | * some of the CPUs from earlier queueing. | ||
2370 | * | ||
2371 | * If @work was queued only on a non-reentrant, ordered or unbound | ||
2372 | * workqueue, @work is guaranteed to be idle on return if it hasn't | ||
2373 | * been requeued since flush started. | ||
2374 | * | ||
2375 | * RETURNS: | ||
2376 | * %true if flush_work() waited for the work to finish execution, | ||
2377 | * %false if it was already idle. | ||
2378 | */ | ||
2379 | bool flush_work(struct work_struct *work) | ||
2380 | { | ||
2381 | struct wq_barrier barr; | ||
2382 | |||
2383 | if (start_flush_work(work, &barr, true)) { | ||
2384 | wait_for_completion(&barr.done); | ||
2385 | destroy_work_on_stack(&barr.work); | ||
2386 | return true; | ||
2387 | } else | ||
2388 | return false; | ||
2381 | } | 2389 | } |
2382 | EXPORT_SYMBOL_GPL(flush_work); | 2390 | EXPORT_SYMBOL_GPL(flush_work); |
2383 | 2391 | ||
2392 | static bool wait_on_cpu_work(struct global_cwq *gcwq, struct work_struct *work) | ||
2393 | { | ||
2394 | struct wq_barrier barr; | ||
2395 | struct worker *worker; | ||
2396 | |||
2397 | spin_lock_irq(&gcwq->lock); | ||
2398 | |||
2399 | worker = find_worker_executing_work(gcwq, work); | ||
2400 | if (unlikely(worker)) | ||
2401 | insert_wq_barrier(worker->current_cwq, &barr, work, worker); | ||
2402 | |||
2403 | spin_unlock_irq(&gcwq->lock); | ||
2404 | |||
2405 | if (unlikely(worker)) { | ||
2406 | wait_for_completion(&barr.done); | ||
2407 | destroy_work_on_stack(&barr.work); | ||
2408 | return true; | ||
2409 | } else | ||
2410 | return false; | ||
2411 | } | ||
2412 | |||
2413 | static bool wait_on_work(struct work_struct *work) | ||
2414 | { | ||
2415 | bool ret = false; | ||
2416 | int cpu; | ||
2417 | |||
2418 | might_sleep(); | ||
2419 | |||
2420 | lock_map_acquire(&work->lockdep_map); | ||
2421 | lock_map_release(&work->lockdep_map); | ||
2422 | |||
2423 | for_each_gcwq_cpu(cpu) | ||
2424 | ret |= wait_on_cpu_work(get_gcwq(cpu), work); | ||
2425 | return ret; | ||
2426 | } | ||
2427 | |||
2428 | /** | ||
2429 | * flush_work_sync - wait until a work has finished execution | ||
2430 | * @work: the work to flush | ||
2431 | * | ||
2432 | * Wait until @work has finished execution. On return, it's | ||
2433 | * guaranteed that all queueing instances of @work which happened | ||
2434 | * before this function is called are finished. In other words, if | ||
2435 | * @work hasn't been requeued since this function was called, @work is | ||
2436 | * guaranteed to be idle on return. | ||
2437 | * | ||
2438 | * RETURNS: | ||
2439 | * %true if flush_work_sync() waited for the work to finish execution, | ||
2440 | * %false if it was already idle. | ||
2441 | */ | ||
2442 | bool flush_work_sync(struct work_struct *work) | ||
2443 | { | ||
2444 | struct wq_barrier barr; | ||
2445 | bool pending, waited; | ||
2446 | |||
2447 | /* we'll wait for executions separately, queue barr only if pending */ | ||
2448 | pending = start_flush_work(work, &barr, false); | ||
2449 | |||
2450 | /* wait for executions to finish */ | ||
2451 | waited = wait_on_work(work); | ||
2452 | |||
2453 | /* wait for the pending one */ | ||
2454 | if (pending) { | ||
2455 | wait_for_completion(&barr.done); | ||
2456 | destroy_work_on_stack(&barr.work); | ||
2457 | } | ||
2458 | |||
2459 | return pending || waited; | ||
2460 | } | ||
2461 | EXPORT_SYMBOL_GPL(flush_work_sync); | ||
2462 | |||
2384 | /* | 2463 | /* |
2385 | * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit, | 2464 | * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit, |
2386 | * so this work can't be re-armed in any way. | 2465 | * so this work can't be re-armed in any way. |
@@ -2423,39 +2502,7 @@ static int try_to_grab_pending(struct work_struct *work) | |||
2423 | return ret; | 2502 | return ret; |
2424 | } | 2503 | } |
2425 | 2504 | ||
2426 | static void wait_on_cpu_work(struct global_cwq *gcwq, struct work_struct *work) | 2505 | static bool __cancel_work_timer(struct work_struct *work, |
2427 | { | ||
2428 | struct wq_barrier barr; | ||
2429 | struct worker *worker; | ||
2430 | |||
2431 | spin_lock_irq(&gcwq->lock); | ||
2432 | |||
2433 | worker = find_worker_executing_work(gcwq, work); | ||
2434 | if (unlikely(worker)) | ||
2435 | insert_wq_barrier(worker->current_cwq, &barr, work, worker); | ||
2436 | |||
2437 | spin_unlock_irq(&gcwq->lock); | ||
2438 | |||
2439 | if (unlikely(worker)) { | ||
2440 | wait_for_completion(&barr.done); | ||
2441 | destroy_work_on_stack(&barr.work); | ||
2442 | } | ||
2443 | } | ||
2444 | |||
2445 | static void wait_on_work(struct work_struct *work) | ||
2446 | { | ||
2447 | int cpu; | ||
2448 | |||
2449 | might_sleep(); | ||
2450 | |||
2451 | lock_map_acquire(&work->lockdep_map); | ||
2452 | lock_map_release(&work->lockdep_map); | ||
2453 | |||
2454 | for_each_gcwq_cpu(cpu) | ||
2455 | wait_on_cpu_work(get_gcwq(cpu), work); | ||
2456 | } | ||
2457 | |||
2458 | static int __cancel_work_timer(struct work_struct *work, | ||
2459 | struct timer_list* timer) | 2506 | struct timer_list* timer) |
2460 | { | 2507 | { |
2461 | int ret; | 2508 | int ret; |
@@ -2472,42 +2519,81 @@ static int __cancel_work_timer(struct work_struct *work, | |||
2472 | } | 2519 | } |
2473 | 2520 | ||
2474 | /** | 2521 | /** |
2475 | * cancel_work_sync - block until a work_struct's callback has terminated | 2522 | * cancel_work_sync - cancel a work and wait for it to finish |
2476 | * @work: the work which is to be flushed | 2523 | * @work: the work to cancel |
2477 | * | ||
2478 | * Returns true if @work was pending. | ||
2479 | * | 2524 | * |
2480 | * cancel_work_sync() will cancel the work if it is queued. If the work's | 2525 | * Cancel @work and wait for its execution to finish. This function |
2481 | * callback appears to be running, cancel_work_sync() will block until it | 2526 | * can be used even if the work re-queues itself or migrates to |
2482 | * has completed. | 2527 | * another workqueue. On return from this function, @work is |
2528 | * guaranteed to be not pending or executing on any CPU. | ||
2483 | * | 2529 | * |
2484 | * It is possible to use this function if the work re-queues itself. It can | 2530 | * cancel_work_sync(&delayed_work->work) must not be used for |
2485 | * cancel the work even if it migrates to another workqueue, however in that | 2531 | * delayed_work's. Use cancel_delayed_work_sync() instead. |
2486 | * case it only guarantees that work->func() has completed on the last queued | ||
2487 | * workqueue. | ||
2488 | * | 2532 | * |
2489 | * cancel_work_sync(&delayed_work->work) should be used only if ->timer is not | 2533 | * The caller must ensure that the workqueue on which @work was last |
2490 | * pending, otherwise it goes into a busy-wait loop until the timer expires. | ||
2491 | * | ||
2492 | * The caller must ensure that workqueue_struct on which this work was last | ||
2493 | * queued can't be destroyed before this function returns. | 2534 | * queued can't be destroyed before this function returns. |
2535 | * | ||
2536 | * RETURNS: | ||
2537 | * %true if @work was pending, %false otherwise. | ||
2494 | */ | 2538 | */ |
2495 | int cancel_work_sync(struct work_struct *work) | 2539 | bool cancel_work_sync(struct work_struct *work) |
2496 | { | 2540 | { |
2497 | return __cancel_work_timer(work, NULL); | 2541 | return __cancel_work_timer(work, NULL); |
2498 | } | 2542 | } |
2499 | EXPORT_SYMBOL_GPL(cancel_work_sync); | 2543 | EXPORT_SYMBOL_GPL(cancel_work_sync); |
2500 | 2544 | ||
2501 | /** | 2545 | /** |
2502 | * cancel_delayed_work_sync - reliably kill off a delayed work. | 2546 | * flush_delayed_work - wait for a dwork to finish executing the last queueing |
2503 | * @dwork: the delayed work struct | 2547 | * @dwork: the delayed work to flush |
2548 | * | ||
2549 | * Delayed timer is cancelled and the pending work is queued for | ||
2550 | * immediate execution. Like flush_work(), this function only | ||
2551 | * considers the last queueing instance of @dwork. | ||
2552 | * | ||
2553 | * RETURNS: | ||
2554 | * %true if flush_work() waited for the work to finish execution, | ||
2555 | * %false if it was already idle. | ||
2556 | */ | ||
2557 | bool flush_delayed_work(struct delayed_work *dwork) | ||
2558 | { | ||
2559 | if (del_timer_sync(&dwork->timer)) | ||
2560 | __queue_work(raw_smp_processor_id(), | ||
2561 | get_work_cwq(&dwork->work)->wq, &dwork->work); | ||
2562 | return flush_work(&dwork->work); | ||
2563 | } | ||
2564 | EXPORT_SYMBOL(flush_delayed_work); | ||
2565 | |||
2566 | /** | ||
2567 | * flush_delayed_work_sync - wait for a dwork to finish | ||
2568 | * @dwork: the delayed work to flush | ||
2504 | * | 2569 | * |
2505 | * Returns true if @dwork was pending. | 2570 | * Delayed timer is cancelled and the pending work is queued for |
2571 | * execution immediately. Other than timer handling, its behavior | ||
2572 | * is identical to flush_work_sync(). | ||
2506 | * | 2573 | * |
2507 | * It is possible to use this function if @dwork rearms itself via queue_work() | 2574 | * RETURNS: |
2508 | * or queue_delayed_work(). See also the comment for cancel_work_sync(). | 2575 | * %true if flush_work_sync() waited for the work to finish execution, |
2576 | * %false if it was already idle. | ||
2509 | */ | 2577 | */ |
2510 | int cancel_delayed_work_sync(struct delayed_work *dwork) | 2578 | bool flush_delayed_work_sync(struct delayed_work *dwork) |
2579 | { | ||
2580 | if (del_timer_sync(&dwork->timer)) | ||
2581 | __queue_work(raw_smp_processor_id(), | ||
2582 | get_work_cwq(&dwork->work)->wq, &dwork->work); | ||
2583 | return flush_work_sync(&dwork->work); | ||
2584 | } | ||
2585 | EXPORT_SYMBOL(flush_delayed_work_sync); | ||
2586 | |||
2587 | /** | ||
2588 | * cancel_delayed_work_sync - cancel a delayed work and wait for it to finish | ||
2589 | * @dwork: the delayed work to cancel | ||
2590 | * | ||
2591 | * This is cancel_work_sync() for delayed works. | ||
2592 | * | ||
2593 | * RETURNS: | ||
2594 | * %true if @dwork was pending, %false otherwise. | ||
2595 | */ | ||
2596 | bool cancel_delayed_work_sync(struct delayed_work *dwork) | ||
2511 | { | 2597 | { |
2512 | return __cancel_work_timer(&dwork->work, &dwork->timer); | 2598 | return __cancel_work_timer(&dwork->work, &dwork->timer); |
2513 | } | 2599 | } |
@@ -2559,23 +2645,6 @@ int schedule_delayed_work(struct delayed_work *dwork, | |||
2559 | EXPORT_SYMBOL(schedule_delayed_work); | 2645 | EXPORT_SYMBOL(schedule_delayed_work); |
2560 | 2646 | ||
2561 | /** | 2647 | /** |
2562 | * flush_delayed_work - block until a dwork_struct's callback has terminated | ||
2563 | * @dwork: the delayed work which is to be flushed | ||
2564 | * | ||
2565 | * Any timeout is cancelled, and any pending work is run immediately. | ||
2566 | */ | ||
2567 | void flush_delayed_work(struct delayed_work *dwork) | ||
2568 | { | ||
2569 | if (del_timer_sync(&dwork->timer)) { | ||
2570 | __queue_work(get_cpu(), get_work_cwq(&dwork->work)->wq, | ||
2571 | &dwork->work); | ||
2572 | put_cpu(); | ||
2573 | } | ||
2574 | flush_work(&dwork->work); | ||
2575 | } | ||
2576 | EXPORT_SYMBOL(flush_delayed_work); | ||
2577 | |||
2578 | /** | ||
2579 | * schedule_delayed_work_on - queue work in global workqueue on CPU after delay | 2648 | * schedule_delayed_work_on - queue work in global workqueue on CPU after delay |
2580 | * @cpu: cpu to use | 2649 | * @cpu: cpu to use |
2581 | * @dwork: job to be done | 2650 | * @dwork: job to be done |
@@ -2592,13 +2661,15 @@ int schedule_delayed_work_on(int cpu, | |||
2592 | EXPORT_SYMBOL(schedule_delayed_work_on); | 2661 | EXPORT_SYMBOL(schedule_delayed_work_on); |
2593 | 2662 | ||
2594 | /** | 2663 | /** |
2595 | * schedule_on_each_cpu - call a function on each online CPU from keventd | 2664 | * schedule_on_each_cpu - execute a function synchronously on each online CPU |
2596 | * @func: the function to call | 2665 | * @func: the function to call |
2597 | * | 2666 | * |
2598 | * Returns zero on success. | 2667 | * schedule_on_each_cpu() executes @func on each online CPU using the |
2599 | * Returns -ve errno on failure. | 2668 | * system workqueue and blocks until all CPUs have completed. |
2600 | * | ||
2601 | * schedule_on_each_cpu() is very slow. | 2669 | * schedule_on_each_cpu() is very slow. |
2670 | * | ||
2671 | * RETURNS: | ||
2672 | * 0 on success, -errno on failure. | ||
2602 | */ | 2673 | */ |
2603 | int schedule_on_each_cpu(work_func_t func) | 2674 | int schedule_on_each_cpu(work_func_t func) |
2604 | { | 2675 | { |
@@ -2720,7 +2791,9 @@ static int alloc_cwqs(struct workqueue_struct *wq) | |||
2720 | } | 2791 | } |
2721 | } | 2792 | } |
2722 | 2793 | ||
2723 | /* just in case, make sure it's actually aligned */ | 2794 | /* just in case, make sure it's actually aligned |
2795 | * - this is affected by PERCPU() alignment in vmlinux.lds.S | ||
2796 | */ | ||
2724 | BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align)); | 2797 | BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align)); |
2725 | return wq->cpu_wq.v ? 0 : -ENOMEM; | 2798 | return wq->cpu_wq.v ? 0 : -ENOMEM; |
2726 | } | 2799 | } |
@@ -2764,6 +2837,13 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name, | |||
2764 | unsigned int cpu; | 2837 | unsigned int cpu; |
2765 | 2838 | ||
2766 | /* | 2839 | /* |
2840 | * Workqueues which may be used during memory reclaim should | ||
2841 | * have a rescuer to guarantee forward progress. | ||
2842 | */ | ||
2843 | if (flags & WQ_MEM_RECLAIM) | ||
2844 | flags |= WQ_RESCUER; | ||
2845 | |||
2846 | /* | ||
2767 | * Unbound workqueues aren't concurrency managed and should be | 2847 | * Unbound workqueues aren't concurrency managed and should be |
2768 | * dispatched to workers immediately. | 2848 | * dispatched to workers immediately. |
2769 | */ | 2849 | */ |
@@ -3612,7 +3692,8 @@ static int __init init_workqueues(void) | |||
3612 | system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0); | 3692 | system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0); |
3613 | system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, | 3693 | system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, |
3614 | WQ_UNBOUND_MAX_ACTIVE); | 3694 | WQ_UNBOUND_MAX_ACTIVE); |
3615 | BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq); | 3695 | BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq || |
3696 | !system_unbound_wq); | ||
3616 | return 0; | 3697 | return 0; |
3617 | } | 3698 | } |
3618 | early_initcall(init_workqueues); | 3699 | early_initcall(init_workqueues); |