Diffstat (limited to 'kernel')
-rw-r--r--  kernel/audit.c           10
-rw-r--r--  kernel/audit.h            1
-rw-r--r--  kernel/auditfilter.c     63
-rw-r--r--  kernel/auditsc.c        168
-rw-r--r--  kernel/cpuset.c          35
-rw-r--r--  kernel/delayacct.c       24
-rw-r--r--  kernel/exit.c             3
-rw-r--r--  kernel/fork.c            10
-rw-r--r--  kernel/futex.c          224
-rw-r--r--  kernel/futex_compat.c    34
-rw-r--r--  kernel/hrtimer.c          6
-rw-r--r--  kernel/irq/handle.c       5
-rw-r--r--  kernel/irq/manage.c      28
-rw-r--r--  kernel/kprobes.c          1
-rw-r--r--  kernel/panic.c            3
-rw-r--r--  kernel/power/Kconfig      6
-rw-r--r--  kernel/power/process.c   26
-rw-r--r--  kernel/printk.c           4
-rw-r--r--  kernel/rcupdate.c         4
-rw-r--r--  kernel/resource.c         9
-rw-r--r--  kernel/rtmutex.c          2
-rw-r--r--  kernel/sched.c           26
-rw-r--r--  kernel/signal.c          25
-rw-r--r--  kernel/softirq.c         22
-rw-r--r--  kernel/softlockup.c       4
-rw-r--r--  kernel/spinlock.c         2
-rw-r--r--  kernel/stop_machine.c     1
-rw-r--r--  kernel/taskstats.c       32
-rw-r--r--  kernel/timer.c           49
-rw-r--r--  kernel/workqueue.c       91
30 files changed, 602 insertions, 316 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index d417ca1db79b..963fd15c9621 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -690,9 +690,7 @@ static const struct inotify_operations audit_inotify_ops = {
 /* Initialize audit support at boot time. */
 static int __init audit_init(void)
 {
-#ifdef CONFIG_AUDITSYSCALL
 	int i;
-#endif
 
 	printk(KERN_INFO "audit: initializing netlink socket (%s)\n",
 	       audit_default ? "enabled" : "disabled");
@@ -717,10 +715,10 @@ static int __init audit_init(void)
 	audit_ih = inotify_init(&audit_inotify_ops);
 	if (IS_ERR(audit_ih))
 		audit_panic("cannot initialize inotify handle");
+#endif
 
 	for (i = 0; i < AUDIT_INODE_BUCKETS; i++)
 		INIT_LIST_HEAD(&audit_inode_hash[i]);
-#endif
 
 	return 0;
 }
@@ -1030,6 +1028,9 @@ void audit_log_hex(struct audit_buffer *ab, const unsigned char *buf,
 	struct sk_buff *skb;
 	static const unsigned char *hex = "0123456789ABCDEF";
 
+	if (!ab)
+		return;
+
 	BUG_ON(!ab->skb);
 	skb = ab->skb;
 	avail = skb_tailroom(skb);
@@ -1062,6 +1063,9 @@ static void audit_log_n_string(struct audit_buffer *ab, size_t slen,
 	unsigned char *ptr;
 	struct sk_buff *skb;
 
+	if (!ab)
+		return;
+
 	BUG_ON(!ab->skb);
 	skb = ab->skb;
 	avail = skb_tailroom(skb);
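A note on the two new "if (!ab) return;" guards: audit_log_start() can return NULL (for example under memory pressure or rate limiting), and audit_log_format()/audit_log_end() already tolerate a NULL buffer; the guards extend that contract to the hex and string helpers. A minimal calling sketch under that assumption:

#include <linux/audit.h>

static void log_example(const unsigned char *buf, size_t len)
{
	struct audit_buffer *ab;

	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
	/* ab may be NULL here; each helper below must cope with
	 * that rather than dereference ab->skb and oops. */
	audit_log_format(ab, "op=example");
	audit_log_hex(ab, buf, len);	/* NULL-safe after this patch */
	audit_log_end(ab);
}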
diff --git a/kernel/audit.h b/kernel/audit.h
index 6aa33b848cf2..a3370232a390 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -104,6 +104,7 @@ static inline int audit_hash_ino(u32 ino)
 	return (ino & (AUDIT_INODE_BUCKETS-1));
 }
 
+extern int audit_match_class(int class, unsigned syscall);
 extern int audit_comparator(const u32 left, const u32 op, const u32 right);
 extern int audit_compare_dname_path(const char *dname, const char *path,
 				    int *dirlen);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 5b4e16276ca0..a44879b0c72f 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -302,6 +302,15 @@ int __init audit_register_class(int class, unsigned *list)
 	return 0;
 }
 
+int audit_match_class(int class, unsigned syscall)
+{
+	if (unlikely(syscall >= AUDIT_BITMASK_SIZE * sizeof(__u32)))
+		return 0;
+	if (unlikely(class >= AUDIT_SYSCALL_CLASSES || !classes[class]))
+		return 0;
+	return classes[class][AUDIT_WORD(syscall)] & AUDIT_BIT(syscall);
+}
+
 /* Common user-space to kernel rule translation. */
 static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule)
 {
@@ -404,6 +413,7 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 		case AUDIT_PERS:
 		case AUDIT_ARCH:
 		case AUDIT_MSGTYPE:
+		case AUDIT_PPID:
 		case AUDIT_DEVMAJOR:
 		case AUDIT_DEVMINOR:
 		case AUDIT_EXIT:
@@ -413,6 +423,10 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 		case AUDIT_ARG2:
 		case AUDIT_ARG3:
 			break;
+		case AUDIT_PERM:
+			if (f->val & ~15)
+				goto exit_free;
+			break;
 		case AUDIT_INODE:
 			err = audit_to_inode(&entry->rule, f);
 			if (err)
@@ -442,6 +456,7 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 		case AUDIT_EQUAL:
 			break;
 		default:
+			err = -EINVAL;
 			goto exit_free;
 		}
 	}
@@ -566,6 +581,10 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 			entry->rule.buflen += f->val;
 			entry->rule.filterkey = str;
 			break;
+		case AUDIT_PERM:
+			if (f->val & ~15)
+				goto exit_free;
+			break;
 		default:
 			goto exit_free;
 		}
@@ -579,6 +598,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 		case AUDIT_EQUAL:
 			break;
 		default:
+			err = -EINVAL;
 			goto exit_free;
 		}
 	}
@@ -911,7 +931,7 @@ static void audit_update_watch(struct audit_parent *parent,
 	}
 
 	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
-	audit_log_format(ab, "audit updated rules specifying watch=");
+	audit_log_format(ab, "audit updated rules specifying path=");
 	audit_log_untrustedstring(ab, owatch->path);
 	audit_log_format(ab, " with dev=%u ino=%lu\n", dev, ino);
 	audit_log_end(ab);
@@ -934,19 +954,28 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
 	struct audit_watch *w, *nextw;
 	struct audit_krule *r, *nextr;
 	struct audit_entry *e;
+	struct audit_buffer *ab;
 
 	mutex_lock(&audit_filter_mutex);
 	parent->flags |= AUDIT_PARENT_INVALID;
 	list_for_each_entry_safe(w, nextw, &parent->watches, wlist) {
 		list_for_each_entry_safe(r, nextr, &w->rules, rlist) {
 			e = container_of(r, struct audit_entry, rule);
+
+			ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
+			audit_log_format(ab, "audit implicitly removed rule path=");
+			audit_log_untrustedstring(ab, w->path);
+			if (r->filterkey) {
+				audit_log_format(ab, " key=");
+				audit_log_untrustedstring(ab, r->filterkey);
+			} else
+				audit_log_format(ab, " key=(null)");
+			audit_log_format(ab, " list=%d", r->listnr);
+			audit_log_end(ab);
+
 			list_del(&r->rlist);
 			list_del_rcu(&e->list);
 			call_rcu(&e->rcu, audit_free_rule_rcu);
-
-			audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
-				  "audit implicitly removed rule from list=%d\n",
-				  AUDIT_FILTER_EXIT);
 		}
 		audit_remove_watch(w);
 	}
@@ -1134,6 +1163,14 @@ static inline int audit_add_rule(struct audit_entry *entry,
 	struct audit_watch *watch = entry->rule.watch;
 	struct nameidata *ndp, *ndw;
 	int h, err, putnd_needed = 0;
+#ifdef CONFIG_AUDITSYSCALL
+	int dont_count = 0;
+
+	/* If either of these, don't count towards total */
+	if (entry->rule.listnr == AUDIT_FILTER_USER ||
+		entry->rule.listnr == AUDIT_FILTER_TYPE)
+		dont_count = 1;
+#endif
 
 	if (inode_f) {
 		h = audit_hash_ino(inode_f->val);
@@ -1174,6 +1211,10 @@ static inline int audit_add_rule(struct audit_entry *entry,
 	} else {
 		list_add_tail_rcu(&entry->list, list);
 	}
+#ifdef CONFIG_AUDITSYSCALL
+	if (!dont_count)
+		audit_n_rules++;
+#endif
 	mutex_unlock(&audit_filter_mutex);
 
 	if (putnd_needed)
@@ -1198,6 +1239,14 @@ static inline int audit_del_rule(struct audit_entry *entry,
 	struct audit_watch *watch, *tmp_watch = entry->rule.watch;
 	LIST_HEAD(inotify_list);
 	int h, ret = 0;
+#ifdef CONFIG_AUDITSYSCALL
+	int dont_count = 0;
+
+	/* If either of these, don't count towards total */
+	if (entry->rule.listnr == AUDIT_FILTER_USER ||
+		entry->rule.listnr == AUDIT_FILTER_TYPE)
+		dont_count = 1;
+#endif
 
 	if (inode_f) {
 		h = audit_hash_ino(inode_f->val);
@@ -1235,6 +1284,10 @@ static inline int audit_del_rule(struct audit_entry *entry,
 	list_del_rcu(&e->list);
 	call_rcu(&e->rcu, audit_free_rule_rcu);
 
+#ifdef CONFIG_AUDITSYSCALL
+	if (!dont_count)
+		audit_n_rules--;
+#endif
 	mutex_unlock(&audit_filter_mutex);
 
 	if (!list_empty(&inotify_list))
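For reference when reading the two "f->val & ~15" checks: the AUDIT_PERM rule field is a 4-bit access-type mask, so any value outside 0..15 is rejected. A sketch of the bit values this series introduces in include/linux/audit.h:

/* Access-type bits accepted in an AUDIT_PERM rule field. */
#define AUDIT_PERM_EXEC		1
#define AUDIT_PERM_WRITE	2
#define AUDIT_PERM_READ		4
#define AUDIT_PERM_ATTR		8

/* f->val & ~15 being non-zero means a bit outside these four was
 * requested, so the rule translation code bails out. */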
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index ae40ac8c39e7..1bd8827a0102 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -85,6 +85,9 @@ extern int audit_enabled;
 /* Indicates that audit should log the full pathname. */
 #define AUDIT_NAME_FULL -1
 
+/* number of audit rules */
+int audit_n_rules;
+
 /* When fs/namei.c:getname() is called, we store the pointer in name and
  * we don't let putname() free it (instead we free all of the saved
  * pointers at syscall exit time).
@@ -174,6 +177,7 @@ struct audit_aux_data_path {
 
 /* The per-task audit context. */
 struct audit_context {
+	int		    dummy;	/* must be the first element */
 	int		    in_syscall;	/* 1 if task is in a syscall */
 	enum audit_state    state;
 	unsigned int	    serial;     /* serial number for record */
@@ -205,6 +209,54 @@ struct audit_context {
 #endif
 };
 
+#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
+static inline int open_arg(int flags, int mask)
+{
+	int n = ACC_MODE(flags);
+	if (flags & (O_TRUNC | O_CREAT))
+		n |= AUDIT_PERM_WRITE;
+	return n & mask;
+}
+
+static int audit_match_perm(struct audit_context *ctx, int mask)
+{
+	unsigned n = ctx->major;
+	switch (audit_classify_syscall(ctx->arch, n)) {
+	case 0:	/* native */
+		if ((mask & AUDIT_PERM_WRITE) &&
+		     audit_match_class(AUDIT_CLASS_WRITE, n))
+			return 1;
+		if ((mask & AUDIT_PERM_READ) &&
+		     audit_match_class(AUDIT_CLASS_READ, n))
+			return 1;
+		if ((mask & AUDIT_PERM_ATTR) &&
+		     audit_match_class(AUDIT_CLASS_CHATTR, n))
+			return 1;
+		return 0;
+	case 1: /* 32bit on biarch */
+		if ((mask & AUDIT_PERM_WRITE) &&
+		     audit_match_class(AUDIT_CLASS_WRITE_32, n))
+			return 1;
+		if ((mask & AUDIT_PERM_READ) &&
+		     audit_match_class(AUDIT_CLASS_READ_32, n))
+			return 1;
+		if ((mask & AUDIT_PERM_ATTR) &&
+		     audit_match_class(AUDIT_CLASS_CHATTR_32, n))
+			return 1;
+		return 0;
+	case 2: /* open */
+		return mask & ACC_MODE(ctx->argv[1]);
+	case 3: /* openat */
+		return mask & ACC_MODE(ctx->argv[2]);
+	case 4: /* socketcall */
+		return ((mask & AUDIT_PERM_WRITE) && ctx->argv[0] == SYS_BIND);
+	case 5: /* execve */
+		return mask & AUDIT_PERM_EXEC;
+	default:
+		return 0;
+	}
+}
+
 /* Determine if any context name data matches a rule's watch data */
 /* Compare a task_struct with an audit_rule.  Return 1 on match, 0
  * otherwise. */
@@ -393,6 +445,9 @@ static int audit_filter_rules(struct task_struct *tsk,
 			/* ignore this field for filtering */
 			result = 1;
 			break;
+		case AUDIT_PERM:
+			result = audit_match_perm(ctx, f->val);
+			break;
 		}
 
 		if (!result)
@@ -514,7 +569,7 @@ static inline struct audit_context *audit_get_context(struct task_struct *tsk,
 	context->return_valid = return_valid;
 	context->return_code  = return_code;
 
-	if (context->in_syscall && !context->auditable) {
+	if (context->in_syscall && !context->dummy && !context->auditable) {
 		enum audit_state state;
 
 		state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_EXIT]);
@@ -530,17 +585,7 @@ static inline struct audit_context *audit_get_context(struct task_struct *tsk,
 	}
 
 get_context:
-	context->pid = tsk->pid;
-	context->ppid = sys_getppid();	/* sic.  tsk == current in all cases */
-	context->uid = tsk->uid;
-	context->gid = tsk->gid;
-	context->euid = tsk->euid;
-	context->suid = tsk->suid;
-	context->fsuid = tsk->fsuid;
-	context->egid = tsk->egid;
-	context->sgid = tsk->sgid;
-	context->fsgid = tsk->fsgid;
-	context->personality = tsk->personality;
+
 	tsk->audit_context = NULL;
 	return context;
 }
@@ -749,6 +794,17 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 	const char *tty;
 
 	/* tsk == current */
+	context->pid = tsk->pid;
+	context->ppid = sys_getppid();	/* sic.  tsk == current in all cases */
+	context->uid = tsk->uid;
+	context->gid = tsk->gid;
+	context->euid = tsk->euid;
+	context->suid = tsk->suid;
+	context->fsuid = tsk->fsuid;
+	context->egid = tsk->egid;
+	context->sgid = tsk->sgid;
+	context->fsgid = tsk->fsgid;
+	context->personality = tsk->personality;
 
 	ab = audit_log_start(context, GFP_KERNEL, AUDIT_SYSCALL);
 	if (!ab)
@@ -1066,7 +1122,8 @@ void audit_syscall_entry(int arch, int major,
 	context->argv[3]    = a4;
 
 	state = context->state;
-	if (state == AUDIT_SETUP_CONTEXT || state == AUDIT_BUILD_CONTEXT)
+	context->dummy = !audit_n_rules;
+	if (!context->dummy && (state == AUDIT_SETUP_CONTEXT || state == AUDIT_BUILD_CONTEXT))
 		state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_ENTRY]);
 	if (likely(state == AUDIT_DISABLED))
 		return;
@@ -1199,14 +1256,18 @@ void audit_putname(const char *name)
 #endif
 }
 
-static void audit_inode_context(int idx, const struct inode *inode)
+/* Copy inode data into an audit_names. */
+static void audit_copy_inode(struct audit_names *name, const struct inode *inode)
 {
-	struct audit_context *context = current->audit_context;
-
-	selinux_get_inode_sid(inode, &context->names[idx].osid);
+	name->ino   = inode->i_ino;
+	name->dev   = inode->i_sb->s_dev;
+	name->mode  = inode->i_mode;
+	name->uid   = inode->i_uid;
+	name->gid   = inode->i_gid;
+	name->rdev  = inode->i_rdev;
+	selinux_get_inode_sid(inode, &name->osid);
 }
 
-
 /**
  * audit_inode - store the inode and device from a lookup
  * @name: name being audited
@@ -1240,20 +1301,14 @@ void __audit_inode(const char *name, const struct inode *inode)
 		++context->ino_count;
 #endif
 	}
-	context->names[idx].ino  = inode->i_ino;
-	context->names[idx].dev  = inode->i_sb->s_dev;
-	context->names[idx].mode = inode->i_mode;
-	context->names[idx].uid  = inode->i_uid;
-	context->names[idx].gid  = inode->i_gid;
-	context->names[idx].rdev = inode->i_rdev;
-	audit_inode_context(idx, inode);
+	audit_copy_inode(&context->names[idx], inode);
 }
 
 /**
  * audit_inode_child - collect inode info for created/removed objects
  * @dname: inode's dentry name
 * @inode: inode being audited
- * @pino: inode number of dentry parent
+ * @parent: inode of dentry parent
  *
  * For syscalls that create or remove filesystem objects, audit_inode
  * can only collect information for the filesystem object's parent.
@@ -1264,7 +1319,7 @@ void __audit_inode(const char *name, const struct inode *inode)
  * unsuccessful attempts.
  */
 void __audit_inode_child(const char *dname, const struct inode *inode,
-			 unsigned long pino)
+			 const struct inode *parent)
 {
 	int idx;
 	struct audit_context *context = current->audit_context;
@@ -1278,7 +1333,7 @@ void __audit_inode_child(const char *dname, const struct inode *inode,
 	if (!dname)
 		goto update_context;
 	for (idx = 0; idx < context->name_count; idx++)
-		if (context->names[idx].ino == pino) {
+		if (context->names[idx].ino == parent->i_ino) {
 			const char *name = context->names[idx].name;
 
 			if (!name)
@@ -1302,16 +1357,47 @@ update_context:
 	context->names[idx].name_len = AUDIT_NAME_FULL;
 	context->names[idx].name_put = 0;	/* don't call __putname() */
 
-	if (inode) {
-		context->names[idx].ino  = inode->i_ino;
-		context->names[idx].dev  = inode->i_sb->s_dev;
-		context->names[idx].mode = inode->i_mode;
-		context->names[idx].uid  = inode->i_uid;
-		context->names[idx].gid  = inode->i_gid;
-		context->names[idx].rdev = inode->i_rdev;
-		audit_inode_context(idx, inode);
-	} else
-		context->names[idx].ino  = (unsigned long)-1;
+	if (!inode)
+		context->names[idx].ino = (unsigned long)-1;
+	else
+		audit_copy_inode(&context->names[idx], inode);
+
+	/* A parent was not found in audit_names, so copy the inode data for the
+	 * provided parent. */
+	if (!found_name) {
+		idx = context->name_count++;
+#if AUDIT_DEBUG
+		context->ino_count++;
+#endif
+		audit_copy_inode(&context->names[idx], parent);
+	}
+}
+
+/**
+ * audit_inode_update - update inode info for last collected name
+ * @inode: inode being audited
+ *
+ * When open() is called on an existing object with the O_CREAT flag, the inode
+ * data audit initially collects is incorrect.  This additional hook ensures
+ * audit has the inode data for the actual object to be opened.
+ */
+void __audit_inode_update(const struct inode *inode)
+{
+	struct audit_context *context = current->audit_context;
+	int idx;
+
+	if (!context->in_syscall || !inode)
+		return;
+
+	if (context->name_count == 0) {
+		context->name_count++;
+#if AUDIT_DEBUG
+		context->ino_count++;
+#endif
+	}
+	idx = context->name_count - 1;
+
+	audit_copy_inode(&context->names[idx], inode);
 }
 
 /**
@@ -1642,7 +1728,7 @@ int audit_bprm(struct linux_binprm *bprm)
 	unsigned long p, next;
 	void *to;
 
-	if (likely(!audit_enabled || !context))
+	if (likely(!audit_enabled || !context || context->dummy))
 		return 0;
 
 	ax = kmalloc(sizeof(*ax) + PAGE_SIZE * MAX_ARG_PAGES - bprm->p,
@@ -1680,7 +1766,7 @@ int audit_socketcall(int nargs, unsigned long *args)
 	struct audit_aux_data_socketcall *ax;
 	struct audit_context *context = current->audit_context;
 
-	if (likely(!context))
+	if (likely(!context || context->dummy))
 		return 0;
 
 	ax = kmalloc(sizeof(*ax) + nargs * sizeof(unsigned long), GFP_KERNEL);
@@ -1708,7 +1794,7 @@ int audit_sockaddr(int len, void *a)
 	struct audit_aux_data_sockaddr *ax;
 	struct audit_context *context = current->audit_context;
 
-	if (likely(!context))
+	if (likely(!context || context->dummy))
 		return 0;
 
 	ax = kmalloc(sizeof(*ax) + len, GFP_KERNEL);
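The ACC_MODE string-indexing trick above deserves a gloss: the octal escapes "\004\002\006\006" encode read (4), write (2), and read|write (6), indexed by the two O_ACCMODE bits, which lines up with AUDIT_PERM_READ/AUDIT_PERM_WRITE. A standalone user-space sketch of the same table:

#include <fcntl.h>
#include <stdio.h>

/* Index the 4-byte table with the open() access-mode bits:
 * O_RDONLY(0) -> 4, O_WRONLY(1) -> 2, O_RDWR(2) -> 6. */
#define ACC_MODE(x) ("\004\002\006\006"[(x) & O_ACCMODE])

int main(void)
{
	printf("O_RDONLY -> %d\n", ACC_MODE(O_RDONLY));	/* 4 */
	printf("O_WRONLY -> %d\n", ACC_MODE(O_WRONLY));	/* 2 */
	printf("O_RDWR   -> %d\n", ACC_MODE(O_RDWR));	/* 6 */
	return 0;
}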
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1a649f2bb9bb..4ea6f0dc2fc5 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -816,6 +816,10 @@ static int update_cpumask(struct cpuset *cs, char *buf)
 	struct cpuset trialcs;
 	int retval, cpus_unchanged;
 
+	/* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */
+	if (cs == &top_cpuset)
+		return -EACCES;
+
 	trialcs = *cs;
 	retval = cpulist_parse(buf, trialcs.cpus_allowed);
 	if (retval < 0)
@@ -2033,6 +2037,33 @@ out:
 	return err;
 }
 
+/*
+ * The top_cpuset tracks what CPUs and Memory Nodes are online,
+ * period.  This is necessary in order to make cpusets transparent
+ * (of no effect) on systems that are actively using CPU hotplug
+ * but making no active use of cpusets.
+ *
+ * This handles CPU hotplug (cpuhp) events.  If someday Memory
+ * Nodes can be hotplugged (dynamically changing node_online_map)
+ * then we should handle that too, perhaps in a similar way.
+ */
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int cpuset_handle_cpuhp(struct notifier_block *nb,
+				unsigned long phase, void *cpu)
+{
+	mutex_lock(&manage_mutex);
+	mutex_lock(&callback_mutex);
+
+	top_cpuset.cpus_allowed = cpu_online_map;
+
+	mutex_unlock(&callback_mutex);
+	mutex_unlock(&manage_mutex);
+
+	return 0;
+}
+#endif
+
 /**
  * cpuset_init_smp - initialize cpus_allowed
  *
@@ -2043,6 +2074,8 @@ void __init cpuset_init_smp(void)
 {
 	top_cpuset.cpus_allowed = cpu_online_map;
 	top_cpuset.mems_allowed = node_online_map;
+
+	hotcpu_notifier(cpuset_handle_cpuhp, 0);
 }
 
 /**
@@ -2387,7 +2420,7 @@ EXPORT_SYMBOL_GPL(cpuset_mem_spread_node);
 int cpuset_excl_nodes_overlap(const struct task_struct *p)
 {
 	const struct cpuset *cs1, *cs2;	/* my and p's cpuset ancestors */
-	int overlap = 0;		/* do cpusets overlap? */
+	int overlap = 1;		/* do cpusets overlap? */
 
 	task_lock(current);
 	if (current->flags & PF_EXITING) {
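hotcpu_notifier() here is the 2.6-era convenience wrapper that registers a CPU-hotplug callback (and compiles away without CONFIG_HOTPLUG_CPU); the handler above simply resynchronizes top_cpuset with cpu_online_map on every event. A minimal sketch of the same registration pattern, with hypothetical names:

#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/notifier.h>

/* Hypothetical handler: refresh cached state on any CPU up/down
 * event; like cpuset_handle_cpuhp(), it ignores the exact phase
 * (CPU_ONLINE, CPU_DEAD, ...) passed in 'phase'. */
static int example_cpuhp(struct notifier_block *nb,
			 unsigned long phase, void *cpu)
{
	/* take whatever locks guard the cached copy, then
	 * refresh it from cpu_online_map */
	return NOTIFY_OK;
}

static int __init example_init(void)
{
	hotcpu_notifier(example_cpuhp, 0);	/* priority 0 */
	return 0;
}
__initcall(example_init);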
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index f05392d64267..36752f124c6a 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -19,15 +19,15 @@
 #include <linux/sysctl.h>
 #include <linux/delayacct.h>
 
-int delayacct_on __read_mostly;	/* Delay accounting turned on/off */
+int delayacct_on __read_mostly = 1;	/* Delay accounting turned on/off */
 kmem_cache_t *delayacct_cache;
 
-static int __init delayacct_setup_enable(char *str)
+static int __init delayacct_setup_disable(char *str)
 {
-	delayacct_on = 1;
+	delayacct_on = 0;
 	return 1;
 }
-__setup("delayacct", delayacct_setup_enable);
+__setup("nodelayacct", delayacct_setup_disable);
 
 void delayacct_init(void)
 {
@@ -41,24 +41,11 @@ void delayacct_init(void)
 
 void __delayacct_tsk_init(struct task_struct *tsk)
 {
-	spin_lock_init(&tsk->delays_lock);
-	/* No need to acquire tsk->delays_lock for allocation here unless
-	   __delayacct_tsk_init called after tsk is attached to tasklist
-	*/
 	tsk->delays = kmem_cache_zalloc(delayacct_cache, SLAB_KERNEL);
 	if (tsk->delays)
 		spin_lock_init(&tsk->delays->lock);
 }
 
-void __delayacct_tsk_exit(struct task_struct *tsk)
-{
-	struct task_delay_info *delays = tsk->delays;
-	spin_lock(&tsk->delays_lock);
-	tsk->delays = NULL;
-	spin_unlock(&tsk->delays_lock);
-	kmem_cache_free(delayacct_cache, delays);
-}
-
 /*
  * Start accounting for a delay statistic using
  * its starting timestamp (@start)
@@ -118,8 +105,6 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 	struct timespec ts;
 	unsigned long t1,t2,t3;
 
-	spin_lock(&tsk->delays_lock);
-
 	/* Though tsk->delays accessed later, early exit avoids
 	 * unnecessary returning of other data
 	 */
@@ -161,7 +146,6 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 	spin_unlock(&tsk->delays->lock);
 
 done:
-	spin_unlock(&tsk->delays_lock);
 	return 0;
 }
 
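The net effect of the first hunk is a default flip: delay accounting is now on unless the administrator boots with "nodelayacct". The __setup() pattern it uses is generic; a sketch with hypothetical names:

#include <linux/cache.h>
#include <linux/init.h>

static int example_on __read_mostly = 1;	/* on by default */

/* "noexample" on the kernel command line turns the feature off;
 * returning 1 tells the option parser the argument was consumed. */
static int __init example_setup_disable(char *str)
{
	example_on = 0;
	return 1;
}
__setup("noexample", example_setup_disable);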
diff --git a/kernel/exit.c b/kernel/exit.c
index dba194a8d416..d891883420f7 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -908,7 +908,6 @@ fastcall NORET_TYPE void do_exit(long code)
 	audit_free(tsk);
 	taskstats_exit_send(tsk, tidstats, group_dead, mycpu);
 	taskstats_exit_free(tidstats);
-	delayacct_tsk_exit(tsk);
 
 	exit_mm(tsk);
 
@@ -1054,7 +1053,7 @@ static int eligible_child(pid_t pid, int options, struct task_struct *p)
 	 * Do not consider thread group leaders that are
 	 * in a non-empty thread group:
 	 */
-	if (current->tgid != p->tgid && delay_group_leader(p))
+	if (delay_group_leader(p))
 		return 2;
 
 	if (security_task_wait(p))
diff --git a/kernel/fork.c b/kernel/fork.c
index 1b0f7b1e0881..f9b014e3e700 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -117,6 +117,7 @@ void __put_task_struct(struct task_struct *tsk)
 	security_task_free(tsk);
 	free_uid(tsk->user);
 	put_group_info(tsk->group_info);
+	delayacct_tsk_free(tsk);
 
 	if (!profile_handoff_task(tsk))
 		free_task(tsk);
@@ -1011,7 +1012,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	retval = -EFAULT;
 	if (clone_flags & CLONE_PARENT_SETTID)
 		if (put_user(p->pid, parent_tidptr))
-			goto bad_fork_cleanup;
+			goto bad_fork_cleanup_delays_binfmt;
 
 	INIT_LIST_HEAD(&p->children);
 	INIT_LIST_HEAD(&p->sibling);
@@ -1277,7 +1278,8 @@ bad_fork_cleanup_policy:
 bad_fork_cleanup_cpuset:
 #endif
 	cpuset_exit(p);
-bad_fork_cleanup:
+bad_fork_cleanup_delays_binfmt:
+	delayacct_tsk_free(p);
 	if (p->binfmt)
 		module_put(p->binfmt->module);
 bad_fork_cleanup_put_domain:
@@ -1387,8 +1389,10 @@ long do_fork(unsigned long clone_flags,
 
 		if (clone_flags & CLONE_VFORK) {
 			wait_for_completion(&vfork);
-			if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE))
+			if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) {
+				current->ptrace_message = nr;
 				ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
+			}
 		}
 	} else {
 		free_pid(pid);
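Storing the child's pid in current->ptrace_message before the PTRACE_EVENT_VFORK_DONE notification means a tracer can now retrieve it with PTRACE_GETEVENTMSG, matching the other fork events. A user-space sketch of the tracer side (assumes the tracee is already stopped at that event):

#include <sys/ptrace.h>
#include <sys/types.h>

/* After a PTRACE_EVENT_VFORK_DONE stop on 'tracee', the event
 * message now carries the vforked child's pid. */
static long vfork_done_child_pid(pid_t tracee)
{
	unsigned long msg = 0;

	if (ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg) == -1)
		return -1;
	return (long)msg;
}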
diff --git a/kernel/futex.c b/kernel/futex.c
index cf0c8e21d1ab..9d260e838cff 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -297,7 +297,7 @@ static int futex_handle_fault(unsigned long address, int attempt)
 	struct vm_area_struct * vma;
 	struct mm_struct *mm = current->mm;
 
-	if (attempt >= 2 || !(vma = find_vma(mm, address)) ||
+	if (attempt > 2 || !(vma = find_vma(mm, address)) ||
 	    vma->vm_start > address || !(vma->vm_flags & VM_WRITE))
 		return -EFAULT;
 
@@ -397,7 +397,7 @@ static struct task_struct * futex_find_get_task(pid_t pid)
 		p = NULL;
 		goto out_unlock;
 	}
-	if (p->state == EXIT_ZOMBIE || p->exit_state == EXIT_ZOMBIE) {
+	if (p->exit_state != 0) {
 		p = NULL;
 		goto out_unlock;
 	}
@@ -415,15 +415,15 @@ out_unlock:
  */
 void exit_pi_state_list(struct task_struct *curr)
 {
-	struct futex_hash_bucket *hb;
 	struct list_head *next, *head = &curr->pi_state_list;
 	struct futex_pi_state *pi_state;
+	struct futex_hash_bucket *hb;
 	union futex_key key;
 
 	/*
 	 * We are a ZOMBIE and nobody can enqueue itself on
 	 * pi_state_list anymore, but we have to be careful
-	 * versus waiters unqueueing themselfs
+	 * versus waiters unqueueing themselves:
 	 */
 	spin_lock_irq(&curr->pi_lock);
 	while (!list_empty(head)) {
@@ -431,21 +431,24 @@ void exit_pi_state_list(struct task_struct *curr)
 		next = head->next;
 		pi_state = list_entry(next, struct futex_pi_state, list);
 		key = pi_state->key;
+		hb = hash_futex(&key);
 		spin_unlock_irq(&curr->pi_lock);
 
-		hb = hash_futex(&key);
 		spin_lock(&hb->lock);
 
 		spin_lock_irq(&curr->pi_lock);
+		/*
+		 * We dropped the pi-lock, so re-check whether this
+		 * task still owns the PI-state:
+		 */
 		if (head->next != next) {
 			spin_unlock(&hb->lock);
 			continue;
 		}
 
-		list_del_init(&pi_state->list);
-
 		WARN_ON(pi_state->owner != curr);
-
+		WARN_ON(list_empty(&pi_state->list));
+		list_del_init(&pi_state->list);
 		pi_state->owner = NULL;
 		spin_unlock_irq(&curr->pi_lock);
 
@@ -470,7 +473,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
 	head = &hb->chain;
 
 	list_for_each_entry_safe(this, next, head, list) {
-		if (match_futex (&this->key, &me->key)) {
+		if (match_futex(&this->key, &me->key)) {
 			/*
 			 * Another waiter already exists - bump up
 			 * the refcount and return its pi_state:
@@ -482,6 +485,8 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
 			if (unlikely(!pi_state))
 				return -EINVAL;
 
+			WARN_ON(!atomic_read(&pi_state->refcount));
+
 			atomic_inc(&pi_state->refcount);
 			me->pi_state = pi_state;
 
@@ -490,10 +495,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
 	}
 
 	/*
-	 * We are the first waiter - try to look up the real owner and
-	 * attach the new pi_state to it:
+	 * We are the first waiter - try to look up the real owner and attach
+	 * the new pi_state to it, but bail out when the owner died bit is set
+	 * and TID = 0:
 	 */
 	pid = uval & FUTEX_TID_MASK;
+	if (!pid && (uval & FUTEX_OWNER_DIED))
+		return -ESRCH;
 	p = futex_find_get_task(pid);
 	if (!p)
 		return -ESRCH;
@@ -510,6 +518,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
 	pi_state->key = me->key;
 
 	spin_lock_irq(&p->pi_lock);
+	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &p->pi_state_list);
 	pi_state->owner = p;
 	spin_unlock_irq(&p->pi_lock);
@@ -573,20 +582,29 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 	 * kept enabled while there is PI state around. We must also
 	 * preserve the owner died bit.)
 	 */
-	newval = (uval & FUTEX_OWNER_DIED) | FUTEX_WAITERS | new_owner->pid;
-
-	inc_preempt_count();
-	curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
-	dec_preempt_count();
-
-	if (curval == -EFAULT)
-		return -EFAULT;
-	if (curval != uval)
-		return -EINVAL;
-
-	list_del_init(&pi_state->owner->pi_state_list);
-	list_add(&pi_state->list, &new_owner->pi_state_list);
-	pi_state->owner = new_owner;
+	if (!(uval & FUTEX_OWNER_DIED)) {
+		newval = FUTEX_WAITERS | new_owner->pid;
+
+		inc_preempt_count();
+		curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+		dec_preempt_count();
+		if (curval == -EFAULT)
+			return -EFAULT;
+		if (curval != uval)
+			return -EINVAL;
+	}
+
+	spin_lock_irq(&pi_state->owner->pi_lock);
+	WARN_ON(list_empty(&pi_state->list));
+	list_del_init(&pi_state->list);
+	spin_unlock_irq(&pi_state->owner->pi_lock);
+
+	spin_lock_irq(&new_owner->pi_lock);
+	WARN_ON(!list_empty(&pi_state->list));
+	list_add(&pi_state->list, &new_owner->pi_state_list);
+	pi_state->owner = new_owner;
+	spin_unlock_irq(&new_owner->pi_lock);
+
 	rt_mutex_unlock(&pi_state->pi_mutex);
 
 	return 0;
@@ -729,8 +747,10 @@ retry:
 	 */
 	if (attempt++) {
 		if (futex_handle_fault((unsigned long)uaddr2,
-					attempt))
+					attempt)) {
+			ret = -EFAULT;
 			goto out;
+		}
 		goto retry;
 	}
 
@@ -930,6 +950,7 @@ static int unqueue_me(struct futex_q *q)
 	/* In the common case we don't take the spinlock, which is nice. */
 retry:
 	lock_ptr = q->lock_ptr;
+	barrier();
 	if (lock_ptr != 0) {
 		spin_lock(lock_ptr);
 		/*
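The lone barrier() addition is subtle: without it, the compiler may legally re-read q->lock_ptr after the NULL test (a concurrent unqueue can set it NULL in between), so the test and the later spin_lock() could see different values. A sketch of the pattern under that assumption, with a hypothetical structure:

#include <linux/compiler.h>
#include <linux/spinlock.h>

struct waiter { spinlock_t *lock_ptr; };	/* hypothetical */

static void lock_waiter(struct waiter *w)
{
	spinlock_t *lock_ptr = w->lock_ptr;

	/* Force the single load above to be the value used below;
	 * a refetch of w->lock_ptr could yield NULL meanwhile. */
	barrier();
	if (lock_ptr)
		spin_lock(lock_ptr);
}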
@@ -1099,9 +1120,10 @@ static int futex_wait(u32 __user *uaddr, u32 val, unsigned long time)
  * if there are waiters then it will block, it does PI, etc. (Due to
  * races the kernel might see a 0 value of the futex too.)
  */
-static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
-			    struct hrtimer_sleeper *to)
+static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
+			 long nsec, int trylock)
 {
+	struct hrtimer_sleeper timeout, *to = NULL;
 	struct task_struct *curr = current;
 	struct futex_hash_bucket *hb;
 	u32 uval, newval, curval;
@@ -1111,6 +1133,13 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
 	if (refill_pi_state_cache())
 		return -ENOMEM;
 
+	if (sec != MAX_SCHEDULE_TIMEOUT) {
+		to = &timeout;
+		hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_ABS);
+		hrtimer_init_sleeper(to, current);
+		to->timer.expires = ktime_set(sec, nsec);
+	}
+
 	q.pi_state = NULL;
 retry:
 	down_read(&curr->mm->mmap_sem);
@@ -1236,6 +1265,7 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
 	/* Owner died? */
 	if (q.pi_state->owner != NULL) {
 		spin_lock_irq(&q.pi_state->owner->pi_lock);
+		WARN_ON(list_empty(&q.pi_state->list));
 		list_del_init(&q.pi_state->list);
 		spin_unlock_irq(&q.pi_state->owner->pi_lock);
 	} else
@@ -1244,6 +1274,7 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
 	q.pi_state->owner = current;
 
 	spin_lock_irq(&current->pi_lock);
+	WARN_ON(!list_empty(&q.pi_state->list));
 	list_add(&q.pi_state->list, &current->pi_state_list);
 	spin_unlock_irq(&current->pi_lock);
 
@@ -1284,7 +1315,7 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
 	if (!detect && ret == -EDEADLK && 0)
 		force_sig(SIGKILL, current);
 
-	return ret;
+	return ret != -EINTR ? ret : -ERESTARTNOINTR;
 
 out_unlock_release_sem:
 	queue_unlock(&q, hb);
@@ -1301,9 +1332,10 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
 	 * still holding the mmap_sem.
 	 */
 	if (attempt++) {
-		if (futex_handle_fault((unsigned long)uaddr, attempt))
+		if (futex_handle_fault((unsigned long)uaddr, attempt)) {
+			ret = -EFAULT;
 			goto out_unlock_release_sem;
-
+		}
 		goto retry_locked;
 	}
 
@@ -1318,76 +1350,6 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
 }
 
 /*
- * Restart handler
- */
-static long futex_lock_pi_restart(struct restart_block *restart)
-{
-	struct hrtimer_sleeper timeout, *to = NULL;
-	int ret;
-
-	restart->fn = do_no_restart_syscall;
-
-	if (restart->arg2 || restart->arg3) {
-		to = &timeout;
-		hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_ABS);
-		hrtimer_init_sleeper(to, current);
-		to->timer.expires.tv64 = ((u64)restart->arg1 << 32) |
-			(u64) restart->arg0;
-	}
-
-	pr_debug("lock_pi restart: %p, %d (%d)\n",
-		 (u32 __user *)restart->arg0, current->pid);
-
-	ret = do_futex_lock_pi((u32 __user *)restart->arg0, restart->arg1,
-			       0, to);
-
-	if (ret != -EINTR)
-		return ret;
-
-	restart->fn = futex_lock_pi_restart;
-
-	/* The other values are filled in */
-	return -ERESTART_RESTARTBLOCK;
-}
-
-/*
- * Called from the syscall entry below.
- */
-static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
-			 long nsec, int trylock)
-{
-	struct hrtimer_sleeper timeout, *to = NULL;
-	struct restart_block *restart;
-	int ret;
-
-	if (sec != MAX_SCHEDULE_TIMEOUT) {
-		to = &timeout;
-		hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_ABS);
-		hrtimer_init_sleeper(to, current);
-		to->timer.expires = ktime_set(sec, nsec);
-	}
-
-	ret = do_futex_lock_pi(uaddr, detect, trylock, to);
-
-	if (ret != -EINTR)
-		return ret;
-
-	pr_debug("lock_pi interrupted: %p, %d (%d)\n", uaddr, current->pid);
-
-	restart = &current_thread_info()->restart_block;
-	restart->fn = futex_lock_pi_restart;
-	restart->arg0 = (unsigned long) uaddr;
-	restart->arg1 = detect;
-	if (to) {
-		restart->arg2 = to->timer.expires.tv64 & 0xFFFFFFFF;
-		restart->arg3 = to->timer.expires.tv64 >> 32;
-	} else
-		restart->arg2 = restart->arg3 = 0;
-
-	return -ERESTART_RESTARTBLOCK;
-}
-
-/*
  * Userspace attempted a TID -> 0 atomic transition, and failed.
  * This is the in-kernel slowpath: we look up the PI state (if any),
  * and do the rt-mutex unlock.
@@ -1427,9 +1389,11 @@ retry_locked:
 	 * again. If it succeeds then we can return without waking
 	 * anyone else up:
 	 */
-	inc_preempt_count();
-	uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
-	dec_preempt_count();
+	if (!(uval & FUTEX_OWNER_DIED)) {
+		inc_preempt_count();
+		uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
+		dec_preempt_count();
+	}
 
 	if (unlikely(uval == -EFAULT))
 		goto pi_faulted;
@@ -1462,9 +1426,11 @@ retry_locked:
 	/*
 	 * No waiters - kernel unlocks the futex:
 	 */
-	ret = unlock_futex_pi(uaddr, uval);
-	if (ret == -EFAULT)
-		goto pi_faulted;
+	if (!(uval & FUTEX_OWNER_DIED)) {
+		ret = unlock_futex_pi(uaddr, uval);
+		if (ret == -EFAULT)
+			goto pi_faulted;
+	}
 
 out_unlock:
 	spin_unlock(&hb->lock);
@@ -1481,9 +1447,10 @@ pi_faulted:
 	 * still holding the mmap_sem.
 	 */
 	if (attempt++) {
-		if (futex_handle_fault((unsigned long)uaddr, attempt))
+		if (futex_handle_fault((unsigned long)uaddr, attempt)) {
+			ret = -EFAULT;
 			goto out_unlock;
-
+		}
 		goto retry_locked;
 	}
 
@@ -1683,9 +1650,9 @@ err_unlock:
  * Process a futex-list entry, check whether it's owned by the
 * dying task, and do notification if so:
  */
-int handle_futex_death(u32 __user *uaddr, struct task_struct *curr)
+int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
 {
-	u32 uval, nval;
+	u32 uval, nval, mval;
 
 retry:
 	if (get_user(uval, uaddr))
@@ -1702,21 +1669,45 @@ retry:
 		 * thread-death.) The rest of the cleanup is done in
 		 * userspace.
 		 */
-		nval = futex_atomic_cmpxchg_inatomic(uaddr, uval,
-						     uval | FUTEX_OWNER_DIED);
+		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
+		nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);
+
 		if (nval == -EFAULT)
 			return -1;
 
 		if (nval != uval)
 			goto retry;
 
-		if (uval & FUTEX_WAITERS)
-			futex_wake(uaddr, 1);
+		/*
+		 * Wake robust non-PI futexes here. The wakeup of
+		 * PI futexes happens in exit_pi_state():
+		 */
+		if (!pi) {
+			if (uval & FUTEX_WAITERS)
+				futex_wake(uaddr, 1);
+		}
 	}
 	return 0;
 }
 
 /*
+ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+ */
+static inline int fetch_robust_entry(struct robust_list __user **entry,
+				     struct robust_list __user **head, int *pi)
+{
+	unsigned long uentry;
+
+	if (get_user(uentry, (unsigned long *)head))
+		return -EFAULT;
+
+	*entry = (void *)(uentry & ~1UL);
+	*pi = uentry & 1;
+
+	return 0;
+}
+
+/*
 * Walk curr->robust_list (very carefully, it's a userspace list!)
  * and mark any locks found there dead, and notify any waiters.
  *
@@ -1726,14 +1717,14 @@ void exit_robust_list(struct task_struct *curr)
 {
 	struct robust_list_head __user *head = curr->robust_list;
 	struct robust_list __user *entry, *pending;
-	unsigned int limit = ROBUST_LIST_LIMIT;
+	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
 	unsigned long futex_offset;
 
 	/*
 	 * Fetch the list head (which was registered earlier, via
 	 * sys_set_robust_list()):
 	 */
-	if (get_user(entry, &head->list.next))
+	if (fetch_robust_entry(&entry, &head->list.next, &pi))
 		return;
 	/*
 	 * Fetch the relative futex offset:
@@ -1744,10 +1735,11 @@ void exit_robust_list(struct task_struct *curr)
 	 * Fetch any possibly pending lock-add first, and handle it
 	 * if it exists:
 	 */
-	if (get_user(pending, &head->list_op_pending))
+	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
 		return;
+
 	if (pending)
-		handle_futex_death((void *)pending + futex_offset, curr);
+		handle_futex_death((void *)pending + futex_offset, curr, pip);
 
 	while (entry != &head->list) {
 		/*
@@ -1756,12 +1748,12 @@ void exit_robust_list(struct task_struct *curr)
 		 */
 		if (entry != pending)
 			if (handle_futex_death((void *)entry + futex_offset,
-						curr))
+						curr, pi))
 				return;
 		/*
 		 * Fetch the next entry in the list:
 		 */
-		if (get_user(entry, &entry->next))
+		if (fetch_robust_entry(&entry, &entry->next, &pi))
 			return;
 		/*
		 * Avoid excessively long or circular lists:
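The robust-list changes thread one bit of metadata through each user-space list pointer: bit 0 marks the entry as a PI futex, and fetch_robust_entry() strips it before the pointer is used; handle_futex_death() then leaves PI wakeups to exit_pi_state_list(). A user-space sketch of the encoding side (helper name hypothetical; struct robust_list comes from linux/futex.h):

#include <linux/futex.h>
#include <stdint.h>

/* Tag a robust-list entry as a PI futex by setting bit 0 of the
 * stored pointer, mirroring what the kernel decodes. */
static inline struct robust_list *tag_robust_entry(struct robust_list *entry,
						   int is_pi)
{
	return (struct robust_list *)((uintptr_t)entry | (is_pi ? 1UL : 0UL));
}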
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index d1d92b441fb7..c5cca3f65cb7 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -12,6 +12,23 @@
12 12
13#include <asm/uaccess.h> 13#include <asm/uaccess.h>
14 14
15
16/*
17 * Fetch a robust-list pointer. Bit 0 signals PI futexes:
18 */
19static inline int
20fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
21 compat_uptr_t *head, int *pi)
22{
23 if (get_user(*uentry, head))
24 return -EFAULT;
25
26 *entry = compat_ptr((*uentry) & ~1);
27 *pi = (unsigned int)(*uentry) & 1;
28
29 return 0;
30}
31
15/* 32/*
16 * Walk curr->robust_list (very carefully, it's a userspace list!) 33 * Walk curr->robust_list (very carefully, it's a userspace list!)
17 * and mark any locks found there dead, and notify any waiters. 34 * and mark any locks found there dead, and notify any waiters.
@@ -22,17 +39,16 @@ void compat_exit_robust_list(struct task_struct *curr)
22{ 39{
23 struct compat_robust_list_head __user *head = curr->compat_robust_list; 40 struct compat_robust_list_head __user *head = curr->compat_robust_list;
24 struct robust_list __user *entry, *pending; 41 struct robust_list __user *entry, *pending;
42 unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
25 compat_uptr_t uentry, upending; 43 compat_uptr_t uentry, upending;
26 unsigned int limit = ROBUST_LIST_LIMIT;
27 compat_long_t futex_offset; 44 compat_long_t futex_offset;
28 45
29 /* 46 /*
30 * Fetch the list head (which was registered earlier, via 47 * Fetch the list head (which was registered earlier, via
31 * sys_set_robust_list()): 48 * sys_set_robust_list()):
32 */ 49 */
33 if (get_user(uentry, &head->list.next)) 50 if (fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
34 return; 51 return;
35 entry = compat_ptr(uentry);
36 /* 52 /*
37 * Fetch the relative futex offset: 53 * Fetch the relative futex offset:
38 */ 54 */
@@ -42,11 +58,11 @@ void compat_exit_robust_list(struct task_struct *curr)
42 * Fetch any possibly pending lock-add first, and handle it 58 * Fetch any possibly pending lock-add first, and handle it
43 * if it exists: 59 * if it exists:
44 */ 60 */
45 if (get_user(upending, &head->list_op_pending)) 61 if (fetch_robust_entry(&upending, &pending,
62 &head->list_op_pending, &pip))
46 return; 63 return;
47 pending = compat_ptr(upending);
48 if (upending) 64 if (upending)
49 handle_futex_death((void *)pending + futex_offset, curr); 65 handle_futex_death((void *)pending + futex_offset, curr, pip);
50 66
51 while (compat_ptr(uentry) != &head->list) { 67 while (compat_ptr(uentry) != &head->list) {
52 /* 68 /*
@@ -55,15 +71,15 @@ void compat_exit_robust_list(struct task_struct *curr)
55 */ 71 */
56 if (entry != pending) 72 if (entry != pending)
57 if (handle_futex_death((void *)entry + futex_offset, 73 if (handle_futex_death((void *)entry + futex_offset,
58 curr)) 74 curr, pi))
59 return; 75 return;
60 76
61 /* 77 /*
62 * Fetch the next entry in the list: 78 * Fetch the next entry in the list:
63 */ 79 */
64 if (get_user(uentry, (compat_uptr_t *)&entry->next)) 80 if (fetch_robust_entry(&uentry, &entry,
81 (compat_uptr_t *)&entry->next, &pi))
65 return; 82 return;
66 entry = compat_ptr(uentry);
67 /* 83 /*
68 * Avoid excessively long or circular lists: 84 * Avoid excessively long or circular lists:
69 */ 85 */
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index d17766d40dab..21c38a7e666b 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -187,7 +187,7 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_base *base)
187{ 187{
188 struct hrtimer_base *new_base; 188 struct hrtimer_base *new_base;
189 189
190 new_base = &__get_cpu_var(hrtimer_bases[base->index]); 190 new_base = &__get_cpu_var(hrtimer_bases)[base->index];
191 191
192 if (base != new_base) { 192 if (base != new_base) {
193 /* 193 /*
@@ -835,7 +835,7 @@ static void migrate_hrtimers(int cpu)
835} 835}
836#endif /* CONFIG_HOTPLUG_CPU */ 836#endif /* CONFIG_HOTPLUG_CPU */
837 837
838static int __devinit hrtimer_cpu_notify(struct notifier_block *self, 838static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
839 unsigned long action, void *hcpu) 839 unsigned long action, void *hcpu)
840{ 840{
841 long cpu = (long)hcpu; 841 long cpu = (long)hcpu;
@@ -859,7 +859,7 @@ static int __devinit hrtimer_cpu_notify(struct notifier_block *self,
859 return NOTIFY_OK; 859 return NOTIFY_OK;
860} 860}
861 861
862static struct notifier_block __devinitdata hrtimers_nb = { 862static struct notifier_block __cpuinitdata hrtimers_nb = {
863 .notifier_call = hrtimer_cpu_notify, 863 .notifier_call = hrtimer_cpu_notify,
864}; 864};
865 865
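The one-line fix in switch_hrtimer_base() is about macro scope: __get_cpu_var() should be handed the per-cpu symbol itself, with any array indexing applied to its result. A sketch with a hypothetical per-cpu array:

#include <linux/percpu.h>

static DEFINE_PER_CPU(int, example_bases[4]);	/* hypothetical */

static int *get_base(int idx)
{
	/* resolve this CPU's copy of the array first, then index it */
	return &__get_cpu_var(example_bases)[idx];
}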
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index fc4e906aedbd..48a53f68af96 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -20,6 +20,11 @@
20 20
21/** 21/**
22 * handle_bad_irq - handle spurious and unhandled irqs 22 * handle_bad_irq - handle spurious and unhandled irqs
23 * @irq: the interrupt number
24 * @desc: description of the interrupt
25 * @regs: pointer to a register structure
26 *
27 * Handles spurious and unhandled IRQ's. It also prints a debugmessage.
23 */ 28 */
24void fastcall 29void fastcall
25handle_bad_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs) 30handle_bad_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs)
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 4e461438e48b..92be519eff26 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -137,16 +137,40 @@ EXPORT_SYMBOL(enable_irq);
137 * @irq: interrupt to control 137 * @irq: interrupt to control
138 * @on: enable/disable power management wakeup 138 * @on: enable/disable power management wakeup
139 * 139 *
140 * Enable/disable power management wakeup mode 140 * Enable/disable power management wakeup mode, which is
141 * disabled by default. Enables and disables must match,
142 * just as they match for non-wakeup mode support.
143 *
144 * Wakeup mode lets this IRQ wake the system from sleep
145 * states like "suspend to RAM".
141 */ 146 */
142int set_irq_wake(unsigned int irq, unsigned int on) 147int set_irq_wake(unsigned int irq, unsigned int on)
143{ 148{
144 struct irq_desc *desc = irq_desc + irq; 149 struct irq_desc *desc = irq_desc + irq;
145 unsigned long flags; 150 unsigned long flags;
146 int ret = -ENXIO; 151 int ret = -ENXIO;
152 int (*set_wake)(unsigned, unsigned) = desc->chip->set_wake;
147 153
154 /* wakeup-capable irqs can be shared between drivers that
155 * don't need to have the same sleep mode behaviors.
156 */
148 spin_lock_irqsave(&desc->lock, flags); 157 spin_lock_irqsave(&desc->lock, flags);
149 if (desc->chip->set_wake) 158 if (on) {
159 if (desc->wake_depth++ == 0)
160 desc->status |= IRQ_WAKEUP;
161 else
162 set_wake = NULL;
163 } else {
164 if (desc->wake_depth == 0) {
165 printk(KERN_WARNING "Unbalanced IRQ %d "
166 "wake disable\n", irq);
167 WARN_ON(1);
168 } else if (--desc->wake_depth == 0)
169 desc->status &= ~IRQ_WAKEUP;
170 else
171 set_wake = NULL;
172 }
173 if (set_wake)
150 ret = desc->chip->set_wake(irq, on); 174 ret = desc->chip->set_wake(irq, on);
151 spin_unlock_irqrestore(&desc->lock, flags); 175 spin_unlock_irqrestore(&desc->lock, flags);
152 return ret; 176 return ret;
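
The rewritten set_irq_wake() adds per-descriptor reference counting (wake_depth) so that wakeup-capable IRQs shared between drivers behave: only the 0->1 enable and the final 1->0 disable reach the chip's ->set_wake() hook, and an unbalanced disable warns instead of underflowing. A hedged sketch of the balanced usage this implies, with hypothetical driver names:

static int mydrv_suspend(struct platform_device *pdev, pm_message_t state)
{
	struct mydrv *drv = platform_get_drvdata(pdev);

	if (device_may_wakeup(&pdev->dev))
		set_irq_wake(drv->irq, 1);	/* must be balanced ... */
	return 0;
}

static int mydrv_resume(struct platform_device *pdev)
{
	struct mydrv *drv = platform_get_drvdata(pdev);

	if (device_may_wakeup(&pdev->dev))
		set_irq_wake(drv->irq, 0);	/* ... by this disable */
	return 0;
}
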
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 64aab081153b..3f57dfdc8f92 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -393,6 +393,7 @@ static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p)
393static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) 393static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
394{ 394{
395 copy_kprobe(p, ap); 395 copy_kprobe(p, ap);
396 flush_insn_slot(ap);
396 ap->addr = p->addr; 397 ap->addr = p->addr;
397 ap->pre_handler = aggr_pre_handler; 398 ap->pre_handler = aggr_pre_handler;
398 ap->fault_handler = aggr_fault_handler; 399 ap->fault_handler = aggr_fault_handler;
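
The added flush_insn_slot() call is about instruction-cache coherency: copy_kprobe() has just duplicated the saved instruction copy, and on architectures with split I/D caches the instruction side must observe the write before the copy can be single-stepped. The general requirement, as a sketch (flush_icache_range() shown for illustration; flush_insn_slot() is the arch hook that encapsulates whatever is needed, possibly nothing on x86):

	/* write an instruction copy through the data side ... */
	memcpy(slot, insn, len);
	/* ... then make the instruction side coherent before execution */
	flush_icache_range((unsigned long)slot, (unsigned long)slot + len);
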
diff --git a/kernel/panic.c b/kernel/panic.c
index d8a0bca21233..8010b9b17aca 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -18,6 +18,7 @@
18#include <linux/interrupt.h> 18#include <linux/interrupt.h>
19#include <linux/nmi.h> 19#include <linux/nmi.h>
20#include <linux/kexec.h> 20#include <linux/kexec.h>
21#include <linux/debug_locks.h>
21 22
22int panic_on_oops; 23int panic_on_oops;
23int tainted; 24int tainted;
@@ -172,7 +173,7 @@ const char *print_tainted(void)
172 173
173void add_taint(unsigned flag) 174void add_taint(unsigned flag)
174{ 175{
175 debug_locks_off(); /* can't trust the integrity of the kernel anymore */ 176 debug_locks = 0; /* can't trust the integrity of the kernel anymore */
176 tainted |= flag; 177 tainted |= flag;
177} 178}
178EXPORT_SYMBOL(add_taint); 179EXPORT_SYMBOL(add_taint);
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index ae44a70aae8a..619ecabf7c58 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -56,7 +56,7 @@ config PM_TRACE
56 56
57config SOFTWARE_SUSPEND 57config SOFTWARE_SUSPEND
58 bool "Software Suspend" 58 bool "Software Suspend"
59 depends on PM && SWAP && (X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP) 59 depends on PM && SWAP && ((X86 && (!SMP || SUSPEND_SMP) && !X86_PAE) || ((FRV || PPC32) && !SMP))
60 ---help--- 60 ---help---
61 Enable the possibility of suspending the machine. 61 Enable the possibility of suspending the machine.
62 It doesn't need ACPI or APM. 62 It doesn't need ACPI or APM.
@@ -78,6 +78,10 @@ config SOFTWARE_SUSPEND
78 78
79 For more information take a look at <file:Documentation/power/swsusp.txt>. 79 For more information take a look at <file:Documentation/power/swsusp.txt>.
80 80
81 (For now, swsusp is incompatible with PAE aka HIGHMEM_64G on i386:
82 we need identity mapping for resume to work, and that is trivial
83 to get with 4MB pages, but less than trivial on PAE.)
84
81config PM_STD_PARTITION 85config PM_STD_PARTITION
82 string "Default resume partition" 86 string "Default resume partition"
83 depends on SOFTWARE_SUSPEND 87 depends on SOFTWARE_SUSPEND
diff --git a/kernel/power/process.c b/kernel/power/process.c
index b2a5f671d6cd..72e72d2c61e6 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -66,13 +66,25 @@ static inline void freeze_process(struct task_struct *p)
66 } 66 }
67} 67}
68 68
69static void cancel_freezing(struct task_struct *p)
70{
71 unsigned long flags;
72
73 if (freezing(p)) {
74 pr_debug(" clean up: %s\n", p->comm);
75 do_not_freeze(p);
76 spin_lock_irqsave(&p->sighand->siglock, flags);
77 recalc_sigpending_tsk(p);
78 spin_unlock_irqrestore(&p->sighand->siglock, flags);
79 }
80}
81
69/* 0 = success, else # of processes that we failed to stop */ 82/* 0 = success, else # of processes that we failed to stop */
70int freeze_processes(void) 83int freeze_processes(void)
71{ 84{
72 int todo, nr_user, user_frozen; 85 int todo, nr_user, user_frozen;
73 unsigned long start_time; 86 unsigned long start_time;
74 struct task_struct *g, *p; 87 struct task_struct *g, *p;
75 unsigned long flags;
76 88
77 printk( "Stopping tasks: " ); 89 printk( "Stopping tasks: " );
78 start_time = jiffies; 90 start_time = jiffies;
@@ -85,6 +97,10 @@ int freeze_processes(void)
85 continue; 97 continue;
86 if (frozen(p)) 98 if (frozen(p))
87 continue; 99 continue;
100 if (p->state == TASK_TRACED && frozen(p->parent)) {
101 cancel_freezing(p);
102 continue;
103 }
88 if (p->mm && !(p->flags & PF_BORROWED_MM)) { 104 if (p->mm && !(p->flags & PF_BORROWED_MM)) {
89 /* The task is a user-space one. 105 /* The task is a user-space one.
90 * Freeze it unless there's a vfork completion 106 * Freeze it unless there's a vfork completion
@@ -126,13 +142,7 @@ int freeze_processes(void)
126 do_each_thread(g, p) { 142 do_each_thread(g, p) {
127 if (freezeable(p) && !frozen(p)) 143 if (freezeable(p) && !frozen(p))
128 printk(KERN_ERR " %s\n", p->comm); 144 printk(KERN_ERR " %s\n", p->comm);
129 if (freezing(p)) { 145 cancel_freezing(p);
130 pr_debug(" clean up: %s\n", p->comm);
131 p->flags &= ~PF_FREEZE;
132 spin_lock_irqsave(&p->sighand->siglock, flags);
133 recalc_sigpending_tsk(p);
134 spin_unlock_irqrestore(&p->sighand->siglock, flags);
135 }
136 } while_each_thread(g, p); 146 } while_each_thread(g, p);
137 read_unlock(&tasklist_lock); 147 read_unlock(&tasklist_lock);
138 return todo; 148 return todo;
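
cancel_freezing() depends on the freezing()/do_not_freeze() helpers, which are not part of this diff. A sketch consistent with the open-coded lines it replaces (the deleted p->flags &= ~PF_FREEZE above), offered as an assumption about their definitions:

static inline int freezing(struct task_struct *p)
{
	return p->flags & PF_FREEZE;	/* a freeze request is pending */
}

static inline void do_not_freeze(struct task_struct *p)
{
	p->flags &= ~PF_FREEZE;		/* withdraw the request */
}
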
diff --git a/kernel/printk.c b/kernel/printk.c
index 65ca0688f86f..1149365e989e 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -799,6 +799,9 @@ void release_console_sem(void)
799 up(&secondary_console_sem); 799 up(&secondary_console_sem);
800 return; 800 return;
801 } 801 }
802
803 console_may_schedule = 0;
804
802 for ( ; ; ) { 805 for ( ; ; ) {
803 spin_lock_irqsave(&logbuf_lock, flags); 806 spin_lock_irqsave(&logbuf_lock, flags);
804 wake_klogd |= log_start - log_end; 807 wake_klogd |= log_start - log_end;
@@ -812,7 +815,6 @@ void release_console_sem(void)
812 local_irq_restore(flags); 815 local_irq_restore(flags);
813 } 816 }
814 console_locked = 0; 817 console_locked = 0;
815 console_may_schedule = 0;
816 up(&console_sem); 818 up(&console_sem);
817 spin_unlock_irqrestore(&logbuf_lock, flags); 819 spin_unlock_irqrestore(&logbuf_lock, flags);
818 if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) { 820 if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) {
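
Clearing console_may_schedule before the print loop, rather than after it, matters because console code can poll that flag via console_conditional_schedule() while release_console_sem() is feeding the drivers under logbuf_lock with interrupts disabled, where scheduling would be illegal. The helper, roughly (a sketch from memory of this era's printk.c; it is not shown in the diff):

void __sched console_conditional_schedule(void)
{
	if (console_may_schedule)
		cond_resched();
}
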
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 759805c9859a..436ab35f6fa7 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -548,7 +548,7 @@ static void __devinit rcu_online_cpu(int cpu)
548 tasklet_init(&per_cpu(rcu_tasklet, cpu), rcu_process_callbacks, 0UL); 548 tasklet_init(&per_cpu(rcu_tasklet, cpu), rcu_process_callbacks, 0UL);
549} 549}
550 550
551static int __devinit rcu_cpu_notify(struct notifier_block *self, 551static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
552 unsigned long action, void *hcpu) 552 unsigned long action, void *hcpu)
553{ 553{
554 long cpu = (long)hcpu; 554 long cpu = (long)hcpu;
@@ -565,7 +565,7 @@ static int __devinit rcu_cpu_notify(struct notifier_block *self,
565 return NOTIFY_OK; 565 return NOTIFY_OK;
566} 566}
567 567
568static struct notifier_block __devinitdata rcu_nb = { 568static struct notifier_block __cpuinitdata rcu_nb = {
569 .notifier_call = rcu_cpu_notify, 569 .notifier_call = rcu_cpu_notify,
570}; 570};
571 571
diff --git a/kernel/resource.c b/kernel/resource.c
index 0dd3a857579e..46286434af80 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -244,6 +244,7 @@ int find_next_system_ram(struct resource *res)
244 244
245 start = res->start; 245 start = res->start;
246 end = res->end; 246 end = res->end;
247 BUG_ON(start >= end);
247 248
248 read_lock(&resource_lock); 249 read_lock(&resource_lock);
249 for (p = iomem_resource.child; p ; p = p->sibling) { 250 for (p = iomem_resource.child; p ; p = p->sibling) {
@@ -254,15 +255,17 @@ int find_next_system_ram(struct resource *res)
254 p = NULL; 255 p = NULL;
255 break; 256 break;
256 } 257 }
257 if (p->start >= start) 258 if ((p->end >= start) && (p->start < end))
258 break; 259 break;
259 } 260 }
260 read_unlock(&resource_lock); 261 read_unlock(&resource_lock);
261 if (!p) 262 if (!p)
262 return -1; 263 return -1;
263 /* copy data */ 264 /* copy data */
264 res->start = p->start; 265 if (res->start < p->start)
265 res->end = p->end; 266 res->start = p->start;
267 if (res->end > p->end)
268 res->end = p->end;
266 return 0; 269 return 0;
267} 270}
268#endif 271#endif
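
With the widened overlap test and the clipping at the end, find_next_system_ram() now returns the intersection of the caller's request with the first overlapping child resource instead of the resource's own full extent. A hedged caller fragment in the memory-hotplug style (field values illustrative; the IORESOURCE_MEM filter is an assumption about how System RAM is flagged):

	struct resource res;

	res.start = start_pfn << PAGE_SHIFT;
	res.end   = ((start_pfn + nr_pages) << PAGE_SHIFT) - 1;
	res.flags = IORESOURCE_MEM;		/* assumed filter */

	if (find_next_system_ram(&res) < 0)
		return 0;	/* no System RAM overlaps the request */

	/* res.start/res.end now lie within both the request and the
	 * matched resource, never outside the caller's window */
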
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index d2ef13b485e7..3e13a1e5856f 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -7,6 +7,8 @@
7 * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com> 7 * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
8 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt 8 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
9 * Copyright (C) 2006 Esben Nielsen 9 * Copyright (C) 2006 Esben Nielsen
10 *
11 * See Documentation/rt-mutex-design.txt for details.
10 */ 12 */
11#include <linux/spinlock.h> 13#include <linux/spinlock.h>
12#include <linux/module.h> 14#include <linux/module.h>
diff --git a/kernel/sched.c b/kernel/sched.c
index b44b9a43b0fc..a234fbee1238 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4162,10 +4162,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
4162 read_unlock_irq(&tasklist_lock); 4162 read_unlock_irq(&tasklist_lock);
4163 return -ESRCH; 4163 return -ESRCH;
4164 } 4164 }
4165 get_task_struct(p);
4166 read_unlock_irq(&tasklist_lock);
4167 retval = sched_setscheduler(p, policy, &lparam); 4165 retval = sched_setscheduler(p, policy, &lparam);
4168 put_task_struct(p); 4166 read_unlock_irq(&tasklist_lock);
4169 4167
4170 return retval; 4168 return retval;
4171} 4169}
@@ -4456,9 +4454,9 @@ asmlinkage long sys_sched_yield(void)
4456 return 0; 4454 return 0;
4457} 4455}
4458 4456
4459static inline int __resched_legal(void) 4457static inline int __resched_legal(int expected_preempt_count)
4460{ 4458{
4461 if (unlikely(preempt_count())) 4459 if (unlikely(preempt_count() != expected_preempt_count))
4462 return 0; 4460 return 0;
4463 if (unlikely(system_state != SYSTEM_RUNNING)) 4461 if (unlikely(system_state != SYSTEM_RUNNING))
4464 return 0; 4462 return 0;
@@ -4484,7 +4482,7 @@ static void __cond_resched(void)
4484 4482
4485int __sched cond_resched(void) 4483int __sched cond_resched(void)
4486{ 4484{
4487 if (need_resched() && __resched_legal()) { 4485 if (need_resched() && __resched_legal(0)) {
4488 __cond_resched(); 4486 __cond_resched();
4489 return 1; 4487 return 1;
4490 } 4488 }
@@ -4510,7 +4508,7 @@ int cond_resched_lock(spinlock_t *lock)
4510 ret = 1; 4508 ret = 1;
4511 spin_lock(lock); 4509 spin_lock(lock);
4512 } 4510 }
4513 if (need_resched() && __resched_legal()) { 4511 if (need_resched() && __resched_legal(1)) {
4514 spin_release(&lock->dep_map, 1, _THIS_IP_); 4512 spin_release(&lock->dep_map, 1, _THIS_IP_);
4515 _raw_spin_unlock(lock); 4513 _raw_spin_unlock(lock);
4516 preempt_enable_no_resched(); 4514 preempt_enable_no_resched();
@@ -4526,7 +4524,7 @@ int __sched cond_resched_softirq(void)
4526{ 4524{
4527 BUG_ON(!in_softirq()); 4525 BUG_ON(!in_softirq());
4528 4526
4529 if (need_resched() && __resched_legal()) { 4527 if (need_resched() && __resched_legal(0)) {
4530 raw_local_irq_disable(); 4528 raw_local_irq_disable();
4531 _local_bh_enable(); 4529 _local_bh_enable();
4532 raw_local_irq_enable(); 4530 raw_local_irq_enable();
@@ -6494,7 +6492,12 @@ static int build_sched_domains(const cpumask_t *cpu_map)
6494 for (i = 0; i < MAX_NUMNODES; i++) 6492 for (i = 0; i < MAX_NUMNODES; i++)
6495 init_numa_sched_groups_power(sched_group_nodes[i]); 6493 init_numa_sched_groups_power(sched_group_nodes[i]);
6496 6494
6497 init_numa_sched_groups_power(sched_group_allnodes); 6495 if (sched_group_allnodes) {
6496 int group = cpu_to_allnodes_group(first_cpu(*cpu_map));
6497 struct sched_group *sg = &sched_group_allnodes[group];
6498
6499 init_numa_sched_groups_power(sg);
6500 }
6498#endif 6501#endif
6499 6502
6500 /* Attach the domains */ 6503 /* Attach the domains */
@@ -6761,6 +6764,11 @@ void __init sched_init(void)
6761 } 6764 }
6762 6765
6763 set_load_weight(&init_task); 6766 set_load_weight(&init_task);
6767
6768#ifdef CONFIG_RT_MUTEXES
6769 plist_head_init(&init_task.pi_waiters, &init_task.pi_lock);
6770#endif
6771
6764 /* 6772 /*
6765 * The boot idle thread does lazy MMU switching as well: 6773 * The boot idle thread does lazy MMU switching as well:
6766 */ 6774 */
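
The reworked __resched_legal() encodes the preempt count a caller expects at a legal reschedule point: 0 for cond_resched() and cond_resched_softirq(), 1 for cond_resched_lock(), because on CONFIG_PREEMPT kernels the still-held spinlock contributes exactly one level. A hedged usage sketch of cond_resched_lock() with hypothetical names, showing why a caller must assume the lock was dropped when it returns nonzero:

static void scan_all(void)
{
	struct myobj *obj;

restart:
	spin_lock(&mylist_lock);
	list_for_each_entry(obj, &mylist, node) {
		do_light_work(obj);
		if (cond_resched_lock(&mylist_lock)) {
			/* the lock was dropped and retaken: the list
			 * may have changed under us, so start over */
			spin_unlock(&mylist_lock);
			goto restart;
		}
	}
	spin_unlock(&mylist_lock);
}
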
diff --git a/kernel/signal.c b/kernel/signal.c
index 7fe874d12fae..bfdb5686fa3e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -791,22 +791,31 @@ out:
791/* 791/*
792 * Force a signal that the process can't ignore: if necessary 792 * Force a signal that the process can't ignore: if necessary
793 * we unblock the signal and change any SIG_IGN to SIG_DFL. 793 * we unblock the signal and change any SIG_IGN to SIG_DFL.
794 *
795 * Note: If we unblock the signal, we always reset it to SIG_DFL,
796 * since we do not want to have a signal handler that was blocked
797 * be invoked when user space had explicitly blocked it.
798 *
799 * We don't want to have recursive SIGSEGV's etc, for example.
794 */ 800 */
795
796int 801int
797force_sig_info(int sig, struct siginfo *info, struct task_struct *t) 802force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
798{ 803{
799 unsigned long int flags; 804 unsigned long int flags;
800 int ret; 805 int ret, blocked, ignored;
806 struct k_sigaction *action;
801 807
802 spin_lock_irqsave(&t->sighand->siglock, flags); 808 spin_lock_irqsave(&t->sighand->siglock, flags);
803 if (t->sighand->action[sig-1].sa.sa_handler == SIG_IGN) { 809 action = &t->sighand->action[sig-1];
804 t->sighand->action[sig-1].sa.sa_handler = SIG_DFL; 810 ignored = action->sa.sa_handler == SIG_IGN;
805 } 811 blocked = sigismember(&t->blocked, sig);
806 if (sigismember(&t->blocked, sig)) { 812 if (blocked || ignored) {
807 sigdelset(&t->blocked, sig); 813 action->sa.sa_handler = SIG_DFL;
814 if (blocked) {
815 sigdelset(&t->blocked, sig);
816 recalc_sigpending_tsk(t);
817 }
808 } 818 }
809 recalc_sigpending_tsk(t);
810 ret = specific_send_sig_info(sig, info, t); 819 ret = specific_send_sig_info(sig, info, t);
811 spin_unlock_irqrestore(&t->sighand->siglock, flags); 820 spin_unlock_irqrestore(&t->sighand->siglock, flags);
812 821
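
The rewrite makes force_sig_info() reset the handler to SIG_DFL whenever the signal was blocked, not only when it was ignored, so a handler the task had blocked can never run in response to a forced signal (the recursive-SIGSEGV case called out in the new comment). A typical caller, sketched in the style of an arch page-fault handler (names illustrative):

	struct siginfo info;

	info.si_signo = SIGSEGV;
	info.si_errno = 0;
	info.si_code  = SEGV_MAPERR;
	info.si_addr  = (void __user *)fault_address;
	force_sig_info(SIGSEGV, &info, current);
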
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 0f08a84ae307..3789ca98197c 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -65,6 +65,7 @@ static inline void wakeup_softirqd(void)
65 * This one is for softirq.c-internal use, 65 * This one is for softirq.c-internal use,
66 * where hardirqs are disabled legitimately: 66 * where hardirqs are disabled legitimately:
67 */ 67 */
68#ifdef CONFIG_TRACE_IRQFLAGS
68static void __local_bh_disable(unsigned long ip) 69static void __local_bh_disable(unsigned long ip)
69{ 70{
70 unsigned long flags; 71 unsigned long flags;
@@ -80,6 +81,13 @@ static void __local_bh_disable(unsigned long ip)
80 trace_softirqs_off(ip); 81 trace_softirqs_off(ip);
81 raw_local_irq_restore(flags); 82 raw_local_irq_restore(flags);
82} 83}
84#else /* !CONFIG_TRACE_IRQFLAGS */
85static inline void __local_bh_disable(unsigned long ip)
86{
87 add_preempt_count(SOFTIRQ_OFFSET);
88 barrier();
89}
90#endif /* CONFIG_TRACE_IRQFLAGS */
83 91
84void local_bh_disable(void) 92void local_bh_disable(void)
85{ 93{
@@ -121,12 +129,16 @@ EXPORT_SYMBOL(_local_bh_enable);
121 129
122void local_bh_enable(void) 130void local_bh_enable(void)
123{ 131{
132#ifdef CONFIG_TRACE_IRQFLAGS
124 unsigned long flags; 133 unsigned long flags;
125 134
126 WARN_ON_ONCE(in_irq()); 135 WARN_ON_ONCE(in_irq());
136#endif
127 WARN_ON_ONCE(irqs_disabled()); 137 WARN_ON_ONCE(irqs_disabled());
128 138
139#ifdef CONFIG_TRACE_IRQFLAGS
129 local_irq_save(flags); 140 local_irq_save(flags);
141#endif
130 /* 142 /*
131 * Are softirqs going to be turned on now: 143 * Are softirqs going to be turned on now:
132 */ 144 */
@@ -142,18 +154,22 @@ void local_bh_enable(void)
142 do_softirq(); 154 do_softirq();
143 155
144 dec_preempt_count(); 156 dec_preempt_count();
157#ifdef CONFIG_TRACE_IRQFLAGS
145 local_irq_restore(flags); 158 local_irq_restore(flags);
159#endif
146 preempt_check_resched(); 160 preempt_check_resched();
147} 161}
148EXPORT_SYMBOL(local_bh_enable); 162EXPORT_SYMBOL(local_bh_enable);
149 163
150void local_bh_enable_ip(unsigned long ip) 164void local_bh_enable_ip(unsigned long ip)
151{ 165{
166#ifdef CONFIG_TRACE_IRQFLAGS
152 unsigned long flags; 167 unsigned long flags;
153 168
154 WARN_ON_ONCE(in_irq()); 169 WARN_ON_ONCE(in_irq());
155 170
156 local_irq_save(flags); 171 local_irq_save(flags);
172#endif
157 /* 173 /*
158 * Are softirqs going to be turned on now: 174 * Are softirqs going to be turned on now:
159 */ 175 */
@@ -169,7 +185,9 @@ void local_bh_enable_ip(unsigned long ip)
169 do_softirq(); 185 do_softirq();
170 186
171 dec_preempt_count(); 187 dec_preempt_count();
188#ifdef CONFIG_TRACE_IRQFLAGS
172 local_irq_restore(flags); 189 local_irq_restore(flags);
190#endif
173 preempt_check_resched(); 191 preempt_check_resched();
174} 192}
175EXPORT_SYMBOL(local_bh_enable_ip); 193EXPORT_SYMBOL(local_bh_enable_ip);
@@ -547,7 +565,7 @@ static void takeover_tasklets(unsigned int cpu)
547} 565}
548#endif /* CONFIG_HOTPLUG_CPU */ 566#endif /* CONFIG_HOTPLUG_CPU */
549 567
550static int __devinit cpu_callback(struct notifier_block *nfb, 568static int __cpuinit cpu_callback(struct notifier_block *nfb,
551 unsigned long action, 569 unsigned long action,
552 void *hcpu) 570 void *hcpu)
553{ 571{
@@ -587,7 +605,7 @@ static int __devinit cpu_callback(struct notifier_block *nfb,
587 return NOTIFY_OK; 605 return NOTIFY_OK;
588} 606}
589 607
590static struct notifier_block __devinitdata cpu_nfb = { 608static struct notifier_block __cpuinitdata cpu_nfb = {
591 .notifier_call = cpu_callback 609 .notifier_call = cpu_callback
592}; 610};
593 611
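
After this change the irq-save/restore bracketing in the bh-disable/enable paths exists only when CONFIG_TRACE_IRQFLAGS has to record the softirq-off transitions; otherwise bumping the preempt count behind a compiler barrier suffices. The exported entry point keeps the same shape in both configurations, e.g. (a sketch consistent with the hunks; the body is not shown in the diff):

void local_bh_disable(void)
{
	__local_bh_disable((unsigned long)__builtin_return_address(0));
}
EXPORT_SYMBOL(local_bh_disable);
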
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 6b76caa22981..03e6a2b0b787 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -104,7 +104,7 @@ static int watchdog(void * __bind_cpu)
104/* 104/*
105 * Create/destroy watchdog threads as CPUs come and go: 105 * Create/destroy watchdog threads as CPUs come and go:
106 */ 106 */
107static int __devinit 107static int __cpuinit
108cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) 108cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
109{ 109{
110 int hotcpu = (unsigned long)hcpu; 110 int hotcpu = (unsigned long)hcpu;
@@ -142,7 +142,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
142 return NOTIFY_OK; 142 return NOTIFY_OK;
143} 143}
144 144
145static struct notifier_block __devinitdata cpu_nfb = { 145static struct notifier_block __cpuinitdata cpu_nfb = {
146 .notifier_call = cpu_callback 146 .notifier_call = cpu_callback
147}; 147};
148 148
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index bfd6ad9c0330..fb524b009eef 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -72,7 +72,7 @@ EXPORT_SYMBOL(_write_trylock);
72 * not re-enabled during lock-acquire (which the preempt-spin-ops do): 72 * not re-enabled during lock-acquire (which the preempt-spin-ops do):
73 */ 73 */
74#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) || \ 74#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) || \
75 defined(CONFIG_PROVE_LOCKING) 75 defined(CONFIG_DEBUG_LOCK_ALLOC)
76 76
77void __lockfunc _read_lock(rwlock_t *lock) 77void __lockfunc _read_lock(rwlock_t *lock)
78{ 78{
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index dcfb5d731466..51cacd111dbd 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -111,7 +111,6 @@ static int stop_machine(void)
111 /* If some failed, kill them all. */ 111 /* If some failed, kill them all. */
112 if (ret < 0) { 112 if (ret < 0) {
113 stopmachine_set_state(STOPMACHINE_EXIT); 113 stopmachine_set_state(STOPMACHINE_EXIT);
114 up(&stopmachine_mutex);
115 return ret; 114 return ret;
116 } 115 }
117 116
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index f45179ce028e..e78187657330 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -121,46 +121,45 @@ static int send_reply(struct sk_buff *skb, pid_t pid)
121/* 121/*
122 * Send taskstats data in @skb to listeners registered for @cpu's exit data 122 * Send taskstats data in @skb to listeners registered for @cpu's exit data
123 */ 123 */
124static int send_cpu_listeners(struct sk_buff *skb, unsigned int cpu) 124static void send_cpu_listeners(struct sk_buff *skb, unsigned int cpu)
125{ 125{
126 struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); 126 struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
127 struct listener_list *listeners; 127 struct listener_list *listeners;
128 struct listener *s, *tmp; 128 struct listener *s, *tmp;
129 struct sk_buff *skb_next, *skb_cur = skb; 129 struct sk_buff *skb_next, *skb_cur = skb;
130 void *reply = genlmsg_data(genlhdr); 130 void *reply = genlmsg_data(genlhdr);
131 int rc, ret, delcount = 0; 131 int rc, delcount = 0;
132 132
133 rc = genlmsg_end(skb, reply); 133 rc = genlmsg_end(skb, reply);
134 if (rc < 0) { 134 if (rc < 0) {
135 nlmsg_free(skb); 135 nlmsg_free(skb);
136 return rc; 136 return;
137 } 137 }
138 138
139 rc = 0; 139 rc = 0;
140 listeners = &per_cpu(listener_array, cpu); 140 listeners = &per_cpu(listener_array, cpu);
141 down_read(&listeners->sem); 141 down_read(&listeners->sem);
142 list_for_each_entry_safe(s, tmp, &listeners->list, list) { 142 list_for_each_entry(s, &listeners->list, list) {
143 skb_next = NULL; 143 skb_next = NULL;
144 if (!list_is_last(&s->list, &listeners->list)) { 144 if (!list_is_last(&s->list, &listeners->list)) {
145 skb_next = skb_clone(skb_cur, GFP_KERNEL); 145 skb_next = skb_clone(skb_cur, GFP_KERNEL);
146 if (!skb_next) { 146 if (!skb_next)
147 nlmsg_free(skb_cur);
148 rc = -ENOMEM;
149 break; 147 break;
150 }
151 } 148 }
152 ret = genlmsg_unicast(skb_cur, s->pid); 149 rc = genlmsg_unicast(skb_cur, s->pid);
153 if (ret == -ECONNREFUSED) { 150 if (rc == -ECONNREFUSED) {
154 s->valid = 0; 151 s->valid = 0;
155 delcount++; 152 delcount++;
156 rc = ret;
157 } 153 }
158 skb_cur = skb_next; 154 skb_cur = skb_next;
159 } 155 }
160 up_read(&listeners->sem); 156 up_read(&listeners->sem);
161 157
158 if (skb_cur)
159 nlmsg_free(skb_cur);
160
162 if (!delcount) 161 if (!delcount)
163 return rc; 162 return;
164 163
165 /* Delete invalidated entries */ 164 /* Delete invalidated entries */
166 down_write(&listeners->sem); 165 down_write(&listeners->sem);
@@ -171,13 +170,12 @@ static int send_cpu_listeners(struct sk_buff *skb, unsigned int cpu)
171 } 170 }
172 } 171 }
173 up_write(&listeners->sem); 172 up_write(&listeners->sem);
174 return rc;
175} 173}
176 174
177static int fill_pid(pid_t pid, struct task_struct *pidtsk, 175static int fill_pid(pid_t pid, struct task_struct *pidtsk,
178 struct taskstats *stats) 176 struct taskstats *stats)
179{ 177{
180 int rc; 178 int rc = 0;
181 struct task_struct *tsk = pidtsk; 179 struct task_struct *tsk = pidtsk;
182 180
183 if (!pidtsk) { 181 if (!pidtsk) {
@@ -196,12 +194,10 @@ static int fill_pid(pid_t pid, struct task_struct *pidtsk,
196 * Each accounting subsystem adds calls to its functions to 194 * Each accounting subsystem adds calls to its functions to
197 * fill in relevant parts of struct taskstats as follows 195 * fill in relevant parts of struct taskstats as follows
198 * 196 *
199 * rc = per-task-foo(stats, tsk); 197 * per-task-foo(stats, tsk);
200 * if (rc)
201 * goto err;
202 */ 198 */
203 199
204 rc = delayacct_add_tsk(stats, tsk); 200 delayacct_add_tsk(stats, tsk);
205 stats->version = TASKSTATS_VERSION; 201 stats->version = TASKSTATS_VERSION;
206 202
207 /* Define err: label here if needed */ 203 /* Define err: label here if needed */
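
The send_cpu_listeners() rework leans on socket-buffer ownership rules: genlmsg_unicast() consumes its skb (netlink delivery normally frees the buffer even on error), every listener except the last gets a clone, and the single trailing nlmsg_free(skb_cur) now covers both the clone-failure break and a final undelivered buffer. A condensed sketch of that ownership flow, abridged from the hunk above with the comments added:

	list_for_each_entry(s, &listeners->list, list) {
		skb_next = NULL;
		if (!list_is_last(&s->list, &listeners->list)) {
			skb_next = skb_clone(skb_cur, GFP_KERNEL);
			if (!skb_next)
				break;		/* skb_cur freed below */
		}
		rc = genlmsg_unicast(skb_cur, s->pid);	/* consumes skb_cur */
		skb_cur = skb_next;
	}
	if (skb_cur)
		nlmsg_free(skb_cur);	/* last buffer never handed off */
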
diff --git a/kernel/timer.c b/kernel/timer.c
index 05809c2e2fd6..1d7dd6267c2d 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -84,7 +84,7 @@ typedef struct tvec_t_base_s tvec_base_t;
84 84
85tvec_base_t boot_tvec_bases; 85tvec_base_t boot_tvec_bases;
86EXPORT_SYMBOL(boot_tvec_bases); 86EXPORT_SYMBOL(boot_tvec_bases);
87static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = { &boot_tvec_bases }; 87static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases;
88 88
89static inline void set_running_timer(tvec_base_t *base, 89static inline void set_running_timer(tvec_base_t *base,
90 struct timer_list *timer) 90 struct timer_list *timer)
@@ -408,7 +408,7 @@ static int cascade(tvec_base_t *base, tvec_t *tv, int index)
408 * This function cascades all vectors and executes all expired timer 408 * This function cascades all vectors and executes all expired timer
409 * vectors. 409 * vectors.
410 */ 410 */
411#define INDEX(N) (base->timer_jiffies >> (TVR_BITS + N * TVN_BITS)) & TVN_MASK 411#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
412 412
413static inline void __run_timers(tvec_base_t *base) 413static inline void __run_timers(tvec_base_t *base)
414{ 414{
@@ -1324,46 +1324,19 @@ asmlinkage long sys_getpid(void)
1324} 1324}
1325 1325
1326/* 1326/*
1327 * Accessing ->group_leader->real_parent is not SMP-safe, it could 1327 * Accessing ->real_parent is not SMP-safe, it could
1328 * change from under us. However, rather than getting any lock 1328 * change from under us. However, we can use a stale
1329 * we can use an optimistic algorithm: get the parent 1329 * value of ->real_parent under rcu_read_lock(), see
1330 * pid, and go back and check that the parent is still 1330 * release_task()->call_rcu(delayed_put_task_struct).
1331 * the same. If it has changed (which is extremely unlikely
1332 * indeed), we just try again..
1333 *
1334 * NOTE! This depends on the fact that even if we _do_
1335 * get an old value of "parent", we can happily dereference
1336 * the pointer (it was and remains a dereferencable kernel pointer
1337 * no matter what): we just can't necessarily trust the result
1338 * until we know that the parent pointer is valid.
1339 *
1340 * NOTE2: ->group_leader never changes from under us.
1341 */ 1331 */
1342asmlinkage long sys_getppid(void) 1332asmlinkage long sys_getppid(void)
1343{ 1333{
1344 int pid; 1334 int pid;
1345 struct task_struct *me = current;
1346 struct task_struct *parent;
1347 1335
1348 parent = me->group_leader->real_parent; 1336 rcu_read_lock();
1349 for (;;) { 1337 pid = rcu_dereference(current->real_parent)->tgid;
1350 pid = parent->tgid; 1338 rcu_read_unlock();
1351#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
1352{
1353 struct task_struct *old = parent;
1354 1339
1355 /*
1356 * Make sure we read the pid before re-reading the
1357 * parent pointer:
1358 */
1359 smp_rmb();
1360 parent = me->group_leader->real_parent;
1361 if (old != parent)
1362 continue;
1363}
1364#endif
1365 break;
1366 }
1367 return pid; 1340 return pid;
1368} 1341}
1369 1342
@@ -1688,7 +1661,7 @@ static void __devinit migrate_timers(int cpu)
1688} 1661}
1689#endif /* CONFIG_HOTPLUG_CPU */ 1662#endif /* CONFIG_HOTPLUG_CPU */
1690 1663
1691static int __devinit timer_cpu_notify(struct notifier_block *self, 1664static int __cpuinit timer_cpu_notify(struct notifier_block *self,
1692 unsigned long action, void *hcpu) 1665 unsigned long action, void *hcpu)
1693{ 1666{
1694 long cpu = (long)hcpu; 1667 long cpu = (long)hcpu;
@@ -1708,7 +1681,7 @@ static int __devinit timer_cpu_notify(struct notifier_block *self,
1708 return NOTIFY_OK; 1681 return NOTIFY_OK;
1709} 1682}
1710 1683
1711static struct notifier_block __devinitdata timers_nb = { 1684static struct notifier_block __cpuinitdata timers_nb = {
1712 .notifier_call = timer_cpu_notify, 1685 .notifier_call = timer_cpu_notify,
1713}; 1686};
1714 1687
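
The INDEX() change is routine macro hygiene: the parameter and the full expansion both gain parentheses. Two concrete failure modes of the old form, under hypothetical uses of the macro:

#define INDEX_OLD(N) (base->timer_jiffies >> (TVR_BITS + N * TVN_BITS)) & TVN_MASK

	/* INDEX_OLD(i + 1): the shift count expands to
	 * TVR_BITS + i + 1 * TVN_BITS, so the '+ 1' escapes the
	 * multiplication by TVN_BITS. */

	/* INDEX_OLD(0) + 1: '+' binds tighter than '&', so this
	 * evaluates as (...) & (TVN_MASK + 1): the '+ 1' is absorbed
	 * into the mask instead of being added to the result. */
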
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index eebb1d839235..835fe28b87a8 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -68,7 +68,7 @@ struct workqueue_struct {
68 68
69/* All the per-cpu workqueues on the system, for hotplug cpu to add/remove 69/* All the per-cpu workqueues on the system, for hotplug cpu to add/remove
70 threads to each one as cpus come/go. */ 70 threads to each one as cpus come/go. */
71static DEFINE_SPINLOCK(workqueue_lock); 71static DEFINE_MUTEX(workqueue_mutex);
72static LIST_HEAD(workqueues); 72static LIST_HEAD(workqueues);
73 73
74static int singlethread_cpu; 74static int singlethread_cpu;
@@ -93,9 +93,12 @@ static void __queue_work(struct cpu_workqueue_struct *cwq,
93 spin_unlock_irqrestore(&cwq->lock, flags); 93 spin_unlock_irqrestore(&cwq->lock, flags);
94} 94}
95 95
96/* 96/**
97 * Queue work on a workqueue. Return non-zero if it was successfully 97 * queue_work - queue work on a workqueue
98 * added. 98 * @wq: workqueue to use
99 * @work: work to queue
100 *
101 * Returns non-zero if it was successfully added.
99 * 102 *
100 * We queue the work to the CPU it was submitted, but there is no 103 * We queue the work to the CPU it was submitted, but there is no
101 * guarantee that it will be processed by that CPU. 104 * guarantee that it will be processed by that CPU.
@@ -128,6 +131,14 @@ static void delayed_work_timer_fn(unsigned long __data)
128 __queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work); 131 __queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
129} 132}
130 133
134/**
135 * queue_delayed_work - queue work on a workqueue after delay
136 * @wq: workqueue to use
137 * @work: work to queue
138 * @delay: number of jiffies to wait before queueing
139 *
140 * Returns non-zero if it was successfully added.
141 */
131int fastcall queue_delayed_work(struct workqueue_struct *wq, 142int fastcall queue_delayed_work(struct workqueue_struct *wq,
132 struct work_struct *work, unsigned long delay) 143 struct work_struct *work, unsigned long delay)
133{ 144{
@@ -150,6 +161,15 @@ int fastcall queue_delayed_work(struct workqueue_struct *wq,
150} 161}
151EXPORT_SYMBOL_GPL(queue_delayed_work); 162EXPORT_SYMBOL_GPL(queue_delayed_work);
152 163
164/**
165 * queue_delayed_work_on - queue work on specific CPU after delay
166 * @cpu: CPU number to execute work on
167 * @wq: workqueue to use
168 * @work: work to queue
169 * @delay: number of jiffies to wait before queueing
170 *
171 * Returns non-zero if it was successfully added.
172 */
153int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, 173int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
154 struct work_struct *work, unsigned long delay) 174 struct work_struct *work, unsigned long delay)
155{ 175{
@@ -275,8 +295,9 @@ static void flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
275 } 295 }
276} 296}
277 297
278/* 298/**
279 * flush_workqueue - ensure that any scheduled work has run to completion. 299 * flush_workqueue - ensure that any scheduled work has run to completion.
300 * @wq: workqueue to flush
280 * 301 *
281 * Forces execution of the workqueue and blocks until its completion. 302 * Forces execution of the workqueue and blocks until its completion.
282 * This is typically used in driver shutdown handlers. 303 * This is typically used in driver shutdown handlers.
@@ -299,10 +320,10 @@ void fastcall flush_workqueue(struct workqueue_struct *wq)
299 } else { 320 } else {
300 int cpu; 321 int cpu;
301 322
302 lock_cpu_hotplug(); 323 mutex_lock(&workqueue_mutex);
303 for_each_online_cpu(cpu) 324 for_each_online_cpu(cpu)
304 flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu)); 325 flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
305 unlock_cpu_hotplug(); 326 mutex_unlock(&workqueue_mutex);
306 } 327 }
307} 328}
308EXPORT_SYMBOL_GPL(flush_workqueue); 329EXPORT_SYMBOL_GPL(flush_workqueue);
@@ -350,8 +371,7 @@ struct workqueue_struct *__create_workqueue(const char *name,
350 } 371 }
351 372
352 wq->name = name; 373 wq->name = name;
353 /* We don't need the distraction of CPUs appearing and vanishing. */ 374 mutex_lock(&workqueue_mutex);
354 lock_cpu_hotplug();
355 if (singlethread) { 375 if (singlethread) {
356 INIT_LIST_HEAD(&wq->list); 376 INIT_LIST_HEAD(&wq->list);
357 p = create_workqueue_thread(wq, singlethread_cpu); 377 p = create_workqueue_thread(wq, singlethread_cpu);
@@ -360,9 +380,7 @@ struct workqueue_struct *__create_workqueue(const char *name,
360 else 380 else
361 wake_up_process(p); 381 wake_up_process(p);
362 } else { 382 } else {
363 spin_lock(&workqueue_lock);
364 list_add(&wq->list, &workqueues); 383 list_add(&wq->list, &workqueues);
365 spin_unlock(&workqueue_lock);
366 for_each_online_cpu(cpu) { 384 for_each_online_cpu(cpu) {
367 p = create_workqueue_thread(wq, cpu); 385 p = create_workqueue_thread(wq, cpu);
368 if (p) { 386 if (p) {
@@ -372,7 +390,7 @@ struct workqueue_struct *__create_workqueue(const char *name,
372 destroy = 1; 390 destroy = 1;
373 } 391 }
374 } 392 }
375 unlock_cpu_hotplug(); 393 mutex_unlock(&workqueue_mutex);
376 394
377 /* 395 /*
378 * Was there any error during startup? If yes then clean up: 396 * Was there any error during startup? If yes then clean up:
@@ -400,6 +418,12 @@ static void cleanup_workqueue_thread(struct workqueue_struct *wq, int cpu)
400 kthread_stop(p); 418 kthread_stop(p);
401} 419}
402 420
421/**
422 * destroy_workqueue - safely terminate a workqueue
423 * @wq: target workqueue
424 *
425 * Safely destroy a workqueue. All work currently pending will be done first.
426 */
403void destroy_workqueue(struct workqueue_struct *wq) 427void destroy_workqueue(struct workqueue_struct *wq)
404{ 428{
405 int cpu; 429 int cpu;
@@ -407,17 +431,15 @@ void destroy_workqueue(struct workqueue_struct *wq)
407 flush_workqueue(wq); 431 flush_workqueue(wq);
408 432
409 /* We don't need the distraction of CPUs appearing and vanishing. */ 433 /* We don't need the distraction of CPUs appearing and vanishing. */
410 lock_cpu_hotplug(); 434 mutex_lock(&workqueue_mutex);
411 if (is_single_threaded(wq)) 435 if (is_single_threaded(wq))
412 cleanup_workqueue_thread(wq, singlethread_cpu); 436 cleanup_workqueue_thread(wq, singlethread_cpu);
413 else { 437 else {
414 for_each_online_cpu(cpu) 438 for_each_online_cpu(cpu)
415 cleanup_workqueue_thread(wq, cpu); 439 cleanup_workqueue_thread(wq, cpu);
416 spin_lock(&workqueue_lock);
417 list_del(&wq->list); 440 list_del(&wq->list);
418 spin_unlock(&workqueue_lock);
419 } 441 }
420 unlock_cpu_hotplug(); 442 mutex_unlock(&workqueue_mutex);
421 free_percpu(wq->cpu_wq); 443 free_percpu(wq->cpu_wq);
422 kfree(wq); 444 kfree(wq);
423} 445}
@@ -425,18 +447,41 @@ EXPORT_SYMBOL_GPL(destroy_workqueue);
425 447
426static struct workqueue_struct *keventd_wq; 448static struct workqueue_struct *keventd_wq;
427 449
450/**
451 * schedule_work - put work task in global workqueue
452 * @work: job to be done
453 *
454 * This puts a job in the kernel-global workqueue.
455 */
428int fastcall schedule_work(struct work_struct *work) 456int fastcall schedule_work(struct work_struct *work)
429{ 457{
430 return queue_work(keventd_wq, work); 458 return queue_work(keventd_wq, work);
431} 459}
432EXPORT_SYMBOL(schedule_work); 460EXPORT_SYMBOL(schedule_work);
433 461
462/**
463 * schedule_delayed_work - put work task in global workqueue after delay
464 * @work: job to be done
465 * @delay: number of jiffies to wait
466 *
467 * After waiting for a given time this puts a job in the kernel-global
468 * workqueue.
469 */
434int fastcall schedule_delayed_work(struct work_struct *work, unsigned long delay) 470int fastcall schedule_delayed_work(struct work_struct *work, unsigned long delay)
435{ 471{
436 return queue_delayed_work(keventd_wq, work, delay); 472 return queue_delayed_work(keventd_wq, work, delay);
437} 473}
438EXPORT_SYMBOL(schedule_delayed_work); 474EXPORT_SYMBOL(schedule_delayed_work);
439 475
476/**
477 * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
478 * @cpu: cpu to use
479 * @work: job to be done
480 * @delay: number of jiffies to wait
481 *
482 * After waiting for a given time this puts a job in the kernel-global
483 * workqueue on the specified CPU.
484 */
440int schedule_delayed_work_on(int cpu, 485int schedule_delayed_work_on(int cpu,
441 struct work_struct *work, unsigned long delay) 486 struct work_struct *work, unsigned long delay)
442{ 487{
@@ -465,11 +510,13 @@ int schedule_on_each_cpu(void (*func)(void *info), void *info)
465 if (!works) 510 if (!works)
466 return -ENOMEM; 511 return -ENOMEM;
467 512
513 mutex_lock(&workqueue_mutex);
468 for_each_online_cpu(cpu) { 514 for_each_online_cpu(cpu) {
469 INIT_WORK(per_cpu_ptr(works, cpu), func, info); 515 INIT_WORK(per_cpu_ptr(works, cpu), func, info);
470 __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), 516 __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu),
471 per_cpu_ptr(works, cpu)); 517 per_cpu_ptr(works, cpu));
472 } 518 }
519 mutex_unlock(&workqueue_mutex);
473 flush_workqueue(keventd_wq); 520 flush_workqueue(keventd_wq);
474 free_percpu(works); 521 free_percpu(works);
475 return 0; 522 return 0;
@@ -585,6 +632,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
585 632
586 switch (action) { 633 switch (action) {
587 case CPU_UP_PREPARE: 634 case CPU_UP_PREPARE:
635 mutex_lock(&workqueue_mutex);
588 /* Create a new workqueue thread for it. */ 636 /* Create a new workqueue thread for it. */
589 list_for_each_entry(wq, &workqueues, list) { 637 list_for_each_entry(wq, &workqueues, list) {
590 if (!create_workqueue_thread(wq, hotcpu)) { 638 if (!create_workqueue_thread(wq, hotcpu)) {
@@ -603,6 +651,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
603 kthread_bind(cwq->thread, hotcpu); 651 kthread_bind(cwq->thread, hotcpu);
604 wake_up_process(cwq->thread); 652 wake_up_process(cwq->thread);
605 } 653 }
654 mutex_unlock(&workqueue_mutex);
606 break; 655 break;
607 656
608 case CPU_UP_CANCELED: 657 case CPU_UP_CANCELED:
@@ -614,6 +663,15 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
614 any_online_cpu(cpu_online_map)); 663 any_online_cpu(cpu_online_map));
615 cleanup_workqueue_thread(wq, hotcpu); 664 cleanup_workqueue_thread(wq, hotcpu);
616 } 665 }
666 mutex_unlock(&workqueue_mutex);
667 break;
668
669 case CPU_DOWN_PREPARE:
670 mutex_lock(&workqueue_mutex);
671 break;
672
673 case CPU_DOWN_FAILED:
674 mutex_unlock(&workqueue_mutex);
617 break; 675 break;
618 676
619 case CPU_DEAD: 677 case CPU_DEAD:
@@ -621,6 +679,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
621 cleanup_workqueue_thread(wq, hotcpu); 679 cleanup_workqueue_thread(wq, hotcpu);
622 list_for_each_entry(wq, &workqueues, list) 680 list_for_each_entry(wq, &workqueues, list)
623 take_over_work(wq, hotcpu); 681 take_over_work(wq, hotcpu);
682 mutex_unlock(&workqueue_mutex);
624 break; 683 break;
625 } 684 }
626 685
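
Taken together, the workqueue changes replace lock_cpu_hotplug()/workqueue_lock with a single workqueue_mutex that the hotplug notifier itself holds across a transition: each *_PREPARE step takes the mutex and the matching completion or failure step releases it, so the workqueues list and the per-CPU threads cannot change while a CPU is coming or going. The resulting bracket, abridged from the hunks above to the locking skeleton (the case label for the bind/wake step is inferred to be CPU_ONLINE, which the hunks do not show):

	switch (action) {
	case CPU_UP_PREPARE:
		mutex_lock(&workqueue_mutex);
		/* create the new CPU's worker threads */
		break;
	case CPU_ONLINE:
		/* bind and wake the threads */
		mutex_unlock(&workqueue_mutex);
		break;
	case CPU_UP_CANCELED:
		/* clean up the half-created threads */
		mutex_unlock(&workqueue_mutex);
		break;
	case CPU_DOWN_PREPARE:
		mutex_lock(&workqueue_mutex);
		break;
	case CPU_DOWN_FAILED:
		mutex_unlock(&workqueue_mutex);
		break;
	case CPU_DEAD:
		/* take over the dead CPU's pending work */
		mutex_unlock(&workqueue_mutex);
		break;
	}
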