author     James Bottomley <jejb@mulgrave.il.steeleye.com>  2006-08-27 22:59:59 -0400
committer  James Bottomley <jejb@mulgrave.il.steeleye.com>  2006-08-27 22:59:59 -0400
commit     8ce7a9c159c8c4eb480f0a65c6af753dbf9a1a70 (patch)
tree       be59573c0af3617d0cd8a7d61f0ed119e58b1156 /kernel
parent     d2afb3ae04e36dbc6e9eb2d8bd54406ff7b6b3bd (diff)
parent     01da5fd83d6b2c5e36b77539f6cbdd8f49849225 (diff)
Merge ../linux-2.6
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/audit.c            4
-rw-r--r--  kernel/auditfilter.c     26
-rw-r--r--  kernel/auditsc.c        117
-rw-r--r--  kernel/cpuset.c          35
-rw-r--r--  kernel/delayacct.c        8
-rw-r--r--  kernel/fork.c             4
-rw-r--r--  kernel/futex.c          140
-rw-r--r--  kernel/futex_compat.c    34
-rw-r--r--  kernel/hrtimer.c          6
-rw-r--r--  kernel/irq/manage.c      28
-rw-r--r--  kernel/kprobes.c          1
-rw-r--r--  kernel/panic.c            1
-rw-r--r--  kernel/power/process.c   26
-rw-r--r--  kernel/printk.c           4
-rw-r--r--  kernel/rcupdate.c         4
-rw-r--r--  kernel/resource.c         9
-rw-r--r--  kernel/rtmutex.c          2
-rw-r--r--  kernel/sched.c           26
-rw-r--r--  kernel/signal.c          25
-rw-r--r--  kernel/softirq.c         22
-rw-r--r--  kernel/softlockup.c       4
-rw-r--r--  kernel/stop_machine.c     1
-rw-r--r--  kernel/taskstats.c       32
-rw-r--r--  kernel/timer.c           49
-rw-r--r--  kernel/workqueue.c       91
25 files changed, 484 insertions(+), 215 deletions(-)
diff --git a/kernel/audit.c b/kernel/audit.c
index d417ca1db79b..0a36091ed712 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -690,9 +690,7 @@ static const struct inotify_operations audit_inotify_ops = {
 /* Initialize audit support at boot time. */
 static int __init audit_init(void)
 {
-#ifdef CONFIG_AUDITSYSCALL
 	int i;
-#endif
 
 	printk(KERN_INFO "audit: initializing netlink socket (%s)\n",
 	       audit_default ? "enabled" : "disabled");
@@ -717,10 +715,10 @@ static int __init audit_init(void)
 	audit_ih = inotify_init(&audit_inotify_ops);
 	if (IS_ERR(audit_ih))
 		audit_panic("cannot initialize inotify handle");
+#endif
 
 	for (i = 0; i < AUDIT_INODE_BUCKETS; i++)
 		INIT_LIST_HEAD(&audit_inode_hash[i]);
-#endif
 
 	return 0;
 }
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 5b4e16276ca0..6a9a5c5a4e7d 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -442,6 +442,7 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 		case AUDIT_EQUAL:
 			break;
 		default:
+			err = -EINVAL;
 			goto exit_free;
 		}
 	}
@@ -579,6 +580,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 		case AUDIT_EQUAL:
 			break;
 		default:
+			err = -EINVAL;
 			goto exit_free;
 		}
 	}
@@ -1134,6 +1136,14 @@ static inline int audit_add_rule(struct audit_entry *entry,
 	struct audit_watch *watch = entry->rule.watch;
 	struct nameidata *ndp, *ndw;
 	int h, err, putnd_needed = 0;
+#ifdef CONFIG_AUDITSYSCALL
+	int dont_count = 0;
+
+	/* If either of these, don't count towards total */
+	if (entry->rule.listnr == AUDIT_FILTER_USER ||
+	    entry->rule.listnr == AUDIT_FILTER_TYPE)
+		dont_count = 1;
+#endif
 
 	if (inode_f) {
 		h = audit_hash_ino(inode_f->val);
@@ -1174,6 +1184,10 @@ static inline int audit_add_rule(struct audit_entry *entry,
 	} else {
 		list_add_tail_rcu(&entry->list, list);
 	}
+#ifdef CONFIG_AUDITSYSCALL
+	if (!dont_count)
+		audit_n_rules++;
+#endif
 	mutex_unlock(&audit_filter_mutex);
 
 	if (putnd_needed)
@@ -1198,6 +1212,14 @@ static inline int audit_del_rule(struct audit_entry *entry,
 	struct audit_watch *watch, *tmp_watch = entry->rule.watch;
 	LIST_HEAD(inotify_list);
 	int h, ret = 0;
+#ifdef CONFIG_AUDITSYSCALL
+	int dont_count = 0;
+
+	/* If either of these, don't count towards total */
+	if (entry->rule.listnr == AUDIT_FILTER_USER ||
+	    entry->rule.listnr == AUDIT_FILTER_TYPE)
+		dont_count = 1;
+#endif
 
 	if (inode_f) {
 		h = audit_hash_ino(inode_f->val);
@@ -1235,6 +1257,10 @@ static inline int audit_del_rule(struct audit_entry *entry,
 		list_del_rcu(&e->list);
 		call_rcu(&e->rcu, audit_free_rule_rcu);
 
+#ifdef CONFIG_AUDITSYSCALL
+		if (!dont_count)
+			audit_n_rules--;
+#endif
 	mutex_unlock(&audit_filter_mutex);
 
 	if (!list_empty(&inotify_list))
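The `dont_count` hunks above keep a global `audit_n_rules` count of the filter rules that can affect syscall auditing; user-message and type rules are excluded because they never run on the syscall path. A minimal user-space sketch of the same bookkeeping pattern (the names here are illustrative, not the kernel API):

    #include <pthread.h>

    /* Rough analogue of audit_n_rules guarded by audit_filter_mutex. */
    static int n_syscall_rules;
    static pthread_mutex_t filter_lock = PTHREAD_MUTEX_INITIALIZER;

    enum list_id { LIST_EXIT, LIST_ENTRY, LIST_USER, LIST_TYPE };

    /* USER and TYPE rules never run on the syscall path, so they must
     * not make the syscall fast path believe rules exist. */
    static int counts_toward_total(enum list_id list)
    {
            return list != LIST_USER && list != LIST_TYPE;
    }

    static void add_rule(enum list_id list)
    {
            pthread_mutex_lock(&filter_lock);
            /* ... link the rule into its list ... */
            if (counts_toward_total(list))
                    n_syscall_rules++;      /* mirrors audit_n_rules++ */
            pthread_mutex_unlock(&filter_lock);
    }

    static void del_rule(enum list_id list)
    {
            pthread_mutex_lock(&filter_lock);
            /* ... unlink the rule ... */
            if (counts_toward_total(list))
                    n_syscall_rules--;      /* mirrors audit_n_rules-- */
            pthread_mutex_unlock(&filter_lock);
    }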
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index ae40ac8c39e7..efc1b74bebf3 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -85,6 +85,9 @@ extern int audit_enabled;
 /* Indicates that audit should log the full pathname. */
 #define AUDIT_NAME_FULL -1
 
+/* number of audit rules */
+int audit_n_rules;
+
 /* When fs/namei.c:getname() is called, we store the pointer in name and
  * we don't let putname() free it (instead we free all of the saved
  * pointers at syscall exit time).
@@ -174,6 +177,7 @@ struct audit_aux_data_path {
 
 /* The per-task audit context. */
 struct audit_context {
+	int		    dummy;	/* must be the first element */
 	int		    in_syscall;	/* 1 if task is in a syscall */
 	enum audit_state    state;
 	unsigned int	    serial;	/* serial number for record */
@@ -514,7 +518,7 @@ static inline struct audit_context *audit_get_context(struct task_struct *tsk,
 	context->return_valid = return_valid;
 	context->return_code  = return_code;
 
-	if (context->in_syscall && !context->auditable) {
+	if (context->in_syscall && !context->dummy && !context->auditable) {
 		enum audit_state state;
 
 		state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_EXIT]);
@@ -530,17 +534,7 @@ static inline struct audit_context *audit_get_context(struct task_struct *tsk,
 	}
 
 get_context:
-	context->pid = tsk->pid;
-	context->ppid = sys_getppid();	/* sic.  tsk == current in all cases */
-	context->uid = tsk->uid;
-	context->gid = tsk->gid;
-	context->euid = tsk->euid;
-	context->suid = tsk->suid;
-	context->fsuid = tsk->fsuid;
-	context->egid = tsk->egid;
-	context->sgid = tsk->sgid;
-	context->fsgid = tsk->fsgid;
-	context->personality = tsk->personality;
+
 	tsk->audit_context = NULL;
 	return context;
 }
@@ -749,6 +743,17 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 	const char *tty;
 
 	/* tsk == current */
+	context->pid = tsk->pid;
+	context->ppid = sys_getppid();	/* sic.  tsk == current in all cases */
+	context->uid = tsk->uid;
+	context->gid = tsk->gid;
+	context->euid = tsk->euid;
+	context->suid = tsk->suid;
+	context->fsuid = tsk->fsuid;
+	context->egid = tsk->egid;
+	context->sgid = tsk->sgid;
+	context->fsgid = tsk->fsgid;
+	context->personality = tsk->personality;
 
 	ab = audit_log_start(context, GFP_KERNEL, AUDIT_SYSCALL);
 	if (!ab)
@@ -1066,7 +1071,8 @@ void audit_syscall_entry(int arch, int major,
 	context->argv[3]    = a4;
 
 	state = context->state;
-	if (state == AUDIT_SETUP_CONTEXT || state == AUDIT_BUILD_CONTEXT)
+	context->dummy = !audit_n_rules;
+	if (!context->dummy && (state == AUDIT_SETUP_CONTEXT || state == AUDIT_BUILD_CONTEXT))
 		state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_ENTRY]);
 	if (likely(state == AUDIT_DISABLED))
 		return;
@@ -1199,14 +1205,18 @@ void audit_putname(const char *name)
 #endif
 }
 
-static void audit_inode_context(int idx, const struct inode *inode)
+/* Copy inode data into an audit_names. */
+static void audit_copy_inode(struct audit_names *name, const struct inode *inode)
 {
-	struct audit_context *context = current->audit_context;
-
-	selinux_get_inode_sid(inode, &context->names[idx].osid);
+	name->ino   = inode->i_ino;
+	name->dev   = inode->i_sb->s_dev;
+	name->mode  = inode->i_mode;
+	name->uid   = inode->i_uid;
+	name->gid   = inode->i_gid;
+	name->rdev  = inode->i_rdev;
+	selinux_get_inode_sid(inode, &name->osid);
 }
 
-
 /**
  * audit_inode - store the inode and device from a lookup
  * @name: name being audited
@@ -1240,20 +1250,14 @@ void __audit_inode(const char *name, const struct inode *inode)
 		++context->ino_count;
 #endif
 	}
-	context->names[idx].ino   = inode->i_ino;
-	context->names[idx].dev   = inode->i_sb->s_dev;
-	context->names[idx].mode  = inode->i_mode;
-	context->names[idx].uid   = inode->i_uid;
-	context->names[idx].gid   = inode->i_gid;
-	context->names[idx].rdev  = inode->i_rdev;
-	audit_inode_context(idx, inode);
+	audit_copy_inode(&context->names[idx], inode);
 }
 
 /**
  * audit_inode_child - collect inode info for created/removed objects
  * @dname: inode's dentry name
  * @inode: inode being audited
- * @pino: inode number of dentry parent
+ * @parent: inode of dentry parent
 *
  * For syscalls that create or remove filesystem objects, audit_inode
  * can only collect information for the filesystem object's parent.
@@ -1264,7 +1268,7 @@ void __audit_inode(const char *name, const struct inode *inode)
  * unsuccessful attempts.
 */
 void __audit_inode_child(const char *dname, const struct inode *inode,
-			 unsigned long pino)
+			 const struct inode *parent)
 {
 	int idx;
 	struct audit_context *context = current->audit_context;
@@ -1278,7 +1282,7 @@ void __audit_inode_child(const char *dname, const struct inode *inode,
 	if (!dname)
 		goto update_context;
 	for (idx = 0; idx < context->name_count; idx++)
-		if (context->names[idx].ino == pino) {
+		if (context->names[idx].ino == parent->i_ino) {
 			const char *name = context->names[idx].name;
 
 			if (!name)
@@ -1302,16 +1306,47 @@ update_context:
 	context->names[idx].name_len = AUDIT_NAME_FULL;
 	context->names[idx].name_put = 0;	/* don't call __putname() */
 
-	if (inode) {
-		context->names[idx].ino   = inode->i_ino;
-		context->names[idx].dev   = inode->i_sb->s_dev;
-		context->names[idx].mode  = inode->i_mode;
-		context->names[idx].uid   = inode->i_uid;
-		context->names[idx].gid   = inode->i_gid;
-		context->names[idx].rdev  = inode->i_rdev;
-		audit_inode_context(idx, inode);
-	} else
-		context->names[idx].ino   = (unsigned long)-1;
+	if (!inode)
+		context->names[idx].ino = (unsigned long)-1;
+	else
+		audit_copy_inode(&context->names[idx], inode);
+
+	/* A parent was not found in audit_names, so copy the inode data for the
+	 * provided parent. */
+	if (!found_name) {
+		idx = context->name_count++;
+#if AUDIT_DEBUG
+		context->ino_count++;
+#endif
+		audit_copy_inode(&context->names[idx], parent);
+	}
+}
+
+/**
+ * audit_inode_update - update inode info for last collected name
+ * @inode: inode being audited
+ *
+ * When open() is called on an existing object with the O_CREAT flag, the inode
+ * data audit initially collects is incorrect.  This additional hook ensures
+ * audit has the inode data for the actual object to be opened.
+ */
+void __audit_inode_update(const struct inode *inode)
+{
+	struct audit_context *context = current->audit_context;
+	int idx;
+
+	if (!context->in_syscall || !inode)
+		return;
+
+	if (context->name_count == 0) {
+		context->name_count++;
+#if AUDIT_DEBUG
+		context->ino_count++;
+#endif
+	}
+	idx = context->name_count - 1;
+
+	audit_copy_inode(&context->names[idx], inode);
 }
 
 /**
@@ -1642,7 +1677,7 @@ int audit_bprm(struct linux_binprm *bprm)
 	unsigned long p, next;
 	void *to;
 
-	if (likely(!audit_enabled || !context))
+	if (likely(!audit_enabled || !context || context->dummy))
 		return 0;
 
 	ax = kmalloc(sizeof(*ax) + PAGE_SIZE * MAX_ARG_PAGES - bprm->p,
@@ -1680,7 +1715,7 @@ int audit_socketcall(int nargs, unsigned long *args)
 	struct audit_aux_data_socketcall *ax;
 	struct audit_context *context = current->audit_context;
 
-	if (likely(!context))
+	if (likely(!context || context->dummy))
 		return 0;
 
 	ax = kmalloc(sizeof(*ax) + nargs * sizeof(unsigned long), GFP_KERNEL);
@@ -1708,7 +1743,7 @@ int audit_sockaddr(int len, void *a)
 	struct audit_aux_data_sockaddr *ax;
 	struct audit_context *context = current->audit_context;
 
-	if (likely(!context))
+	if (likely(!context || context->dummy))
 		return 0;
 
 	ax = kmalloc(sizeof(*ax) + len, GFP_KERNEL);
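Together these auditsc.c hunks implement a fast path for the common no-rules case: `audit_syscall_entry()` latches `context->dummy = !audit_n_rules` once per syscall, and the exit-time filter plus the aux-data collectors (`audit_bprm()`, `audit_socketcall()`, `audit_sockaddr()`) all bail out early on a dummy context, skipping their allocations. A simplified sketch of that control flow (not the full kernel code):

    extern int audit_n_rules;       /* maintained by kernel/auditfilter.c */

    struct ctx_sketch {
            int dummy;              /* 1 => no rules loaded, record nothing */
    };

    /* Simplified audit_syscall_entry(): decide once, cheaply. */
    static void syscall_entry(struct ctx_sketch *ctx)
    {
            ctx->dummy = !audit_n_rules;
    }

    /* Simplified collector: the dummy test replaces a kmalloc() + record. */
    static int collect_aux(struct ctx_sketch *ctx)
    {
            if (!ctx || ctx->dummy)
                    return 0;       /* common case: nothing to do */
            /* ... allocate and attach the auxiliary record here ... */
            return 0;
    }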
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1a649f2bb9bb..4ea6f0dc2fc5 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -816,6 +816,10 @@ static int update_cpumask(struct cpuset *cs, char *buf)
 	struct cpuset trialcs;
 	int retval, cpus_unchanged;
 
+	/* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */
+	if (cs == &top_cpuset)
+		return -EACCES;
+
 	trialcs = *cs;
 	retval = cpulist_parse(buf, trialcs.cpus_allowed);
 	if (retval < 0)
@@ -2033,6 +2037,33 @@ out:
 	return err;
 }
 
+/*
+ * The top_cpuset tracks what CPUs and Memory Nodes are online,
+ * period.  This is necessary in order to make cpusets transparent
+ * (of no affect) on systems that are actively using CPU hotplug
+ * but making no active use of cpusets.
+ *
+ * This handles CPU hotplug (cpuhp) events.  If someday Memory
+ * Nodes can be hotplugged (dynamically changing node_online_map)
+ * then we should handle that too, perhaps in a similar way.
+ */
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int cpuset_handle_cpuhp(struct notifier_block *nb,
+				unsigned long phase, void *cpu)
+{
+	mutex_lock(&manage_mutex);
+	mutex_lock(&callback_mutex);
+
+	top_cpuset.cpus_allowed = cpu_online_map;
+
+	mutex_unlock(&callback_mutex);
+	mutex_unlock(&manage_mutex);
+
+	return 0;
+}
+#endif
+
 /**
  * cpuset_init_smp - initialize cpus_allowed
  *
@@ -2043,6 +2074,8 @@ void __init cpuset_init_smp(void)
 {
 	top_cpuset.cpus_allowed = cpu_online_map;
 	top_cpuset.mems_allowed = node_online_map;
+
+	hotcpu_notifier(cpuset_handle_cpuhp, 0);
 }
 
 /**
@@ -2387,7 +2420,7 @@ EXPORT_SYMBOL_GPL(cpuset_mem_spread_node);
 int cpuset_excl_nodes_overlap(const struct task_struct *p)
 {
 	const struct cpuset *cs1, *cs2;	/* my and p's cpuset ancestors */
-	int overlap = 0;		/* do cpusets overlap? */
+	int overlap = 1;		/* do cpusets overlap? */
 
 	task_lock(current);
 	if (current->flags & PF_EXITING) {
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index f05392d64267..57ca3730205d 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -19,15 +19,15 @@
 #include <linux/sysctl.h>
 #include <linux/delayacct.h>
 
-int delayacct_on __read_mostly;	/* Delay accounting turned on/off */
+int delayacct_on __read_mostly = 1;	/* Delay accounting turned on/off */
 kmem_cache_t *delayacct_cache;
 
-static int __init delayacct_setup_enable(char *str)
+static int __init delayacct_setup_disable(char *str)
 {
-	delayacct_on = 1;
+	delayacct_on = 0;
 	return 1;
 }
-__setup("delayacct", delayacct_setup_enable);
+__setup("nodelayacct", delayacct_setup_disable);
 
 void delayacct_init(void)
 {
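This hunk inverts the default: per-task delay accounting is now enabled at boot, and the `__setup` hook flips from an opt-in to an opt-out. For example, a kernel command line that restores the old default behavior would now read:

    linux ... nodelayacct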
diff --git a/kernel/fork.c b/kernel/fork.c
index 1b0f7b1e0881..aa36c43783cc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1387,8 +1387,10 @@ long do_fork(unsigned long clone_flags,
 
 		if (clone_flags & CLONE_VFORK) {
 			wait_for_completion(&vfork);
-			if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE))
+			if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) {
+				current->ptrace_message = nr;
 				ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
+			}
 		}
 	} else {
 		free_pid(pid);
diff --git a/kernel/futex.c b/kernel/futex.c
index cf0c8e21d1ab..b9b8aea5389e 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -297,7 +297,7 @@ static int futex_handle_fault(unsigned long address, int attempt)
 	struct vm_area_struct * vma;
 	struct mm_struct *mm = current->mm;
 
-	if (attempt >= 2 || !(vma = find_vma(mm, address)) ||
+	if (attempt > 2 || !(vma = find_vma(mm, address)) ||
 	    vma->vm_start > address || !(vma->vm_flags & VM_WRITE))
 		return -EFAULT;
 
@@ -397,7 +397,7 @@ static struct task_struct * futex_find_get_task(pid_t pid)
 		p = NULL;
 		goto out_unlock;
 	}
-	if (p->state == EXIT_ZOMBIE || p->exit_state == EXIT_ZOMBIE) {
+	if (p->exit_state != 0) {
 		p = NULL;
 		goto out_unlock;
 	}
@@ -415,15 +415,15 @@ out_unlock:
 */
 void exit_pi_state_list(struct task_struct *curr)
 {
-	struct futex_hash_bucket *hb;
 	struct list_head *next, *head = &curr->pi_state_list;
 	struct futex_pi_state *pi_state;
+	struct futex_hash_bucket *hb;
 	union futex_key key;
 
 	/*
 	 * We are a ZOMBIE and nobody can enqueue itself on
 	 * pi_state_list anymore, but we have to be careful
-	 * versus waiters unqueueing themselfs
+	 * versus waiters unqueueing themselves:
 	 */
 	spin_lock_irq(&curr->pi_lock);
 	while (!list_empty(head)) {
@@ -431,21 +431,24 @@ void exit_pi_state_list(struct task_struct *curr)
 		next = head->next;
 		pi_state = list_entry(next, struct futex_pi_state, list);
 		key = pi_state->key;
+		hb = hash_futex(&key);
 		spin_unlock_irq(&curr->pi_lock);
 
-		hb = hash_futex(&key);
 		spin_lock(&hb->lock);
 
 		spin_lock_irq(&curr->pi_lock);
+		/*
+		 * We dropped the pi-lock, so re-check whether this
+		 * task still owns the PI-state:
+		 */
 		if (head->next != next) {
 			spin_unlock(&hb->lock);
 			continue;
 		}
 
-		list_del_init(&pi_state->list);
-
 		WARN_ON(pi_state->owner != curr);
-
+		WARN_ON(list_empty(&pi_state->list));
+		list_del_init(&pi_state->list);
 		pi_state->owner = NULL;
 		spin_unlock_irq(&curr->pi_lock);
 
@@ -470,7 +473,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
 	head = &hb->chain;
 
 	list_for_each_entry_safe(this, next, head, list) {
-		if (match_futex (&this->key, &me->key)) {
+		if (match_futex(&this->key, &me->key)) {
 			/*
 			 * Another waiter already exists - bump up
 			 * the refcount and return its pi_state:
@@ -482,6 +485,8 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
 			if (unlikely(!pi_state))
 				return -EINVAL;
 
+			WARN_ON(!atomic_read(&pi_state->refcount));
+
 			atomic_inc(&pi_state->refcount);
 			me->pi_state = pi_state;
 
@@ -490,10 +495,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
 	}
 
 	/*
-	 * We are the first waiter - try to look up the real owner and
-	 * attach the new pi_state to it:
+	 * We are the first waiter - try to look up the real owner and attach
+	 * the new pi_state to it, but bail out when the owner died bit is set
+	 * and TID = 0:
 	 */
 	pid = uval & FUTEX_TID_MASK;
+	if (!pid && (uval & FUTEX_OWNER_DIED))
+		return -ESRCH;
 	p = futex_find_get_task(pid);
 	if (!p)
 		return -ESRCH;
@@ -510,6 +518,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
 	pi_state->key = me->key;
 
 	spin_lock_irq(&p->pi_lock);
+	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &p->pi_state_list);
 	pi_state->owner = p;
 	spin_unlock_irq(&p->pi_lock);
@@ -573,20 +582,29 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 	 * kept enabled while there is PI state around. We must also
 	 * preserve the owner died bit.)
 	 */
-	newval = (uval & FUTEX_OWNER_DIED) | FUTEX_WAITERS | new_owner->pid;
+	if (!(uval & FUTEX_OWNER_DIED)) {
+		newval = FUTEX_WAITERS | new_owner->pid;
 
-	inc_preempt_count();
-	curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
-	dec_preempt_count();
+		inc_preempt_count();
+		curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+		dec_preempt_count();
+		if (curval == -EFAULT)
+			return -EFAULT;
+		if (curval != uval)
+			return -EINVAL;
+	}
 
-	if (curval == -EFAULT)
-		return -EFAULT;
-	if (curval != uval)
-		return -EINVAL;
+	spin_lock_irq(&pi_state->owner->pi_lock);
+	WARN_ON(list_empty(&pi_state->list));
+	list_del_init(&pi_state->list);
+	spin_unlock_irq(&pi_state->owner->pi_lock);
 
-	list_del_init(&pi_state->owner->pi_state_list);
+	spin_lock_irq(&new_owner->pi_lock);
+	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &new_owner->pi_state_list);
 	pi_state->owner = new_owner;
+	spin_unlock_irq(&new_owner->pi_lock);
+
 	rt_mutex_unlock(&pi_state->pi_mutex);
 
 	return 0;
@@ -729,8 +747,10 @@ retry:
 	 */
 		if (attempt++) {
 			if (futex_handle_fault((unsigned long)uaddr2,
-						attempt))
+						attempt)) {
+				ret = -EFAULT;
 				goto out;
+			}
 			goto retry;
 		}
 
@@ -930,6 +950,7 @@ static int unqueue_me(struct futex_q *q)
 	/* In the common case we don't take the spinlock, which is nice. */
 retry:
 	lock_ptr = q->lock_ptr;
+	barrier();
 	if (lock_ptr != 0) {
 		spin_lock(lock_ptr);
 		/*
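The `barrier()` added in `unqueue_me()` is a compiler barrier, not a CPU fence: it pins the snapshot of `q->lock_ptr` taken into `lock_ptr`, so the compiler cannot re-read the field between the NULL test and the `spin_lock()` call while a waker may be clearing it concurrently. A minimal user-space illustration of the idiom, assuming a GCC-style inline-asm barrier:

    #define barrier() __asm__ __volatile__("" : : : "memory")

    struct q_sketch {
            int *lock_ptr;          /* another thread may set this to NULL */
    };

    static void unqueue_sketch(struct q_sketch *q)
    {
            int *lock_ptr = q->lock_ptr;    /* take one snapshot... */
            barrier();      /* ...and forbid the compiler to reload it */
            if (lock_ptr != 0) {
                    /* lock *lock_ptr, then re-check q->lock_ptr under it */
            }
    }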
@@ -1236,6 +1257,7 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
 		/* Owner died? */
 		if (q.pi_state->owner != NULL) {
 			spin_lock_irq(&q.pi_state->owner->pi_lock);
+			WARN_ON(list_empty(&q.pi_state->list));
 			list_del_init(&q.pi_state->list);
 			spin_unlock_irq(&q.pi_state->owner->pi_lock);
 		} else
@@ -1244,6 +1266,7 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
 		q.pi_state->owner = current;
 
 		spin_lock_irq(&current->pi_lock);
+		WARN_ON(!list_empty(&q.pi_state->list));
 		list_add(&q.pi_state->list, &current->pi_state_list);
 		spin_unlock_irq(&current->pi_lock);
 
@@ -1301,9 +1324,10 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
 	 * still holding the mmap_sem.
 	 */
 	if (attempt++) {
-		if (futex_handle_fault((unsigned long)uaddr, attempt))
+		if (futex_handle_fault((unsigned long)uaddr, attempt)) {
+			ret = -EFAULT;
 			goto out_unlock_release_sem;
-
+		}
 		goto retry_locked;
 	}
 
@@ -1427,9 +1451,11 @@ retry_locked:
 	 * again. If it succeeds then we can return without waking
 	 * anyone else up:
 	 */
-	inc_preempt_count();
-	uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
-	dec_preempt_count();
+	if (!(uval & FUTEX_OWNER_DIED)) {
+		inc_preempt_count();
+		uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
+		dec_preempt_count();
+	}
 
 	if (unlikely(uval == -EFAULT))
 		goto pi_faulted;
@@ -1462,9 +1488,11 @@ retry_locked:
 	/*
 	 * No waiters - kernel unlocks the futex:
 	 */
-	ret = unlock_futex_pi(uaddr, uval);
-	if (ret == -EFAULT)
-		goto pi_faulted;
+	if (!(uval & FUTEX_OWNER_DIED)) {
+		ret = unlock_futex_pi(uaddr, uval);
+		if (ret == -EFAULT)
+			goto pi_faulted;
+	}
 
 out_unlock:
 	spin_unlock(&hb->lock);
@@ -1481,9 +1509,10 @@ pi_faulted:
 	 * still holding the mmap_sem.
 	 */
 	if (attempt++) {
-		if (futex_handle_fault((unsigned long)uaddr, attempt))
+		if (futex_handle_fault((unsigned long)uaddr, attempt)) {
+			ret = -EFAULT;
 			goto out_unlock;
-
+		}
 		goto retry_locked;
 	}
 
@@ -1683,9 +1712,9 @@ err_unlock:
 * Process a futex-list entry, check whether it's owned by the
 * dying task, and do notification if so:
 */
-int handle_futex_death(u32 __user *uaddr, struct task_struct *curr)
+int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
 {
-	u32 uval, nval;
+	u32 uval, nval, mval;
 
 retry:
 	if (get_user(uval, uaddr))
@@ -1702,21 +1731,45 @@ retry:
 		 * thread-death.) The rest of the cleanup is done in
 		 * userspace.
 		 */
-		nval = futex_atomic_cmpxchg_inatomic(uaddr, uval,
-						     uval | FUTEX_OWNER_DIED);
+		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
+		nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);
+
 		if (nval == -EFAULT)
 			return -1;
 
 		if (nval != uval)
 			goto retry;
 
-		if (uval & FUTEX_WAITERS)
-			futex_wake(uaddr, 1);
+		/*
+		 * Wake robust non-PI futexes here. The wakeup of
+		 * PI futexes happens in exit_pi_state():
+		 */
+		if (!pi) {
+			if (uval & FUTEX_WAITERS)
+				futex_wake(uaddr, 1);
+		}
 	}
 	return 0;
 }
 
 /*
+ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+ */
+static inline int fetch_robust_entry(struct robust_list __user **entry,
+				     struct robust_list __user **head, int *pi)
+{
+	unsigned long uentry;
+
+	if (get_user(uentry, (unsigned long *)head))
+		return -EFAULT;
+
+	*entry = (void *)(uentry & ~1UL);
+	*pi = uentry & 1;
+
+	return 0;
+}
+
+/*
 * Walk curr->robust_list (very carefully, it's a userspace list!)
 * and mark any locks found there dead, and notify any waiters.
 *
@@ -1726,14 +1779,14 @@ void exit_robust_list(struct task_struct *curr)
 {
 	struct robust_list_head __user *head = curr->robust_list;
 	struct robust_list __user *entry, *pending;
-	unsigned int limit = ROBUST_LIST_LIMIT;
+	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
 	unsigned long futex_offset;
 
 	/*
 	 * Fetch the list head (which was registered earlier, via
 	 * sys_set_robust_list()):
 	 */
-	if (get_user(entry, &head->list.next))
+	if (fetch_robust_entry(&entry, &head->list.next, &pi))
 		return;
 	/*
 	 * Fetch the relative futex offset:
@@ -1744,10 +1797,11 @@ void exit_robust_list(struct task_struct *curr)
 	 * Fetch any possibly pending lock-add first, and handle it
 	 * if it exists:
 	 */
-	if (get_user(pending, &head->list_op_pending))
+	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
 		return;
+
 	if (pending)
-		handle_futex_death((void *)pending + futex_offset, curr);
+		handle_futex_death((void *)pending + futex_offset, curr, pip);
 
 	while (entry != &head->list) {
 		/*
@@ -1756,12 +1810,12 @@ void exit_robust_list(struct task_struct *curr)
 		 */
 		if (entry != pending)
 			if (handle_futex_death((void *)entry + futex_offset,
-						curr))
+						curr, pi))
 				return;
 		/*
 		 * Fetch the next entry in the list:
 		 */
-		if (get_user(entry, &entry->next))
+		if (fetch_robust_entry(&entry, &entry->next, &pi))
 			return;
 		/*
 		 * Avoid excessively long or circular lists:
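`fetch_robust_entry()` relies on user-space `struct robust_list` entries being at least word-aligned, which leaves bit 0 of each pointer free to flag PI futexes. A stand-alone demo of the encode/decode arithmetic:

    #include <stdint.h>
    #include <stdio.h>

    struct entry { struct entry *next; };

    int main(void)
    {
            struct entry e;
            /* Encode: alignment keeps bit 0 of &e free, so use it as "is PI". */
            uintptr_t tagged = (uintptr_t)&e | 1UL;

            /* Decode, as fetch_robust_entry() does with "& ~1UL" and "& 1". */
            struct entry *ptr = (struct entry *)(tagged & ~(uintptr_t)1);
            int pi = (int)(tagged & 1);

            printf("pointer intact: %d, pi flag: %d\n", ptr == &e, pi);
            return 0;
    }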
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index d1d92b441fb7..c5cca3f65cb7 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -12,6 +12,23 @@
 
 #include <asm/uaccess.h>
 
+
+/*
+ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+ */
+static inline int
+fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
+		   compat_uptr_t *head, int *pi)
+{
+	if (get_user(*uentry, head))
+		return -EFAULT;
+
+	*entry = compat_ptr((*uentry) & ~1);
+	*pi = (unsigned int)(*uentry) & 1;
+
+	return 0;
+}
+
 /*
 * Walk curr->robust_list (very carefully, it's a userspace list!)
 * and mark any locks found there dead, and notify any waiters.
@@ -22,17 +39,16 @@ void compat_exit_robust_list(struct task_struct *curr)
 {
 	struct compat_robust_list_head __user *head = curr->compat_robust_list;
 	struct robust_list __user *entry, *pending;
+	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
 	compat_uptr_t uentry, upending;
-	unsigned int limit = ROBUST_LIST_LIMIT;
 	compat_long_t futex_offset;
 
 	/*
 	 * Fetch the list head (which was registered earlier, via
 	 * sys_set_robust_list()):
 	 */
-	if (get_user(uentry, &head->list.next))
+	if (fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
 		return;
-	entry = compat_ptr(uentry);
 	/*
 	 * Fetch the relative futex offset:
 	 */
@@ -42,11 +58,11 @@ void compat_exit_robust_list(struct task_struct *curr)
 	 * Fetch any possibly pending lock-add first, and handle it
 	 * if it exists:
 	 */
-	if (get_user(upending, &head->list_op_pending))
+	if (fetch_robust_entry(&upending, &pending,
+			       &head->list_op_pending, &pip))
 		return;
-	pending = compat_ptr(upending);
 	if (upending)
-		handle_futex_death((void *)pending + futex_offset, curr);
+		handle_futex_death((void *)pending + futex_offset, curr, pip);
 
 	while (compat_ptr(uentry) != &head->list) {
 		/*
@@ -55,15 +71,15 @@ void compat_exit_robust_list(struct task_struct *curr)
 		 */
 		if (entry != pending)
 			if (handle_futex_death((void *)entry + futex_offset,
-						curr))
+						curr, pi))
 				return;
 
 		/*
 		 * Fetch the next entry in the list:
 		 */
-		if (get_user(uentry, (compat_uptr_t *)&entry->next))
+		if (fetch_robust_entry(&uentry, &entry,
+				       (compat_uptr_t *)&entry->next, &pi))
 			return;
-		entry = compat_ptr(uentry);
 		/*
 		 * Avoid excessively long or circular lists:
 		 */
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index d17766d40dab..21c38a7e666b 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -187,7 +187,7 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_base *base)
 {
 	struct hrtimer_base *new_base;
 
-	new_base = &__get_cpu_var(hrtimer_bases[base->index]);
+	new_base = &__get_cpu_var(hrtimer_bases)[base->index];
 
 	if (base != new_base) {
 		/*
@@ -835,7 +835,7 @@ static void migrate_hrtimers(int cpu)
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
-static int __devinit hrtimer_cpu_notify(struct notifier_block *self,
+static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
 					unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
@@ -859,7 +859,7 @@ static int __devinit hrtimer_cpu_notify(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __devinitdata hrtimers_nb = {
+static struct notifier_block __cpuinitdata hrtimers_nb = {
 	.notifier_call = hrtimer_cpu_notify,
 };
 
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 4e461438e48b..92be519eff26 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -137,16 +137,40 @@ EXPORT_SYMBOL(enable_irq);
 * @irq:	interrupt to control
 * @on:	enable/disable power management wakeup
 *
- * Enable/disable power management wakeup mode
+ * Enable/disable power management wakeup mode, which is
+ * disabled by default.  Enables and disables must match,
+ * just as they match for non-wakeup mode support.
+ *
+ * Wakeup mode lets this IRQ wake the system from sleep
+ * states like "suspend to RAM".
 */
 int set_irq_wake(unsigned int irq, unsigned int on)
 {
 	struct irq_desc *desc = irq_desc + irq;
 	unsigned long flags;
 	int ret = -ENXIO;
+	int (*set_wake)(unsigned, unsigned) = desc->chip->set_wake;
 
+	/* wakeup-capable irqs can be shared between drivers that
+	 * don't need to have the same sleep mode behaviors.
+	 */
 	spin_lock_irqsave(&desc->lock, flags);
-	if (desc->chip->set_wake)
+	if (on) {
+		if (desc->wake_depth++ == 0)
+			desc->status |= IRQ_WAKEUP;
+		else
+			set_wake = NULL;
+	} else {
+		if (desc->wake_depth == 0) {
+			printk(KERN_WARNING "Unbalanced IRQ %d "
+					"wake disable\n", irq);
+			WARN_ON(1);
+		} else if (--desc->wake_depth == 0)
+			desc->status &= ~IRQ_WAKEUP;
+		else
+			set_wake = NULL;
+	}
+	if (set_wake)
 		ret = desc->chip->set_wake(irq, on);
 	spin_unlock_irqrestore(&desc->lock, flags);
 	return ret;
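With `wake_depth`, `set_irq_wake()` behaves like a reference count: sharers of one wakeup-capable IRQ may each enable and disable wake mode as long as their calls balance, and only the 0 -> 1 and 1 -> 0 transitions reach the chip's `set_wake` hook. A user-space sketch of the same depth-count pattern (names are illustrative):

    #include <stdio.h>

    static unsigned int wake_depth;         /* analogue of desc->wake_depth */

    static void hw_set_wake(int on)         /* stands in for chip->set_wake() */
    {
            printf("hardware wake %s\n", on ? "armed" : "disarmed");
    }

    static void irq_wake(int on)
    {
            if (on) {
                    if (wake_depth++ == 0)  /* first enabler programs the hw */
                            hw_set_wake(1);
            } else {
                    if (wake_depth == 0)
                            fprintf(stderr, "unbalanced wake disable\n");
                    else if (--wake_depth == 0)
                            hw_set_wake(0); /* last disabler disarms it */
            }
    }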
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 64aab081153b..3f57dfdc8f92 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -393,6 +393,7 @@ static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p)
 static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
 {
 	copy_kprobe(p, ap);
+	flush_insn_slot(ap);
 	ap->addr = p->addr;
 	ap->pre_handler = aggr_pre_handler;
 	ap->fault_handler = aggr_fault_handler;
diff --git a/kernel/panic.c b/kernel/panic.c
index d8a0bca21233..9b8dcfd1ca93 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -18,6 +18,7 @@
 #include <linux/interrupt.h>
 #include <linux/nmi.h>
 #include <linux/kexec.h>
+#include <linux/debug_locks.h>
 
 int panic_on_oops;
 int tainted;
diff --git a/kernel/power/process.c b/kernel/power/process.c
index b2a5f671d6cd..72e72d2c61e6 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -66,13 +66,25 @@ static inline void freeze_process(struct task_struct *p)
 	}
 }
 
+static void cancel_freezing(struct task_struct *p)
+{
+	unsigned long flags;
+
+	if (freezing(p)) {
+		pr_debug("  clean up: %s\n", p->comm);
+		do_not_freeze(p);
+		spin_lock_irqsave(&p->sighand->siglock, flags);
+		recalc_sigpending_tsk(p);
+		spin_unlock_irqrestore(&p->sighand->siglock, flags);
+	}
+}
+
 /* 0 = success, else # of processes that we failed to stop */
 int freeze_processes(void)
 {
 	int todo, nr_user, user_frozen;
 	unsigned long start_time;
 	struct task_struct *g, *p;
-	unsigned long flags;
 
 	printk( "Stopping tasks: " );
 	start_time = jiffies;
@@ -85,6 +97,10 @@ int freeze_processes(void)
 				continue;
 			if (frozen(p))
 				continue;
+			if (p->state == TASK_TRACED && frozen(p->parent)) {
+				cancel_freezing(p);
+				continue;
+			}
 			if (p->mm && !(p->flags & PF_BORROWED_MM)) {
 				/* The task is a user-space one.
 				 * Freeze it unless there's a vfork completion
@@ -126,13 +142,7 @@ int freeze_processes(void)
 		do_each_thread(g, p) {
 			if (freezeable(p) && !frozen(p))
 				printk(KERN_ERR "  %s\n", p->comm);
-			if (freezing(p)) {
-				pr_debug("  clean up: %s\n", p->comm);
-				p->flags &= ~PF_FREEZE;
-				spin_lock_irqsave(&p->sighand->siglock, flags);
-				recalc_sigpending_tsk(p);
-				spin_unlock_irqrestore(&p->sighand->siglock, flags);
-			}
+			cancel_freezing(p);
 		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
 		return todo;
diff --git a/kernel/printk.c b/kernel/printk.c
index 65ca0688f86f..1149365e989e 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -799,6 +799,9 @@ void release_console_sem(void)
 		up(&secondary_console_sem);
 		return;
 	}
+
+	console_may_schedule = 0;
+
 	for ( ; ; ) {
 		spin_lock_irqsave(&logbuf_lock, flags);
 		wake_klogd |= log_start - log_end;
@@ -812,7 +815,6 @@ void release_console_sem(void)
 		local_irq_restore(flags);
 	}
 	console_locked = 0;
-	console_may_schedule = 0;
 	up(&console_sem);
 	spin_unlock_irqrestore(&logbuf_lock, flags);
 	if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) {
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 759805c9859a..436ab35f6fa7 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -548,7 +548,7 @@ static void __devinit rcu_online_cpu(int cpu)
 	tasklet_init(&per_cpu(rcu_tasklet, cpu), rcu_process_callbacks, 0UL);
 }
 
-static int __devinit rcu_cpu_notify(struct notifier_block *self,
+static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 				unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
@@ -565,7 +565,7 @@ static int __devinit rcu_cpu_notify(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __devinitdata rcu_nb = {
+static struct notifier_block __cpuinitdata rcu_nb = {
 	.notifier_call	= rcu_cpu_notify,
 };
 
diff --git a/kernel/resource.c b/kernel/resource.c
index 0dd3a857579e..46286434af80 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -244,6 +244,7 @@ int find_next_system_ram(struct resource *res)
 
 	start = res->start;
 	end = res->end;
+	BUG_ON(start >= end);
 
 	read_lock(&resource_lock);
 	for (p = iomem_resource.child; p ; p = p->sibling) {
@@ -254,15 +255,17 @@ int find_next_system_ram(struct resource *res)
 			p = NULL;
 			break;
 		}
-		if (p->start >= start)
+		if ((p->end >= start) && (p->start < end))
 			break;
 	}
 	read_unlock(&resource_lock);
 	if (!p)
 		return -1;
 	/* copy data */
-	res->start = p->start;
-	res->end = p->end;
+	if (res->start < p->start)
+		res->start = p->start;
+	if (res->end > p->end)
+		res->end = p->end;
 	return 0;
 }
 #endif
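After this change `find_next_system_ram()` reports the intersection of the caller's window with the first overlapping resource, rather than requiring the resource to start inside the window. The clipping at the end reduces to simple interval arithmetic; a self-contained sketch:

    /* Clip a query window [*qs, *qe] against a resource [rs, re].
     * The overlap test mirrors "(p->end >= start) && (p->start < end)". */
    static int clip_to_resource(unsigned long *qs, unsigned long *qe,
                                unsigned long rs, unsigned long re)
    {
            if (re < *qs || rs >= *qe)
                    return -1;              /* no overlap */
            if (*qs < rs)
                    *qs = rs;               /* raise the start */
            if (*qe > re)
                    *qe = re;               /* lower the end */
            return 0;                       /* [*qs, *qe] is the overlap */
    }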
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index d2ef13b485e7..3e13a1e5856f 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -7,6 +7,8 @@
 * Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
 * Copyright (C) 2006 Esben Nielsen
+ *
+ * See Documentation/rt-mutex-design.txt for details.
 */
 #include <linux/spinlock.h>
 #include <linux/module.h>
diff --git a/kernel/sched.c b/kernel/sched.c
index b44b9a43b0fc..a234fbee1238 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4162,10 +4162,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
 		read_unlock_irq(&tasklist_lock);
 		return -ESRCH;
 	}
-	get_task_struct(p);
-	read_unlock_irq(&tasklist_lock);
 	retval = sched_setscheduler(p, policy, &lparam);
-	put_task_struct(p);
+	read_unlock_irq(&tasklist_lock);
 
 	return retval;
 }
@@ -4456,9 +4454,9 @@ asmlinkage long sys_sched_yield(void)
 	return 0;
 }
 
-static inline int __resched_legal(void)
+static inline int __resched_legal(int expected_preempt_count)
 {
-	if (unlikely(preempt_count()))
+	if (unlikely(preempt_count() != expected_preempt_count))
 		return 0;
 	if (unlikely(system_state != SYSTEM_RUNNING))
 		return 0;
@@ -4484,7 +4482,7 @@ static void __cond_resched(void)
 
 int __sched cond_resched(void)
 {
-	if (need_resched() && __resched_legal()) {
+	if (need_resched() && __resched_legal(0)) {
 		__cond_resched();
 		return 1;
 	}
@@ -4510,7 +4508,7 @@ int cond_resched_lock(spinlock_t *lock)
 		ret = 1;
 		spin_lock(lock);
 	}
-	if (need_resched() && __resched_legal()) {
+	if (need_resched() && __resched_legal(1)) {
 		spin_release(&lock->dep_map, 1, _THIS_IP_);
 		_raw_spin_unlock(lock);
 		preempt_enable_no_resched();
@@ -4526,7 +4524,7 @@ int __sched cond_resched_softirq(void)
 {
 	BUG_ON(!in_softirq());
 
-	if (need_resched() && __resched_legal()) {
+	if (need_resched() && __resched_legal(0)) {
 		raw_local_irq_disable();
 		_local_bh_enable();
 		raw_local_irq_enable();
@@ -6494,7 +6492,12 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 	for (i = 0; i < MAX_NUMNODES; i++)
 		init_numa_sched_groups_power(sched_group_nodes[i]);
 
-	init_numa_sched_groups_power(sched_group_allnodes);
+	if (sched_group_allnodes) {
+		int group = cpu_to_allnodes_group(first_cpu(*cpu_map));
+		struct sched_group *sg = &sched_group_allnodes[group];
+
+		init_numa_sched_groups_power(sg);
+	}
 #endif
 
 	/* Attach the domains */
@@ -6761,6 +6764,11 @@ void __init sched_init(void)
 	}
 
 	set_load_weight(&init_task);
+
+#ifdef CONFIG_RT_MUTEXES
+	plist_head_init(&init_task.pi_waiters, &init_task.pi_lock);
+#endif
+
 	/*
 	 * The boot idle thread does lazy MMU switching as well:
 	 */
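`__resched_legal()` now checks against the preempt count the call site expects rather than demanding zero: `cond_resched()` and `cond_resched_softirq()` pass 0, while `cond_resched_lock()` passes 1 because its held spinlock contributes exactly one preempt-disable level. A simplified model of the check (the system_state test is omitted):

    static int preempt_count_model;         /* stand-in for preempt_count() */

    /* Rescheduling is legal only when nothing beyond what the caller
     * accounts for is holding the preempt count up. */
    static int resched_legal(int expected_preempt_count)
    {
            return preempt_count_model == expected_preempt_count;
    }

    /* cond_resched():      resched_legal(0)  - nothing held       */
    /* cond_resched_lock(): resched_legal(1)  - one spinlock held  */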
diff --git a/kernel/signal.c b/kernel/signal.c
index 7fe874d12fae..bfdb5686fa3e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -791,22 +791,31 @@ out:
 /*
  * Force a signal that the process can't ignore: if necessary
  * we unblock the signal and change any SIG_IGN to SIG_DFL.
+ *
+ * Note: If we unblock the signal, we always reset it to SIG_DFL,
+ * since we do not want to have a signal handler that was blocked
+ * be invoked when user space had explicitly blocked it.
+ *
+ * We don't want to have recursive SIGSEGV's etc, for example.
  */
-
 int
 force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
 {
 	unsigned long int flags;
-	int ret;
+	int ret, blocked, ignored;
+	struct k_sigaction *action;
 
 	spin_lock_irqsave(&t->sighand->siglock, flags);
-	if (t->sighand->action[sig-1].sa.sa_handler == SIG_IGN) {
-		t->sighand->action[sig-1].sa.sa_handler = SIG_DFL;
-	}
-	if (sigismember(&t->blocked, sig)) {
-		sigdelset(&t->blocked, sig);
+	action = &t->sighand->action[sig-1];
+	ignored = action->sa.sa_handler == SIG_IGN;
+	blocked = sigismember(&t->blocked, sig);
+	if (blocked || ignored) {
+		action->sa.sa_handler = SIG_DFL;
+		if (blocked) {
+			sigdelset(&t->blocked, sig);
+			recalc_sigpending_tsk(t);
+		}
 	}
-	recalc_sigpending_tsk(t);
 	ret = specific_send_sig_info(sig, info, t);
 	spin_unlock_irqrestore(&t->sighand->siglock, flags);
 
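The rewritten logic makes the handler reset and the unblocking explicit; the behavior implied by the new code, case by case:

    handler state    blocked?   action before sending the signal
    SIG_IGN          no         reset handler to SIG_DFL
    any              yes        reset handler to SIG_DFL, unblock, recalc pending
    SIG_DFL/custom   no         send unchanged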
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 0f08a84ae307..3789ca98197c 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -65,6 +65,7 @@ static inline void wakeup_softirqd(void)
65 * This one is for softirq.c-internal use, 65 * This one is for softirq.c-internal use,
66 * where hardirqs are disabled legitimately: 66 * where hardirqs are disabled legitimately:
67 */ 67 */
68#ifdef CONFIG_TRACE_IRQFLAGS
68static void __local_bh_disable(unsigned long ip) 69static void __local_bh_disable(unsigned long ip)
69{ 70{
70 unsigned long flags; 71 unsigned long flags;
@@ -80,6 +81,13 @@ static void __local_bh_disable(unsigned long ip)
80 trace_softirqs_off(ip); 81 trace_softirqs_off(ip);
81 raw_local_irq_restore(flags); 82 raw_local_irq_restore(flags);
82} 83}
84#else /* !CONFIG_TRACE_IRQFLAGS */
85static inline void __local_bh_disable(unsigned long ip)
86{
87 add_preempt_count(SOFTIRQ_OFFSET);
88 barrier();
89}
90#endif /* CONFIG_TRACE_IRQFLAGS */
83 91
84void local_bh_disable(void) 92void local_bh_disable(void)
85{ 93{
@@ -121,12 +129,16 @@ EXPORT_SYMBOL(_local_bh_enable);
121 129
122void local_bh_enable(void) 130void local_bh_enable(void)
123{ 131{
132#ifdef CONFIG_TRACE_IRQFLAGS
124 unsigned long flags; 133 unsigned long flags;
125 134
126 WARN_ON_ONCE(in_irq()); 135 WARN_ON_ONCE(in_irq());
136#endif
127 WARN_ON_ONCE(irqs_disabled()); 137 WARN_ON_ONCE(irqs_disabled());
128 138
139#ifdef CONFIG_TRACE_IRQFLAGS
129 local_irq_save(flags); 140 local_irq_save(flags);
141#endif
130 /* 142 /*
131 * Are softirqs going to be turned on now: 143 * Are softirqs going to be turned on now:
132 */ 144 */
@@ -142,18 +154,22 @@ void local_bh_enable(void)
142 do_softirq(); 154 do_softirq();
143 155
144 dec_preempt_count(); 156 dec_preempt_count();
157#ifdef CONFIG_TRACE_IRQFLAGS
145 local_irq_restore(flags); 158 local_irq_restore(flags);
159#endif
146 preempt_check_resched(); 160 preempt_check_resched();
147} 161}
148EXPORT_SYMBOL(local_bh_enable); 162EXPORT_SYMBOL(local_bh_enable);
149 163
150void local_bh_enable_ip(unsigned long ip) 164void local_bh_enable_ip(unsigned long ip)
151{ 165{
166#ifdef CONFIG_TRACE_IRQFLAGS
152 unsigned long flags; 167 unsigned long flags;
153 168
154 WARN_ON_ONCE(in_irq()); 169 WARN_ON_ONCE(in_irq());
155 170
156 local_irq_save(flags); 171 local_irq_save(flags);
172#endif
157 /* 173 /*
158 * Are softirqs going to be turned on now: 174 * Are softirqs going to be turned on now:
159 */ 175 */
@@ -169,7 +185,9 @@ void local_bh_enable_ip(unsigned long ip)
169 do_softirq(); 185 do_softirq();
170 186
171 dec_preempt_count(); 187 dec_preempt_count();
188#ifdef CONFIG_TRACE_IRQFLAGS
172 local_irq_restore(flags); 189 local_irq_restore(flags);
190#endif
173 preempt_check_resched(); 191 preempt_check_resched();
174} 192}
175EXPORT_SYMBOL(local_bh_enable_ip); 193EXPORT_SYMBOL(local_bh_enable_ip);
@@ -547,7 +565,7 @@ static void takeover_tasklets(unsigned int cpu)
547} 565}
548#endif /* CONFIG_HOTPLUG_CPU */ 566#endif /* CONFIG_HOTPLUG_CPU */
549 567
550static int __devinit cpu_callback(struct notifier_block *nfb, 568static int __cpuinit cpu_callback(struct notifier_block *nfb,
551 unsigned long action, 569 unsigned long action,
552 void *hcpu) 570 void *hcpu)
553{ 571{
@@ -587,7 +605,7 @@ static int __devinit cpu_callback(struct notifier_block *nfb,
587 return NOTIFY_OK; 605 return NOTIFY_OK;
588} 606}
589 607
590static struct notifier_block __devinitdata cpu_nfb = { 608static struct notifier_block __cpuinitdata cpu_nfb = {
591 .notifier_call = cpu_callback 609 .notifier_call = cpu_callback
592}; 610};
593 611
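The softirq.c hunks split __local_bh_disable() into a tracing variant and a lean inline, and guard the local_irq_save()/restore() pairs in local_bh_enable() and local_bh_enable_ip() the same way, so kernels built without CONFIG_TRACE_IRQFLAGS pay nothing for the tracing hooks. A sketch of the resulting pair, with the tracing body reconstructed from the surrounding context lines:

	#ifdef CONFIG_TRACE_IRQFLAGS
	static void __local_bh_disable(unsigned long ip)
	{
		unsigned long flags;

		raw_local_irq_save(flags);	/* tracing needs irqs stable */
		add_preempt_count(SOFTIRQ_OFFSET);
		if (softirq_count() == SOFTIRQ_OFFSET)
			trace_softirqs_off(ip);	/* record the disable site */
		raw_local_irq_restore(flags);
	}
	#else /* !CONFIG_TRACE_IRQFLAGS */
	static inline void __local_bh_disable(unsigned long ip)
	{
		add_preempt_count(SOFTIRQ_OFFSET);
		barrier();			/* just bump the count */
	}
	#endif /* CONFIG_TRACE_IRQFLAGS */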
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 6b76caa22981..03e6a2b0b787 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -104,7 +104,7 @@ static int watchdog(void * __bind_cpu)
104/* 104/*
105 * Create/destroy watchdog threads as CPUs come and go: 105 * Create/destroy watchdog threads as CPUs come and go:
106 */ 106 */
107static int __devinit 107static int __cpuinit
108cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) 108cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
109{ 109{
110 int hotcpu = (unsigned long)hcpu; 110 int hotcpu = (unsigned long)hcpu;
@@ -142,7 +142,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
142 return NOTIFY_OK; 142 return NOTIFY_OK;
143} 143}
144 144
145static struct notifier_block __devinitdata cpu_nfb = { 145static struct notifier_block __cpuinitdata cpu_nfb = {
146 .notifier_call = cpu_callback 146 .notifier_call = cpu_callback
147}; 147};
148 148
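This file, softirq.c above, and timer.c below also move their hotplug notifiers from __devinit/__devinitdata to __cpuinit/__cpuinitdata: the CPU variants are discarded as init memory when CONFIG_HOTPLUG_CPU is off, whereas __devinit keys off device hotplug and was the wrong section annotation for CPU callbacks. The annotation sits on the definition, as in:

	static int __cpuinit
	cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
	{
		/* create or destroy the per-cpu watchdog thread */
		return NOTIFY_OK;
	}

	static struct notifier_block __cpuinitdata cpu_nfb = {
		.notifier_call = cpu_callback
	};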
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index dcfb5d731466..51cacd111dbd 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -111,7 +111,6 @@ static int stop_machine(void)
111 /* If some failed, kill them all. */ 111 /* If some failed, kill them all. */
112 if (ret < 0) { 112 if (ret < 0) {
113 stopmachine_set_state(STOPMACHINE_EXIT); 113 stopmachine_set_state(STOPMACHINE_EXIT);
114 up(&stopmachine_mutex);
115 return ret; 114 return ret;
116 } 115 }
117 116
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index f45179ce028e..e78187657330 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -121,46 +121,45 @@ static int send_reply(struct sk_buff *skb, pid_t pid)
121/* 121/*
122 * Send taskstats data in @skb to listeners registered for @cpu's exit data 122 * Send taskstats data in @skb to listeners registered for @cpu's exit data
123 */ 123 */
124static int send_cpu_listeners(struct sk_buff *skb, unsigned int cpu) 124static void send_cpu_listeners(struct sk_buff *skb, unsigned int cpu)
125{ 125{
126 struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); 126 struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
127 struct listener_list *listeners; 127 struct listener_list *listeners;
128 struct listener *s, *tmp; 128 struct listener *s, *tmp;
129 struct sk_buff *skb_next, *skb_cur = skb; 129 struct sk_buff *skb_next, *skb_cur = skb;
130 void *reply = genlmsg_data(genlhdr); 130 void *reply = genlmsg_data(genlhdr);
131 int rc, ret, delcount = 0; 131 int rc, delcount = 0;
132 132
133 rc = genlmsg_end(skb, reply); 133 rc = genlmsg_end(skb, reply);
134 if (rc < 0) { 134 if (rc < 0) {
135 nlmsg_free(skb); 135 nlmsg_free(skb);
136 return rc; 136 return;
137 } 137 }
138 138
139 rc = 0; 139 rc = 0;
140 listeners = &per_cpu(listener_array, cpu); 140 listeners = &per_cpu(listener_array, cpu);
141 down_read(&listeners->sem); 141 down_read(&listeners->sem);
142 list_for_each_entry_safe(s, tmp, &listeners->list, list) { 142 list_for_each_entry(s, &listeners->list, list) {
143 skb_next = NULL; 143 skb_next = NULL;
144 if (!list_is_last(&s->list, &listeners->list)) { 144 if (!list_is_last(&s->list, &listeners->list)) {
145 skb_next = skb_clone(skb_cur, GFP_KERNEL); 145 skb_next = skb_clone(skb_cur, GFP_KERNEL);
146 if (!skb_next) { 146 if (!skb_next)
147 nlmsg_free(skb_cur);
148 rc = -ENOMEM;
149 break; 147 break;
150 }
151 } 148 }
152 ret = genlmsg_unicast(skb_cur, s->pid); 149 rc = genlmsg_unicast(skb_cur, s->pid);
153 if (ret == -ECONNREFUSED) { 150 if (rc == -ECONNREFUSED) {
154 s->valid = 0; 151 s->valid = 0;
155 delcount++; 152 delcount++;
156 rc = ret;
157 } 153 }
158 skb_cur = skb_next; 154 skb_cur = skb_next;
159 } 155 }
160 up_read(&listeners->sem); 156 up_read(&listeners->sem);
161 157
158 if (skb_cur)
159 nlmsg_free(skb_cur);
160
162 if (!delcount) 161 if (!delcount)
163 return rc; 162 return;
164 163
165 /* Delete invalidated entries */ 164 /* Delete invalidated entries */
166 down_write(&listeners->sem); 165 down_write(&listeners->sem);
@@ -171,13 +170,12 @@ static int send_cpu_listeners(struct sk_buff *skb, unsigned int cpu)
171 } 170 }
172 } 171 }
173 up_write(&listeners->sem); 172 up_write(&listeners->sem);
174 return rc;
175} 173}
176 174
177static int fill_pid(pid_t pid, struct task_struct *pidtsk, 175static int fill_pid(pid_t pid, struct task_struct *pidtsk,
178 struct taskstats *stats) 176 struct taskstats *stats)
179{ 177{
180 int rc; 178 int rc = 0;
181 struct task_struct *tsk = pidtsk; 179 struct task_struct *tsk = pidtsk;
182 180
183 if (!pidtsk) { 181 if (!pidtsk) {
@@ -196,12 +194,10 @@ static int fill_pid(pid_t pid, struct task_struct *pidtsk,
196 * Each accounting subsystem adds calls to its functions to 194 * Each accounting subsystem adds calls to its functions to
197 * fill in relevant parts of struct taskstats as follows 195 * fill in relevant parts of struct taskstats as follows
198 * 196 *
199 * rc = per-task-foo(stats, tsk); 197 * per-task-foo(stats, tsk);
200 * if (rc)
201 * goto err;
202 */ 198 */
203 199
204 rc = delayacct_add_tsk(stats, tsk); 200 delayacct_add_tsk(stats, tsk);
205 stats->version = TASKSTATS_VERSION; 201 stats->version = TASKSTATS_VERSION;
206 202
207 /* Define err: label here if needed */ 203 /* Define err: label here if needed */
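The send_cpu_listeners() rewrite above makes the function void and tightens buffer ownership: every listener except the last gets a fresh skb_clone(), each buffer is consumed by genlmsg_unicast() whether or not delivery succeeds, and freeing of the one leftover clone is centralized after the loop. A condensed sketch of that fan-out:

	skb_cur = skb;
	list_for_each_entry(s, &listeners->list, list) {
		skb_next = NULL;
		if (!list_is_last(&s->list, &listeners->list)) {
			skb_next = skb_clone(skb_cur, GFP_KERNEL);
			if (!skb_next)		/* OOM: stop fanning out */
				break;
		}
		rc = genlmsg_unicast(skb_cur, s->pid);	/* consumes skb_cur */
		if (rc == -ECONNREFUSED) {
			s->valid = 0;		/* listener is gone */
			delcount++;		/* purged under the write lock */
		}
		skb_cur = skb_next;
	}
	if (skb_cur)
		nlmsg_free(skb_cur);		/* the one unsent clone */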
diff --git a/kernel/timer.c b/kernel/timer.c
index 05809c2e2fd6..1d7dd6267c2d 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -84,7 +84,7 @@ typedef struct tvec_t_base_s tvec_base_t;
84 84
85tvec_base_t boot_tvec_bases; 85tvec_base_t boot_tvec_bases;
86EXPORT_SYMBOL(boot_tvec_bases); 86EXPORT_SYMBOL(boot_tvec_bases);
87static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = { &boot_tvec_bases }; 87static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases;
88 88
89static inline void set_running_timer(tvec_base_t *base, 89static inline void set_running_timer(tvec_base_t *base,
90 struct timer_list *timer) 90 struct timer_list *timer)
@@ -408,7 +408,7 @@ static int cascade(tvec_base_t *base, tvec_t *tv, int index)
408 * This function cascades all vectors and executes all expired timer 408 * This function cascades all vectors and executes all expired timer
409 * vectors. 409 * vectors.
410 */ 410 */
411#define INDEX(N) (base->timer_jiffies >> (TVR_BITS + N * TVN_BITS)) & TVN_MASK 411#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
412 412
413static inline void __run_timers(tvec_base_t *base) 413static inline void __run_timers(tvec_base_t *base)
414{ 414{
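The INDEX() change is routine macro hygiene: the parameter and the whole expansion get parentheses, because * binds tighter than + and + binds tighter than &, so the unfixed macro silently changes meaning with a compound argument or inside a larger expression. A small self-contained demonstration (BAD, GOOD, and the constants are made up for the demo, not kernel code):

	#include <stdio.h>

	#define BAD(N)	(jiffies >> (3 + N * 2)) & 7
	#define GOOD(N)	((jiffies >> (3 + (N) * 2)) & 7)

	int main(void)
	{
		unsigned long jiffies = 0x1234;

		/* Unparenthesized N: 3 + 1 + 1*2, not 3 + (1+1)*2 */
		printf("%lu vs %lu\n", BAD(1 + 1), GOOD(1 + 1));
		/* No outer parens: the "+ 1" binds to the mask as "& 8" */
		printf("%lu vs %lu\n", BAD(0) + 1, GOOD(0) + 1);
		return 0;
	}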
@@ -1324,46 +1324,19 @@ asmlinkage long sys_getpid(void)
1324} 1324}
1325 1325
1326/* 1326/*
1327 * Accessing ->group_leader->real_parent is not SMP-safe, it could 1327 * Accessing ->real_parent is not SMP-safe, it could
1328 * change from under us. However, rather than getting any lock 1328 * change from under us. However, we can use a stale
1329 * we can use an optimistic algorithm: get the parent 1329 * value of ->real_parent under rcu_read_lock(), see
1330 * pid, and go back and check that the parent is still 1330 * release_task()->call_rcu(delayed_put_task_struct).
1331 * the same. If it has changed (which is extremely unlikely
1332 * indeed), we just try again..
1333 *
1334 * NOTE! This depends on the fact that even if we _do_
1335 * get an old value of "parent", we can happily dereference
1336 * the pointer (it was and remains a dereferencable kernel pointer
1337 * no matter what): we just can't necessarily trust the result
1338 * until we know that the parent pointer is valid.
1339 *
1340 * NOTE2: ->group_leader never changes from under us.
1341 */ 1331 */
1342asmlinkage long sys_getppid(void) 1332asmlinkage long sys_getppid(void)
1343{ 1333{
1344 int pid; 1334 int pid;
1345 struct task_struct *me = current;
1346 struct task_struct *parent;
1347 1335
1348 parent = me->group_leader->real_parent; 1336 rcu_read_lock();
1349 for (;;) { 1337 pid = rcu_dereference(current->real_parent)->tgid;
1350 pid = parent->tgid; 1338 rcu_read_unlock();
1351#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
1352{
1353 struct task_struct *old = parent;
1354 1339
1355 /*
1356 * Make sure we read the pid before re-reading the
1357 * parent pointer:
1358 */
1359 smp_rmb();
1360 parent = me->group_leader->real_parent;
1361 if (old != parent)
1362 continue;
1363}
1364#endif
1365 break;
1366 }
1367 return pid; 1340 return pid;
1368} 1341}
1369 1342
@@ -1688,7 +1661,7 @@ static void __devinit migrate_timers(int cpu)
1688} 1661}
1689#endif /* CONFIG_HOTPLUG_CPU */ 1662#endif /* CONFIG_HOTPLUG_CPU */
1690 1663
1691static int __devinit timer_cpu_notify(struct notifier_block *self, 1664static int __cpuinit timer_cpu_notify(struct notifier_block *self,
1692 unsigned long action, void *hcpu) 1665 unsigned long action, void *hcpu)
1693{ 1666{
1694 long cpu = (long)hcpu; 1667 long cpu = (long)hcpu;
@@ -1708,7 +1681,7 @@ static int __devinit timer_cpu_notify(struct notifier_block *self,
1708 return NOTIFY_OK; 1681 return NOTIFY_OK;
1709} 1682}
1710 1683
1711static struct notifier_block __devinitdata timers_nb = { 1684static struct notifier_block __cpuinitdata timers_nb = {
1712 .notifier_call = timer_cpu_notify, 1685 .notifier_call = timer_cpu_notify,
1713}; 1686};
1714 1687
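The sys_getppid() rewrite above swaps the optimistic read-and-recheck loop for the standard RCU read-side pattern: the parent pointer read under rcu_read_lock() may be stale, but since release_task() frees task_structs through call_rcu(), even a stale pointer still refers to valid, unreclaimed memory until the reader leaves the critical section. The pattern in isolation:

	int pid;

	rcu_read_lock();		/* begin read-side critical section */
	/* possibly a stale parent, but it cannot be freed under us */
	pid = rcu_dereference(current->real_parent)->tgid;
	rcu_read_unlock();		/* reclamation may now proceed */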
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index eebb1d839235..835fe28b87a8 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -68,7 +68,7 @@ struct workqueue_struct {
68 68
69/* All the per-cpu workqueues on the system, for hotplug cpu to add/remove 69/* All the per-cpu workqueues on the system, for hotplug cpu to add/remove
70 threads to each one as cpus come/go. */ 70 threads to each one as cpus come/go. */
71static DEFINE_SPINLOCK(workqueue_lock); 71static DEFINE_MUTEX(workqueue_mutex);
72static LIST_HEAD(workqueues); 72static LIST_HEAD(workqueues);
73 73
74static int singlethread_cpu; 74static int singlethread_cpu;
@@ -93,9 +93,12 @@ static void __queue_work(struct cpu_workqueue_struct *cwq,
93 spin_unlock_irqrestore(&cwq->lock, flags); 93 spin_unlock_irqrestore(&cwq->lock, flags);
94} 94}
95 95
96/* 96/**
97 * Queue work on a workqueue. Return non-zero if it was successfully 97 * queue_work - queue work on a workqueue
98 * added. 98 * @wq: workqueue to use
99 * @work: work to queue
100 *
101 * Returns non-zero if it was successfully added.
99 * 102 *
100 * We queue the work to the CPU on which it was submitted, but there is no 103 * We queue the work to the CPU on which it was submitted, but there is no
101 * guarantee that it will be processed by that CPU. 104 * guarantee that it will be processed by that CPU.
@@ -128,6 +131,14 @@ static void delayed_work_timer_fn(unsigned long __data)
128 __queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work); 131 __queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
129} 132}
130 133
134/**
135 * queue_delayed_work - queue work on a workqueue after delay
136 * @wq: workqueue to use
137 * @work: work to queue
138 * @delay: number of jiffies to wait before queueing
139 *
140 * Returns non-zero if it was successfully added.
141 */
131int fastcall queue_delayed_work(struct workqueue_struct *wq, 142int fastcall queue_delayed_work(struct workqueue_struct *wq,
132 struct work_struct *work, unsigned long delay) 143 struct work_struct *work, unsigned long delay)
133{ 144{
@@ -150,6 +161,15 @@ int fastcall queue_delayed_work(struct workqueue_struct *wq,
150} 161}
151EXPORT_SYMBOL_GPL(queue_delayed_work); 162EXPORT_SYMBOL_GPL(queue_delayed_work);
152 163
164/**
165 * queue_delayed_work_on - queue work on specific CPU after delay
166 * @cpu: CPU number to execute work on
167 * @wq: workqueue to use
168 * @work: work to queue
169 * @delay: number of jiffies to wait before queueing
170 *
171 * Returns non-zero if it was successfully added.
172 */
153int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, 173int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
154 struct work_struct *work, unsigned long delay) 174 struct work_struct *work, unsigned long delay)
155{ 175{
@@ -275,8 +295,9 @@ static void flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
275 } 295 }
276} 296}
277 297
278/* 298/**
279 * flush_workqueue - ensure that any scheduled work has run to completion. 299 * flush_workqueue - ensure that any scheduled work has run to completion.
300 * @wq: workqueue to flush
280 * 301 *
281 * Forces execution of the workqueue and blocks until its completion. 302 * Forces execution of the workqueue and blocks until its completion.
282 * This is typically used in driver shutdown handlers. 303 * This is typically used in driver shutdown handlers.
@@ -299,10 +320,10 @@ void fastcall flush_workqueue(struct workqueue_struct *wq)
299 } else { 320 } else {
300 int cpu; 321 int cpu;
301 322
302 lock_cpu_hotplug(); 323 mutex_lock(&workqueue_mutex);
303 for_each_online_cpu(cpu) 324 for_each_online_cpu(cpu)
304 flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu)); 325 flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
305 unlock_cpu_hotplug(); 326 mutex_unlock(&workqueue_mutex);
306 } 327 }
307} 328}
308EXPORT_SYMBOL_GPL(flush_workqueue); 329EXPORT_SYMBOL_GPL(flush_workqueue);
@@ -350,8 +371,7 @@ struct workqueue_struct *__create_workqueue(const char *name,
350 } 371 }
351 372
352 wq->name = name; 373 wq->name = name;
353 /* We don't need the distraction of CPUs appearing and vanishing. */ 374 mutex_lock(&workqueue_mutex);
354 lock_cpu_hotplug();
355 if (singlethread) { 375 if (singlethread) {
356 INIT_LIST_HEAD(&wq->list); 376 INIT_LIST_HEAD(&wq->list);
357 p = create_workqueue_thread(wq, singlethread_cpu); 377 p = create_workqueue_thread(wq, singlethread_cpu);
@@ -360,9 +380,7 @@ struct workqueue_struct *__create_workqueue(const char *name,
360 else 380 else
361 wake_up_process(p); 381 wake_up_process(p);
362 } else { 382 } else {
363 spin_lock(&workqueue_lock);
364 list_add(&wq->list, &workqueues); 383 list_add(&wq->list, &workqueues);
365 spin_unlock(&workqueue_lock);
366 for_each_online_cpu(cpu) { 384 for_each_online_cpu(cpu) {
367 p = create_workqueue_thread(wq, cpu); 385 p = create_workqueue_thread(wq, cpu);
368 if (p) { 386 if (p) {
@@ -372,7 +390,7 @@ struct workqueue_struct *__create_workqueue(const char *name,
372 destroy = 1; 390 destroy = 1;
373 } 391 }
374 } 392 }
375 unlock_cpu_hotplug(); 393 mutex_unlock(&workqueue_mutex);
376 394
377 /* 395 /*
378 * Was there any error during startup? If yes then clean up: 396 * Was there any error during startup? If yes then clean up:
@@ -400,6 +418,12 @@ static void cleanup_workqueue_thread(struct workqueue_struct *wq, int cpu)
400 kthread_stop(p); 418 kthread_stop(p);
401} 419}
402 420
421/**
422 * destroy_workqueue - safely terminate a workqueue
423 * @wq: target workqueue
424 *
425 * Safely destroy a workqueue. All work currently pending will be done first.
426 */
403void destroy_workqueue(struct workqueue_struct *wq) 427void destroy_workqueue(struct workqueue_struct *wq)
404{ 428{
405 int cpu; 429 int cpu;
@@ -407,17 +431,15 @@ void destroy_workqueue(struct workqueue_struct *wq)
407 flush_workqueue(wq); 431 flush_workqueue(wq);
408 432
409 /* We don't need the distraction of CPUs appearing and vanishing. */ 433 /* We don't need the distraction of CPUs appearing and vanishing. */
410 lock_cpu_hotplug(); 434 mutex_lock(&workqueue_mutex);
411 if (is_single_threaded(wq)) 435 if (is_single_threaded(wq))
412 cleanup_workqueue_thread(wq, singlethread_cpu); 436 cleanup_workqueue_thread(wq, singlethread_cpu);
413 else { 437 else {
414 for_each_online_cpu(cpu) 438 for_each_online_cpu(cpu)
415 cleanup_workqueue_thread(wq, cpu); 439 cleanup_workqueue_thread(wq, cpu);
416 spin_lock(&workqueue_lock);
417 list_del(&wq->list); 440 list_del(&wq->list);
418 spin_unlock(&workqueue_lock);
419 } 441 }
420 unlock_cpu_hotplug(); 442 mutex_unlock(&workqueue_mutex);
421 free_percpu(wq->cpu_wq); 443 free_percpu(wq->cpu_wq);
422 kfree(wq); 444 kfree(wq);
423} 445}
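Throughout workqueue.c the old pairing of lock_cpu_hotplug() with a separate workqueue_lock spinlock collapses into the single sleepable workqueue_mutex, so the workqueues list and the per-cpu thread setup/teardown sit under one lock that also excludes CPU hotplug. Condensed from __create_workqueue() and destroy_workqueue() above (error handling elided):

	mutex_lock(&workqueue_mutex);	/* no CPUs appear or vanish now */
	list_add(&wq->list, &workqueues);
	for_each_online_cpu(cpu)
		create_workqueue_thread(wq, cpu);
	mutex_unlock(&workqueue_mutex);

	/* ... and teardown mirrors it ... */
	mutex_lock(&workqueue_mutex);
	for_each_online_cpu(cpu)
		cleanup_workqueue_thread(wq, cpu);
	list_del(&wq->list);
	mutex_unlock(&workqueue_mutex);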
@@ -425,18 +447,41 @@ EXPORT_SYMBOL_GPL(destroy_workqueue);
425 447
426static struct workqueue_struct *keventd_wq; 448static struct workqueue_struct *keventd_wq;
427 449
450/**
451 * schedule_work - put work task in global workqueue
452 * @work: job to be done
453 *
454 * This puts a job in the kernel-global workqueue.
455 */
428int fastcall schedule_work(struct work_struct *work) 456int fastcall schedule_work(struct work_struct *work)
429{ 457{
430 return queue_work(keventd_wq, work); 458 return queue_work(keventd_wq, work);
431} 459}
432EXPORT_SYMBOL(schedule_work); 460EXPORT_SYMBOL(schedule_work);
433 461
462/**
463 * schedule_delayed_work - put work task in global workqueue after delay
464 * @work: job to be done
465 * @delay: number of jiffies to wait
466 *
467 * After waiting for a given time this puts a job in the kernel-global
468 * workqueue.
469 */
434int fastcall schedule_delayed_work(struct work_struct *work, unsigned long delay) 470int fastcall schedule_delayed_work(struct work_struct *work, unsigned long delay)
435{ 471{
436 return queue_delayed_work(keventd_wq, work, delay); 472 return queue_delayed_work(keventd_wq, work, delay);
437} 473}
438EXPORT_SYMBOL(schedule_delayed_work); 474EXPORT_SYMBOL(schedule_delayed_work);
439 475
476/**
477 * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
478 * @cpu: cpu to use
479 * @work: job to be done
480 * @delay: number of jiffies to wait
481 *
482 * After waiting for a given time this puts a job in the kernel-global
483 * workqueue on the specified CPU.
484 */
440int schedule_delayed_work_on(int cpu, 485int schedule_delayed_work_on(int cpu,
441 struct work_struct *work, unsigned long delay) 486 struct work_struct *work, unsigned long delay)
442{ 487{
@@ -465,11 +510,13 @@ int schedule_on_each_cpu(void (*func)(void *info), void *info)
465 if (!works) 510 if (!works)
466 return -ENOMEM; 511 return -ENOMEM;
467 512
513 mutex_lock(&workqueue_mutex);
468 for_each_online_cpu(cpu) { 514 for_each_online_cpu(cpu) {
469 INIT_WORK(per_cpu_ptr(works, cpu), func, info); 515 INIT_WORK(per_cpu_ptr(works, cpu), func, info);
470 __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), 516 __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu),
471 per_cpu_ptr(works, cpu)); 517 per_cpu_ptr(works, cpu));
472 } 518 }
519 mutex_unlock(&workqueue_mutex);
473 flush_workqueue(keventd_wq); 520 flush_workqueue(keventd_wq);
474 free_percpu(works); 521 free_percpu(works);
475 return 0; 522 return 0;
@@ -585,6 +632,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
585 632
586 switch (action) { 633 switch (action) {
587 case CPU_UP_PREPARE: 634 case CPU_UP_PREPARE:
635 mutex_lock(&workqueue_mutex);
588 /* Create a new workqueue thread for it. */ 636 /* Create a new workqueue thread for it. */
589 list_for_each_entry(wq, &workqueues, list) { 637 list_for_each_entry(wq, &workqueues, list) {
590 if (!create_workqueue_thread(wq, hotcpu)) { 638 if (!create_workqueue_thread(wq, hotcpu)) {
@@ -603,6 +651,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
603 kthread_bind(cwq->thread, hotcpu); 651 kthread_bind(cwq->thread, hotcpu);
604 wake_up_process(cwq->thread); 652 wake_up_process(cwq->thread);
605 } 653 }
654 mutex_unlock(&workqueue_mutex);
606 break; 655 break;
607 656
608 case CPU_UP_CANCELED: 657 case CPU_UP_CANCELED:
@@ -614,6 +663,15 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
614 any_online_cpu(cpu_online_map)); 663 any_online_cpu(cpu_online_map));
615 cleanup_workqueue_thread(wq, hotcpu); 664 cleanup_workqueue_thread(wq, hotcpu);
616 } 665 }
666 mutex_unlock(&workqueue_mutex);
667 break;
668
669 case CPU_DOWN_PREPARE:
670 mutex_lock(&workqueue_mutex);
671 break;
672
673 case CPU_DOWN_FAILED:
674 mutex_unlock(&workqueue_mutex);
617 break; 675 break;
618 676
619 case CPU_DEAD: 677 case CPU_DEAD:
@@ -621,6 +679,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
621 cleanup_workqueue_thread(wq, hotcpu); 679 cleanup_workqueue_thread(wq, hotcpu);
622 list_for_each_entry(wq, &workqueues, list) 680 list_for_each_entry(wq, &workqueues, list)
623 take_over_work(wq, hotcpu); 681 take_over_work(wq, hotcpu);
682 mutex_unlock(&workqueue_mutex);
624 break; 683 break;
625 } 684 }
626 685
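The hotplug callback completes the scheme: workqueue_mutex is taken at CPU_UP_PREPARE or CPU_DOWN_PREPARE and released only in the matching completion or failure callback, i.e. the lock is deliberately held across separate notifier invocations so the workqueue list cannot change while a CPU transition is in flight. The down path in isolation, condensed from the hunks above:

	switch (action) {
	case CPU_DOWN_PREPARE:
		mutex_lock(&workqueue_mutex);	/* freeze the workqueue list */
		break;
	case CPU_DOWN_FAILED:
		mutex_unlock(&workqueue_mutex);	/* offline aborted: release */
		break;
	case CPU_DEAD:
		/* take over the dead CPU's work, then... */
		mutex_unlock(&workqueue_mutex);	/* offline complete: release */
		break;
	}
	return NOTIFY_OK;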