Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Kconfig.preempt        |    3
-rw-r--r--  kernel/auditsc.c              |    1
-rw-r--r--  kernel/capability.c           |    4
-rw-r--r--  kernel/cpuset.c               |  185
-rw-r--r--  kernel/exit.c                 |  116
-rw-r--r--  kernel/fork.c                 |   18
-rw-r--r--  kernel/futex.c                |    3
-rw-r--r--  kernel/irq/chip.c             |    3
-rw-r--r--  kernel/irq/manage.c           |   31
-rw-r--r--  kernel/kexec.c                |  113
-rw-r--r--  kernel/kprobes.c              |   62
-rw-r--r--  kernel/ksysfs.c               |   10
-rw-r--r--  kernel/module.c               |  130
-rw-r--r--  kernel/nsproxy.c              |    3
-rw-r--r--  kernel/params.c               |   17
-rw-r--r--  kernel/posix-timers.c         |    3
-rw-r--r--  kernel/printk.c               |  111
-rw-r--r--  kernel/profile.c              |    6
-rw-r--r--  kernel/ptrace.c               |   13
-rw-r--r--  kernel/rcupdate.c             |    1
-rw-r--r--  kernel/rcutorture.c           |   10
-rw-r--r--  kernel/resource.c             |   26
-rw-r--r--  kernel/rtmutex-debug.c        |    7
-rw-r--r--  kernel/sched.c                |  101
-rw-r--r--  kernel/sched_fair.c           |    6
-rw-r--r--  kernel/signal.c               |   24
-rw-r--r--  kernel/softlockup.c           |   54
-rw-r--r--  kernel/sys_ni.c               |    4
-rw-r--r--  kernel/sysctl.c               |   52
-rw-r--r--  kernel/taskstats.c            |    1
-rw-r--r--  kernel/time.c                 |    7
-rw-r--r--  kernel/time/tick-broadcast.c  |   35
-rw-r--r--  kernel/time/tick-sched.c      |    2
-rw-r--r--  kernel/time/timekeeping.c     |   12
-rw-r--r--  kernel/user.c                 |   35
35 files changed, 730 insertions(+), 479 deletions(-)
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index 6b066632e4..c64ce9c142 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -63,6 +63,3 @@ config PREEMPT_BKL
63 Say Y here if you are building a kernel for a desktop system. 63 Say Y here if you are building a kernel for a desktop system.
64 Say N if you are unsure. 64 Say N if you are unsure.
65 65
66config PREEMPT_NOTIFIERS
67 bool
68
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 0ae703c157..938e60a618 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -45,7 +45,6 @@
45#include <linux/init.h> 45#include <linux/init.h>
46#include <asm/types.h> 46#include <asm/types.h>
47#include <asm/atomic.h> 47#include <asm/atomic.h>
48#include <asm/types.h>
49#include <linux/fs.h> 48#include <linux/fs.h>
50#include <linux/namei.h> 49#include <linux/namei.h>
51#include <linux/mm.h> 50#include <linux/mm.h>
diff --git a/kernel/capability.c b/kernel/capability.c
index c8d3c77620..4e350a36ed 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -17,9 +17,6 @@
17unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */ 17unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
18kernel_cap_t cap_bset = CAP_INIT_EFF_SET; 18kernel_cap_t cap_bset = CAP_INIT_EFF_SET;
19 19
20EXPORT_SYMBOL(securebits);
21EXPORT_SYMBOL(cap_bset);
22
23/* 20/*
24 * This lock protects task->cap_* for all tasks including current. 21 * This lock protects task->cap_* for all tasks including current.
25 * Locking rule: acquire this prior to tasklist_lock. 22 * Locking rule: acquire this prior to tasklist_lock.
@@ -244,7 +241,6 @@ int __capable(struct task_struct *t, int cap)
244 } 241 }
245 return 0; 242 return 0;
246} 243}
247EXPORT_SYMBOL(__capable);
248 244
249int capable(int cap) 245int capable(int cap)
250{ 246{
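[Editorial note] The unexporting above only removes module access to securebits, cap_bset and __capable(); the normal in-kernel capability check is unchanged. A minimal illustration of that check, with a hypothetical caller name:

#include <linux/capability.h>
#include <linux/sched.h>
#include <linux/errno.h>

static int foo_do_privileged_op(void)		/* hypothetical caller */
{
	if (!capable(CAP_SYS_ADMIN))		/* check the current task's capabilities */
		return -EPERM;
	/* ... privileged work ... */
	return 0;
}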
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 57e6448b17..2eb2e50db0 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -581,26 +581,28 @@ static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask)
581 581
582/* 582/*
583 * Return in *pmask the portion of a cpusets's mems_allowed that 583 * Return in *pmask the portion of a cpusets's mems_allowed that
584 * are online. If none are online, walk up the cpuset hierarchy 584 * are online, with memory. If none are online with memory, walk
585 * until we find one that does have some online mems. If we get 585 * up the cpuset hierarchy until we find one that does have some
586 * all the way to the top and still haven't found any online mems, 586 * online mems. If we get all the way to the top and still haven't
587 * return node_online_map. 587 * found any online mems, return node_states[N_HIGH_MEMORY].
588 * 588 *
589 * One way or another, we guarantee to return some non-empty subset 589 * One way or another, we guarantee to return some non-empty subset
590 * of node_online_map. 590 * of node_states[N_HIGH_MEMORY].
591 * 591 *
592 * Call with callback_mutex held. 592 * Call with callback_mutex held.
593 */ 593 */
594 594
595static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) 595static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
596{ 596{
597 while (cs && !nodes_intersects(cs->mems_allowed, node_online_map)) 597 while (cs && !nodes_intersects(cs->mems_allowed,
598 node_states[N_HIGH_MEMORY]))
598 cs = cs->parent; 599 cs = cs->parent;
599 if (cs) 600 if (cs)
600 nodes_and(*pmask, cs->mems_allowed, node_online_map); 601 nodes_and(*pmask, cs->mems_allowed,
602 node_states[N_HIGH_MEMORY]);
601 else 603 else
602 *pmask = node_online_map; 604 *pmask = node_states[N_HIGH_MEMORY];
603 BUG_ON(!nodes_intersects(*pmask, node_online_map)); 605 BUG_ON(!nodes_intersects(*pmask, node_states[N_HIGH_MEMORY]));
604} 606}
605 607
606/** 608/**
@@ -753,68 +755,13 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
753} 755}
754 756
755/* 757/*
756 * For a given cpuset cur, partition the system as follows
757 * a. All cpus in the parent cpuset's cpus_allowed that are not part of any
758 * exclusive child cpusets
759 * b. All cpus in the current cpuset's cpus_allowed that are not part of any
760 * exclusive child cpusets
761 * Build these two partitions by calling partition_sched_domains
762 *
763 * Call with manage_mutex held. May nest a call to the
764 * lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
765 * Must not be called holding callback_mutex, because we must
766 * not call lock_cpu_hotplug() while holding callback_mutex.
767 */
768
769static void update_cpu_domains(struct cpuset *cur)
770{
771 struct cpuset *c, *par = cur->parent;
772 cpumask_t pspan, cspan;
773
774 if (par == NULL || cpus_empty(cur->cpus_allowed))
775 return;
776
777 /*
778 * Get all cpus from parent's cpus_allowed not part of exclusive
779 * children
780 */
781 pspan = par->cpus_allowed;
782 list_for_each_entry(c, &par->children, sibling) {
783 if (is_cpu_exclusive(c))
784 cpus_andnot(pspan, pspan, c->cpus_allowed);
785 }
786 if (!is_cpu_exclusive(cur)) {
787 cpus_or(pspan, pspan, cur->cpus_allowed);
788 if (cpus_equal(pspan, cur->cpus_allowed))
789 return;
790 cspan = CPU_MASK_NONE;
791 } else {
792 if (cpus_empty(pspan))
793 return;
794 cspan = cur->cpus_allowed;
795 /*
796 * Get all cpus from current cpuset's cpus_allowed not part
797 * of exclusive children
798 */
799 list_for_each_entry(c, &cur->children, sibling) {
800 if (is_cpu_exclusive(c))
801 cpus_andnot(cspan, cspan, c->cpus_allowed);
802 }
803 }
804
805 lock_cpu_hotplug();
806 partition_sched_domains(&pspan, &cspan);
807 unlock_cpu_hotplug();
808}
809
810/*
811 * Call with manage_mutex held. May take callback_mutex during call. 758 * Call with manage_mutex held. May take callback_mutex during call.
812 */ 759 */
813 760
814static int update_cpumask(struct cpuset *cs, char *buf) 761static int update_cpumask(struct cpuset *cs, char *buf)
815{ 762{
816 struct cpuset trialcs; 763 struct cpuset trialcs;
817 int retval, cpus_unchanged; 764 int retval;
818 765
819 /* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */ 766 /* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */
820 if (cs == &top_cpuset) 767 if (cs == &top_cpuset)
@@ -841,12 +788,9 @@ static int update_cpumask(struct cpuset *cs, char *buf)
841 retval = validate_change(cs, &trialcs); 788 retval = validate_change(cs, &trialcs);
842 if (retval < 0) 789 if (retval < 0)
843 return retval; 790 return retval;
844 cpus_unchanged = cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed);
845 mutex_lock(&callback_mutex); 791 mutex_lock(&callback_mutex);
846 cs->cpus_allowed = trialcs.cpus_allowed; 792 cs->cpus_allowed = trialcs.cpus_allowed;
847 mutex_unlock(&callback_mutex); 793 mutex_unlock(&callback_mutex);
848 if (is_cpu_exclusive(cs) && !cpus_unchanged)
849 update_cpu_domains(cs);
850 return 0; 794 return 0;
851} 795}
852 796
@@ -924,7 +868,10 @@ static int update_nodemask(struct cpuset *cs, char *buf)
924 int fudge; 868 int fudge;
925 int retval; 869 int retval;
926 870
927 /* top_cpuset.mems_allowed tracks node_online_map; it's read-only */ 871 /*
 872 * top_cpuset.mems_allowed tracks node_states[N_HIGH_MEMORY];
873 * it's read-only
874 */
928 if (cs == &top_cpuset) 875 if (cs == &top_cpuset)
929 return -EACCES; 876 return -EACCES;
930 877
@@ -941,8 +888,21 @@ static int update_nodemask(struct cpuset *cs, char *buf)
941 retval = nodelist_parse(buf, trialcs.mems_allowed); 888 retval = nodelist_parse(buf, trialcs.mems_allowed);
942 if (retval < 0) 889 if (retval < 0)
943 goto done; 890 goto done;
891 if (!nodes_intersects(trialcs.mems_allowed,
892 node_states[N_HIGH_MEMORY])) {
893 /*
894 * error if only memoryless nodes specified.
895 */
896 retval = -ENOSPC;
897 goto done;
898 }
944 } 899 }
945 nodes_and(trialcs.mems_allowed, trialcs.mems_allowed, node_online_map); 900 /*
901 * Exclude memoryless nodes. We know that trialcs.mems_allowed
902 * contains at least one node with memory.
903 */
904 nodes_and(trialcs.mems_allowed, trialcs.mems_allowed,
905 node_states[N_HIGH_MEMORY]);
946 oldmem = cs->mems_allowed; 906 oldmem = cs->mems_allowed;
947 if (nodes_equal(oldmem, trialcs.mems_allowed)) { 907 if (nodes_equal(oldmem, trialcs.mems_allowed)) {
948 retval = 0; /* Too easy - nothing to do */ 908 retval = 0; /* Too easy - nothing to do */
@@ -1067,7 +1027,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
1067{ 1027{
1068 int turning_on; 1028 int turning_on;
1069 struct cpuset trialcs; 1029 struct cpuset trialcs;
1070 int err, cpu_exclusive_changed; 1030 int err;
1071 1031
1072 turning_on = (simple_strtoul(buf, NULL, 10) != 0); 1032 turning_on = (simple_strtoul(buf, NULL, 10) != 0);
1073 1033
@@ -1080,14 +1040,10 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
1080 err = validate_change(cs, &trialcs); 1040 err = validate_change(cs, &trialcs);
1081 if (err < 0) 1041 if (err < 0)
1082 return err; 1042 return err;
1083 cpu_exclusive_changed =
1084 (is_cpu_exclusive(cs) != is_cpu_exclusive(&trialcs));
1085 mutex_lock(&callback_mutex); 1043 mutex_lock(&callback_mutex);
1086 cs->flags = trialcs.flags; 1044 cs->flags = trialcs.flags;
1087 mutex_unlock(&callback_mutex); 1045 mutex_unlock(&callback_mutex);
1088 1046
1089 if (cpu_exclusive_changed)
1090 update_cpu_domains(cs);
1091 return 0; 1047 return 0;
1092} 1048}
1093 1049
@@ -1445,7 +1401,7 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
1445 ssize_t retval = 0; 1401 ssize_t retval = 0;
1446 char *s; 1402 char *s;
1447 1403
1448 if (!(page = (char *)__get_free_page(GFP_KERNEL))) 1404 if (!(page = (char *)__get_free_page(GFP_TEMPORARY)))
1449 return -ENOMEM; 1405 return -ENOMEM;
1450 1406
1451 s = page; 1407 s = page;
@@ -1947,17 +1903,6 @@ static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1947 return cpuset_create(c_parent, dentry->d_name.name, mode | S_IFDIR); 1903 return cpuset_create(c_parent, dentry->d_name.name, mode | S_IFDIR);
1948} 1904}
1949 1905
1950/*
1951 * Locking note on the strange update_flag() call below:
1952 *
1953 * If the cpuset being removed is marked cpu_exclusive, then simulate
1954 * turning cpu_exclusive off, which will call update_cpu_domains().
1955 * The lock_cpu_hotplug() call in update_cpu_domains() must not be
1956 * made while holding callback_mutex. Elsewhere the kernel nests
1957 * callback_mutex inside lock_cpu_hotplug() calls. So the reverse
1958 * nesting would risk an ABBA deadlock.
1959 */
1960
1961static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry) 1906static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
1962{ 1907{
1963 struct cpuset *cs = dentry->d_fsdata; 1908 struct cpuset *cs = dentry->d_fsdata;
@@ -1977,13 +1922,6 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
1977 mutex_unlock(&manage_mutex); 1922 mutex_unlock(&manage_mutex);
1978 return -EBUSY; 1923 return -EBUSY;
1979 } 1924 }
1980 if (is_cpu_exclusive(cs)) {
1981 int retval = update_flag(CS_CPU_EXCLUSIVE, cs, "0");
1982 if (retval < 0) {
1983 mutex_unlock(&manage_mutex);
1984 return retval;
1985 }
1986 }
1987 parent = cs->parent; 1925 parent = cs->parent;
1988 mutex_lock(&callback_mutex); 1926 mutex_lock(&callback_mutex);
1989 set_bit(CS_REMOVED, &cs->flags); 1927 set_bit(CS_REMOVED, &cs->flags);
@@ -2098,8 +2036,9 @@ static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur)
2098 2036
2099/* 2037/*
2100 * The cpus_allowed and mems_allowed nodemasks in the top_cpuset track 2038 * The cpus_allowed and mems_allowed nodemasks in the top_cpuset track
2101 * cpu_online_map and node_online_map. Force the top cpuset to track 2039 * cpu_online_map and node_states[N_HIGH_MEMORY]. Force the top cpuset to
2102 * whats online after any CPU or memory node hotplug or unplug event. 2040 * track what's online after any CPU or memory node hotplug or unplug
2041 * event.
2103 * 2042 *
2104 * To ensure that we don't remove a CPU or node from the top cpuset 2043 * To ensure that we don't remove a CPU or node from the top cpuset
2105 * that is currently in use by a child cpuset (which would violate 2044 * that is currently in use by a child cpuset (which would violate
@@ -2119,7 +2058,7 @@ static void common_cpu_mem_hotplug_unplug(void)
2119 2058
2120 guarantee_online_cpus_mems_in_subtree(&top_cpuset); 2059 guarantee_online_cpus_mems_in_subtree(&top_cpuset);
2121 top_cpuset.cpus_allowed = cpu_online_map; 2060 top_cpuset.cpus_allowed = cpu_online_map;
2122 top_cpuset.mems_allowed = node_online_map; 2061 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
2123 2062
2124 mutex_unlock(&callback_mutex); 2063 mutex_unlock(&callback_mutex);
2125 mutex_unlock(&manage_mutex); 2064 mutex_unlock(&manage_mutex);
@@ -2147,8 +2086,9 @@ static int cpuset_handle_cpuhp(struct notifier_block *nb,
2147 2086
2148#ifdef CONFIG_MEMORY_HOTPLUG 2087#ifdef CONFIG_MEMORY_HOTPLUG
2149/* 2088/*
2150 * Keep top_cpuset.mems_allowed tracking node_online_map. 2089 * Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY].
2151 * Call this routine anytime after you change node_online_map. 2090 * Call this routine anytime after you change
2091 * node_states[N_HIGH_MEMORY].
2152 * See also the previous routine cpuset_handle_cpuhp(). 2092 * See also the previous routine cpuset_handle_cpuhp().
2153 */ 2093 */
2154 2094
@@ -2167,7 +2107,7 @@ void cpuset_track_online_nodes(void)
2167void __init cpuset_init_smp(void) 2107void __init cpuset_init_smp(void)
2168{ 2108{
2169 top_cpuset.cpus_allowed = cpu_online_map; 2109 top_cpuset.cpus_allowed = cpu_online_map;
2170 top_cpuset.mems_allowed = node_online_map; 2110 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
2171 2111
2172 hotcpu_notifier(cpuset_handle_cpuhp, 0); 2112 hotcpu_notifier(cpuset_handle_cpuhp, 0);
2173} 2113}
@@ -2309,7 +2249,7 @@ void cpuset_init_current_mems_allowed(void)
2309 * 2249 *
2310 * Description: Returns the nodemask_t mems_allowed of the cpuset 2250 * Description: Returns the nodemask_t mems_allowed of the cpuset
2311 * attached to the specified @tsk. Guaranteed to return some non-empty 2251 * attached to the specified @tsk. Guaranteed to return some non-empty
2312 * subset of node_online_map, even if this means going outside the 2252 * subset of node_states[N_HIGH_MEMORY], even if this means going outside the
2313 * tasks cpuset. 2253 * tasks cpuset.
2314 **/ 2254 **/
2315 2255
@@ -2566,41 +2506,20 @@ int cpuset_mem_spread_node(void)
2566EXPORT_SYMBOL_GPL(cpuset_mem_spread_node); 2506EXPORT_SYMBOL_GPL(cpuset_mem_spread_node);
2567 2507
2568/** 2508/**
2569 * cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors? 2509 * cpuset_mems_allowed_intersects - Does @tsk1's mems_allowed intersect @tsk2's?
2570 * @p: pointer to task_struct of some other task. 2510 * @tsk1: pointer to task_struct of some task.
2571 * 2511 * @tsk2: pointer to task_struct of some other task.
2572 * Description: Return true if the nearest mem_exclusive ancestor 2512 *
2573 * cpusets of tasks @p and current overlap. Used by oom killer to 2513 * Description: Return true if @tsk1's mems_allowed intersects the
2574 * determine if task @p's memory usage might impact the memory 2514 * mems_allowed of @tsk2. Used by the OOM killer to determine if
2575 * available to the current task. 2515 * one of the task's memory usage might impact the memory available
2576 * 2516 * to the other.
2577 * Call while holding callback_mutex.
2578 **/ 2517 **/
2579 2518
2580int cpuset_excl_nodes_overlap(const struct task_struct *p) 2519int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
2520 const struct task_struct *tsk2)
2581{ 2521{
2582 const struct cpuset *cs1, *cs2; /* my and p's cpuset ancestors */ 2522 return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed);
2583 int overlap = 1; /* do cpusets overlap? */
2584
2585 task_lock(current);
2586 if (current->flags & PF_EXITING) {
2587 task_unlock(current);
2588 goto done;
2589 }
2590 cs1 = nearest_exclusive_ancestor(current->cpuset);
2591 task_unlock(current);
2592
2593 task_lock((struct task_struct *)p);
2594 if (p->flags & PF_EXITING) {
2595 task_unlock((struct task_struct *)p);
2596 goto done;
2597 }
2598 cs2 = nearest_exclusive_ancestor(p->cpuset);
2599 task_unlock((struct task_struct *)p);
2600
2601 overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed);
2602done:
2603 return overlap;
2604} 2523}
2605 2524
2606/* 2525/*
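[Editorial note] Two themes run through the cpuset changes above: memory-node masks are now filtered against node_states[N_HIGH_MEMORY] so memoryless nodes are never handed out, and the old mem_exclusive-ancestor overlap test collapses into a plain mems_allowed intersection. A minimal sketch of the nodemask operations involved, using an illustrative helper that is not part of the patch:

#include <linux/nodemask.h>
#include <linux/errno.h>

static int foo_filter_mems(nodemask_t *out)	/* illustrative helper */
{
	nodemask_t wanted = NODE_MASK_NONE;

	node_set(2, wanted);			/* pretend nodes 2 and 3 were requested */
	node_set(3, wanted);

	/* reject a request made up purely of memoryless nodes... */
	if (!nodes_intersects(wanted, node_states[N_HIGH_MEMORY]))
		return -ENOSPC;

	/* ...and drop any memoryless nodes from what we keep */
	nodes_and(*out, wanted, node_states[N_HIGH_MEMORY]);
	return 0;
}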
diff --git a/kernel/exit.c b/kernel/exit.c
index 7f7959de4a..2c704c86ed 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -44,7 +44,6 @@
44#include <linux/resource.h> 44#include <linux/resource.h>
45#include <linux/blkdev.h> 45#include <linux/blkdev.h>
46#include <linux/task_io_accounting_ops.h> 46#include <linux/task_io_accounting_ops.h>
47#include <linux/freezer.h>
48 47
49#include <asm/uaccess.h> 48#include <asm/uaccess.h>
50#include <asm/unistd.h> 49#include <asm/unistd.h>
@@ -93,10 +92,9 @@ static void __exit_signal(struct task_struct *tsk)
93 * If there is any task waiting for the group exit 92 * If there is any task waiting for the group exit
94 * then notify it: 93 * then notify it:
95 */ 94 */
96 if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count) { 95 if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count)
97 wake_up_process(sig->group_exit_task); 96 wake_up_process(sig->group_exit_task);
98 sig->group_exit_task = NULL; 97
99 }
100 if (tsk == sig->curr_target) 98 if (tsk == sig->curr_target)
101 sig->curr_target = next_thread(tsk); 99 sig->curr_target = next_thread(tsk);
102 /* 100 /*
@@ -593,17 +591,6 @@ static void exit_mm(struct task_struct * tsk)
593 mmput(mm); 591 mmput(mm);
594} 592}
595 593
596static inline void
597choose_new_parent(struct task_struct *p, struct task_struct *reaper)
598{
599 /*
600 * Make sure we're not reparenting to ourselves and that
601 * the parent is not a zombie.
602 */
603 BUG_ON(p == reaper || reaper->exit_state);
604 p->real_parent = reaper;
605}
606
607static void 594static void
608reparent_thread(struct task_struct *p, struct task_struct *father, int traced) 595reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
609{ 596{
@@ -711,7 +698,7 @@ forget_original_parent(struct task_struct *father, struct list_head *to_release)
711 698
712 if (father == p->real_parent) { 699 if (father == p->real_parent) {
713 /* reparent with a reaper, real father it's us */ 700 /* reparent with a reaper, real father it's us */
714 choose_new_parent(p, reaper); 701 p->real_parent = reaper;
715 reparent_thread(p, father, 0); 702 reparent_thread(p, father, 0);
716 } else { 703 } else {
717 /* reparent ptraced task to its real parent */ 704 /* reparent ptraced task to its real parent */
@@ -732,7 +719,7 @@ forget_original_parent(struct task_struct *father, struct list_head *to_release)
732 } 719 }
733 list_for_each_safe(_p, _n, &father->ptrace_children) { 720 list_for_each_safe(_p, _n, &father->ptrace_children) {
734 p = list_entry(_p, struct task_struct, ptrace_list); 721 p = list_entry(_p, struct task_struct, ptrace_list);
735 choose_new_parent(p, reaper); 722 p->real_parent = reaper;
736 reparent_thread(p, father, 1); 723 reparent_thread(p, father, 1);
737 } 724 }
738} 725}
@@ -759,13 +746,11 @@ static void exit_notify(struct task_struct *tsk)
759 * Now we'll wake all the threads in the group just to make 746 * Now we'll wake all the threads in the group just to make
760 * sure someone gets all the pending signals. 747 * sure someone gets all the pending signals.
761 */ 748 */
762 read_lock(&tasklist_lock);
763 spin_lock_irq(&tsk->sighand->siglock); 749 spin_lock_irq(&tsk->sighand->siglock);
764 for (t = next_thread(tsk); t != tsk; t = next_thread(t)) 750 for (t = next_thread(tsk); t != tsk; t = next_thread(t))
765 if (!signal_pending(t) && !(t->flags & PF_EXITING)) 751 if (!signal_pending(t) && !(t->flags & PF_EXITING))
766 recalc_sigpending_and_wake(t); 752 recalc_sigpending_and_wake(t);
767 spin_unlock_irq(&tsk->sighand->siglock); 753 spin_unlock_irq(&tsk->sighand->siglock);
768 read_unlock(&tasklist_lock);
769 } 754 }
770 755
771 write_lock_irq(&tasklist_lock); 756 write_lock_irq(&tasklist_lock);
@@ -793,9 +778,8 @@ static void exit_notify(struct task_struct *tsk)
793 * and we were the only connection outside, so our pgrp 778 * and we were the only connection outside, so our pgrp
794 * is about to become orphaned. 779 * is about to become orphaned.
795 */ 780 */
796
797 t = tsk->real_parent; 781 t = tsk->real_parent;
798 782
799 pgrp = task_pgrp(tsk); 783 pgrp = task_pgrp(tsk);
800 if ((task_pgrp(t) != pgrp) && 784 if ((task_pgrp(t) != pgrp) &&
801 (task_session(t) == task_session(tsk)) && 785 (task_session(t) == task_session(tsk)) &&
@@ -842,6 +826,11 @@ static void exit_notify(struct task_struct *tsk)
842 state = EXIT_DEAD; 826 state = EXIT_DEAD;
843 tsk->exit_state = state; 827 tsk->exit_state = state;
844 828
829 if (thread_group_leader(tsk) &&
830 tsk->signal->notify_count < 0 &&
831 tsk->signal->group_exit_task)
832 wake_up_process(tsk->signal->group_exit_task);
833
845 write_unlock_irq(&tasklist_lock); 834 write_unlock_irq(&tasklist_lock);
846 835
847 list_for_each_safe(_p, _n, &ptrace_dead) { 836 list_for_each_safe(_p, _n, &ptrace_dead) {
@@ -883,6 +872,14 @@ static void check_stack_usage(void)
883static inline void check_stack_usage(void) {} 872static inline void check_stack_usage(void) {}
884#endif 873#endif
885 874
875static inline void exit_child_reaper(struct task_struct *tsk)
876{
877 if (likely(tsk->group_leader != child_reaper(tsk)))
878 return;
879
880 panic("Attempted to kill init!");
881}
882
886fastcall NORET_TYPE void do_exit(long code) 883fastcall NORET_TYPE void do_exit(long code)
887{ 884{
888 struct task_struct *tsk = current; 885 struct task_struct *tsk = current;
@@ -896,13 +893,6 @@ fastcall NORET_TYPE void do_exit(long code)
896 panic("Aiee, killing interrupt handler!"); 893 panic("Aiee, killing interrupt handler!");
897 if (unlikely(!tsk->pid)) 894 if (unlikely(!tsk->pid))
898 panic("Attempted to kill the idle task!"); 895 panic("Attempted to kill the idle task!");
899 if (unlikely(tsk == child_reaper(tsk))) {
900 if (tsk->nsproxy->pid_ns != &init_pid_ns)
901 tsk->nsproxy->pid_ns->child_reaper = init_pid_ns.child_reaper;
902 else
903 panic("Attempted to kill init!");
904 }
905
906 896
907 if (unlikely(current->ptrace & PT_TRACE_EXIT)) { 897 if (unlikely(current->ptrace & PT_TRACE_EXIT)) {
908 current->ptrace_message = code; 898 current->ptrace_message = code;
@@ -932,13 +922,13 @@ fastcall NORET_TYPE void do_exit(long code)
932 schedule(); 922 schedule();
933 } 923 }
934 924
925 tsk->flags |= PF_EXITING;
935 /* 926 /*
936 * tsk->flags are checked in the futex code to protect against 927 * tsk->flags are checked in the futex code to protect against
937 * an exiting task cleaning up the robust pi futexes. 928 * an exiting task cleaning up the robust pi futexes.
938 */ 929 */
939 spin_lock_irq(&tsk->pi_lock); 930 smp_mb();
940 tsk->flags |= PF_EXITING; 931 spin_unlock_wait(&tsk->pi_lock);
941 spin_unlock_irq(&tsk->pi_lock);
942 932
943 if (unlikely(in_atomic())) 933 if (unlikely(in_atomic()))
944 printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", 934 printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
@@ -952,16 +942,19 @@ fastcall NORET_TYPE void do_exit(long code)
952 } 942 }
953 group_dead = atomic_dec_and_test(&tsk->signal->live); 943 group_dead = atomic_dec_and_test(&tsk->signal->live);
954 if (group_dead) { 944 if (group_dead) {
945 exit_child_reaper(tsk);
955 hrtimer_cancel(&tsk->signal->real_timer); 946 hrtimer_cancel(&tsk->signal->real_timer);
956 exit_itimers(tsk->signal); 947 exit_itimers(tsk->signal);
957 } 948 }
958 acct_collect(code, group_dead); 949 acct_collect(code, group_dead);
950#ifdef CONFIG_FUTEX
959 if (unlikely(tsk->robust_list)) 951 if (unlikely(tsk->robust_list))
960 exit_robust_list(tsk); 952 exit_robust_list(tsk);
961#if defined(CONFIG_FUTEX) && defined(CONFIG_COMPAT) 953#ifdef CONFIG_COMPAT
962 if (unlikely(tsk->compat_robust_list)) 954 if (unlikely(tsk->compat_robust_list))
963 compat_exit_robust_list(tsk); 955 compat_exit_robust_list(tsk);
964#endif 956#endif
957#endif
965 if (group_dead) 958 if (group_dead)
966 tty_audit_exit(); 959 tty_audit_exit();
967 if (unlikely(tsk->audit_context)) 960 if (unlikely(tsk->audit_context))
@@ -996,6 +989,7 @@ fastcall NORET_TYPE void do_exit(long code)
996 mpol_free(tsk->mempolicy); 989 mpol_free(tsk->mempolicy);
997 tsk->mempolicy = NULL; 990 tsk->mempolicy = NULL;
998#endif 991#endif
992#ifdef CONFIG_FUTEX
999 /* 993 /*
1000 * This must happen late, after the PID is not 994 * This must happen late, after the PID is not
1001 * hashed anymore: 995 * hashed anymore:
@@ -1004,6 +998,7 @@ fastcall NORET_TYPE void do_exit(long code)
1004 exit_pi_state_list(tsk); 998 exit_pi_state_list(tsk);
1005 if (unlikely(current->pi_state_cache)) 999 if (unlikely(current->pi_state_cache))
1006 kfree(current->pi_state_cache); 1000 kfree(current->pi_state_cache);
1001#endif
1007 /* 1002 /*
1008 * Make sure we are holding no locks: 1003 * Make sure we are holding no locks:
1009 */ 1004 */
@@ -1168,8 +1163,7 @@ static int wait_task_zombie(struct task_struct *p, int noreap,
1168 int __user *stat_addr, struct rusage __user *ru) 1163 int __user *stat_addr, struct rusage __user *ru)
1169{ 1164{
1170 unsigned long state; 1165 unsigned long state;
1171 int retval; 1166 int retval, status, traced;
1172 int status;
1173 1167
1174 if (unlikely(noreap)) { 1168 if (unlikely(noreap)) {
1175 pid_t pid = p->pid; 1169 pid_t pid = p->pid;
@@ -1203,15 +1197,11 @@ static int wait_task_zombie(struct task_struct *p, int noreap,
1203 BUG_ON(state != EXIT_DEAD); 1197 BUG_ON(state != EXIT_DEAD);
1204 return 0; 1198 return 0;
1205 } 1199 }
1206 if (unlikely(p->exit_signal == -1 && p->ptrace == 0)) {
1207 /*
1208 * This can only happen in a race with a ptraced thread
1209 * dying on another processor.
1210 */
1211 return 0;
1212 }
1213 1200
1214 if (likely(p->real_parent == p->parent) && likely(p->signal)) { 1201 /* traced means p->ptrace, but not vice versa */
1202 traced = (p->real_parent != p->parent);
1203
1204 if (likely(!traced)) {
1215 struct signal_struct *psig; 1205 struct signal_struct *psig;
1216 struct signal_struct *sig; 1206 struct signal_struct *sig;
1217 1207
@@ -1298,35 +1288,30 @@ static int wait_task_zombie(struct task_struct *p, int noreap,
1298 retval = put_user(p->pid, &infop->si_pid); 1288 retval = put_user(p->pid, &infop->si_pid);
1299 if (!retval && infop) 1289 if (!retval && infop)
1300 retval = put_user(p->uid, &infop->si_uid); 1290 retval = put_user(p->uid, &infop->si_uid);
1301 if (retval) { 1291 if (!retval)
1302 // TODO: is this safe? 1292 retval = p->pid;
1303 p->exit_state = EXIT_ZOMBIE; 1293
1304 return retval; 1294 if (traced) {
1305 }
1306 retval = p->pid;
1307 if (p->real_parent != p->parent) {
1308 write_lock_irq(&tasklist_lock); 1295 write_lock_irq(&tasklist_lock);
1309 /* Double-check with lock held. */ 1296 /* We dropped tasklist, ptracer could die and untrace */
1310 if (p->real_parent != p->parent) { 1297 ptrace_unlink(p);
1311 __ptrace_unlink(p); 1298 /*
1312 // TODO: is this safe? 1299 * If this is not a detached task, notify the parent.
1313 p->exit_state = EXIT_ZOMBIE; 1300 * If it's still not detached after that, don't release
1314 /* 1301 * it now.
1315 * If this is not a detached task, notify the parent. 1302 */
1316 * If it's still not detached after that, don't release 1303 if (p->exit_signal != -1) {
1317 * it now. 1304 do_notify_parent(p, p->exit_signal);
1318 */
1319 if (p->exit_signal != -1) { 1305 if (p->exit_signal != -1) {
1320 do_notify_parent(p, p->exit_signal); 1306 p->exit_state = EXIT_ZOMBIE;
1321 if (p->exit_signal != -1) 1307 p = NULL;
1322 p = NULL;
1323 } 1308 }
1324 } 1309 }
1325 write_unlock_irq(&tasklist_lock); 1310 write_unlock_irq(&tasklist_lock);
1326 } 1311 }
1327 if (p != NULL) 1312 if (p != NULL)
1328 release_task(p); 1313 release_task(p);
1329 BUG_ON(!retval); 1314
1330 return retval; 1315 return retval;
1331} 1316}
1332 1317
@@ -1345,7 +1330,7 @@ static int wait_task_stopped(struct task_struct *p, int delayed_group_leader,
1345 if (!p->exit_code) 1330 if (!p->exit_code)
1346 return 0; 1331 return 0;
1347 if (delayed_group_leader && !(p->ptrace & PT_PTRACED) && 1332 if (delayed_group_leader && !(p->ptrace & PT_PTRACED) &&
1348 p->signal && p->signal->group_stop_count > 0) 1333 p->signal->group_stop_count > 0)
1349 /* 1334 /*
1350 * A group stop is in progress and this is the group leader. 1335 * A group stop is in progress and this is the group leader.
1351 * We won't report until all threads have stopped. 1336 * We won't report until all threads have stopped.
@@ -1459,9 +1444,6 @@ static int wait_task_continued(struct task_struct *p, int noreap,
1459 pid_t pid; 1444 pid_t pid;
1460 uid_t uid; 1445 uid_t uid;
1461 1446
1462 if (unlikely(!p->signal))
1463 return 0;
1464
1465 if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) 1447 if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
1466 return 0; 1448 return 0;
1467 1449
diff --git a/kernel/fork.c b/kernel/fork.c
index 3fc3c13839..490495a39c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -107,6 +107,7 @@ static struct kmem_cache *mm_cachep;
107 107
108void free_task(struct task_struct *tsk) 108void free_task(struct task_struct *tsk)
109{ 109{
110 prop_local_destroy_single(&tsk->dirties);
110 free_thread_info(tsk->stack); 111 free_thread_info(tsk->stack);
111 rt_mutex_debug_task_free(tsk); 112 rt_mutex_debug_task_free(tsk);
112 free_task_struct(tsk); 113 free_task_struct(tsk);
@@ -163,6 +164,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
163{ 164{
164 struct task_struct *tsk; 165 struct task_struct *tsk;
165 struct thread_info *ti; 166 struct thread_info *ti;
167 int err;
166 168
167 prepare_to_copy(orig); 169 prepare_to_copy(orig);
168 170
@@ -178,6 +180,14 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
178 180
179 *tsk = *orig; 181 *tsk = *orig;
180 tsk->stack = ti; 182 tsk->stack = ti;
183
184 err = prop_local_init_single(&tsk->dirties);
185 if (err) {
186 free_thread_info(ti);
187 free_task_struct(tsk);
188 return NULL;
189 }
190
181 setup_thread_stack(tsk, orig); 191 setup_thread_stack(tsk, orig);
182 192
183#ifdef CONFIG_CC_STACKPROTECTOR 193#ifdef CONFIG_CC_STACKPROTECTOR
@@ -1069,7 +1079,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1069 do_posix_clock_monotonic_gettime(&p->start_time); 1079 do_posix_clock_monotonic_gettime(&p->start_time);
1070 p->real_start_time = p->start_time; 1080 p->real_start_time = p->start_time;
1071 monotonic_to_bootbased(&p->real_start_time); 1081 monotonic_to_bootbased(&p->real_start_time);
1082#ifdef CONFIG_SECURITY
1072 p->security = NULL; 1083 p->security = NULL;
1084#endif
1073 p->io_context = NULL; 1085 p->io_context = NULL;
1074 p->io_wait = NULL; 1086 p->io_wait = NULL;
1075 p->audit_context = NULL; 1087 p->audit_context = NULL;
@@ -1146,13 +1158,14 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1146 * Clear TID on mm_release()? 1158 * Clear TID on mm_release()?
1147 */ 1159 */
1148 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL; 1160 p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
1161#ifdef CONFIG_FUTEX
1149 p->robust_list = NULL; 1162 p->robust_list = NULL;
1150#ifdef CONFIG_COMPAT 1163#ifdef CONFIG_COMPAT
1151 p->compat_robust_list = NULL; 1164 p->compat_robust_list = NULL;
1152#endif 1165#endif
1153 INIT_LIST_HEAD(&p->pi_state_list); 1166 INIT_LIST_HEAD(&p->pi_state_list);
1154 p->pi_state_cache = NULL; 1167 p->pi_state_cache = NULL;
1155 1168#endif
1156 /* 1169 /*
1157 * sigaltstack should be cleared when sharing the same VM 1170 * sigaltstack should be cleared when sharing the same VM
1158 */ 1171 */
@@ -1435,8 +1448,7 @@ long do_fork(unsigned long clone_flags,
1435#define ARCH_MIN_MMSTRUCT_ALIGN 0 1448#define ARCH_MIN_MMSTRUCT_ALIGN 0
1436#endif 1449#endif
1437 1450
1438static void sighand_ctor(void *data, struct kmem_cache *cachep, 1451static void sighand_ctor(struct kmem_cache *cachep, void *data)
1439 unsigned long flags)
1440{ 1452{
1441 struct sighand_struct *sighand = data; 1453 struct sighand_struct *sighand = data;
1442 1454
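[Editorial note] dup_task_struct() now has a second setup step that can fail (prop_local_init_single() for the per-task dirty-page proportion), so it gained explicit unwinding; free_task() tears the counter down again. A compact sketch of the same init/unwind ordering, with hypothetical names:

#include <linux/proportions.h>
#include <linux/slab.h>

struct foo {				/* hypothetical container */
	struct prop_local_single dirties;
	/* ... other fields ... */
};

static struct foo *foo_alloc(void)
{
	struct foo *f = kmalloc(sizeof(*f), GFP_KERNEL);

	if (!f)
		return NULL;
	if (prop_local_init_single(&f->dirties)) {	/* check the error code, as above */
		kfree(f);				/* unwind in reverse order */
		return NULL;
	}
	return f;
}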
diff --git a/kernel/futex.c b/kernel/futex.c
index fcc94e7b40..d725676d84 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -52,6 +52,7 @@
52#include <linux/syscalls.h> 52#include <linux/syscalls.h>
53#include <linux/signal.h> 53#include <linux/signal.h>
54#include <linux/module.h> 54#include <linux/module.h>
55#include <linux/magic.h>
55#include <asm/futex.h> 56#include <asm/futex.h>
56 57
57#include "rtmutex_common.h" 58#include "rtmutex_common.h"
@@ -2080,7 +2081,7 @@ static int futexfs_get_sb(struct file_system_type *fs_type,
2080 int flags, const char *dev_name, void *data, 2081 int flags, const char *dev_name, void *data,
2081 struct vfsmount *mnt) 2082 struct vfsmount *mnt)
2082{ 2083{
2083 return get_sb_pseudo(fs_type, "futex", NULL, 0xBAD1DEA, mnt); 2084 return get_sb_pseudo(fs_type, "futex", NULL, FUTEXFS_SUPER_MAGIC, mnt);
2084} 2085}
2085 2086
2086static struct file_system_type futex_fs_type = { 2087static struct file_system_type futex_fs_type = {
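[Editorial note] The superblock magic itself is unchanged; it only gains a name alongside the other filesystem magics. Per the new <linux/magic.h> include and the literal it replaces, the definition is simply:

/* include/linux/magic.h */
#define FUTEXFS_SUPER_MAGIC	0xBAD1DEA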
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index f1a73f0b54..9b5dff6b3f 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -503,7 +503,6 @@ out_unlock:
503 spin_unlock(&desc->lock); 503 spin_unlock(&desc->lock);
504} 504}
505 505
506#ifdef CONFIG_SMP
507/** 506/**
508 * handle_percpu_IRQ - Per CPU local irq handler 507 * handle_percpu_IRQ - Per CPU local irq handler
509 * @irq: the interrupt number 508 * @irq: the interrupt number
@@ -529,8 +528,6 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
529 desc->chip->eoi(irq); 528 desc->chip->eoi(irq);
530} 529}
531 530
532#endif /* CONFIG_SMP */
533
534void 531void
535__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, 532__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
536 const char *name) 533 const char *name)
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 7230d914ea..80eab7a042 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -405,7 +405,6 @@ void free_irq(unsigned int irq, void *dev_id)
405 struct irq_desc *desc; 405 struct irq_desc *desc;
406 struct irqaction **p; 406 struct irqaction **p;
407 unsigned long flags; 407 unsigned long flags;
408 irqreturn_t (*handler)(int, void *) = NULL;
409 408
410 WARN_ON(in_interrupt()); 409 WARN_ON(in_interrupt());
411 if (irq >= NR_IRQS) 410 if (irq >= NR_IRQS)
@@ -445,8 +444,21 @@ void free_irq(unsigned int irq, void *dev_id)
445 444
446 /* Make sure it's not being used on another CPU */ 445 /* Make sure it's not being used on another CPU */
447 synchronize_irq(irq); 446 synchronize_irq(irq);
448 if (action->flags & IRQF_SHARED) 447#ifdef CONFIG_DEBUG_SHIRQ
449 handler = action->handler; 448 /*
449 * It's a shared IRQ -- the driver ought to be
450 * prepared for it to happen even now it's
451 * being freed, so let's make sure.... We do
452 * this after actually deregistering it, to
453 * make sure that a 'real' IRQ doesn't run in
454 * parallel with our fake
455 */
456 if (action->flags & IRQF_SHARED) {
457 local_irq_save(flags);
458 action->handler(irq, dev_id);
459 local_irq_restore(flags);
460 }
461#endif
450 kfree(action); 462 kfree(action);
451 return; 463 return;
452 } 464 }
@@ -454,19 +466,6 @@ void free_irq(unsigned int irq, void *dev_id)
454 spin_unlock_irqrestore(&desc->lock, flags); 466 spin_unlock_irqrestore(&desc->lock, flags);
455 return; 467 return;
456 } 468 }
457#ifdef CONFIG_DEBUG_SHIRQ
458 if (handler) {
459 /*
460 * It's a shared IRQ -- the driver ought to be prepared for it
461 * to happen even now it's being freed, so let's make sure....
462 * We do this after actually deregistering it, to make sure that
463 * a 'real' IRQ doesn't run in parallel with our fake
464 */
465 local_irq_save(flags);
466 handler(irq, dev_id);
467 local_irq_restore(flags);
468 }
469#endif
470} 469}
471EXPORT_SYMBOL(free_irq); 470EXPORT_SYMBOL(free_irq);
472 471
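[Editorial note] With CONFIG_DEBUG_SHIRQ, free_irq() above now fires one final fake invocation of a shared handler right after unhooking it, so shared-IRQ drivers must cope with being called when their device has nothing pending. A sketch of a handler written that way; the device structure, register offsets and names are all hypothetical:

#include <linux/interrupt.h>
#include <linux/io.h>

#define FOO_INT_STATUS	0x00		/* hypothetical registers */
#define FOO_INT_ACK	0x04

struct foo_dev {			/* hypothetical per-device state */
	void __iomem *mmio;
};

static irqreturn_t foo_interrupt(int irq, void *dev_id)
{
	struct foo_dev *fd = dev_id;
	u32 status = readl(fd->mmio + FOO_INT_STATUS);

	if (!status)			/* not ours, or device already quiesced */
		return IRQ_NONE;	/* this also covers the DEBUG_SHIRQ fake call */

	writel(status, fd->mmio + FOO_INT_ACK);
	/* ... handle the events indicated by status ... */
	return IRQ_HANDLED;
}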
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 25db14b89e..7885269b0d 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -17,21 +17,30 @@
17#include <linux/highmem.h> 17#include <linux/highmem.h>
18#include <linux/syscalls.h> 18#include <linux/syscalls.h>
19#include <linux/reboot.h> 19#include <linux/reboot.h>
20#include <linux/syscalls.h>
21#include <linux/ioport.h> 20#include <linux/ioport.h>
22#include <linux/hardirq.h> 21#include <linux/hardirq.h>
23#include <linux/elf.h> 22#include <linux/elf.h>
24#include <linux/elfcore.h> 23#include <linux/elfcore.h>
24#include <linux/utsrelease.h>
25#include <linux/utsname.h>
26#include <linux/numa.h>
25 27
26#include <asm/page.h> 28#include <asm/page.h>
27#include <asm/uaccess.h> 29#include <asm/uaccess.h>
28#include <asm/io.h> 30#include <asm/io.h>
29#include <asm/system.h> 31#include <asm/system.h>
30#include <asm/semaphore.h> 32#include <asm/semaphore.h>
33#include <asm/sections.h>
31 34
32/* Per cpu memory for storing cpu states in case of system crash. */ 35/* Per cpu memory for storing cpu states in case of system crash. */
33note_buf_t* crash_notes; 36note_buf_t* crash_notes;
34 37
38/* vmcoreinfo stuff */
39unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
40u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
41size_t vmcoreinfo_size;
42size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
43
35/* Location of the reserved area for the crash kernel */ 44/* Location of the reserved area for the crash kernel */
36struct resource crashk_res = { 45struct resource crashk_res = {
37 .name = "Crash kernel", 46 .name = "Crash kernel",
@@ -1061,6 +1070,7 @@ void crash_kexec(struct pt_regs *regs)
1061 if (kexec_crash_image) { 1070 if (kexec_crash_image) {
1062 struct pt_regs fixed_regs; 1071 struct pt_regs fixed_regs;
1063 crash_setup_regs(&fixed_regs, regs); 1072 crash_setup_regs(&fixed_regs, regs);
1073 crash_save_vmcoreinfo();
1064 machine_crash_shutdown(&fixed_regs); 1074 machine_crash_shutdown(&fixed_regs);
1065 machine_kexec(kexec_crash_image); 1075 machine_kexec(kexec_crash_image);
1066 } 1076 }
@@ -1135,3 +1145,104 @@ static int __init crash_notes_memory_init(void)
1135 return 0; 1145 return 0;
1136} 1146}
1137module_init(crash_notes_memory_init) 1147module_init(crash_notes_memory_init)
1148
1149void crash_save_vmcoreinfo(void)
1150{
1151 u32 *buf;
1152
1153 if (!vmcoreinfo_size)
1154 return;
1155
1156 vmcoreinfo_append_str("CRASHTIME=%ld", get_seconds());
1157
1158 buf = (u32 *)vmcoreinfo_note;
1159
1160 buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
1161 vmcoreinfo_size);
1162
1163 final_note(buf);
1164}
1165
1166void vmcoreinfo_append_str(const char *fmt, ...)
1167{
1168 va_list args;
1169 char buf[0x50];
1170 int r;
1171
1172 va_start(args, fmt);
1173 r = vsnprintf(buf, sizeof(buf), fmt, args);
1174 va_end(args);
1175
1176 if (r + vmcoreinfo_size > vmcoreinfo_max_size)
1177 r = vmcoreinfo_max_size - vmcoreinfo_size;
1178
1179 memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
1180
1181 vmcoreinfo_size += r;
1182}
1183
1184/*
1185 * provide an empty default implementation here -- architecture
1186 * code may override this
1187 */
1188void __attribute__ ((weak)) arch_crash_save_vmcoreinfo(void)
1189{}
1190
1191unsigned long __attribute__ ((weak)) paddr_vmcoreinfo_note(void)
1192{
1193 return __pa((unsigned long)(char *)&vmcoreinfo_note);
1194}
1195
1196static int __init crash_save_vmcoreinfo_init(void)
1197{
1198 vmcoreinfo_append_str("OSRELEASE=%s\n", init_uts_ns.name.release);
1199 vmcoreinfo_append_str("PAGESIZE=%ld\n", PAGE_SIZE);
1200
1201 VMCOREINFO_SYMBOL(init_uts_ns);
1202 VMCOREINFO_SYMBOL(node_online_map);
1203 VMCOREINFO_SYMBOL(swapper_pg_dir);
1204 VMCOREINFO_SYMBOL(_stext);
1205
1206#ifndef CONFIG_NEED_MULTIPLE_NODES
1207 VMCOREINFO_SYMBOL(mem_map);
1208 VMCOREINFO_SYMBOL(contig_page_data);
1209#endif
1210#ifdef CONFIG_SPARSEMEM
1211 VMCOREINFO_SYMBOL(mem_section);
1212 VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
1213 VMCOREINFO_SIZE(mem_section);
1214 VMCOREINFO_OFFSET(mem_section, section_mem_map);
1215#endif
1216 VMCOREINFO_SIZE(page);
1217 VMCOREINFO_SIZE(pglist_data);
1218 VMCOREINFO_SIZE(zone);
1219 VMCOREINFO_SIZE(free_area);
1220 VMCOREINFO_SIZE(list_head);
1221 VMCOREINFO_TYPEDEF_SIZE(nodemask_t);
1222 VMCOREINFO_OFFSET(page, flags);
1223 VMCOREINFO_OFFSET(page, _count);
1224 VMCOREINFO_OFFSET(page, mapping);
1225 VMCOREINFO_OFFSET(page, lru);
1226 VMCOREINFO_OFFSET(pglist_data, node_zones);
1227 VMCOREINFO_OFFSET(pglist_data, nr_zones);
1228#ifdef CONFIG_FLAT_NODE_MEM_MAP
1229 VMCOREINFO_OFFSET(pglist_data, node_mem_map);
1230#endif
1231 VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
1232 VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
1233 VMCOREINFO_OFFSET(pglist_data, node_id);
1234 VMCOREINFO_OFFSET(zone, free_area);
1235 VMCOREINFO_OFFSET(zone, vm_stat);
1236 VMCOREINFO_OFFSET(zone, spanned_pages);
1237 VMCOREINFO_OFFSET(free_area, free_list);
1238 VMCOREINFO_OFFSET(list_head, next);
1239 VMCOREINFO_OFFSET(list_head, prev);
1240 VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
1241 VMCOREINFO_NUMBER(NR_FREE_PAGES);
1242
1243 arch_crash_save_vmcoreinfo();
1244
1245 return 0;
1246}
1247
1248module_init(crash_save_vmcoreinfo_init)
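[Editorial note] crash_save_vmcoreinfo_init() above records the symbols, sizes and offsets a dump parser needs, and arch_crash_save_vmcoreinfo() is left as a weak hook for architecture additions. A sketch of what such an override could add; the extra symbol and key are made up for illustration:

#include <linux/kexec.h>

extern unsigned long foo_arch_pgd_base;		/* made-up arch symbol */

void arch_crash_save_vmcoreinfo(void)
{
	VMCOREINFO_SYMBOL(foo_arch_pgd_base);
	vmcoreinfo_append_str("CONFIG_FOO=y\n");	/* any extra key=value the parser wants */
}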
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 4b8a4493c5..e3a5d817ac 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -64,7 +64,6 @@
64 64
65static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; 65static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
66static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; 66static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
67static atomic_t kprobe_count;
68 67
69/* NOTE: change this value only with kprobe_mutex held */ 68/* NOTE: change this value only with kprobe_mutex held */
70static bool kprobe_enabled; 69static bool kprobe_enabled;
@@ -73,11 +72,6 @@ DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */
73DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */ 72DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */
74static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; 73static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
75 74
76static struct notifier_block kprobe_page_fault_nb = {
77 .notifier_call = kprobe_exceptions_notify,
78 .priority = 0x7fffffff /* we need to notified first */
79};
80
81#ifdef __ARCH_WANT_KPROBES_INSN_SLOT 75#ifdef __ARCH_WANT_KPROBES_INSN_SLOT
82/* 76/*
83 * kprobe->ainsn.insn points to the copy of the instruction to be 77 * kprobe->ainsn.insn points to the copy of the instruction to be
@@ -556,8 +550,6 @@ static int __kprobes __register_kprobe(struct kprobe *p,
556 old_p = get_kprobe(p->addr); 550 old_p = get_kprobe(p->addr);
557 if (old_p) { 551 if (old_p) {
558 ret = register_aggr_kprobe(old_p, p); 552 ret = register_aggr_kprobe(old_p, p);
559 if (!ret)
560 atomic_inc(&kprobe_count);
561 goto out; 553 goto out;
562 } 554 }
563 555
@@ -569,13 +561,9 @@ static int __kprobes __register_kprobe(struct kprobe *p,
569 hlist_add_head_rcu(&p->hlist, 561 hlist_add_head_rcu(&p->hlist,
570 &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); 562 &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
571 563
572 if (kprobe_enabled) { 564 if (kprobe_enabled)
573 if (atomic_add_return(1, &kprobe_count) == \
574 (ARCH_INACTIVE_KPROBE_COUNT + 1))
575 register_page_fault_notifier(&kprobe_page_fault_nb);
576
577 arch_arm_kprobe(p); 565 arch_arm_kprobe(p);
578 } 566
579out: 567out:
580 mutex_unlock(&kprobe_mutex); 568 mutex_unlock(&kprobe_mutex);
581 569
@@ -658,16 +646,6 @@ valid_p:
658 } 646 }
659 mutex_unlock(&kprobe_mutex); 647 mutex_unlock(&kprobe_mutex);
660 } 648 }
661
662 /* Call unregister_page_fault_notifier()
663 * if no probes are active
664 */
665 mutex_lock(&kprobe_mutex);
666 if (atomic_add_return(-1, &kprobe_count) == \
667 ARCH_INACTIVE_KPROBE_COUNT)
668 unregister_page_fault_notifier(&kprobe_page_fault_nb);
669 mutex_unlock(&kprobe_mutex);
670 return;
671} 649}
672 650
673static struct notifier_block kprobe_exceptions_nb = { 651static struct notifier_block kprobe_exceptions_nb = {
@@ -738,6 +716,18 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
738 int ret = 0; 716 int ret = 0;
739 struct kretprobe_instance *inst; 717 struct kretprobe_instance *inst;
740 int i; 718 int i;
719 void *addr = rp->kp.addr;
720
721 if (kretprobe_blacklist_size) {
722 if (addr == NULL)
723 kprobe_lookup_name(rp->kp.symbol_name, addr);
724 addr += rp->kp.offset;
725
726 for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
727 if (kretprobe_blacklist[i].addr == addr)
728 return -EINVAL;
729 }
730 }
741 731
742 rp->kp.pre_handler = pre_handler_kretprobe; 732 rp->kp.pre_handler = pre_handler_kretprobe;
743 rp->kp.post_handler = NULL; 733 rp->kp.post_handler = NULL;
@@ -815,7 +805,17 @@ static int __init init_kprobes(void)
815 INIT_HLIST_HEAD(&kprobe_table[i]); 805 INIT_HLIST_HEAD(&kprobe_table[i]);
816 INIT_HLIST_HEAD(&kretprobe_inst_table[i]); 806 INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
817 } 807 }
818 atomic_set(&kprobe_count, 0); 808
809 if (kretprobe_blacklist_size) {
810 /* lookup the function address from its name */
811 for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
812 kprobe_lookup_name(kretprobe_blacklist[i].name,
813 kretprobe_blacklist[i].addr);
814 if (!kretprobe_blacklist[i].addr)
815 printk("kretprobe: lookup failed: %s\n",
816 kretprobe_blacklist[i].name);
817 }
818 }
819 819
820 /* By default, kprobes are enabled */ 820 /* By default, kprobes are enabled */
821 kprobe_enabled = true; 821 kprobe_enabled = true;
@@ -921,13 +921,6 @@ static void __kprobes enable_all_kprobes(void)
921 if (kprobe_enabled) 921 if (kprobe_enabled)
922 goto already_enabled; 922 goto already_enabled;
923 923
924 /*
925 * Re-register the page fault notifier only if there are any
926 * active probes at the time of enabling kprobes globally
927 */
928 if (atomic_read(&kprobe_count) > ARCH_INACTIVE_KPROBE_COUNT)
929 register_page_fault_notifier(&kprobe_page_fault_nb);
930
931 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 924 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
932 head = &kprobe_table[i]; 925 head = &kprobe_table[i];
933 hlist_for_each_entry_rcu(p, node, head, hlist) 926 hlist_for_each_entry_rcu(p, node, head, hlist)
@@ -968,10 +961,7 @@ static void __kprobes disable_all_kprobes(void)
968 mutex_unlock(&kprobe_mutex); 961 mutex_unlock(&kprobe_mutex);
969 /* Allow all currently running kprobes to complete */ 962 /* Allow all currently running kprobes to complete */
970 synchronize_sched(); 963 synchronize_sched();
971 964 return;
972 mutex_lock(&kprobe_mutex);
973 /* Unconditionally unregister the page_fault notifier */
974 unregister_page_fault_notifier(&kprobe_page_fault_nb);
975 965
976already_disabled: 966already_disabled:
977 mutex_unlock(&kprobe_mutex); 967 mutex_unlock(&kprobe_mutex);
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 6046939d08..65daa5373c 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -61,6 +61,15 @@ static ssize_t kexec_crash_loaded_show(struct kset *kset, char *page)
61 return sprintf(page, "%d\n", !!kexec_crash_image); 61 return sprintf(page, "%d\n", !!kexec_crash_image);
62} 62}
63KERNEL_ATTR_RO(kexec_crash_loaded); 63KERNEL_ATTR_RO(kexec_crash_loaded);
64
65static ssize_t vmcoreinfo_show(struct kset *kset, char *page)
66{
67 return sprintf(page, "%lx %x\n",
68 paddr_vmcoreinfo_note(),
69 (unsigned int)vmcoreinfo_max_size);
70}
71KERNEL_ATTR_RO(vmcoreinfo);
72
64#endif /* CONFIG_KEXEC */ 73#endif /* CONFIG_KEXEC */
65 74
66/* 75/*
@@ -96,6 +105,7 @@ static struct attribute * kernel_attrs[] = {
96#ifdef CONFIG_KEXEC 105#ifdef CONFIG_KEXEC
97 &kexec_loaded_attr.attr, 106 &kexec_loaded_attr.attr,
98 &kexec_crash_loaded_attr.attr, 107 &kexec_crash_loaded_attr.attr,
108 &vmcoreinfo_attr.attr,
99#endif 109#endif
100 NULL 110 NULL
101}; 111};
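[Editorial note] The new /sys/kernel/vmcoreinfo file prints the note's physical address and maximum size in hex, which kexec/kdump userspace passes on to the dump kernel. A small userspace sketch of parsing it, purely for illustration:

#include <stdio.h>

int main(void)
{
	unsigned long long paddr;
	unsigned int size;
	FILE *f = fopen("/sys/kernel/vmcoreinfo", "r");

	if (!f || fscanf(f, "%llx %x", &paddr, &size) != 2)
		return 1;
	printf("vmcoreinfo note at %#llx, up to %u bytes\n", paddr, size);
	return 0;
}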
diff --git a/kernel/module.c b/kernel/module.c
index db0ead0363..a389b423c2 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -20,6 +20,7 @@
20#include <linux/moduleloader.h> 20#include <linux/moduleloader.h>
21#include <linux/init.h> 21#include <linux/init.h>
22#include <linux/kallsyms.h> 22#include <linux/kallsyms.h>
23#include <linux/sysfs.h>
23#include <linux/kernel.h> 24#include <linux/kernel.h>
24#include <linux/slab.h> 25#include <linux/slab.h>
25#include <linux/vmalloc.h> 26#include <linux/vmalloc.h>
@@ -692,8 +693,7 @@ sys_delete_module(const char __user *name_user, unsigned int flags)
692 } 693 }
693 694
694 /* If it has an init func, it must have an exit func to unload */ 695 /* If it has an init func, it must have an exit func to unload */
695 if ((mod->init != NULL && mod->exit == NULL) 696 if (mod->init && !mod->exit) {
696 || mod->unsafe) {
697 forced = try_force_unload(flags); 697 forced = try_force_unload(flags);
698 if (!forced) { 698 if (!forced) {
699 /* This module can't be removed */ 699 /* This module can't be removed */
@@ -741,11 +741,6 @@ static void print_unload_info(struct seq_file *m, struct module *mod)
741 seq_printf(m, "%s,", use->module_which_uses->name); 741 seq_printf(m, "%s,", use->module_which_uses->name);
742 } 742 }
743 743
744 if (mod->unsafe) {
745 printed_something = 1;
746 seq_printf(m, "[unsafe],");
747 }
748
749 if (mod->init != NULL && mod->exit == NULL) { 744 if (mod->init != NULL && mod->exit == NULL) {
750 printed_something = 1; 745 printed_something = 1;
751 seq_printf(m, "[permanent],"); 746 seq_printf(m, "[permanent],");
@@ -1053,6 +1048,100 @@ static void remove_sect_attrs(struct module *mod)
1053 } 1048 }
1054} 1049}
1055 1050
1051/*
1052 * /sys/module/foo/notes/.section.name gives contents of SHT_NOTE sections.
1053 */
1054
1055struct module_notes_attrs {
1056 struct kobject *dir;
1057 unsigned int notes;
1058 struct bin_attribute attrs[0];
1059};
1060
1061static ssize_t module_notes_read(struct kobject *kobj,
1062 struct bin_attribute *bin_attr,
1063 char *buf, loff_t pos, size_t count)
1064{
1065 /*
1066 * The caller checked the pos and count against our size.
1067 */
1068 memcpy(buf, bin_attr->private + pos, count);
1069 return count;
1070}
1071
1072static void free_notes_attrs(struct module_notes_attrs *notes_attrs,
1073 unsigned int i)
1074{
1075 if (notes_attrs->dir) {
1076 while (i-- > 0)
1077 sysfs_remove_bin_file(notes_attrs->dir,
1078 &notes_attrs->attrs[i]);
1079 kobject_del(notes_attrs->dir);
1080 }
1081 kfree(notes_attrs);
1082}
1083
1084static void add_notes_attrs(struct module *mod, unsigned int nsect,
1085 char *secstrings, Elf_Shdr *sechdrs)
1086{
1087 unsigned int notes, loaded, i;
1088 struct module_notes_attrs *notes_attrs;
1089 struct bin_attribute *nattr;
1090
1091 /* Count notes sections and allocate structures. */
1092 notes = 0;
1093 for (i = 0; i < nsect; i++)
1094 if ((sechdrs[i].sh_flags & SHF_ALLOC) &&
1095 (sechdrs[i].sh_type == SHT_NOTE))
1096 ++notes;
1097
1098 if (notes == 0)
1099 return;
1100
1101 notes_attrs = kzalloc(sizeof(*notes_attrs)
1102 + notes * sizeof(notes_attrs->attrs[0]),
1103 GFP_KERNEL);
1104 if (notes_attrs == NULL)
1105 return;
1106
1107 notes_attrs->notes = notes;
1108 nattr = &notes_attrs->attrs[0];
1109 for (loaded = i = 0; i < nsect; ++i) {
1110 if (!(sechdrs[i].sh_flags & SHF_ALLOC))
1111 continue;
1112 if (sechdrs[i].sh_type == SHT_NOTE) {
1113 nattr->attr.name = mod->sect_attrs->attrs[loaded].name;
1114 nattr->attr.mode = S_IRUGO;
1115 nattr->size = sechdrs[i].sh_size;
1116 nattr->private = (void *) sechdrs[i].sh_addr;
1117 nattr->read = module_notes_read;
1118 ++nattr;
1119 }
1120 ++loaded;
1121 }
1122
1123 notes_attrs->dir = kobject_add_dir(&mod->mkobj.kobj, "notes");
1124 if (!notes_attrs->dir)
1125 goto out;
1126
1127 for (i = 0; i < notes; ++i)
1128 if (sysfs_create_bin_file(notes_attrs->dir,
1129 &notes_attrs->attrs[i]))
1130 goto out;
1131
1132 mod->notes_attrs = notes_attrs;
1133 return;
1134
1135 out:
1136 free_notes_attrs(notes_attrs, i);
1137}
1138
1139static void remove_notes_attrs(struct module *mod)
1140{
1141 if (mod->notes_attrs)
1142 free_notes_attrs(mod->notes_attrs, mod->notes_attrs->notes);
1143}
1144
1056#else 1145#else
1057 1146
1058static inline void add_sect_attrs(struct module *mod, unsigned int nsect, 1147static inline void add_sect_attrs(struct module *mod, unsigned int nsect,
@@ -1063,6 +1152,15 @@ static inline void add_sect_attrs(struct module *mod, unsigned int nsect,
1063static inline void remove_sect_attrs(struct module *mod) 1152static inline void remove_sect_attrs(struct module *mod)
1064{ 1153{
1065} 1154}
1155
1156static inline void add_notes_attrs(struct module *mod, unsigned int nsect,
1157 char *sectstrings, Elf_Shdr *sechdrs)
1158{
1159}
1160
1161static inline void remove_notes_attrs(struct module *mod)
1162{
1163}
1066#endif /* CONFIG_KALLSYMS */ 1164#endif /* CONFIG_KALLSYMS */
1067 1165
1068#ifdef CONFIG_SYSFS 1166#ifdef CONFIG_SYSFS
@@ -1197,6 +1295,7 @@ static void free_module(struct module *mod)
1197{ 1295{
1198 /* Delete from various lists */ 1296 /* Delete from various lists */
1199 stop_machine_run(__unlink_module, mod, NR_CPUS); 1297 stop_machine_run(__unlink_module, mod, NR_CPUS);
1298 remove_notes_attrs(mod);
1200 remove_sect_attrs(mod); 1299 remove_sect_attrs(mod);
1201 mod_kobject_remove(mod); 1300 mod_kobject_remove(mod);
1202 1301
@@ -1782,7 +1881,8 @@ static struct module *load_module(void __user *umod,
1782 module_unload_init(mod); 1881 module_unload_init(mod);
1783 1882
1784 /* Initialize kobject, so we can reference it. */ 1883 /* Initialize kobject, so we can reference it. */
1785 if (mod_sysfs_init(mod) != 0) 1884 err = mod_sysfs_init(mod);
1885 if (err)
1786 goto cleanup; 1886 goto cleanup;
1787 1887
1788 /* Set up license info based on the info section */ 1888 /* Set up license info based on the info section */
@@ -1924,6 +2024,7 @@ static struct module *load_module(void __user *umod,
1924 if (err < 0) 2024 if (err < 0)
1925 goto arch_cleanup; 2025 goto arch_cleanup;
1926 add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs); 2026 add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
2027 add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
1927 2028
1928 /* Size of section 0 is 0, so this works well if no unwind info. */ 2029 /* Size of section 0 is 0, so this works well if no unwind info. */
1929 mod->unwind_info = unwind_add_table(mod, 2030 mod->unwind_info = unwind_add_table(mod,
@@ -2011,15 +2112,10 @@ sys_init_module(void __user *umod,
2011 buggy refcounters. */ 2112 buggy refcounters. */
2012 mod->state = MODULE_STATE_GOING; 2113 mod->state = MODULE_STATE_GOING;
2013 synchronize_sched(); 2114 synchronize_sched();
2014 if (mod->unsafe) 2115 module_put(mod);
2015 printk(KERN_ERR "%s: module is now stuck!\n", 2116 mutex_lock(&module_mutex);
2016 mod->name); 2117 free_module(mod);
2017 else { 2118 mutex_unlock(&module_mutex);
2018 module_put(mod);
2019 mutex_lock(&module_mutex);
2020 free_module(mod);
2021 mutex_unlock(&module_mutex);
2022 }
2023 return ret; 2119 return ret;
2024 } 2120 }
2025 2121
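[Editorial note] add_notes_attrs() above exposes every SHT_NOTE section of a loaded module read-only under /sys/module/<name>/notes/<section-name>. A hedged userspace sketch that dumps one such file; the module and section names are examples and depend on how the module was built:

#include <stdio.h>

int main(void)
{
	char buf[4096];
	size_t n;
	FILE *f = fopen("/sys/module/usbcore/notes/.note.gnu.build-id", "rb");

	if (!f)
		return 1;
	n = fread(buf, 1, sizeof(buf), f);
	fwrite(buf, 1, n, stdout);	/* raw ELF note payload */
	fclose(f);
	return 0;
}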
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index f1decd21a5..049e7c0ac5 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -203,8 +203,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
203 203
204static int __init nsproxy_cache_init(void) 204static int __init nsproxy_cache_init(void)
205{ 205{
206 nsproxy_cachep = kmem_cache_create("nsproxy", sizeof(struct nsproxy), 206 nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC);
207 0, SLAB_PANIC, NULL);
208 return 0; 207 return 0;
209} 208}
210 209
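The nsproxy hunk above swaps an open-coded kmem_cache_create() for the KMEM_CACHE() helper. For comparison, a minimal sketch of roughly what the macro expands to, assuming the approximate <linux/slab.h> definition of this era (the macro derives the cache name and alignment from the structure itself):

#include <linux/init.h>
#include <linux/slab.h>
#include <linux/nsproxy.h>

static struct kmem_cache *example_cachep;

static int __init example_cache_init(void)
{
	/*
	 * KMEM_CACHE(nsproxy, SLAB_PANIC) expands to roughly this call:
	 * the stringified struct name, its size, its natural alignment,
	 * the given flags, and no constructor.
	 */
	example_cachep = kmem_cache_create("nsproxy",
					   sizeof(struct nsproxy),
					   __alignof__(struct nsproxy),
					   SLAB_PANIC, NULL);
	return 0;
}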
diff --git a/kernel/params.c b/kernel/params.c
index 4e57732fcf..1d6aca288c 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -252,8 +252,9 @@ int param_get_bool(char *buffer, struct kernel_param *kp)
252int param_set_invbool(const char *val, struct kernel_param *kp) 252int param_set_invbool(const char *val, struct kernel_param *kp)
253{ 253{
254 int boolval, ret; 254 int boolval, ret;
255 struct kernel_param dummy = { .arg = &boolval }; 255 struct kernel_param dummy;
256 256
257 dummy.arg = &boolval;
257 ret = param_set_bool(val, &dummy); 258 ret = param_set_bool(val, &dummy);
258 if (ret == 0) 259 if (ret == 0)
259 *(int *)kp->arg = !boolval; 260 *(int *)kp->arg = !boolval;
@@ -262,11 +263,7 @@ int param_set_invbool(const char *val, struct kernel_param *kp)
262 263
263int param_get_invbool(char *buffer, struct kernel_param *kp) 264int param_get_invbool(char *buffer, struct kernel_param *kp)
264{ 265{
265 int val; 266 return sprintf(buffer, "%c", (*(int *)kp->arg) ? 'N' : 'Y');
266 struct kernel_param dummy = { .arg = &val };
267
268 val = !*(int *)kp->arg;
269 return param_get_bool(buffer, &dummy);
270} 267}
271 268
272/* We break the rule and mangle the string. */ 269/* We break the rule and mangle the string. */
@@ -325,7 +322,7 @@ static int param_array(const char *name,
325 322
326int param_array_set(const char *val, struct kernel_param *kp) 323int param_array_set(const char *val, struct kernel_param *kp)
327{ 324{
328 struct kparam_array *arr = kp->arg; 325 const struct kparam_array *arr = kp->arr;
329 unsigned int temp_num; 326 unsigned int temp_num;
330 327
331 return param_array(kp->name, val, 1, arr->max, arr->elem, 328 return param_array(kp->name, val, 1, arr->max, arr->elem,
@@ -335,7 +332,7 @@ int param_array_set(const char *val, struct kernel_param *kp)
335int param_array_get(char *buffer, struct kernel_param *kp) 332int param_array_get(char *buffer, struct kernel_param *kp)
336{ 333{
337 int i, off, ret; 334 int i, off, ret;
338 struct kparam_array *arr = kp->arg; 335 const struct kparam_array *arr = kp->arr;
339 struct kernel_param p; 336 struct kernel_param p;
340 337
341 p = *kp; 338 p = *kp;
@@ -354,7 +351,7 @@ int param_array_get(char *buffer, struct kernel_param *kp)
354 351
355int param_set_copystring(const char *val, struct kernel_param *kp) 352int param_set_copystring(const char *val, struct kernel_param *kp)
356{ 353{
357 struct kparam_string *kps = kp->arg; 354 const struct kparam_string *kps = kp->str;
358 355
359 if (!val) { 356 if (!val) {
360 printk(KERN_ERR "%s: missing param set value\n", kp->name); 357 printk(KERN_ERR "%s: missing param set value\n", kp->name);
@@ -371,7 +368,7 @@ int param_set_copystring(const char *val, struct kernel_param *kp)
371 368
372int param_get_string(char *buffer, struct kernel_param *kp) 369int param_get_string(char *buffer, struct kernel_param *kp)
373{ 370{
374 struct kparam_string *kps = kp->arg; 371 const struct kparam_string *kps = kp->str;
375 return strlcpy(buffer, kps->string, kps->maxlen); 372 return strlcpy(buffer, kps->string, kps->maxlen);
376} 373}
377 374
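For context on the invbool helpers above, a hedged sketch of how an inverted-sense module parameter is declared with the module_param() conventions of this era. The variable and parameter names are invented for illustration; with the new param_get_invbool(), reading the parameter back reports the inverse of the stored integer as a single 'Y' or 'N'.

#include <linux/module.h>
#include <linux/moduleparam.h>

/*
 * Hypothetical feature flag: the parameter has the opposite sense of
 * the variable.  Passing "disable_foo=Y" makes param_set_invbool()
 * store 0 in foo_enabled; reading the parameter back then prints 'Y'
 * again, i.e. the inverse of the stored value.
 */
static int foo_enabled = 1;
module_param_named(disable_foo, foo_enabled, invbool, 0644);
MODULE_PARM_DESC(disable_foo, "Disable the (hypothetical) foo feature");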
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 57efe0400b..d71ed09fe1 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -241,7 +241,8 @@ static __init int init_posix_timers(void)
241 register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic); 241 register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic);
242 242
243 posix_timers_cache = kmem_cache_create("posix_timers_cache", 243 posix_timers_cache = kmem_cache_create("posix_timers_cache",
244 sizeof (struct k_itimer), 0, 0, NULL); 244 sizeof (struct k_itimer), 0, SLAB_PANIC,
245 NULL);
245 idr_init(&posix_timers_id); 246 idr_init(&posix_timers_id);
246 return 0; 247 return 0;
247} 248}
diff --git a/kernel/printk.c b/kernel/printk.c
index 8451dfc31d..52493474f0 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -22,6 +22,8 @@
22#include <linux/tty_driver.h> 22#include <linux/tty_driver.h>
23#include <linux/console.h> 23#include <linux/console.h>
24#include <linux/init.h> 24#include <linux/init.h>
25#include <linux/jiffies.h>
26#include <linux/nmi.h>
25#include <linux/module.h> 27#include <linux/module.h>
26#include <linux/moduleparam.h> 28#include <linux/moduleparam.h>
27#include <linux/interrupt.h> /* For in_interrupt() */ 29#include <linux/interrupt.h> /* For in_interrupt() */
@@ -162,6 +164,113 @@ out:
162 164
163__setup("log_buf_len=", log_buf_len_setup); 165__setup("log_buf_len=", log_buf_len_setup);
164 166
167#ifdef CONFIG_BOOT_PRINTK_DELAY
168
169static unsigned int boot_delay; /* msecs delay after each printk during bootup */
170static unsigned long long printk_delay_msec; /* per msec, based on boot_delay */
171
172static int __init boot_delay_setup(char *str)
173{
174 unsigned long lpj;
175 unsigned long long loops_per_msec;
176
177 lpj = preset_lpj ? preset_lpj : 1000000; /* some guess */
178 loops_per_msec = (unsigned long long)lpj / 1000 * HZ;
179
180 get_option(&str, &boot_delay);
181 if (boot_delay > 10 * 1000)
182 boot_delay = 0;
183
184 printk_delay_msec = loops_per_msec;
185 printk(KERN_DEBUG "boot_delay: %u, preset_lpj: %ld, lpj: %lu, "
186 "HZ: %d, printk_delay_msec: %llu\n",
187 boot_delay, preset_lpj, lpj, HZ, printk_delay_msec);
188 return 1;
189}
190__setup("boot_delay=", boot_delay_setup);
191
192static void boot_delay_msec(void)
193{
194 unsigned long long k;
195 unsigned long timeout;
196
197 if (boot_delay == 0 || system_state != SYSTEM_BOOTING)
198 return;
199
200 k = (unsigned long long)printk_delay_msec * boot_delay;
201
202 timeout = jiffies + msecs_to_jiffies(boot_delay);
203 while (k) {
204 k--;
205 cpu_relax();
206 /*
207 * use (volatile) jiffies to prevent
208 * compiler reduction; loop termination via jiffies
209 * is secondary and may or may not happen.
210 */
211 if (time_after(jiffies, timeout))
212 break;
213 touch_nmi_watchdog();
214 }
215}
216#else
217static inline void boot_delay_msec(void)
218{
219}
220#endif
221
222/*
223 * Return the number of unread characters in the log buffer.
224 */
225int log_buf_get_len(void)
226{
227 return logged_chars;
228}
229
230/*
231 * Copy a range of characters from the log buffer.
232 */
233int log_buf_copy(char *dest, int idx, int len)
234{
235 int ret, max;
236 bool took_lock = false;
237
238 if (!oops_in_progress) {
239 spin_lock_irq(&logbuf_lock);
240 took_lock = true;
241 }
242
243 max = log_buf_get_len();
244 if (idx < 0 || idx >= max) {
245 ret = -1;
246 } else {
247 if (len > max)
248 len = max;
249 ret = len;
250 idx += (log_end - max);
251 while (len-- > 0)
252 dest[len] = LOG_BUF(idx + len);
253 }
254
255 if (took_lock)
256 spin_unlock_irq(&logbuf_lock);
257
258 return ret;
259}
260
261/*
262 * Extract a single character from the log buffer.
263 */
264int log_buf_read(int idx)
265{
266 char ret;
267
268 if (log_buf_copy(&ret, idx, 1) == 1)
269 return ret;
270 else
271 return -1;
272}
273
165/* 274/*
166 * Commands to do_syslog: 275 * Commands to do_syslog:
167 * 276 *
@@ -527,6 +636,8 @@ asmlinkage int vprintk(const char *fmt, va_list args)
527 static char printk_buf[1024]; 636 static char printk_buf[1024];
528 static int log_level_unknown = 1; 637 static int log_level_unknown = 1;
529 638
639 boot_delay_msec();
640
530 preempt_disable(); 641 preempt_disable();
531 if (unlikely(oops_in_progress) && printk_cpu == smp_processor_id()) 642 if (unlikely(oops_in_progress) && printk_cpu == smp_processor_id())
532 /* If a crash is occurring during printk() on this CPU, 643 /* If a crash is occurring during printk() on this CPU,
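The new log_buf_get_len()/log_buf_copy()/log_buf_read() accessors above give other kernel code a safe way to read the printk ring buffer. A minimal sketch of a consumer that copies out the newest characters; the helper name is invented, and the extern prototypes simply restate the functions introduced in this patch.

extern int log_buf_get_len(void);
extern int log_buf_copy(char *dest, int idx, int len);

/* Hypothetical helper: copy up to 'size' of the most recent log text. */
static int dump_recent_log(char *buf, int size)
{
	int len = log_buf_get_len();	/* unread characters in the buffer */
	int idx = 0;
	int copied;

	if (len > size)
		idx = len - size;	/* start so only the tail is copied */

	copied = log_buf_copy(buf, idx, size);
	if (copied < 0)
		return 0;		/* empty buffer or stale index */
	return copied;			/* characters actually copied */
}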
diff --git a/kernel/profile.c b/kernel/profile.c
index cb1e37d2da..631b75c25d 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -37,7 +37,7 @@ struct profile_hit {
37#define NR_PROFILE_GRP (NR_PROFILE_HIT/PROFILE_GRPSZ) 37#define NR_PROFILE_GRP (NR_PROFILE_HIT/PROFILE_GRPSZ)
38 38
39/* Oprofile timer tick hook */ 39/* Oprofile timer tick hook */
40int (*timer_hook)(struct pt_regs *) __read_mostly; 40static int (*timer_hook)(struct pt_regs *) __read_mostly;
41 41
42static atomic_t *prof_buffer; 42static atomic_t *prof_buffer;
43static unsigned long prof_len, prof_shift; 43static unsigned long prof_len, prof_shift;
@@ -346,7 +346,7 @@ static int __devinit profile_cpu_callback(struct notifier_block *info,
346 per_cpu(cpu_profile_flip, cpu) = 0; 346 per_cpu(cpu_profile_flip, cpu) = 0;
347 if (!per_cpu(cpu_profile_hits, cpu)[1]) { 347 if (!per_cpu(cpu_profile_hits, cpu)[1]) {
348 page = alloc_pages_node(node, 348 page = alloc_pages_node(node,
349 GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, 349 GFP_KERNEL | __GFP_ZERO,
350 0); 350 0);
351 if (!page) 351 if (!page)
352 return NOTIFY_BAD; 352 return NOTIFY_BAD;
@@ -354,7 +354,7 @@ static int __devinit profile_cpu_callback(struct notifier_block *info,
354 } 354 }
355 if (!per_cpu(cpu_profile_hits, cpu)[0]) { 355 if (!per_cpu(cpu_profile_hits, cpu)[0]) {
356 page = alloc_pages_node(node, 356 page = alloc_pages_node(node,
357 GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, 357 GFP_KERNEL | __GFP_ZERO,
358 0); 358 0);
359 if (!page) 359 if (!page)
360 goto out_free; 360 goto out_free;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 3eca7a55f2..a73ebd3b9d 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -386,6 +386,9 @@ int ptrace_request(struct task_struct *child, long request,
386 case PTRACE_SETSIGINFO: 386 case PTRACE_SETSIGINFO:
387 ret = ptrace_setsiginfo(child, (siginfo_t __user *) data); 387 ret = ptrace_setsiginfo(child, (siginfo_t __user *) data);
388 break; 388 break;
389 case PTRACE_DETACH: /* detach a process that was attached. */
390 ret = ptrace_detach(child, data);
391 break;
389 default: 392 default:
390 break; 393 break;
391 } 394 }
@@ -450,6 +453,10 @@ struct task_struct *ptrace_get_task_struct(pid_t pid)
450 return child; 453 return child;
451} 454}
452 455
456#ifndef arch_ptrace_attach
457#define arch_ptrace_attach(child) do { } while (0)
458#endif
459
453#ifndef __ARCH_SYS_PTRACE 460#ifndef __ARCH_SYS_PTRACE
454asmlinkage long sys_ptrace(long request, long pid, long addr, long data) 461asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
455{ 462{
@@ -473,6 +480,12 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
473 480
474 if (request == PTRACE_ATTACH) { 481 if (request == PTRACE_ATTACH) {
475 ret = ptrace_attach(child); 482 ret = ptrace_attach(child);
483 /*
484 * Some architectures need to do book-keeping after
485 * a ptrace attach.
486 */
487 if (!ret)
488 arch_ptrace_attach(child);
476 goto out_put_task_struct; 489 goto out_put_task_struct;
477 } 490 }
478 491
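The arch_ptrace_attach() hook added above defaults to a no-op; an architecture that needs attach-time bookkeeping overrides it from its own headers. A hedged sketch of what such an override could look like; the helper called here is hypothetical.

/*
 * Sketch of an architecture-side override (it would live in that
 * architecture's <asm/ptrace.h>).  The helper name is made up; the
 * real work would be whatever per-thread state the architecture must
 * synchronize before the tracer starts inspecting the child.
 */
#define arch_ptrace_attach(child)	arch_sync_trace_state(child)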
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 130214f3d2..a66d4d1615 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -45,7 +45,6 @@
45#include <linux/moduleparam.h> 45#include <linux/moduleparam.h>
46#include <linux/percpu.h> 46#include <linux/percpu.h>
47#include <linux/notifier.h> 47#include <linux/notifier.h>
48#include <linux/rcupdate.h>
49#include <linux/cpu.h> 48#include <linux/cpu.h>
50#include <linux/mutex.h> 49#include <linux/mutex.h>
51 50
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index ddff332477..c3e165c231 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -35,14 +35,12 @@
35#include <linux/sched.h> 35#include <linux/sched.h>
36#include <asm/atomic.h> 36#include <asm/atomic.h>
37#include <linux/bitops.h> 37#include <linux/bitops.h>
38#include <linux/module.h>
39#include <linux/completion.h> 38#include <linux/completion.h>
40#include <linux/moduleparam.h> 39#include <linux/moduleparam.h>
41#include <linux/percpu.h> 40#include <linux/percpu.h>
42#include <linux/notifier.h> 41#include <linux/notifier.h>
43#include <linux/freezer.h> 42#include <linux/freezer.h>
44#include <linux/cpu.h> 43#include <linux/cpu.h>
45#include <linux/random.h>
46#include <linux/delay.h> 44#include <linux/delay.h>
47#include <linux/byteorder/swabb.h> 45#include <linux/byteorder/swabb.h>
48#include <linux/stat.h> 46#include <linux/stat.h>
@@ -166,16 +164,14 @@ struct rcu_random_state {
166 164
167/* 165/*
168 * Crude but fast random-number generator. Uses a linear congruential 166 * Crude but fast random-number generator. Uses a linear congruential
169 * generator, with occasional help from get_random_bytes(). 167 * generator, with occasional help from cpu_clock().
170 */ 168 */
171static unsigned long 169static unsigned long
172rcu_random(struct rcu_random_state *rrsp) 170rcu_random(struct rcu_random_state *rrsp)
173{ 171{
174 long refresh;
175
176 if (--rrsp->rrs_count < 0) { 172 if (--rrsp->rrs_count < 0) {
177 get_random_bytes(&refresh, sizeof(refresh)); 173 rrsp->rrs_state +=
178 rrsp->rrs_state += refresh; 174 (unsigned long)cpu_clock(raw_smp_processor_id());
179 rrsp->rrs_count = RCU_RANDOM_REFRESH; 175 rrsp->rrs_count = RCU_RANDOM_REFRESH;
180 } 176 }
181 rrsp->rrs_state = rrsp->rrs_state * RCU_RANDOM_MULT + RCU_RANDOM_ADD; 177 rrsp->rrs_state = rrsp->rrs_state * RCU_RANDOM_MULT + RCU_RANDOM_ADD;
diff --git a/kernel/resource.c b/kernel/resource.c
index 9bd14fd3e6..a358142ff4 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -234,7 +234,7 @@ EXPORT_SYMBOL(release_resource);
234 * the caller must specify res->start, res->end, res->flags. 234 * the caller must specify res->start, res->end, res->flags.
235 * If found, returns 0, res is overwritten, if not found, returns -1. 235 * If found, returns 0, res is overwritten, if not found, returns -1.
236 */ 236 */
237int find_next_system_ram(struct resource *res) 237static int find_next_system_ram(struct resource *res)
238{ 238{
239 resource_size_t start, end; 239 resource_size_t start, end;
240 struct resource *p; 240 struct resource *p;
@@ -267,6 +267,30 @@ int find_next_system_ram(struct resource *res)
267 res->end = p->end; 267 res->end = p->end;
268 return 0; 268 return 0;
269} 269}
270int
271walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages, void *arg,
272 int (*func)(unsigned long, unsigned long, void *))
273{
274 struct resource res;
275 unsigned long pfn, len;
276 u64 orig_end;
277 int ret = -1;
278 res.start = (u64) start_pfn << PAGE_SHIFT;
279 res.end = ((u64)(start_pfn + nr_pages) << PAGE_SHIFT) - 1;
280 res.flags = IORESOURCE_MEM;
281 orig_end = res.end;
282 while ((res.start < res.end) && (find_next_system_ram(&res) >= 0)) {
283 pfn = (unsigned long)(res.start >> PAGE_SHIFT);
284 len = (unsigned long)((res.end + 1 - res.start) >> PAGE_SHIFT);
285 ret = (*func)(pfn, len, arg);
286 if (ret)
287 break;
288 res.start = res.end + 1;
289 res.end = orig_end;
290 }
291 return ret;
292}
293
270#endif 294#endif
271 295
272/* 296/*
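walk_memory_resource(), added above, calls the supplied function once per contiguous chunk of "System RAM" inside the given pfn range and stops on the first non-zero return. A minimal sketch of a caller, with an invented callback that just tallies pages; the extern prototype restates the signature from this patch.

extern int walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages,
				void *arg,
				int (*func)(unsigned long, unsigned long, void *));

/* Hypothetical callback: accumulate the number of System RAM pages. */
static int count_ram_pages(unsigned long start_pfn, unsigned long nr_pages,
			   void *arg)
{
	unsigned long *total = arg;

	*total += nr_pages;
	return 0;		/* non-zero would stop the walk early */
}

static unsigned long ram_pages_in_range(unsigned long start_pfn,
					unsigned long nr_pages)
{
	unsigned long total = 0;

	/* Visits each contiguous "System RAM" chunk inside the range. */
	walk_memory_resource(start_pfn, nr_pages, &total, count_ram_pages);
	return total;
}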
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index 5aedbee014..6b0703db15 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -82,12 +82,7 @@ do { \
82 * into the tracing code when doing error printk or 82 * into the tracing code when doing error printk or
83 * executing a BUG(): 83 * executing a BUG():
84 */ 84 */
85int rt_trace_on = 1; 85static int rt_trace_on = 1;
86
87void deadlock_trace_off(void)
88{
89 rt_trace_on = 0;
90}
91 86
92static void printk_task(struct task_struct *p) 87static void printk_task(struct task_struct *p)
93{ 88{
diff --git a/kernel/sched.c b/kernel/sched.c
index bba57adb95..92721d1534 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1712,7 +1712,7 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
1712 1712
1713 p->prio = effective_prio(p); 1713 p->prio = effective_prio(p);
1714 1714
1715 if (!p->sched_class->task_new || !current->se.on_rq || !rq->cfs.curr) { 1715 if (!p->sched_class->task_new || !current->se.on_rq) {
1716 activate_task(rq, p, 0); 1716 activate_task(rq, p, 0);
1717 } else { 1717 } else {
1718 /* 1718 /*
@@ -2336,7 +2336,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2336 unsigned long max_pull; 2336 unsigned long max_pull;
2337 unsigned long busiest_load_per_task, busiest_nr_running; 2337 unsigned long busiest_load_per_task, busiest_nr_running;
2338 unsigned long this_load_per_task, this_nr_running; 2338 unsigned long this_load_per_task, this_nr_running;
2339 int load_idx; 2339 int load_idx, group_imb = 0;
2340#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) 2340#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
2341 int power_savings_balance = 1; 2341 int power_savings_balance = 1;
2342 unsigned long leader_nr_running = 0, min_load_per_task = 0; 2342 unsigned long leader_nr_running = 0, min_load_per_task = 0;
@@ -2355,9 +2355,10 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2355 load_idx = sd->idle_idx; 2355 load_idx = sd->idle_idx;
2356 2356
2357 do { 2357 do {
2358 unsigned long load, group_capacity; 2358 unsigned long load, group_capacity, max_cpu_load, min_cpu_load;
2359 int local_group; 2359 int local_group;
2360 int i; 2360 int i;
2361 int __group_imb = 0;
2361 unsigned int balance_cpu = -1, first_idle_cpu = 0; 2362 unsigned int balance_cpu = -1, first_idle_cpu = 0;
2362 unsigned long sum_nr_running, sum_weighted_load; 2363 unsigned long sum_nr_running, sum_weighted_load;
2363 2364
@@ -2368,6 +2369,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2368 2369
2369 /* Tally up the load of all CPUs in the group */ 2370 /* Tally up the load of all CPUs in the group */
2370 sum_weighted_load = sum_nr_running = avg_load = 0; 2371 sum_weighted_load = sum_nr_running = avg_load = 0;
2372 max_cpu_load = 0;
2373 min_cpu_load = ~0UL;
2371 2374
2372 for_each_cpu_mask(i, group->cpumask) { 2375 for_each_cpu_mask(i, group->cpumask) {
2373 struct rq *rq; 2376 struct rq *rq;
@@ -2388,8 +2391,13 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2388 } 2391 }
2389 2392
2390 load = target_load(i, load_idx); 2393 load = target_load(i, load_idx);
2391 } else 2394 } else {
2392 load = source_load(i, load_idx); 2395 load = source_load(i, load_idx);
2396 if (load > max_cpu_load)
2397 max_cpu_load = load;
2398 if (min_cpu_load > load)
2399 min_cpu_load = load;
2400 }
2393 2401
2394 avg_load += load; 2402 avg_load += load;
2395 sum_nr_running += rq->nr_running; 2403 sum_nr_running += rq->nr_running;
@@ -2415,6 +2423,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2415 avg_load = sg_div_cpu_power(group, 2423 avg_load = sg_div_cpu_power(group,
2416 avg_load * SCHED_LOAD_SCALE); 2424 avg_load * SCHED_LOAD_SCALE);
2417 2425
2426 if ((max_cpu_load - min_cpu_load) > SCHED_LOAD_SCALE)
2427 __group_imb = 1;
2428
2418 group_capacity = group->__cpu_power / SCHED_LOAD_SCALE; 2429 group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;
2419 2430
2420 if (local_group) { 2431 if (local_group) {
@@ -2423,11 +2434,12 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
2423 this_nr_running = sum_nr_running; 2434 this_nr_running = sum_nr_running;
2424 this_load_per_task = sum_weighted_load; 2435 this_load_per_task = sum_weighted_load;
2425 } else if (avg_load > max_load && 2436 } else if (avg_load > max_load &&
2426 sum_nr_running > group_capacity) { 2437 (sum_nr_running > group_capacity || __group_imb)) {
2427 max_load = avg_load; 2438 max_load = avg_load;
2428 busiest = group; 2439 busiest = group;
2429 busiest_nr_running = sum_nr_running; 2440 busiest_nr_running = sum_nr_running;
2430 busiest_load_per_task = sum_weighted_load; 2441 busiest_load_per_task = sum_weighted_load;
2442 group_imb = __group_imb;
2431 } 2443 }
2432 2444
2433#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) 2445#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
@@ -2499,6 +2511,9 @@ group_next:
2499 goto out_balanced; 2511 goto out_balanced;
2500 2512
2501 busiest_load_per_task /= busiest_nr_running; 2513 busiest_load_per_task /= busiest_nr_running;
2514 if (group_imb)
2515 busiest_load_per_task = min(busiest_load_per_task, avg_load);
2516
2502 /* 2517 /*
2503 * We're trying to get all the cpus to the average_load, so we don't 2518 * We're trying to get all the cpus to the average_load, so we don't
2504 * want to push ourselves above the average load, nor do we wish to 2519 * want to push ourselves above the average load, nor do we wish to
@@ -5060,6 +5075,17 @@ wait_to_die:
5060} 5075}
5061 5076
5062#ifdef CONFIG_HOTPLUG_CPU 5077#ifdef CONFIG_HOTPLUG_CPU
5078
5079static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
5080{
5081 int ret;
5082
5083 local_irq_disable();
5084 ret = __migrate_task(p, src_cpu, dest_cpu);
5085 local_irq_enable();
5086 return ret;
5087}
5088
5063/* 5089/*
5064 * Figure out where task on dead CPU should go, use force if neccessary. 5090 * Figure out where task on dead CPU should go, use force if neccessary.
5065 * NOTE: interrupts should be disabled by the caller 5091 * NOTE: interrupts should be disabled by the caller
@@ -5098,7 +5124,7 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
5098 "longer affine to cpu%d\n", 5124 "longer affine to cpu%d\n",
5099 p->pid, p->comm, dead_cpu); 5125 p->pid, p->comm, dead_cpu);
5100 } 5126 }
5101 } while (!__migrate_task(p, dead_cpu, dest_cpu)); 5127 } while (!__migrate_task_irq(p, dead_cpu, dest_cpu));
5102} 5128}
5103 5129
5104/* 5130/*
@@ -5126,7 +5152,7 @@ static void migrate_live_tasks(int src_cpu)
5126{ 5152{
5127 struct task_struct *p, *t; 5153 struct task_struct *p, *t;
5128 5154
5129 write_lock_irq(&tasklist_lock); 5155 read_lock(&tasklist_lock);
5130 5156
5131 do_each_thread(t, p) { 5157 do_each_thread(t, p) {
5132 if (p == current) 5158 if (p == current)
@@ -5136,7 +5162,7 @@ static void migrate_live_tasks(int src_cpu)
5136 move_task_off_dead_cpu(src_cpu, p); 5162 move_task_off_dead_cpu(src_cpu, p);
5137 } while_each_thread(t, p); 5163 } while_each_thread(t, p);
5138 5164
5139 write_unlock_irq(&tasklist_lock); 5165 read_unlock(&tasklist_lock);
5140} 5166}
5141 5167
5142/* 5168/*
@@ -5214,11 +5240,10 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
5214 * Drop lock around migration; if someone else moves it, 5240 * Drop lock around migration; if someone else moves it,
5215 * that's OK. No task can be added to this CPU, so iteration is 5241 * that's OK. No task can be added to this CPU, so iteration is
5216 * fine. 5242 * fine.
5217 * NOTE: interrupts should be left disabled --dev@
5218 */ 5243 */
5219 spin_unlock(&rq->lock); 5244 spin_unlock_irq(&rq->lock);
5220 move_task_off_dead_cpu(dead_cpu, p); 5245 move_task_off_dead_cpu(dead_cpu, p);
5221 spin_lock(&rq->lock); 5246 spin_lock_irq(&rq->lock);
5222 5247
5223 put_task_struct(p); 5248 put_task_struct(p);
5224} 5249}
@@ -5272,11 +5297,20 @@ static struct ctl_table *sd_alloc_ctl_entry(int n)
5272 5297
5273static void sd_free_ctl_entry(struct ctl_table **tablep) 5298static void sd_free_ctl_entry(struct ctl_table **tablep)
5274{ 5299{
5275 struct ctl_table *entry = *tablep; 5300 struct ctl_table *entry;
5276 5301
5277 for (entry = *tablep; entry->procname; entry++) 5302 /*
5303 * In the intermediate directories, both the child directory and
5304 * procname are dynamically allocated and could fail but the mode
5305 * will always be set. In the lowest directory the names are
5306 * static strings and all have proc handlers.
5307 */
5308 for (entry = *tablep; entry->mode; entry++) {
5278 if (entry->child) 5309 if (entry->child)
5279 sd_free_ctl_entry(&entry->child); 5310 sd_free_ctl_entry(&entry->child);
5311 if (entry->proc_handler == NULL)
5312 kfree(entry->procname);
5313 }
5280 5314
5281 kfree(*tablep); 5315 kfree(*tablep);
5282 *tablep = NULL; 5316 *tablep = NULL;
@@ -5447,14 +5481,14 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
5447 kthread_stop(rq->migration_thread); 5481 kthread_stop(rq->migration_thread);
5448 rq->migration_thread = NULL; 5482 rq->migration_thread = NULL;
5449 /* Idle task back to normal (off runqueue, low prio) */ 5483 /* Idle task back to normal (off runqueue, low prio) */
5450 rq = task_rq_lock(rq->idle, &flags); 5484 spin_lock_irq(&rq->lock);
5451 update_rq_clock(rq); 5485 update_rq_clock(rq);
5452 deactivate_task(rq, rq->idle, 0); 5486 deactivate_task(rq, rq->idle, 0);
5453 rq->idle->static_prio = MAX_PRIO; 5487 rq->idle->static_prio = MAX_PRIO;
5454 __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); 5488 __setscheduler(rq, rq->idle, SCHED_NORMAL, 0);
5455 rq->idle->sched_class = &idle_sched_class; 5489 rq->idle->sched_class = &idle_sched_class;
5456 migrate_dead_tasks(cpu); 5490 migrate_dead_tasks(cpu);
5457 task_rq_unlock(rq, &flags); 5491 spin_unlock_irq(&rq->lock);
5458 migrate_nr_uninterruptible(rq); 5492 migrate_nr_uninterruptible(rq);
5459 BUG_ON(rq->nr_running != 0); 5493 BUG_ON(rq->nr_running != 0);
5460 5494
@@ -5869,7 +5903,7 @@ static int cpu_to_core_group(int cpu, const cpumask_t *cpu_map,
5869 struct sched_group **sg) 5903 struct sched_group **sg)
5870{ 5904{
5871 int group; 5905 int group;
5872 cpumask_t mask = cpu_sibling_map[cpu]; 5906 cpumask_t mask = per_cpu(cpu_sibling_map, cpu);
5873 cpus_and(mask, mask, *cpu_map); 5907 cpus_and(mask, mask, *cpu_map);
5874 group = first_cpu(mask); 5908 group = first_cpu(mask);
5875 if (sg) 5909 if (sg)
@@ -5898,7 +5932,7 @@ static int cpu_to_phys_group(int cpu, const cpumask_t *cpu_map,
5898 cpus_and(mask, mask, *cpu_map); 5932 cpus_and(mask, mask, *cpu_map);
5899 group = first_cpu(mask); 5933 group = first_cpu(mask);
5900#elif defined(CONFIG_SCHED_SMT) 5934#elif defined(CONFIG_SCHED_SMT)
5901 cpumask_t mask = cpu_sibling_map[cpu]; 5935 cpumask_t mask = per_cpu(cpu_sibling_map, cpu);
5902 cpus_and(mask, mask, *cpu_map); 5936 cpus_and(mask, mask, *cpu_map);
5903 group = first_cpu(mask); 5937 group = first_cpu(mask);
5904#else 5938#else
@@ -6132,7 +6166,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
6132 p = sd; 6166 p = sd;
6133 sd = &per_cpu(cpu_domains, i); 6167 sd = &per_cpu(cpu_domains, i);
6134 *sd = SD_SIBLING_INIT; 6168 *sd = SD_SIBLING_INIT;
6135 sd->span = cpu_sibling_map[i]; 6169 sd->span = per_cpu(cpu_sibling_map, i);
6136 cpus_and(sd->span, sd->span, *cpu_map); 6170 cpus_and(sd->span, sd->span, *cpu_map);
6137 sd->parent = p; 6171 sd->parent = p;
6138 p->child = sd; 6172 p->child = sd;
@@ -6143,7 +6177,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
6143#ifdef CONFIG_SCHED_SMT 6177#ifdef CONFIG_SCHED_SMT
6144 /* Set up CPU (sibling) groups */ 6178 /* Set up CPU (sibling) groups */
6145 for_each_cpu_mask(i, *cpu_map) { 6179 for_each_cpu_mask(i, *cpu_map) {
6146 cpumask_t this_sibling_map = cpu_sibling_map[i]; 6180 cpumask_t this_sibling_map = per_cpu(cpu_sibling_map, i);
6147 cpus_and(this_sibling_map, this_sibling_map, *cpu_map); 6181 cpus_and(this_sibling_map, this_sibling_map, *cpu_map);
6148 if (i != first_cpu(this_sibling_map)) 6182 if (i != first_cpu(this_sibling_map))
6149 continue; 6183 continue;
@@ -6348,35 +6382,6 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
6348 arch_destroy_sched_domains(cpu_map); 6382 arch_destroy_sched_domains(cpu_map);
6349} 6383}
6350 6384
6351/*
6352 * Partition sched domains as specified by the cpumasks below.
6353 * This attaches all cpus from the cpumasks to the NULL domain,
6354 * waits for a RCU quiescent period, recalculates sched
6355 * domain information and then attaches them back to the
6356 * correct sched domains
6357 * Call with hotplug lock held
6358 */
6359int partition_sched_domains(cpumask_t *partition1, cpumask_t *partition2)
6360{
6361 cpumask_t change_map;
6362 int err = 0;
6363
6364 cpus_and(*partition1, *partition1, cpu_online_map);
6365 cpus_and(*partition2, *partition2, cpu_online_map);
6366 cpus_or(change_map, *partition1, *partition2);
6367
6368 /* Detach sched domains from all of the affected cpus */
6369 detach_destroy_domains(&change_map);
6370 if (!cpus_empty(*partition1))
6371 err = build_sched_domains(partition1);
6372 if (!err && !cpus_empty(*partition2))
6373 err = build_sched_domains(partition2);
6374
6375 register_sched_domain_sysctl();
6376
6377 return err;
6378}
6379
6380#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) 6385#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
6381static int arch_reinit_sched_domains(void) 6386static int arch_reinit_sched_domains(void)
6382{ 6387{
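On the group-imbalance logic above: a group is flagged imbalanced when the spread between its most and least loaded CPU exceeds SCHED_LOAD_SCALE, and busiest_load_per_task is then capped at the group's average load. A small illustrative sketch of the spread test in isolation; the constant below is a stand-in rather than the scheduler's own definition.

/* Illustrative only: the spread test that sets __group_imb above. */
#define EXAMPLE_LOAD_SCALE	1024UL	/* stand-in for SCHED_LOAD_SCALE */

static int group_is_imbalanced(unsigned long max_cpu_load,
			       unsigned long min_cpu_load)
{
	/* e.g. loads of 3072 and 512: spread 2560 > 1024, so imbalanced */
	return (max_cpu_load - min_cpu_load) > EXAMPLE_LOAD_SCALE;
}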
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index a17b785d70..166ed6db60 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1031,12 +1031,8 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
1031 swap(curr->vruntime, se->vruntime); 1031 swap(curr->vruntime, se->vruntime);
1032 } 1032 }
1033 1033
1034 update_stats_enqueue(cfs_rq, se);
1035 check_spread(cfs_rq, se);
1036 check_spread(cfs_rq, curr);
1037 __enqueue_entity(cfs_rq, se);
1038 account_entity_enqueue(cfs_rq, se);
1039 se->peer_preempt = 0; 1034 se->peer_preempt = 0;
1035 enqueue_task_fair(rq, p, 0);
1040 resched_task(rq->curr); 1036 resched_task(rq->curr);
1041} 1037}
1042 1038
diff --git a/kernel/signal.c b/kernel/signal.c
index 7929523810..2124ffadcf 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -909,8 +909,7 @@ __group_complete_signal(int sig, struct task_struct *p)
909 do { 909 do {
910 sigaddset(&t->pending.signal, SIGKILL); 910 sigaddset(&t->pending.signal, SIGKILL);
911 signal_wake_up(t, 1); 911 signal_wake_up(t, 1);
912 t = next_thread(t); 912 } while_each_thread(p, t);
913 } while (t != p);
914 return; 913 return;
915 } 914 }
916 915
@@ -928,13 +927,11 @@ __group_complete_signal(int sig, struct task_struct *p)
928 rm_from_queue(SIG_KERNEL_STOP_MASK, &p->signal->shared_pending); 927 rm_from_queue(SIG_KERNEL_STOP_MASK, &p->signal->shared_pending);
929 p->signal->group_stop_count = 0; 928 p->signal->group_stop_count = 0;
930 p->signal->group_exit_task = t; 929 p->signal->group_exit_task = t;
931 t = p; 930 p = t;
932 do { 931 do {
933 p->signal->group_stop_count++; 932 p->signal->group_stop_count++;
934 signal_wake_up(t, 0); 933 signal_wake_up(t, t == p);
935 t = next_thread(t); 934 } while_each_thread(p, t);
936 } while (t != p);
937 wake_up_process(p->signal->group_exit_task);
938 return; 935 return;
939 } 936 }
940 937
@@ -985,9 +982,6 @@ void zap_other_threads(struct task_struct *p)
985 p->signal->flags = SIGNAL_GROUP_EXIT; 982 p->signal->flags = SIGNAL_GROUP_EXIT;
986 p->signal->group_stop_count = 0; 983 p->signal->group_stop_count = 0;
987 984
988 if (thread_group_empty(p))
989 return;
990
991 for (t = next_thread(p); t != p; t = next_thread(t)) { 985 for (t = next_thread(p); t != p; t = next_thread(t)) {
992 /* 986 /*
993 * Don't bother with already dead threads 987 * Don't bother with already dead threads
@@ -2300,15 +2294,6 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
2300 k = &current->sighand->action[sig-1]; 2294 k = &current->sighand->action[sig-1];
2301 2295
2302 spin_lock_irq(&current->sighand->siglock); 2296 spin_lock_irq(&current->sighand->siglock);
2303 if (signal_pending(current)) {
2304 /*
2305 * If there might be a fatal signal pending on multiple
2306 * threads, make sure we take it before changing the action.
2307 */
2308 spin_unlock_irq(&current->sighand->siglock);
2309 return -ERESTARTNOINTR;
2310 }
2311
2312 if (oact) 2297 if (oact)
2313 *oact = *k; 2298 *oact = *k;
2314 2299
@@ -2335,7 +2320,6 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
2335 rm_from_queue_full(&mask, &t->signal->shared_pending); 2320 rm_from_queue_full(&mask, &t->signal->shared_pending);
2336 do { 2321 do {
2337 rm_from_queue_full(&mask, &t->pending); 2322 rm_from_queue_full(&mask, &t->pending);
2338 recalc_sigpending_and_wake(t);
2339 t = next_thread(t); 2323 t = next_thread(t);
2340 } while (t != current); 2324 } while (t != current);
2341 } 2325 }
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 708d4882c0..edeeef3a6a 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -15,13 +15,16 @@
15#include <linux/notifier.h> 15#include <linux/notifier.h>
16#include <linux/module.h> 16#include <linux/module.h>
17 17
18#include <asm/irq_regs.h>
19
18static DEFINE_SPINLOCK(print_lock); 20static DEFINE_SPINLOCK(print_lock);
19 21
20static DEFINE_PER_CPU(unsigned long, touch_timestamp); 22static DEFINE_PER_CPU(unsigned long, touch_timestamp);
21static DEFINE_PER_CPU(unsigned long, print_timestamp); 23static DEFINE_PER_CPU(unsigned long, print_timestamp);
22static DEFINE_PER_CPU(struct task_struct *, watchdog_task); 24static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
23 25
24static int did_panic = 0; 26static int did_panic;
27int softlockup_thresh = 10;
25 28
26static int 29static int
27softlock_panic(struct notifier_block *this, unsigned long event, void *ptr) 30softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
@@ -40,14 +43,16 @@ static struct notifier_block panic_block = {
40 * resolution, and we don't need to waste time with a big divide when 43 * resolution, and we don't need to waste time with a big divide when
41 * 2^30ns == 1.074s. 44 * 2^30ns == 1.074s.
42 */ 45 */
43static unsigned long get_timestamp(void) 46static unsigned long get_timestamp(int this_cpu)
44{ 47{
45 return sched_clock() >> 30; /* 2^30 ~= 10^9 */ 48 return cpu_clock(this_cpu) >> 30; /* 2^30 ~= 10^9 */
46} 49}
47 50
48void touch_softlockup_watchdog(void) 51void touch_softlockup_watchdog(void)
49{ 52{
50 __raw_get_cpu_var(touch_timestamp) = get_timestamp(); 53 int this_cpu = raw_smp_processor_id();
54
55 __raw_get_cpu_var(touch_timestamp) = get_timestamp(this_cpu);
51} 56}
52EXPORT_SYMBOL(touch_softlockup_watchdog); 57EXPORT_SYMBOL(touch_softlockup_watchdog);
53 58
@@ -70,6 +75,7 @@ void softlockup_tick(void)
70 int this_cpu = smp_processor_id(); 75 int this_cpu = smp_processor_id();
71 unsigned long touch_timestamp = per_cpu(touch_timestamp, this_cpu); 76 unsigned long touch_timestamp = per_cpu(touch_timestamp, this_cpu);
72 unsigned long print_timestamp; 77 unsigned long print_timestamp;
78 struct pt_regs *regs = get_irq_regs();
73 unsigned long now; 79 unsigned long now;
74 80
75 if (touch_timestamp == 0) { 81 if (touch_timestamp == 0) {
@@ -80,10 +86,11 @@ void softlockup_tick(void)
80 print_timestamp = per_cpu(print_timestamp, this_cpu); 86 print_timestamp = per_cpu(print_timestamp, this_cpu);
81 87
82 /* report at most once a second */ 88 /* report at most once a second */
83 if (print_timestamp < (touch_timestamp + 1) || 89 if ((print_timestamp >= touch_timestamp &&
84 did_panic || 90 print_timestamp < (touch_timestamp + 1)) ||
85 !per_cpu(watchdog_task, this_cpu)) 91 did_panic || !per_cpu(watchdog_task, this_cpu)) {
86 return; 92 return;
93 }
87 94
88 /* do not print during early bootup: */ 95 /* do not print during early bootup: */
89 if (unlikely(system_state != SYSTEM_RUNNING)) { 96 if (unlikely(system_state != SYSTEM_RUNNING)) {
@@ -91,28 +98,33 @@ void softlockup_tick(void)
91 return; 98 return;
92 } 99 }
93 100
94 now = get_timestamp(); 101 now = get_timestamp(this_cpu);
95 102
96 /* Wake up the high-prio watchdog task every second: */ 103 /* Wake up the high-prio watchdog task every second: */
97 if (now > (touch_timestamp + 1)) 104 if (now > (touch_timestamp + 1))
98 wake_up_process(per_cpu(watchdog_task, this_cpu)); 105 wake_up_process(per_cpu(watchdog_task, this_cpu));
99 106
100 /* Warn about unreasonable 10+ seconds delays: */ 107 /* Warn about unreasonable 10+ seconds delays: */
101 if (now > (touch_timestamp + 10)) { 108 if (now <= (touch_timestamp + softlockup_thresh))
102 per_cpu(print_timestamp, this_cpu) = touch_timestamp; 109 return;
103 110
104 spin_lock(&print_lock); 111 per_cpu(print_timestamp, this_cpu) = touch_timestamp;
105 printk(KERN_ERR "BUG: soft lockup detected on CPU#%d!\n", 112
106 this_cpu); 113 spin_lock(&print_lock);
114 printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n",
115 this_cpu, now - touch_timestamp,
116 current->comm, current->pid);
117 if (regs)
118 show_regs(regs);
119 else
107 dump_stack(); 120 dump_stack();
108 spin_unlock(&print_lock); 121 spin_unlock(&print_lock);
109 }
110} 122}
111 123
112/* 124/*
113 * The watchdog thread - runs every second and touches the timestamp. 125 * The watchdog thread - runs every second and touches the timestamp.
114 */ 126 */
115static int watchdog(void * __bind_cpu) 127static int watchdog(void *__bind_cpu)
116{ 128{
117 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 129 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
118 130
@@ -150,13 +162,13 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
150 BUG_ON(per_cpu(watchdog_task, hotcpu)); 162 BUG_ON(per_cpu(watchdog_task, hotcpu));
151 p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu); 163 p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu);
152 if (IS_ERR(p)) { 164 if (IS_ERR(p)) {
153 printk("watchdog for %i failed\n", hotcpu); 165 printk(KERN_ERR "watchdog for %i failed\n", hotcpu);
154 return NOTIFY_BAD; 166 return NOTIFY_BAD;
155 } 167 }
156 per_cpu(touch_timestamp, hotcpu) = 0; 168 per_cpu(touch_timestamp, hotcpu) = 0;
157 per_cpu(watchdog_task, hotcpu) = p; 169 per_cpu(watchdog_task, hotcpu) = p;
158 kthread_bind(p, hotcpu); 170 kthread_bind(p, hotcpu);
159 break; 171 break;
160 case CPU_ONLINE: 172 case CPU_ONLINE:
161 case CPU_ONLINE_FROZEN: 173 case CPU_ONLINE_FROZEN:
162 wake_up_process(per_cpu(watchdog_task, hotcpu)); 174 wake_up_process(per_cpu(watchdog_task, hotcpu));
@@ -176,7 +188,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
176 kthread_stop(p); 188 kthread_stop(p);
177 break; 189 break;
178#endif /* CONFIG_HOTPLUG_CPU */ 190#endif /* CONFIG_HOTPLUG_CPU */
179 } 191 }
180 return NOTIFY_OK; 192 return NOTIFY_OK;
181} 193}
182 194
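On the >> 30 timestamps above: cpu_clock() returns nanoseconds, and shifting right by 30 bits treats 2^30 ns (about 1.074 s) as one "second", avoiding a divide in the tick path. A minimal sketch of that conversion, so the threshold comparison reads as roughly softlockup_thresh seconds of silence before the warning fires.

/* Sketch of the unit used by the watchdog timestamps above. */
static unsigned long ns_to_watchdog_units(unsigned long long ns)
{
	return (unsigned long)(ns >> 30);	/* 2^30 ns ~= 1.074 s */
}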
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index b0ec498a18..52c7a151e2 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -4,6 +4,10 @@
4 4
5#include <asm/unistd.h> 5#include <asm/unistd.h>
6 6
7/* we can't #include <linux/syscalls.h> here,
8 but tell gcc to not warn with -Wmissing-prototypes */
9asmlinkage long sys_ni_syscall(void);
10
7/* 11/*
8 * Non-implemented system calls get redirected here. 12 * Non-implemented system calls get redirected here.
9 */ 13 */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ec14aa8ac5..dde3d53e8a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -63,6 +63,7 @@ extern int print_fatal_signals;
63extern int sysctl_overcommit_memory; 63extern int sysctl_overcommit_memory;
64extern int sysctl_overcommit_ratio; 64extern int sysctl_overcommit_ratio;
65extern int sysctl_panic_on_oom; 65extern int sysctl_panic_on_oom;
66extern int sysctl_oom_kill_allocating_task;
66extern int max_threads; 67extern int max_threads;
67extern int core_uses_pid; 68extern int core_uses_pid;
68extern int suid_dumpable; 69extern int suid_dumpable;
@@ -79,6 +80,19 @@ extern int maps_protect;
79extern int sysctl_stat_interval; 80extern int sysctl_stat_interval;
80extern int audit_argv_kb; 81extern int audit_argv_kb;
81 82
83/* Constants used for minimum and maximum */
84#ifdef CONFIG_DETECT_SOFTLOCKUP
85static int one = 1;
86static int sixty = 60;
87#endif
88
89#ifdef CONFIG_MMU
90static int two = 2;
91#endif
92
93static int zero;
94static int one_hundred = 100;
95
82/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ 96/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
83static int maxolduid = 65535; 97static int maxolduid = 65535;
84static int minolduid; 98static int minolduid;
@@ -710,6 +724,19 @@ static ctl_table kern_table[] = {
710 .proc_handler = &proc_dointvec, 724 .proc_handler = &proc_dointvec,
711 }, 725 },
712#endif 726#endif
727#ifdef CONFIG_DETECT_SOFTLOCKUP
728 {
729 .ctl_name = CTL_UNNUMBERED,
730 .procname = "softlockup_thresh",
731 .data = &softlockup_thresh,
732 .maxlen = sizeof(int),
733 .mode = 0644,
734 .proc_handler = &proc_dointvec_minmax,
735 .strategy = &sysctl_intvec,
736 .extra1 = &one,
737 .extra2 = &sixty,
738 },
739#endif
713#ifdef CONFIG_COMPAT 740#ifdef CONFIG_COMPAT
714 { 741 {
715 .ctl_name = KERN_COMPAT_LOG, 742 .ctl_name = KERN_COMPAT_LOG,
@@ -756,13 +783,6 @@ static ctl_table kern_table[] = {
756 { .ctl_name = 0 } 783 { .ctl_name = 0 }
757}; 784};
758 785
759/* Constants for minimum and maximum testing in vm_table.
760 We use these as one-element integer vectors. */
761static int zero;
762static int two = 2;
763static int one_hundred = 100;
764
765
766static ctl_table vm_table[] = { 786static ctl_table vm_table[] = {
767 { 787 {
768 .ctl_name = VM_OVERCOMMIT_MEMORY, 788 .ctl_name = VM_OVERCOMMIT_MEMORY,
@@ -781,6 +801,14 @@ static ctl_table vm_table[] = {
781 .proc_handler = &proc_dointvec, 801 .proc_handler = &proc_dointvec,
782 }, 802 },
783 { 803 {
804 .ctl_name = CTL_UNNUMBERED,
805 .procname = "oom_kill_allocating_task",
806 .data = &sysctl_oom_kill_allocating_task,
807 .maxlen = sizeof(sysctl_oom_kill_allocating_task),
808 .mode = 0644,
809 .proc_handler = &proc_dointvec,
810 },
811 {
784 .ctl_name = VM_OVERCOMMIT_RATIO, 812 .ctl_name = VM_OVERCOMMIT_RATIO,
785 .procname = "overcommit_ratio", 813 .procname = "overcommit_ratio",
786 .data = &sysctl_overcommit_ratio, 814 .data = &sysctl_overcommit_ratio,
@@ -813,7 +841,7 @@ static ctl_table vm_table[] = {
813 .data = &vm_dirty_ratio, 841 .data = &vm_dirty_ratio,
814 .maxlen = sizeof(vm_dirty_ratio), 842 .maxlen = sizeof(vm_dirty_ratio),
815 .mode = 0644, 843 .mode = 0644,
816 .proc_handler = &proc_dointvec_minmax, 844 .proc_handler = &dirty_ratio_handler,
817 .strategy = &sysctl_intvec, 845 .strategy = &sysctl_intvec,
818 .extra1 = &zero, 846 .extra1 = &zero,
819 .extra2 = &one_hundred, 847 .extra2 = &one_hundred,
@@ -880,6 +908,14 @@ static ctl_table vm_table[] = {
880 .mode = 0644, 908 .mode = 0644,
881 .proc_handler = &hugetlb_treat_movable_handler, 909 .proc_handler = &hugetlb_treat_movable_handler,
882 }, 910 },
911 {
912 .ctl_name = CTL_UNNUMBERED,
913 .procname = "hugetlb_dynamic_pool",
914 .data = &hugetlb_dynamic_pool,
915 .maxlen = sizeof(hugetlb_dynamic_pool),
916 .mode = 0644,
917 .proc_handler = &proc_dointvec,
918 },
883#endif 919#endif
884 { 920 {
885 .ctl_name = VM_LOWMEM_RESERVE_RATIO, 921 .ctl_name = VM_LOWMEM_RESERVE_RATIO,
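The new entries above follow one pattern: a CTL_UNNUMBERED, name-only sysctl, using proc_dointvec_minmax with extra1/extra2 when the value must be clamped (softlockup_thresh is held to 1..60). A hedged sketch of that pattern with invented names, in the ctl_table style of this era; such a table would typically be hooked in via register_sysctl_table().

#include <linux/sysctl.h>

/* Hypothetical tunable clamped to [1, 60], mirroring the entries above. */
static int example_thresh = 10;
static int example_min = 1;
static int example_max = 60;

static ctl_table example_table[] = {
	{
		.ctl_name	= CTL_UNNUMBERED,	/* no binary sysctl number */
		.procname	= "example_thresh",
		.data		= &example_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_minmax,
		.strategy	= &sysctl_intvec,
		.extra1		= &example_min,		/* lower bound */
		.extra2		= &example_max,		/* upper bound */
	},
	{ .ctl_name = 0 }
};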
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 059431ed67..7d4d7f9c1b 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -20,7 +20,6 @@
20#include <linux/taskstats_kern.h> 20#include <linux/taskstats_kern.h>
21#include <linux/tsacct_kern.h> 21#include <linux/tsacct_kern.h>
22#include <linux/delayacct.h> 22#include <linux/delayacct.h>
23#include <linux/tsacct_kern.h>
24#include <linux/cpumask.h> 23#include <linux/cpumask.h>
25#include <linux/percpu.h> 24#include <linux/percpu.h>
26#include <net/genetlink.h> 25#include <net/genetlink.h>
diff --git a/kernel/time.c b/kernel/time.c
index 2289a8d683..2d5b6a6821 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -34,7 +34,6 @@
34#include <linux/syscalls.h> 34#include <linux/syscalls.h>
35#include <linux/security.h> 35#include <linux/security.h>
36#include <linux/fs.h> 36#include <linux/fs.h>
37#include <linux/module.h>
38 37
39#include <asm/uaccess.h> 38#include <asm/uaccess.h>
40#include <asm/unistd.h> 39#include <asm/unistd.h>
@@ -57,11 +56,7 @@ EXPORT_SYMBOL(sys_tz);
57 */ 56 */
58asmlinkage long sys_time(time_t __user * tloc) 57asmlinkage long sys_time(time_t __user * tloc)
59{ 58{
60 time_t i; 59 time_t i = get_seconds();
61 struct timespec tv;
62
63 getnstimeofday(&tv);
64 i = tv.tv_sec;
65 60
66 if (tloc) { 61 if (tloc) {
67 if (put_user(i,tloc)) 62 if (put_user(i,tloc))
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index fc3fc79b3d..8cfb8b2ce7 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -222,20 +222,8 @@ static void tick_do_broadcast_on_off(void *why)
222 if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP)) 222 if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
223 goto out; 223 goto out;
224 224
225 /* 225 if (!tick_device_is_functional(dev))
226 * Defect device ? 226 goto out;
227 */
228 if (!tick_device_is_functional(dev)) {
229 /*
230 * AMD C1E wreckage fixup:
231 *
232 * Device was registered functional in the first
233 * place. Now the secondary CPU detected the C1E
234 * misfeature and notifies us to fix it up
235 */
236 if (*reason != CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
237 goto out;
238 }
239 227
240 switch (*reason) { 228 switch (*reason) {
241 case CLOCK_EVT_NOTIFY_BROADCAST_ON: 229 case CLOCK_EVT_NOTIFY_BROADCAST_ON:
@@ -246,6 +234,8 @@ static void tick_do_broadcast_on_off(void *why)
246 clockevents_set_mode(dev, 234 clockevents_set_mode(dev,
247 CLOCK_EVT_MODE_SHUTDOWN); 235 CLOCK_EVT_MODE_SHUTDOWN);
248 } 236 }
237 if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
238 dev->features |= CLOCK_EVT_FEAT_DUMMY;
249 break; 239 break;
250 case CLOCK_EVT_NOTIFY_BROADCAST_OFF: 240 case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
251 if (cpu_isset(cpu, tick_broadcast_mask)) { 241 if (cpu_isset(cpu, tick_broadcast_mask)) {
@@ -274,21 +264,12 @@ out:
274 */ 264 */
275void tick_broadcast_on_off(unsigned long reason, int *oncpu) 265void tick_broadcast_on_off(unsigned long reason, int *oncpu)
276{ 266{
277 int cpu = get_cpu(); 267 if (!cpu_isset(*oncpu, cpu_online_map))
278
279 if (!cpu_isset(*oncpu, cpu_online_map)) {
280 printk(KERN_ERR "tick-braodcast: ignoring broadcast for " 268 printk(KERN_ERR "tick-braodcast: ignoring broadcast for "
281 "offline CPU #%d\n", *oncpu); 269 "offline CPU #%d\n", *oncpu);
282 } else { 270 else
283 271 smp_call_function_single(*oncpu, tick_do_broadcast_on_off,
284 if (cpu == *oncpu) 272 &reason, 1, 1);
285 tick_do_broadcast_on_off(&reason);
286 else
287 smp_call_function_single(*oncpu,
288 tick_do_broadcast_on_off,
289 &reason, 1, 1);
290 }
291 put_cpu();
292} 273}
293 274
294/* 275/*
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 8c3fef1db0..ce89ffb474 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -570,7 +570,7 @@ void tick_setup_sched_timer(void)
570 /* Get the next period (per cpu) */ 570 /* Get the next period (per cpu) */
571 ts->sched_timer.expires = tick_init_jiffy_update(); 571 ts->sched_timer.expires = tick_init_jiffy_update();
572 offset = ktime_to_ns(tick_period) >> 1; 572 offset = ktime_to_ns(tick_period) >> 1;
573 do_div(offset, NR_CPUS); 573 do_div(offset, num_possible_cpus());
574 offset *= smp_processor_id(); 574 offset *= smp_processor_id();
575 ts->sched_timer.expires = ktime_add_ns(ts->sched_timer.expires, offset); 575 ts->sched_timer.expires = ktime_add_ns(ts->sched_timer.expires, offset);
576 576
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 4ad79f6bde..e5e466b275 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -24,9 +24,7 @@
24 * This read-write spinlock protects us from races in SMP while 24 * This read-write spinlock protects us from races in SMP while
25 * playing with xtime and avenrun. 25 * playing with xtime and avenrun.
26 */ 26 */
27__attribute__((weak)) __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); 27__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
28
29EXPORT_SYMBOL(xtime_lock);
30 28
31 29
32/* 30/*
@@ -47,21 +45,13 @@ EXPORT_SYMBOL(xtime_lock);
47struct timespec xtime __attribute__ ((aligned (16))); 45struct timespec xtime __attribute__ ((aligned (16)));
48struct timespec wall_to_monotonic __attribute__ ((aligned (16))); 46struct timespec wall_to_monotonic __attribute__ ((aligned (16)));
49static unsigned long total_sleep_time; /* seconds */ 47static unsigned long total_sleep_time; /* seconds */
50EXPORT_SYMBOL(xtime);
51
52 48
53#ifdef CONFIG_NO_HZ
54static struct timespec xtime_cache __attribute__ ((aligned (16))); 49static struct timespec xtime_cache __attribute__ ((aligned (16)));
55static inline void update_xtime_cache(u64 nsec) 50static inline void update_xtime_cache(u64 nsec)
56{ 51{
57 xtime_cache = xtime; 52 xtime_cache = xtime;
58 timespec_add_ns(&xtime_cache, nsec); 53 timespec_add_ns(&xtime_cache, nsec);
59} 54}
60#else
61#define xtime_cache xtime
62/* We do *not* want to evaluate the argument for this case */
63#define update_xtime_cache(n) do { } while (0)
64#endif
65 55
66static struct clocksource *clock; /* pointer to current clocksource */ 56static struct clocksource *clock; /* pointer to current clocksource */
67 57
diff --git a/kernel/user.c b/kernel/user.c
index f0e561e6d0..e91331c457 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -44,7 +44,6 @@ struct user_struct root_user = {
44 .processes = ATOMIC_INIT(1), 44 .processes = ATOMIC_INIT(1),
45 .files = ATOMIC_INIT(0), 45 .files = ATOMIC_INIT(0),
46 .sigpending = ATOMIC_INIT(0), 46 .sigpending = ATOMIC_INIT(0),
47 .mq_bytes = 0,
48 .locked_shm = 0, 47 .locked_shm = 0,
49#ifdef CONFIG_KEYS 48#ifdef CONFIG_KEYS
50 .uid_keyring = &root_user_keyring, 49 .uid_keyring = &root_user_keyring,
@@ -58,19 +57,17 @@ struct user_struct root_user = {
58/* 57/*
59 * These routines must be called with the uidhash spinlock held! 58 * These routines must be called with the uidhash spinlock held!
60 */ 59 */
61static inline void uid_hash_insert(struct user_struct *up, 60static void uid_hash_insert(struct user_struct *up, struct hlist_head *hashent)
62 struct hlist_head *hashent)
63{ 61{
64 hlist_add_head(&up->uidhash_node, hashent); 62 hlist_add_head(&up->uidhash_node, hashent);
65} 63}
66 64
67static inline void uid_hash_remove(struct user_struct *up) 65static void uid_hash_remove(struct user_struct *up)
68{ 66{
69 hlist_del_init(&up->uidhash_node); 67 hlist_del_init(&up->uidhash_node);
70} 68}
71 69
72static inline struct user_struct *uid_hash_find(uid_t uid, 70static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
73 struct hlist_head *hashent)
74{ 71{
75 struct user_struct *user; 72 struct user_struct *user;
76 struct hlist_node *h; 73 struct hlist_node *h;
@@ -87,9 +84,6 @@ static inline struct user_struct *uid_hash_find(uid_t uid,
87 84
88#ifdef CONFIG_FAIR_USER_SCHED 85#ifdef CONFIG_FAIR_USER_SCHED
89 86
90static struct kobject uids_kobject; /* represents /sys/kernel/uids directory */
91static DEFINE_MUTEX(uids_mutex);
92
93static void sched_destroy_user(struct user_struct *up) 87static void sched_destroy_user(struct user_struct *up)
94{ 88{
95 sched_destroy_group(up->tg); 89 sched_destroy_group(up->tg);
@@ -111,6 +105,19 @@ static void sched_switch_user(struct task_struct *p)
111 sched_move_task(p); 105 sched_move_task(p);
112} 106}
113 107
108#else /* CONFIG_FAIR_USER_SCHED */
109
110static void sched_destroy_user(struct user_struct *up) { }
111static int sched_create_user(struct user_struct *up) { return 0; }
112static void sched_switch_user(struct task_struct *p) { }
113
114#endif /* CONFIG_FAIR_USER_SCHED */
115
116#if defined(CONFIG_FAIR_USER_SCHED) && defined(CONFIG_SYSFS)
117
118static struct kobject uids_kobject; /* represents /sys/kernel/uids directory */
119static DEFINE_MUTEX(uids_mutex);
120
114static inline void uids_mutex_lock(void) 121static inline void uids_mutex_lock(void)
115{ 122{
116 mutex_lock(&uids_mutex); 123 mutex_lock(&uids_mutex);
@@ -257,11 +264,8 @@ static inline void free_user(struct user_struct *up, unsigned long flags)
257 schedule_work(&up->work); 264 schedule_work(&up->work);
258} 265}
259 266
260#else /* CONFIG_FAIR_USER_SCHED */ 267#else /* CONFIG_FAIR_USER_SCHED && CONFIG_SYSFS */
261 268
262static void sched_destroy_user(struct user_struct *up) { }
263static int sched_create_user(struct user_struct *up) { return 0; }
264static void sched_switch_user(struct task_struct *p) { }
265static inline int user_kobject_create(struct user_struct *up) { return 0; } 269static inline int user_kobject_create(struct user_struct *up) { return 0; }
266static inline void uids_mutex_lock(void) { } 270static inline void uids_mutex_lock(void) { }
267static inline void uids_mutex_unlock(void) { } 271static inline void uids_mutex_unlock(void) { }
@@ -280,7 +284,7 @@ static inline void free_user(struct user_struct *up, unsigned long flags)
280 kmem_cache_free(uid_cachep, up); 284 kmem_cache_free(uid_cachep, up);
281} 285}
282 286
283#endif /* CONFIG_FAIR_USER_SCHED */ 287#endif
284 288
285/* 289/*
286 * Locate the user_struct for the passed UID. If found, take a ref on it. The 290 * Locate the user_struct for the passed UID. If found, take a ref on it. The
@@ -343,8 +347,9 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
343 atomic_set(&new->inotify_watches, 0); 347 atomic_set(&new->inotify_watches, 0);
344 atomic_set(&new->inotify_devs, 0); 348 atomic_set(&new->inotify_devs, 0);
345#endif 349#endif
346 350#ifdef CONFIG_POSIX_MQUEUE
347 new->mq_bytes = 0; 351 new->mq_bytes = 0;
352#endif
348 new->locked_shm = 0; 353 new->locked_shm = 0;
349 354
350 if (alloc_uid_keyring(new, current) < 0) { 355 if (alloc_uid_keyring(new, current) < 0) {