Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile             |   5
-rw-r--r--  kernel/cgroup.c             |  20
-rw-r--r--  kernel/cpu.c                |  44
-rw-r--r--  kernel/cpu_pm.c             |  16
-rw-r--r--  kernel/cred.c               |   9
-rw-r--r--  kernel/exit.c               |  13
-rw-r--r--  kernel/fork.c               |  28
-rw-r--r--  kernel/irq/manage.c         |  80
-rw-r--r--  kernel/kallsyms.c           |  32
-rw-r--r--  kernel/kcmp.c               | 196
-rw-r--r--  kernel/kmod.c               |  30
-rw-r--r--  kernel/pid_namespace.c      |  13
-rw-r--r--  kernel/res_counter.c        |  10
-rw-r--r--  kernel/resource.c           |   4
-rw-r--r--  kernel/signal.c             |  11
-rw-r--r--  kernel/sys.c                | 213
-rw-r--r--  kernel/sys_ni.c             |   3
-rw-r--r--  kernel/task_work.c          |  84
-rw-r--r--  kernel/trace/ring_buffer.c  |   5
19 files changed, 636 insertions, 180 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 296132c19a57..c0cc67ad764c 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -5,7 +5,7 @@ | |||
5 | obj-y = fork.o exec_domain.o panic.o printk.o \ | 5 | obj-y = fork.o exec_domain.o panic.o printk.o \ |
6 | cpu.o exit.o itimer.o time.o softirq.o resource.o \ | 6 | cpu.o exit.o itimer.o time.o softirq.o resource.o \ |
7 | sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \ | 7 | sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \ |
8 | signal.o sys.o kmod.o workqueue.o pid.o \ | 8 | signal.o sys.o kmod.o workqueue.o pid.o task_work.o \ |
9 | rcupdate.o extable.o params.o posix-timers.o \ | 9 | rcupdate.o extable.o params.o posix-timers.o \ |
10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ | 10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ |
11 | hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ | 11 | hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ |
@@ -25,6 +25,9 @@ endif | |||
25 | obj-y += sched/ | 25 | obj-y += sched/ |
26 | obj-y += power/ | 26 | obj-y += power/ |
27 | 27 | ||
28 | ifeq ($(CONFIG_CHECKPOINT_RESTORE),y) | ||
29 | obj-$(CONFIG_X86) += kcmp.o | ||
30 | endif | ||
28 | obj-$(CONFIG_FREEZER) += freezer.o | 31 | obj-$(CONFIG_FREEZER) += freezer.o |
29 | obj-$(CONFIG_PROFILING) += profile.o | 32 | obj-$(CONFIG_PROFILING) += profile.o |
30 | obj-$(CONFIG_STACKTRACE) += stacktrace.o | 33 | obj-$(CONFIG_STACKTRACE) += stacktrace.o |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a0c6af34d500..0f3527d6184a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -5132,7 +5132,7 @@ EXPORT_SYMBOL_GPL(css_depth); | |||
5132 | * @root: the css supporsed to be an ancestor of the child. | 5132 | * @root: the css supporsed to be an ancestor of the child. |
5133 | * | 5133 | * |
5134 | * Returns true if "root" is an ancestor of "child" in its hierarchy. Because | 5134 | * Returns true if "root" is an ancestor of "child" in its hierarchy. Because |
5135 | * this function reads css->id, this use rcu_dereference() and rcu_read_lock(). | 5135 | * this function reads css->id, the caller must hold rcu_read_lock(). |
5136 | * But, considering usual usage, the csses should be valid objects after test. | 5136 | * But, considering usual usage, the csses should be valid objects after test. |
5137 | * Assuming that the caller will do some action to the child if this returns | 5137 | * Assuming that the caller will do some action to the child if this returns |
5138 | * returns true, the caller must take "child";s reference count. | 5138 | * returns true, the caller must take "child";s reference count. |
@@ -5144,18 +5144,18 @@ bool css_is_ancestor(struct cgroup_subsys_state *child, | |||
5144 | { | 5144 | { |
5145 | struct css_id *child_id; | 5145 | struct css_id *child_id; |
5146 | struct css_id *root_id; | 5146 | struct css_id *root_id; |
5147 | bool ret = true; | ||
5148 | 5147 | ||
5149 | rcu_read_lock(); | ||
5150 | child_id = rcu_dereference(child->id); | 5148 | child_id = rcu_dereference(child->id); |
5149 | if (!child_id) | ||
5150 | return false; | ||
5151 | root_id = rcu_dereference(root->id); | 5151 | root_id = rcu_dereference(root->id); |
5152 | if (!child_id | 5152 | if (!root_id) |
5153 | || !root_id | 5153 | return false; |
5154 | || (child_id->depth < root_id->depth) | 5154 | if (child_id->depth < root_id->depth) |
5155 | || (child_id->stack[root_id->depth] != root_id->id)) | 5155 | return false; |
5156 | ret = false; | 5156 | if (child_id->stack[root_id->depth] != root_id->id) |
5157 | rcu_read_unlock(); | 5157 | return false; |
5158 | return ret; | 5158 | return true; |
5159 | } | 5159 | } |
5160 | 5160 | ||
5161 | void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css) | 5161 | void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css) |
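With the lock/unlock pair removed from css_is_ancestor(), callers are now expected to provide the RCU read-side critical section themselves. A minimal caller sketch, assuming a hypothetical wrapper function (only css_is_ancestor() itself comes from this patch):

	/* Hypothetical caller; css_is_ancestor() is the API changed above. */
	static bool my_css_under_root(struct cgroup_subsys_state *child,
				      struct cgroup_subsys_state *root)
	{
		bool ret;

		/* css_is_ancestor() reads css->id, so hold rcu_read_lock() around it */
		rcu_read_lock();
		ret = css_is_ancestor(child, root);
		rcu_read_unlock();

		return ret;
	}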
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 0e6353cf147a..a4eb5227a19e 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -10,7 +10,10 @@ | |||
10 | #include <linux/sched.h> | 10 | #include <linux/sched.h> |
11 | #include <linux/unistd.h> | 11 | #include <linux/unistd.h> |
12 | #include <linux/cpu.h> | 12 | #include <linux/cpu.h> |
13 | #include <linux/oom.h> | ||
14 | #include <linux/rcupdate.h> | ||
13 | #include <linux/export.h> | 15 | #include <linux/export.h> |
16 | #include <linux/bug.h> | ||
14 | #include <linux/kthread.h> | 17 | #include <linux/kthread.h> |
15 | #include <linux/stop_machine.h> | 18 | #include <linux/stop_machine.h> |
16 | #include <linux/mutex.h> | 19 | #include <linux/mutex.h> |
@@ -173,6 +176,47 @@ void __ref unregister_cpu_notifier(struct notifier_block *nb) | |||
173 | } | 176 | } |
174 | EXPORT_SYMBOL(unregister_cpu_notifier); | 177 | EXPORT_SYMBOL(unregister_cpu_notifier); |
175 | 178 | ||
179 | /** | ||
180 | * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU | ||
181 | * @cpu: a CPU id | ||
182 | * | ||
183 | * This function walks all processes, finds a valid mm struct for each one and | ||
184 | * then clears a corresponding bit in mm's cpumask. While this all sounds | ||
185 | * trivial, there are various non-obvious corner cases, which this function | ||
186 | * tries to solve in a safe manner. | ||
187 | * | ||
188 | * Also note that the function uses a somewhat relaxed locking scheme, so it may | ||
189 | * be called only for an already offlined CPU. | ||
190 | */ | ||
191 | void clear_tasks_mm_cpumask(int cpu) | ||
192 | { | ||
193 | struct task_struct *p; | ||
194 | |||
195 | /* | ||
196 | * This function is called after the cpu is taken down and marked | ||
197 | * offline, so its not like new tasks will ever get this cpu set in | ||
198 | * their mm mask. -- Peter Zijlstra | ||
199 | * Thus, we may use rcu_read_lock() here, instead of grabbing | ||
200 | * full-fledged tasklist_lock. | ||
201 | */ | ||
202 | WARN_ON(cpu_online(cpu)); | ||
203 | rcu_read_lock(); | ||
204 | for_each_process(p) { | ||
205 | struct task_struct *t; | ||
206 | |||
207 | /* | ||
208 | * Main thread might exit, but other threads may still have | ||
209 | * a valid mm. Find one. | ||
210 | */ | ||
211 | t = find_lock_task_mm(p); | ||
212 | if (!t) | ||
213 | continue; | ||
214 | cpumask_clear_cpu(cpu, mm_cpumask(t->mm)); | ||
215 | task_unlock(t); | ||
216 | } | ||
217 | rcu_read_unlock(); | ||
218 | } | ||
219 | |||
176 | static inline void check_for_tasks(int cpu) | 220 | static inline void check_for_tasks(int cpu) |
177 | { | 221 | { |
178 | struct task_struct *p; | 222 | struct task_struct *p; |
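The new helper is intended to replace open-coded mm_cpumask clearing in architecture hotplug teardown paths. A hedged sketch of such a call site (the surrounding __cpu_die() body is illustrative only; clear_tasks_mm_cpumask() is the function added above):

	/* Illustrative arch hook; only clear_tasks_mm_cpumask() comes from this patch. */
	void __cpu_die(unsigned int cpu)
	{
		/* ... arch-specific wait for the dying CPU to stop ... */

		/*
		 * The CPU is already marked offline here, which is exactly the
		 * precondition the relaxed locking in clear_tasks_mm_cpumask()
		 * relies on (see the WARN_ON(cpu_online(cpu)) above).
		 */
		clear_tasks_mm_cpumask(cpu);

		/* ... release arch resources for this cpu ... */
	}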
diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c
index 249152e15308..9656a3c36503 100644
--- a/kernel/cpu_pm.c
+++ b/kernel/cpu_pm.c
@@ -81,7 +81,7 @@ int cpu_pm_unregister_notifier(struct notifier_block *nb) | |||
81 | EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier); | 81 | EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier); |
82 | 82 | ||
83 | /** | 83 | /** |
84 | * cpm_pm_enter - CPU low power entry notifier | 84 | * cpu_pm_enter - CPU low power entry notifier |
85 | * | 85 | * |
86 | * Notifies listeners that a single CPU is entering a low power state that may | 86 | * Notifies listeners that a single CPU is entering a low power state that may |
87 | * cause some blocks in the same power domain as the cpu to reset. | 87 | * cause some blocks in the same power domain as the cpu to reset. |
@@ -89,7 +89,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier); | |||
89 | * Must be called on the affected CPU with interrupts disabled. Platform is | 89 | * Must be called on the affected CPU with interrupts disabled. Platform is |
90 | * responsible for ensuring that cpu_pm_enter is not called twice on the same | 90 | * responsible for ensuring that cpu_pm_enter is not called twice on the same |
91 | * CPU before cpu_pm_exit is called. Notified drivers can include VFP | 91 | * CPU before cpu_pm_exit is called. Notified drivers can include VFP |
92 | * co-processor, interrupt controller and it's PM extensions, local CPU | 92 | * co-processor, interrupt controller and its PM extensions, local CPU |
93 | * timers context save/restore which shouldn't be interrupted. Hence it | 93 | * timers context save/restore which shouldn't be interrupted. Hence it |
94 | * must be called with interrupts disabled. | 94 | * must be called with interrupts disabled. |
95 | * | 95 | * |
@@ -115,13 +115,13 @@ int cpu_pm_enter(void) | |||
115 | EXPORT_SYMBOL_GPL(cpu_pm_enter); | 115 | EXPORT_SYMBOL_GPL(cpu_pm_enter); |
116 | 116 | ||
117 | /** | 117 | /** |
118 | * cpm_pm_exit - CPU low power exit notifier | 118 | * cpu_pm_exit - CPU low power exit notifier |
119 | * | 119 | * |
120 | * Notifies listeners that a single CPU is exiting a low power state that may | 120 | * Notifies listeners that a single CPU is exiting a low power state that may |
121 | * have caused some blocks in the same power domain as the cpu to reset. | 121 | * have caused some blocks in the same power domain as the cpu to reset. |
122 | * | 122 | * |
123 | * Notified drivers can include VFP co-processor, interrupt controller | 123 | * Notified drivers can include VFP co-processor, interrupt controller |
124 | * and it's PM extensions, local CPU timers context save/restore which | 124 | * and its PM extensions, local CPU timers context save/restore which |
125 | * shouldn't be interrupted. Hence it must be called with interrupts disabled. | 125 | * shouldn't be interrupted. Hence it must be called with interrupts disabled. |
126 | * | 126 | * |
127 | * Return conditions are same as __raw_notifier_call_chain. | 127 | * Return conditions are same as __raw_notifier_call_chain. |
@@ -139,7 +139,7 @@ int cpu_pm_exit(void) | |||
139 | EXPORT_SYMBOL_GPL(cpu_pm_exit); | 139 | EXPORT_SYMBOL_GPL(cpu_pm_exit); |
140 | 140 | ||
141 | /** | 141 | /** |
142 | * cpm_cluster_pm_enter - CPU cluster low power entry notifier | 142 | * cpu_cluster_pm_enter - CPU cluster low power entry notifier |
143 | * | 143 | * |
144 | * Notifies listeners that all cpus in a power domain are entering a low power | 144 | * Notifies listeners that all cpus in a power domain are entering a low power |
145 | * state that may cause some blocks in the same power domain to reset. | 145 | * state that may cause some blocks in the same power domain to reset. |
@@ -147,7 +147,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_exit); | |||
147 | * Must be called after cpu_pm_enter has been called on all cpus in the power | 147 | * Must be called after cpu_pm_enter has been called on all cpus in the power |
148 | * domain, and before cpu_pm_exit has been called on any cpu in the power | 148 | * domain, and before cpu_pm_exit has been called on any cpu in the power |
149 | * domain. Notified drivers can include VFP co-processor, interrupt controller | 149 | * domain. Notified drivers can include VFP co-processor, interrupt controller |
150 | * and it's PM extensions, local CPU timers context save/restore which | 150 | * and its PM extensions, local CPU timers context save/restore which |
151 | * shouldn't be interrupted. Hence it must be called with interrupts disabled. | 151 | * shouldn't be interrupted. Hence it must be called with interrupts disabled. |
152 | * | 152 | * |
153 | * Must be called with interrupts disabled. | 153 | * Must be called with interrupts disabled. |
@@ -174,7 +174,7 @@ int cpu_cluster_pm_enter(void) | |||
174 | EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter); | 174 | EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter); |
175 | 175 | ||
176 | /** | 176 | /** |
177 | * cpm_cluster_pm_exit - CPU cluster low power exit notifier | 177 | * cpu_cluster_pm_exit - CPU cluster low power exit notifier |
178 | * | 178 | * |
179 | * Notifies listeners that all cpus in a power domain are exiting form a | 179 | * Notifies listeners that all cpus in a power domain are exiting form a |
180 | * low power state that may have caused some blocks in the same power domain | 180 | * low power state that may have caused some blocks in the same power domain |
@@ -183,7 +183,7 @@ EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter); | |||
183 | * Must be called after cpu_pm_exit has been called on all cpus in the power | 183 | * Must be called after cpu_pm_exit has been called on all cpus in the power |
184 | * domain, and before cpu_pm_exit has been called on any cpu in the power | 184 | * domain, and before cpu_pm_exit has been called on any cpu in the power |
185 | * domain. Notified drivers can include VFP co-processor, interrupt controller | 185 | * domain. Notified drivers can include VFP co-processor, interrupt controller |
186 | * and it's PM extensions, local CPU timers context save/restore which | 186 | * and its PM extensions, local CPU timers context save/restore which |
187 | * shouldn't be interrupted. Hence it must be called with interrupts disabled. | 187 | * shouldn't be interrupted. Hence it must be called with interrupts disabled. |
188 | * | 188 | * |
189 | * Return conditions are same as __raw_notifier_call_chain. | 189 | * Return conditions are same as __raw_notifier_call_chain. |
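The kernel-doc corrected above spells out the calling contract: interrupts disabled, cpu_pm_enter() before the low-power state, cpu_pm_exit() after. A compact sketch of a platform idle path that honours it (the platform function itself is made up):

	/* Hypothetical platform idle hook; cpu_pm_enter/exit are the documented API. */
	static int my_platform_enter_lowpower(void)
	{
		int ret;

		/* Interrupts are already disabled by the cpuidle/suspend core here. */
		ret = cpu_pm_enter();
		if (ret)
			return ret;

		/* ... program the hardware and enter the low power state ... */

		cpu_pm_exit();
		return 0;
	}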
diff --git a/kernel/cred.c b/kernel/cred.c
index 430557ea488f..de728ac50d82 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -207,13 +207,6 @@ void exit_creds(struct task_struct *tsk) | |||
207 | validate_creds(cred); | 207 | validate_creds(cred); |
208 | alter_cred_subscribers(cred, -1); | 208 | alter_cred_subscribers(cred, -1); |
209 | put_cred(cred); | 209 | put_cred(cred); |
210 | |||
211 | cred = (struct cred *) tsk->replacement_session_keyring; | ||
212 | if (cred) { | ||
213 | tsk->replacement_session_keyring = NULL; | ||
214 | validate_creds(cred); | ||
215 | put_cred(cred); | ||
216 | } | ||
217 | } | 210 | } |
218 | 211 | ||
219 | /** | 212 | /** |
@@ -396,8 +389,6 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) | |||
396 | struct cred *new; | 389 | struct cred *new; |
397 | int ret; | 390 | int ret; |
398 | 391 | ||
399 | p->replacement_session_keyring = NULL; | ||
400 | |||
401 | if ( | 392 | if ( |
402 | #ifdef CONFIG_KEYS | 393 | #ifdef CONFIG_KEYS |
403 | !p->cred->thread_keyring && | 394 | !p->cred->thread_keyring && |
diff --git a/kernel/exit.c b/kernel/exit.c
index 910a0716e17a..34867cc5b42a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -884,9 +884,9 @@ static void check_stack_usage(void) | |||
884 | 884 | ||
885 | spin_lock(&low_water_lock); | 885 | spin_lock(&low_water_lock); |
886 | if (free < lowest_to_date) { | 886 | if (free < lowest_to_date) { |
887 | printk(KERN_WARNING "%s used greatest stack depth: %lu bytes " | 887 | printk(KERN_WARNING "%s (%d) used greatest stack depth: " |
888 | "left\n", | 888 | "%lu bytes left\n", |
889 | current->comm, free); | 889 | current->comm, task_pid_nr(current), free); |
890 | lowest_to_date = free; | 890 | lowest_to_date = free; |
891 | } | 891 | } |
892 | spin_unlock(&low_water_lock); | 892 | spin_unlock(&low_water_lock); |
@@ -946,12 +946,13 @@ void do_exit(long code) | |||
946 | exit_signals(tsk); /* sets PF_EXITING */ | 946 | exit_signals(tsk); /* sets PF_EXITING */ |
947 | /* | 947 | /* |
948 | * tsk->flags are checked in the futex code to protect against | 948 | * tsk->flags are checked in the futex code to protect against |
949 | * an exiting task cleaning up the robust pi futexes. | 949 | * an exiting task cleaning up the robust pi futexes, and in |
950 | * task_work_add() to avoid the race with exit_task_work(). | ||
950 | */ | 951 | */ |
951 | smp_mb(); | 952 | smp_mb(); |
952 | raw_spin_unlock_wait(&tsk->pi_lock); | 953 | raw_spin_unlock_wait(&tsk->pi_lock); |
953 | 954 | ||
954 | exit_irq_thread(); | 955 | exit_task_work(tsk); |
955 | 956 | ||
956 | if (unlikely(in_atomic())) | 957 | if (unlikely(in_atomic())) |
957 | printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", | 958 | printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", |
@@ -1214,7 +1215,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) | |||
1214 | unsigned long state; | 1215 | unsigned long state; |
1215 | int retval, status, traced; | 1216 | int retval, status, traced; |
1216 | pid_t pid = task_pid_vnr(p); | 1217 | pid_t pid = task_pid_vnr(p); |
1217 | uid_t uid = from_kuid_munged(current_user_ns(), __task_cred(p)->uid); | 1218 | uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p)); |
1218 | struct siginfo __user *infop; | 1219 | struct siginfo __user *infop; |
1219 | 1220 | ||
1220 | if (!likely(wo->wo_flags & WEXITED)) | 1221 | if (!likely(wo->wo_flags & WEXITED)) |
diff --git a/kernel/fork.c b/kernel/fork.c
index 47b4e4f379f9..ab5211b9e622 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -386,7 +386,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
386 | } | 386 | } |
387 | charge = 0; | 387 | charge = 0; |
388 | if (mpnt->vm_flags & VM_ACCOUNT) { | 388 | if (mpnt->vm_flags & VM_ACCOUNT) { |
389 | unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; | 389 | unsigned long len; |
390 | len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; | ||
390 | if (security_vm_enough_memory_mm(oldmm, len)) /* sic */ | 391 | if (security_vm_enough_memory_mm(oldmm, len)) /* sic */ |
391 | goto fail_nomem; | 392 | goto fail_nomem; |
392 | charge = len; | 393 | charge = len; |
@@ -614,7 +615,6 @@ void mmput(struct mm_struct *mm) | |||
614 | list_del(&mm->mmlist); | 615 | list_del(&mm->mmlist); |
615 | spin_unlock(&mmlist_lock); | 616 | spin_unlock(&mmlist_lock); |
616 | } | 617 | } |
617 | put_swap_token(mm); | ||
618 | if (mm->binfmt) | 618 | if (mm->binfmt) |
619 | module_put(mm->binfmt->module); | 619 | module_put(mm->binfmt->module); |
620 | mmdrop(mm); | 620 | mmdrop(mm); |
@@ -787,9 +787,6 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) | |||
787 | /* Get rid of any cached register state */ | 787 | /* Get rid of any cached register state */ |
788 | deactivate_mm(tsk, mm); | 788 | deactivate_mm(tsk, mm); |
789 | 789 | ||
790 | if (tsk->vfork_done) | ||
791 | complete_vfork_done(tsk); | ||
792 | |||
793 | /* | 790 | /* |
794 | * If we're exiting normally, clear a user-space tid field if | 791 | * If we're exiting normally, clear a user-space tid field if |
795 | * requested. We leave this alone when dying by signal, to leave | 792 | * requested. We leave this alone when dying by signal, to leave |
@@ -810,6 +807,13 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) | |||
810 | } | 807 | } |
811 | tsk->clear_child_tid = NULL; | 808 | tsk->clear_child_tid = NULL; |
812 | } | 809 | } |
810 | |||
811 | /* | ||
812 | * All done, finally we can wake up parent and return this mm to him. | ||
813 | * Also kthread_stop() uses this completion for synchronization. | ||
814 | */ | ||
815 | if (tsk->vfork_done) | ||
816 | complete_vfork_done(tsk); | ||
813 | } | 817 | } |
814 | 818 | ||
815 | /* | 819 | /* |
@@ -831,10 +835,6 @@ struct mm_struct *dup_mm(struct task_struct *tsk) | |||
831 | memcpy(mm, oldmm, sizeof(*mm)); | 835 | memcpy(mm, oldmm, sizeof(*mm)); |
832 | mm_init_cpumask(mm); | 836 | mm_init_cpumask(mm); |
833 | 837 | ||
834 | /* Initializing for Swap token stuff */ | ||
835 | mm->token_priority = 0; | ||
836 | mm->last_interval = 0; | ||
837 | |||
838 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 838 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
839 | mm->pmd_huge_pte = NULL; | 839 | mm->pmd_huge_pte = NULL; |
840 | #endif | 840 | #endif |
@@ -913,10 +913,6 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) | |||
913 | goto fail_nomem; | 913 | goto fail_nomem; |
914 | 914 | ||
915 | good_mm: | 915 | good_mm: |
916 | /* Initializing for Swap token stuff */ | ||
917 | mm->token_priority = 0; | ||
918 | mm->last_interval = 0; | ||
919 | |||
920 | tsk->mm = mm; | 916 | tsk->mm = mm; |
921 | tsk->active_mm = mm; | 917 | tsk->active_mm = mm; |
922 | return 0; | 918 | return 0; |
@@ -984,9 +980,8 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk) | |||
984 | * Share io context with parent, if CLONE_IO is set | 980 | * Share io context with parent, if CLONE_IO is set |
985 | */ | 981 | */ |
986 | if (clone_flags & CLONE_IO) { | 982 | if (clone_flags & CLONE_IO) { |
987 | tsk->io_context = ioc_task_link(ioc); | 983 | ioc_task_link(ioc); |
988 | if (unlikely(!tsk->io_context)) | 984 | tsk->io_context = ioc; |
989 | return -ENOMEM; | ||
990 | } else if (ioprio_valid(ioc->ioprio)) { | 985 | } else if (ioprio_valid(ioc->ioprio)) { |
991 | new_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE); | 986 | new_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE); |
992 | if (unlikely(!new_ioc)) | 987 | if (unlikely(!new_ioc)) |
@@ -1420,6 +1415,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1420 | */ | 1415 | */ |
1421 | p->group_leader = p; | 1416 | p->group_leader = p; |
1422 | INIT_LIST_HEAD(&p->thread_group); | 1417 | INIT_LIST_HEAD(&p->thread_group); |
1418 | INIT_HLIST_HEAD(&p->task_works); | ||
1423 | 1419 | ||
1424 | /* Now that the task is set up, run cgroup callbacks if | 1420 | /* Now that the task is set up, run cgroup callbacks if |
1425 | * necessary. We need to run them before the task is visible | 1421 | * necessary. We need to run them before the task is visible |
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index bb32326afe87..ea0c6c2ae6f7 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -7,6 +7,8 @@ | |||
7 | * This file contains driver APIs to the irq subsystem. | 7 | * This file contains driver APIs to the irq subsystem. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #define pr_fmt(fmt) "genirq: " fmt | ||
11 | |||
10 | #include <linux/irq.h> | 12 | #include <linux/irq.h> |
11 | #include <linux/kthread.h> | 13 | #include <linux/kthread.h> |
12 | #include <linux/module.h> | 14 | #include <linux/module.h> |
@@ -14,6 +16,7 @@ | |||
14 | #include <linux/interrupt.h> | 16 | #include <linux/interrupt.h> |
15 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
16 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
19 | #include <linux/task_work.h> | ||
17 | 20 | ||
18 | #include "internals.h" | 21 | #include "internals.h" |
19 | 22 | ||
@@ -565,7 +568,7 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, | |||
565 | * IRQF_TRIGGER_* but the PIC does not support multiple | 568 | * IRQF_TRIGGER_* but the PIC does not support multiple |
566 | * flow-types? | 569 | * flow-types? |
567 | */ | 570 | */ |
568 | pr_debug("genirq: No set_type function for IRQ %d (%s)\n", irq, | 571 | pr_debug("No set_type function for IRQ %d (%s)\n", irq, |
569 | chip ? (chip->name ? : "unknown") : "unknown"); | 572 | chip ? (chip->name ? : "unknown") : "unknown"); |
570 | return 0; | 573 | return 0; |
571 | } | 574 | } |
@@ -600,7 +603,7 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, | |||
600 | ret = 0; | 603 | ret = 0; |
601 | break; | 604 | break; |
602 | default: | 605 | default: |
603 | pr_err("genirq: Setting trigger mode %lu for irq %u failed (%pF)\n", | 606 | pr_err("Setting trigger mode %lu for irq %u failed (%pF)\n", |
604 | flags, irq, chip->irq_set_type); | 607 | flags, irq, chip->irq_set_type); |
605 | } | 608 | } |
606 | if (unmask) | 609 | if (unmask) |
@@ -773,11 +776,39 @@ static void wake_threads_waitq(struct irq_desc *desc) | |||
773 | wake_up(&desc->wait_for_threads); | 776 | wake_up(&desc->wait_for_threads); |
774 | } | 777 | } |
775 | 778 | ||
779 | static void irq_thread_dtor(struct task_work *unused) | ||
780 | { | ||
781 | struct task_struct *tsk = current; | ||
782 | struct irq_desc *desc; | ||
783 | struct irqaction *action; | ||
784 | |||
785 | if (WARN_ON_ONCE(!(current->flags & PF_EXITING))) | ||
786 | return; | ||
787 | |||
788 | action = kthread_data(tsk); | ||
789 | |||
790 | pr_err("exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n", | ||
791 | tsk->comm ? tsk->comm : "", tsk->pid, action->irq); | ||
792 | |||
793 | |||
794 | desc = irq_to_desc(action->irq); | ||
795 | /* | ||
796 | * If IRQTF_RUNTHREAD is set, we need to decrement | ||
797 | * desc->threads_active and wake possible waiters. | ||
798 | */ | ||
799 | if (test_and_clear_bit(IRQTF_RUNTHREAD, &action->thread_flags)) | ||
800 | wake_threads_waitq(desc); | ||
801 | |||
802 | /* Prevent a stale desc->threads_oneshot */ | ||
803 | irq_finalize_oneshot(desc, action); | ||
804 | } | ||
805 | |||
776 | /* | 806 | /* |
777 | * Interrupt handler thread | 807 | * Interrupt handler thread |
778 | */ | 808 | */ |
779 | static int irq_thread(void *data) | 809 | static int irq_thread(void *data) |
780 | { | 810 | { |
811 | struct task_work on_exit_work; | ||
781 | static const struct sched_param param = { | 812 | static const struct sched_param param = { |
782 | .sched_priority = MAX_USER_RT_PRIO/2, | 813 | .sched_priority = MAX_USER_RT_PRIO/2, |
783 | }; | 814 | }; |
@@ -793,7 +824,9 @@ static int irq_thread(void *data) | |||
793 | handler_fn = irq_thread_fn; | 824 | handler_fn = irq_thread_fn; |
794 | 825 | ||
795 | sched_setscheduler(current, SCHED_FIFO, ¶m); | 826 | sched_setscheduler(current, SCHED_FIFO, ¶m); |
796 | current->irq_thread = 1; | 827 | |
828 | init_task_work(&on_exit_work, irq_thread_dtor, NULL); | ||
829 | task_work_add(current, &on_exit_work, false); | ||
797 | 830 | ||
798 | while (!irq_wait_for_interrupt(action)) { | 831 | while (!irq_wait_for_interrupt(action)) { |
799 | irqreturn_t action_ret; | 832 | irqreturn_t action_ret; |
@@ -815,44 +848,11 @@ static int irq_thread(void *data) | |||
815 | * cannot touch the oneshot mask at this point anymore as | 848 | * cannot touch the oneshot mask at this point anymore as |
816 | * __setup_irq() might have given out currents thread_mask | 849 | * __setup_irq() might have given out currents thread_mask |
817 | * again. | 850 | * again. |
818 | * | ||
819 | * Clear irq_thread. Otherwise exit_irq_thread() would make | ||
820 | * fuzz about an active irq thread going into nirvana. | ||
821 | */ | 851 | */ |
822 | current->irq_thread = 0; | 852 | task_work_cancel(current, irq_thread_dtor); |
823 | return 0; | 853 | return 0; |
824 | } | 854 | } |
825 | 855 | ||
826 | /* | ||
827 | * Called from do_exit() | ||
828 | */ | ||
829 | void exit_irq_thread(void) | ||
830 | { | ||
831 | struct task_struct *tsk = current; | ||
832 | struct irq_desc *desc; | ||
833 | struct irqaction *action; | ||
834 | |||
835 | if (!tsk->irq_thread) | ||
836 | return; | ||
837 | |||
838 | action = kthread_data(tsk); | ||
839 | |||
840 | pr_err("genirq: exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n", | ||
841 | tsk->comm ? tsk->comm : "", tsk->pid, action->irq); | ||
842 | |||
843 | desc = irq_to_desc(action->irq); | ||
844 | |||
845 | /* | ||
846 | * If IRQTF_RUNTHREAD is set, we need to decrement | ||
847 | * desc->threads_active and wake possible waiters. | ||
848 | */ | ||
849 | if (test_and_clear_bit(IRQTF_RUNTHREAD, &action->thread_flags)) | ||
850 | wake_threads_waitq(desc); | ||
851 | |||
852 | /* Prevent a stale desc->threads_oneshot */ | ||
853 | irq_finalize_oneshot(desc, action); | ||
854 | } | ||
855 | |||
856 | static void irq_setup_forced_threading(struct irqaction *new) | 856 | static void irq_setup_forced_threading(struct irqaction *new) |
857 | { | 857 | { |
858 | if (!force_irqthreads) | 858 | if (!force_irqthreads) |
@@ -1044,7 +1044,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |||
1044 | * has. The type flags are unreliable as the | 1044 | * has. The type flags are unreliable as the |
1045 | * underlying chip implementation can override them. | 1045 | * underlying chip implementation can override them. |
1046 | */ | 1046 | */ |
1047 | pr_err("genirq: Threaded irq requested with handler=NULL and !ONESHOT for irq %d\n", | 1047 | pr_err("Threaded irq requested with handler=NULL and !ONESHOT for irq %d\n", |
1048 | irq); | 1048 | irq); |
1049 | ret = -EINVAL; | 1049 | ret = -EINVAL; |
1050 | goto out_mask; | 1050 | goto out_mask; |
@@ -1095,7 +1095,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |||
1095 | 1095 | ||
1096 | if (nmsk != omsk) | 1096 | if (nmsk != omsk) |
1097 | /* hope the handler works with current trigger mode */ | 1097 | /* hope the handler works with current trigger mode */ |
1098 | pr_warning("genirq: irq %d uses trigger mode %u; requested %u\n", | 1098 | pr_warning("irq %d uses trigger mode %u; requested %u\n", |
1099 | irq, nmsk, omsk); | 1099 | irq, nmsk, omsk); |
1100 | } | 1100 | } |
1101 | 1101 | ||
@@ -1133,7 +1133,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |||
1133 | 1133 | ||
1134 | mismatch: | 1134 | mismatch: |
1135 | if (!(new->flags & IRQF_PROBE_SHARED)) { | 1135 | if (!(new->flags & IRQF_PROBE_SHARED)) { |
1136 | pr_err("genirq: Flags mismatch irq %d. %08x (%s) vs. %08x (%s)\n", | 1136 | pr_err("Flags mismatch irq %d. %08x (%s) vs. %08x (%s)\n", |
1137 | irq, new->flags, new->name, old->flags, old->name); | 1137 | irq, new->flags, new->name, old->flags, old->name); |
1138 | #ifdef CONFIG_DEBUG_SHIRQ | 1138 | #ifdef CONFIG_DEBUG_SHIRQ |
1139 | dump_stack(); | 1139 | dump_stack(); |
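The irq_thread() conversion above is one instance of the generic pattern provided by the new task_work API (kernel/task_work.c in the diffstat): queue a destructor that runs from exit_task_work() if the task exits, and cancel it on the normal exit path. A minimal sketch using only the calls that appear in this diff, with hypothetical names for everything else:

	/* Hypothetical kthread; the task_work calls mirror irq_thread() above. */
	static void my_exit_dtor(struct task_work *unused)
	{
		/* runs from exit_task_work() if the task dies unexpectedly */
	}

	static int my_kthread(void *data)
	{
		struct task_work on_exit_work;

		init_task_work(&on_exit_work, my_exit_dtor, NULL);
		task_work_add(current, &on_exit_work, false);

		/* ... main loop ... */

		/* normal termination: the destructor must not fire */
		task_work_cancel(current, my_exit_dtor);
		return 0;
	}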
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 079f1d39a8b8..2169feeba529 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -343,7 +343,7 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size, | |||
343 | 343 | ||
344 | /* Look up a kernel symbol and return it in a text buffer. */ | 344 | /* Look up a kernel symbol and return it in a text buffer. */ |
345 | static int __sprint_symbol(char *buffer, unsigned long address, | 345 | static int __sprint_symbol(char *buffer, unsigned long address, |
346 | int symbol_offset) | 346 | int symbol_offset, int add_offset) |
347 | { | 347 | { |
348 | char *modname; | 348 | char *modname; |
349 | const char *name; | 349 | const char *name; |
@@ -358,13 +358,13 @@ static int __sprint_symbol(char *buffer, unsigned long address, | |||
358 | if (name != buffer) | 358 | if (name != buffer) |
359 | strcpy(buffer, name); | 359 | strcpy(buffer, name); |
360 | len = strlen(buffer); | 360 | len = strlen(buffer); |
361 | buffer += len; | ||
362 | offset -= symbol_offset; | 361 | offset -= symbol_offset; |
363 | 362 | ||
363 | if (add_offset) | ||
364 | len += sprintf(buffer + len, "+%#lx/%#lx", offset, size); | ||
365 | |||
364 | if (modname) | 366 | if (modname) |
365 | len += sprintf(buffer, "+%#lx/%#lx [%s]", offset, size, modname); | 367 | len += sprintf(buffer + len, " [%s]", modname); |
366 | else | ||
367 | len += sprintf(buffer, "+%#lx/%#lx", offset, size); | ||
368 | 368 | ||
369 | return len; | 369 | return len; |
370 | } | 370 | } |
@@ -382,12 +382,28 @@ static int __sprint_symbol(char *buffer, unsigned long address, | |||
382 | */ | 382 | */ |
383 | int sprint_symbol(char *buffer, unsigned long address) | 383 | int sprint_symbol(char *buffer, unsigned long address) |
384 | { | 384 | { |
385 | return __sprint_symbol(buffer, address, 0); | 385 | return __sprint_symbol(buffer, address, 0, 1); |
386 | } | 386 | } |
387 | |||
388 | EXPORT_SYMBOL_GPL(sprint_symbol); | 387 | EXPORT_SYMBOL_GPL(sprint_symbol); |
389 | 388 | ||
390 | /** | 389 | /** |
390 | * sprint_symbol_no_offset - Look up a kernel symbol and return it in a text buffer | ||
391 | * @buffer: buffer to be stored | ||
392 | * @address: address to lookup | ||
393 | * | ||
394 | * This function looks up a kernel symbol with @address and stores its name | ||
395 | * and module name to @buffer if possible. If no symbol was found, just saves | ||
396 | * its @address as is. | ||
397 | * | ||
398 | * This function returns the number of bytes stored in @buffer. | ||
399 | */ | ||
400 | int sprint_symbol_no_offset(char *buffer, unsigned long address) | ||
401 | { | ||
402 | return __sprint_symbol(buffer, address, 0, 0); | ||
403 | } | ||
404 | EXPORT_SYMBOL_GPL(sprint_symbol_no_offset); | ||
405 | |||
406 | /** | ||
391 | * sprint_backtrace - Look up a backtrace symbol and return it in a text buffer | 407 | * sprint_backtrace - Look up a backtrace symbol and return it in a text buffer |
392 | * @buffer: buffer to be stored | 408 | * @buffer: buffer to be stored |
393 | * @address: address to lookup | 409 | * @address: address to lookup |
@@ -403,7 +419,7 @@ EXPORT_SYMBOL_GPL(sprint_symbol); | |||
403 | */ | 419 | */ |
404 | int sprint_backtrace(char *buffer, unsigned long address) | 420 | int sprint_backtrace(char *buffer, unsigned long address) |
405 | { | 421 | { |
406 | return __sprint_symbol(buffer, address, -1); | 422 | return __sprint_symbol(buffer, address, -1, 1); |
407 | } | 423 | } |
408 | 424 | ||
409 | /* Look up a kernel symbol and print it to the kernel messages. */ | 425 | /* Look up a kernel symbol and print it to the kernel messages. */ |
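To make the effect of the new add_offset parameter concrete, a hedged sketch of the two exported helpers side by side (buf and addr are assumed to exist; the symbol name, offsets and module are made up, but the shapes follow the sprintf calls above):

	char buf[KSYM_SYMBOL_LEN];

	sprint_symbol(buf, addr);           /* e.g. "my_func+0x1a/0xd0 [my_module]" */
	sprint_symbol_no_offset(buf, addr); /* e.g. "my_func [my_module]" */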
diff --git a/kernel/kcmp.c b/kernel/kcmp.c
new file mode 100644
index 000000000000..30b7b225306c
--- /dev/null
+++ b/kernel/kcmp.c
@@ -0,0 +1,196 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/syscalls.h> | ||
3 | #include <linux/fdtable.h> | ||
4 | #include <linux/string.h> | ||
5 | #include <linux/random.h> | ||
6 | #include <linux/module.h> | ||
7 | #include <linux/init.h> | ||
8 | #include <linux/errno.h> | ||
9 | #include <linux/cache.h> | ||
10 | #include <linux/bug.h> | ||
11 | #include <linux/err.h> | ||
12 | #include <linux/kcmp.h> | ||
13 | |||
14 | #include <asm/unistd.h> | ||
15 | |||
16 | /* | ||
17 | * We don't expose the real in-memory order of objects for security reasons. | ||
18 | * But still the comparison results should be suitable for sorting. So we | ||
19 | * obfuscate kernel pointers values and compare the production instead. | ||
20 | * | ||
21 | * The obfuscation is done in two steps. First we xor the kernel pointer with | ||
22 | * a random value, which puts pointer into a new position in a reordered space. | ||
23 | * Secondly we multiply the xor production with a large odd random number to | ||
24 | * permute its bits even more (the odd multiplier guarantees that the product | ||
25 | * is unique ever after the high bits are truncated, since any odd number is | ||
26 | * relative prime to 2^n). | ||
27 | * | ||
28 | * Note also that the obfuscation itself is invisible to userspace and if needed | ||
29 | * it can be changed to an alternate scheme. | ||
30 | */ | ||
31 | static unsigned long cookies[KCMP_TYPES][2] __read_mostly; | ||
32 | |||
33 | static long kptr_obfuscate(long v, int type) | ||
34 | { | ||
35 | return (v ^ cookies[type][0]) * cookies[type][1]; | ||
36 | } | ||
37 | |||
38 | /* | ||
39 | * 0 - equal, i.e. v1 = v2 | ||
40 | * 1 - less than, i.e. v1 < v2 | ||
41 | * 2 - greater than, i.e. v1 > v2 | ||
42 | * 3 - not equal but ordering unavailable (reserved for future) | ||
43 | */ | ||
44 | static int kcmp_ptr(void *v1, void *v2, enum kcmp_type type) | ||
45 | { | ||
46 | long ret; | ||
47 | |||
48 | ret = kptr_obfuscate((long)v1, type) - kptr_obfuscate((long)v2, type); | ||
49 | |||
50 | return (ret < 0) | ((ret > 0) << 1); | ||
51 | } | ||
52 | |||
53 | /* The caller must have pinned the task */ | ||
54 | static struct file * | ||
55 | get_file_raw_ptr(struct task_struct *task, unsigned int idx) | ||
56 | { | ||
57 | struct file *file = NULL; | ||
58 | |||
59 | task_lock(task); | ||
60 | rcu_read_lock(); | ||
61 | |||
62 | if (task->files) | ||
63 | file = fcheck_files(task->files, idx); | ||
64 | |||
65 | rcu_read_unlock(); | ||
66 | task_unlock(task); | ||
67 | |||
68 | return file; | ||
69 | } | ||
70 | |||
71 | static void kcmp_unlock(struct mutex *m1, struct mutex *m2) | ||
72 | { | ||
73 | if (likely(m2 != m1)) | ||
74 | mutex_unlock(m2); | ||
75 | mutex_unlock(m1); | ||
76 | } | ||
77 | |||
78 | static int kcmp_lock(struct mutex *m1, struct mutex *m2) | ||
79 | { | ||
80 | int err; | ||
81 | |||
82 | if (m2 > m1) | ||
83 | swap(m1, m2); | ||
84 | |||
85 | err = mutex_lock_killable(m1); | ||
86 | if (!err && likely(m1 != m2)) { | ||
87 | err = mutex_lock_killable_nested(m2, SINGLE_DEPTH_NESTING); | ||
88 | if (err) | ||
89 | mutex_unlock(m1); | ||
90 | } | ||
91 | |||
92 | return err; | ||
93 | } | ||
94 | |||
95 | SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type, | ||
96 | unsigned long, idx1, unsigned long, idx2) | ||
97 | { | ||
98 | struct task_struct *task1, *task2; | ||
99 | int ret; | ||
100 | |||
101 | rcu_read_lock(); | ||
102 | |||
103 | /* | ||
104 | * Tasks are looked up in caller's PID namespace only. | ||
105 | */ | ||
106 | task1 = find_task_by_vpid(pid1); | ||
107 | task2 = find_task_by_vpid(pid2); | ||
108 | if (!task1 || !task2) | ||
109 | goto err_no_task; | ||
110 | |||
111 | get_task_struct(task1); | ||
112 | get_task_struct(task2); | ||
113 | |||
114 | rcu_read_unlock(); | ||
115 | |||
116 | /* | ||
117 | * One should have enough rights to inspect task details. | ||
118 | */ | ||
119 | ret = kcmp_lock(&task1->signal->cred_guard_mutex, | ||
120 | &task2->signal->cred_guard_mutex); | ||
121 | if (ret) | ||
122 | goto err; | ||
123 | if (!ptrace_may_access(task1, PTRACE_MODE_READ) || | ||
124 | !ptrace_may_access(task2, PTRACE_MODE_READ)) { | ||
125 | ret = -EPERM; | ||
126 | goto err_unlock; | ||
127 | } | ||
128 | |||
129 | switch (type) { | ||
130 | case KCMP_FILE: { | ||
131 | struct file *filp1, *filp2; | ||
132 | |||
133 | filp1 = get_file_raw_ptr(task1, idx1); | ||
134 | filp2 = get_file_raw_ptr(task2, idx2); | ||
135 | |||
136 | if (filp1 && filp2) | ||
137 | ret = kcmp_ptr(filp1, filp2, KCMP_FILE); | ||
138 | else | ||
139 | ret = -EBADF; | ||
140 | break; | ||
141 | } | ||
142 | case KCMP_VM: | ||
143 | ret = kcmp_ptr(task1->mm, task2->mm, KCMP_VM); | ||
144 | break; | ||
145 | case KCMP_FILES: | ||
146 | ret = kcmp_ptr(task1->files, task2->files, KCMP_FILES); | ||
147 | break; | ||
148 | case KCMP_FS: | ||
149 | ret = kcmp_ptr(task1->fs, task2->fs, KCMP_FS); | ||
150 | break; | ||
151 | case KCMP_SIGHAND: | ||
152 | ret = kcmp_ptr(task1->sighand, task2->sighand, KCMP_SIGHAND); | ||
153 | break; | ||
154 | case KCMP_IO: | ||
155 | ret = kcmp_ptr(task1->io_context, task2->io_context, KCMP_IO); | ||
156 | break; | ||
157 | case KCMP_SYSVSEM: | ||
158 | #ifdef CONFIG_SYSVIPC | ||
159 | ret = kcmp_ptr(task1->sysvsem.undo_list, | ||
160 | task2->sysvsem.undo_list, | ||
161 | KCMP_SYSVSEM); | ||
162 | #else | ||
163 | ret = -EOPNOTSUPP; | ||
164 | #endif | ||
165 | break; | ||
166 | default: | ||
167 | ret = -EINVAL; | ||
168 | break; | ||
169 | } | ||
170 | |||
171 | err_unlock: | ||
172 | kcmp_unlock(&task1->signal->cred_guard_mutex, | ||
173 | &task2->signal->cred_guard_mutex); | ||
174 | err: | ||
175 | put_task_struct(task1); | ||
176 | put_task_struct(task2); | ||
177 | |||
178 | return ret; | ||
179 | |||
180 | err_no_task: | ||
181 | rcu_read_unlock(); | ||
182 | return -ESRCH; | ||
183 | } | ||
184 | |||
185 | static __init int kcmp_cookies_init(void) | ||
186 | { | ||
187 | int i; | ||
188 | |||
189 | get_random_bytes(cookies, sizeof(cookies)); | ||
190 | |||
191 | for (i = 0; i < KCMP_TYPES; i++) | ||
192 | cookies[i][1] |= (~(~0UL >> 1) | 1); | ||
193 | |||
194 | return 0; | ||
195 | } | ||
196 | arch_initcall(kcmp_cookies_init); | ||
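For completeness, a userspace caller would reach the new syscall through syscall(2) until a libc wrapper exists. A minimal sketch, assuming headers that already define __NR_kcmp and the KCMP_* constants from <linux/kcmp.h>; the return-code interpretation follows the comment in kcmp_ptr() above:

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/kcmp.h>

	/* Returns 1 if the two tasks share one mm, 0 if not, -1 on error. */
	static int tasks_share_vm(pid_t pid1, pid_t pid2)
	{
		long ret = syscall(__NR_kcmp, pid1, pid2, KCMP_VM, 0, 0);

		if (ret < 0)
			return -1;	/* ESRCH, EPERM, EINVAL, ... */
		return ret == 0;	/* 0 means "equal" per kcmp_ptr() */
	}

	int main(void)
	{
		printf("share VM: %d\n", tasks_share_vm(getpid(), getpid()));
		return 0;
	}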
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 05698a7415fe..ff2c7cb86d77 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -221,13 +221,12 @@ fail: | |||
221 | return 0; | 221 | return 0; |
222 | } | 222 | } |
223 | 223 | ||
224 | void call_usermodehelper_freeinfo(struct subprocess_info *info) | 224 | static void call_usermodehelper_freeinfo(struct subprocess_info *info) |
225 | { | 225 | { |
226 | if (info->cleanup) | 226 | if (info->cleanup) |
227 | (*info->cleanup)(info); | 227 | (*info->cleanup)(info); |
228 | kfree(info); | 228 | kfree(info); |
229 | } | 229 | } |
230 | EXPORT_SYMBOL(call_usermodehelper_freeinfo); | ||
231 | 230 | ||
232 | static void umh_complete(struct subprocess_info *sub_info) | 231 | static void umh_complete(struct subprocess_info *sub_info) |
233 | { | 232 | { |
@@ -410,7 +409,7 @@ EXPORT_SYMBOL_GPL(usermodehelper_read_unlock); | |||
410 | 409 | ||
411 | /** | 410 | /** |
412 | * __usermodehelper_set_disable_depth - Modify usermodehelper_disabled. | 411 | * __usermodehelper_set_disable_depth - Modify usermodehelper_disabled. |
413 | * depth: New value to assign to usermodehelper_disabled. | 412 | * @depth: New value to assign to usermodehelper_disabled. |
414 | * | 413 | * |
415 | * Change the value of usermodehelper_disabled (under umhelper_sem locked for | 414 | * Change the value of usermodehelper_disabled (under umhelper_sem locked for |
416 | * writing) and wakeup tasks waiting for it to change. | 415 | * writing) and wakeup tasks waiting for it to change. |
@@ -479,6 +478,7 @@ static void helper_unlock(void) | |||
479 | * structure. This should be passed to call_usermodehelper_exec to | 478 | * structure. This should be passed to call_usermodehelper_exec to |
480 | * exec the process and free the structure. | 479 | * exec the process and free the structure. |
481 | */ | 480 | */ |
481 | static | ||
482 | struct subprocess_info *call_usermodehelper_setup(char *path, char **argv, | 482 | struct subprocess_info *call_usermodehelper_setup(char *path, char **argv, |
483 | char **envp, gfp_t gfp_mask) | 483 | char **envp, gfp_t gfp_mask) |
484 | { | 484 | { |
@@ -494,7 +494,6 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv, | |||
494 | out: | 494 | out: |
495 | return sub_info; | 495 | return sub_info; |
496 | } | 496 | } |
497 | EXPORT_SYMBOL(call_usermodehelper_setup); | ||
498 | 497 | ||
499 | /** | 498 | /** |
500 | * call_usermodehelper_setfns - set a cleanup/init function | 499 | * call_usermodehelper_setfns - set a cleanup/init function |
@@ -512,6 +511,7 @@ EXPORT_SYMBOL(call_usermodehelper_setup); | |||
512 | * Function must be runnable in either a process context or the | 511 | * Function must be runnable in either a process context or the |
513 | * context in which call_usermodehelper_exec is called. | 512 | * context in which call_usermodehelper_exec is called. |
514 | */ | 513 | */ |
514 | static | ||
515 | void call_usermodehelper_setfns(struct subprocess_info *info, | 515 | void call_usermodehelper_setfns(struct subprocess_info *info, |
516 | int (*init)(struct subprocess_info *info, struct cred *new), | 516 | int (*init)(struct subprocess_info *info, struct cred *new), |
517 | void (*cleanup)(struct subprocess_info *info), | 517 | void (*cleanup)(struct subprocess_info *info), |
@@ -521,7 +521,6 @@ void call_usermodehelper_setfns(struct subprocess_info *info, | |||
521 | info->init = init; | 521 | info->init = init; |
522 | info->data = data; | 522 | info->data = data; |
523 | } | 523 | } |
524 | EXPORT_SYMBOL(call_usermodehelper_setfns); | ||
525 | 524 | ||
526 | /** | 525 | /** |
527 | * call_usermodehelper_exec - start a usermode application | 526 | * call_usermodehelper_exec - start a usermode application |
@@ -535,6 +534,7 @@ EXPORT_SYMBOL(call_usermodehelper_setfns); | |||
535 | * asynchronously if wait is not set, and runs as a child of keventd. | 534 | * asynchronously if wait is not set, and runs as a child of keventd. |
536 | * (ie. it runs with full root capabilities). | 535 | * (ie. it runs with full root capabilities). |
537 | */ | 536 | */ |
537 | static | ||
538 | int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) | 538 | int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) |
539 | { | 539 | { |
540 | DECLARE_COMPLETION_ONSTACK(done); | 540 | DECLARE_COMPLETION_ONSTACK(done); |
@@ -576,7 +576,25 @@ unlock: | |||
576 | helper_unlock(); | 576 | helper_unlock(); |
577 | return retval; | 577 | return retval; |
578 | } | 578 | } |
579 | EXPORT_SYMBOL(call_usermodehelper_exec); | 579 | |
580 | int call_usermodehelper_fns( | ||
581 | char *path, char **argv, char **envp, int wait, | ||
582 | int (*init)(struct subprocess_info *info, struct cred *new), | ||
583 | void (*cleanup)(struct subprocess_info *), void *data) | ||
584 | { | ||
585 | struct subprocess_info *info; | ||
586 | gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL; | ||
587 | |||
588 | info = call_usermodehelper_setup(path, argv, envp, gfp_mask); | ||
589 | |||
590 | if (info == NULL) | ||
591 | return -ENOMEM; | ||
592 | |||
593 | call_usermodehelper_setfns(info, init, cleanup, data); | ||
594 | |||
595 | return call_usermodehelper_exec(info, wait); | ||
596 | } | ||
597 | EXPORT_SYMBOL(call_usermodehelper_fns); | ||
580 | 598 | ||
581 | static int proc_cap_handler(struct ctl_table *table, int write, | 599 | static int proc_cap_handler(struct ctl_table *table, int write, |
582 | void __user *buffer, size_t *lenp, loff_t *ppos) | 600 | void __user *buffer, size_t *lenp, loff_t *ppos) |
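With the setup/setfns/exec trio made static, in-kernel users are expected to go through the newly exported call_usermodehelper_fns() wrapper instead. A minimal caller sketch following the signature added above (the helper binary, arguments and surrounding function are made up):

	/* Hypothetical caller; call_usermodehelper_fns() is the API added above. */
	static int run_my_helper(void)
	{
		char *argv[] = { "/sbin/my-helper", "start", NULL };
		char *envp[] = { "HOME=/", "PATH=/sbin:/bin:/usr/sbin:/usr/bin", NULL };

		/* No init/cleanup callbacks and no private data are needed here. */
		return call_usermodehelper_fns("/sbin/my-helper", argv, envp,
					       UMH_WAIT_PROC, NULL, NULL, NULL);
	}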
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 57bc1fd35b3c..16b20e38c4a1 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -149,7 +149,12 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) | |||
149 | { | 149 | { |
150 | int nr; | 150 | int nr; |
151 | int rc; | 151 | int rc; |
152 | struct task_struct *task; | 152 | struct task_struct *task, *me = current; |
153 | |||
154 | /* Ignore SIGCHLD causing any terminated children to autoreap */ | ||
155 | spin_lock_irq(&me->sighand->siglock); | ||
156 | me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN; | ||
157 | spin_unlock_irq(&me->sighand->siglock); | ||
153 | 158 | ||
154 | /* | 159 | /* |
155 | * The last thread in the cgroup-init thread group is terminating. | 160 | * The last thread in the cgroup-init thread group is terminating. |
@@ -191,6 +196,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) | |||
191 | return; | 196 | return; |
192 | } | 197 | } |
193 | 198 | ||
199 | #ifdef CONFIG_CHECKPOINT_RESTORE | ||
194 | static int pid_ns_ctl_handler(struct ctl_table *table, int write, | 200 | static int pid_ns_ctl_handler(struct ctl_table *table, int write, |
195 | void __user *buffer, size_t *lenp, loff_t *ppos) | 201 | void __user *buffer, size_t *lenp, loff_t *ppos) |
196 | { | 202 | { |
@@ -218,8 +224,8 @@ static struct ctl_table pid_ns_ctl_table[] = { | |||
218 | }, | 224 | }, |
219 | { } | 225 | { } |
220 | }; | 226 | }; |
221 | |||
222 | static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } }; | 227 | static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } }; |
228 | #endif /* CONFIG_CHECKPOINT_RESTORE */ | ||
223 | 229 | ||
224 | int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd) | 230 | int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd) |
225 | { | 231 | { |
@@ -253,7 +259,10 @@ int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd) | |||
253 | static __init int pid_namespaces_init(void) | 259 | static __init int pid_namespaces_init(void) |
254 | { | 260 | { |
255 | pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); | 261 | pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC); |
262 | |||
263 | #ifdef CONFIG_CHECKPOINT_RESTORE | ||
256 | register_sysctl_paths(kern_path, pid_ns_ctl_table); | 264 | register_sysctl_paths(kern_path, pid_ns_ctl_table); |
265 | #endif | ||
257 | return 0; | 266 | return 0; |
258 | } | 267 | } |
259 | 268 | ||
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index bebe2b170d49..ad581aa2369a 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -94,13 +94,15 @@ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val) | |||
94 | counter->usage -= val; | 94 | counter->usage -= val; |
95 | } | 95 | } |
96 | 96 | ||
97 | void res_counter_uncharge(struct res_counter *counter, unsigned long val) | 97 | void res_counter_uncharge_until(struct res_counter *counter, |
98 | struct res_counter *top, | ||
99 | unsigned long val) | ||
98 | { | 100 | { |
99 | unsigned long flags; | 101 | unsigned long flags; |
100 | struct res_counter *c; | 102 | struct res_counter *c; |
101 | 103 | ||
102 | local_irq_save(flags); | 104 | local_irq_save(flags); |
103 | for (c = counter; c != NULL; c = c->parent) { | 105 | for (c = counter; c != top; c = c->parent) { |
104 | spin_lock(&c->lock); | 106 | spin_lock(&c->lock); |
105 | res_counter_uncharge_locked(c, val); | 107 | res_counter_uncharge_locked(c, val); |
106 | spin_unlock(&c->lock); | 108 | spin_unlock(&c->lock); |
@@ -108,6 +110,10 @@ void res_counter_uncharge(struct res_counter *counter, unsigned long val) | |||
108 | local_irq_restore(flags); | 110 | local_irq_restore(flags); |
109 | } | 111 | } |
110 | 112 | ||
113 | void res_counter_uncharge(struct res_counter *counter, unsigned long val) | ||
114 | { | ||
115 | res_counter_uncharge_until(counter, NULL, val); | ||
116 | } | ||
111 | 117 | ||
112 | static inline unsigned long long * | 118 | static inline unsigned long long * |
113 | res_counter_member(struct res_counter *counter, int member) | 119 | res_counter_member(struct res_counter *counter, int member) |
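res_counter_uncharge() is now just the NULL-top case of the new res_counter_uncharge_until(), which stops the walk before a chosen ancestor. A hedged sketch of the partial uncharge this enables (the counter hierarchy below is hypothetical):

	/* Hypothetical hierarchy: child -> parent -> root (root->parent == NULL). */

	/* Old behaviour, uncharges child, parent and root: */
	res_counter_uncharge(&child->res, val);

	/*
	 * New: uncharge child and parent only, leaving root's usage as-is,
	 * e.g. when the charge is being moved elsewhere under the same root.
	 */
	res_counter_uncharge_until(&child->res, &root->res, val);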
diff --git a/kernel/resource.c b/kernel/resource.c
index 7e8ea66a8c01..e1d2b8ee76d5 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -515,8 +515,8 @@ out: | |||
515 | * @root: root resource descriptor | 515 | * @root: root resource descriptor |
516 | * @new: resource descriptor desired by caller | 516 | * @new: resource descriptor desired by caller |
517 | * @size: requested resource region size | 517 | * @size: requested resource region size |
518 | * @min: minimum size to allocate | 518 | * @min: minimum boundary to allocate |
519 | * @max: maximum size to allocate | 519 | * @max: maximum boundary to allocate |
520 | * @align: alignment requested, in bytes | 520 | * @align: alignment requested, in bytes |
521 | * @alignf: alignment function, optional, called if not NULL | 521 | * @alignf: alignment function, optional, called if not NULL |
522 | * @alignf_data: arbitrary data to pass to the @alignf function | 522 | * @alignf_data: arbitrary data to pass to the @alignf function |
diff --git a/kernel/signal.c b/kernel/signal.c
index f7b418217633..08dfbd748cd2 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1656,19 +1656,18 @@ bool do_notify_parent(struct task_struct *tsk, int sig) | |||
1656 | info.si_signo = sig; | 1656 | info.si_signo = sig; |
1657 | info.si_errno = 0; | 1657 | info.si_errno = 0; |
1658 | /* | 1658 | /* |
1659 | * we are under tasklist_lock here so our parent is tied to | 1659 | * We are under tasklist_lock here so our parent is tied to |
1660 | * us and cannot exit and release its namespace. | 1660 | * us and cannot change. |
1661 | * | 1661 | * |
1662 | * the only it can is to switch its nsproxy with sys_unshare, | 1662 | * task_active_pid_ns will always return the same pid namespace |
1663 | * bu uncharing pid namespaces is not allowed, so we'll always | 1663 | * until a task passes through release_task. |
1664 | * see relevant namespace | ||
1665 | * | 1664 | * |
1666 | * write_lock() currently calls preempt_disable() which is the | 1665 | * write_lock() currently calls preempt_disable() which is the |
1667 | * same as rcu_read_lock(), but according to Oleg, this is not | 1666 | * same as rcu_read_lock(), but according to Oleg, this is not |
1668 | * correct to rely on this | 1667 | * correct to rely on this |
1669 | */ | 1668 | */ |
1670 | rcu_read_lock(); | 1669 | rcu_read_lock(); |
1671 | info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns); | 1670 | info.si_pid = task_pid_nr_ns(tsk, task_active_pid_ns(tsk->parent)); |
1672 | info.si_uid = from_kuid_munged(task_cred_xxx(tsk->parent, user_ns), | 1671 | info.si_uid = from_kuid_munged(task_cred_xxx(tsk->parent, user_ns), |
1673 | task_uid(tsk)); | 1672 | task_uid(tsk)); |
1674 | rcu_read_unlock(); | 1673 | rcu_read_unlock(); |
diff --git a/kernel/sys.c b/kernel/sys.c
index 6df42624e454..9ff89cb9657a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -36,6 +36,8 @@ | |||
36 | #include <linux/personality.h> | 36 | #include <linux/personality.h> |
37 | #include <linux/ptrace.h> | 37 | #include <linux/ptrace.h> |
38 | #include <linux/fs_struct.h> | 38 | #include <linux/fs_struct.h> |
39 | #include <linux/file.h> | ||
40 | #include <linux/mount.h> | ||
39 | #include <linux/gfp.h> | 41 | #include <linux/gfp.h> |
40 | #include <linux/syscore_ops.h> | 42 | #include <linux/syscore_ops.h> |
41 | #include <linux/version.h> | 43 | #include <linux/version.h> |
@@ -1378,8 +1380,8 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len) | |||
1378 | memcpy(u->nodename, tmp, len); | 1380 | memcpy(u->nodename, tmp, len); |
1379 | memset(u->nodename + len, 0, sizeof(u->nodename) - len); | 1381 | memset(u->nodename + len, 0, sizeof(u->nodename) - len); |
1380 | errno = 0; | 1382 | errno = 0; |
1383 | uts_proc_notify(UTS_PROC_HOSTNAME); | ||
1381 | } | 1384 | } |
1382 | uts_proc_notify(UTS_PROC_HOSTNAME); | ||
1383 | up_write(&uts_sem); | 1385 | up_write(&uts_sem); |
1384 | return errno; | 1386 | return errno; |
1385 | } | 1387 | } |
@@ -1429,8 +1431,8 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len) | |||
1429 | memcpy(u->domainname, tmp, len); | 1431 | memcpy(u->domainname, tmp, len); |
1430 | memset(u->domainname + len, 0, sizeof(u->domainname) - len); | 1432 | memset(u->domainname + len, 0, sizeof(u->domainname) - len); |
1431 | errno = 0; | 1433 | errno = 0; |
1434 | uts_proc_notify(UTS_PROC_DOMAINNAME); | ||
1432 | } | 1435 | } |
1433 | uts_proc_notify(UTS_PROC_DOMAINNAME); | ||
1434 | up_write(&uts_sem); | 1436 | up_write(&uts_sem); |
1435 | return errno; | 1437 | return errno; |
1436 | } | 1438 | } |
@@ -1784,77 +1786,102 @@ SYSCALL_DEFINE1(umask, int, mask) | |||
1784 | } | 1786 | } |
1785 | 1787 | ||
1786 | #ifdef CONFIG_CHECKPOINT_RESTORE | 1788 | #ifdef CONFIG_CHECKPOINT_RESTORE |
1789 | static bool vma_flags_mismatch(struct vm_area_struct *vma, | ||
1790 | unsigned long required, | ||
1791 | unsigned long banned) | ||
1792 | { | ||
1793 | return (vma->vm_flags & required) != required || | ||
1794 | (vma->vm_flags & banned); | ||
1795 | } | ||
1796 | |||
1797 | static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) | ||
1798 | { | ||
1799 | struct file *exe_file; | ||
1800 | struct dentry *dentry; | ||
1801 | int err; | ||
1802 | |||
1803 | /* | ||
1804 | * Setting new mm::exe_file is only allowed when no VM_EXECUTABLE vma's | ||
1805 | * remain. So perform a quick test first. | ||
1806 | */ | ||
1807 | if (mm->num_exe_file_vmas) | ||
1808 | return -EBUSY; | ||
1809 | |||
1810 | exe_file = fget(fd); | ||
1811 | if (!exe_file) | ||
1812 | return -EBADF; | ||
1813 | |||
1814 | dentry = exe_file->f_path.dentry; | ||
1815 | |||
1816 | /* | ||
1817 | * Because the original mm->exe_file points to executable file, make | ||
1818 | * sure that this one is executable as well, to avoid breaking an | ||
1819 | * overall picture. | ||
1820 | */ | ||
1821 | err = -EACCES; | ||
1822 | if (!S_ISREG(dentry->d_inode->i_mode) || | ||
1823 | exe_file->f_path.mnt->mnt_flags & MNT_NOEXEC) | ||
1824 | goto exit; | ||
1825 | |||
1826 | err = inode_permission(dentry->d_inode, MAY_EXEC); | ||
1827 | if (err) | ||
1828 | goto exit; | ||
1829 | |||
1830 | /* | ||
1831 | * The symlink can be changed only once, just to disallow arbitrary | ||
1832 | * transitions malicious software might bring in. This means one | ||
1833 | * could make a snapshot over all processes running and monitor | ||
1834 | * /proc/pid/exe changes to notice unusual activity if needed. | ||
1835 | */ | ||
1836 | down_write(&mm->mmap_sem); | ||
1837 | if (likely(!mm->exe_file)) | ||
1838 | set_mm_exe_file(mm, exe_file); | ||
1839 | else | ||
1840 | err = -EBUSY; | ||
1841 | up_write(&mm->mmap_sem); | ||
1842 | |||
1843 | exit: | ||
1844 | fput(exe_file); | ||
1845 | return err; | ||
1846 | } | ||
1847 | |||
1787 | static int prctl_set_mm(int opt, unsigned long addr, | 1848 | static int prctl_set_mm(int opt, unsigned long addr, |
1788 | unsigned long arg4, unsigned long arg5) | 1849 | unsigned long arg4, unsigned long arg5) |
1789 | { | 1850 | { |
1790 | unsigned long rlim = rlimit(RLIMIT_DATA); | 1851 | unsigned long rlim = rlimit(RLIMIT_DATA); |
1791 | unsigned long vm_req_flags; | ||
1792 | unsigned long vm_bad_flags; | ||
1793 | struct vm_area_struct *vma; | ||
1794 | int error = 0; | ||
1795 | struct mm_struct *mm = current->mm; | 1852 | struct mm_struct *mm = current->mm; |
1853 | struct vm_area_struct *vma; | ||
1854 | int error; | ||
1796 | 1855 | ||
1797 | if (arg4 | arg5) | 1856 | if (arg5 || (arg4 && opt != PR_SET_MM_AUXV)) |
1798 | return -EINVAL; | 1857 | return -EINVAL; |
1799 | 1858 | ||
1800 | if (!capable(CAP_SYS_RESOURCE)) | 1859 | if (!capable(CAP_SYS_RESOURCE)) |
1801 | return -EPERM; | 1860 | return -EPERM; |
1802 | 1861 | ||
1862 | if (opt == PR_SET_MM_EXE_FILE) | ||
1863 | return prctl_set_mm_exe_file(mm, (unsigned int)addr); | ||
1864 | |||
1803 | if (addr >= TASK_SIZE) | 1865 | if (addr >= TASK_SIZE) |
1804 | return -EINVAL; | 1866 | return -EINVAL; |
1805 | 1867 | ||
1868 | error = -EINVAL; | ||
1869 | |||
1806 | down_read(&mm->mmap_sem); | 1870 | down_read(&mm->mmap_sem); |
1807 | vma = find_vma(mm, addr); | 1871 | vma = find_vma(mm, addr); |
1808 | 1872 | ||
1809 | if (opt != PR_SET_MM_START_BRK && opt != PR_SET_MM_BRK) { | ||
1810 | /* It must be existing VMA */ | ||
1811 | if (!vma || vma->vm_start > addr) | ||
1812 | goto out; | ||
1813 | } | ||
1814 | |||
1815 | error = -EINVAL; | ||
1816 | switch (opt) { | 1873 | switch (opt) { |
1817 | case PR_SET_MM_START_CODE: | 1874 | case PR_SET_MM_START_CODE: |
1875 | mm->start_code = addr; | ||
1876 | break; | ||
1818 | case PR_SET_MM_END_CODE: | 1877 | case PR_SET_MM_END_CODE: |
1819 | vm_req_flags = VM_READ | VM_EXEC; | 1878 | mm->end_code = addr; |
1820 | vm_bad_flags = VM_WRITE | VM_MAYSHARE; | ||
1821 | |||
1822 | if ((vma->vm_flags & vm_req_flags) != vm_req_flags || | ||
1823 | (vma->vm_flags & vm_bad_flags)) | ||
1824 | goto out; | ||
1825 | |||
1826 | if (opt == PR_SET_MM_START_CODE) | ||
1827 | mm->start_code = addr; | ||
1828 | else | ||
1829 | mm->end_code = addr; | ||
1830 | break; | 1879 | break; |
1831 | |||
1832 | case PR_SET_MM_START_DATA: | 1880 | case PR_SET_MM_START_DATA: |
1833 | case PR_SET_MM_END_DATA: | 1881 | mm->start_data = addr; |
1834 | vm_req_flags = VM_READ | VM_WRITE; | ||
1835 | vm_bad_flags = VM_EXEC | VM_MAYSHARE; | ||
1836 | |||
1837 | if ((vma->vm_flags & vm_req_flags) != vm_req_flags || | ||
1838 | (vma->vm_flags & vm_bad_flags)) | ||
1839 | goto out; | ||
1840 | |||
1841 | if (opt == PR_SET_MM_START_DATA) | ||
1842 | mm->start_data = addr; | ||
1843 | else | ||
1844 | mm->end_data = addr; | ||
1845 | break; | 1882 | break; |
1846 | 1883 | case PR_SET_MM_END_DATA: | |
1847 | case PR_SET_MM_START_STACK: | 1884 | mm->end_data = addr; |
1848 | |||
1849 | #ifdef CONFIG_STACK_GROWSUP | ||
1850 | vm_req_flags = VM_READ | VM_WRITE | VM_GROWSUP; | ||
1851 | #else | ||
1852 | vm_req_flags = VM_READ | VM_WRITE | VM_GROWSDOWN; | ||
1853 | #endif | ||
1854 | if ((vma->vm_flags & vm_req_flags) != vm_req_flags) | ||
1855 | goto out; | ||
1856 | |||
1857 | mm->start_stack = addr; | ||
1858 | break; | 1885 | break; |
1859 | 1886 | ||
1860 | case PR_SET_MM_START_BRK: | 1887 | case PR_SET_MM_START_BRK: |
@@ -1881,16 +1908,77 @@ static int prctl_set_mm(int opt, unsigned long addr, | |||
1881 | mm->brk = addr; | 1908 | mm->brk = addr; |
1882 | break; | 1909 | break; |
1883 | 1910 | ||
1911 | /* | ||
1912 | * If command line arguments and environment | ||
1913 | * are placed somewhere else on the stack, we can | ||
1914 | * set them up here: ARG_START/END for the | ||
1915 | * command line arguments and ENV_START/END | ||
1916 | * for the environment. | ||
1917 | */ | ||
1918 | case PR_SET_MM_START_STACK: | ||
1919 | case PR_SET_MM_ARG_START: | ||
1920 | case PR_SET_MM_ARG_END: | ||
1921 | case PR_SET_MM_ENV_START: | ||
1922 | case PR_SET_MM_ENV_END: | ||
1923 | if (!vma) { | ||
1924 | error = -EFAULT; | ||
1925 | goto out; | ||
1926 | } | ||
1927 | #ifdef CONFIG_STACK_GROWSUP | ||
1928 | if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSUP, 0)) | ||
1929 | #else | ||
1930 | if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSDOWN, 0)) | ||
1931 | #endif | ||
1932 | goto out; | ||
1933 | if (opt == PR_SET_MM_START_STACK) | ||
1934 | mm->start_stack = addr; | ||
1935 | else if (opt == PR_SET_MM_ARG_START) | ||
1936 | mm->arg_start = addr; | ||
1937 | else if (opt == PR_SET_MM_ARG_END) | ||
1938 | mm->arg_end = addr; | ||
1939 | else if (opt == PR_SET_MM_ENV_START) | ||
1940 | mm->env_start = addr; | ||
1941 | else if (opt == PR_SET_MM_ENV_END) | ||
1942 | mm->env_end = addr; | ||
1943 | break; | ||
1944 | |||
1945 | /* | ||
1946 | * This doesn't move the auxiliary vector itself | ||
1947 | * since it's pinned to mm_struct, but it allows | ||
1948 | * the vector to be filled with new values. It's up | ||
1949 | * to the caller to provide sane values here, | ||
1950 | * otherwise user space tools which use this | ||
1951 | * vector might be unhappy. | ||
1952 | */ | ||
1953 | case PR_SET_MM_AUXV: { | ||
1954 | unsigned long user_auxv[AT_VECTOR_SIZE]; | ||
1955 | |||
1956 | if (arg4 > sizeof(user_auxv)) | ||
1957 | goto out; | ||
1958 | up_read(&mm->mmap_sem); | ||
1959 | |||
1960 | if (copy_from_user(user_auxv, (const void __user *)addr, arg4)) | ||
1961 | return -EFAULT; | ||
1962 | |||
1963 | /* Make sure the last entry is always AT_NULL */ | ||
1964 | user_auxv[AT_VECTOR_SIZE - 2] = 0; | ||
1965 | user_auxv[AT_VECTOR_SIZE - 1] = 0; | ||
1966 | |||
1967 | BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv)); | ||
1968 | |||
1969 | task_lock(current); | ||
1970 | memcpy(mm->saved_auxv, user_auxv, arg4); | ||
1971 | task_unlock(current); | ||
1972 | |||
1973 | return 0; | ||
1974 | } | ||
1884 | default: | 1975 | default: |
1885 | error = -EINVAL; | ||
1886 | goto out; | 1976 | goto out; |
1887 | } | 1977 | } |
1888 | 1978 | ||
1889 | error = 0; | 1979 | error = 0; |
1890 | |||
1891 | out: | 1980 | out: |
1892 | up_read(&mm->mmap_sem); | 1981 | up_read(&mm->mmap_sem); |
1893 | |||
1894 | return error; | 1982 | return error; |
1895 | } | 1983 | } |
1896 | #else /* CONFIG_CHECKPOINT_RESTORE */ | 1984 | #else /* CONFIG_CHECKPOINT_RESTORE */ |
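For context, a checkpoint/restore tool drives the prctl_set_mm() options above from userspace via prctl(2). A minimal sketch, assuming a kernel built with CONFIG_CHECKPOINT_RESTORE, CAP_SYS_RESOURCE, and headers that expose the PR_SET_MM_* constants; the function name and the address/fd parameters are hypothetical placeholders a real restorer would compute from its dump:

#include <stdio.h>
#include <sys/prctl.h>

static int restore_mm_fields(unsigned long start_stack,
			     unsigned long arg_start, unsigned long arg_end,
			     int exe_fd)
{
	/* One field per call; arg4/arg5 must be zero except for PR_SET_MM_AUXV. */
	if (prctl(PR_SET_MM, PR_SET_MM_START_STACK, start_stack, 0, 0) ||
	    prctl(PR_SET_MM, PR_SET_MM_ARG_START, arg_start, 0, 0) ||
	    prctl(PR_SET_MM, PR_SET_MM_ARG_END, arg_end, 0, 0) ||
	    /* the /proc/pid/exe symlink may be re-pointed only once per mm */
	    prctl(PR_SET_MM, PR_SET_MM_EXE_FILE, exe_fd, 0, 0)) {
		perror("prctl(PR_SET_MM)");
		return -1;
	}
	return 0;
}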
@@ -2114,7 +2202,6 @@ int orderly_poweroff(bool force) | |||
2114 | NULL | 2202 | NULL |
2115 | }; | 2203 | }; |
2116 | int ret = -ENOMEM; | 2204 | int ret = -ENOMEM; |
2117 | struct subprocess_info *info; | ||
2118 | 2205 | ||
2119 | if (argv == NULL) { | 2206 | if (argv == NULL) { |
2120 | printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", | 2207 | printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", |
@@ -2122,18 +2209,16 @@ int orderly_poweroff(bool force) | |||
2122 | goto out; | 2209 | goto out; |
2123 | } | 2210 | } |
2124 | 2211 | ||
2125 | info = call_usermodehelper_setup(argv[0], argv, envp, GFP_ATOMIC); | 2212 | ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_NO_WAIT, |
2126 | if (info == NULL) { | 2213 | NULL, argv_cleanup, NULL); |
2127 | argv_free(argv); | 2214 | out: |
2128 | goto out; | 2215 | if (likely(!ret)) |
2129 | } | 2216 | return 0; |
2130 | |||
2131 | call_usermodehelper_setfns(info, NULL, argv_cleanup, NULL); | ||
2132 | 2217 | ||
2133 | ret = call_usermodehelper_exec(info, UMH_NO_WAIT); | 2218 | if (ret == -ENOMEM) |
2219 | argv_free(argv); | ||
2134 | 2220 | ||
2135 | out: | 2221 | if (force) { |
2136 | if (ret && force) { | ||
2137 | printk(KERN_WARNING "Failed to start orderly shutdown: " | 2222 | printk(KERN_WARNING "Failed to start orderly shutdown: " |
2138 | "forcing the issue\n"); | 2223 | "forcing the issue\n"); |
2139 | 2224 | ||
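The orderly_poweroff() change above relies on call_usermodehelper_fns() doing setup, callback registration and exec in one call. A minimal sketch of the same pattern for an arbitrary command line, with hypothetical helper names; as in the hunk, the cleanup callback frees argv once the helper has been handed off, and the -ENOMEM case frees it by hand because the callback was never installed:

#include <linux/kmod.h>
#include <linux/string.h>

static void free_argv_cleanup(struct subprocess_info *info)
{
	argv_free(info->argv);
}

static int run_shell_command(const char *cmd)
{
	static char *envp[] = { "HOME=/",
				"PATH=/sbin:/bin:/usr/sbin:/usr/bin", NULL };
	char **argv = argv_split(GFP_KERNEL, cmd, NULL);
	int ret;

	if (!argv)
		return -ENOMEM;

	/* UMH_NO_WAIT: fire and forget; argv is freed by the cleanup callback. */
	ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_NO_WAIT,
				      NULL, free_argv_cleanup, NULL);
	if (ret == -ENOMEM)
		argv_free(argv);
	return ret;
}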
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 47bfa16430d7..dbff751e4086 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c | |||
@@ -203,3 +203,6 @@ cond_syscall(sys_fanotify_mark); | |||
203 | cond_syscall(sys_name_to_handle_at); | 203 | cond_syscall(sys_name_to_handle_at); |
204 | cond_syscall(sys_open_by_handle_at); | 204 | cond_syscall(sys_open_by_handle_at); |
205 | cond_syscall(compat_sys_open_by_handle_at); | 205 | cond_syscall(compat_sys_open_by_handle_at); |
206 | |||
207 | /* compare kernel pointers */ | ||
208 | cond_syscall(sys_kcmp); | ||
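kcmp has no glibc wrapper, so callers invoke it through syscall(2). A minimal sketch, assuming the installed kernel headers define __NR_kcmp and the KCMP_* types (the helper name is illustrative); it asks whether two descriptors in two processes refer to the same open file:

#include <stdio.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include <linux/kcmp.h>		/* KCMP_FILE, KCMP_VM, ... */

static int fds_share_file(pid_t pid1, int fd1, pid_t pid2, int fd2)
{
	/* 0 means both descriptors point at the same struct file; other
	 * non-negative values only encode an ordering of the two objects. */
	long ret = syscall(__NR_kcmp, pid1, pid2, KCMP_FILE,
			   (unsigned long)fd1, (unsigned long)fd2);

	if (ret < 0) {
		perror("kcmp");
		return -1;
	}
	return ret == 0;
}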
diff --git a/kernel/task_work.c b/kernel/task_work.c new file mode 100644 index 000000000000..82d1c794066d --- /dev/null +++ b/kernel/task_work.c | |||
@@ -0,0 +1,84 @@ | |||
1 | #include <linux/spinlock.h> | ||
2 | #include <linux/task_work.h> | ||
3 | #include <linux/tracehook.h> | ||
4 | |||
5 | int | ||
6 | task_work_add(struct task_struct *task, struct task_work *twork, bool notify) | ||
7 | { | ||
8 | unsigned long flags; | ||
9 | int err = -ESRCH; | ||
10 | |||
11 | #ifndef TIF_NOTIFY_RESUME | ||
12 | if (notify) | ||
13 | return -ENOTSUPP; | ||
14 | #endif | ||
15 | /* | ||
16 | * We must not insert the new work if the task has already passed | ||
17 | * exit_task_work(). We rely on do_exit()->raw_spin_unlock_wait() | ||
18 | * and check PF_EXITING under pi_lock. | ||
19 | */ | ||
20 | raw_spin_lock_irqsave(&task->pi_lock, flags); | ||
21 | if (likely(!(task->flags & PF_EXITING))) { | ||
22 | hlist_add_head(&twork->hlist, &task->task_works); | ||
23 | err = 0; | ||
24 | } | ||
25 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); | ||
26 | |||
27 | /* test_and_set_bit() implies mb(), see tracehook_notify_resume(). */ | ||
28 | if (likely(!err) && notify) | ||
29 | set_notify_resume(task); | ||
30 | return err; | ||
31 | } | ||
32 | |||
33 | struct task_work * | ||
34 | task_work_cancel(struct task_struct *task, task_work_func_t func) | ||
35 | { | ||
36 | unsigned long flags; | ||
37 | struct task_work *twork; | ||
38 | struct hlist_node *pos; | ||
39 | |||
40 | raw_spin_lock_irqsave(&task->pi_lock, flags); | ||
41 | hlist_for_each_entry(twork, pos, &task->task_works, hlist) { | ||
42 | if (twork->func == func) { | ||
43 | hlist_del(&twork->hlist); | ||
44 | goto found; | ||
45 | } | ||
46 | } | ||
47 | twork = NULL; | ||
48 | found: | ||
49 | raw_spin_unlock_irqrestore(&task->pi_lock, flags); | ||
50 | |||
51 | return twork; | ||
52 | } | ||
53 | |||
54 | void task_work_run(void) | ||
55 | { | ||
56 | struct task_struct *task = current; | ||
57 | struct hlist_head task_works; | ||
58 | struct hlist_node *pos; | ||
59 | |||
60 | raw_spin_lock_irq(&task->pi_lock); | ||
61 | hlist_move_list(&task->task_works, &task_works); | ||
62 | raw_spin_unlock_irq(&task->pi_lock); | ||
63 | |||
64 | if (unlikely(hlist_empty(&task_works))) | ||
65 | return; | ||
66 | /* | ||
67 | * We use an hlist to save space in task_struct, but we want FIFO order. | ||
68 | * Find the last entry (the list should be short), then process them | ||
69 | * in reverse order. | ||
70 | */ | ||
71 | for (pos = task_works.first; pos->next; pos = pos->next) | ||
72 | ; | ||
73 | |||
74 | for (;;) { | ||
75 | struct hlist_node **pprev = pos->pprev; | ||
76 | struct task_work *twork = container_of(pos, struct task_work, | ||
77 | hlist); | ||
78 | twork->func(twork); | ||
79 | |||
80 | if (pprev == &task_works.first) | ||
81 | break; | ||
82 | pos = container_of(pprev, struct hlist_node, next); | ||
83 | } | ||
84 | } | ||
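For reference, a minimal sketch of how an in-kernel caller might queue work with the API introduced above; the function names and kmalloc-based allocation are illustrative, and it relies only on the func member and return codes visible in this file:

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/task_work.h>

static void example_work_fn(struct task_work *twork)
{
	/* Runs in the target task's own context, e.g. on return to user mode. */
	pr_info("task_work ran in %s\n", current->comm);
	kfree(twork);
}

static int example_queue_work(struct task_struct *task)
{
	struct task_work *twork = kmalloc(sizeof(*twork), GFP_KERNEL);

	if (!twork)
		return -ENOMEM;
	twork->func = example_work_fn;

	/* notify=true sets TIF_NOTIFY_RESUME so the callback runs promptly. */
	if (task_work_add(task, twork, true)) {
		kfree(twork);	/* task already passed exit_task_work() */
		return -ESRCH;
	}
	return 0;
}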
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 6420cda62336..1d0f6a8a0e5e 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -1486,6 +1486,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, | |||
1486 | if (!buffer) | 1486 | if (!buffer) |
1487 | return size; | 1487 | return size; |
1488 | 1488 | ||
1489 | /* Make sure the requested buffer exists */ | ||
1490 | if (cpu_id != RING_BUFFER_ALL_CPUS && | ||
1491 | !cpumask_test_cpu(cpu_id, buffer->cpumask)) | ||
1492 | return size; | ||
1493 | |||
1489 | size = DIV_ROUND_UP(size, BUF_PAGE_SIZE); | 1494 | size = DIV_ROUND_UP(size, BUF_PAGE_SIZE); |
1490 | size *= BUF_PAGE_SIZE; | 1495 | size *= BUF_PAGE_SIZE; |
1491 | 1496 | ||