author    Linus Torvalds <torvalds@linux-foundation.org>  2012-05-31 21:10:18 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2012-05-31 21:10:18 -0400
commit    08615d7d85e5aa02c05bf6c4dde87d940e7f85f6 (patch)
tree      18906149d313d25914160aca21cedf54b3a7e818 /kernel
parent    9fdadb2cbaf4b482dfd6086e8bd3d2db071a1702 (diff)
parent    0a4dd35c67b144d8ef9432120105f1aab9293ee9 (diff)
Merge branch 'akpm' (Andrew's patch-bomb)
Merge misc patches from Andrew Morton:
- the "misc" tree - stuff from all over the map
- checkpatch updates
- fatfs
- kmod changes
- procfs
- cpumask
- UML
- kexec
- mqueue
- rapidio
- pidns
- some checkpoint-restore feature work. Reluctantly. Most of it
delayed a release. I'm still rather worried that we don't have a
clear roadmap to completion for this work.
* emailed from Andrew Morton <akpm@linux-foundation.org>: (78 patches)
kconfig: update compression algorithm info
c/r: prctl: add ability to set new mm_struct::exe_file
c/r: prctl: extend PR_SET_MM to set up more mm_struct entries
c/r: procfs: add arg_start/end, env_start/end and exit_code members to /proc/$pid/stat
syscalls, x86: add __NR_kcmp syscall
fs, proc: introduce /proc/<pid>/task/<tid>/children entry
sysctl: make kernel.ns_last_pid control dependent on CHECKPOINT_RESTORE
aio/vfs: cleanup of rw_copy_check_uvector() and compat_rw_copy_check_uvector()
eventfd: change int to __u64 in eventfd_signal()
fs/nls: add Apple NLS
pidns: make killed children autoreap
pidns: use task_active_pid_ns in do_notify_parent
rapidio/tsi721: add DMA engine support
rapidio: add DMA engine support for RIO data transfers
ipc/mqueue: add rbtree node caching support
tools/selftests: add mq_perf_tests
ipc/mqueue: strengthen checks on mqueue creation
ipc/mqueue: correct mq_attr_ok test
ipc/mqueue: improve performance of send/recv
selftests: add mq_open_tests
...
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile        |   3
-rw-r--r--  kernel/cpu.c           |  44
-rw-r--r--  kernel/cpu_pm.c        |  16
-rw-r--r--  kernel/exit.c          |   8
-rw-r--r--  kernel/fork.c          |  10
-rw-r--r--  kernel/irq/manage.c    |  14
-rw-r--r--  kernel/kcmp.c          | 196
-rw-r--r--  kernel/kmod.c          |  30
-rw-r--r--  kernel/pid_namespace.c |  13
-rw-r--r--  kernel/resource.c      |   4
-rw-r--r--  kernel/signal.c        |  11
-rw-r--r--  kernel/sys.c           | 213
-rw-r--r--  kernel/sys_ni.c        |   3
13 files changed, 464 insertions, 101 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 6c07f30fa9b7..80be6ca0cc75 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -25,6 +25,9 @@ endif
 obj-y += sched/
 obj-y += power/
 
+ifeq ($(CONFIG_CHECKPOINT_RESTORE),y)
+obj-$(CONFIG_X86) += kcmp.o
+endif
 obj-$(CONFIG_FREEZER) += freezer.o
 obj-$(CONFIG_PROFILING) += profile.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 0e6353cf147a..a4eb5227a19e 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -10,7 +10,10 @@
 #include <linux/sched.h>
 #include <linux/unistd.h>
 #include <linux/cpu.h>
+#include <linux/oom.h>
+#include <linux/rcupdate.h>
 #include <linux/export.h>
+#include <linux/bug.h>
 #include <linux/kthread.h>
 #include <linux/stop_machine.h>
 #include <linux/mutex.h>
@@ -173,6 +176,47 @@ void __ref unregister_cpu_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL(unregister_cpu_notifier);
 
+/**
+ * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
+ * @cpu: a CPU id
+ *
+ * This function walks all processes, finds a valid mm struct for each one and
+ * then clears a corresponding bit in mm's cpumask. While this all sounds
+ * trivial, there are various non-obvious corner cases, which this function
+ * tries to solve in a safe manner.
+ *
+ * Also note that the function uses a somewhat relaxed locking scheme, so it may
+ * be called only for an already offlined CPU.
+ */
+void clear_tasks_mm_cpumask(int cpu)
+{
+        struct task_struct *p;
+
+        /*
+         * This function is called after the cpu is taken down and marked
+         * offline, so its not like new tasks will ever get this cpu set in
+         * their mm mask. -- Peter Zijlstra
+         * Thus, we may use rcu_read_lock() here, instead of grabbing
+         * full-fledged tasklist_lock.
+         */
+        WARN_ON(cpu_online(cpu));
+        rcu_read_lock();
+        for_each_process(p) {
+                struct task_struct *t;
+
+                /*
+                 * Main thread might exit, but other threads may still have
+                 * a valid mm. Find one.
+                 */
+                t = find_lock_task_mm(p);
+                if (!t)
+                        continue;
+                cpumask_clear_cpu(cpu, mm_cpumask(t->mm));
+                task_unlock(t);
+        }
+        rcu_read_unlock();
+}
+
 static inline void check_for_tasks(int cpu)
 {
         struct task_struct *p;
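The new helper is a global function presumably aimed at the per-arch CPU-hotplug teardown paths this series also touches. A hedged sketch of what an arch-side caller might look like (illustrative only; the real call sites live under arch/, outside this kernel/ diff):

/* Hypothetical arch-side caller, not part of this diff: once the CPU
 * is fully offline, drop its bit from every task's mm_cpumask in one
 * pass instead of open-coding the walk per architecture. */
void __cpu_die(unsigned int cpu)
{
        /* ... arch-specific wait for the dying CPU to stop ... */

        /* The CPU is offline here, which is exactly the precondition
         * clear_tasks_mm_cpumask() asserts via WARN_ON(cpu_online(cpu)). */
        clear_tasks_mm_cpumask(cpu);

        /* ... free arch resources tied to the dead CPU ... */
}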
diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c
index 249152e15308..9656a3c36503 100644
--- a/kernel/cpu_pm.c
+++ b/kernel/cpu_pm.c
@@ -81,7 +81,7 @@ int cpu_pm_unregister_notifier(struct notifier_block *nb)
 EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier);
 
 /**
- * cpm_pm_enter - CPU low power entry notifier
+ * cpu_pm_enter - CPU low power entry notifier
  *
  * Notifies listeners that a single CPU is entering a low power state that may
  * cause some blocks in the same power domain as the cpu to reset.
@@ -89,7 +89,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier);
  * Must be called on the affected CPU with interrupts disabled. Platform is
  * responsible for ensuring that cpu_pm_enter is not called twice on the same
  * CPU before cpu_pm_exit is called. Notified drivers can include VFP
- * co-processor, interrupt controller and it's PM extensions, local CPU
+ * co-processor, interrupt controller and its PM extensions, local CPU
  * timers context save/restore which shouldn't be interrupted. Hence it
  * must be called with interrupts disabled.
 *
@@ -115,13 +115,13 @@ int cpu_pm_enter(void)
 EXPORT_SYMBOL_GPL(cpu_pm_enter);
 
 /**
- * cpm_pm_exit - CPU low power exit notifier
+ * cpu_pm_exit - CPU low power exit notifier
  *
  * Notifies listeners that a single CPU is exiting a low power state that may
  * have caused some blocks in the same power domain as the cpu to reset.
  *
  * Notified drivers can include VFP co-processor, interrupt controller
- * and it's PM extensions, local CPU timers context save/restore which
+ * and its PM extensions, local CPU timers context save/restore which
  * shouldn't be interrupted. Hence it must be called with interrupts disabled.
  *
  * Return conditions are same as __raw_notifier_call_chain.
@@ -139,7 +139,7 @@ int cpu_pm_exit(void)
 EXPORT_SYMBOL_GPL(cpu_pm_exit);
 
 /**
- * cpm_cluster_pm_enter - CPU cluster low power entry notifier
+ * cpu_cluster_pm_enter - CPU cluster low power entry notifier
  *
  * Notifies listeners that all cpus in a power domain are entering a low power
  * state that may cause some blocks in the same power domain to reset.
@@ -147,7 +147,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_exit);
  * Must be called after cpu_pm_enter has been called on all cpus in the power
  * domain, and before cpu_pm_exit has been called on any cpu in the power
  * domain. Notified drivers can include VFP co-processor, interrupt controller
- * and it's PM extensions, local CPU timers context save/restore which
+ * and its PM extensions, local CPU timers context save/restore which
  * shouldn't be interrupted. Hence it must be called with interrupts disabled.
  *
  * Must be called with interrupts disabled.
@@ -174,7 +174,7 @@ int cpu_cluster_pm_enter(void)
 EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter);
 
 /**
- * cpm_cluster_pm_exit - CPU cluster low power exit notifier
+ * cpu_cluster_pm_exit - CPU cluster low power exit notifier
  *
  * Notifies listeners that all cpus in a power domain are exiting form a
  * low power state that may have caused some blocks in the same power domain
@@ -183,7 +183,7 @@ EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter);
  * Must be called after cpu_pm_exit has been called on all cpus in the power
  * domain, and before cpu_pm_exit has been called on any cpu in the power
  * domain. Notified drivers can include VFP co-processor, interrupt controller
- * and it's PM extensions, local CPU timers context save/restore which
+ * and its PM extensions, local CPU timers context save/restore which
  * shouldn't be interrupted. Hence it must be called with interrupts disabled.
  *
  * Return conditions are same as __raw_notifier_call_chain.
diff --git a/kernel/exit.c b/kernel/exit.c
index 910a0716e17a..6d85655353e9 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -884,9 +884,9 @@ static void check_stack_usage(void)
 
         spin_lock(&low_water_lock);
         if (free < lowest_to_date) {
-                printk(KERN_WARNING "%s used greatest stack depth: %lu bytes "
-                                "left\n",
-                                current->comm, free);
+                printk(KERN_WARNING "%s (%d) used greatest stack depth: "
+                                "%lu bytes left\n",
+                                current->comm, task_pid_nr(current), free);
                 lowest_to_date = free;
         }
         spin_unlock(&low_water_lock);
@@ -1214,7 +1214,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
         unsigned long state;
         int retval, status, traced;
         pid_t pid = task_pid_vnr(p);
-        uid_t uid = from_kuid_munged(current_user_ns(), __task_cred(p)->uid);
+        uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p));
         struct siginfo __user *infop;
 
         if (!likely(wo->wo_flags & WEXITED))
diff --git a/kernel/fork.c b/kernel/fork.c
index 31a32c7dd169..c55b61ab6d64 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -787,9 +787,6 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
         /* Get rid of any cached register state */
         deactivate_mm(tsk, mm);
 
-        if (tsk->vfork_done)
-                complete_vfork_done(tsk);
-
         /*
          * If we're exiting normally, clear a user-space tid field if
          * requested. We leave this alone when dying by signal, to leave
@@ -810,6 +807,13 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
                 }
                 tsk->clear_child_tid = NULL;
         }
+
+        /*
+         * All done, finally we can wake up parent and return this mm to him.
+         * Also kthread_stop() uses this completion for synchronization.
+         */
+        if (tsk->vfork_done)
+                complete_vfork_done(tsk);
 }
 
 /*
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index bb32326afe87..7c475cd3f6e6 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -7,6 +7,8 @@
  * This file contains driver APIs to the irq subsystem.
  */
 
+#define pr_fmt(fmt) "genirq: " fmt
+
 #include <linux/irq.h>
 #include <linux/kthread.h>
 #include <linux/module.h>
@@ -565,7 +567,7 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
          * IRQF_TRIGGER_* but the PIC does not support multiple
          * flow-types?
          */
-        pr_debug("genirq: No set_type function for IRQ %d (%s)\n", irq,
+        pr_debug("No set_type function for IRQ %d (%s)\n", irq,
                         chip ? (chip->name ? : "unknown") : "unknown");
         return 0;
 }
@@ -600,7 +602,7 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
                 ret = 0;
                 break;
         default:
-                pr_err("genirq: Setting trigger mode %lu for irq %u failed (%pF)\n",
+                pr_err("Setting trigger mode %lu for irq %u failed (%pF)\n",
                        flags, irq, chip->irq_set_type);
         }
         if (unmask)
@@ -837,7 +839,7 @@ void exit_irq_thread(void)
 
         action = kthread_data(tsk);
 
-        pr_err("genirq: exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
+        pr_err("exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
                tsk->comm ? tsk->comm : "", tsk->pid, action->irq);
 
         desc = irq_to_desc(action->irq);
@@ -1044,7 +1046,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
                          * has. The type flags are unreliable as the
                          * underlying chip implementation can override them.
                          */
-                        pr_err("genirq: Threaded irq requested with handler=NULL and !ONESHOT for irq %d\n",
+                        pr_err("Threaded irq requested with handler=NULL and !ONESHOT for irq %d\n",
                                irq);
                         ret = -EINVAL;
                         goto out_mask;
@@ -1095,7 +1097,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 
                 if (nmsk != omsk)
                         /* hope the handler works with current trigger mode */
-                        pr_warning("genirq: irq %d uses trigger mode %u; requested %u\n",
+                        pr_warning("irq %d uses trigger mode %u; requested %u\n",
                                    irq, nmsk, omsk);
         }
 
@@ -1133,7 +1135,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 
 mismatch:
         if (!(new->flags & IRQF_PROBE_SHARED)) {
-                pr_err("genirq: Flags mismatch irq %d. %08x (%s) vs. %08x (%s)\n",
+                pr_err("Flags mismatch irq %d. %08x (%s) vs. %08x (%s)\n",
                        irq, new->flags, new->name, old->flags, old->name);
 #ifdef CONFIG_DEBUG_SHIRQ
                 dump_stack();
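This conversion works because the pr_*() macros expand their format string through pr_fmt() at each call site; defining pr_fmt before the first include gives every message in the file the "genirq: " prefix automatically, so the literal prefix can be dropped from each call. A simplified sketch of the mechanism (the real definitions live in include/linux/printk.h):

/* Simplified view of the printk.h machinery this change relies on. */
#define pr_fmt(fmt) "genirq: " fmt     /* must precede the includes */

/* printk.h only supplies a default when the file didn't define one:
 *
 *   #ifndef pr_fmt
 *   #define pr_fmt(fmt) fmt
 *   #endif
 *   #define pr_err(fmt, ...) printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
 *
 * So pr_err("Flags mismatch irq %d ...", irq) now emits
 * "genirq: Flags mismatch irq %d ...". */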
diff --git a/kernel/kcmp.c b/kernel/kcmp.c
new file mode 100644
index 000000000000..30b7b225306c
--- /dev/null
+++ b/kernel/kcmp.c
@@ -0,0 +1,196 @@
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/fdtable.h>
+#include <linux/string.h>
+#include <linux/random.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/cache.h>
+#include <linux/bug.h>
+#include <linux/err.h>
+#include <linux/kcmp.h>
+
+#include <asm/unistd.h>
+
+/*
+ * We don't expose the real in-memory order of objects for security reasons.
+ * But still the comparison results should be suitable for sorting. So we
+ * obfuscate kernel pointers values and compare the production instead.
+ *
+ * The obfuscation is done in two steps. First we xor the kernel pointer with
+ * a random value, which puts pointer into a new position in a reordered space.
+ * Secondly we multiply the xor production with a large odd random number to
+ * permute its bits even more (the odd multiplier guarantees that the product
+ * is unique ever after the high bits are truncated, since any odd number is
+ * relative prime to 2^n).
+ *
+ * Note also that the obfuscation itself is invisible to userspace and if needed
+ * it can be changed to an alternate scheme.
+ */
+static unsigned long cookies[KCMP_TYPES][2] __read_mostly;
+
+static long kptr_obfuscate(long v, int type)
+{
+        return (v ^ cookies[type][0]) * cookies[type][1];
+}
+
+/*
+ * 0 - equal, i.e. v1 = v2
+ * 1 - less than, i.e. v1 < v2
+ * 2 - greater than, i.e. v1 > v2
+ * 3 - not equal but ordering unavailable (reserved for future)
+ */
+static int kcmp_ptr(void *v1, void *v2, enum kcmp_type type)
+{
+        long ret;
+
+        ret = kptr_obfuscate((long)v1, type) - kptr_obfuscate((long)v2, type);
+
+        return (ret < 0) | ((ret > 0) << 1);
+}
+
+/* The caller must have pinned the task */
+static struct file *
+get_file_raw_ptr(struct task_struct *task, unsigned int idx)
+{
+        struct file *file = NULL;
+
+        task_lock(task);
+        rcu_read_lock();
+
+        if (task->files)
+                file = fcheck_files(task->files, idx);
+
+        rcu_read_unlock();
+        task_unlock(task);
+
+        return file;
+}
+
+static void kcmp_unlock(struct mutex *m1, struct mutex *m2)
+{
+        if (likely(m2 != m1))
+                mutex_unlock(m2);
+        mutex_unlock(m1);
+}
+
+static int kcmp_lock(struct mutex *m1, struct mutex *m2)
+{
+        int err;
+
+        if (m2 > m1)
+                swap(m1, m2);
+
+        err = mutex_lock_killable(m1);
+        if (!err && likely(m1 != m2)) {
+                err = mutex_lock_killable_nested(m2, SINGLE_DEPTH_NESTING);
+                if (err)
+                        mutex_unlock(m1);
+        }
+
+        return err;
+}
+
+SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
+                unsigned long, idx1, unsigned long, idx2)
+{
+        struct task_struct *task1, *task2;
+        int ret;
+
+        rcu_read_lock();
+
+        /*
+         * Tasks are looked up in caller's PID namespace only.
+         */
+        task1 = find_task_by_vpid(pid1);
+        task2 = find_task_by_vpid(pid2);
+        if (!task1 || !task2)
+                goto err_no_task;
+
+        get_task_struct(task1);
+        get_task_struct(task2);
+
+        rcu_read_unlock();
+
+        /*
+         * One should have enough rights to inspect task details.
+         */
+        ret = kcmp_lock(&task1->signal->cred_guard_mutex,
+                        &task2->signal->cred_guard_mutex);
+        if (ret)
+                goto err;
+        if (!ptrace_may_access(task1, PTRACE_MODE_READ) ||
+            !ptrace_may_access(task2, PTRACE_MODE_READ)) {
+                ret = -EPERM;
+                goto err_unlock;
+        }
+
+        switch (type) {
+        case KCMP_FILE: {
+                struct file *filp1, *filp2;
+
+                filp1 = get_file_raw_ptr(task1, idx1);
+                filp2 = get_file_raw_ptr(task2, idx2);
+
+                if (filp1 && filp2)
+                        ret = kcmp_ptr(filp1, filp2, KCMP_FILE);
+                else
+                        ret = -EBADF;
+                break;
+        }
+        case KCMP_VM:
+                ret = kcmp_ptr(task1->mm, task2->mm, KCMP_VM);
+                break;
+        case KCMP_FILES:
+                ret = kcmp_ptr(task1->files, task2->files, KCMP_FILES);
+                break;
+        case KCMP_FS:
+                ret = kcmp_ptr(task1->fs, task2->fs, KCMP_FS);
+                break;
+        case KCMP_SIGHAND:
+                ret = kcmp_ptr(task1->sighand, task2->sighand, KCMP_SIGHAND);
+                break;
+        case KCMP_IO:
+                ret = kcmp_ptr(task1->io_context, task2->io_context, KCMP_IO);
+                break;
+        case KCMP_SYSVSEM:
+#ifdef CONFIG_SYSVIPC
+                ret = kcmp_ptr(task1->sysvsem.undo_list,
+                               task2->sysvsem.undo_list,
+                               KCMP_SYSVSEM);
+#else
+                ret = -EOPNOTSUPP;
+#endif
+                break;
+        default:
+                ret = -EINVAL;
+                break;
+        }
+
+err_unlock:
+        kcmp_unlock(&task1->signal->cred_guard_mutex,
+                    &task2->signal->cred_guard_mutex);
+err:
+        put_task_struct(task1);
+        put_task_struct(task2);
+
+        return ret;
+
+err_no_task:
+        rcu_read_unlock();
+        return -ESRCH;
+}
+
+static __init int kcmp_cookies_init(void)
+{
+        int i;
+
+        get_random_bytes(cookies, sizeof(cookies));
+
+        for (i = 0; i < KCMP_TYPES; i++)
+                cookies[i][1] |= (~(~0UL >> 1) | 1);
+
+        return 0;
+}
+arch_initcall(kcmp_cookies_init);
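For context, a hypothetical userspace probe of the new syscall (not part of this commit; assumes a libc that exposes SYS_kcmp via <sys/syscall.h> and the KCMP_* constants via <linux/kcmp.h>):

/* Hypothetical userspace sketch: compare whether two tasks share the
 * same mm using the kcmp return convention documented above
 * (0 equal, 1 less than, 2 greater than, 3 reserved; -1/errno on error). */
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/kcmp.h>

static long kcmp(pid_t pid1, pid_t pid2, int type,
                 unsigned long idx1, unsigned long idx2)
{
        return syscall(SYS_kcmp, pid1, pid2, type, idx1, idx2);
}

int main(void)
{
        pid_t me = getpid();

        /* Comparing a task with itself: the obfuscated pointers are
         * identical, so the result should be 0. */
        long ret = kcmp(me, me, KCMP_VM, 0, 0);

        printf("KCMP_VM self-compare: %ld (0 = same mm)\n", ret);
        return 0;
}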
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 05698a7415fe..ff2c7cb86d77 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -221,13 +221,12 @@ fail:
         return 0;
 }
 
-void call_usermodehelper_freeinfo(struct subprocess_info *info)
+static void call_usermodehelper_freeinfo(struct subprocess_info *info)
 {
         if (info->cleanup)
                 (*info->cleanup)(info);
         kfree(info);
 }
-EXPORT_SYMBOL(call_usermodehelper_freeinfo);
 
 static void umh_complete(struct subprocess_info *sub_info)
 {
@@ -410,7 +409,7 @@ EXPORT_SYMBOL_GPL(usermodehelper_read_unlock);
 
 /**
  * __usermodehelper_set_disable_depth - Modify usermodehelper_disabled.
- * depth: New value to assign to usermodehelper_disabled.
+ * @depth: New value to assign to usermodehelper_disabled.
  *
  * Change the value of usermodehelper_disabled (under umhelper_sem locked for
  * writing) and wakeup tasks waiting for it to change.
@@ -479,6 +478,7 @@ static void helper_unlock(void)
  * structure. This should be passed to call_usermodehelper_exec to
  * exec the process and free the structure.
  */
+static
 struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
                                                   char **envp, gfp_t gfp_mask)
 {
@@ -494,7 +494,6 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
   out:
         return sub_info;
 }
-EXPORT_SYMBOL(call_usermodehelper_setup);
 
 /**
  * call_usermodehelper_setfns - set a cleanup/init function
@@ -512,6 +511,7 @@ EXPORT_SYMBOL(call_usermodehelper_setup);
  * Function must be runnable in either a process context or the
  * context in which call_usermodehelper_exec is called.
  */
+static
 void call_usermodehelper_setfns(struct subprocess_info *info,
                 int (*init)(struct subprocess_info *info, struct cred *new),
                 void (*cleanup)(struct subprocess_info *info),
@@ -521,7 +521,6 @@ void call_usermodehelper_setfns(struct subprocess_info *info,
         info->init = init;
         info->data = data;
 }
-EXPORT_SYMBOL(call_usermodehelper_setfns);
 
 /**
  * call_usermodehelper_exec - start a usermode application
@@ -535,6 +534,7 @@ EXPORT_SYMBOL(call_usermodehelper_setfns);
  * asynchronously if wait is not set, and runs as a child of keventd.
  * (ie. it runs with full root capabilities).
  */
+static
 int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
 {
         DECLARE_COMPLETION_ONSTACK(done);
@@ -576,7 +576,25 @@ unlock:
         helper_unlock();
         return retval;
 }
-EXPORT_SYMBOL(call_usermodehelper_exec);
+
+int call_usermodehelper_fns(
+        char *path, char **argv, char **envp, int wait,
+        int (*init)(struct subprocess_info *info, struct cred *new),
+        void (*cleanup)(struct subprocess_info *), void *data)
+{
+        struct subprocess_info *info;
+        gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
+
+        info = call_usermodehelper_setup(path, argv, envp, gfp_mask);
+
+        if (info == NULL)
+                return -ENOMEM;
+
+        call_usermodehelper_setfns(info, init, cleanup, data);
+
+        return call_usermodehelper_exec(info, wait);
+}
+EXPORT_SYMBOL(call_usermodehelper_fns);
 
 static int proc_cap_handler(struct ctl_table *table, int write,
                             void __user *buffer, size_t *lenp, loff_t *ppos)
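With the setup/setfns/exec triple made static, call_usermodehelper_fns() becomes the single exported entry point. A hedged sketch of a caller, with a hypothetical helper path (the signature comes from the hunk above; this caller is not part of the commit):

/* Hypothetical module code: run /sbin/foo asynchronously with no
 * init/cleanup callbacks, through the new consolidated entry point. */
static int run_foo(void)
{
        char *argv[] = { "/sbin/foo", NULL };
        char *envp[] = { "HOME=/", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };

        return call_usermodehelper_fns("/sbin/foo", argv, envp,
                                       UMH_NO_WAIT, NULL, NULL, NULL);
}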
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 57bc1fd35b3c..16b20e38c4a1 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -149,7 +149,12 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
 {
         int nr;
         int rc;
-        struct task_struct *task;
+        struct task_struct *task, *me = current;
+
+        /* Ignore SIGCHLD causing any terminated children to autoreap */
+        spin_lock_irq(&me->sighand->siglock);
+        me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN;
+        spin_unlock_irq(&me->sighand->siglock);
 
         /*
          * The last thread in the cgroup-init thread group is terminating.
@@ -191,6 +196,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
         return;
 }
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
 static int pid_ns_ctl_handler(struct ctl_table *table, int write,
         void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -218,8 +224,8 @@ static struct ctl_table pid_ns_ctl_table[] = {
         },
         { }
 };
-
 static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } };
+#endif /* CONFIG_CHECKPOINT_RESTORE */
 
 int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
 {
@@ -253,7 +259,10 @@ int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
 static __init int pid_namespaces_init(void)
 {
         pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
+
+#ifdef CONFIG_CHECKPOINT_RESTORE
         register_sysctl_paths(kern_path, pid_ns_ctl_table);
+#endif
         return 0;
 }
 
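The SIGCHLD hunk above leans on standard signal semantics: when a parent's SIGCHLD disposition is SIG_IGN, its terminated children are reaped automatically instead of lingering as zombies. A userspace analogue of the same behaviour (illustrative only, not derived from this commit):

/* Userspace analogue: with SIGCHLD set to SIG_IGN, a terminated child
 * skips the zombie state, and wait() reports ECHILD once all children
 * are gone. */
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
        signal(SIGCHLD, SIG_IGN);

        if (fork() == 0)
                _exit(0);               /* child autoreaps, no zombie left */

        sleep(1);
        if (wait(NULL) < 0 && errno == ECHILD)
                puts("child was autoreaped");
        return 0;
}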
diff --git a/kernel/resource.c b/kernel/resource.c
index 7e8ea66a8c01..e1d2b8ee76d5 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -515,8 +515,8 @@ out:
  * @root: root resource descriptor
  * @new: resource descriptor desired by caller
  * @size: requested resource region size
- * @min: minimum size to allocate
- * @max: maximum size to allocate
+ * @min: minimum boundary to allocate
+ * @max: maximum boundary to allocate
  * @align: alignment requested, in bytes
  * @alignf: alignment function, optional, called if not NULL
  * @alignf_data: arbitrary data to pass to the @alignf function
diff --git a/kernel/signal.c b/kernel/signal.c
index f7b418217633..08dfbd748cd2 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1656,19 +1656,18 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
         info.si_signo = sig;
         info.si_errno = 0;
         /*
-         * we are under tasklist_lock here so our parent is tied to
-         * us and cannot exit and release its namespace.
+         * We are under tasklist_lock here so our parent is tied to
+         * us and cannot change.
          *
-         * the only it can is to switch its nsproxy with sys_unshare,
-         * bu uncharing pid namespaces is not allowed, so we'll always
-         * see relevant namespace
+         * task_active_pid_ns will always return the same pid namespace
+         * until a task passes through release_task.
          *
          * write_lock() currently calls preempt_disable() which is the
          * same as rcu_read_lock(), but according to Oleg, this is not
          * correct to rely on this
          */
         rcu_read_lock();
-        info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
+        info.si_pid = task_pid_nr_ns(tsk, task_active_pid_ns(tsk->parent));
         info.si_uid = from_kuid_munged(task_cred_xxx(tsk->parent, user_ns),
                                        task_uid(tsk));
         rcu_read_unlock();
diff --git a/kernel/sys.c b/kernel/sys.c
index 6df42624e454..9ff89cb9657a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -36,6 +36,8 @@
 #include <linux/personality.h>
 #include <linux/ptrace.h>
 #include <linux/fs_struct.h>
+#include <linux/file.h>
+#include <linux/mount.h>
 #include <linux/gfp.h>
 #include <linux/syscore_ops.h>
 #include <linux/version.h>
@@ -1378,8 +1380,8 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
                 memcpy(u->nodename, tmp, len);
                 memset(u->nodename + len, 0, sizeof(u->nodename) - len);
                 errno = 0;
+                uts_proc_notify(UTS_PROC_HOSTNAME);
         }
-        uts_proc_notify(UTS_PROC_HOSTNAME);
         up_write(&uts_sem);
         return errno;
 }
@@ -1429,8 +1431,8 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
                 memcpy(u->domainname, tmp, len);
                 memset(u->domainname + len, 0, sizeof(u->domainname) - len);
                 errno = 0;
+                uts_proc_notify(UTS_PROC_DOMAINNAME);
         }
-        uts_proc_notify(UTS_PROC_DOMAINNAME);
         up_write(&uts_sem);
         return errno;
 }
@@ -1784,77 +1786,102 @@ SYSCALL_DEFINE1(umask, int, mask)
 }
 
 #ifdef CONFIG_CHECKPOINT_RESTORE
+static bool vma_flags_mismatch(struct vm_area_struct *vma,
+                               unsigned long required,
+                               unsigned long banned)
+{
+        return (vma->vm_flags & required) != required ||
+               (vma->vm_flags & banned);
+}
+
+static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
+{
+        struct file *exe_file;
+        struct dentry *dentry;
+        int err;
+
+        /*
+         * Setting new mm::exe_file is only allowed when no VM_EXECUTABLE vma's
+         * remain. So perform a quick test first.
+         */
+        if (mm->num_exe_file_vmas)
+                return -EBUSY;
+
+        exe_file = fget(fd);
+        if (!exe_file)
+                return -EBADF;
+
+        dentry = exe_file->f_path.dentry;
+
+        /*
+         * Because the original mm->exe_file points to executable file, make
+         * sure that this one is executable as well, to avoid breaking an
+         * overall picture.
+         */
+        err = -EACCES;
+        if (!S_ISREG(dentry->d_inode->i_mode) ||
+            exe_file->f_path.mnt->mnt_flags & MNT_NOEXEC)
+                goto exit;
+
+        err = inode_permission(dentry->d_inode, MAY_EXEC);
+        if (err)
+                goto exit;
+
+        /*
+         * The symlink can be changed only once, just to disallow arbitrary
+         * transitions malicious software might bring in. This means one
+         * could make a snapshot over all processes running and monitor
+         * /proc/pid/exe changes to notice unusual activity if needed.
+         */
+        down_write(&mm->mmap_sem);
+        if (likely(!mm->exe_file))
+                set_mm_exe_file(mm, exe_file);
+        else
+                err = -EBUSY;
+        up_write(&mm->mmap_sem);
+
+exit:
+        fput(exe_file);
+        return err;
+}
+
 static int prctl_set_mm(int opt, unsigned long addr,
                         unsigned long arg4, unsigned long arg5)
 {
         unsigned long rlim = rlimit(RLIMIT_DATA);
-        unsigned long vm_req_flags;
-        unsigned long vm_bad_flags;
-        struct vm_area_struct *vma;
-        int error = 0;
         struct mm_struct *mm = current->mm;
+        struct vm_area_struct *vma;
+        int error;
 
-        if (arg4 | arg5)
+        if (arg5 || (arg4 && opt != PR_SET_MM_AUXV))
                 return -EINVAL;
 
         if (!capable(CAP_SYS_RESOURCE))
                 return -EPERM;
 
+        if (opt == PR_SET_MM_EXE_FILE)
+                return prctl_set_mm_exe_file(mm, (unsigned int)addr);
+
         if (addr >= TASK_SIZE)
                 return -EINVAL;
 
+        error = -EINVAL;
+
         down_read(&mm->mmap_sem);
         vma = find_vma(mm, addr);
 
-        if (opt != PR_SET_MM_START_BRK && opt != PR_SET_MM_BRK) {
-                /* It must be existing VMA */
-                if (!vma || vma->vm_start > addr)
-                        goto out;
-        }
-
-        error = -EINVAL;
         switch (opt) {
         case PR_SET_MM_START_CODE:
+                mm->start_code = addr;
+                break;
         case PR_SET_MM_END_CODE:
-                vm_req_flags = VM_READ | VM_EXEC;
-                vm_bad_flags = VM_WRITE | VM_MAYSHARE;
-
-                if ((vma->vm_flags & vm_req_flags) != vm_req_flags ||
-                    (vma->vm_flags & vm_bad_flags))
-                        goto out;
-
-                if (opt == PR_SET_MM_START_CODE)
-                        mm->start_code = addr;
-                else
-                        mm->end_code = addr;
+                mm->end_code = addr;
                 break;
-
         case PR_SET_MM_START_DATA:
-        case PR_SET_MM_END_DATA:
-                vm_req_flags = VM_READ | VM_WRITE;
-                vm_bad_flags = VM_EXEC | VM_MAYSHARE;
-
-                if ((vma->vm_flags & vm_req_flags) != vm_req_flags ||
-                    (vma->vm_flags & vm_bad_flags))
-                        goto out;
-
-                if (opt == PR_SET_MM_START_DATA)
-                        mm->start_data = addr;
-                else
-                        mm->end_data = addr;
+                mm->start_data = addr;
                 break;
-
-        case PR_SET_MM_START_STACK:
-
-#ifdef CONFIG_STACK_GROWSUP
-                vm_req_flags = VM_READ | VM_WRITE | VM_GROWSUP;
-#else
-                vm_req_flags = VM_READ | VM_WRITE | VM_GROWSDOWN;
-#endif
-                if ((vma->vm_flags & vm_req_flags) != vm_req_flags)
-                        goto out;
-
-                mm->start_stack = addr;
+        case PR_SET_MM_END_DATA:
+                mm->end_data = addr;
                 break;
 
         case PR_SET_MM_START_BRK:
@@ -1881,16 +1908,77 @@ static int prctl_set_mm(int opt, unsigned long addr,
                 mm->brk = addr;
                 break;
 
+        /*
+         * If command line arguments and environment
+         * are placed somewhere else on stack, we can
+         * set them up here, ARG_START/END to setup
+         * command line argumets and ENV_START/END
+         * for environment.
+         */
+        case PR_SET_MM_START_STACK:
+        case PR_SET_MM_ARG_START:
+        case PR_SET_MM_ARG_END:
+        case PR_SET_MM_ENV_START:
+        case PR_SET_MM_ENV_END:
+                if (!vma) {
+                        error = -EFAULT;
+                        goto out;
+                }
+#ifdef CONFIG_STACK_GROWSUP
+                if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSUP, 0))
+#else
+                if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSDOWN, 0))
+#endif
+                        goto out;
+                if (opt == PR_SET_MM_START_STACK)
+                        mm->start_stack = addr;
+                else if (opt == PR_SET_MM_ARG_START)
+                        mm->arg_start = addr;
+                else if (opt == PR_SET_MM_ARG_END)
+                        mm->arg_end = addr;
+                else if (opt == PR_SET_MM_ENV_START)
+                        mm->env_start = addr;
+                else if (opt == PR_SET_MM_ENV_END)
+                        mm->env_end = addr;
+                break;
+
+        /*
+         * This doesn't move auxiliary vector itself
+         * since it's pinned to mm_struct, but allow
+         * to fill vector with new values. It's up
+         * to a caller to provide sane values here
+         * otherwise user space tools which use this
+         * vector might be unhappy.
+         */
+        case PR_SET_MM_AUXV: {
+                unsigned long user_auxv[AT_VECTOR_SIZE];
+
+                if (arg4 > sizeof(user_auxv))
+                        goto out;
+                up_read(&mm->mmap_sem);
+
+                if (copy_from_user(user_auxv, (const void __user *)addr, arg4))
+                        return -EFAULT;
+
+                /* Make sure the last entry is always AT_NULL */
+                user_auxv[AT_VECTOR_SIZE - 2] = 0;
+                user_auxv[AT_VECTOR_SIZE - 1] = 0;
+
+                BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
+
+                task_lock(current);
+                memcpy(mm->saved_auxv, user_auxv, arg4);
+                task_unlock(current);
+
+                return 0;
+        }
         default:
-                error = -EINVAL;
                 goto out;
         }
 
         error = 0;
-
 out:
         up_read(&mm->mmap_sem);
-
         return error;
 }
 #else /* CONFIG_CHECKPOINT_RESTORE */
@@ -2114,7 +2202,6 @@ int orderly_poweroff(bool force)
                 NULL
         };
         int ret = -ENOMEM;
-        struct subprocess_info *info;
 
         if (argv == NULL) {
                 printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
@@ -2122,18 +2209,16 @@ int orderly_poweroff(bool force)
                 goto out;
         }
 
-        info = call_usermodehelper_setup(argv[0], argv, envp, GFP_ATOMIC);
-        if (info == NULL) {
-                argv_free(argv);
-                goto out;
-        }
-
-        call_usermodehelper_setfns(info, NULL, argv_cleanup, NULL);
+        ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_NO_WAIT,
+                                      NULL, argv_cleanup, NULL);
+out:
+        if (likely(!ret))
+                return 0;
 
-        ret = call_usermodehelper_exec(info, UMH_NO_WAIT);
+        if (ret == -ENOMEM)
+                argv_free(argv);
 
-out:
-        if (ret && force) {
+        if (force) {
                 printk(KERN_WARNING "Failed to start orderly shutdown: "
                                     "forcing the issue\n");
 
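For orientation, a hypothetical restorer-side sketch of the extended PR_SET_MM interface added above (not from this commit; it requires CAP_SYS_RESOURCE, and the addresses must satisfy the VMA checks in prctl_set_mm, so the values here are pure placeholders):

/* Hypothetical checkpoint/restore helper: re-point the recorded argv
 * span after relocating it on the stack. PR_SET_MM_* constants come
 * from <linux/prctl.h>. */
#include <sys/prctl.h>

static int set_arg_span(unsigned long start, unsigned long end)
{
        if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, start, 0, 0))
                return -1;
        if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, end, 0, 0))
                return -1;
        return 0;       /* /proc/self/cmdline now reads from [start, end) */
}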
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 47bfa16430d7..dbff751e4086 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -203,3 +203,6 @@ cond_syscall(sys_fanotify_mark);
 cond_syscall(sys_name_to_handle_at);
 cond_syscall(sys_open_by_handle_at);
 cond_syscall(compat_sys_open_by_handle_at);
+
+/* compare kernel pointers */
+cond_syscall(sys_kcmp);
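The cond_syscall() entry matters because kernel/kcmp.o is x86-only and gated on CHECKPOINT_RESTORE: configurations that never compile it must still link a sys_kcmp symbol. cond_syscall provides that as a weak alias of sys_ni_syscall, so the slot simply returns -ENOSYS. Roughly, as a sketch from memory of the era's headers (treat the exact macro text as an assumption):

/* Weak-alias sketch: if sys_kcmp is never compiled in, the symbol
 * resolves to sys_ni_syscall, which returns -ENOSYS. */
#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")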