Diffstat (limited to 'kernel')

 kernel/Makefile        |   3
 kernel/cpu.c           |  44
 kernel/cpu_pm.c        |  16
 kernel/exit.c          |   8
 kernel/fork.c          |  10
 kernel/irq/manage.c    |  14
 kernel/kcmp.c          | 196
 kernel/kmod.c          |  30
 kernel/pid_namespace.c |  13
 kernel/resource.c      |   4
 kernel/signal.c        |  11
 kernel/sys.c           | 213
 kernel/sys_ni.c        |   3

 13 files changed, 464 insertions(+), 101 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index 6c07f30fa9b7..80be6ca0cc75 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -25,6 +25,9 @@ endif
 obj-y += sched/
 obj-y += power/
 
+ifeq ($(CONFIG_CHECKPOINT_RESTORE),y)
+obj-$(CONFIG_X86) += kcmp.o
+endif
 obj-$(CONFIG_FREEZER) += freezer.o
 obj-$(CONFIG_PROFILING) += profile.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 0e6353cf147a..a4eb5227a19e 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -10,7 +10,10 @@
 #include <linux/sched.h>
 #include <linux/unistd.h>
 #include <linux/cpu.h>
+#include <linux/oom.h>
+#include <linux/rcupdate.h>
 #include <linux/export.h>
+#include <linux/bug.h>
 #include <linux/kthread.h>
 #include <linux/stop_machine.h>
 #include <linux/mutex.h>
@@ -173,6 +176,47 @@ void __ref unregister_cpu_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL(unregister_cpu_notifier);
 
+/**
+ * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
+ * @cpu: a CPU id
+ *
+ * This function walks all processes, finds a valid mm struct for each one and
+ * then clears a corresponding bit in mm's cpumask. While this all sounds
+ * trivial, there are various non-obvious corner cases, which this function
+ * tries to solve in a safe manner.
+ *
+ * Also note that the function uses a somewhat relaxed locking scheme, so it may
+ * be called only for an already offlined CPU.
+ */
+void clear_tasks_mm_cpumask(int cpu)
+{
+	struct task_struct *p;
+
+	/*
+	 * This function is called after the cpu is taken down and marked
+	 * offline, so its not like new tasks will ever get this cpu set in
+	 * their mm mask. -- Peter Zijlstra
+	 * Thus, we may use rcu_read_lock() here, instead of grabbing
+	 * full-fledged tasklist_lock.
+	 */
+	WARN_ON(cpu_online(cpu));
+	rcu_read_lock();
+	for_each_process(p) {
+		struct task_struct *t;
+
+		/*
+		 * Main thread might exit, but other threads may still have
+		 * a valid mm. Find one.
+		 */
+		t = find_lock_task_mm(p);
+		if (!t)
+			continue;
+		cpumask_clear_cpu(cpu, mm_cpumask(t->mm));
+		task_unlock(t);
+	}
+	rcu_read_unlock();
+}
+
 static inline void check_for_tasks(int cpu)
 {
 	struct task_struct *p;
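Note: clear_tasks_mm_cpumask() is meant to be called from an architecture's CPU-teardown path, after the dying CPU has been marked offline. A minimal sketch of such a caller, assuming an arch with a __cpu_disable()-style hook -- the surrounding details are hypothetical and not part of this patch:

    /*
     * Hypothetical arch hotplug hook: a sketch of how the new helper is
     * intended to be used, not code from this series.
     */
    int __cpu_disable(void)
    {
    	unsigned int cpu = smp_processor_id();

    	/* ... arch-specific teardown, migrate IRQs away, etc. ... */

    	set_cpu_online(cpu, false);	/* the CPU must be offline first */

    	/*
    	 * Drop this CPU from every task's mm_cpumask so later TLB
    	 * shootdowns no longer target it.
    	 */
    	clear_tasks_mm_cpumask(cpu);

    	return 0;
    }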
diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c
index 249152e15308..9656a3c36503 100644
--- a/kernel/cpu_pm.c
+++ b/kernel/cpu_pm.c
@@ -81,7 +81,7 @@ int cpu_pm_unregister_notifier(struct notifier_block *nb)
 EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier);
 
 /**
- * cpm_pm_enter - CPU low power entry notifier
+ * cpu_pm_enter - CPU low power entry notifier
  *
  * Notifies listeners that a single CPU is entering a low power state that may
  * cause some blocks in the same power domain as the cpu to reset.
@@ -89,7 +89,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier);
  * Must be called on the affected CPU with interrupts disabled. Platform is
  * responsible for ensuring that cpu_pm_enter is not called twice on the same
  * CPU before cpu_pm_exit is called. Notified drivers can include VFP
- * co-processor, interrupt controller and it's PM extensions, local CPU
+ * co-processor, interrupt controller and its PM extensions, local CPU
  * timers context save/restore which shouldn't be interrupted. Hence it
  * must be called with interrupts disabled.
  *
@@ -115,13 +115,13 @@ int cpu_pm_enter(void)
 EXPORT_SYMBOL_GPL(cpu_pm_enter);
 
 /**
- * cpm_pm_exit - CPU low power exit notifier
+ * cpu_pm_exit - CPU low power exit notifier
  *
  * Notifies listeners that a single CPU is exiting a low power state that may
  * have caused some blocks in the same power domain as the cpu to reset.
  *
  * Notified drivers can include VFP co-processor, interrupt controller
- * and it's PM extensions, local CPU timers context save/restore which
+ * and its PM extensions, local CPU timers context save/restore which
  * shouldn't be interrupted. Hence it must be called with interrupts disabled.
  *
  * Return conditions are same as __raw_notifier_call_chain.
@@ -139,7 +139,7 @@ int cpu_pm_exit(void)
 EXPORT_SYMBOL_GPL(cpu_pm_exit);
 
 /**
- * cpm_cluster_pm_enter - CPU cluster low power entry notifier
+ * cpu_cluster_pm_enter - CPU cluster low power entry notifier
  *
  * Notifies listeners that all cpus in a power domain are entering a low power
  * state that may cause some blocks in the same power domain to reset.
@@ -147,7 +147,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_exit);
 * Must be called after cpu_pm_enter has been called on all cpus in the power
 * domain, and before cpu_pm_exit has been called on any cpu in the power
 * domain. Notified drivers can include VFP co-processor, interrupt controller
- * and it's PM extensions, local CPU timers context save/restore which
+ * and its PM extensions, local CPU timers context save/restore which
 * shouldn't be interrupted. Hence it must be called with interrupts disabled.
 *
 * Must be called with interrupts disabled.
@@ -174,7 +174,7 @@ int cpu_cluster_pm_enter(void)
 EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter);
 
 /**
- * cpm_cluster_pm_exit - CPU cluster low power exit notifier
+ * cpu_cluster_pm_exit - CPU cluster low power exit notifier
  *
  * Notifies listeners that all cpus in a power domain are exiting form a
  * low power state that may have caused some blocks in the same power domain
@@ -183,7 +183,7 @@ EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter);
 * Must be called after cpu_pm_exit has been called on all cpus in the power
 * domain, and before cpu_pm_exit has been called on any cpu in the power
 * domain. Notified drivers can include VFP co-processor, interrupt controller
- * and it's PM extensions, local CPU timers context save/restore which
+ * and its PM extensions, local CPU timers context save/restore which
 * shouldn't be interrupted. Hence it must be called with interrupts disabled.
 *
 * Return conditions are same as __raw_notifier_call_chain.
diff --git a/kernel/exit.c b/kernel/exit.c
index 910a0716e17a..6d85655353e9 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -884,9 +884,9 @@ static void check_stack_usage(void)
 
 	spin_lock(&low_water_lock);
 	if (free < lowest_to_date) {
-		printk(KERN_WARNING "%s used greatest stack depth: %lu bytes "
-				"left\n",
-				current->comm, free);
+		printk(KERN_WARNING "%s (%d) used greatest stack depth: "
+				"%lu bytes left\n",
+				current->comm, task_pid_nr(current), free);
 		lowest_to_date = free;
 	}
 	spin_unlock(&low_water_lock);
@@ -1214,7 +1214,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 	unsigned long state;
 	int retval, status, traced;
 	pid_t pid = task_pid_vnr(p);
-	uid_t uid = from_kuid_munged(current_user_ns(), __task_cred(p)->uid);
+	uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p));
 	struct siginfo __user *infop;
 
 	if (!likely(wo->wo_flags & WEXITED))
diff --git a/kernel/fork.c b/kernel/fork.c
index 31a32c7dd169..c55b61ab6d64 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -787,9 +787,6 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 	/* Get rid of any cached register state */
 	deactivate_mm(tsk, mm);
 
-	if (tsk->vfork_done)
-		complete_vfork_done(tsk);
-
 	/*
 	 * If we're exiting normally, clear a user-space tid field if
 	 * requested. We leave this alone when dying by signal, to leave
@@ -810,6 +807,13 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 		}
 		tsk->clear_child_tid = NULL;
 	}
+
+	/*
+	 * All done, finally we can wake up parent and return this mm to him.
+	 * Also kthread_stop() uses this completion for synchronization.
+	 */
+	if (tsk->vfork_done)
+		complete_vfork_done(tsk);
 }
 
 /*
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index bb32326afe87..7c475cd3f6e6 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -7,6 +7,8 @@
  * This file contains driver APIs to the irq subsystem.
  */
 
+#define pr_fmt(fmt) "genirq: " fmt
+
 #include <linux/irq.h>
 #include <linux/kthread.h>
 #include <linux/module.h>
@@ -565,7 +567,7 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
		 * IRQF_TRIGGER_* but the PIC does not support multiple
		 * flow-types?
		 */
-		pr_debug("genirq: No set_type function for IRQ %d (%s)\n", irq,
+		pr_debug("No set_type function for IRQ %d (%s)\n", irq,
			 chip ? (chip->name ? : "unknown") : "unknown");
		return 0;
	}
@@ -600,7 +602,7 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
		ret = 0;
		break;
	default:
-		pr_err("genirq: Setting trigger mode %lu for irq %u failed (%pF)\n",
+		pr_err("Setting trigger mode %lu for irq %u failed (%pF)\n",
		       flags, irq, chip->irq_set_type);
	}
	if (unmask)
@@ -837,7 +839,7 @@ void exit_irq_thread(void)
 
	action = kthread_data(tsk);
 
-	pr_err("genirq: exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
+	pr_err("exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
	       tsk->comm ? tsk->comm : "", tsk->pid, action->irq);
 
	desc = irq_to_desc(action->irq);
@@ -1044,7 +1046,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
			 * has. The type flags are unreliable as the
			 * underlying chip implementation can override them.
			 */
-			pr_err("genirq: Threaded irq requested with handler=NULL and !ONESHOT for irq %d\n",
+			pr_err("Threaded irq requested with handler=NULL and !ONESHOT for irq %d\n",
			       irq);
			ret = -EINVAL;
			goto out_mask;
@@ -1095,7 +1097,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 
		if (nmsk != omsk)
			/* hope the handler works with current trigger mode */
-			pr_warning("genirq: irq %d uses trigger mode %u; requested %u\n",
+			pr_warning("irq %d uses trigger mode %u; requested %u\n",
				   irq, nmsk, omsk);
	}
 
@@ -1133,7 +1135,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 
 mismatch:
	if (!(new->flags & IRQF_PROBE_SHARED)) {
-		pr_err("genirq: Flags mismatch irq %d. %08x (%s) vs. %08x (%s)\n",
+		pr_err("Flags mismatch irq %d. %08x (%s) vs. %08x (%s)\n",
		       irq, new->flags, new->name, old->flags, old->name);
 #ifdef CONFIG_DEBUG_SHIRQ
		dump_stack();
diff --git a/kernel/kcmp.c b/kernel/kcmp.c
new file mode 100644
index 000000000000..30b7b225306c
--- /dev/null
+++ b/kernel/kcmp.c
@@ -0,0 +1,196 @@
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/fdtable.h>
+#include <linux/string.h>
+#include <linux/random.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/cache.h>
+#include <linux/bug.h>
+#include <linux/err.h>
+#include <linux/kcmp.h>
+
+#include <asm/unistd.h>
+
+/*
+ * We don't expose the real in-memory order of objects for security reasons.
+ * But still the comparison results should be suitable for sorting. So we
+ * obfuscate kernel pointers values and compare the production instead.
+ *
+ * The obfuscation is done in two steps. First we xor the kernel pointer with
+ * a random value, which puts pointer into a new position in a reordered space.
+ * Secondly we multiply the xor production with a large odd random number to
+ * permute its bits even more (the odd multiplier guarantees that the product
+ * is unique ever after the high bits are truncated, since any odd number is
+ * relative prime to 2^n).
+ *
+ * Note also that the obfuscation itself is invisible to userspace and if needed
+ * it can be changed to an alternate scheme.
+ */
+static unsigned long cookies[KCMP_TYPES][2] __read_mostly;
+
+static long kptr_obfuscate(long v, int type)
+{
+	return (v ^ cookies[type][0]) * cookies[type][1];
+}
+
+/*
+ * 0 - equal, i.e. v1 = v2
+ * 1 - less than, i.e. v1 < v2
+ * 2 - greater than, i.e. v1 > v2
+ * 3 - not equal but ordering unavailable (reserved for future)
+ */
+static int kcmp_ptr(void *v1, void *v2, enum kcmp_type type)
+{
+	long ret;
+
+	ret = kptr_obfuscate((long)v1, type) - kptr_obfuscate((long)v2, type);
+
+	return (ret < 0) | ((ret > 0) << 1);
+}
+
+/* The caller must have pinned the task */
+static struct file *
+get_file_raw_ptr(struct task_struct *task, unsigned int idx)
+{
+	struct file *file = NULL;
+
+	task_lock(task);
+	rcu_read_lock();
+
+	if (task->files)
+		file = fcheck_files(task->files, idx);
+
+	rcu_read_unlock();
+	task_unlock(task);
+
+	return file;
+}
+
+static void kcmp_unlock(struct mutex *m1, struct mutex *m2)
+{
+	if (likely(m2 != m1))
+		mutex_unlock(m2);
+	mutex_unlock(m1);
+}
+
+static int kcmp_lock(struct mutex *m1, struct mutex *m2)
+{
+	int err;
+
+	if (m2 > m1)
+		swap(m1, m2);
+
+	err = mutex_lock_killable(m1);
+	if (!err && likely(m1 != m2)) {
+		err = mutex_lock_killable_nested(m2, SINGLE_DEPTH_NESTING);
+		if (err)
+			mutex_unlock(m1);
+	}
+
+	return err;
+}
+
+SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
+		unsigned long, idx1, unsigned long, idx2)
+{
+	struct task_struct *task1, *task2;
+	int ret;
+
+	rcu_read_lock();
+
+	/*
+	 * Tasks are looked up in caller's PID namespace only.
+	 */
+	task1 = find_task_by_vpid(pid1);
+	task2 = find_task_by_vpid(pid2);
+	if (!task1 || !task2)
+		goto err_no_task;
+
+	get_task_struct(task1);
+	get_task_struct(task2);
+
+	rcu_read_unlock();
+
+	/*
+	 * One should have enough rights to inspect task details.
+	 */
+	ret = kcmp_lock(&task1->signal->cred_guard_mutex,
+			&task2->signal->cred_guard_mutex);
+	if (ret)
+		goto err;
+	if (!ptrace_may_access(task1, PTRACE_MODE_READ) ||
+	    !ptrace_may_access(task2, PTRACE_MODE_READ)) {
+		ret = -EPERM;
+		goto err_unlock;
+	}
+
+	switch (type) {
+	case KCMP_FILE: {
+		struct file *filp1, *filp2;
+
+		filp1 = get_file_raw_ptr(task1, idx1);
+		filp2 = get_file_raw_ptr(task2, idx2);
+
+		if (filp1 && filp2)
+			ret = kcmp_ptr(filp1, filp2, KCMP_FILE);
+		else
+			ret = -EBADF;
+		break;
+	}
+	case KCMP_VM:
+		ret = kcmp_ptr(task1->mm, task2->mm, KCMP_VM);
+		break;
+	case KCMP_FILES:
+		ret = kcmp_ptr(task1->files, task2->files, KCMP_FILES);
+		break;
+	case KCMP_FS:
+		ret = kcmp_ptr(task1->fs, task2->fs, KCMP_FS);
+		break;
+	case KCMP_SIGHAND:
+		ret = kcmp_ptr(task1->sighand, task2->sighand, KCMP_SIGHAND);
+		break;
+	case KCMP_IO:
+		ret = kcmp_ptr(task1->io_context, task2->io_context, KCMP_IO);
+		break;
+	case KCMP_SYSVSEM:
+#ifdef CONFIG_SYSVIPC
+		ret = kcmp_ptr(task1->sysvsem.undo_list,
+			       task2->sysvsem.undo_list,
+			       KCMP_SYSVSEM);
+#else
+		ret = -EOPNOTSUPP;
+#endif
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+err_unlock:
+	kcmp_unlock(&task1->signal->cred_guard_mutex,
+		    &task2->signal->cred_guard_mutex);
+err:
+	put_task_struct(task1);
+	put_task_struct(task2);
+
+	return ret;
+
+err_no_task:
+	rcu_read_unlock();
+	return -ESRCH;
+}
+
+static __init int kcmp_cookies_init(void)
+{
+	int i;
+
+	get_random_bytes(cookies, sizeof(cookies));
+
+	for (i = 0; i < KCMP_TYPES; i++)
+		cookies[i][1] |= (~(~0UL >> 1) | 1);
+
+	return 0;
+}
+arch_initcall(kcmp_cookies_init);
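Note: from userspace the new syscall is reached through syscall(2), and its return values follow the ordering convention documented above kcmp_ptr(). A minimal sketch, assuming a kernel with __NR_kcmp wired up and <linux/kcmp.h> installed; the helper name is illustrative:

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/kcmp.h>

    /* Illustrative helper: do two tasks share the same mm? */
    static int tasks_share_vm(pid_t pid1, pid_t pid2)
    {
    	/* idx1/idx2 are only used for KCMP_FILE, so pass 0 here */
    	long ret = syscall(__NR_kcmp, pid1, pid2, KCMP_VM, 0, 0);

    	if (ret < 0)
    		return -1;	/* ESRCH, EPERM, ... */
    	return ret == 0;	/* 0 means "equal" */
    }

    int main(void)
    {
    	pid_t me = getpid();

    	/* Comparing a task with itself must report equality. */
    	printf("self shares VM with self: %d\n", tasks_share_vm(me, me));
    	return 0;
    }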
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 05698a7415fe..ff2c7cb86d77 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -221,13 +221,12 @@ fail:
	return 0;
 }
 
-void call_usermodehelper_freeinfo(struct subprocess_info *info)
+static void call_usermodehelper_freeinfo(struct subprocess_info *info)
 {
	if (info->cleanup)
		(*info->cleanup)(info);
	kfree(info);
 }
-EXPORT_SYMBOL(call_usermodehelper_freeinfo);
 
 static void umh_complete(struct subprocess_info *sub_info)
 {
@@ -410,7 +409,7 @@ EXPORT_SYMBOL_GPL(usermodehelper_read_unlock);
 
 /**
  * __usermodehelper_set_disable_depth - Modify usermodehelper_disabled.
- * depth: New value to assign to usermodehelper_disabled.
+ * @depth: New value to assign to usermodehelper_disabled.
  *
  * Change the value of usermodehelper_disabled (under umhelper_sem locked for
  * writing) and wakeup tasks waiting for it to change.
@@ -479,6 +478,7 @@ static void helper_unlock(void)
  * structure. This should be passed to call_usermodehelper_exec to
  * exec the process and free the structure.
  */
+static
 struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
						  char **envp, gfp_t gfp_mask)
 {
@@ -494,7 +494,6 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
   out:
	return sub_info;
 }
-EXPORT_SYMBOL(call_usermodehelper_setup);
 
 /**
  * call_usermodehelper_setfns - set a cleanup/init function
@@ -512,6 +511,7 @@ EXPORT_SYMBOL(call_usermodehelper_setup);
  * Function must be runnable in either a process context or the
  * context in which call_usermodehelper_exec is called.
  */
+static
 void call_usermodehelper_setfns(struct subprocess_info *info,
		int (*init)(struct subprocess_info *info, struct cred *new),
		void (*cleanup)(struct subprocess_info *info),
@@ -521,7 +521,6 @@ void call_usermodehelper_setfns(struct subprocess_info *info,
	info->init = init;
	info->data = data;
 }
-EXPORT_SYMBOL(call_usermodehelper_setfns);
 
 /**
  * call_usermodehelper_exec - start a usermode application
@@ -535,6 +534,7 @@ EXPORT_SYMBOL(call_usermodehelper_setfns);
  * asynchronously if wait is not set, and runs as a child of keventd.
  * (ie. it runs with full root capabilities).
  */
+static
 int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
 {
	DECLARE_COMPLETION_ONSTACK(done);
@@ -576,7 +576,25 @@ unlock:
	helper_unlock();
	return retval;
 }
-EXPORT_SYMBOL(call_usermodehelper_exec);
+
+int call_usermodehelper_fns(
+	char *path, char **argv, char **envp, int wait,
+	int (*init)(struct subprocess_info *info, struct cred *new),
+	void (*cleanup)(struct subprocess_info *), void *data)
+{
+	struct subprocess_info *info;
+	gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
+
+	info = call_usermodehelper_setup(path, argv, envp, gfp_mask);
+
+	if (info == NULL)
+		return -ENOMEM;
+
+	call_usermodehelper_setfns(info, init, cleanup, data);
+
+	return call_usermodehelper_exec(info, wait);
+}
+EXPORT_SYMBOL(call_usermodehelper_fns);
 
 static int proc_cap_handler(struct ctl_table *table, int write,
	void __user *buffer, size_t *lenp, loff_t *ppos)
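Note: with the setup/setfns/exec helpers now static, in-kernel users funnel through the new call_usermodehelper_fns() wrapper. A minimal sketch of a caller, assuming a built-in that just spawns a helper binary; the path and arguments are illustrative:

    /* Illustrative in-kernel caller of the new wrapper. */
    static int run_example_helper(void)
    {
    	char *argv[] = { "/sbin/example-helper", "--oneshot", NULL };
    	static char *envp[] = {
    		"HOME=/",
    		"PATH=/sbin:/bin:/usr/sbin:/usr/bin",
    		NULL
    	};

    	/*
    	 * No init/cleanup callbacks, no private data; UMH_WAIT_PROC
    	 * blocks until the helper exits and returns its status.
    	 */
    	return call_usermodehelper_fns(argv[0], argv, envp, UMH_WAIT_PROC,
    				       NULL, NULL, NULL);
    }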
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 57bc1fd35b3c..16b20e38c4a1 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -149,7 +149,12 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
 {
	int nr;
	int rc;
-	struct task_struct *task;
+	struct task_struct *task, *me = current;
+
+	/* Ignore SIGCHLD causing any terminated children to autoreap */
+	spin_lock_irq(&me->sighand->siglock);
+	me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN;
+	spin_unlock_irq(&me->sighand->siglock);
 
	/*
	 * The last thread in the cgroup-init thread group is terminating.
@@ -191,6 +196,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
	return;
 }
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
 static int pid_ns_ctl_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -218,8 +224,8 @@ static struct ctl_table pid_ns_ctl_table[] = {
	},
	{ }
 };
-
 static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } };
+#endif /* CONFIG_CHECKPOINT_RESTORE */
 
 int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
 {
@@ -253,7 +259,10 @@ int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
 static __init int pid_namespaces_init(void)
 {
	pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
+
+#ifdef CONFIG_CHECKPOINT_RESTORE
	register_sysctl_paths(kern_path, pid_ns_ctl_table);
+#endif
	return 0;
 }
 
diff --git a/kernel/resource.c b/kernel/resource.c
index 7e8ea66a8c01..e1d2b8ee76d5 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -515,8 +515,8 @@ out:
  * @root: root resource descriptor
  * @new: resource descriptor desired by caller
  * @size: requested resource region size
- * @min: minimum size to allocate
- * @max: maximum size to allocate
+ * @min: minimum boundary to allocate
+ * @max: maximum boundary to allocate
  * @align: alignment requested, in bytes
  * @alignf: alignment function, optional, called if not NULL
  * @alignf_data: arbitrary data to pass to the @alignf function
diff --git a/kernel/signal.c b/kernel/signal.c
index f7b418217633..08dfbd748cd2 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1656,19 +1656,18 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
	info.si_signo = sig;
	info.si_errno = 0;
	/*
-	 * we are under tasklist_lock here so our parent is tied to
-	 * us and cannot exit and release its namespace.
+	 * We are under tasklist_lock here so our parent is tied to
+	 * us and cannot change.
	 *
-	 * the only it can is to switch its nsproxy with sys_unshare,
-	 * bu uncharing pid namespaces is not allowed, so we'll always
-	 * see relevant namespace
+	 * task_active_pid_ns will always return the same pid namespace
+	 * until a task passes through release_task.
	 *
	 * write_lock() currently calls preempt_disable() which is the
	 * same as rcu_read_lock(), but according to Oleg, this is not
	 * correct to rely on this
	 */
	rcu_read_lock();
-	info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
+	info.si_pid = task_pid_nr_ns(tsk, task_active_pid_ns(tsk->parent));
	info.si_uid = from_kuid_munged(task_cred_xxx(tsk->parent, user_ns),
				       task_uid(tsk));
	rcu_read_unlock();
diff --git a/kernel/sys.c b/kernel/sys.c
index 6df42624e454..9ff89cb9657a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -36,6 +36,8 @@
 #include <linux/personality.h>
 #include <linux/ptrace.h>
 #include <linux/fs_struct.h>
+#include <linux/file.h>
+#include <linux/mount.h>
 #include <linux/gfp.h>
 #include <linux/syscore_ops.h>
 #include <linux/version.h>
@@ -1378,8 +1380,8 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
		memcpy(u->nodename, tmp, len);
		memset(u->nodename + len, 0, sizeof(u->nodename) - len);
		errno = 0;
+		uts_proc_notify(UTS_PROC_HOSTNAME);
	}
-	uts_proc_notify(UTS_PROC_HOSTNAME);
	up_write(&uts_sem);
	return errno;
 }
@@ -1429,8 +1431,8 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
		memcpy(u->domainname, tmp, len);
		memset(u->domainname + len, 0, sizeof(u->domainname) - len);
		errno = 0;
+		uts_proc_notify(UTS_PROC_DOMAINNAME);
	}
-	uts_proc_notify(UTS_PROC_DOMAINNAME);
	up_write(&uts_sem);
	return errno;
 }
@@ -1784,77 +1786,102 @@ SYSCALL_DEFINE1(umask, int, mask)
 }
 
 #ifdef CONFIG_CHECKPOINT_RESTORE
+static bool vma_flags_mismatch(struct vm_area_struct *vma,
+			       unsigned long required,
+			       unsigned long banned)
+{
+	return (vma->vm_flags & required) != required ||
+	       (vma->vm_flags & banned);
+}
+
+static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
+{
+	struct file *exe_file;
+	struct dentry *dentry;
+	int err;
+
+	/*
+	 * Setting new mm::exe_file is only allowed when no VM_EXECUTABLE vma's
+	 * remain. So perform a quick test first.
+	 */
+	if (mm->num_exe_file_vmas)
+		return -EBUSY;
+
+	exe_file = fget(fd);
+	if (!exe_file)
+		return -EBADF;
+
+	dentry = exe_file->f_path.dentry;
+
+	/*
+	 * Because the original mm->exe_file points to executable file, make
+	 * sure that this one is executable as well, to avoid breaking an
+	 * overall picture.
+	 */
+	err = -EACCES;
+	if (!S_ISREG(dentry->d_inode->i_mode) ||
+	    exe_file->f_path.mnt->mnt_flags & MNT_NOEXEC)
+		goto exit;
+
+	err = inode_permission(dentry->d_inode, MAY_EXEC);
+	if (err)
+		goto exit;
+
+	/*
+	 * The symlink can be changed only once, just to disallow arbitrary
+	 * transitions malicious software might bring in. This means one
+	 * could make a snapshot over all processes running and monitor
+	 * /proc/pid/exe changes to notice unusual activity if needed.
+	 */
+	down_write(&mm->mmap_sem);
+	if (likely(!mm->exe_file))
+		set_mm_exe_file(mm, exe_file);
+	else
+		err = -EBUSY;
+	up_write(&mm->mmap_sem);
+
+exit:
+	fput(exe_file);
+	return err;
+}
+
 static int prctl_set_mm(int opt, unsigned long addr,
			unsigned long arg4, unsigned long arg5)
 {
	unsigned long rlim = rlimit(RLIMIT_DATA);
-	unsigned long vm_req_flags;
-	unsigned long vm_bad_flags;
-	struct vm_area_struct *vma;
-	int error = 0;
	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	int error;
 
-	if (arg4 | arg5)
+	if (arg5 || (arg4 && opt != PR_SET_MM_AUXV))
		return -EINVAL;
 
	if (!capable(CAP_SYS_RESOURCE))
		return -EPERM;
 
+	if (opt == PR_SET_MM_EXE_FILE)
+		return prctl_set_mm_exe_file(mm, (unsigned int)addr);
+
	if (addr >= TASK_SIZE)
		return -EINVAL;
 
+	error = -EINVAL;
+
	down_read(&mm->mmap_sem);
	vma = find_vma(mm, addr);
 
-	if (opt != PR_SET_MM_START_BRK && opt != PR_SET_MM_BRK) {
-		/* It must be existing VMA */
-		if (!vma || vma->vm_start > addr)
-			goto out;
-	}
-
-	error = -EINVAL;
	switch (opt) {
	case PR_SET_MM_START_CODE:
+		mm->start_code = addr;
+		break;
	case PR_SET_MM_END_CODE:
-		vm_req_flags = VM_READ | VM_EXEC;
-		vm_bad_flags = VM_WRITE | VM_MAYSHARE;
-
-		if ((vma->vm_flags & vm_req_flags) != vm_req_flags ||
-		    (vma->vm_flags & vm_bad_flags))
-			goto out;
-
-		if (opt == PR_SET_MM_START_CODE)
-			mm->start_code = addr;
-		else
-			mm->end_code = addr;
+		mm->end_code = addr;
		break;
-
	case PR_SET_MM_START_DATA:
-	case PR_SET_MM_END_DATA:
-		vm_req_flags = VM_READ | VM_WRITE;
-		vm_bad_flags = VM_EXEC | VM_MAYSHARE;
-
-		if ((vma->vm_flags & vm_req_flags) != vm_req_flags ||
-		    (vma->vm_flags & vm_bad_flags))
-			goto out;
-
-		if (opt == PR_SET_MM_START_DATA)
-			mm->start_data = addr;
-		else
-			mm->end_data = addr;
+		mm->start_data = addr;
		break;
-
-	case PR_SET_MM_START_STACK:
-
-#ifdef CONFIG_STACK_GROWSUP
-		vm_req_flags = VM_READ | VM_WRITE | VM_GROWSUP;
-#else
-		vm_req_flags = VM_READ | VM_WRITE | VM_GROWSDOWN;
-#endif
-		if ((vma->vm_flags & vm_req_flags) != vm_req_flags)
-			goto out;
-
-		mm->start_stack = addr;
+	case PR_SET_MM_END_DATA:
+		mm->end_data = addr;
		break;
 
	case PR_SET_MM_START_BRK:
@@ -1881,16 +1908,77 @@ static int prctl_set_mm(int opt, unsigned long addr,
		mm->brk = addr;
		break;
 
+	/*
+	 * If command line arguments and environment
+	 * are placed somewhere else on stack, we can
+	 * set them up here, ARG_START/END to setup
+	 * command line argumets and ENV_START/END
+	 * for environment.
+	 */
+	case PR_SET_MM_START_STACK:
+	case PR_SET_MM_ARG_START:
+	case PR_SET_MM_ARG_END:
+	case PR_SET_MM_ENV_START:
+	case PR_SET_MM_ENV_END:
+		if (!vma) {
+			error = -EFAULT;
+			goto out;
+		}
+#ifdef CONFIG_STACK_GROWSUP
+		if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSUP, 0))
+#else
+		if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSDOWN, 0))
+#endif
+			goto out;
+		if (opt == PR_SET_MM_START_STACK)
+			mm->start_stack = addr;
+		else if (opt == PR_SET_MM_ARG_START)
+			mm->arg_start = addr;
+		else if (opt == PR_SET_MM_ARG_END)
+			mm->arg_end = addr;
+		else if (opt == PR_SET_MM_ENV_START)
+			mm->env_start = addr;
+		else if (opt == PR_SET_MM_ENV_END)
+			mm->env_end = addr;
+		break;
+
+	/*
+	 * This doesn't move auxiliary vector itself
+	 * since it's pinned to mm_struct, but allow
+	 * to fill vector with new values. It's up
+	 * to a caller to provide sane values here
+	 * otherwise user space tools which use this
+	 * vector might be unhappy.
+	 */
+	case PR_SET_MM_AUXV: {
+		unsigned long user_auxv[AT_VECTOR_SIZE];
+
+		if (arg4 > sizeof(user_auxv))
+			goto out;
+		up_read(&mm->mmap_sem);
+
+		if (copy_from_user(user_auxv, (const void __user *)addr, arg4))
+			return -EFAULT;
+
+		/* Make sure the last entry is always AT_NULL */
+		user_auxv[AT_VECTOR_SIZE - 2] = 0;
+		user_auxv[AT_VECTOR_SIZE - 1] = 0;
+
+		BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
+
+		task_lock(current);
+		memcpy(mm->saved_auxv, user_auxv, arg4);
+		task_unlock(current);
+
+		return 0;
+	}
	default:
-		error = -EINVAL;
		goto out;
	}
 
	error = 0;
-
 out:
	up_read(&mm->mmap_sem);
-
	return error;
 }
 #else /* CONFIG_CHECKPOINT_RESTORE */
@@ -2114,7 +2202,6 @@ int orderly_poweroff(bool force)
		NULL
	};
	int ret = -ENOMEM;
-	struct subprocess_info *info;
 
	if (argv == NULL) {
		printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
@@ -2122,18 +2209,16 @@
		goto out;
	}
 
-	info = call_usermodehelper_setup(argv[0], argv, envp, GFP_ATOMIC);
-	if (info == NULL) {
-		argv_free(argv);
-		goto out;
-	}
-
-	call_usermodehelper_setfns(info, NULL, argv_cleanup, NULL);
+	ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_NO_WAIT,
+				      NULL, argv_cleanup, NULL);
+out:
+	if (likely(!ret))
+		return 0;
 
-	ret = call_usermodehelper_exec(info, UMH_NO_WAIT);
+	if (ret == -ENOMEM)
+		argv_free(argv);
 
-out:
-	if (ret && force) {
+	if (force) {
		printk(KERN_WARNING "Failed to start orderly shutdown: "
		       "forcing the issue\n");
 
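Note: the reworked prctl_set_mm() is driven from userspace through prctl(2). A minimal sketch, assuming CONFIG_CHECKPOINT_RESTORE=y, CAP_SYS_RESOURCE, and a <sys/prctl.h> that carries the PR_SET_MM_* constants; the fallback defines and the address below are illustrative placeholders:

    #include <stdio.h>
    #include <sys/prctl.h>

    #ifndef PR_SET_MM
    #define PR_SET_MM		35	/* illustrative fallback defines */
    #define PR_SET_MM_START_BRK	6
    #endif

    int main(void)
    {
    	/*
    	 * Move the brk start, as a restorer would after rebuilding a
    	 * dumped address space. Requires CAP_SYS_RESOURCE; the address
    	 * here is only a placeholder.
    	 */
    	unsigned long new_start_brk = 0x700000000000UL;

    	if (prctl(PR_SET_MM, PR_SET_MM_START_BRK, new_start_brk, 0, 0))
    		perror("prctl(PR_SET_MM_START_BRK)");
    	return 0;
    }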
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 47bfa16430d7..dbff751e4086 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -203,3 +203,6 @@ cond_syscall(sys_fanotify_mark);
 cond_syscall(sys_name_to_handle_at);
 cond_syscall(sys_open_by_handle_at);
 cond_syscall(compat_sys_open_by_handle_at);
+
+/* compare kernel pointers */
+cond_syscall(sys_kcmp);
