Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile            |   5
-rw-r--r--  kernel/cgroup.c            |  20
-rw-r--r--  kernel/cpu.c               |  44
-rw-r--r--  kernel/cpu_pm.c            |  16
-rw-r--r--  kernel/cred.c              |   9
-rw-r--r--  kernel/exit.c              |  13
-rw-r--r--  kernel/fork.c              |  28
-rw-r--r--  kernel/irq/manage.c        |  80
-rw-r--r--  kernel/kallsyms.c          |  32
-rw-r--r--  kernel/kcmp.c              | 196
-rw-r--r--  kernel/kmod.c              |  30
-rw-r--r--  kernel/pid_namespace.c     |  13
-rw-r--r--  kernel/res_counter.c       |  10
-rw-r--r--  kernel/resource.c          |   4
-rw-r--r--  kernel/signal.c            |  11
-rw-r--r--  kernel/sys.c               | 213
-rw-r--r--  kernel/sys_ni.c            |   3
-rw-r--r--  kernel/task_work.c         |  84
-rw-r--r--  kernel/trace/ring_buffer.c |   5
19 files changed, 636 insertions, 180 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 296132c19a57..c0cc67ad764c 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -5,7 +5,7 @@
 obj-y     = fork.o exec_domain.o panic.o printk.o \
	    cpu.o exit.o itimer.o time.o softirq.o resource.o \
	    sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \
-	    signal.o sys.o kmod.o workqueue.o pid.o \
+	    signal.o sys.o kmod.o workqueue.o pid.o task_work.o \
	    rcupdate.o extable.o params.o posix-timers.o \
	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
@@ -25,6 +25,9 @@ endif
 obj-y += sched/
 obj-y += power/
 
+ifeq ($(CONFIG_CHECKPOINT_RESTORE),y)
+obj-$(CONFIG_X86) += kcmp.o
+endif
 obj-$(CONFIG_FREEZER) += freezer.o
 obj-$(CONFIG_PROFILING) += profile.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a0c6af34d500..0f3527d6184a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -5132,7 +5132,7 @@ EXPORT_SYMBOL_GPL(css_depth);
  * @root: the css supporsed to be an ancestor of the child.
  *
  * Returns true if "root" is an ancestor of "child" in its hierarchy. Because
- * this function reads css->id, this use rcu_dereference() and rcu_read_lock().
+ * this function reads css->id, the caller must hold rcu_read_lock().
  * But, considering usual usage, the csses should be valid objects after test.
  * Assuming that the caller will do some action to the child if this returns
  * returns true, the caller must take "child";s reference count.
@@ -5144,18 +5144,18 @@ bool css_is_ancestor(struct cgroup_subsys_state *child,
 {
 	struct css_id *child_id;
 	struct css_id *root_id;
-	bool ret = true;
 
-	rcu_read_lock();
 	child_id = rcu_dereference(child->id);
+	if (!child_id)
+		return false;
 	root_id = rcu_dereference(root->id);
-	if (!child_id
-	    || !root_id
-	    || (child_id->depth < root_id->depth)
-	    || (child_id->stack[root_id->depth] != root_id->id))
-		ret = false;
-	rcu_read_unlock();
-	return ret;
+	if (!root_id)
+		return false;
+	if (child_id->depth < root_id->depth)
+		return false;
+	if (child_id->stack[root_id->depth] != root_id->id)
+		return false;
+	return true;
 }
 
 void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 0e6353cf147a..a4eb5227a19e 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -10,7 +10,10 @@
 #include <linux/sched.h>
 #include <linux/unistd.h>
 #include <linux/cpu.h>
+#include <linux/oom.h>
+#include <linux/rcupdate.h>
 #include <linux/export.h>
+#include <linux/bug.h>
 #include <linux/kthread.h>
 #include <linux/stop_machine.h>
 #include <linux/mutex.h>
@@ -173,6 +176,47 @@ void __ref unregister_cpu_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL(unregister_cpu_notifier);
 
+/**
+ * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
+ * @cpu: a CPU id
+ *
+ * This function walks all processes, finds a valid mm struct for each one and
+ * then clears a corresponding bit in mm's cpumask. While this all sounds
+ * trivial, there are various non-obvious corner cases, which this function
+ * tries to solve in a safe manner.
+ *
+ * Also note that the function uses a somewhat relaxed locking scheme, so it may
+ * be called only for an already offlined CPU.
+ */
+void clear_tasks_mm_cpumask(int cpu)
+{
+	struct task_struct *p;
+
+	/*
+	 * This function is called after the cpu is taken down and marked
+	 * offline, so its not like new tasks will ever get this cpu set in
+	 * their mm mask. -- Peter Zijlstra
+	 * Thus, we may use rcu_read_lock() here, instead of grabbing
+	 * full-fledged tasklist_lock.
+	 */
+	WARN_ON(cpu_online(cpu));
+	rcu_read_lock();
+	for_each_process(p) {
+		struct task_struct *t;
+
+		/*
+		 * Main thread might exit, but other threads may still have
+		 * a valid mm. Find one.
+		 */
+		t = find_lock_task_mm(p);
+		if (!t)
+			continue;
+		cpumask_clear_cpu(cpu, mm_cpumask(t->mm));
+		task_unlock(t);
+	}
+	rcu_read_unlock();
+}
+
 static inline void check_for_tasks(int cpu)
 {
 	struct task_struct *p;
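For context, the new clear_tasks_mm_cpumask() helper is meant to be called from an architecture's CPU-teardown path once the dying CPU has been marked offline. A minimal sketch of such a caller follows; the function and file names are hypothetical, and the declaration is assumed to land in <linux/cpu.h>:

	/* Hypothetical arch hotplug teardown using the new helper (illustrative only). */
	#include <linux/cpu.h>
	#include <linux/cpumask.h>

	static void example_cpu_teardown(unsigned int cpu)
	{
		/* The CPU must already be offline, as the WARN_ON above insists. */
		WARN_ON(cpu_online(cpu));

		/* Drop the dead CPU from every task's mm_cpumask. */
		clear_tasks_mm_cpumask(cpu);

		/* Architecture-specific cleanup would follow here. */
	}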
diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c
index 249152e15308..9656a3c36503 100644
--- a/kernel/cpu_pm.c
+++ b/kernel/cpu_pm.c
@@ -81,7 +81,7 @@ int cpu_pm_unregister_notifier(struct notifier_block *nb)
 EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier);
 
 /**
- * cpm_pm_enter - CPU low power entry notifier
+ * cpu_pm_enter - CPU low power entry notifier
  *
  * Notifies listeners that a single CPU is entering a low power state that may
  * cause some blocks in the same power domain as the cpu to reset.
@@ -89,7 +89,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_unregister_notifier);
  * Must be called on the affected CPU with interrupts disabled. Platform is
  * responsible for ensuring that cpu_pm_enter is not called twice on the same
  * CPU before cpu_pm_exit is called. Notified drivers can include VFP
- * co-processor, interrupt controller and it's PM extensions, local CPU
+ * co-processor, interrupt controller and its PM extensions, local CPU
  * timers context save/restore which shouldn't be interrupted. Hence it
  * must be called with interrupts disabled.
  *
@@ -115,13 +115,13 @@ int cpu_pm_enter(void)
 EXPORT_SYMBOL_GPL(cpu_pm_enter);
 
 /**
- * cpm_pm_exit - CPU low power exit notifier
+ * cpu_pm_exit - CPU low power exit notifier
  *
  * Notifies listeners that a single CPU is exiting a low power state that may
  * have caused some blocks in the same power domain as the cpu to reset.
  *
  * Notified drivers can include VFP co-processor, interrupt controller
- * and it's PM extensions, local CPU timers context save/restore which
+ * and its PM extensions, local CPU timers context save/restore which
  * shouldn't be interrupted. Hence it must be called with interrupts disabled.
  *
  * Return conditions are same as __raw_notifier_call_chain.
@@ -139,7 +139,7 @@ int cpu_pm_exit(void)
 EXPORT_SYMBOL_GPL(cpu_pm_exit);
 
 /**
- * cpm_cluster_pm_enter - CPU cluster low power entry notifier
+ * cpu_cluster_pm_enter - CPU cluster low power entry notifier
  *
  * Notifies listeners that all cpus in a power domain are entering a low power
  * state that may cause some blocks in the same power domain to reset.
@@ -147,7 +147,7 @@ EXPORT_SYMBOL_GPL(cpu_pm_exit);
  * Must be called after cpu_pm_enter has been called on all cpus in the power
  * domain, and before cpu_pm_exit has been called on any cpu in the power
  * domain. Notified drivers can include VFP co-processor, interrupt controller
- * and it's PM extensions, local CPU timers context save/restore which
+ * and its PM extensions, local CPU timers context save/restore which
  * shouldn't be interrupted. Hence it must be called with interrupts disabled.
  *
  * Must be called with interrupts disabled.
@@ -174,7 +174,7 @@ int cpu_cluster_pm_enter(void)
 EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter);
 
 /**
- * cpm_cluster_pm_exit - CPU cluster low power exit notifier
+ * cpu_cluster_pm_exit - CPU cluster low power exit notifier
  *
  * Notifies listeners that all cpus in a power domain are exiting form a
  * low power state that may have caused some blocks in the same power domain
@@ -183,7 +183,7 @@ EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter);
  * Must be called after cpu_pm_exit has been called on all cpus in the power
  * domain, and before cpu_pm_exit has been called on any cpu in the power
  * domain. Notified drivers can include VFP co-processor, interrupt controller
- * and it's PM extensions, local CPU timers context save/restore which
+ * and its PM extensions, local CPU timers context save/restore which
  * shouldn't be interrupted. Hence it must be called with interrupts disabled.
  *
  * Return conditions are same as __raw_notifier_call_chain.
diff --git a/kernel/cred.c b/kernel/cred.c
index 430557ea488f..de728ac50d82 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -207,13 +207,6 @@ void exit_creds(struct task_struct *tsk)
 	validate_creds(cred);
 	alter_cred_subscribers(cred, -1);
 	put_cred(cred);
-
-	cred = (struct cred *) tsk->replacement_session_keyring;
-	if (cred) {
-		tsk->replacement_session_keyring = NULL;
-		validate_creds(cred);
-		put_cred(cred);
-	}
 }
 
 /**
@@ -396,8 +389,6 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 	struct cred *new;
 	int ret;
 
-	p->replacement_session_keyring = NULL;
-
 	if (
 #ifdef CONFIG_KEYS
 		!p->cred->thread_keyring &&
diff --git a/kernel/exit.c b/kernel/exit.c
index 910a0716e17a..34867cc5b42a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -884,9 +884,9 @@ static void check_stack_usage(void)
 
 	spin_lock(&low_water_lock);
 	if (free < lowest_to_date) {
-		printk(KERN_WARNING "%s used greatest stack depth: %lu bytes "
-				"left\n",
-				current->comm, free);
+		printk(KERN_WARNING "%s (%d) used greatest stack depth: "
+				"%lu bytes left\n",
+				current->comm, task_pid_nr(current), free);
 		lowest_to_date = free;
 	}
 	spin_unlock(&low_water_lock);
@@ -946,12 +946,13 @@ void do_exit(long code)
 	exit_signals(tsk);  /* sets PF_EXITING */
 	/*
 	 * tsk->flags are checked in the futex code to protect against
-	 * an exiting task cleaning up the robust pi futexes.
+	 * an exiting task cleaning up the robust pi futexes, and in
+	 * task_work_add() to avoid the race with exit_task_work().
 	 */
 	smp_mb();
 	raw_spin_unlock_wait(&tsk->pi_lock);
 
-	exit_irq_thread();
+	exit_task_work(tsk);
 
 	if (unlikely(in_atomic()))
 		printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
@@ -1214,7 +1215,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 	unsigned long state;
 	int retval, status, traced;
 	pid_t pid = task_pid_vnr(p);
-	uid_t uid = from_kuid_munged(current_user_ns(), __task_cred(p)->uid);
+	uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p));
 	struct siginfo __user *infop;
 
 	if (!likely(wo->wo_flags & WEXITED))
diff --git a/kernel/fork.c b/kernel/fork.c
index 47b4e4f379f9..ab5211b9e622 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -386,7 +386,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 		}
 		charge = 0;
 		if (mpnt->vm_flags & VM_ACCOUNT) {
-			unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+			unsigned long len;
+			len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
 			if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
 				goto fail_nomem;
 			charge = len;
@@ -614,7 +615,6 @@ void mmput(struct mm_struct *mm)
 		list_del(&mm->mmlist);
 		spin_unlock(&mmlist_lock);
 	}
-	put_swap_token(mm);
 	if (mm->binfmt)
 		module_put(mm->binfmt->module);
 	mmdrop(mm);
@@ -787,9 +787,6 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 	/* Get rid of any cached register state */
 	deactivate_mm(tsk, mm);
 
-	if (tsk->vfork_done)
-		complete_vfork_done(tsk);
-
 	/*
 	 * If we're exiting normally, clear a user-space tid field if
 	 * requested. We leave this alone when dying by signal, to leave
@@ -810,6 +807,13 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 		}
 		tsk->clear_child_tid = NULL;
 	}
+
+	/*
+	 * All done, finally we can wake up parent and return this mm to him.
+	 * Also kthread_stop() uses this completion for synchronization.
+	 */
+	if (tsk->vfork_done)
+		complete_vfork_done(tsk);
 }
 
 /*
@@ -831,10 +835,6 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
 	memcpy(mm, oldmm, sizeof(*mm));
 	mm_init_cpumask(mm);
 
-	/* Initializing for Swap token stuff */
-	mm->token_priority = 0;
-	mm->last_interval = 0;
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	mm->pmd_huge_pte = NULL;
 #endif
@@ -913,10 +913,6 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
 		goto fail_nomem;
 
 good_mm:
-	/* Initializing for Swap token stuff */
-	mm->token_priority = 0;
-	mm->last_interval = 0;
-
 	tsk->mm = mm;
 	tsk->active_mm = mm;
 	return 0;
@@ -984,9 +980,8 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
 	 * Share io context with parent, if CLONE_IO is set
 	 */
 	if (clone_flags & CLONE_IO) {
-		tsk->io_context = ioc_task_link(ioc);
-		if (unlikely(!tsk->io_context))
-			return -ENOMEM;
+		ioc_task_link(ioc);
+		tsk->io_context = ioc;
 	} else if (ioprio_valid(ioc->ioprio)) {
 		new_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE);
 		if (unlikely(!new_ioc))
@@ -1420,6 +1415,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	 */
 	p->group_leader = p;
 	INIT_LIST_HEAD(&p->thread_group);
+	INIT_HLIST_HEAD(&p->task_works);
 
 	/* Now that the task is set up, run cgroup callbacks if
 	 * necessary. We need to run them before the task is visible
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index bb32326afe87..ea0c6c2ae6f7 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -7,6 +7,8 @@
  * This file contains driver APIs to the irq subsystem.
  */
 
+#define pr_fmt(fmt) "genirq: " fmt
+
 #include <linux/irq.h>
 #include <linux/kthread.h>
 #include <linux/module.h>
@@ -14,6 +16,7 @@
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
+#include <linux/task_work.h>
 
 #include "internals.h"
 
@@ -565,7 +568,7 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
 	 * IRQF_TRIGGER_* but the PIC does not support multiple
 	 * flow-types?
 	 */
-	pr_debug("genirq: No set_type function for IRQ %d (%s)\n", irq,
+	pr_debug("No set_type function for IRQ %d (%s)\n", irq,
 		 chip ? (chip->name ? : "unknown") : "unknown");
 	return 0;
 }
@@ -600,7 +603,7 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
 		ret = 0;
 		break;
 	default:
-		pr_err("genirq: Setting trigger mode %lu for irq %u failed (%pF)\n",
+		pr_err("Setting trigger mode %lu for irq %u failed (%pF)\n",
 		       flags, irq, chip->irq_set_type);
 	}
 	if (unmask)
@@ -773,11 +776,39 @@ static void wake_threads_waitq(struct irq_desc *desc)
 	wake_up(&desc->wait_for_threads);
 }
 
+static void irq_thread_dtor(struct task_work *unused)
+{
+	struct task_struct *tsk = current;
+	struct irq_desc *desc;
+	struct irqaction *action;
+
+	if (WARN_ON_ONCE(!(current->flags & PF_EXITING)))
+		return;
+
+	action = kthread_data(tsk);
+
+	pr_err("exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
+	       tsk->comm ? tsk->comm : "", tsk->pid, action->irq);
+
+
+	desc = irq_to_desc(action->irq);
+	/*
+	 * If IRQTF_RUNTHREAD is set, we need to decrement
+	 * desc->threads_active and wake possible waiters.
+	 */
+	if (test_and_clear_bit(IRQTF_RUNTHREAD, &action->thread_flags))
+		wake_threads_waitq(desc);
+
+	/* Prevent a stale desc->threads_oneshot */
+	irq_finalize_oneshot(desc, action);
+}
+
 /*
  * Interrupt handler thread
  */
 static int irq_thread(void *data)
 {
+	struct task_work on_exit_work;
 	static const struct sched_param param = {
 		.sched_priority = MAX_USER_RT_PRIO/2,
 	};
@@ -793,7 +824,9 @@ static int irq_thread(void *data)
 		handler_fn = irq_thread_fn;
 
 	sched_setscheduler(current, SCHED_FIFO, &param);
-	current->irq_thread = 1;
+
+	init_task_work(&on_exit_work, irq_thread_dtor, NULL);
+	task_work_add(current, &on_exit_work, false);
 
 	while (!irq_wait_for_interrupt(action)) {
 		irqreturn_t action_ret;
@@ -815,44 +848,11 @@ static int irq_thread(void *data)
 	 * cannot touch the oneshot mask at this point anymore as
 	 * __setup_irq() might have given out currents thread_mask
 	 * again.
-	 *
-	 * Clear irq_thread. Otherwise exit_irq_thread() would make
-	 * fuzz about an active irq thread going into nirvana.
 	 */
-	current->irq_thread = 0;
+	task_work_cancel(current, irq_thread_dtor);
 	return 0;
 }
 
-/*
- * Called from do_exit()
- */
-void exit_irq_thread(void)
-{
-	struct task_struct *tsk = current;
-	struct irq_desc *desc;
-	struct irqaction *action;
-
-	if (!tsk->irq_thread)
-		return;
-
-	action = kthread_data(tsk);
-
-	pr_err("genirq: exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
-	       tsk->comm ? tsk->comm : "", tsk->pid, action->irq);
-
-	desc = irq_to_desc(action->irq);
-
-	/*
-	 * If IRQTF_RUNTHREAD is set, we need to decrement
-	 * desc->threads_active and wake possible waiters.
-	 */
-	if (test_and_clear_bit(IRQTF_RUNTHREAD, &action->thread_flags))
-		wake_threads_waitq(desc);
-
-	/* Prevent a stale desc->threads_oneshot */
-	irq_finalize_oneshot(desc, action);
-}
-
 static void irq_setup_forced_threading(struct irqaction *new)
 {
 	if (!force_irqthreads)
@@ -1044,7 +1044,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 		 * has. The type flags are unreliable as the
 		 * underlying chip implementation can override them.
 		 */
-		pr_err("genirq: Threaded irq requested with handler=NULL and !ONESHOT for irq %d\n",
+		pr_err("Threaded irq requested with handler=NULL and !ONESHOT for irq %d\n",
 		       irq);
 		ret = -EINVAL;
 		goto out_mask;
@@ -1095,7 +1095,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 
 		if (nmsk != omsk)
 			/* hope the handler works with current trigger mode */
-			pr_warning("genirq: irq %d uses trigger mode %u; requested %u\n",
+			pr_warning("irq %d uses trigger mode %u; requested %u\n",
 				   irq, nmsk, omsk);
 	}
 
@@ -1133,7 +1133,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 
 mismatch:
 	if (!(new->flags & IRQF_PROBE_SHARED)) {
-		pr_err("genirq: Flags mismatch irq %d. %08x (%s) vs. %08x (%s)\n",
+		pr_err("Flags mismatch irq %d. %08x (%s) vs. %08x (%s)\n",
 		       irq, new->flags, new->name, old->flags, old->name);
 #ifdef CONFIG_DEBUG_SHIRQ
 		dump_stack();
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 079f1d39a8b8..2169feeba529 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -343,7 +343,7 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size,
 
 /* Look up a kernel symbol and return it in a text buffer. */
 static int __sprint_symbol(char *buffer, unsigned long address,
-			int symbol_offset)
+			int symbol_offset, int add_offset)
 {
 	char *modname;
 	const char *name;
@@ -358,13 +358,13 @@ static int __sprint_symbol(char *buffer, unsigned long address,
 	if (name != buffer)
 		strcpy(buffer, name);
 	len = strlen(buffer);
-	buffer += len;
 	offset -= symbol_offset;
 
+	if (add_offset)
+		len += sprintf(buffer + len, "+%#lx/%#lx", offset, size);
+
 	if (modname)
-		len += sprintf(buffer, "+%#lx/%#lx [%s]", offset, size, modname);
-	else
-		len += sprintf(buffer, "+%#lx/%#lx", offset, size);
+		len += sprintf(buffer + len, " [%s]", modname);
 
 	return len;
 }
@@ -382,12 +382,28 @@ static int __sprint_symbol(char *buffer, unsigned long address,
  */
 int sprint_symbol(char *buffer, unsigned long address)
 {
-	return __sprint_symbol(buffer, address, 0);
+	return __sprint_symbol(buffer, address, 0, 1);
 }
-
 EXPORT_SYMBOL_GPL(sprint_symbol);
 
 /**
+ * sprint_symbol_no_offset - Look up a kernel symbol and return it in a text buffer
+ * @buffer: buffer to be stored
+ * @address: address to lookup
+ *
+ * This function looks up a kernel symbol with @address and stores its name
+ * and module name to @buffer if possible. If no symbol was found, just saves
+ * its @address as is.
+ *
+ * This function returns the number of bytes stored in @buffer.
+ */
+int sprint_symbol_no_offset(char *buffer, unsigned long address)
+{
+	return __sprint_symbol(buffer, address, 0, 0);
+}
+EXPORT_SYMBOL_GPL(sprint_symbol_no_offset);
+
+/**
  * sprint_backtrace - Look up a backtrace symbol and return it in a text buffer
  * @buffer: buffer to be stored
  * @address: address to lookup
@@ -403,7 +419,7 @@ EXPORT_SYMBOL_GPL(sprint_symbol);
  */
 int sprint_backtrace(char *buffer, unsigned long address)
 {
-	return __sprint_symbol(buffer, address, -1);
+	return __sprint_symbol(buffer, address, -1, 1);
 }
 
 /* Look up a kernel symbol and print it to the kernel messages. */
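The only difference between the two exported helpers is the "+offset/size" suffix. A small illustrative sketch (not part of the patch; the example output strings in the comments are made up):

	#include <linux/kallsyms.h>
	#include <linux/printk.h>

	static void example_print_symbol(unsigned long addr)
	{
		char buf[KSYM_SYMBOL_LEN];

		sprint_symbol(buf, addr);
		pr_info("with offset:    %s\n", buf);	/* e.g. "worker_thread+0x10/0x2a0" */

		sprint_symbol_no_offset(buf, addr);
		pr_info("without offset: %s\n", buf);	/* e.g. "worker_thread" */
	}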
diff --git a/kernel/kcmp.c b/kernel/kcmp.c
new file mode 100644
index 000000000000..30b7b225306c
--- /dev/null
+++ b/kernel/kcmp.c
@@ -0,0 +1,196 @@
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/fdtable.h>
+#include <linux/string.h>
+#include <linux/random.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/cache.h>
+#include <linux/bug.h>
+#include <linux/err.h>
+#include <linux/kcmp.h>
+
+#include <asm/unistd.h>
+
+/*
+ * We don't expose the real in-memory order of objects for security reasons.
+ * But still the comparison results should be suitable for sorting. So we
+ * obfuscate kernel pointers values and compare the production instead.
+ *
+ * The obfuscation is done in two steps. First we xor the kernel pointer with
+ * a random value, which puts pointer into a new position in a reordered space.
+ * Secondly we multiply the xor production with a large odd random number to
+ * permute its bits even more (the odd multiplier guarantees that the product
+ * is unique ever after the high bits are truncated, since any odd number is
+ * relative prime to 2^n).
+ *
+ * Note also that the obfuscation itself is invisible to userspace and if needed
+ * it can be changed to an alternate scheme.
+ */
+static unsigned long cookies[KCMP_TYPES][2] __read_mostly;
+
+static long kptr_obfuscate(long v, int type)
+{
+	return (v ^ cookies[type][0]) * cookies[type][1];
+}
+
+/*
+ * 0 - equal, i.e. v1 = v2
+ * 1 - less than, i.e. v1 < v2
+ * 2 - greater than, i.e. v1 > v2
+ * 3 - not equal but ordering unavailable (reserved for future)
+ */
+static int kcmp_ptr(void *v1, void *v2, enum kcmp_type type)
+{
+	long ret;
+
+	ret = kptr_obfuscate((long)v1, type) - kptr_obfuscate((long)v2, type);
+
+	return (ret < 0) | ((ret > 0) << 1);
+}
+
+/* The caller must have pinned the task */
+static struct file *
+get_file_raw_ptr(struct task_struct *task, unsigned int idx)
+{
+	struct file *file = NULL;
+
+	task_lock(task);
+	rcu_read_lock();
+
+	if (task->files)
+		file = fcheck_files(task->files, idx);
+
+	rcu_read_unlock();
+	task_unlock(task);
+
+	return file;
+}
+
+static void kcmp_unlock(struct mutex *m1, struct mutex *m2)
+{
+	if (likely(m2 != m1))
+		mutex_unlock(m2);
+	mutex_unlock(m1);
+}
+
+static int kcmp_lock(struct mutex *m1, struct mutex *m2)
+{
+	int err;
+
+	if (m2 > m1)
+		swap(m1, m2);
+
+	err = mutex_lock_killable(m1);
+	if (!err && likely(m1 != m2)) {
+		err = mutex_lock_killable_nested(m2, SINGLE_DEPTH_NESTING);
+		if (err)
+			mutex_unlock(m1);
+	}
+
+	return err;
+}
+
+SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
+		unsigned long, idx1, unsigned long, idx2)
+{
+	struct task_struct *task1, *task2;
+	int ret;
+
+	rcu_read_lock();
+
+	/*
+	 * Tasks are looked up in caller's PID namespace only.
+	 */
+	task1 = find_task_by_vpid(pid1);
+	task2 = find_task_by_vpid(pid2);
+	if (!task1 || !task2)
+		goto err_no_task;
+
+	get_task_struct(task1);
+	get_task_struct(task2);
+
+	rcu_read_unlock();
+
+	/*
+	 * One should have enough rights to inspect task details.
+	 */
+	ret = kcmp_lock(&task1->signal->cred_guard_mutex,
+			&task2->signal->cred_guard_mutex);
+	if (ret)
+		goto err;
+	if (!ptrace_may_access(task1, PTRACE_MODE_READ) ||
+	    !ptrace_may_access(task2, PTRACE_MODE_READ)) {
+		ret = -EPERM;
+		goto err_unlock;
+	}
+
+	switch (type) {
+	case KCMP_FILE: {
+		struct file *filp1, *filp2;
+
+		filp1 = get_file_raw_ptr(task1, idx1);
+		filp2 = get_file_raw_ptr(task2, idx2);
+
+		if (filp1 && filp2)
+			ret = kcmp_ptr(filp1, filp2, KCMP_FILE);
+		else
+			ret = -EBADF;
+		break;
+	}
+	case KCMP_VM:
+		ret = kcmp_ptr(task1->mm, task2->mm, KCMP_VM);
+		break;
+	case KCMP_FILES:
+		ret = kcmp_ptr(task1->files, task2->files, KCMP_FILES);
+		break;
+	case KCMP_FS:
+		ret = kcmp_ptr(task1->fs, task2->fs, KCMP_FS);
+		break;
+	case KCMP_SIGHAND:
+		ret = kcmp_ptr(task1->sighand, task2->sighand, KCMP_SIGHAND);
+		break;
+	case KCMP_IO:
+		ret = kcmp_ptr(task1->io_context, task2->io_context, KCMP_IO);
+		break;
+	case KCMP_SYSVSEM:
+#ifdef CONFIG_SYSVIPC
+		ret = kcmp_ptr(task1->sysvsem.undo_list,
+			       task2->sysvsem.undo_list,
+			       KCMP_SYSVSEM);
+#else
+		ret = -EOPNOTSUPP;
+#endif
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+err_unlock:
+	kcmp_unlock(&task1->signal->cred_guard_mutex,
+		    &task2->signal->cred_guard_mutex);
+err:
+	put_task_struct(task1);
+	put_task_struct(task2);
+
+	return ret;
+
+err_no_task:
+	rcu_read_unlock();
+	return -ESRCH;
+}
+
+static __init int kcmp_cookies_init(void)
+{
+	int i;
+
+	get_random_bytes(cookies, sizeof(cookies));
+
+	for (i = 0; i < KCMP_TYPES; i++)
+		cookies[i][1] |= (~(~0UL >> 1) | 1);
+
+	return 0;
+}
+arch_initcall(kcmp_cookies_init);
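For reference, a userspace sketch of calling the new syscall to check whether two descriptors of the same process point at the same struct file. This is illustrative only: there is no libc wrapper, so the raw syscall number is used, and __NR_kcmp plus the KCMP_* constants are assumed to be visible through the freshly added <linux/kcmp.h> and the architecture's unistd.h:

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/kcmp.h>

	int main(void)
	{
		pid_t me = getpid();

		/* 0 means "same struct file", 1/2 give an ordering, 3 is reserved. */
		long ret = syscall(__NR_kcmp, me, me, KCMP_FILE, 0UL, 1UL);

		printf("fd 0 vs fd 1: %s (ret=%ld)\n",
		       ret == 0 ? "same file" : "different or error", ret);
		return 0;
	}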
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 05698a7415fe..ff2c7cb86d77 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -221,13 +221,12 @@ fail:
 	return 0;
 }
 
-void call_usermodehelper_freeinfo(struct subprocess_info *info)
+static void call_usermodehelper_freeinfo(struct subprocess_info *info)
 {
 	if (info->cleanup)
 		(*info->cleanup)(info);
 	kfree(info);
 }
-EXPORT_SYMBOL(call_usermodehelper_freeinfo);
 
 static void umh_complete(struct subprocess_info *sub_info)
 {
@@ -410,7 +409,7 @@ EXPORT_SYMBOL_GPL(usermodehelper_read_unlock);
 
 /**
  * __usermodehelper_set_disable_depth - Modify usermodehelper_disabled.
- * depth: New value to assign to usermodehelper_disabled.
+ * @depth: New value to assign to usermodehelper_disabled.
  *
  * Change the value of usermodehelper_disabled (under umhelper_sem locked for
  * writing) and wakeup tasks waiting for it to change.
@@ -479,6 +478,7 @@ static void helper_unlock(void)
 * structure. This should be passed to call_usermodehelper_exec to
 * exec the process and free the structure.
 */
+static
 struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
 						  char **envp, gfp_t gfp_mask)
 {
@@ -494,7 +494,6 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
   out:
 	return sub_info;
 }
-EXPORT_SYMBOL(call_usermodehelper_setup);
 
 /**
  * call_usermodehelper_setfns - set a cleanup/init function
@@ -512,6 +511,7 @@ EXPORT_SYMBOL(call_usermodehelper_setup);
 * Function must be runnable in either a process context or the
 * context in which call_usermodehelper_exec is called.
 */
+static
 void call_usermodehelper_setfns(struct subprocess_info *info,
 		    int (*init)(struct subprocess_info *info, struct cred *new),
 		    void (*cleanup)(struct subprocess_info *info),
@@ -521,7 +521,6 @@ void call_usermodehelper_setfns(struct subprocess_info *info,
 	info->init = init;
 	info->data = data;
 }
-EXPORT_SYMBOL(call_usermodehelper_setfns);
 
 /**
  * call_usermodehelper_exec - start a usermode application
@@ -535,6 +534,7 @@ EXPORT_SYMBOL(call_usermodehelper_setfns);
 * asynchronously if wait is not set, and runs as a child of keventd.
 * (ie. it runs with full root capabilities).
 */
+static
 int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
 {
 	DECLARE_COMPLETION_ONSTACK(done);
@@ -576,7 +576,25 @@ unlock:
 	helper_unlock();
 	return retval;
 }
-EXPORT_SYMBOL(call_usermodehelper_exec);
+
+int call_usermodehelper_fns(
+	char *path, char **argv, char **envp, int wait,
+	int (*init)(struct subprocess_info *info, struct cred *new),
+	void (*cleanup)(struct subprocess_info *), void *data)
+{
+	struct subprocess_info *info;
+	gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
+
+	info = call_usermodehelper_setup(path, argv, envp, gfp_mask);
+
+	if (info == NULL)
+		return -ENOMEM;
+
+	call_usermodehelper_setfns(info, init, cleanup, data);
+
+	return call_usermodehelper_exec(info, wait);
+}
+EXPORT_SYMBOL(call_usermodehelper_fns);
 
 static int proc_cap_handler(struct ctl_table *table, int write,
 	void __user *buffer, size_t *lenp, loff_t *ppos)
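With the setup/setfns/exec trio now static, out-of-file users are expected to go through the new call_usermodehelper_fns() — the orderly_poweroff() conversion in kernel/sys.c below is the in-tree example. A minimal sketch, with a made-up helper path and assuming the declaration is exported via <linux/kmod.h>:

	#include <linux/kmod.h>

	static int example_run_helper(void)
	{
		char *argv[] = { "/sbin/example-helper", "--oneshot", NULL };
		char *envp[] = { "HOME=/", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };

		/* init/cleanup callbacks and their data are optional; pass NULL when unused. */
		return call_usermodehelper_fns(argv[0], argv, envp, UMH_WAIT_PROC,
					       NULL, NULL, NULL);
	}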
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 57bc1fd35b3c..16b20e38c4a1 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -149,7 +149,12 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
 {
 	int nr;
 	int rc;
-	struct task_struct *task;
+	struct task_struct *task, *me = current;
+
+	/* Ignore SIGCHLD causing any terminated children to autoreap */
+	spin_lock_irq(&me->sighand->siglock);
+	me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN;
+	spin_unlock_irq(&me->sighand->siglock);
 
 	/*
 	 * The last thread in the cgroup-init thread group is terminating.
@@ -191,6 +196,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
 	return;
 }
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
 static int pid_ns_ctl_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -218,8 +224,8 @@ static struct ctl_table pid_ns_ctl_table[] = {
 	},
 	{ }
 };
-
 static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } };
+#endif	/* CONFIG_CHECKPOINT_RESTORE */
 
 int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
 {
@@ -253,7 +259,10 @@ int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd)
 static __init int pid_namespaces_init(void)
 {
 	pid_ns_cachep = KMEM_CACHE(pid_namespace, SLAB_PANIC);
+
+#ifdef CONFIG_CHECKPOINT_RESTORE
 	register_sysctl_paths(kern_path, pid_ns_ctl_table);
+#endif
 	return 0;
 }
 
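The SIG_IGN trick above leans on the usual POSIX rule that children of a process which ignores SIGCHLD are reaped automatically. The same behaviour can be observed from userspace; a small illustration, unrelated to the patch code itself:

	#include <signal.h>
	#include <stdio.h>
	#include <sys/wait.h>
	#include <unistd.h>

	int main(void)
	{
		signal(SIGCHLD, SIG_IGN);	/* terminated children are autoreaped */

		if (fork() == 0)
			_exit(0);		/* child exits immediately */

		sleep(1);			/* give the child time to go away */

		if (wait(NULL) < 0)
			perror("wait");		/* expected: "No child processes" */
		return 0;
	}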
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index bebe2b170d49..ad581aa2369a 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -94,13 +94,15 @@ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
 	counter->usage -= val;
 }
 
-void res_counter_uncharge(struct res_counter *counter, unsigned long val)
+void res_counter_uncharge_until(struct res_counter *counter,
+				struct res_counter *top,
+				unsigned long val)
 {
 	unsigned long flags;
 	struct res_counter *c;
 
 	local_irq_save(flags);
-	for (c = counter; c != NULL; c = c->parent) {
+	for (c = counter; c != top; c = c->parent) {
 		spin_lock(&c->lock);
 		res_counter_uncharge_locked(c, val);
 		spin_unlock(&c->lock);
@@ -108,6 +110,10 @@ void res_counter_uncharge(struct res_counter *counter, unsigned long val)
 	local_irq_restore(flags);
 }
 
+void res_counter_uncharge(struct res_counter *counter, unsigned long val)
+{
+	res_counter_uncharge_until(counter, NULL, val);
+}
 
 static inline unsigned long long *
 res_counter_member(struct res_counter *counter, int member)
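res_counter_uncharge_until() walks the parent chain but stops before the given ancestor, so a charge can be handed back to an intermediate level instead of the whole hierarchy. A sketch, assuming the declaration is also added to <linux/res_counter.h>; "child" and "ancestor" are hypothetical counters wired up elsewhere:

	#include <linux/res_counter.h>

	static void example_partial_uncharge(struct res_counter *child,
					     struct res_counter *ancestor,
					     unsigned long val)
	{
		/* Uncharges child, child->parent, ... and stops when it reaches "ancestor". */
		res_counter_uncharge_until(child, ancestor, val);

		/* A NULL top keeps the old behaviour: uncharge the entire chain. */
	}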
diff --git a/kernel/resource.c b/kernel/resource.c
index 7e8ea66a8c01..e1d2b8ee76d5 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -515,8 +515,8 @@ out:
  * @root: root resource descriptor
  * @new: resource descriptor desired by caller
  * @size: requested resource region size
- * @min: minimum size to allocate
- * @max: maximum size to allocate
+ * @min: minimum boundary to allocate
+ * @max: maximum boundary to allocate
  * @align: alignment requested, in bytes
  * @alignf: alignment function, optional, called if not NULL
  * @alignf_data: arbitrary data to pass to the @alignf function
diff --git a/kernel/signal.c b/kernel/signal.c
index f7b418217633..08dfbd748cd2 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1656,19 +1656,18 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
 	info.si_signo = sig;
 	info.si_errno = 0;
 	/*
-	 * we are under tasklist_lock here so our parent is tied to
-	 * us and cannot exit and release its namespace.
+	 * We are under tasklist_lock here so our parent is tied to
+	 * us and cannot change.
 	 *
-	 * the only it can is to switch its nsproxy with sys_unshare,
-	 * bu uncharing pid namespaces is not allowed, so we'll always
-	 * see relevant namespace
+	 * task_active_pid_ns will always return the same pid namespace
+	 * until a task passes through release_task.
 	 *
 	 * write_lock() currently calls preempt_disable() which is the
 	 * same as rcu_read_lock(), but according to Oleg, this is not
 	 * correct to rely on this
 	 */
 	rcu_read_lock();
-	info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns);
+	info.si_pid = task_pid_nr_ns(tsk, task_active_pid_ns(tsk->parent));
 	info.si_uid = from_kuid_munged(task_cred_xxx(tsk->parent, user_ns),
 				       task_uid(tsk));
 	rcu_read_unlock();
diff --git a/kernel/sys.c b/kernel/sys.c
index 6df42624e454..9ff89cb9657a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -36,6 +36,8 @@
 #include <linux/personality.h>
 #include <linux/ptrace.h>
 #include <linux/fs_struct.h>
+#include <linux/file.h>
+#include <linux/mount.h>
 #include <linux/gfp.h>
 #include <linux/syscore_ops.h>
 #include <linux/version.h>
@@ -1378,8 +1380,8 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
 		memcpy(u->nodename, tmp, len);
 		memset(u->nodename + len, 0, sizeof(u->nodename) - len);
 		errno = 0;
+		uts_proc_notify(UTS_PROC_HOSTNAME);
 	}
-	uts_proc_notify(UTS_PROC_HOSTNAME);
 	up_write(&uts_sem);
 	return errno;
 }
@@ -1429,8 +1431,8 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
 		memcpy(u->domainname, tmp, len);
 		memset(u->domainname + len, 0, sizeof(u->domainname) - len);
 		errno = 0;
+		uts_proc_notify(UTS_PROC_DOMAINNAME);
 	}
-	uts_proc_notify(UTS_PROC_DOMAINNAME);
 	up_write(&uts_sem);
 	return errno;
 }
@@ -1784,77 +1786,102 @@ SYSCALL_DEFINE1(umask, int, mask)
 }
 
 #ifdef CONFIG_CHECKPOINT_RESTORE
+static bool vma_flags_mismatch(struct vm_area_struct *vma,
+			       unsigned long required,
+			       unsigned long banned)
+{
+	return (vma->vm_flags & required) != required ||
+		(vma->vm_flags & banned);
+}
+
+static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
+{
+	struct file *exe_file;
+	struct dentry *dentry;
+	int err;
+
+	/*
+	 * Setting new mm::exe_file is only allowed when no VM_EXECUTABLE vma's
+	 * remain. So perform a quick test first.
+	 */
+	if (mm->num_exe_file_vmas)
+		return -EBUSY;
+
+	exe_file = fget(fd);
+	if (!exe_file)
+		return -EBADF;
+
+	dentry = exe_file->f_path.dentry;
+
+	/*
+	 * Because the original mm->exe_file points to executable file, make
+	 * sure that this one is executable as well, to avoid breaking an
+	 * overall picture.
+	 */
+	err = -EACCES;
+	if (!S_ISREG(dentry->d_inode->i_mode) ||
+	    exe_file->f_path.mnt->mnt_flags & MNT_NOEXEC)
+		goto exit;
+
+	err = inode_permission(dentry->d_inode, MAY_EXEC);
+	if (err)
+		goto exit;
+
+	/*
+	 * The symlink can be changed only once, just to disallow arbitrary
+	 * transitions malicious software might bring in. This means one
+	 * could make a snapshot over all processes running and monitor
+	 * /proc/pid/exe changes to notice unusual activity if needed.
+	 */
+	down_write(&mm->mmap_sem);
+	if (likely(!mm->exe_file))
+		set_mm_exe_file(mm, exe_file);
+	else
+		err = -EBUSY;
+	up_write(&mm->mmap_sem);
+
+exit:
+	fput(exe_file);
+	return err;
+}
+
 static int prctl_set_mm(int opt, unsigned long addr,
 			unsigned long arg4, unsigned long arg5)
 {
 	unsigned long rlim = rlimit(RLIMIT_DATA);
-	unsigned long vm_req_flags;
-	unsigned long vm_bad_flags;
-	struct vm_area_struct *vma;
-	int error = 0;
 	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	int error;
 
-	if (arg4 | arg5)
+	if (arg5 || (arg4 && opt != PR_SET_MM_AUXV))
 		return -EINVAL;
 
 	if (!capable(CAP_SYS_RESOURCE))
 		return -EPERM;
 
+	if (opt == PR_SET_MM_EXE_FILE)
+		return prctl_set_mm_exe_file(mm, (unsigned int)addr);
+
 	if (addr >= TASK_SIZE)
 		return -EINVAL;
 
+	error = -EINVAL;
+
 	down_read(&mm->mmap_sem);
 	vma = find_vma(mm, addr);
 
-	if (opt != PR_SET_MM_START_BRK && opt != PR_SET_MM_BRK) {
-		/* It must be existing VMA */
-		if (!vma || vma->vm_start > addr)
-			goto out;
-	}
-
-	error = -EINVAL;
 	switch (opt) {
 	case PR_SET_MM_START_CODE:
+		mm->start_code = addr;
+		break;
 	case PR_SET_MM_END_CODE:
-		vm_req_flags = VM_READ | VM_EXEC;
-		vm_bad_flags = VM_WRITE | VM_MAYSHARE;
-
-		if ((vma->vm_flags & vm_req_flags) != vm_req_flags ||
-		    (vma->vm_flags & vm_bad_flags))
-			goto out;
-
-		if (opt == PR_SET_MM_START_CODE)
-			mm->start_code = addr;
-		else
-			mm->end_code = addr;
+		mm->end_code = addr;
 		break;
-
 	case PR_SET_MM_START_DATA:
-	case PR_SET_MM_END_DATA:
-		vm_req_flags = VM_READ | VM_WRITE;
-		vm_bad_flags = VM_EXEC | VM_MAYSHARE;
-
-		if ((vma->vm_flags & vm_req_flags) != vm_req_flags ||
-		    (vma->vm_flags & vm_bad_flags))
-			goto out;
-
-		if (opt == PR_SET_MM_START_DATA)
-			mm->start_data = addr;
-		else
-			mm->end_data = addr;
+		mm->start_data = addr;
 		break;
-
-	case PR_SET_MM_START_STACK:
-
-#ifdef CONFIG_STACK_GROWSUP
-		vm_req_flags = VM_READ | VM_WRITE | VM_GROWSUP;
-#else
-		vm_req_flags = VM_READ | VM_WRITE | VM_GROWSDOWN;
-#endif
-		if ((vma->vm_flags & vm_req_flags) != vm_req_flags)
-			goto out;
-
-		mm->start_stack = addr;
+	case PR_SET_MM_END_DATA:
+		mm->end_data = addr;
 		break;
 
 	case PR_SET_MM_START_BRK:
@@ -1881,16 +1908,77 @@ static int prctl_set_mm(int opt, unsigned long addr,
 		mm->brk = addr;
 		break;
 
+	/*
+	 * If command line arguments and environment
+	 * are placed somewhere else on stack, we can
+	 * set them up here, ARG_START/END to setup
+	 * command line argumets and ENV_START/END
+	 * for environment.
+	 */
+	case PR_SET_MM_START_STACK:
+	case PR_SET_MM_ARG_START:
+	case PR_SET_MM_ARG_END:
+	case PR_SET_MM_ENV_START:
+	case PR_SET_MM_ENV_END:
+		if (!vma) {
+			error = -EFAULT;
+			goto out;
+		}
+#ifdef CONFIG_STACK_GROWSUP
+		if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSUP, 0))
+#else
+		if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSDOWN, 0))
+#endif
+			goto out;
+		if (opt == PR_SET_MM_START_STACK)
+			mm->start_stack = addr;
+		else if (opt == PR_SET_MM_ARG_START)
+			mm->arg_start = addr;
+		else if (opt == PR_SET_MM_ARG_END)
+			mm->arg_end = addr;
+		else if (opt == PR_SET_MM_ENV_START)
+			mm->env_start = addr;
+		else if (opt == PR_SET_MM_ENV_END)
+			mm->env_end = addr;
+		break;
+
+	/*
+	 * This doesn't move auxiliary vector itself
+	 * since it's pinned to mm_struct, but allow
+	 * to fill vector with new values. It's up
+	 * to a caller to provide sane values here
+	 * otherwise user space tools which use this
+	 * vector might be unhappy.
+	 */
+	case PR_SET_MM_AUXV: {
+		unsigned long user_auxv[AT_VECTOR_SIZE];
+
+		if (arg4 > sizeof(user_auxv))
+			goto out;
+		up_read(&mm->mmap_sem);
+
+		if (copy_from_user(user_auxv, (const void __user *)addr, arg4))
+			return -EFAULT;
+
+		/* Make sure the last entry is always AT_NULL */
+		user_auxv[AT_VECTOR_SIZE - 2] = 0;
+		user_auxv[AT_VECTOR_SIZE - 1] = 0;
+
+		BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
+
+		task_lock(current);
+		memcpy(mm->saved_auxv, user_auxv, arg4);
+		task_unlock(current);
+
+		return 0;
+	}
 	default:
-		error = -EINVAL;
 		goto out;
 	}
 
 	error = 0;
-
 out:
 	up_read(&mm->mmap_sem);
-
 	return error;
 }
 #else /* CONFIG_CHECKPOINT_RESTORE */
@@ -2114,7 +2202,6 @@ int orderly_poweroff(bool force)
 		NULL
 	};
 	int ret = -ENOMEM;
-	struct subprocess_info *info;
 
 	if (argv == NULL) {
 		printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
@@ -2122,18 +2209,16 @@ int orderly_poweroff(bool force)
 		goto out;
 	}
 
-	info = call_usermodehelper_setup(argv[0], argv, envp, GFP_ATOMIC);
-	if (info == NULL) {
-		argv_free(argv);
-		goto out;
-	}
-
-	call_usermodehelper_setfns(info, NULL, argv_cleanup, NULL);
+	ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_NO_WAIT,
+				      NULL, argv_cleanup, NULL);
+out:
+	if (likely(!ret))
+		return 0;
 
-	ret = call_usermodehelper_exec(info, UMH_NO_WAIT);
+	if (ret == -ENOMEM)
+		argv_free(argv);
 
- out:
-	if (ret && force) {
+	if (force) {
 		printk(KERN_WARNING "Failed to start orderly shutdown: "
 		       "forcing the issue\n");
 
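From userspace these new options are reached through prctl(PR_SET_MM, ...). A hedged sketch of how a checkpoint/restore tool might move the recorded argv window of its own mm; the addresses are placeholders (they must fall inside a writable stack-like VMA or the kernel returns an error), and the PR_SET_MM_* constants are assumed to come from the updated <linux/prctl.h>:

	#include <stdio.h>
	#include <sys/prctl.h>
	#include <linux/prctl.h>

	int main(void)
	{
		unsigned long arg_start = 0x7f0000000000UL;	/* placeholder address */
		unsigned long arg_end   = 0x7f0000001000UL;	/* placeholder address */

		/* Requires CAP_SYS_RESOURCE and CONFIG_CHECKPOINT_RESTORE=y. */
		if (prctl(PR_SET_MM, PR_SET_MM_ARG_START, arg_start, 0, 0))
			perror("PR_SET_MM_ARG_START");
		if (prctl(PR_SET_MM, PR_SET_MM_ARG_END, arg_end, 0, 0))
			perror("PR_SET_MM_ARG_END");
		return 0;
	}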
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 47bfa16430d7..dbff751e4086 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -203,3 +203,6 @@ cond_syscall(sys_fanotify_mark);
 cond_syscall(sys_name_to_handle_at);
 cond_syscall(sys_open_by_handle_at);
 cond_syscall(compat_sys_open_by_handle_at);
+
+/* compare kernel pointers */
+cond_syscall(sys_kcmp);
diff --git a/kernel/task_work.c b/kernel/task_work.c
new file mode 100644
index 000000000000..82d1c794066d
--- /dev/null
+++ b/kernel/task_work.c
@@ -0,0 +1,84 @@
+#include <linux/spinlock.h>
+#include <linux/task_work.h>
+#include <linux/tracehook.h>
+
+int
+task_work_add(struct task_struct *task, struct task_work *twork, bool notify)
+{
+	unsigned long flags;
+	int err = -ESRCH;
+
+#ifndef TIF_NOTIFY_RESUME
+	if (notify)
+		return -ENOTSUPP;
+#endif
+	/*
+	 * We must not insert the new work if the task has already passed
+	 * exit_task_work(). We rely on do_exit()->raw_spin_unlock_wait()
+	 * and check PF_EXITING under pi_lock.
+	 */
+	raw_spin_lock_irqsave(&task->pi_lock, flags);
+	if (likely(!(task->flags & PF_EXITING))) {
+		hlist_add_head(&twork->hlist, &task->task_works);
+		err = 0;
+	}
+	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+
+	/* test_and_set_bit() implies mb(), see tracehook_notify_resume(). */
+	if (likely(!err) && notify)
+		set_notify_resume(task);
+	return err;
+}
+
+struct task_work *
+task_work_cancel(struct task_struct *task, task_work_func_t func)
+{
+	unsigned long flags;
+	struct task_work *twork;
+	struct hlist_node *pos;
+
+	raw_spin_lock_irqsave(&task->pi_lock, flags);
+	hlist_for_each_entry(twork, pos, &task->task_works, hlist) {
+		if (twork->func == func) {
+			hlist_del(&twork->hlist);
+			goto found;
+		}
+	}
+	twork = NULL;
+ found:
+	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+
+	return twork;
+}
+
+void task_work_run(void)
+{
+	struct task_struct *task = current;
+	struct hlist_head task_works;
+	struct hlist_node *pos;
+
+	raw_spin_lock_irq(&task->pi_lock);
+	hlist_move_list(&task->task_works, &task_works);
+	raw_spin_unlock_irq(&task->pi_lock);
+
+	if (unlikely(hlist_empty(&task_works)))
+		return;
+	/*
+	 * We use hlist to save the space in task_struct, but we want fifo.
+	 * Find the last entry, the list should be short, then process them
+	 * in reverse order.
+	 */
+	for (pos = task_works.first; pos->next; pos = pos->next)
+		;
+
+	for (;;) {
+		struct hlist_node **pprev = pos->pprev;
+		struct task_work *twork = container_of(pos, struct task_work,
+							hlist);
+		twork->func(twork);
+
+		if (pprev == &task_works.first)
+			break;
+		pos = container_of(pprev, struct hlist_node, next);
+	}
+}
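The intended usage pattern is the one the irq_thread() conversion above follows: embed a struct task_work somewhere with a suitable lifetime, queue it with task_work_add(), and either let it run (via task_work_run() or at exit) or pull it back with task_work_cancel(). A compressed kernel-side sketch, assuming the declarations from the new <linux/task_work.h>:

	#include <linux/sched.h>
	#include <linux/printk.h>
	#include <linux/task_work.h>

	static void example_work_fn(struct task_work *twork)
	{
		pr_info("task %d is running its queued work\n", current->pid);
	}

	static int example_queue_work(void)
	{
		static struct task_work work;	/* must outlive the queued callback */

		init_task_work(&work, example_work_fn, NULL);

		/* false: just queue it, don't request TIF_NOTIFY_RESUME. */
		return task_work_add(current, &work, false);
	}

	static void example_cancel_work(void)
	{
		/* Returns the dequeued work, or NULL if it already ran. */
		task_work_cancel(current, example_work_fn);
	}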
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 6420cda62336..1d0f6a8a0e5e 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1486,6 +1486,11 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
 	if (!buffer)
 		return size;
 
+	/* Make sure the requested buffer exists */
+	if (cpu_id != RING_BUFFER_ALL_CPUS &&
+	    !cpumask_test_cpu(cpu_id, buffer->cpumask))
+		return size;
+
 	size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
 	size *= BUF_PAGE_SIZE;
 
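A short usage note: the cpu_id argument selects either one CPU's buffer or, with RING_BUFFER_ALL_CPUS, every per-CPU buffer; with the added check, a cpu_id that is not in buffer->cpumask now simply returns without resizing anything. Illustrative sketch only, assuming the constants from <linux/ring_buffer.h>:

	#include <linux/ring_buffer.h>

	static void example_resize(struct ring_buffer *buffer)
	{
		/* Resize only CPU 0's buffer to 1 MiB... */
		ring_buffer_resize(buffer, 1024 * 1024, 0);

		/* ...or resize every per-CPU buffer at once. */
		ring_buffer_resize(buffer, 1024 * 1024, RING_BUFFER_ALL_CPUS);
	}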