Diffstat (limited to 'kernel/fork.c')
-rw-r--r--  kernel/fork.c  181
1 file changed, 180 insertions(+), 1 deletion(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index f88bd984df35..340cbc85f694 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -38,6 +38,7 @@
 #include <linux/syscalls.h>
 #include <linux/jiffies.h>
 #include <linux/tracehook.h>
+#include <linux/interrupt.h>
 #include <linux/futex.h>
 #include <linux/compat.h>
 #include <linux/task_io_accounting_ops.h>
@@ -65,6 +66,8 @@
 #include <linux/perf_event.h>
 #include <linux/posix-timers.h>
 #include <linux/user-return-notifier.h>
+#include <linux/kthread.h>
+#include <linux/notifier.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -85,7 +88,19 @@ int max_threads; /* tunable limit on nr_threads */
 
 DEFINE_PER_CPU(unsigned long, process_counts) = 0;
 
+#ifdef CONFIG_PREEMPT_RT
+DEFINE_RWLOCK(tasklist_lock); /* outer */
+#else
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
+#endif
+
+/*
+ * Delayed mmdrop. In the PREEMPT_RT case we
+ * dont want to do this from the scheduling
+ * context.
+ */
+static DEFINE_PER_CPU(struct task_struct *, desched_task);
+static DEFINE_PER_CPU(struct list_head, delayed_drop_list);
 
 int nr_processes(void)
 {
@@ -170,6 +185,16 @@ void __put_task_struct(struct task_struct *tsk)
 	free_task(tsk);
 }
 
+#ifdef CONFIG_PREEMPT_RT
+void __put_task_struct_cb(struct rcu_head *rhp)
+{
+	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
+
+	__put_task_struct(tsk);
+
+}
+#endif
+
 /*
  * macro override instead of weak attribute alias, to workaround
  * gcc 4.1.0 and 4.1.1 bugs with weak attribute and empty functions.
@@ -180,6 +205,8 @@ void __put_task_struct(struct task_struct *tsk)
 
 void __init fork_init(unsigned long mempages)
 {
+	int i;
+
 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
 #ifndef ARCH_MIN_TASKALIGN
 #define ARCH_MIN_TASKALIGN	L1_CACHE_BYTES
@@ -210,6 +237,9 @@ void __init fork_init(unsigned long mempages)
 	init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
 	init_task.signal->rlim[RLIMIT_SIGPENDING] =
 		init_task.signal->rlim[RLIMIT_NPROC];
+
+	for (i = 0; i < NR_CPUS; i++)
+		INIT_LIST_HEAD(&per_cpu(delayed_drop_list, i));
 }
 
 int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst,
@@ -295,6 +325,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 	mm->locked_vm = 0;
 	mm->mmap = NULL;
 	mm->mmap_cache = NULL;
+	INIT_LIST_HEAD(&mm->delayed_drop);
 	mm->free_area_cache = oldmm->mmap_base;
 	mm->cached_hole_size = ~0UL;
 	mm->map_count = 0;
@@ -943,6 +974,9 @@ static void rt_mutex_init_task(struct task_struct *p)
 #ifdef CONFIG_RT_MUTEXES
 	plist_head_init_raw(&p->pi_waiters, &p->pi_lock);
 	p->pi_blocked_on = NULL;
+# ifdef CONFIG_DEBUG_RT_MUTEXES
+	p->last_kernel_lock = NULL;
+# endif
 #endif
 }
 
@@ -1065,6 +1099,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	spin_lock_init(&p->alloc_lock);
 
 	init_sigpending(&p->pending);
+	p->sigqueue_cache = NULL;
 
 	p->utime = cputime_zero;
 	p->stime = cputime_zero;
@@ -1082,7 +1117,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	acct_clear_integrals(p);
 
 	posix_cpu_timers_init(p);
-
+	p->posix_timer_list = NULL;
 	p->lock_depth = -1;		/* -1 = no lock */
 	do_posix_clock_monotonic_gettime(&p->start_time);
 	p->real_start_time = p->start_time;
@@ -1118,6 +1153,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->hardirq_context = 0;
 	p->softirq_context = 0;
 #endif
+	p->pagefault_disabled = 0;
 #ifdef CONFIG_LOCKDEP
 	p->lockdep_depth = 0; /* no locks held yet */
 	p->curr_chain_key = 0;
@@ -1165,6 +1201,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);
 	if (retval)
 		goto bad_fork_cleanup_io;
+#ifdef CONFIG_DEBUG_PREEMPT
+	atomic_set(&p->lock_count, 0);
+#endif
 
 	if (pid != &init_struct_pid) {
 		retval = -ENOMEM;
@@ -1289,7 +1328,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 			attach_pid(p, PIDTYPE_SID, task_session(current));
 			list_add_tail(&p->sibling, &p->real_parent->children);
 			list_add_tail_rcu(&p->tasks, &init_task.tasks);
+			preempt_disable();
 			__get_cpu_var(process_counts)++;
+			preempt_enable();
 		}
 		attach_pid(p, PIDTYPE_PID, pid);
 		nr_threads++;
@@ -1763,3 +1804,141 @@ int unshare_files(struct files_struct **displaced)
 	task_unlock(task);
 	return 0;
 }
+
+static int mmdrop_complete(void)
+{
+	struct list_head *head;
+	int ret = 0;
+
+	head = &get_cpu_var(delayed_drop_list);
+	while (!list_empty(head)) {
+		struct mm_struct *mm = list_entry(head->next,
+				struct mm_struct, delayed_drop);
+		list_del(&mm->delayed_drop);
+		put_cpu_var(delayed_drop_list);
+
+		__mmdrop(mm);
+		ret = 1;
+
+		head = &get_cpu_var(delayed_drop_list);
+	}
+	put_cpu_var(delayed_drop_list);
+
+	return ret;
+}
+
+/*
+ * We dont want to do complex work from the scheduler, thus
+ * we delay the work to a per-CPU worker thread:
+ */
+void __mmdrop_delayed(struct mm_struct *mm)
+{
+	struct task_struct *desched_task;
+	struct list_head *head;
+
+	head = &get_cpu_var(delayed_drop_list);
+	list_add_tail(&mm->delayed_drop, head);
+	desched_task = __get_cpu_var(desched_task);
+	if (desched_task)
+		wake_up_process(desched_task);
+	put_cpu_var(delayed_drop_list);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void takeover_delayed_drop(int hotcpu)
+{
+	struct list_head *head = &per_cpu(delayed_drop_list, hotcpu);
+
+	while (!list_empty(head)) {
+		struct mm_struct *mm = list_entry(head->next,
+				struct mm_struct, delayed_drop);
+
+		list_del(&mm->delayed_drop);
+		__mmdrop_delayed(mm);
+	}
+}
+#endif
+
+static int desched_thread(void * __bind_cpu)
+{
+	set_user_nice(current, -10);
+	current->flags |= PF_NOFREEZE;
+	current->extra_flags |= PFE_SOFTIRQ;
+
+	set_current_state(TASK_INTERRUPTIBLE);
+
+	while (!kthread_should_stop()) {
+
+		if (mmdrop_complete())
+			continue;
+		schedule();
+
+		/*
+		 * This must be called from time to time on ia64, and is a
+		 * no-op on other archs. Used to be in cpu_idle(), but with
+		 * the new -rt semantics it can't stay there.
+		 */
+		check_pgt_cache();
+
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+	__set_current_state(TASK_RUNNING);
+	return 0;
+}
+
+static int __devinit cpu_callback(struct notifier_block *nfb,
+				  unsigned long action,
+				  void *hcpu)
+{
+	int hotcpu = (unsigned long)hcpu;
+	struct task_struct *p;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+
+		BUG_ON(per_cpu(desched_task, hotcpu));
+		INIT_LIST_HEAD(&per_cpu(delayed_drop_list, hotcpu));
+		p = kthread_create(desched_thread, hcpu, "desched/%d", hotcpu);
+		if (IS_ERR(p)) {
+			printk("desched_thread for %i failed\n", hotcpu);
+			return NOTIFY_BAD;
+		}
+		per_cpu(desched_task, hotcpu) = p;
+		kthread_bind(p, hotcpu);
+		break;
+	case CPU_ONLINE:
+
+		wake_up_process(per_cpu(desched_task, hotcpu));
+		break;
+#ifdef CONFIG_HOTPLUG_CPU
+	case CPU_UP_CANCELED:
+
+		/* Unbind so it can run. Fall thru. */
+		kthread_bind(per_cpu(desched_task, hotcpu), smp_processor_id());
+	case CPU_DEAD:
+
+		p = per_cpu(desched_task, hotcpu);
+		per_cpu(desched_task, hotcpu) = NULL;
+		kthread_stop(p);
+		takeover_delayed_drop(hotcpu);
+		takeover_tasklets(hotcpu);
+		break;
+#endif	/* CONFIG_HOTPLUG_CPU */
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __devinitdata cpu_nfb = {
+	.notifier_call = cpu_callback
+};
+
+__init int spawn_desched_task(void)
+{
+	void *cpu = (void *)(long)smp_processor_id();
+
+	cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
+	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
+	register_cpu_notifier(&cpu_nfb);
+	return 0;
+}
+
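
Note: the deferral machinery added in the last hunk has no caller in this file. On PREEMPT_RT the scheduler side is expected to hand the final mm_count reference to __mmdrop_delayed() instead of tearing the mm down inline, and the per-CPU desched/N kthread later drains delayed_drop_list via mmdrop_complete(). As a rough sketch only (the wrapper name mmdrop_rt and its call site are assumptions for illustration, not part of this patch), such a caller could look like:

	/*
	 * Hypothetical caller sketch - mirrors mmdrop(), but routes the
	 * final reference to the deferred path on PREEMPT_RT.
	 */
	static inline void mmdrop_rt(struct mm_struct *mm)
	{
		/* Drop a reference; only the last one triggers teardown. */
		if (unlikely(atomic_dec_and_test(&mm->mm_count))) {
	#ifdef CONFIG_PREEMPT_RT
			/*
			 * Queue the mm on this CPU's delayed_drop_list and let
			 * the "desched/N" kthread call __mmdrop() outside the
			 * scheduling context (see __mmdrop_delayed() above).
			 */
			__mmdrop_delayed(mm);
	#else
			__mmdrop(mm);
	#endif
		}
	}

The point of the indirection, per the patch's own comments, is that the teardown done by __mmdrop() is too much work to perform from the scheduling context on PREEMPT_RT, so it is pushed to the nice -10 per-CPU kthread that __mmdrop_delayed() wakes.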