Diffstat (limited to 'kernel/fork.c')
-rw-r--r--    kernel/fork.c    181
1 file changed, 180 insertions, 1 deletion
diff --git a/kernel/fork.c b/kernel/fork.c
index f88bd984df35..340cbc85f694 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -38,6 +38,7 @@
 #include <linux/syscalls.h>
 #include <linux/jiffies.h>
 #include <linux/tracehook.h>
+#include <linux/interrupt.h>
 #include <linux/futex.h>
 #include <linux/compat.h>
 #include <linux/task_io_accounting_ops.h>
@@ -65,6 +66,8 @@
 #include <linux/perf_event.h>
 #include <linux/posix-timers.h>
 #include <linux/user-return-notifier.h>
+#include <linux/kthread.h>
+#include <linux/notifier.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -85,7 +88,19 @@ int max_threads; /* tunable limit on nr_threads */
 
 DEFINE_PER_CPU(unsigned long, process_counts) = 0;
 
+#ifdef CONFIG_PREEMPT_RT
+DEFINE_RWLOCK(tasklist_lock); /* outer */
+#else
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
+#endif
+
+/*
+ * Delayed mmdrop. In the PREEMPT_RT case we
+ * don't want to do this from the scheduling
+ * context.
+ */
+static DEFINE_PER_CPU(struct task_struct *, desched_task);
+static DEFINE_PER_CPU(struct list_head, delayed_drop_list);
 
 int nr_processes(void)
 {
@@ -170,6 +185,16 @@ void __put_task_struct(struct task_struct *tsk)
 	free_task(tsk);
 }
 
+#ifdef CONFIG_PREEMPT_RT
+void __put_task_struct_cb(struct rcu_head *rhp)
+{
+	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
+
+	__put_task_struct(tsk);
+
+}
+#endif
+
 /*
  * macro override instead of weak attribute alias, to workaround
  * gcc 4.1.0 and 4.1.1 bugs with weak attribute and empty functions.
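[note] The new callback takes a struct rcu_head, which strongly suggests it is a call_rcu() target: on PREEMPT_RT the final task_struct free is deferred past an RCU grace period instead of running synchronously in the scheduler's put path. A minimal sketch of the expected caller (an assumption; the actual put_task_struct() wiring lives elsewhere in the -rt patch):

	/* sketch only: assumed RT variant of put_task_struct() */
	static inline void put_task_struct(struct task_struct *t)
	{
		if (atomic_dec_and_test(&t->usage))
			call_rcu(&t->rcu, __put_task_struct_cb);
	}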
@@ -180,6 +205,8 @@ void __put_task_struct(struct task_struct *tsk)
 
 void __init fork_init(unsigned long mempages)
 {
+	int i;
+
 #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
 #ifndef ARCH_MIN_TASKALIGN
 #define ARCH_MIN_TASKALIGN L1_CACHE_BYTES
@@ -210,6 +237,9 @@ void __init fork_init(unsigned long mempages)
 	init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
 	init_task.signal->rlim[RLIMIT_SIGPENDING] =
 		init_task.signal->rlim[RLIMIT_NPROC];
+
+	for (i = 0; i < NR_CPUS; i++)
+		INIT_LIST_HEAD(&per_cpu(delayed_drop_list, i));
 }
 
 int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst,
@@ -295,6 +325,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 	mm->locked_vm = 0;
 	mm->mmap = NULL;
 	mm->mmap_cache = NULL;
+	INIT_LIST_HEAD(&mm->delayed_drop);
 	mm->free_area_cache = oldmm->mmap_base;
 	mm->cached_hole_size = ~0UL;
 	mm->map_count = 0;
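[note] INIT_LIST_HEAD(&mm->delayed_drop) implies a companion hunk, not part of this file, that adds the list node to struct mm_struct. Roughly (assumed, in include/linux/mm_types.h):

	struct mm_struct {
		...
		struct list_head delayed_drop;	/* entry on delayed_drop_list */
		...
	};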
@@ -943,6 +974,9 @@ static void rt_mutex_init_task(struct task_struct *p)
 #ifdef CONFIG_RT_MUTEXES
 	plist_head_init_raw(&p->pi_waiters, &p->pi_lock);
 	p->pi_blocked_on = NULL;
+# ifdef CONFIG_DEBUG_RT_MUTEXES
+	p->last_kernel_lock = NULL;
+# endif
 #endif
 }
 
@@ -1065,6 +1099,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	spin_lock_init(&p->alloc_lock);
 
 	init_sigpending(&p->pending);
+	p->sigqueue_cache = NULL;
 
 	p->utime = cputime_zero;
 	p->stime = cputime_zero;
@@ -1082,7 +1117,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	acct_clear_integrals(p);
 
 	posix_cpu_timers_init(p);
-
+	p->posix_timer_list = NULL;
 	p->lock_depth = -1;	/* -1 = no lock */
 	do_posix_clock_monotonic_gettime(&p->start_time);
 	p->real_start_time = p->start_time;
@@ -1118,6 +1153,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->hardirq_context = 0;
 	p->softirq_context = 0;
 #endif
+	p->pagefault_disabled = 0;
 #ifdef CONFIG_LOCKDEP
 	p->lockdep_depth = 0; /* no locks held yet */
 	p->curr_chain_key = 0;
@@ -1165,6 +1201,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);
 	if (retval)
 		goto bad_fork_cleanup_io;
+#ifdef CONFIG_DEBUG_PREEMPT
+	atomic_set(&p->lock_count, 0);
+#endif
 
 	if (pid != &init_struct_pid) {
 		retval = -ENOMEM;
@@ -1289,7 +1328,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 			attach_pid(p, PIDTYPE_SID, task_session(current));
 			list_add_tail(&p->sibling, &p->real_parent->children);
 			list_add_tail_rcu(&p->tasks, &init_task.tasks);
+			preempt_disable();
 			__get_cpu_var(process_counts)++;
+			preempt_enable();
 		}
 		attach_pid(p, PIDTYPE_PID, pid);
 		nr_threads++;
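[note] __get_cpu_var() requires the caller to have preemption disabled. Mainline gets that for free from the spin-based tasklist_lock held here, but under PREEMPT_RT locks no longer disable preemption, so the pair above makes it explicit. An equivalent, slightly more compact form:

	/* get_cpu_var() disables preemption itself; put_cpu_var() re-enables */
	get_cpu_var(process_counts)++;
	put_cpu_var(process_counts);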
@@ -1763,3 +1804,141 @@ int unshare_files(struct files_struct **displaced)
 	task_unlock(task);
 	return 0;
 }
+
+static int mmdrop_complete(void)
+{
+	struct list_head *head;
+	int ret = 0;
+
+	head = &get_cpu_var(delayed_drop_list);
+	while (!list_empty(head)) {
+		struct mm_struct *mm = list_entry(head->next,
+				struct mm_struct, delayed_drop);
+		list_del(&mm->delayed_drop);
+		put_cpu_var(delayed_drop_list);
+
+		__mmdrop(mm);
+		ret = 1;
+
+		head = &get_cpu_var(delayed_drop_list);
+	}
+	put_cpu_var(delayed_drop_list);
+
+	return ret;
+}
+
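[note] Note the get/put dance in mmdrop_complete(): get_cpu_var() disables preemption, and __mmdrop() presumably takes locks that sleep on PREEMPT_RT, so the per-CPU reference is dropped before every free and re-taken afterwards. The shape it avoids (illustrative only; this would be a bug):

	head = &get_cpu_var(delayed_drop_list);	/* preemption now off */
	__mmdrop(mm);	/* may sleep -> "scheduling while atomic" */
	put_cpu_var(delayed_drop_list);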
+/*
+ * We don't want to do complex work from the scheduler, thus
+ * we delay the work to a per-CPU worker thread:
+ */
+void __mmdrop_delayed(struct mm_struct *mm)
+{
+	struct task_struct *desched_task;
+	struct list_head *head;
+
+	head = &get_cpu_var(delayed_drop_list);
+	list_add_tail(&mm->delayed_drop, head);
+	desched_task = __get_cpu_var(desched_task);
+	if (desched_task)
+		wake_up_process(desched_task);
+	put_cpu_var(delayed_drop_list);
+}
+
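[note] __mmdrop_delayed() is the producer side: queue the mm on this CPU's list and kick the worker. The caller is presumably the RT variant of mmdrop(), which in mainline frees synchronously. A sketch of the assumed redirection (the real hunk lives elsewhere in the -rt patch):

	static inline void mmdrop(struct mm_struct *mm)
	{
		if (unlikely(atomic_dec_and_test(&mm->mm_count))) {
	#ifdef CONFIG_PREEMPT_RT
			__mmdrop_delayed(mm);	/* defer to desched/N thread */
	#else
			__mmdrop(mm);		/* mainline: free synchronously */
	#endif
		}
	}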
+#ifdef CONFIG_HOTPLUG_CPU
+static void takeover_delayed_drop(int hotcpu)
+{
+	struct list_head *head = &per_cpu(delayed_drop_list, hotcpu);
+
+	while (!list_empty(head)) {
+		struct mm_struct *mm = list_entry(head->next,
+				struct mm_struct, delayed_drop);
+
+		list_del(&mm->delayed_drop);
+		__mmdrop_delayed(mm);
+	}
+}
+#endif
+
+static int desched_thread(void *__bind_cpu)
+{
+	set_user_nice(current, -10);
+	current->flags |= PF_NOFREEZE;
+	current->extra_flags |= PFE_SOFTIRQ;
+
+	set_current_state(TASK_INTERRUPTIBLE);
+
+	while (!kthread_should_stop()) {
+
+		if (mmdrop_complete())
+			continue;
+		schedule();
+
+		/*
+		 * This must be called from time to time on ia64, and is a
+		 * no-op on other archs. Used to be in cpu_idle(), but with
+		 * the new -rt semantics it can't stay there.
+		 */
+		check_pgt_cache();
+
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+	__set_current_state(TASK_RUNNING);
+	return 0;
+}
+
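[note] The loop follows the standard kthread sleep idiom: the task marks itself TASK_INTERRUPTIBLE before checking for work, so a wake_up_process() arriving between the check and schedule() merely resets the state to running and schedule() returns immediately, rather than the wakeup being lost. Distilled (work_pending() and do_work() are hypothetical stand-ins):

	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		if (work_pending())
			do_work();	/* keep draining, don't sleep */
		else
			schedule();	/* really sleep until woken */
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);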
+static int __devinit cpu_callback(struct notifier_block *nfb,
+				  unsigned long action,
+				  void *hcpu)
+{
+	int hotcpu = (unsigned long)hcpu;
+	struct task_struct *p;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+
+		BUG_ON(per_cpu(desched_task, hotcpu));
+		INIT_LIST_HEAD(&per_cpu(delayed_drop_list, hotcpu));
+		p = kthread_create(desched_thread, hcpu, "desched/%d", hotcpu);
+		if (IS_ERR(p)) {
+			printk("desched_thread for %i failed\n", hotcpu);
+			return NOTIFY_BAD;
+		}
+		per_cpu(desched_task, hotcpu) = p;
+		kthread_bind(p, hotcpu);
+		break;
+	case CPU_ONLINE:
+
+		wake_up_process(per_cpu(desched_task, hotcpu));
+		break;
+#ifdef CONFIG_HOTPLUG_CPU
+	case CPU_UP_CANCELED:
+
+		/* Unbind so it can run. Fall thru. */
+		kthread_bind(per_cpu(desched_task, hotcpu), smp_processor_id());
+	case CPU_DEAD:
+
+		p = per_cpu(desched_task, hotcpu);
+		per_cpu(desched_task, hotcpu) = NULL;
+		kthread_stop(p);
+		takeover_delayed_drop(hotcpu);
+		takeover_tasklets(hotcpu);
+		break;
+#endif /* CONFIG_HOTPLUG_CPU */
+	}
+	return NOTIFY_OK;
+}
+
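[note] This is the usual per-CPU kthread hotplug pattern (the same shape ksoftirqd uses): create and bind at CPU_UP_PREPARE, wake at CPU_ONLINE, and on CPU_DEAD stop the thread and hand its queue over. The CPU_UP_CANCELED rebind exists because the thread is still bound to a CPU that never came online; moving it to a live CPU lets kthread_stop() complete. takeover_tasklets() is not defined in this file; the new #include <linux/interrupt.h> at the top suggests the -rt tree exports it from softirq.c, roughly:

	/* assumed: made visible via linux/interrupt.h in this tree */
	extern void takeover_tasklets(unsigned int cpu);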
+static struct notifier_block __devinitdata cpu_nfb = {
+	.notifier_call = cpu_callback
+};
+
+__init int spawn_desched_task(void)
+{
+	void *cpu = (void *)(long)smp_processor_id();
+
+	cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
+	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
+	register_cpu_notifier(&cpu_nfb);
+	return 0;
+}
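[note] spawn_desched_task() bootstraps the boot CPU by replaying the two notifier events by hand before registering for future hotplug events. Since it takes no arguments and returns int, it is presumably wired up as an initcall (an assumption; the actual call site is elsewhere in the -rt patch):

	/* assumed wiring; the exact initcall level is not shown in this diff */
	core_initcall(spawn_desched_task);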