about summary refs log tree commit diff stats
path: root/kernel
diff options
context:
space:
mode:
author Ingo Molnar <mingo@elte.hu> 2008-09-06 10:51:57 -0400
committer Ingo Molnar <mingo@elte.hu> 2008-09-06 10:51:57 -0400
commit 7f79d852ed30a06eebf7497afe9334a726db3d40 (patch)
tree 0057281f17501b635d3d88cda9a14203706f5dcc /kernel
parent aef745fca016aea45adae5c98e8698904dd8ad51 (diff)
parent 70bb08962ea9bd50797ae9f16b2493f5f7c65053 (diff)
Merge branch 'linus' into sched/devel
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/auditsc.c 3
-rw-r--r-- kernel/exit.c 88
-rw-r--r-- kernel/lockdep.c 6
-rw-r--r-- kernel/lockdep_proc.c 3
-rw-r--r-- kernel/module.c 2
-rw-r--r-- kernel/pid_namespace.c 3
-rw-r--r-- kernel/pm_qos_params.c 25
-rw-r--r-- kernel/power/disk.c 13
-rw-r--r-- kernel/power/main.c 5
-rw-r--r-- kernel/resource.c 88
-rw-r--r-- kernel/sched.c 59
-rw-r--r-- kernel/sched_clock.c 84
-rw-r--r-- kernel/sched_rt.c 13
-rw-r--r-- kernel/smp.c 10
-rw-r--r-- kernel/softlockup.c 3
-rw-r--r-- kernel/sysctl.c 1
-rw-r--r-- kernel/time/tick-sched.c 3
17 files changed, 246 insertions, 163 deletions
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 972f8e61d36a..59cedfb040e7 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -243,10 +243,11 @@ static inline int open_arg(int flags, int mask)
243 243
244static int audit_match_perm(struct audit_context *ctx, int mask) 244static int audit_match_perm(struct audit_context *ctx, int mask)
245{ 245{
246 unsigned n;
246 if (unlikely(!ctx)) 247 if (unlikely(!ctx))
247 return 0; 248 return 0;
248 249
249 unsigned n = ctx->major; 250 n = ctx->major;
250 switch (audit_classify_syscall(ctx->arch, n)) { 251 switch (audit_classify_syscall(ctx->arch, n)) {
251 case 0: /* native */ 252 case 0: /* native */
252 if ((mask & AUDIT_PERM_WRITE) && 253 if ((mask & AUDIT_PERM_WRITE) &&
diff --git a/kernel/exit.c b/kernel/exit.c
index 38ec40630149..16395644a98f 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -112,9 +112,9 @@ static void __exit_signal(struct task_struct *tsk)
112 * We won't ever get here for the group leader, since it 112 * We won't ever get here for the group leader, since it
113 * will have been the last reference on the signal_struct. 113 * will have been the last reference on the signal_struct.
114 */ 114 */
115 sig->utime = cputime_add(sig->utime, tsk->utime); 115 sig->utime = cputime_add(sig->utime, task_utime(tsk));
116 sig->stime = cputime_add(sig->stime, tsk->stime); 116 sig->stime = cputime_add(sig->stime, task_stime(tsk));
117 sig->gtime = cputime_add(sig->gtime, tsk->gtime); 117 sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
118 sig->min_flt += tsk->min_flt; 118 sig->min_flt += tsk->min_flt;
119 sig->maj_flt += tsk->maj_flt; 119 sig->maj_flt += tsk->maj_flt;
120 sig->nvcsw += tsk->nvcsw; 120 sig->nvcsw += tsk->nvcsw;
@@ -831,26 +831,50 @@ static void reparent_thread(struct task_struct *p, struct task_struct *father)
831 * the child reaper process (ie "init") in our pid 831 * the child reaper process (ie "init") in our pid
832 * space. 832 * space.
833 */ 833 */
834static struct task_struct *find_new_reaper(struct task_struct *father)
835{
836 struct pid_namespace *pid_ns = task_active_pid_ns(father);
837 struct task_struct *thread;
838
839 thread = father;
840 while_each_thread(father, thread) {
841 if (thread->flags & PF_EXITING)
842 continue;
843 if (unlikely(pid_ns->child_reaper == father))
844 pid_ns->child_reaper = thread;
845 return thread;
846 }
847
848 if (unlikely(pid_ns->child_reaper == father)) {
849 write_unlock_irq(&tasklist_lock);
850 if (unlikely(pid_ns == &init_pid_ns))
851 panic("Attempted to kill init!");
852
853 zap_pid_ns_processes(pid_ns);
854 write_lock_irq(&tasklist_lock);
855 /*
856 * We can not clear ->child_reaper or leave it alone.
857 * There may by stealth EXIT_DEAD tasks on ->children,
858 * forget_original_parent() must move them somewhere.
859 */
860 pid_ns->child_reaper = init_pid_ns.child_reaper;
861 }
862
863 return pid_ns->child_reaper;
864}
865
834static void forget_original_parent(struct task_struct *father) 866static void forget_original_parent(struct task_struct *father)
835{ 867{
836 struct task_struct *p, *n, *reaper = father; 868 struct task_struct *p, *n, *reaper;
837 LIST_HEAD(ptrace_dead); 869 LIST_HEAD(ptrace_dead);
838 870
839 write_lock_irq(&tasklist_lock); 871 write_lock_irq(&tasklist_lock);
840 872 reaper = find_new_reaper(father);
841 /* 873 /*
842 * First clean up ptrace if we were using it. 874 * First clean up ptrace if we were using it.
843 */ 875 */
844 ptrace_exit(father, &ptrace_dead); 876 ptrace_exit(father, &ptrace_dead);
845 877
846 do {
847 reaper = next_thread(reaper);
848 if (reaper == father) {
849 reaper = task_child_reaper(father);
850 break;
851 }
852 } while (reaper->flags & PF_EXITING);
853
854 list_for_each_entry_safe(p, n, &father->children, sibling) { 878 list_for_each_entry_safe(p, n, &father->children, sibling) {
855 p->real_parent = reaper; 879 p->real_parent = reaper;
856 if (p->parent == father) { 880 if (p->parent == father) {
@@ -918,8 +942,8 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
918 942
919 /* mt-exec, de_thread() is waiting for us */ 943 /* mt-exec, de_thread() is waiting for us */
920 if (thread_group_leader(tsk) && 944 if (thread_group_leader(tsk) &&
921 tsk->signal->notify_count < 0 && 945 tsk->signal->group_exit_task &&
922 tsk->signal->group_exit_task) 946 tsk->signal->notify_count < 0)
923 wake_up_process(tsk->signal->group_exit_task); 947 wake_up_process(tsk->signal->group_exit_task);
924 948
925 write_unlock_irq(&tasklist_lock); 949 write_unlock_irq(&tasklist_lock);
@@ -959,39 +983,6 @@ static void check_stack_usage(void)
959static inline void check_stack_usage(void) {} 983static inline void check_stack_usage(void) {}
960#endif 984#endif
961 985
962static inline void exit_child_reaper(struct task_struct *tsk)
963{
964 if (likely(tsk->group_leader != task_child_reaper(tsk)))
965 return;
966
967 if (tsk->nsproxy->pid_ns == &init_pid_ns)
968 panic("Attempted to kill init!");
969
970 /*
971 * @tsk is the last thread in the 'cgroup-init' and is exiting.
972 * Terminate all remaining processes in the namespace and reap them
973 * before exiting @tsk.
974 *
975 * Note that @tsk (last thread of cgroup-init) may not necessarily
976 * be the child-reaper (i.e main thread of cgroup-init) of the
977 * namespace i.e the child_reaper may have already exited.
978 *
979 * Even after a child_reaper exits, we let it inherit orphaned children,
980 * because, pid_ns->child_reaper remains valid as long as there is
981 * at least one living sub-thread in the cgroup init.
982
983 * This living sub-thread of the cgroup-init will be notified when
984 * a child inherited by the 'child-reaper' exits (do_notify_parent()
985 * uses __group_send_sig_info()). Further, when reaping child processes,
986 * do_wait() iterates over children of all living sub threads.
987
988 * i.e even though 'child_reaper' thread is listed as the parent of the
989 * orphaned children, any living sub-thread in the cgroup-init can
990 * perform the role of the child_reaper.
991 */
992 zap_pid_ns_processes(tsk->nsproxy->pid_ns);
993}
994
995NORET_TYPE void do_exit(long code) 986NORET_TYPE void do_exit(long code)
996{ 987{
997 struct task_struct *tsk = current; 988 struct task_struct *tsk = current;
@@ -1051,7 +1042,6 @@ NORET_TYPE void do_exit(long code)
1051 } 1042 }
1052 group_dead = atomic_dec_and_test(&tsk->signal->live); 1043 group_dead = atomic_dec_and_test(&tsk->signal->live);
1053 if (group_dead) { 1044 if (group_dead) {
1054 exit_child_reaper(tsk);
1055 hrtimer_cancel(&tsk->signal->real_timer); 1045 hrtimer_cancel(&tsk->signal->real_timer);
1056 exit_itimers(tsk->signal); 1046 exit_itimers(tsk->signal);
1057 } 1047 }
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 3bfb1877a003..dbda475b13bd 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -875,11 +875,11 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
875 if (!entry) 875 if (!entry)
876 return 0; 876 return 0;
877 877
878 entry->class = this;
879 entry->distance = distance;
880 if (!save_trace(&entry->trace)) 878 if (!save_trace(&entry->trace))
881 return 0; 879 return 0;
882 880
881 entry->class = this;
882 entry->distance = distance;
883 /* 883 /*
884 * Since we never remove from the dependency list, the list can 884 * Since we never remove from the dependency list, the list can
885 * be walked lockless by other CPUs, it's only allocation 885 * be walked lockless by other CPUs, it's only allocation
@@ -3029,7 +3029,7 @@ found_it:
3029 3029
3030 stats = get_lock_stats(hlock_class(hlock)); 3030 stats = get_lock_stats(hlock_class(hlock));
3031 if (point < ARRAY_SIZE(stats->contention_point)) 3031 if (point < ARRAY_SIZE(stats->contention_point))
3032 stats->contention_point[i]++; 3032 stats->contention_point[point]++;
3033 if (lock->cpu != smp_processor_id()) 3033 if (lock->cpu != smp_processor_id())
3034 stats->bounces[bounce_contended + !!hlock->read]++; 3034 stats->bounces[bounce_contended + !!hlock->read]++;
3035 put_lock_stats(stats); 3035 put_lock_stats(stats);
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index 4b194d34d77f..20dbcbf9c7dd 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -472,8 +472,9 @@ static void snprint_time(char *buf, size_t bufsiz, s64 nr)
472{ 472{
473 unsigned long rem; 473 unsigned long rem;
474 474
475 nr += 5; /* for display rounding */
475 rem = do_div(nr, 1000); /* XXX: do_div_signed */ 476 rem = do_div(nr, 1000); /* XXX: do_div_signed */
476 snprintf(buf, bufsiz, "%lld.%02d", (long long)nr, ((int)rem+5)/10); 477 snprintf(buf, bufsiz, "%lld.%02d", (long long)nr, (int)rem/10);
477} 478}
478 479
479static void seq_time(struct seq_file *m, s64 time) 480static void seq_time(struct seq_file *m, s64 time)
diff --git a/kernel/module.c b/kernel/module.c
index 08864d257eb0..9db11911e04b 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1799,7 +1799,7 @@ static void *module_alloc_update_bounds(unsigned long size)
1799 1799
1800/* Allocate and load the module: note that size of section 0 is always 1800/* Allocate and load the module: note that size of section 0 is always
1801 zero, and we rely on this for optional sections. */ 1801 zero, and we rely on this for optional sections. */
1802static struct module *load_module(void __user *umod, 1802static noinline struct module *load_module(void __user *umod,
1803 unsigned long len, 1803 unsigned long len,
1804 const char __user *uargs) 1804 const char __user *uargs)
1805{ 1805{
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index ea567b78d1aa..fab8ea86fac3 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -179,9 +179,6 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
179 rc = sys_wait4(-1, NULL, __WALL, NULL); 179 rc = sys_wait4(-1, NULL, __WALL, NULL);
180 } while (rc != -ECHILD); 180 } while (rc != -ECHILD);
181 181
182
183 /* Child reaper for the pid namespace is going away */
184 pid_ns->child_reaper = NULL;
185 acct_exit_ns(pid_ns); 182 acct_exit_ns(pid_ns);
186 return; 183 return;
187} 184}
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index da9c2dda6a4e..dfdec524d1b7 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -43,7 +43,7 @@
43#include <linux/uaccess.h> 43#include <linux/uaccess.h>
44 44
45/* 45/*
46 * locking rule: all changes to target_value or requirements or notifiers lists 46 * locking rule: all changes to requirements or notifiers lists
47 * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock 47 * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock
48 * held, taken with _irqsave. One lock to rule them all 48 * held, taken with _irqsave. One lock to rule them all
49 */ 49 */
@@ -66,7 +66,7 @@ struct pm_qos_object {
66 struct miscdevice pm_qos_power_miscdev; 66 struct miscdevice pm_qos_power_miscdev;
67 char *name; 67 char *name;
68 s32 default_value; 68 s32 default_value;
69 s32 target_value; 69 atomic_t target_value;
70 s32 (*comparitor)(s32, s32); 70 s32 (*comparitor)(s32, s32);
71}; 71};
72 72
@@ -77,7 +77,7 @@ static struct pm_qos_object cpu_dma_pm_qos = {
77 .notifiers = &cpu_dma_lat_notifier, 77 .notifiers = &cpu_dma_lat_notifier,
78 .name = "cpu_dma_latency", 78 .name = "cpu_dma_latency",
79 .default_value = 2000 * USEC_PER_SEC, 79 .default_value = 2000 * USEC_PER_SEC,
80 .target_value = 2000 * USEC_PER_SEC, 80 .target_value = ATOMIC_INIT(2000 * USEC_PER_SEC),
81 .comparitor = min_compare 81 .comparitor = min_compare
82}; 82};
83 83
@@ -87,7 +87,7 @@ static struct pm_qos_object network_lat_pm_qos = {
87 .notifiers = &network_lat_notifier, 87 .notifiers = &network_lat_notifier,
88 .name = "network_latency", 88 .name = "network_latency",
89 .default_value = 2000 * USEC_PER_SEC, 89 .default_value = 2000 * USEC_PER_SEC,
90 .target_value = 2000 * USEC_PER_SEC, 90 .target_value = ATOMIC_INIT(2000 * USEC_PER_SEC),
91 .comparitor = min_compare 91 .comparitor = min_compare
92}; 92};
93 93
@@ -99,7 +99,7 @@ static struct pm_qos_object network_throughput_pm_qos = {
99 .notifiers = &network_throughput_notifier, 99 .notifiers = &network_throughput_notifier,
100 .name = "network_throughput", 100 .name = "network_throughput",
101 .default_value = 0, 101 .default_value = 0,
102 .target_value = 0, 102 .target_value = ATOMIC_INIT(0),
103 .comparitor = max_compare 103 .comparitor = max_compare
104}; 104};
105 105
@@ -150,11 +150,11 @@ static void update_target(int target)
150 extreme_value = pm_qos_array[target]->comparitor( 150 extreme_value = pm_qos_array[target]->comparitor(
151 extreme_value, node->value); 151 extreme_value, node->value);
152 } 152 }
153 if (pm_qos_array[target]->target_value != extreme_value) { 153 if (atomic_read(&pm_qos_array[target]->target_value) != extreme_value) {
154 call_notifier = 1; 154 call_notifier = 1;
155 pm_qos_array[target]->target_value = extreme_value; 155 atomic_set(&pm_qos_array[target]->target_value, extreme_value);
156 pr_debug(KERN_ERR "new target for qos %d is %d\n", target, 156 pr_debug(KERN_ERR "new target for qos %d is %d\n", target,
157 pm_qos_array[target]->target_value); 157 atomic_read(&pm_qos_array[target]->target_value));
158 } 158 }
159 spin_unlock_irqrestore(&pm_qos_lock, flags); 159 spin_unlock_irqrestore(&pm_qos_lock, flags);
160 160
@@ -193,14 +193,7 @@ static int find_pm_qos_object_by_minor(int minor)
193 */ 193 */
194int pm_qos_requirement(int pm_qos_class) 194int pm_qos_requirement(int pm_qos_class)
195{ 195{
196 int ret_val; 196 return atomic_read(&pm_qos_array[pm_qos_class]->target_value);
197 unsigned long flags;
198
199 spin_lock_irqsave(&pm_qos_lock, flags);
200 ret_val = pm_qos_array[pm_qos_class]->target_value;
201 spin_unlock_irqrestore(&pm_qos_lock, flags);
202
203 return ret_val;
204} 197}
205EXPORT_SYMBOL_GPL(pm_qos_requirement); 198EXPORT_SYMBOL_GPL(pm_qos_requirement);
206 199
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index f011e0870b52..bbd85c60f741 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -21,6 +21,7 @@
21#include <linux/console.h> 21#include <linux/console.h>
22#include <linux/cpu.h> 22#include <linux/cpu.h>
23#include <linux/freezer.h> 23#include <linux/freezer.h>
24#include <linux/ftrace.h>
24 25
25#include "power.h" 26#include "power.h"
26 27
@@ -255,7 +256,7 @@ static int create_image(int platform_mode)
255 256
256int hibernation_snapshot(int platform_mode) 257int hibernation_snapshot(int platform_mode)
257{ 258{
258 int error; 259 int error, ftrace_save;
259 260
260 /* Free memory before shutting down devices. */ 261 /* Free memory before shutting down devices. */
261 error = swsusp_shrink_memory(); 262 error = swsusp_shrink_memory();
@@ -267,6 +268,7 @@ int hibernation_snapshot(int platform_mode)
267 goto Close; 268 goto Close;
268 269
269 suspend_console(); 270 suspend_console();
271 ftrace_save = __ftrace_enabled_save();
270 error = device_suspend(PMSG_FREEZE); 272 error = device_suspend(PMSG_FREEZE);
271 if (error) 273 if (error)
272 goto Recover_platform; 274 goto Recover_platform;
@@ -296,6 +298,7 @@ int hibernation_snapshot(int platform_mode)
296 Resume_devices: 298 Resume_devices:
297 device_resume(in_suspend ? 299 device_resume(in_suspend ?
298 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); 300 (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
301 __ftrace_enabled_restore(ftrace_save);
299 resume_console(); 302 resume_console();
300 Close: 303 Close:
301 platform_end(platform_mode); 304 platform_end(platform_mode);
@@ -366,10 +369,11 @@ static int resume_target_kernel(void)
366 369
367int hibernation_restore(int platform_mode) 370int hibernation_restore(int platform_mode)
368{ 371{
369 int error; 372 int error, ftrace_save;
370 373
371 pm_prepare_console(); 374 pm_prepare_console();
372 suspend_console(); 375 suspend_console();
376 ftrace_save = __ftrace_enabled_save();
373 error = device_suspend(PMSG_QUIESCE); 377 error = device_suspend(PMSG_QUIESCE);
374 if (error) 378 if (error)
375 goto Finish; 379 goto Finish;
@@ -384,6 +388,7 @@ int hibernation_restore(int platform_mode)
384 platform_restore_cleanup(platform_mode); 388 platform_restore_cleanup(platform_mode);
385 device_resume(PMSG_RECOVER); 389 device_resume(PMSG_RECOVER);
386 Finish: 390 Finish:
391 __ftrace_enabled_restore(ftrace_save);
387 resume_console(); 392 resume_console();
388 pm_restore_console(); 393 pm_restore_console();
389 return error; 394 return error;
@@ -396,7 +401,7 @@ int hibernation_restore(int platform_mode)
396 401
397int hibernation_platform_enter(void) 402int hibernation_platform_enter(void)
398{ 403{
399 int error; 404 int error, ftrace_save;
400 405
401 if (!hibernation_ops) 406 if (!hibernation_ops)
402 return -ENOSYS; 407 return -ENOSYS;
@@ -411,6 +416,7 @@ int hibernation_platform_enter(void)
411 goto Close; 416 goto Close;
412 417
413 suspend_console(); 418 suspend_console();
419 ftrace_save = __ftrace_enabled_save();
414 error = device_suspend(PMSG_HIBERNATE); 420 error = device_suspend(PMSG_HIBERNATE);
415 if (error) { 421 if (error) {
416 if (hibernation_ops->recover) 422 if (hibernation_ops->recover)
@@ -445,6 +451,7 @@ int hibernation_platform_enter(void)
445 hibernation_ops->finish(); 451 hibernation_ops->finish();
446 Resume_devices: 452 Resume_devices:
447 device_resume(PMSG_RESTORE); 453 device_resume(PMSG_RESTORE);
454 __ftrace_enabled_restore(ftrace_save);
448 resume_console(); 455 resume_console();
449 Close: 456 Close:
450 hibernation_ops->end(); 457 hibernation_ops->end();
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 0b7476f5d2a6..540b16b68565 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -21,6 +21,7 @@
21#include <linux/freezer.h> 21#include <linux/freezer.h>
22#include <linux/vmstat.h> 22#include <linux/vmstat.h>
23#include <linux/syscalls.h> 23#include <linux/syscalls.h>
24#include <linux/ftrace.h>
24 25
25#include "power.h" 26#include "power.h"
26 27
@@ -310,7 +311,7 @@ static int suspend_enter(suspend_state_t state)
310 */ 311 */
311int suspend_devices_and_enter(suspend_state_t state) 312int suspend_devices_and_enter(suspend_state_t state)
312{ 313{
313 int error; 314 int error, ftrace_save;
314 315
315 if (!suspend_ops) 316 if (!suspend_ops)
316 return -ENOSYS; 317 return -ENOSYS;
@@ -321,6 +322,7 @@ int suspend_devices_and_enter(suspend_state_t state)
321 goto Close; 322 goto Close;
322 } 323 }
323 suspend_console(); 324 suspend_console();
325 ftrace_save = __ftrace_enabled_save();
324 suspend_test_start(); 326 suspend_test_start();
325 error = device_suspend(PMSG_SUSPEND); 327 error = device_suspend(PMSG_SUSPEND);
326 if (error) { 328 if (error) {
@@ -352,6 +354,7 @@ int suspend_devices_and_enter(suspend_state_t state)
352 suspend_test_start(); 354 suspend_test_start();
353 device_resume(PMSG_RESUME); 355 device_resume(PMSG_RESUME);
354 suspend_test_finish("resume devices"); 356 suspend_test_finish("resume devices");
357 __ftrace_enabled_restore(ftrace_save);
355 resume_console(); 358 resume_console();
356 Close: 359 Close:
357 if (suspend_ops->end) 360 if (suspend_ops->end)
diff --git a/kernel/resource.c b/kernel/resource.c
index f5b518eabefe..03d796c1b2e9 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -362,35 +362,21 @@ int allocate_resource(struct resource *root, struct resource *new,
362 362
363EXPORT_SYMBOL(allocate_resource); 363EXPORT_SYMBOL(allocate_resource);
364 364
365/** 365/*
366 * insert_resource - Inserts a resource in the resource tree 366 * Insert a resource into the resource tree. If successful, return NULL,
367 * @parent: parent of the new resource 367 * otherwise return the conflicting resource (compare to __request_resource())
368 * @new: new resource to insert
369 *
370 * Returns 0 on success, -EBUSY if the resource can't be inserted.
371 *
372 * This function is equivalent to request_resource when no conflict
373 * happens. If a conflict happens, and the conflicting resources
374 * entirely fit within the range of the new resource, then the new
375 * resource is inserted and the conflicting resources become children of
376 * the new resource.
377 */ 368 */
378int insert_resource(struct resource *parent, struct resource *new) 369static struct resource * __insert_resource(struct resource *parent, struct resource *new)
379{ 370{
380 int result;
381 struct resource *first, *next; 371 struct resource *first, *next;
382 372
383 write_lock(&resource_lock);
384
385 for (;; parent = first) { 373 for (;; parent = first) {
386 result = 0;
387 first = __request_resource(parent, new); 374 first = __request_resource(parent, new);
388 if (!first) 375 if (!first)
389 goto out; 376 return first;
390 377
391 result = -EBUSY;
392 if (first == parent) 378 if (first == parent)
393 goto out; 379 return first;
394 380
395 if ((first->start > new->start) || (first->end < new->end)) 381 if ((first->start > new->start) || (first->end < new->end))
396 break; 382 break;
@@ -401,15 +387,13 @@ int insert_resource(struct resource *parent, struct resource *new)
401 for (next = first; ; next = next->sibling) { 387 for (next = first; ; next = next->sibling) {
402 /* Partial overlap? Bad, and unfixable */ 388 /* Partial overlap? Bad, and unfixable */
403 if (next->start < new->start || next->end > new->end) 389 if (next->start < new->start || next->end > new->end)
404 goto out; 390 return next;
405 if (!next->sibling) 391 if (!next->sibling)
406 break; 392 break;
407 if (next->sibling->start > new->end) 393 if (next->sibling->start > new->end)
408 break; 394 break;
409 } 395 }
410 396
411 result = 0;
412
413 new->parent = parent; 397 new->parent = parent;
414 new->sibling = next->sibling; 398 new->sibling = next->sibling;
415 new->child = first; 399 new->child = first;
@@ -426,10 +410,64 @@ int insert_resource(struct resource *parent, struct resource *new)
426 next = next->sibling; 410 next = next->sibling;
427 next->sibling = new; 411 next->sibling = new;
428 } 412 }
413 return NULL;
414}
429 415
430 out: 416/**
417 * insert_resource - Inserts a resource in the resource tree
418 * @parent: parent of the new resource
419 * @new: new resource to insert
420 *
421 * Returns 0 on success, -EBUSY if the resource can't be inserted.
422 *
423 * This function is equivalent to request_resource when no conflict
424 * happens. If a conflict happens, and the conflicting resources
425 * entirely fit within the range of the new resource, then the new
426 * resource is inserted and the conflicting resources become children of
427 * the new resource.
428 */
429int insert_resource(struct resource *parent, struct resource *new)
430{
431 struct resource *conflict;
432
433 write_lock(&resource_lock);
434 conflict = __insert_resource(parent, new);
435 write_unlock(&resource_lock);
436 return conflict ? -EBUSY : 0;
437}
438
439/**
440 * insert_resource_expand_to_fit - Insert a resource into the resource tree
441 * @root: root resource descriptor
442 * @new: new resource to insert
443 *
444 * Insert a resource into the resource tree, possibly expanding it in order
445 * to make it encompass any conflicting resources.
446 */
447void insert_resource_expand_to_fit(struct resource *root, struct resource *new)
448{
449 if (new->parent)
450 return;
451
452 write_lock(&resource_lock);
453 for (;;) {
454 struct resource *conflict;
455
456 conflict = __insert_resource(root, new);
457 if (!conflict)
458 break;
459 if (conflict == root)
460 break;
461
462 /* Ok, expand resource to cover the conflict, then try again .. */
463 if (conflict->start < new->start)
464 new->start = conflict->start;
465 if (conflict->end > new->end)
466 new->end = conflict->end;
467
468 printk("Expanded resource %s due to conflict with %s\n", new->name, conflict->name);
469 }
431 write_unlock(&resource_lock); 470 write_unlock(&resource_lock);
432 return result;
433} 471}
434 472
435/** 473/**
diff --git a/kernel/sched.c b/kernel/sched.c
index b112caaa400a..8626ae50ce08 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4176,6 +4176,65 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
4176} 4176}
4177 4177
4178/* 4178/*
4179 * Use precise platform statistics if available:
4180 */
4181#ifdef CONFIG_VIRT_CPU_ACCOUNTING
4182cputime_t task_utime(struct task_struct *p)
4183{
4184 return p->utime;
4185}
4186
4187cputime_t task_stime(struct task_struct *p)
4188{
4189 return p->stime;
4190}
4191#else
4192cputime_t task_utime(struct task_struct *p)
4193{
4194 clock_t utime = cputime_to_clock_t(p->utime),
4195 total = utime + cputime_to_clock_t(p->stime);
4196 u64 temp;
4197
4198 /*
4199 * Use CFS's precise accounting:
4200 */
4201 temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
4202
4203 if (total) {
4204 temp *= utime;
4205 do_div(temp, total);
4206 }
4207 utime = (clock_t)temp;
4208
4209 p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime));
4210 return p->prev_utime;
4211}
4212
4213cputime_t task_stime(struct task_struct *p)
4214{
4215 clock_t stime;
4216
4217 /*
4218 * Use CFS's precise accounting. (we subtract utime from
4219 * the total, to make sure the total observed by userspace
4220 * grows monotonically - apps rely on that):
4221 */
4222 stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
4223 cputime_to_clock_t(task_utime(p));
4224
4225 if (stime >= 0)
4226 p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));
4227
4228 return p->prev_stime;
4229}
4230#endif
4231
4232inline cputime_t task_gtime(struct task_struct *p)
4233{
4234 return p->gtime;
4235}
4236
4237/*
4179 * This function gets called by the timer code, with HZ frequency. 4238 * This function gets called by the timer code, with HZ frequency.
4180 * We call it with interrupts disabled. 4239 * We call it with interrupts disabled.
4181 * 4240 *
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
index 204991a0bfa7..e8ab096ddfe3 100644
--- a/kernel/sched_clock.c
+++ b/kernel/sched_clock.c
@@ -12,19 +12,17 @@
12 * 12 *
13 * Create a semi stable clock from a mixture of other events, including: 13 * Create a semi stable clock from a mixture of other events, including:
14 * - gtod 14 * - gtod
15 * - jiffies
16 * - sched_clock() 15 * - sched_clock()
17 * - explicit idle events 16 * - explicit idle events
18 * 17 *
19 * We use gtod as base and the unstable clock deltas. The deltas are filtered, 18 * We use gtod as base and the unstable clock deltas. The deltas are filtered,
20 * making it monotonic and keeping it within an expected window. This window 19 * making it monotonic and keeping it within an expected window.
21 * is set up using jiffies.
22 * 20 *
23 * Furthermore, explicit sleep and wakeup hooks allow us to account for time 21 * Furthermore, explicit sleep and wakeup hooks allow us to account for time
24 * that is otherwise invisible (TSC gets stopped). 22 * that is otherwise invisible (TSC gets stopped).
25 * 23 *
26 * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat 24 * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
27 * consistent between cpus (never more than 1 jiffies difference). 25 * consistent between cpus (never more than 2 jiffies difference).
28 */ 26 */
29#include <linux/sched.h> 27#include <linux/sched.h>
30#include <linux/percpu.h> 28#include <linux/percpu.h>
@@ -54,7 +52,6 @@ struct sched_clock_data {
54 */ 52 */
55 raw_spinlock_t lock; 53 raw_spinlock_t lock;
56 54
57 unsigned long tick_jiffies;
58 u64 tick_raw; 55 u64 tick_raw;
59 u64 tick_gtod; 56 u64 tick_gtod;
60 u64 clock; 57 u64 clock;
@@ -75,14 +72,12 @@ static inline struct sched_clock_data *cpu_sdc(int cpu)
75void sched_clock_init(void) 72void sched_clock_init(void)
76{ 73{
77 u64 ktime_now = ktime_to_ns(ktime_get()); 74 u64 ktime_now = ktime_to_ns(ktime_get());
78 unsigned long now_jiffies = jiffies;
79 int cpu; 75 int cpu;
80 76
81 for_each_possible_cpu(cpu) { 77 for_each_possible_cpu(cpu) {
82 struct sched_clock_data *scd = cpu_sdc(cpu); 78 struct sched_clock_data *scd = cpu_sdc(cpu);
83 79
84 scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; 80 scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
85 scd->tick_jiffies = now_jiffies;
86 scd->tick_raw = 0; 81 scd->tick_raw = 0;
87 scd->tick_gtod = ktime_now; 82 scd->tick_gtod = ktime_now;
88 scd->clock = ktime_now; 83 scd->clock = ktime_now;
@@ -92,46 +87,51 @@ void sched_clock_init(void)
92} 87}
93 88
94/* 89/*
90 * min,max except they take wrapping into account
91 */
92
93static inline u64 wrap_min(u64 x, u64 y)
94{
95 return (s64)(x - y) < 0 ? x : y;
96}
97
98static inline u64 wrap_max(u64 x, u64 y)
99{
100 return (s64)(x - y) > 0 ? x : y;
101}
102
103/*
95 * update the percpu scd from the raw @now value 104 * update the percpu scd from the raw @now value
96 * 105 *
97 * - filter out backward motion 106 * - filter out backward motion
98 * - use jiffies to generate a min,max window to clip the raw values 107 * - use the GTOD tick value to create a window to filter crazy TSC values
99 */ 108 */
100static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now) 109static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now)
101{ 110{
102 unsigned long now_jiffies = jiffies;
103 long delta_jiffies = now_jiffies - scd->tick_jiffies;
104 u64 clock = scd->clock;
105 u64 min_clock, max_clock;
106 s64 delta = now - scd->tick_raw; 111 s64 delta = now - scd->tick_raw;
112 u64 clock, min_clock, max_clock;
107 113
108 WARN_ON_ONCE(!irqs_disabled()); 114 WARN_ON_ONCE(!irqs_disabled());
109 min_clock = scd->tick_gtod + delta_jiffies * TICK_NSEC;
110 115
111 if (unlikely(delta < 0)) { 116 if (unlikely(delta < 0))
112 clock++; 117 delta = 0;
113 goto out;
114 }
115 118
116 max_clock = min_clock + TICK_NSEC; 119 /*
120 * scd->clock = clamp(scd->tick_gtod + delta,
121 * max(scd->tick_gtod, scd->clock),
122 * scd->tick_gtod + TICK_NSEC);
123 */
117 124
118 if (unlikely(clock + delta > max_clock)) { 125 clock = scd->tick_gtod + delta;
119 if (clock < max_clock) 126 min_clock = wrap_max(scd->tick_gtod, scd->clock);
120 clock = max_clock; 127 max_clock = scd->tick_gtod + TICK_NSEC;
121 else
122 clock++;
123 } else {
124 clock += delta;
125 }
126 128
127 out: 129 clock = wrap_max(clock, min_clock);
128 if (unlikely(clock < min_clock)) 130 clock = wrap_min(clock, max_clock);
129 clock = min_clock;
130 131
131 scd->tick_jiffies = now_jiffies;
132 scd->clock = clock; 132 scd->clock = clock;
133 133
134 return clock; 134 return scd->clock;
135} 135}
136 136
137static void lock_double_clock(struct sched_clock_data *data1, 137static void lock_double_clock(struct sched_clock_data *data1,
@@ -171,7 +171,7 @@ u64 sched_clock_cpu(int cpu)
171 * larger time as the latest time for both 171 * larger time as the latest time for both
172 * runqueues. (this creates monotonic movement) 172 * runqueues. (this creates monotonic movement)
173 */ 173 */
174 if (likely(remote_clock < this_clock)) { 174 if (likely((s64)(remote_clock - this_clock) < 0)) {
175 clock = this_clock; 175 clock = this_clock;
176 scd->clock = clock; 176 scd->clock = clock;
177 } else { 177 } else {
@@ -207,14 +207,9 @@ void sched_clock_tick(void)
207 now = sched_clock(); 207 now = sched_clock();
208 208
209 __raw_spin_lock(&scd->lock); 209 __raw_spin_lock(&scd->lock);
210 __update_sched_clock(scd, now);
211 /*
212 * update tick_gtod after __update_sched_clock() because that will
213 * already observe 1 new jiffy; adding a new tick_gtod to that would
214 * increase the clock 2 jiffies.
215 */
216 scd->tick_raw = now; 210 scd->tick_raw = now;
217 scd->tick_gtod = now_gtod; 211 scd->tick_gtod = now_gtod;
212 __update_sched_clock(scd, now);
218 __raw_spin_unlock(&scd->lock); 213 __raw_spin_unlock(&scd->lock);
219} 214}
220 215
@@ -232,18 +227,7 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
232 */ 227 */
233void sched_clock_idle_wakeup_event(u64 delta_ns) 228void sched_clock_idle_wakeup_event(u64 delta_ns)
234{ 229{
235 struct sched_clock_data *scd = this_scd(); 230 sched_clock_tick();
236
237 /*
238 * Override the previous timestamp and ignore all
239 * sched_clock() deltas that occured while we idled,
240 * and use the PM-provided delta_ns to advance the
241 * rq clock:
242 */
243 __raw_spin_lock(&scd->lock);
244 scd->clock += delta_ns;
245 __raw_spin_unlock(&scd->lock);
246
247 touch_softlockup_watchdog(); 231 touch_softlockup_watchdog();
248} 232}
249EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event); 233EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 998ba54b4543..552310798dad 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -199,6 +199,8 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
199 199
200static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) 200static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
201{ 201{
202 if (rt_rq->rt_nr_running)
203 resched_task(rq_of_rt_rq(rt_rq)->curr);
202} 204}
203 205
204static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) 206static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
@@ -438,9 +440,6 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
438{ 440{
439 u64 runtime = sched_rt_runtime(rt_rq); 441 u64 runtime = sched_rt_runtime(rt_rq);
440 442
441 if (runtime == RUNTIME_INF)
442 return 0;
443
444 if (rt_rq->rt_throttled) 443 if (rt_rq->rt_throttled)
445 return rt_rq_throttled(rt_rq); 444 return rt_rq_throttled(rt_rq);
446 445
@@ -491,9 +490,11 @@ static void update_curr_rt(struct rq *rq)
491 rt_rq = rt_rq_of_se(rt_se); 490 rt_rq = rt_rq_of_se(rt_se);
492 491
493 spin_lock(&rt_rq->rt_runtime_lock); 492 spin_lock(&rt_rq->rt_runtime_lock);
494 rt_rq->rt_time += delta_exec; 493 if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
495 if (sched_rt_runtime_exceeded(rt_rq)) 494 rt_rq->rt_time += delta_exec;
496 resched_task(curr); 495 if (sched_rt_runtime_exceeded(rt_rq))
496 resched_task(curr);
497 }
497 spin_unlock(&rt_rq->rt_runtime_lock); 498 spin_unlock(&rt_rq->rt_runtime_lock);
498 } 499 }
499} 500}
diff --git a/kernel/smp.c b/kernel/smp.c
index 782e2b93e465..f362a8553777 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -210,8 +210,10 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
210{ 210{
211 struct call_single_data d; 211 struct call_single_data d;
212 unsigned long flags; 212 unsigned long flags;
213 /* prevent preemption and reschedule on another processor */ 213 /* prevent preemption and reschedule on another processor,
214 as well as CPU removal */
214 int me = get_cpu(); 215 int me = get_cpu();
216 int err = 0;
215 217
216 /* Can deadlock when called with interrupts disabled */ 218 /* Can deadlock when called with interrupts disabled */
217 WARN_ON(irqs_disabled()); 219 WARN_ON(irqs_disabled());
@@ -220,7 +222,7 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
220 local_irq_save(flags); 222 local_irq_save(flags);
221 func(info); 223 func(info);
222 local_irq_restore(flags); 224 local_irq_restore(flags);
223 } else { 225 } else if ((unsigned)cpu < NR_CPUS && cpu_online(cpu)) {
224 struct call_single_data *data = NULL; 226 struct call_single_data *data = NULL;
225 227
226 if (!wait) { 228 if (!wait) {
@@ -236,10 +238,12 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
236 data->func = func; 238 data->func = func;
237 data->info = info; 239 data->info = info;
238 generic_exec_single(cpu, data); 240 generic_exec_single(cpu, data);
241 } else {
242 err = -ENXIO; /* CPU not online */
239 } 243 }
240 244
241 put_cpu(); 245 put_cpu();
242 return 0; 246 return err;
243} 247}
244EXPORT_SYMBOL(smp_call_function_single); 248EXPORT_SYMBOL(smp_call_function_single);
245 249
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index b75b492fbfcf..cb838ee93a82 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -233,7 +233,8 @@ static void check_hung_uninterruptible_tasks(int this_cpu)
233 do_each_thread(g, t) { 233 do_each_thread(g, t) {
234 if (!--max_count) 234 if (!--max_count)
235 goto unlock; 235 goto unlock;
236 if (t->state & TASK_UNINTERRUPTIBLE) 236 /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
237 if (t->state == TASK_UNINTERRUPTIBLE)
237 check_hung_task(t, now); 238 check_hung_task(t, now);
238 } while_each_thread(g, t); 239 } while_each_thread(g, t);
239 unlock: 240 unlock:
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index fe4713347275..50ec0886fa3d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -159,6 +159,7 @@ static int proc_dointvec_taint(struct ctl_table *table, int write, struct file *
159static struct ctl_table root_table[]; 159static struct ctl_table root_table[];
160static struct ctl_table_root sysctl_table_root; 160static struct ctl_table_root sysctl_table_root;
161static struct ctl_table_header root_table_header = { 161static struct ctl_table_header root_table_header = {
162 .count = 1,
162 .ctl_table = root_table, 163 .ctl_table = root_table,
163 .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list), 164 .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),
164 .root = &sysctl_table_root, 165 .root = &sysctl_table_root,
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 7a46bde78c66..a87b0468568b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -162,6 +162,8 @@ void tick_nohz_stop_idle(int cpu)
162 ts->idle_lastupdate = now; 162 ts->idle_lastupdate = now;
163 ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta); 163 ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
164 ts->idle_active = 0; 164 ts->idle_active = 0;
165
166 sched_clock_idle_wakeup_event(0);
165 } 167 }
166} 168}
167 169
@@ -177,6 +179,7 @@ static ktime_t tick_nohz_start_idle(struct tick_sched *ts)
177 } 179 }
178 ts->idle_entrytime = now; 180 ts->idle_entrytime = now;
179 ts->idle_active = 1; 181 ts->idle_active = 1;
182 sched_clock_idle_sleep_event();
180 return now; 183 return now;
181} 184}
182 185