Diffstat (limited to 'kernel')
-rw-r--r--  kernel/acct.c             |  23
-rw-r--r--  kernel/audit.c            |   6
-rw-r--r--  kernel/cgroup.c           |   4
-rw-r--r--  kernel/fork.c             |   2
-rw-r--r--  kernel/futex.c            |   6
-rw-r--r--  kernel/futex_compat.c     |   2
-rw-r--r--  kernel/marker.c           |  40
-rw-r--r--  kernel/printk.c           |  83
-rw-r--r--  kernel/relay.c            |  12
-rw-r--r--  kernel/sched.c            |  71
-rw-r--r--  kernel/sched_debug.c      |   1
-rw-r--r--  kernel/sched_fair.c       | 205
-rw-r--r--  kernel/time/clocksource.c |  16
-rw-r--r--  kernel/time/timekeeping.c |   4
-rw-r--r--  kernel/timer.c            |  10
15 files changed, 306 insertions, 179 deletions
diff --git a/kernel/acct.c b/kernel/acct.c
index 521dfa53cb99..91e1cfd734d2 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -58,6 +58,7 @@ | |||
58 | #include <asm/uaccess.h> | 58 | #include <asm/uaccess.h> |
59 | #include <asm/div64.h> | 59 | #include <asm/div64.h> |
60 | #include <linux/blkdev.h> /* sector_div */ | 60 | #include <linux/blkdev.h> /* sector_div */ |
61 | #include <linux/pid_namespace.h> | ||
61 | 62 | ||
62 | /* | 63 | /* |
63 | * These constants control the amount of freespace that suspend and | 64 | * These constants control the amount of freespace that suspend and |
@@ -74,7 +75,7 @@ int acct_parm[3] = {4, 2, 30}; | |||
74 | /* | 75 | /* |
75 | * External references and all of the globals. | 76 | * External references and all of the globals. |
76 | */ | 77 | */ |
77 | static void do_acct_process(struct file *); | 78 | static void do_acct_process(struct pid_namespace *ns, struct file *); |
78 | 79 | ||
79 | /* | 80 | /* |
80 | * This structure is used so that all the data protected by lock | 81 | * This structure is used so that all the data protected by lock |
@@ -86,6 +87,7 @@ struct acct_glbs { | |||
86 | volatile int active; | 87 | volatile int active; |
87 | volatile int needcheck; | 88 | volatile int needcheck; |
88 | struct file *file; | 89 | struct file *file; |
90 | struct pid_namespace *ns; | ||
89 | struct timer_list timer; | 91 | struct timer_list timer; |
90 | }; | 92 | }; |
91 | 93 | ||
@@ -175,9 +177,11 @@ out: | |||
175 | static void acct_file_reopen(struct file *file) | 177 | static void acct_file_reopen(struct file *file) |
176 | { | 178 | { |
177 | struct file *old_acct = NULL; | 179 | struct file *old_acct = NULL; |
180 | struct pid_namespace *old_ns = NULL; | ||
178 | 181 | ||
179 | if (acct_globals.file) { | 182 | if (acct_globals.file) { |
180 | old_acct = acct_globals.file; | 183 | old_acct = acct_globals.file; |
184 | old_ns = acct_globals.ns; | ||
181 | del_timer(&acct_globals.timer); | 185 | del_timer(&acct_globals.timer); |
182 | acct_globals.active = 0; | 186 | acct_globals.active = 0; |
183 | acct_globals.needcheck = 0; | 187 | acct_globals.needcheck = 0; |
@@ -185,6 +189,7 @@ static void acct_file_reopen(struct file *file) | |||
185 | } | 189 | } |
186 | if (file) { | 190 | if (file) { |
187 | acct_globals.file = file; | 191 | acct_globals.file = file; |
192 | acct_globals.ns = get_pid_ns(task_active_pid_ns(current)); | ||
188 | acct_globals.needcheck = 0; | 193 | acct_globals.needcheck = 0; |
189 | acct_globals.active = 1; | 194 | acct_globals.active = 1; |
190 | /* It's been deleted if it was used before so this is safe */ | 195 | /* It's been deleted if it was used before so this is safe */ |
@@ -196,8 +201,9 @@ static void acct_file_reopen(struct file *file) | |||
196 | if (old_acct) { | 201 | if (old_acct) { |
197 | mnt_unpin(old_acct->f_path.mnt); | 202 | mnt_unpin(old_acct->f_path.mnt); |
198 | spin_unlock(&acct_globals.lock); | 203 | spin_unlock(&acct_globals.lock); |
199 | do_acct_process(old_acct); | 204 | do_acct_process(old_ns, old_acct); |
200 | filp_close(old_acct, NULL); | 205 | filp_close(old_acct, NULL); |
206 | put_pid_ns(old_ns); | ||
201 | spin_lock(&acct_globals.lock); | 207 | spin_lock(&acct_globals.lock); |
202 | } | 208 | } |
203 | } | 209 | } |
@@ -419,7 +425,7 @@ static u32 encode_float(u64 value) | |||
419 | /* | 425 | /* |
420 | * do_acct_process does all actual work. Caller holds the reference to file. | 426 | * do_acct_process does all actual work. Caller holds the reference to file. |
421 | */ | 427 | */ |
422 | static void do_acct_process(struct file *file) | 428 | static void do_acct_process(struct pid_namespace *ns, struct file *file) |
423 | { | 429 | { |
424 | struct pacct_struct *pacct = ¤t->signal->pacct; | 430 | struct pacct_struct *pacct = ¤t->signal->pacct; |
425 | acct_t ac; | 431 | acct_t ac; |
@@ -481,8 +487,10 @@ static void do_acct_process(struct file *file) | |||
481 | ac.ac_gid16 = current->gid; | 487 | ac.ac_gid16 = current->gid; |
482 | #endif | 488 | #endif |
483 | #if ACCT_VERSION==3 | 489 | #if ACCT_VERSION==3 |
484 | ac.ac_pid = current->tgid; | 490 | ac.ac_pid = task_tgid_nr_ns(current, ns); |
485 | ac.ac_ppid = current->real_parent->tgid; | 491 | rcu_read_lock(); |
492 | ac.ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent), ns); | ||
493 | rcu_read_unlock(); | ||
486 | #endif | 494 | #endif |
487 | 495 | ||
488 | spin_lock_irq(¤t->sighand->siglock); | 496 | spin_lock_irq(¤t->sighand->siglock); |
@@ -578,6 +586,7 @@ void acct_collect(long exitcode, int group_dead) | |||
578 | void acct_process(void) | 586 | void acct_process(void) |
579 | { | 587 | { |
580 | struct file *file = NULL; | 588 | struct file *file = NULL; |
589 | struct pid_namespace *ns; | ||
581 | 590 | ||
582 | /* | 591 | /* |
583 | * accelerate the common fastpath: | 592 | * accelerate the common fastpath: |
@@ -592,8 +601,10 @@ void acct_process(void) | |||
592 | return; | 601 | return; |
593 | } | 602 | } |
594 | get_file(file); | 603 | get_file(file); |
604 | ns = get_pid_ns(acct_globals.ns); | ||
595 | spin_unlock(&acct_globals.lock); | 605 | spin_unlock(&acct_globals.lock); |
596 | 606 | ||
597 | do_acct_process(file); | 607 | do_acct_process(ns, file); |
598 | fput(file); | 608 | fput(file); |
609 | put_pid_ns(ns); | ||
599 | } | 610 | } |
diff --git a/kernel/audit.c b/kernel/audit.c
index be55cb503633..b782b046543d 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1269,8 +1269,8 @@ static void audit_log_n_string(struct audit_buffer *ab, size_t slen, | |||
1269 | 1269 | ||
1270 | /** | 1270 | /** |
1271 | * audit_string_contains_control - does a string need to be logged in hex | 1271 | * audit_string_contains_control - does a string need to be logged in hex |
1272 | * @string - string to be checked | 1272 | * @string: string to be checked |
1273 | * @len - max length of the string to check | 1273 | * @len: max length of the string to check |
1274 | */ | 1274 | */ |
1275 | int audit_string_contains_control(const char *string, size_t len) | 1275 | int audit_string_contains_control(const char *string, size_t len) |
1276 | { | 1276 | { |
@@ -1285,7 +1285,7 @@ int audit_string_contains_control(const char *string, size_t len) | |||
1285 | /** | 1285 | /** |
1286 | * audit_log_n_untrustedstring - log a string that may contain random characters | 1286 | * audit_log_n_untrustedstring - log a string that may contain random characters |
1287 | * @ab: audit_buffer | 1287 | * @ab: audit_buffer |
1288 | * @len: lenth of string (not including trailing null) | 1288 | * @len: length of string (not including trailing null) |
1289 | * @string: string to be logged | 1289 | * @string: string to be logged |
1290 | * | 1290 | * |
1291 | * This code will escape a string that is passed to it if the string | 1291 | * This code will escape a string that is passed to it if the string |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e9c2fb01e89b..53d86b4b0ce0 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2082,7 +2082,7 @@ static int cgroup_tasks_open(struct inode *unused, struct file *file) | |||
2082 | 2082 | ||
2083 | kfree(pidarray); | 2083 | kfree(pidarray); |
2084 | } else { | 2084 | } else { |
2085 | ctr->buf = 0; | 2085 | ctr->buf = NULL; |
2086 | ctr->bufsz = 0; | 2086 | ctr->bufsz = 0; |
2087 | } | 2087 | } |
2088 | file->private_data = ctr; | 2088 | file->private_data = ctr; |
@@ -2614,7 +2614,7 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v) | |||
2614 | 2614 | ||
2615 | static int cgroupstats_open(struct inode *inode, struct file *file) | 2615 | static int cgroupstats_open(struct inode *inode, struct file *file) |
2616 | { | 2616 | { |
2617 | return single_open(file, proc_cgroupstats_show, 0); | 2617 | return single_open(file, proc_cgroupstats_show, NULL); |
2618 | } | 2618 | } |
2619 | 2619 | ||
2620 | static struct file_operations proc_cgroupstats_operations = { | 2620 | static struct file_operations proc_cgroupstats_operations = { |
diff --git a/kernel/fork.c b/kernel/fork.c
index dd249c37b3a3..9c042f901570 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -394,7 +394,6 @@ void __mmdrop(struct mm_struct *mm) | |||
394 | { | 394 | { |
395 | BUG_ON(mm == &init_mm); | 395 | BUG_ON(mm == &init_mm); |
396 | mm_free_pgd(mm); | 396 | mm_free_pgd(mm); |
397 | mm_free_cgroup(mm); | ||
398 | destroy_context(mm); | 397 | destroy_context(mm); |
399 | free_mm(mm); | 398 | free_mm(mm); |
400 | } | 399 | } |
@@ -416,6 +415,7 @@ void mmput(struct mm_struct *mm) | |||
416 | spin_unlock(&mmlist_lock); | 415 | spin_unlock(&mmlist_lock); |
417 | } | 416 | } |
418 | put_swap_token(mm); | 417 | put_swap_token(mm); |
418 | mm_free_cgroup(mm); | ||
419 | mmdrop(mm); | 419 | mmdrop(mm); |
420 | } | 420 | } |
421 | } | 421 | } |
diff --git a/kernel/futex.c b/kernel/futex.c
index 06968cd79200..e43945e995f5 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -281,7 +281,7 @@ static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
281 | */ | 281 | */ |
282 | static void get_futex_key_refs(union futex_key *key) | 282 | static void get_futex_key_refs(union futex_key *key) |
283 | { | 283 | { |
284 | if (key->both.ptr == 0) | 284 | if (key->both.ptr == NULL) |
285 | return; | 285 | return; |
286 | switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { | 286 | switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { |
287 | case FUT_OFF_INODE: | 287 | case FUT_OFF_INODE: |
@@ -2158,7 +2158,7 @@ static struct file_system_type futex_fs_type = { | |||
2158 | .kill_sb = kill_anon_super, | 2158 | .kill_sb = kill_anon_super, |
2159 | }; | 2159 | }; |
2160 | 2160 | ||
2161 | static int __init init(void) | 2161 | static int __init futex_init(void) |
2162 | { | 2162 | { |
2163 | u32 curval; | 2163 | u32 curval; |
2164 | int i; | 2164 | int i; |
@@ -2194,4 +2194,4 @@ static int __init init(void) | |||
2194 | 2194 | ||
2195 | return 0; | 2195 | return 0; |
2196 | } | 2196 | } |
2197 | __initcall(init); | 2197 | __initcall(futex_init); |
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index ff90f049f8f6..04ac3a9e42cf 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -30,7 +30,7 @@ fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry, | |||
30 | return 0; | 30 | return 0; |
31 | } | 31 | } |
32 | 32 | ||
33 | static void __user *futex_uaddr(struct robust_list *entry, | 33 | static void __user *futex_uaddr(struct robust_list __user *entry, |
34 | compat_long_t futex_offset) | 34 | compat_long_t futex_offset) |
35 | { | 35 | { |
36 | compat_uptr_t base = ptr_to_compat(entry); | 36 | compat_uptr_t base = ptr_to_compat(entry); |
diff --git a/kernel/marker.c b/kernel/marker.c
index 48a4ea5afffd..005b95954593 100644
--- a/kernel/marker.c
+++ b/kernel/marker.c
@@ -104,18 +104,18 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, | |||
104 | char ptype; | 104 | char ptype; |
105 | 105 | ||
106 | /* | 106 | /* |
107 | * disabling preemption to make sure the teardown of the callbacks can | 107 | * preempt_disable does two things : disabling preemption to make sure |
108 | * be done correctly when they are in modules and they insure RCU read | 108 | * the teardown of the callbacks can be done correctly when they are in |
109 | * coherency. | 109 | * modules and they insure RCU read coherency. |
110 | */ | 110 | */ |
111 | preempt_disable(); | 111 | preempt_disable(); |
112 | ptype = ACCESS_ONCE(mdata->ptype); | 112 | ptype = mdata->ptype; |
113 | if (likely(!ptype)) { | 113 | if (likely(!ptype)) { |
114 | marker_probe_func *func; | 114 | marker_probe_func *func; |
115 | /* Must read the ptype before ptr. They are not data dependant, | 115 | /* Must read the ptype before ptr. They are not data dependant, |
116 | * so we put an explicit smp_rmb() here. */ | 116 | * so we put an explicit smp_rmb() here. */ |
117 | smp_rmb(); | 117 | smp_rmb(); |
118 | func = ACCESS_ONCE(mdata->single.func); | 118 | func = mdata->single.func; |
119 | /* Must read the ptr before private data. They are not data | 119 | /* Must read the ptr before private data. They are not data |
120 | * dependant, so we put an explicit smp_rmb() here. */ | 120 | * dependant, so we put an explicit smp_rmb() here. */ |
121 | smp_rmb(); | 121 | smp_rmb(); |
@@ -133,7 +133,7 @@ void marker_probe_cb(const struct marker *mdata, void *call_private, | |||
133 | * in the fast path, so put the explicit barrier here. | 133 | * in the fast path, so put the explicit barrier here. |
134 | */ | 134 | */ |
135 | smp_read_barrier_depends(); | 135 | smp_read_barrier_depends(); |
136 | multi = ACCESS_ONCE(mdata->multi); | 136 | multi = mdata->multi; |
137 | for (i = 0; multi[i].func; i++) { | 137 | for (i = 0; multi[i].func; i++) { |
138 | va_start(args, fmt); | 138 | va_start(args, fmt); |
139 | multi[i].func(multi[i].probe_private, call_private, fmt, | 139 | multi[i].func(multi[i].probe_private, call_private, fmt, |
@@ -161,13 +161,13 @@ void marker_probe_cb_noarg(const struct marker *mdata, | |||
161 | char ptype; | 161 | char ptype; |
162 | 162 | ||
163 | preempt_disable(); | 163 | preempt_disable(); |
164 | ptype = ACCESS_ONCE(mdata->ptype); | 164 | ptype = mdata->ptype; |
165 | if (likely(!ptype)) { | 165 | if (likely(!ptype)) { |
166 | marker_probe_func *func; | 166 | marker_probe_func *func; |
167 | /* Must read the ptype before ptr. They are not data dependant, | 167 | /* Must read the ptype before ptr. They are not data dependant, |
168 | * so we put an explicit smp_rmb() here. */ | 168 | * so we put an explicit smp_rmb() here. */ |
169 | smp_rmb(); | 169 | smp_rmb(); |
170 | func = ACCESS_ONCE(mdata->single.func); | 170 | func = mdata->single.func; |
171 | /* Must read the ptr before private data. They are not data | 171 | /* Must read the ptr before private data. They are not data |
172 | * dependant, so we put an explicit smp_rmb() here. */ | 172 | * dependant, so we put an explicit smp_rmb() here. */ |
173 | smp_rmb(); | 173 | smp_rmb(); |
@@ -183,7 +183,7 @@ void marker_probe_cb_noarg(const struct marker *mdata, | |||
183 | * in the fast path, so put the explicit barrier here. | 183 | * in the fast path, so put the explicit barrier here. |
184 | */ | 184 | */ |
185 | smp_read_barrier_depends(); | 185 | smp_read_barrier_depends(); |
186 | multi = ACCESS_ONCE(mdata->multi); | 186 | multi = mdata->multi; |
187 | for (i = 0; multi[i].func; i++) | 187 | for (i = 0; multi[i].func; i++) |
188 | multi[i].func(multi[i].probe_private, call_private, fmt, | 188 | multi[i].func(multi[i].probe_private, call_private, fmt, |
189 | &args); | 189 | &args); |
@@ -551,9 +551,9 @@ static int set_marker(struct marker_entry **entry, struct marker *elem, | |||
551 | 551 | ||
552 | /* | 552 | /* |
553 | * Disable a marker and its probe callback. | 553 | * Disable a marker and its probe callback. |
554 | * Note: only after a synchronize_sched() issued after setting elem->call to the | 554 | * Note: only waiting an RCU period after setting elem->call to the empty |
555 | * empty function insures that the original callback is not used anymore. This | 555 | * function insures that the original callback is not used anymore. This insured |
556 | * insured by preemption disabling around the call site. | 556 | * by preempt_disable around the call site. |
557 | */ | 557 | */ |
558 | static void disable_marker(struct marker *elem) | 558 | static void disable_marker(struct marker *elem) |
559 | { | 559 | { |
@@ -565,8 +565,8 @@ static void disable_marker(struct marker *elem) | |||
565 | elem->ptype = 0; /* single probe */ | 565 | elem->ptype = 0; /* single probe */ |
566 | /* | 566 | /* |
567 | * Leave the private data and id there, because removal is racy and | 567 | * Leave the private data and id there, because removal is racy and |
568 | * should be done only after a synchronize_sched(). These are never used | 568 | * should be done only after an RCU period. These are never used until |
569 | * until the next initialization anyway. | 569 | * the next initialization anyway. |
570 | */ | 570 | */ |
571 | } | 571 | } |
572 | 572 | ||
@@ -601,9 +601,6 @@ void marker_update_probe_range(struct marker *begin, | |||
601 | 601 | ||
602 | /* | 602 | /* |
603 | * Update probes, removing the faulty probes. | 603 | * Update probes, removing the faulty probes. |
604 | * Issues a synchronize_sched() when no reference to the module passed | ||
605 | * as parameter is found in the probes so the probe module can be | ||
606 | * safely unloaded from now on. | ||
607 | * | 604 | * |
608 | * Internal callback only changed before the first probe is connected to it. | 605 | * Internal callback only changed before the first probe is connected to it. |
609 | * Single probe private data can only be changed on 0 -> 1 and 2 -> 1 | 606 | * Single probe private data can only be changed on 0 -> 1 and 2 -> 1 |
@@ -674,6 +671,9 @@ int marker_probe_register(const char *name, const char *format, | |||
674 | entry->rcu_pending = 1; | 671 | entry->rcu_pending = 1; |
675 | /* write rcu_pending before calling the RCU callback */ | 672 | /* write rcu_pending before calling the RCU callback */ |
676 | smp_wmb(); | 673 | smp_wmb(); |
674 | #ifdef CONFIG_PREEMPT_RCU | ||
675 | synchronize_sched(); /* Until we have the call_rcu_sched() */ | ||
676 | #endif | ||
677 | call_rcu(&entry->rcu, free_old_closure); | 677 | call_rcu(&entry->rcu, free_old_closure); |
678 | end: | 678 | end: |
679 | mutex_unlock(&markers_mutex); | 679 | mutex_unlock(&markers_mutex); |
@@ -717,6 +717,9 @@ int marker_probe_unregister(const char *name, | |||
717 | entry->rcu_pending = 1; | 717 | entry->rcu_pending = 1; |
718 | /* write rcu_pending before calling the RCU callback */ | 718 | /* write rcu_pending before calling the RCU callback */ |
719 | smp_wmb(); | 719 | smp_wmb(); |
720 | #ifdef CONFIG_PREEMPT_RCU | ||
721 | synchronize_sched(); /* Until we have the call_rcu_sched() */ | ||
722 | #endif | ||
720 | call_rcu(&entry->rcu, free_old_closure); | 723 | call_rcu(&entry->rcu, free_old_closure); |
721 | remove_marker(name); /* Ignore busy error message */ | 724 | remove_marker(name); /* Ignore busy error message */ |
722 | ret = 0; | 725 | ret = 0; |
@@ -795,6 +798,9 @@ int marker_probe_unregister_private_data(marker_probe_func *probe, | |||
795 | entry->rcu_pending = 1; | 798 | entry->rcu_pending = 1; |
796 | /* write rcu_pending before calling the RCU callback */ | 799 | /* write rcu_pending before calling the RCU callback */ |
797 | smp_wmb(); | 800 | smp_wmb(); |
801 | #ifdef CONFIG_PREEMPT_RCU | ||
802 | synchronize_sched(); /* Until we have the call_rcu_sched() */ | ||
803 | #endif | ||
798 | call_rcu(&entry->rcu, free_old_closure); | 804 | call_rcu(&entry->rcu, free_old_closure); |
799 | remove_marker(entry->name); /* Ignore busy error message */ | 805 | remove_marker(entry->name); /* Ignore busy error message */ |
800 | end: | 806 | end: |
diff --git a/kernel/printk.c b/kernel/printk.c
index 9adc2a473e6e..c46a20a19a15 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -616,6 +616,40 @@ asmlinkage int printk(const char *fmt, ...) | |||
616 | /* cpu currently holding logbuf_lock */ | 616 | /* cpu currently holding logbuf_lock */ |
617 | static volatile unsigned int printk_cpu = UINT_MAX; | 617 | static volatile unsigned int printk_cpu = UINT_MAX; |
618 | 618 | ||
619 | /* | ||
620 | * Can we actually use the console at this time on this cpu? | ||
621 | * | ||
622 | * Console drivers may assume that per-cpu resources have | ||
623 | * been allocated. So unless they're explicitly marked as | ||
624 | * being able to cope (CON_ANYTIME) don't call them until | ||
625 | * this CPU is officially up. | ||
626 | */ | ||
627 | static inline int can_use_console(unsigned int cpu) | ||
628 | { | ||
629 | return cpu_online(cpu) || have_callable_console(); | ||
630 | } | ||
631 | |||
632 | /* | ||
633 | * Try to get console ownership to actually show the kernel | ||
634 | * messages from a 'printk'. Return true (and with the | ||
635 | * console_semaphore held, and 'console_locked' set) if it | ||
636 | * is successful, false otherwise. | ||
637 | * | ||
638 | * This gets called with the 'logbuf_lock' spinlock held and | ||
639 | * interrupts disabled. It should return with 'lockbuf_lock' | ||
640 | * released but interrupts still disabled. | ||
641 | */ | ||
642 | static int acquire_console_semaphore_for_printk(unsigned int cpu) | ||
643 | { | ||
644 | int retval = 0; | ||
645 | |||
646 | if (can_use_console(cpu)) | ||
647 | retval = !try_acquire_console_sem(); | ||
648 | printk_cpu = UINT_MAX; | ||
649 | spin_unlock(&logbuf_lock); | ||
650 | return retval; | ||
651 | } | ||
652 | |||
619 | const char printk_recursion_bug_msg [] = | 653 | const char printk_recursion_bug_msg [] = |
620 | KERN_CRIT "BUG: recent printk recursion!\n"; | 654 | KERN_CRIT "BUG: recent printk recursion!\n"; |
621 | static int printk_recursion_bug; | 655 | static int printk_recursion_bug; |
@@ -725,43 +759,22 @@ asmlinkage int vprintk(const char *fmt, va_list args) | |||
725 | log_level_unknown = 1; | 759 | log_level_unknown = 1; |
726 | } | 760 | } |
727 | 761 | ||
728 | if (!down_trylock(&console_sem)) { | 762 | /* |
729 | /* | 763 | * Try to acquire and then immediately release the |
730 | * We own the drivers. We can drop the spinlock and | 764 | * console semaphore. The release will do all the |
731 | * let release_console_sem() print the text, maybe ... | 765 | * actual magic (print out buffers, wake up klogd, |
732 | */ | 766 | * etc). |
733 | console_locked = 1; | 767 | * |
734 | printk_cpu = UINT_MAX; | 768 | * The acquire_console_semaphore_for_printk() function |
735 | spin_unlock(&logbuf_lock); | 769 | * will release 'logbuf_lock' regardless of whether it |
770 | * actually gets the semaphore or not. | ||
771 | */ | ||
772 | if (acquire_console_semaphore_for_printk(this_cpu)) | ||
773 | release_console_sem(); | ||
736 | 774 | ||
737 | /* | 775 | lockdep_on(); |
738 | * Console drivers may assume that per-cpu resources have | ||
739 | * been allocated. So unless they're explicitly marked as | ||
740 | * being able to cope (CON_ANYTIME) don't call them until | ||
741 | * this CPU is officially up. | ||
742 | */ | ||
743 | if (cpu_online(smp_processor_id()) || have_callable_console()) { | ||
744 | console_may_schedule = 0; | ||
745 | release_console_sem(); | ||
746 | } else { | ||
747 | /* Release by hand to avoid flushing the buffer. */ | ||
748 | console_locked = 0; | ||
749 | up(&console_sem); | ||
750 | } | ||
751 | lockdep_on(); | ||
752 | raw_local_irq_restore(flags); | ||
753 | } else { | ||
754 | /* | ||
755 | * Someone else owns the drivers. We drop the spinlock, which | ||
756 | * allows the semaphore holder to proceed and to call the | ||
757 | * console drivers with the output which we just produced. | ||
758 | */ | ||
759 | printk_cpu = UINT_MAX; | ||
760 | spin_unlock(&logbuf_lock); | ||
761 | lockdep_on(); | ||
762 | out_restore_irqs: | 776 | out_restore_irqs: |
763 | raw_local_irq_restore(flags); | 777 | raw_local_irq_restore(flags); |
764 | } | ||
765 | 778 | ||
766 | preempt_enable(); | 779 | preempt_enable(); |
767 | return printed_len; | 780 | return printed_len; |
diff --git a/kernel/relay.c b/kernel/relay.c
index d080b9d161a7..d6204a485818 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -736,7 +736,7 @@ static int relay_file_open(struct inode *inode, struct file *filp) | |||
736 | kref_get(&buf->kref); | 736 | kref_get(&buf->kref); |
737 | filp->private_data = buf; | 737 | filp->private_data = buf; |
738 | 738 | ||
739 | return 0; | 739 | return nonseekable_open(inode, filp); |
740 | } | 740 | } |
741 | 741 | ||
742 | /** | 742 | /** |
@@ -1056,6 +1056,10 @@ static struct pipe_buf_operations relay_pipe_buf_ops = { | |||
1056 | .get = generic_pipe_buf_get, | 1056 | .get = generic_pipe_buf_get, |
1057 | }; | 1057 | }; |
1058 | 1058 | ||
1059 | static void relay_page_release(struct splice_pipe_desc *spd, unsigned int i) | ||
1060 | { | ||
1061 | } | ||
1062 | |||
1059 | /* | 1063 | /* |
1060 | * subbuf_splice_actor - splice up to one subbuf's worth of data | 1064 | * subbuf_splice_actor - splice up to one subbuf's worth of data |
1061 | */ | 1065 | */ |
@@ -1066,7 +1070,7 @@ static int subbuf_splice_actor(struct file *in, | |||
1066 | unsigned int flags, | 1070 | unsigned int flags, |
1067 | int *nonpad_ret) | 1071 | int *nonpad_ret) |
1068 | { | 1072 | { |
1069 | unsigned int pidx, poff, total_len, subbuf_pages, ret; | 1073 | unsigned int pidx, poff, total_len, subbuf_pages, nr_pages, ret; |
1070 | struct rchan_buf *rbuf = in->private_data; | 1074 | struct rchan_buf *rbuf = in->private_data; |
1071 | unsigned int subbuf_size = rbuf->chan->subbuf_size; | 1075 | unsigned int subbuf_size = rbuf->chan->subbuf_size; |
1072 | uint64_t pos = (uint64_t) *ppos; | 1076 | uint64_t pos = (uint64_t) *ppos; |
@@ -1083,6 +1087,7 @@ static int subbuf_splice_actor(struct file *in, | |||
1083 | .partial = partial, | 1087 | .partial = partial, |
1084 | .flags = flags, | 1088 | .flags = flags, |
1085 | .ops = &relay_pipe_buf_ops, | 1089 | .ops = &relay_pipe_buf_ops, |
1090 | .spd_release = relay_page_release, | ||
1086 | }; | 1091 | }; |
1087 | 1092 | ||
1088 | if (rbuf->subbufs_produced == rbuf->subbufs_consumed) | 1093 | if (rbuf->subbufs_produced == rbuf->subbufs_consumed) |
@@ -1097,8 +1102,9 @@ static int subbuf_splice_actor(struct file *in, | |||
1097 | subbuf_pages = rbuf->chan->alloc_size >> PAGE_SHIFT; | 1102 | subbuf_pages = rbuf->chan->alloc_size >> PAGE_SHIFT; |
1098 | pidx = (read_start / PAGE_SIZE) % subbuf_pages; | 1103 | pidx = (read_start / PAGE_SIZE) % subbuf_pages; |
1099 | poff = read_start & ~PAGE_MASK; | 1104 | poff = read_start & ~PAGE_MASK; |
1105 | nr_pages = min_t(unsigned int, subbuf_pages, PIPE_BUFFERS); | ||
1100 | 1106 | ||
1101 | for (total_len = 0; spd.nr_pages < subbuf_pages; spd.nr_pages++) { | 1107 | for (total_len = 0; spd.nr_pages < nr_pages; spd.nr_pages++) { |
1102 | unsigned int this_len, this_end, private; | 1108 | unsigned int this_len, this_end, private; |
1103 | unsigned int cur_pos = read_start + total_len; | 1109 | unsigned int cur_pos = read_start + total_len; |
1104 | 1110 | ||
diff --git a/kernel/sched.c b/kernel/sched.c
index d1ad69b270ca..8dcdec6fe0fe 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -594,18 +594,14 @@ enum { | |||
594 | SCHED_FEAT_NEW_FAIR_SLEEPERS = 1, | 594 | SCHED_FEAT_NEW_FAIR_SLEEPERS = 1, |
595 | SCHED_FEAT_WAKEUP_PREEMPT = 2, | 595 | SCHED_FEAT_WAKEUP_PREEMPT = 2, |
596 | SCHED_FEAT_START_DEBIT = 4, | 596 | SCHED_FEAT_START_DEBIT = 4, |
597 | SCHED_FEAT_TREE_AVG = 8, | 597 | SCHED_FEAT_HRTICK = 8, |
598 | SCHED_FEAT_APPROX_AVG = 16, | 598 | SCHED_FEAT_DOUBLE_TICK = 16, |
599 | SCHED_FEAT_HRTICK = 32, | ||
600 | SCHED_FEAT_DOUBLE_TICK = 64, | ||
601 | }; | 599 | }; |
602 | 600 | ||
603 | const_debug unsigned int sysctl_sched_features = | 601 | const_debug unsigned int sysctl_sched_features = |
604 | SCHED_FEAT_NEW_FAIR_SLEEPERS * 1 | | 602 | SCHED_FEAT_NEW_FAIR_SLEEPERS * 1 | |
605 | SCHED_FEAT_WAKEUP_PREEMPT * 1 | | 603 | SCHED_FEAT_WAKEUP_PREEMPT * 1 | |
606 | SCHED_FEAT_START_DEBIT * 1 | | 604 | SCHED_FEAT_START_DEBIT * 1 | |
607 | SCHED_FEAT_TREE_AVG * 0 | | ||
608 | SCHED_FEAT_APPROX_AVG * 0 | | ||
609 | SCHED_FEAT_HRTICK * 1 | | 605 | SCHED_FEAT_HRTICK * 1 | |
610 | SCHED_FEAT_DOUBLE_TICK * 0; | 606 | SCHED_FEAT_DOUBLE_TICK * 0; |
611 | 607 | ||
@@ -1056,6 +1052,49 @@ static void resched_cpu(int cpu) | |||
1056 | resched_task(cpu_curr(cpu)); | 1052 | resched_task(cpu_curr(cpu)); |
1057 | spin_unlock_irqrestore(&rq->lock, flags); | 1053 | spin_unlock_irqrestore(&rq->lock, flags); |
1058 | } | 1054 | } |
1055 | |||
1056 | #ifdef CONFIG_NO_HZ | ||
1057 | /* | ||
1058 | * When add_timer_on() enqueues a timer into the timer wheel of an | ||
1059 | * idle CPU then this timer might expire before the next timer event | ||
1060 | * which is scheduled to wake up that CPU. In case of a completely | ||
1061 | * idle system the next event might even be infinite time into the | ||
1062 | * future. wake_up_idle_cpu() ensures that the CPU is woken up and | ||
1063 | * leaves the inner idle loop so the newly added timer is taken into | ||
1064 | * account when the CPU goes back to idle and evaluates the timer | ||
1065 | * wheel for the next timer event. | ||
1066 | */ | ||
1067 | void wake_up_idle_cpu(int cpu) | ||
1068 | { | ||
1069 | struct rq *rq = cpu_rq(cpu); | ||
1070 | |||
1071 | if (cpu == smp_processor_id()) | ||
1072 | return; | ||
1073 | |||
1074 | /* | ||
1075 | * This is safe, as this function is called with the timer | ||
1076 | * wheel base lock of (cpu) held. When the CPU is on the way | ||
1077 | * to idle and has not yet set rq->curr to idle then it will | ||
1078 | * be serialized on the timer wheel base lock and take the new | ||
1079 | * timer into account automatically. | ||
1080 | */ | ||
1081 | if (rq->curr != rq->idle) | ||
1082 | return; | ||
1083 | |||
1084 | /* | ||
1085 | * We can set TIF_RESCHED on the idle task of the other CPU | ||
1086 | * lockless. The worst case is that the other CPU runs the | ||
1087 | * idle task through an additional NOOP schedule() | ||
1088 | */ | ||
1089 | set_tsk_thread_flag(rq->idle, TIF_NEED_RESCHED); | ||
1090 | |||
1091 | /* NEED_RESCHED must be visible before we test polling */ | ||
1092 | smp_mb(); | ||
1093 | if (!tsk_is_polling(rq->idle)) | ||
1094 | smp_send_reschedule(cpu); | ||
1095 | } | ||
1096 | #endif | ||
1097 | |||
1059 | #else | 1098 | #else |
1060 | static void __resched_task(struct task_struct *p, int tif_bit) | 1099 | static void __resched_task(struct task_struct *p, int tif_bit) |
1061 | { | 1100 | { |
@@ -1396,6 +1435,12 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) | |||
1396 | { | 1435 | { |
1397 | s64 delta; | 1436 | s64 delta; |
1398 | 1437 | ||
1438 | /* | ||
1439 | * Buddy candidates are cache hot: | ||
1440 | */ | ||
1441 | if (&p->se == cfs_rq_of(&p->se)->next) | ||
1442 | return 1; | ||
1443 | |||
1399 | if (p->sched_class != &fair_sched_class) | 1444 | if (p->sched_class != &fair_sched_class) |
1400 | return 0; | 1445 | return 0; |
1401 | 1446 | ||
@@ -1855,10 +1900,11 @@ out_activate: | |||
1855 | schedstat_inc(p, se.nr_wakeups_remote); | 1900 | schedstat_inc(p, se.nr_wakeups_remote); |
1856 | update_rq_clock(rq); | 1901 | update_rq_clock(rq); |
1857 | activate_task(rq, p, 1); | 1902 | activate_task(rq, p, 1); |
1858 | check_preempt_curr(rq, p); | ||
1859 | success = 1; | 1903 | success = 1; |
1860 | 1904 | ||
1861 | out_running: | 1905 | out_running: |
1906 | check_preempt_curr(rq, p); | ||
1907 | |||
1862 | p->state = TASK_RUNNING; | 1908 | p->state = TASK_RUNNING; |
1863 | #ifdef CONFIG_SMP | 1909 | #ifdef CONFIG_SMP |
1864 | if (p->sched_class->task_wake_up) | 1910 | if (p->sched_class->task_wake_up) |
@@ -1892,6 +1938,8 @@ static void __sched_fork(struct task_struct *p) | |||
1892 | p->se.exec_start = 0; | 1938 | p->se.exec_start = 0; |
1893 | p->se.sum_exec_runtime = 0; | 1939 | p->se.sum_exec_runtime = 0; |
1894 | p->se.prev_sum_exec_runtime = 0; | 1940 | p->se.prev_sum_exec_runtime = 0; |
1941 | p->se.last_wakeup = 0; | ||
1942 | p->se.avg_overlap = 0; | ||
1895 | 1943 | ||
1896 | #ifdef CONFIG_SCHEDSTATS | 1944 | #ifdef CONFIG_SCHEDSTATS |
1897 | p->se.wait_start = 0; | 1945 | p->se.wait_start = 0; |
@@ -3877,7 +3925,7 @@ need_resched_nonpreemptible: | |||
3877 | 3925 | ||
3878 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { | 3926 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { |
3879 | if (unlikely((prev->state & TASK_INTERRUPTIBLE) && | 3927 | if (unlikely((prev->state & TASK_INTERRUPTIBLE) && |
3880 | unlikely(signal_pending(prev)))) { | 3928 | signal_pending(prev))) { |
3881 | prev->state = TASK_RUNNING; | 3929 | prev->state = TASK_RUNNING; |
3882 | } else { | 3930 | } else { |
3883 | deactivate_task(rq, prev, 1); | 3931 | deactivate_task(rq, prev, 1); |
@@ -6802,6 +6850,10 @@ static int ndoms_cur; /* number of sched domains in 'doms_cur' */ | |||
6802 | */ | 6850 | */ |
6803 | static cpumask_t fallback_doms; | 6851 | static cpumask_t fallback_doms; |
6804 | 6852 | ||
6853 | void __attribute__((weak)) arch_update_cpu_topology(void) | ||
6854 | { | ||
6855 | } | ||
6856 | |||
6805 | /* | 6857 | /* |
6806 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. | 6858 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. |
6807 | * For now this just excludes isolated cpus, but could be used to | 6859 | * For now this just excludes isolated cpus, but could be used to |
@@ -6811,6 +6863,7 @@ static int arch_init_sched_domains(const cpumask_t *cpu_map) | |||
6811 | { | 6863 | { |
6812 | int err; | 6864 | int err; |
6813 | 6865 | ||
6866 | arch_update_cpu_topology(); | ||
6814 | ndoms_cur = 1; | 6867 | ndoms_cur = 1; |
6815 | doms_cur = kmalloc(sizeof(cpumask_t), GFP_KERNEL); | 6868 | doms_cur = kmalloc(sizeof(cpumask_t), GFP_KERNEL); |
6816 | if (!doms_cur) | 6869 | if (!doms_cur) |
@@ -6915,7 +6968,7 @@ match2: | |||
6915 | } | 6968 | } |
6916 | 6969 | ||
6917 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) | 6970 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) |
6918 | static int arch_reinit_sched_domains(void) | 6971 | int arch_reinit_sched_domains(void) |
6919 | { | 6972 | { |
6920 | int err; | 6973 | int err; |
6921 | 6974 | ||
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 4b5e24cf2f4a..ef358ba07683 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -288,6 +288,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) | |||
288 | PN(se.exec_start); | 288 | PN(se.exec_start); |
289 | PN(se.vruntime); | 289 | PN(se.vruntime); |
290 | PN(se.sum_exec_runtime); | 290 | PN(se.sum_exec_runtime); |
291 | PN(se.avg_overlap); | ||
291 | 292 | ||
292 | nr_switches = p->nvcsw + p->nivcsw; | 293 | nr_switches = p->nvcsw + p->nivcsw; |
293 | 294 | ||
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f2cc59080efa..86a93376282c 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -73,13 +73,13 @@ unsigned int sysctl_sched_batch_wakeup_granularity = 10000000UL; | |||
73 | 73 | ||
74 | /* | 74 | /* |
75 | * SCHED_OTHER wake-up granularity. | 75 | * SCHED_OTHER wake-up granularity. |
76 | * (default: 10 msec * (1 + ilog(ncpus)), units: nanoseconds) | 76 | * (default: 5 msec * (1 + ilog(ncpus)), units: nanoseconds) |
77 | * | 77 | * |
78 | * This option delays the preemption effects of decoupled workloads | 78 | * This option delays the preemption effects of decoupled workloads |
79 | * and reduces their over-scheduling. Synchronous workloads will still | 79 | * and reduces their over-scheduling. Synchronous workloads will still |
80 | * have immediate wakeup/sleep latencies. | 80 | * have immediate wakeup/sleep latencies. |
81 | */ | 81 | */ |
82 | unsigned int sysctl_sched_wakeup_granularity = 10000000UL; | 82 | unsigned int sysctl_sched_wakeup_granularity = 5000000UL; |
83 | 83 | ||
84 | const_debug unsigned int sysctl_sched_migration_cost = 500000UL; | 84 | const_debug unsigned int sysctl_sched_migration_cost = 500000UL; |
85 | 85 | ||
@@ -302,11 +302,6 @@ static u64 __sched_vslice(unsigned long rq_weight, unsigned long nr_running) | |||
302 | return vslice; | 302 | return vslice; |
303 | } | 303 | } |
304 | 304 | ||
305 | static u64 sched_vslice(struct cfs_rq *cfs_rq) | ||
306 | { | ||
307 | return __sched_vslice(cfs_rq->load.weight, cfs_rq->nr_running); | ||
308 | } | ||
309 | |||
310 | static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se) | 305 | static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se) |
311 | { | 306 | { |
312 | return __sched_vslice(cfs_rq->load.weight + se->load.weight, | 307 | return __sched_vslice(cfs_rq->load.weight + se->load.weight, |
@@ -504,15 +499,6 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) | |||
504 | } else | 499 | } else |
505 | vruntime = cfs_rq->min_vruntime; | 500 | vruntime = cfs_rq->min_vruntime; |
506 | 501 | ||
507 | if (sched_feat(TREE_AVG)) { | ||
508 | struct sched_entity *last = __pick_last_entity(cfs_rq); | ||
509 | if (last) { | ||
510 | vruntime += last->vruntime; | ||
511 | vruntime >>= 1; | ||
512 | } | ||
513 | } else if (sched_feat(APPROX_AVG) && cfs_rq->nr_running) | ||
514 | vruntime += sched_vslice(cfs_rq)/2; | ||
515 | |||
516 | /* | 502 | /* |
517 | * The 'current' period is already promised to the current tasks, | 503 | * The 'current' period is already promised to the current tasks, |
518 | * however the extra weight of the new task will slow them down a | 504 | * however the extra weight of the new task will slow them down a |
@@ -556,6 +542,21 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup) | |||
556 | account_entity_enqueue(cfs_rq, se); | 542 | account_entity_enqueue(cfs_rq, se); |
557 | } | 543 | } |
558 | 544 | ||
545 | static void update_avg(u64 *avg, u64 sample) | ||
546 | { | ||
547 | s64 diff = sample - *avg; | ||
548 | *avg += diff >> 3; | ||
549 | } | ||
550 | |||
551 | static void update_avg_stats(struct cfs_rq *cfs_rq, struct sched_entity *se) | ||
552 | { | ||
553 | if (!se->last_wakeup) | ||
554 | return; | ||
555 | |||
556 | update_avg(&se->avg_overlap, se->sum_exec_runtime - se->last_wakeup); | ||
557 | se->last_wakeup = 0; | ||
558 | } | ||
559 | |||
559 | static void | 560 | static void |
560 | dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) | 561 | dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) |
561 | { | 562 | { |
@@ -566,6 +567,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) | |||
566 | 567 | ||
567 | update_stats_dequeue(cfs_rq, se); | 568 | update_stats_dequeue(cfs_rq, se); |
568 | if (sleep) { | 569 | if (sleep) { |
570 | update_avg_stats(cfs_rq, se); | ||
569 | #ifdef CONFIG_SCHEDSTATS | 571 | #ifdef CONFIG_SCHEDSTATS |
570 | if (entity_is_task(se)) { | 572 | if (entity_is_task(se)) { |
571 | struct task_struct *tsk = task_of(se); | 573 | struct task_struct *tsk = task_of(se); |
@@ -980,96 +982,121 @@ static inline int wake_idle(int cpu, struct task_struct *p) | |||
980 | #endif | 982 | #endif |
981 | 983 | ||
982 | #ifdef CONFIG_SMP | 984 | #ifdef CONFIG_SMP |
983 | static int select_task_rq_fair(struct task_struct *p, int sync) | 985 | |
986 | static const struct sched_class fair_sched_class; | ||
987 | |||
988 | static int | ||
989 | wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq, | ||
990 | struct task_struct *p, int prev_cpu, int this_cpu, int sync, | ||
991 | int idx, unsigned long load, unsigned long this_load, | ||
992 | unsigned int imbalance) | ||
984 | { | 993 | { |
985 | int cpu, this_cpu; | 994 | struct task_struct *curr = this_rq->curr; |
986 | struct rq *rq; | 995 | unsigned long tl = this_load; |
987 | struct sched_domain *sd, *this_sd = NULL; | 996 | unsigned long tl_per_task; |
988 | int new_cpu; | 997 | |
998 | if (!(this_sd->flags & SD_WAKE_AFFINE)) | ||
999 | return 0; | ||
1000 | |||
1001 | /* | ||
1002 | * If the currently running task will sleep within | ||
1003 | * a reasonable amount of time then attract this newly | ||
1004 | * woken task: | ||
1005 | */ | ||
1006 | if (sync && curr->sched_class == &fair_sched_class) { | ||
1007 | if (curr->se.avg_overlap < sysctl_sched_migration_cost && | ||
1008 | p->se.avg_overlap < sysctl_sched_migration_cost) | ||
1009 | return 1; | ||
1010 | } | ||
1011 | |||
1012 | schedstat_inc(p, se.nr_wakeups_affine_attempts); | ||
1013 | tl_per_task = cpu_avg_load_per_task(this_cpu); | ||
1014 | |||
1015 | /* | ||
1016 | * If sync wakeup then subtract the (maximum possible) | ||
1017 | * effect of the currently running task from the load | ||
1018 | * of the current CPU: | ||
1019 | */ | ||
1020 | if (sync) | ||
1021 | tl -= current->se.load.weight; | ||
1022 | |||
1023 | if ((tl <= load && tl + target_load(prev_cpu, idx) <= tl_per_task) || | ||
1024 | 100*(tl + p->se.load.weight) <= imbalance*load) { | ||
1025 | /* | ||
1026 | * This domain has SD_WAKE_AFFINE and | ||
1027 | * p is cache cold in this domain, and | ||
1028 | * there is no bad imbalance. | ||
1029 | */ | ||
1030 | schedstat_inc(this_sd, ttwu_move_affine); | ||
1031 | schedstat_inc(p, se.nr_wakeups_affine); | ||
989 | 1032 | ||
990 | cpu = task_cpu(p); | 1033 | return 1; |
991 | rq = task_rq(p); | 1034 | } |
992 | this_cpu = smp_processor_id(); | 1035 | return 0; |
993 | new_cpu = cpu; | 1036 | } |
994 | 1037 | ||
995 | if (cpu == this_cpu) | 1038 | static int select_task_rq_fair(struct task_struct *p, int sync) |
996 | goto out_set_cpu; | 1039 | { |
1040 | struct sched_domain *sd, *this_sd = NULL; | ||
1041 | int prev_cpu, this_cpu, new_cpu; | ||
1042 | unsigned long load, this_load; | ||
1043 | struct rq *rq, *this_rq; | ||
1044 | unsigned int imbalance; | ||
1045 | int idx; | ||
1046 | |||
1047 | prev_cpu = task_cpu(p); | ||
1048 | rq = task_rq(p); | ||
1049 | this_cpu = smp_processor_id(); | ||
1050 | this_rq = cpu_rq(this_cpu); | ||
1051 | new_cpu = prev_cpu; | ||
997 | 1052 | ||
1053 | /* | ||
1054 | * 'this_sd' is the first domain that both | ||
1055 | * this_cpu and prev_cpu are present in: | ||
1056 | */ | ||
998 | for_each_domain(this_cpu, sd) { | 1057 | for_each_domain(this_cpu, sd) { |
999 | if (cpu_isset(cpu, sd->span)) { | 1058 | if (cpu_isset(prev_cpu, sd->span)) { |
1000 | this_sd = sd; | 1059 | this_sd = sd; |
1001 | break; | 1060 | break; |
1002 | } | 1061 | } |
1003 | } | 1062 | } |
1004 | 1063 | ||
1005 | if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) | 1064 | if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) |
1006 | goto out_set_cpu; | 1065 | goto out; |
1007 | 1066 | ||
1008 | /* | 1067 | /* |
1009 | * Check for affine wakeup and passive balancing possibilities. | 1068 | * Check for affine wakeup and passive balancing possibilities. |
1010 | */ | 1069 | */ |
1011 | if (this_sd) { | 1070 | if (!this_sd) |
1012 | int idx = this_sd->wake_idx; | 1071 | goto out; |
1013 | unsigned int imbalance; | ||
1014 | unsigned long load, this_load; | ||
1015 | |||
1016 | imbalance = 100 + (this_sd->imbalance_pct - 100) / 2; | ||
1017 | |||
1018 | load = source_load(cpu, idx); | ||
1019 | this_load = target_load(this_cpu, idx); | ||
1020 | |||
1021 | new_cpu = this_cpu; /* Wake to this CPU if we can */ | ||
1022 | |||
1023 | if (this_sd->flags & SD_WAKE_AFFINE) { | ||
1024 | unsigned long tl = this_load; | ||
1025 | unsigned long tl_per_task; | ||
1026 | |||
1027 | /* | ||
1028 | * Attract cache-cold tasks on sync wakeups: | ||
1029 | */ | ||
1030 | if (sync && !task_hot(p, rq->clock, this_sd)) | ||
1031 | goto out_set_cpu; | ||
1032 | |||
1033 | schedstat_inc(p, se.nr_wakeups_affine_attempts); | ||
1034 | tl_per_task = cpu_avg_load_per_task(this_cpu); | ||
1035 | |||
1036 | /* | ||
1037 | * If sync wakeup then subtract the (maximum possible) | ||
1038 | * effect of the currently running task from the load | ||
1039 | * of the current CPU: | ||
1040 | */ | ||
1041 | if (sync) | ||
1042 | tl -= current->se.load.weight; | ||
1043 | |||
1044 | if ((tl <= load && | ||
1045 | tl + target_load(cpu, idx) <= tl_per_task) || | ||
1046 | 100*(tl + p->se.load.weight) <= imbalance*load) { | ||
1047 | /* | ||
1048 | * This domain has SD_WAKE_AFFINE and | ||
1049 | * p is cache cold in this domain, and | ||
1050 | * there is no bad imbalance. | ||
1051 | */ | ||
1052 | schedstat_inc(this_sd, ttwu_move_affine); | ||
1053 | schedstat_inc(p, se.nr_wakeups_affine); | ||
1054 | goto out_set_cpu; | ||
1055 | } | ||
1056 | } | ||
1057 | 1072 | ||
1058 | /* | 1073 | idx = this_sd->wake_idx; |
1059 | * Start passive balancing when half the imbalance_pct | 1074 | |
1060 | * limit is reached. | 1075 | imbalance = 100 + (this_sd->imbalance_pct - 100) / 2; |
1061 | */ | 1076 | |
1062 | if (this_sd->flags & SD_WAKE_BALANCE) { | 1077 | load = source_load(prev_cpu, idx); |
1063 | if (imbalance*this_load <= 100*load) { | 1078 | this_load = target_load(this_cpu, idx); |
1064 | schedstat_inc(this_sd, ttwu_move_balance); | 1079 | |
1065 | schedstat_inc(p, se.nr_wakeups_passive); | 1080 | if (wake_affine(rq, this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx, |
1066 | goto out_set_cpu; | 1081 | load, this_load, imbalance)) |
1067 | } | 1082 | return this_cpu; |
1083 | |||
1084 | if (prev_cpu == this_cpu) | ||
1085 | goto out; | ||
1086 | |||
1087 | /* | ||
1088 | * Start passive balancing when half the imbalance_pct | ||
1089 | * limit is reached. | ||
1090 | */ | ||
1091 | if (this_sd->flags & SD_WAKE_BALANCE) { | ||
1092 | if (imbalance*this_load <= 100*load) { | ||
1093 | schedstat_inc(this_sd, ttwu_move_balance); | ||
1094 | schedstat_inc(p, se.nr_wakeups_passive); | ||
1095 | return this_cpu; | ||
1068 | } | 1096 | } |
1069 | } | 1097 | } |
1070 | 1098 | ||
1071 | new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */ | 1099 | out: |
1072 | out_set_cpu: | ||
1073 | return wake_idle(new_cpu, p); | 1100 | return wake_idle(new_cpu, p); |
1074 | } | 1101 | } |
1075 | #endif /* CONFIG_SMP */ | 1102 | #endif /* CONFIG_SMP */ |
@@ -1092,6 +1119,10 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p) | |||
1092 | return; | 1119 | return; |
1093 | } | 1120 | } |
1094 | 1121 | ||
1122 | se->last_wakeup = se->sum_exec_runtime; | ||
1123 | if (unlikely(se == pse)) | ||
1124 | return; | ||
1125 | |||
1095 | cfs_rq_of(pse)->next = pse; | 1126 | cfs_rq_of(pse)->next = pse; |
1096 | 1127 | ||
1097 | /* | 1128 | /* |
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 548c436a776b..7f60097d443a 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -141,13 +141,8 @@ static void clocksource_watchdog(unsigned long data) | |||
141 | } | 141 | } |
142 | 142 | ||
143 | if (!list_empty(&watchdog_list)) { | 143 | if (!list_empty(&watchdog_list)) { |
144 | /* Cycle through CPUs to check if the CPUs stay synchronized to | 144 | __mod_timer(&watchdog_timer, |
145 | * each other. */ | 145 | watchdog_timer.expires + WATCHDOG_INTERVAL); |
146 | int next_cpu = next_cpu(raw_smp_processor_id(), cpu_online_map); | ||
147 | if (next_cpu >= NR_CPUS) | ||
148 | next_cpu = first_cpu(cpu_online_map); | ||
149 | watchdog_timer.expires += WATCHDOG_INTERVAL; | ||
150 | add_timer_on(&watchdog_timer, next_cpu); | ||
151 | } | 146 | } |
152 | spin_unlock(&watchdog_lock); | 147 | spin_unlock(&watchdog_lock); |
153 | } | 148 | } |
@@ -169,7 +164,7 @@ static void clocksource_check_watchdog(struct clocksource *cs) | |||
169 | if (!started && watchdog) { | 164 | if (!started && watchdog) { |
170 | watchdog_last = watchdog->read(); | 165 | watchdog_last = watchdog->read(); |
171 | watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; | 166 | watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; |
172 | add_timer_on(&watchdog_timer, first_cpu(cpu_online_map)); | 167 | add_timer(&watchdog_timer); |
173 | } | 168 | } |
174 | } else { | 169 | } else { |
175 | if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) | 170 | if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) |
@@ -179,7 +174,7 @@ static void clocksource_check_watchdog(struct clocksource *cs) | |||
179 | if (watchdog) | 174 | if (watchdog) |
180 | del_timer(&watchdog_timer); | 175 | del_timer(&watchdog_timer); |
181 | watchdog = cs; | 176 | watchdog = cs; |
182 | init_timer_deferrable(&watchdog_timer); | 177 | init_timer(&watchdog_timer); |
183 | watchdog_timer.function = clocksource_watchdog; | 178 | watchdog_timer.function = clocksource_watchdog; |
184 | 179 | ||
185 | /* Reset watchdog cycles */ | 180 | /* Reset watchdog cycles */ |
@@ -190,8 +185,7 @@ static void clocksource_check_watchdog(struct clocksource *cs) | |||
190 | watchdog_last = watchdog->read(); | 185 | watchdog_last = watchdog->read(); |
191 | watchdog_timer.expires = | 186 | watchdog_timer.expires = |
192 | jiffies + WATCHDOG_INTERVAL; | 187 | jiffies + WATCHDOG_INTERVAL; |
193 | add_timer_on(&watchdog_timer, | 188 | add_timer(&watchdog_timer); |
194 | first_cpu(cpu_online_map)); | ||
195 | } | 189 | } |
196 | } | 190 | } |
197 | } | 191 | } |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 671af612b768..a3fa587c350c 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -191,8 +191,12 @@ static void change_clocksource(void) | |||
191 | 191 | ||
192 | tick_clock_notify(); | 192 | tick_clock_notify(); |
193 | 193 | ||
194 | /* | ||
195 | * We're holding xtime lock and waking up klogd would deadlock | ||
196 | * us on enqueue. So no printing! | ||
194 | printk(KERN_INFO "Time: %s clocksource has been installed.\n", | 197 | printk(KERN_INFO "Time: %s clocksource has been installed.\n", |
195 | clock->name); | 198 | clock->name); |
199 | */ | ||
196 | } | 200 | } |
197 | #else | 201 | #else |
198 | static inline void change_clocksource(void) { } | 202 | static inline void change_clocksource(void) { } |
diff --git a/kernel/timer.c b/kernel/timer.c
index 99b00a25f88b..b024106daa70 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -451,10 +451,18 @@ void add_timer_on(struct timer_list *timer, int cpu) | |||
451 | spin_lock_irqsave(&base->lock, flags); | 451 | spin_lock_irqsave(&base->lock, flags); |
452 | timer_set_base(timer, base); | 452 | timer_set_base(timer, base); |
453 | internal_add_timer(base, timer); | 453 | internal_add_timer(base, timer); |
454 | /* | ||
455 | * Check whether the other CPU is idle and needs to be | ||
456 | * triggered to reevaluate the timer wheel when nohz is | ||
457 | * active. We are protected against the other CPU fiddling | ||
458 | * with the timer by holding the timer base lock. This also | ||
459 | * makes sure that a CPU on the way to idle can not evaluate | ||
460 | * the timer wheel. | ||
461 | */ | ||
462 | wake_up_idle_cpu(cpu); | ||
454 | spin_unlock_irqrestore(&base->lock, flags); | 463 | spin_unlock_irqrestore(&base->lock, flags); |
455 | } | 464 | } |
456 | 465 | ||
457 | |||
458 | /** | 466 | /** |
459 | * mod_timer - modify a timer's timeout | 467 | * mod_timer - modify a timer's timeout |
460 | * @timer: the timer to be modified | 468 | * @timer: the timer to be modified |