Diffstat (limited to 'kernel'): 37 files changed, 575 insertions, 252 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index eb0f9165b401..2924251a6547 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -847,18 +847,10 @@ static void audit_receive_skb(struct sk_buff *skb)
 }
 
 /* Receive messages from netlink socket. */
-static void audit_receive(struct sock *sk, int length)
+static void audit_receive(struct sk_buff *skb)
 {
-	struct sk_buff *skb;
-	unsigned int qlen;
-
 	mutex_lock(&audit_cmd_mutex);
-
-	for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
-		skb = skb_dequeue(&sk->sk_receive_queue);
-		audit_receive_skb(skb);
-		kfree_skb(skb);
-	}
+	audit_receive_skb(skb);
 	mutex_unlock(&audit_cmd_mutex);
 }
 
@@ -876,8 +868,8 @@ static int __init audit_init(void)
 
 	printk(KERN_INFO "audit: initializing netlink socket (%s)\n",
 	       audit_default ? "enabled" : "disabled");
-	audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive,
-					   NULL, THIS_MODULE);
+	audit_sock = netlink_kernel_create(&init_net, NETLINK_AUDIT, 0,
+					   audit_receive, NULL, THIS_MODULE);
 	if (!audit_sock)
 		audit_panic("cannot initialize netlink socket");
 	else
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 3401293359e8..04f3ffb8d9d4 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2023,7 +2023,7 @@ int __audit_signal_info(int sig, struct task_struct *t)
 		axp->d.next = ctx->aux_pids;
 		ctx->aux_pids = (void *)axp;
 	}
-	BUG_ON(axp->pid_count > AUDIT_AUX_PIDS);
+	BUG_ON(axp->pid_count >= AUDIT_AUX_PIDS);
 
 	axp->target_pid[axp->pid_count] = t->tgid;
 	selinux_get_task_sid(t, &axp->target_sid[axp->pid_count]);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 181ae7086029..38033db8d8ec 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -273,7 +273,7 @@ int __cpuinit cpu_up(unsigned int cpu)
 	return err;
 }
 
-#ifdef CONFIG_SUSPEND_SMP
+#ifdef CONFIG_PM_SLEEP_SMP
 static cpumask_t frozen_cpus;
 
 int disable_nonboot_cpus(void)
@@ -334,4 +334,4 @@ void enable_nonboot_cpus(void)
 out:
 	mutex_unlock(&cpu_add_remove_lock);
 }
-#endif
+#endif /* CONFIG_PM_SLEEP_SMP */
diff --git a/kernel/exit.c b/kernel/exit.c
index 9578c1ae19ca..993369ee94d1 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -24,7 +24,6 @@
 #include <linux/pid_namespace.h>
 #include <linux/ptrace.h>
 #include <linux/profile.h>
-#include <linux/signalfd.h>
 #include <linux/mount.h>
 #include <linux/proc_fs.h>
 #include <linux/kthread.h>
@@ -86,14 +85,6 @@ static void __exit_signal(struct task_struct *tsk)
 	sighand = rcu_dereference(tsk->sighand);
 	spin_lock(&sighand->siglock);
 
-	/*
-	 * Notify that this sighand has been detached. This must
-	 * be called with the tsk->sighand lock held. Also, this
-	 * access tsk->sighand internally, so it must be called
-	 * before tsk->sighand is reset.
-	 */
-	signalfd_detach_locked(tsk);
-
 	posix_cpu_timers_exit(tsk);
 	if (atomic_dec_and_test(&sig->count))
 		posix_cpu_timers_exit_group(tsk);
@@ -975,6 +966,7 @@ fastcall NORET_TYPE void do_exit(long code)
 	if (unlikely(tsk->audit_context))
 		audit_free(tsk);
 
+	tsk->exit_code = code;
 	taskstats_exit(tsk, group_dead);
 
 	exit_mm(tsk);
@@ -996,7 +988,6 @@ fastcall NORET_TYPE void do_exit(long code)
 	if (tsk->binfmt)
 		module_put(tsk->binfmt->module);
 
-	tsk->exit_code = code;
 	proc_exit_connector(tsk);
 	exit_task_namespaces(tsk);
 	exit_notify(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index 7332e236d367..5e67f90a1694 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1438,7 +1438,7 @@ static void sighand_ctor(void *data, struct kmem_cache *cachep,
 	struct sighand_struct *sighand = data;
 
 	spin_lock_init(&sighand->siglock);
-	INIT_LIST_HEAD(&sighand->signalfd_list);
+	init_waitqueue_head(&sighand->signalfd_wqh);
 }
 
 void __init proc_caches_init(void)
@@ -1608,7 +1608,8 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
 	err = -EINVAL;
 	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
 				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
-				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER))
+				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER|
+				CLONE_NEWNET))
 		goto bad_unshare_out;
 
 	if ((err = unshare_thread(unshare_flags)))
diff --git a/kernel/futex.c b/kernel/futex.c
index 3415e9ad1391..fcc94e7b4086 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1670,6 +1670,7 @@ pi_faulted:
 					 attempt);
 		if (ret)
 			goto out;
+		uval = 0;
 		goto retry_unlocked;
 	}
 
@@ -1942,9 +1943,10 @@ static inline int fetch_robust_entry(struct robust_list __user **entry,
 void exit_robust_list(struct task_struct *curr)
 {
 	struct robust_list_head __user *head = curr->robust_list;
-	struct robust_list __user *entry, *pending;
-	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
+	struct robust_list __user *entry, *next_entry, *pending;
+	unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip;
 	unsigned long futex_offset;
+	int rc;
 
 	/*
 	 * Fetch the list head (which was registered earlier, via
@@ -1964,12 +1966,14 @@ void exit_robust_list(struct task_struct *curr)
 	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
 		return;
 
-	if (pending)
-		handle_futex_death((void __user *)pending + futex_offset,
-				   curr, pip);
-
+	next_entry = NULL;	/* avoid warning with gcc */
 	while (entry != &head->list) {
 		/*
+		 * Fetch the next entry in the list before calling
+		 * handle_futex_death:
+		 */
+		rc = fetch_robust_entry(&next_entry, &entry->next, &next_pi);
+		/*
 		 * A pending lock might already be on the list, so
 		 * don't process it twice:
 		 */
@@ -1977,11 +1981,10 @@ void exit_robust_list(struct task_struct *curr)
 		if (handle_futex_death((void __user *)entry + futex_offset,
 					curr, pi))
 			return;
-		/*
-		 * Fetch the next entry in the list:
-		 */
-		if (fetch_robust_entry(&entry, &entry->next, &pi))
+		if (rc)
 			return;
+		entry = next_entry;
+		pi = next_pi;
 		/*
 		 * Avoid excessively long or circular lists:
 		 */
@@ -1990,6 +1993,10 @@ void exit_robust_list(struct task_struct *curr)
 
 		cond_resched();
 	}
+
+	if (pending)
+		handle_futex_death((void __user *)pending + futex_offset,
+				   curr, pip);
 }
 
 long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
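The reordering in exit_robust_list() (and in its compat counterpart below) follows a general rule for walking a user-space list whose nodes can vanish under the walker: fetch the next pointer before processing the current entry, because handle_futex_death() may wake the lock's new owner and the entry can be freed or reused immediately afterwards, and handle the list_op_pending entry only once the walk has finished. A stand-alone C sketch of the same ordering follows; process_node() is a made-up stand-in for handle_futex_death(), not a kernel function.

	/* robust_walk_sketch.c - illustrative only, not kernel code */
	#include <stdio.h>
	#include <stdlib.h>

	struct node {
		struct node *next;
		int id;
	};

	/* Stand-in for handle_futex_death(): after this call the node may be
	 * freed or reused by another party, so it must not be touched again. */
	static int process_node(struct node *n)
	{
		printf("processing node %d\n", n->id);
		free(n);
		return 0;
	}

	int main(void)
	{
		struct node head = { &head, -1 };	/* empty circular list */
		struct node *entry, *next_entry;
		int i, limit = 2048;

		for (i = 3; i >= 1; i--) {		/* build head -> 1 -> 2 -> 3 */
			struct node *n = malloc(sizeof(*n));
			n->id = i;
			n->next = head.next;
			head.next = n;
		}

		entry = head.next;
		while (entry != &head) {
			/* Fetch the successor before the entry becomes unreliable. */
			next_entry = entry->next;
			if (process_node(entry))
				break;
			entry = next_entry;
			if (!--limit)	/* avoid excessively long or circular lists */
				break;
		}
		return 0;
	}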
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index f7921360efad..2c2e2954b713 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -38,10 +38,11 @@ fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
 void compat_exit_robust_list(struct task_struct *curr)
 {
 	struct compat_robust_list_head __user *head = curr->compat_robust_list;
-	struct robust_list __user *entry, *pending;
-	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
-	compat_uptr_t uentry, upending;
+	struct robust_list __user *entry, *next_entry, *pending;
+	unsigned int limit = ROBUST_LIST_LIMIT, pi, next_pi, pip;
+	compat_uptr_t uentry, next_uentry, upending;
 	compat_long_t futex_offset;
+	int rc;
 
 	/*
 	 * Fetch the list head (which was registered earlier, via
@@ -61,10 +62,15 @@ void compat_exit_robust_list(struct task_struct *curr)
 	if (fetch_robust_entry(&upending, &pending,
 			       &head->list_op_pending, &pip))
 		return;
-	if (upending)
-		handle_futex_death((void __user *)pending + futex_offset, curr, pip);
 
-	while (compat_ptr(uentry) != &head->list) {
+	next_entry = NULL;	/* avoid warning with gcc */
+	while (entry != (struct robust_list __user *) &head->list) {
+		/*
+		 * Fetch the next entry in the list before calling
+		 * handle_futex_death:
+		 */
+		rc = fetch_robust_entry(&next_uentry, &next_entry,
+			(compat_uptr_t __user *)&entry->next, &next_pi);
 		/*
 		 * A pending lock might already be on the list, so
 		 * dont process it twice:
@@ -74,12 +80,11 @@ void compat_exit_robust_list(struct task_struct *curr)
 				curr, pi))
 			return;
 
-		/*
-		 * Fetch the next entry in the list:
-		 */
-		if (fetch_robust_entry(&uentry, &entry,
-			(compat_uptr_t __user *)&entry->next, &pi))
+		if (rc)
 			return;
+		uentry = next_uentry;
+		entry = next_entry;
+		pi = next_pi;
 		/*
 		 * Avoid excessively long or circular lists:
 		 */
@@ -88,6 +93,9 @@ void compat_exit_robust_list(struct task_struct *curr)
 
 		cond_resched();
 	}
+	if (pending)
+		handle_futex_death((void __user *)pending + futex_offset,
+				   curr, pip);
 }
 
 asmlinkage long
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index c21ca6bfaa66..dc8a4451d79b 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -277,6 +277,30 @@ ktime_t ktime_add_ns(const ktime_t kt, u64 nsec)
 }
 
 EXPORT_SYMBOL_GPL(ktime_add_ns);
+
+/**
+ * ktime_sub_ns - Subtract a scalar nanoseconds value from a ktime_t variable
+ * @kt:		minuend
+ * @nsec:	the scalar nsec value to subtract
+ *
+ * Returns the subtraction of @nsec from @kt in ktime_t format
+ */
+ktime_t ktime_sub_ns(const ktime_t kt, u64 nsec)
+{
+	ktime_t tmp;
+
+	if (likely(nsec < NSEC_PER_SEC)) {
+		tmp.tv64 = nsec;
+	} else {
+		unsigned long rem = do_div(nsec, NSEC_PER_SEC);
+
+		tmp = ktime_set((long)nsec, rem);
+	}
+
+	return ktime_sub(kt, tmp);
+}
+
+EXPORT_SYMBOL_GPL(ktime_sub_ns);
 # endif /* !CONFIG_KTIME_SCALAR */
 
 /*
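On configurations without CONFIG_KTIME_SCALAR, the new ktime_sub_ns() has to split the scalar nanosecond count into whole seconds plus a remainder (that is what the do_div() call does) before it can build a ktime_t to subtract. A small stand-alone illustration of that split, using plain 64-bit arithmetic rather than the kernel's ktime_t and do_div() helpers:

	#include <stdint.h>
	#include <stdio.h>

	#define NSEC_PER_SEC 1000000000ULL

	int main(void)
	{
		uint64_t nsec = 3750000000ULL;		/* 3.75 s */
		uint64_t sec = nsec / NSEC_PER_SEC;	/* do_div() leaves this quotient in nsec */
		uint64_t rem = nsec % NSEC_PER_SEC;	/* do_div() returns this remainder */

		/* ktime_set(sec, rem) would then build the value to subtract. */
		printf("%llu ns = %llu s + %llu ns\n", (unsigned long long)nsec,
		       (unsigned long long)sec, (unsigned long long)rem);
		return 0;
	}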
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 203a518b6f14..7230d914eaa2 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -462,7 +462,9 @@ void free_irq(unsigned int irq, void *dev_id)
 		 * We do this after actually deregistering it, to make sure that
 		 * a 'real' IRQ doesn't run in parallel with our fake
 		 */
+		local_irq_save(flags);
 		handler(irq, dev_id);
+		local_irq_restore(flags);
 	}
 #endif
 }
@@ -545,14 +547,11 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 		 * We do this before actually registering it, to make sure that
 		 * a 'real' IRQ doesn't run in parallel with our fake
 		 */
-		if (irqflags & IRQF_DISABLED) {
-			unsigned long flags;
+		unsigned long flags;
 
 		local_irq_save(flags);
 		handler(irq, dev_id);
 		local_irq_restore(flags);
-		} else
-			handler(irq, dev_id);
 	}
 #endif
 
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 9809cc1f33d6..c6a4f8aebeba 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -505,7 +505,7 @@ int call_usermodehelper_pipe(char *path, char **argv, char **envp,
 	if (ret < 0)
 		goto out;
 
-	return call_usermodehelper_exec(sub_info, 1);
+	return call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC);
 
 out:
 	call_usermodehelper_freeinfo(sub_info);
diff --git a/kernel/module.c b/kernel/module.c
index 33c04ad51175..db0ead0363e2 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -784,8 +784,7 @@ EXPORT_SYMBOL_GPL(symbol_put_addr);
 static ssize_t show_refcnt(struct module_attribute *mattr,
 			   struct module *mod, char *buffer)
 {
-	/* sysfs holds a reference */
-	return sprintf(buffer, "%u\n", module_refcount(mod)-1);
+	return sprintf(buffer, "%u\n", module_refcount(mod));
 }
 
 static struct module_attribute refcnt = {
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index a4fb7d46971f..f1decd21a534 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -20,6 +20,7 @@
 #include <linux/mnt_namespace.h>
 #include <linux/utsname.h>
 #include <linux/pid_namespace.h>
+#include <net/net_namespace.h>
 
 static struct kmem_cache *nsproxy_cachep;
 
@@ -98,8 +99,17 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
 		goto out_user;
 	}
 
+	new_nsp->net_ns = copy_net_ns(flags, tsk->nsproxy->net_ns);
+	if (IS_ERR(new_nsp->net_ns)) {
+		err = PTR_ERR(new_nsp->net_ns);
+		goto out_net;
+	}
+
 	return new_nsp;
 
+out_net:
+	if (new_nsp->user_ns)
+		put_user_ns(new_nsp->user_ns);
 out_user:
 	if (new_nsp->pid_ns)
 		put_pid_ns(new_nsp->pid_ns);
@@ -132,7 +142,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
 
 	get_nsproxy(old_ns);
 
-	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER)))
+	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER | CLONE_NEWNET)))
 		return 0;
 
 	if (!capable(CAP_SYS_ADMIN)) {
@@ -164,6 +174,7 @@ void free_nsproxy(struct nsproxy *ns)
 		put_pid_ns(ns->pid_ns);
 	if (ns->user_ns)
 		put_user_ns(ns->user_ns);
+	put_net(ns->net_ns);
 	kmem_cache_free(nsproxy_cachep, ns);
 }
 
@@ -177,7 +188,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
 	int err = 0;
 
 	if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
-			       CLONE_NEWUSER)))
+			       CLONE_NEWUSER | CLONE_NEWNET)))
 		return 0;
 
 	if (!capable(CAP_SYS_ADMIN))
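Together with the CLONE_NEWNET bit that the fork.c hunk above adds to sys_unshare(), these nsproxy changes let a privileged process request a private network namespace. A rough user-space usage sketch (it assumes a kernel built with the network-namespace support added by this series; unshare(CLONE_NEWNET) needs CAP_SYS_ADMIN, and the new namespace starts out with only a loopback device):

	#define _GNU_SOURCE
	#include <sched.h>
	#include <stdio.h>
	#include <unistd.h>

	#ifndef CLONE_NEWNET
	#define CLONE_NEWNET 0x40000000	/* not yet in older libc headers */
	#endif

	int main(void)
	{
		/* Detach from the parent's network namespace. */
		if (unshare(CLONE_NEWNET) < 0) {
			perror("unshare(CLONE_NEWNET)");
			return 1;
		}
		/* From here on this process sees its own, initially
		 * loopback-only, set of network devices. */
		execlp("ip", "ip", "link", "show", (char *)NULL);
		perror("execlp");
		return 1;
	}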
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 55b3761edaa9..57efe0400bc2 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -547,9 +547,9 @@ sys_timer_create(const clockid_t which_clock,
 		new_timer->it_process = process;
 		list_add(&new_timer->list,
 			 &process->signal->posix_timers);
-		spin_unlock_irqrestore(&process->sighand->siglock, flags);
 		if (new_timer->it_sigev_notify == (SIGEV_SIGNAL|SIGEV_THREAD_ID))
 			get_task_struct(process);
+		spin_unlock_irqrestore(&process->sighand->siglock, flags);
 	} else {
 		spin_unlock_irqrestore(&process->sighand->siglock, flags);
 		process = NULL;
@@ -605,13 +605,14 @@ static struct k_itimer * lock_timer(timer_t timer_id, unsigned long *flags)
 	timr = (struct k_itimer *) idr_find(&posix_timers_id, (int) timer_id);
 	if (timr) {
 		spin_lock(&timr->it_lock);
-		spin_unlock(&idr_lock);
 
 		if ((timr->it_id != timer_id) || !(timr->it_process) ||
 				timr->it_process->tgid != current->tgid) {
-			unlock_timer(timr, *flags);
+			spin_unlock(&timr->it_lock);
+			spin_unlock_irqrestore(&idr_lock, *flags);
 			timr = NULL;
-		}
+		} else
+			spin_unlock(&idr_lock);
 	} else
 		spin_unlock_irqrestore(&idr_lock, *flags);
 
@@ -711,7 +712,7 @@ sys_timer_getoverrun(timer_t timer_id)
 {
 	struct k_itimer *timr;
 	int overrun;
-	long flags;
+	unsigned long flags;
 
 	timr = lock_timer(timer_id, &flags);
 	if (!timr)
@@ -783,7 +784,7 @@ sys_timer_settime(timer_t timer_id, int flags,
 	struct k_itimer *timr;
 	struct itimerspec new_spec, old_spec;
 	int error = 0;
-	long flag;
+	unsigned long flag;
 	struct itimerspec *rtn = old_setting ? &old_spec : NULL;
 
 	if (!new_setting)
@@ -835,7 +836,7 @@ asmlinkage long
 sys_timer_delete(timer_t timer_id)
 {
 	struct k_itimer *timer;
-	long flags;
+	unsigned long flags;
 
 retry_delete:
 	timer = lock_timer(timer_id, &flags);
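The lock_timer() change keeps idr_lock held until the candidate timer has been validated under its own it_lock: only the path that passes validation drops idr_lock and returns with the timer locked, while the failure path now releases both locks explicitly instead of going through unlock_timer(). The sketch below shows the same lookup-then-validate shape with pthread mutexes; table_lock, struct obj and lookup_locked() are invented names, and the kernel's additional concern of restoring the saved irq flags on whichever path finally drops idr_lock is ignored here.

	#include <pthread.h>
	#include <stdio.h>

	struct obj {
		int id;
		int owner;
		pthread_mutex_t lock;
	};

	static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
	static struct obj table[2] = {
		{ .id = 1, .owner = 100, .lock = PTHREAD_MUTEX_INITIALIZER },
		{ .id = 2, .owner = 200, .lock = PTHREAD_MUTEX_INITIALIZER },
	};

	/* Return the object with its lock held, or NULL. The table lock is
	 * only released once the object has been validated (or rejected). */
	static struct obj *lookup_locked(int id, int me)
	{
		struct obj *o = NULL;
		int i;

		pthread_mutex_lock(&table_lock);
		for (i = 0; i < 2; i++)
			if (table[i].id == id)
				o = &table[i];
		if (o) {
			pthread_mutex_lock(&o->lock);
			if (o->owner != me) {
				pthread_mutex_unlock(&o->lock);
				pthread_mutex_unlock(&table_lock);
				return NULL;
			}
			pthread_mutex_unlock(&table_lock);
			return o;	/* caller unlocks o->lock */
		}
		pthread_mutex_unlock(&table_lock);
		return NULL;
	}

	int main(void)
	{
		struct obj *o = lookup_locked(1, 100);

		printf("lookup: %s\n", o ? "hit, object locked" : "miss");
		if (o)
			pthread_mutex_unlock(&o->lock);
		return 0;
	}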
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 412859f8d94a..14b0e10dc95c 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -72,15 +72,10 @@ config PM_TRACE
 	  CAUTION: this option will cause your machine's real-time clock to be
 	  set to an invalid time after a resume.
 
-config SUSPEND_SMP_POSSIBLE
-	bool
-	depends on (X86 && !X86_VOYAGER) || (PPC64 && (PPC_PSERIES || PPC_PMAC))
-	depends on SMP
-	default y
-
-config SUSPEND_SMP
+config PM_SLEEP_SMP
 	bool
-	depends on SUSPEND_SMP_POSSIBLE && PM_SLEEP
+	depends on SUSPEND_SMP_POSSIBLE || HIBERNATION_SMP_POSSIBLE
+	depends on PM_SLEEP
 	select HOTPLUG_CPU
 	default y
 
@@ -89,20 +84,46 @@ config PM_SLEEP
 	depends on SUSPEND || HIBERNATION
 	default y
 
+config SUSPEND_UP_POSSIBLE
+	bool
+	depends on (X86 && !X86_VOYAGER) || PPC || ARM || BLACKFIN || MIPS \
+			|| SUPERH || FRV
+	depends on !SMP
+	default y
+
+config SUSPEND_SMP_POSSIBLE
+	bool
+	depends on (X86 && !X86_VOYAGER) \
+			|| (PPC && (PPC_PSERIES || PPC_PMAC)) || ARM
+	depends on SMP
+	default y
+
 config SUSPEND
 	bool "Suspend to RAM and standby"
 	depends on PM
-	depends on !SMP || SUSPEND_SMP_POSSIBLE
+	depends on SUSPEND_UP_POSSIBLE || SUSPEND_SMP_POSSIBLE
 	default y
 	---help---
 	  Allow the system to enter sleep states in which main memory is
 	  powered and thus its contents are preserved, such as the
 	  suspend-to-RAM state (i.e. the ACPI S3 state).
 
+config HIBERNATION_UP_POSSIBLE
+	bool
+	depends on X86 || PPC64_SWSUSP || PPC32
+	depends on !SMP
+	default y
+
+config HIBERNATION_SMP_POSSIBLE
+	bool
+	depends on (X86 && !X86_VOYAGER) || PPC64_SWSUSP
+	depends on SMP
+	default y
+
 config HIBERNATION
 	bool "Hibernation (aka 'suspend to disk')"
 	depends on PM && SWAP
-	depends on ((X86 || PPC64_SWSUSP || FRV || PPC32) && !SMP) || SUSPEND_SMP_POSSIBLE
+	depends on HIBERNATION_UP_POSSIBLE || HIBERNATION_SMP_POSSIBLE
 	---help---
 	  Enable the suspend to disk (STD) functionality, which is usually
 	  called "hibernation" in user interfaces. STD checkpoints the
diff --git a/kernel/printk.c b/kernel/printk.c
index bd2cd062878d..8451dfc31d25 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1083,6 +1083,19 @@ int unregister_console(struct console *console)
 }
 EXPORT_SYMBOL(unregister_console);
 
+static int __init disable_boot_consoles(void)
+{
+	if (console_drivers != NULL) {
+		if (console_drivers->flags & CON_BOOT) {
+			printk(KERN_INFO "turn off boot console %s%d\n",
+				console_drivers->name, console_drivers->index);
+			return unregister_console(console_drivers);
+		}
+	}
+	return 0;
+}
+late_initcall(disable_boot_consoles);
+
 /**
  * tty_write_message - write a message to a certain tty, not just the console.
  * @tty: the destination tty_struct
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 82a558b655da..3eca7a55f2ee 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -233,6 +233,7 @@ int ptrace_detach(struct task_struct *child, unsigned int data)
 
 	/* Architecture-specific hardware disable .. */
 	ptrace_disable(child);
+	clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 
 	write_lock_irq(&tasklist_lock);
 	/* protect against de_thread()->release_task() */
diff --git a/kernel/sched.c b/kernel/sched.c
index 45e17b83b7f1..6c10fa796ca0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -61,6 +61,7 @@
 #include <linux/delayacct.h>
 #include <linux/reciprocal_div.h>
 #include <linux/unistd.h>
+#include <linux/pagemap.h>
 
 #include <asm/tlb.h>
 
@@ -262,7 +263,8 @@ struct rq {
 	s64 clock_max_delta;
 
 	unsigned int clock_warps, clock_overflows;
-	unsigned int clock_unstable_events;
+	u64 idle_clock;
+	unsigned int clock_deep_idle_events;
 	u64 tick_timestamp;
 
 	atomic_t nr_iowait;
@@ -556,18 +558,40 @@ static inline struct rq *this_rq_lock(void)
 }
 
 /*
- * CPU frequency is/was unstable - start new by setting prev_clock_raw:
+ * We are going deep-idle (irqs are disabled):
  */
-void sched_clock_unstable_event(void)
+void sched_clock_idle_sleep_event(void)
 {
-	unsigned long flags;
-	struct rq *rq;
+	struct rq *rq = cpu_rq(smp_processor_id());
 
-	rq = task_rq_lock(current, &flags);
-	rq->prev_clock_raw = sched_clock();
-	rq->clock_unstable_events++;
-	task_rq_unlock(rq, &flags);
+	spin_lock(&rq->lock);
+	__update_rq_clock(rq);
+	spin_unlock(&rq->lock);
+	rq->clock_deep_idle_events++;
+}
+EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
+
+/*
+ * We just idled delta nanoseconds (called with irqs disabled):
+ */
+void sched_clock_idle_wakeup_event(u64 delta_ns)
+{
+	struct rq *rq = cpu_rq(smp_processor_id());
+	u64 now = sched_clock();
+
+	rq->idle_clock += delta_ns;
+	/*
+	 * Override the previous timestamp and ignore all
+	 * sched_clock() deltas that occured while we idled,
+	 * and use the PM-provided delta_ns to advance the
+	 * rq clock:
+	 */
+	spin_lock(&rq->lock);
+	rq->prev_clock_raw = now;
+	rq->clock += delta_ns;
+	spin_unlock(&rq->lock);
 }
+EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 
 /*
  * resched_task - mark a task 'to be rescheduled now'.
@@ -645,7 +669,7 @@ static u64 div64_likely32(u64 divident, unsigned long divisor)
 /*
  * Shift right and round:
  */
-#define RSR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
+#define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
 
 static unsigned long
 calc_delta_mine(unsigned long delta_exec, unsigned long weight,
@@ -661,10 +685,10 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
 	 * Check whether we'd overflow the 64-bit multiplication:
 	 */
 	if (unlikely(tmp > WMULT_CONST))
-		tmp = RSR(RSR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
+		tmp = SRR(SRR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
 			WMULT_SHIFT/2);
 	else
-		tmp = RSR(tmp * lw->inv_weight, WMULT_SHIFT);
+		tmp = SRR(tmp * lw->inv_weight, WMULT_SHIFT);
 
 	return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
 }
@@ -835,7 +859,6 @@ static void dec_nr_running(struct task_struct *p, struct rq *rq)
 
 static void set_load_weight(struct task_struct *p)
 {
-	task_rq(p)->cfs.wait_runtime -= p->se.wait_runtime;
 	p->se.wait_runtime = 0;
 
 	if (task_has_rt_policy(p)) {
@@ -1564,6 +1587,7 @@ static void __sched_fork(struct task_struct *p)
 	p->se.wait_start_fair = 0;
 	p->se.exec_start = 0;
 	p->se.sum_exec_runtime = 0;
+	p->se.prev_sum_exec_runtime = 0;
 	p->se.delta_exec = 0;
 	p->se.delta_fair_run = 0;
 	p->se.delta_fair_sleep = 0;
@@ -1659,6 +1683,11 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 
 	p->prio = effective_prio(p);
 
+	if (rt_prio(p->prio))
+		p->sched_class = &rt_sched_class;
+	else
+		p->sched_class = &fair_sched_class;
+
 	if (!p->sched_class->task_new || !sysctl_sched_child_runs_first ||
 			(clone_flags & CLONE_VM) || task_cpu(p) != this_cpu ||
 			!current->se.on_rq) {
@@ -2157,12 +2186,6 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
 	if (task_running(rq, p))
 		return 0;
 
-	/*
-	 * Aggressive migration if too many balance attempts have failed:
-	 */
-	if (sd->nr_balance_failed > sd->cache_nice_tries)
-		return 1;
-
 	return 1;
 }
 
@@ -2494,7 +2517,7 @@ group_next:
 	 * a think about bumping its value to force at least one task to be
 	 * moved
 	 */
-	if (*imbalance + SCHED_LOAD_SCALE_FUZZ < busiest_load_per_task/2) {
+	if (*imbalance < busiest_load_per_task) {
 		unsigned long tmp, pwr_now, pwr_move;
 		unsigned int imbn;
 
@@ -2546,10 +2569,8 @@ small_imbalance:
 	pwr_move /= SCHED_LOAD_SCALE;
 
 	/* Move if we gain throughput */
-	if (pwr_move <= pwr_now)
-		goto out_balanced;
-
-	*imbalance = busiest_load_per_task;
+	if (pwr_move > pwr_now)
+		*imbalance = busiest_load_per_task;
 	}
 
 	return busiest;
@@ -3020,6 +3041,7 @@ static inline void rebalance_domains(int cpu, enum cpu_idle_type idle)
 	struct sched_domain *sd;
 	/* Earliest time when we have to do rebalance again */
 	unsigned long next_balance = jiffies + 60*HZ;
+	int update_next_balance = 0;
 
 	for_each_domain(cpu, sd) {
 		if (!(sd->flags & SD_LOAD_BALANCE))
@@ -3056,8 +3078,10 @@ static inline void rebalance_domains(int cpu, enum cpu_idle_type idle)
 		if (sd->flags & SD_SERIALIZE)
 			spin_unlock(&balancing);
 out:
-		if (time_after(next_balance, sd->last_balance + interval))
+		if (time_after(next_balance, sd->last_balance + interval)) {
 			next_balance = sd->last_balance + interval;
+			update_next_balance = 1;
+		}
 
 		/*
 		 * Stop the load balance at this level. There is another
@@ -3067,7 +3091,14 @@ out:
 		if (!balance)
 			break;
 	}
-	rq->next_balance = next_balance;
+
+	/*
+	 * next_balance will be updated only when there is a need.
+	 * When the cpu is attached to null domain for ex, it will not be
+	 * updated.
+	 */
+	if (likely(update_next_balance))
+		rq->next_balance = next_balance;
 }
 
 /*
@@ -4525,10 +4556,7 @@ asmlinkage long sys_sched_yield(void)
 	struct rq *rq = this_rq_lock();
 
 	schedstat_inc(rq, yld_cnt);
-	if (unlikely(rq->nr_running == 1))
-		schedstat_inc(rq, yld_act_empty);
-	else
-		current->sched_class->yield_task(rq, current);
+	current->sched_class->yield_task(rq, current);
 
 	/*
 	 * Since we are going to call schedule() anyway, there's
@@ -4884,14 +4912,18 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
 static inline void sched_init_granularity(void)
 {
 	unsigned int factor = 1 + ilog2(num_online_cpus());
-	const unsigned long gran_limit = 100000000;
+	const unsigned long limit = 100000000;
+
+	sysctl_sched_min_granularity *= factor;
+	if (sysctl_sched_min_granularity > limit)
+		sysctl_sched_min_granularity = limit;
 
-	sysctl_sched_granularity *= factor;
-	if (sysctl_sched_granularity > gran_limit)
-		sysctl_sched_granularity = gran_limit;
+	sysctl_sched_latency *= factor;
+	if (sysctl_sched_latency > limit)
+		sysctl_sched_latency = limit;
 
-	sysctl_sched_runtime_limit = sysctl_sched_granularity * 4;
-	sysctl_sched_wakeup_granularity = sysctl_sched_granularity / 2;
+	sysctl_sched_runtime_limit = sysctl_sched_latency;
+	sysctl_sched_wakeup_granularity = sysctl_sched_min_granularity / 2;
 }
 
 #ifdef CONFIG_SMP
@@ -5234,15 +5266,16 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
 static struct ctl_table sd_ctl_dir[] = {
 	{
 		.procname = "sched_domain",
-		.mode = 0755,
+		.mode = 0555,
 	},
 	{0,},
 };
 
 static struct ctl_table sd_ctl_root[] = {
 	{
+		.ctl_name = CTL_KERN,
 		.procname = "kernel",
-		.mode = 0755,
+		.mode = 0555,
 		.child = sd_ctl_dir,
 	},
 	{0,},
@@ -5318,7 +5351,7 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
 	for_each_domain(cpu, sd) {
 		snprintf(buf, 32, "domain%d", i);
 		entry->procname = kstrdup(buf, GFP_KERNEL);
-		entry->mode = 0755;
+		entry->mode = 0555;
 		entry->child = sd_alloc_ctl_domain_table(sd);
 		entry++;
 		i++;
@@ -5338,7 +5371,7 @@ static void init_sched_domain_sysctl(void)
 	for (i = 0; i < cpu_num; i++, entry++) {
 		snprintf(buf, 32, "cpu%d", i);
 		entry->procname = kstrdup(buf, GFP_KERNEL);
-		entry->mode = 0755;
+		entry->mode = 0555;
 		entry->child = sd_alloc_ctl_cpu_table(i);
 	}
 	sd_sysctl_header = register_sysctl_table(sd_ctl_root);
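sched_clock_idle_sleep_event() and sched_clock_idle_wakeup_event() let the idle code tell the scheduler clock to discard whatever the raw sched_clock() counter did while the CPU sat in deep idle (where that counter may stop) and to advance by the PM-reported duration instead. The toy program below reproduces only that compensation arithmetic in plain C; struct toy_rq and all the numbers are invented for illustration and are not kernel code.

	#include <stdint.h>
	#include <stdio.h>

	/* Toy model of the per-runqueue clock state the two events touch. */
	struct toy_rq {
		uint64_t prev_clock_raw;	/* last raw clock sample, ns */
		uint64_t clock;			/* monotonic scheduler clock, ns */
		uint64_t idle_clock;		/* total ns spent in deep idle */
	};

	/* Normal update: advance by the raw delta since the last sample. */
	static void update_clock(struct toy_rq *rq, uint64_t raw_now)
	{
		rq->clock += raw_now - rq->prev_clock_raw;
		rq->prev_clock_raw = raw_now;
	}

	/* Wakeup event: ignore the raw delta that accumulated while idling
	 * (possibly garbage if the counter stopped) and trust delta_ns. */
	static void idle_wakeup_event(struct toy_rq *rq, uint64_t raw_now,
				      uint64_t delta_ns)
	{
		rq->idle_clock += delta_ns;
		rq->prev_clock_raw = raw_now;	/* override the previous timestamp */
		rq->clock += delta_ns;		/* advance by the trusted amount */
	}

	int main(void)
	{
		struct toy_rq rq = { .prev_clock_raw = 1000, .clock = 1000 };

		update_clock(&rq, 2000);		/* ran for 1000 ns */
		idle_wakeup_event(&rq, 2070, 500000);	/* raw moved 70 ns, PM reports 500000 ns */
		update_clock(&rq, 3070);		/* ran for another 1000 ns */

		printf("clock=%llu idle_clock=%llu\n",
		       (unsigned long long)rq.clock,
		       (unsigned long long)rq.idle_clock);
		return 0;
	}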
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 87e524762b85..c3ee38bd3426 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -154,10 +154,11 @@ static void print_cpu(struct seq_file *m, int cpu)
 	P(next_balance);
 	P(curr->pid);
 	P(clock);
+	P(idle_clock);
 	P(prev_clock_raw);
 	P(clock_warps);
 	P(clock_overflows);
-	P(clock_unstable_events);
+	P(clock_deep_idle_events);
 	P(clock_max_delta);
 	P(cpu_load[0]);
 	P(cpu_load[1]);
@@ -282,4 +283,5 @@ void proc_sched_set_task(struct task_struct *p)
 	p->se.wait_runtime_overruns = p->se.wait_runtime_underruns = 0;
 #endif
 	p->se.sum_exec_runtime = 0;
+	p->se.prev_sum_exec_runtime = 0;
 }
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index fedbb51bba96..67c67a87146e 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
| @@ -15,34 +15,50 @@ | |||
| 15 | * | 15 | * |
| 16 | * Scaled math optimizations by Thomas Gleixner | 16 | * Scaled math optimizations by Thomas Gleixner |
| 17 | * Copyright (C) 2007, Thomas Gleixner <tglx@linutronix.de> | 17 | * Copyright (C) 2007, Thomas Gleixner <tglx@linutronix.de> |
| 18 | * | ||
| 19 | * Adaptive scheduling granularity, math enhancements by Peter Zijlstra | ||
| 20 | * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | ||
| 18 | */ | 21 | */ |
| 19 | 22 | ||
| 20 | /* | 23 | /* |
| 21 | * Preemption granularity: | 24 | * Targeted preemption latency for CPU-bound tasks: |
| 22 | * (default: 2 msec, units: nanoseconds) | 25 | * (default: 20ms, units: nanoseconds) |
| 23 | * | 26 | * |
| 24 | * NOTE: this granularity value is not the same as the concept of | 27 | * NOTE: this latency value is not the same as the concept of |
| 25 | * 'timeslice length' - timeslices in CFS will typically be somewhat | 28 | * 'timeslice length' - timeslices in CFS are of variable length. |
| 26 | * larger than this value. (to see the precise effective timeslice | 29 | * (to see the precise effective timeslice length of your workload, |
| 27 | * length of your workload, run vmstat and monitor the context-switches | 30 | * run vmstat and monitor the context-switches field) |
| 28 | * field) | ||
| 29 | * | 31 | * |
| 30 | * On SMP systems the value of this is multiplied by the log2 of the | 32 | * On SMP systems the value of this is multiplied by the log2 of the |
| 31 | * number of CPUs. (i.e. factor 2x on 2-way systems, 3x on 4-way | 33 | * number of CPUs. (i.e. factor 2x on 2-way systems, 3x on 4-way |
| 32 | * systems, 4x on 8-way systems, 5x on 16-way systems, etc.) | 34 | * systems, 4x on 8-way systems, 5x on 16-way systems, etc.) |
| 35 | * Targeted preemption latency for CPU-bound tasks: | ||
| 36 | */ | ||
| 37 | unsigned int sysctl_sched_latency __read_mostly = 20000000ULL; | ||
| 38 | |||
| 39 | /* | ||
| 40 | * Minimal preemption granularity for CPU-bound tasks: | ||
| 41 | * (default: 2 msec, units: nanoseconds) | ||
| 33 | */ | 42 | */ |
| 34 | unsigned int sysctl_sched_granularity __read_mostly = 2000000000ULL/HZ; | 43 | unsigned int sysctl_sched_min_granularity __read_mostly = 2000000ULL; |
| 44 | |||
| 45 | /* | ||
| 46 | * sys_sched_yield() compat mode | ||
| 47 | * | ||
| 48 | * This option switches the agressive yield implementation of the | ||
| 49 | * old scheduler back on. | ||
| 50 | */ | ||
| 51 | unsigned int __read_mostly sysctl_sched_compat_yield; | ||
| 35 | 52 | ||
| 36 | /* | 53 | /* |
| 37 | * SCHED_BATCH wake-up granularity. | 54 | * SCHED_BATCH wake-up granularity. |
| 38 | * (default: 10 msec, units: nanoseconds) | 55 | * (default: 25 msec, units: nanoseconds) |
| 39 | * | 56 | * |
| 40 | * This option delays the preemption effects of decoupled workloads | 57 | * This option delays the preemption effects of decoupled workloads |
| 41 | * and reduces their over-scheduling. Synchronous workloads will still | 58 | * and reduces their over-scheduling. Synchronous workloads will still |
| 42 | * have immediate wakeup/sleep latencies. | 59 | * have immediate wakeup/sleep latencies. |
| 43 | */ | 60 | */ |
| 44 | unsigned int sysctl_sched_batch_wakeup_granularity __read_mostly = | 61 | unsigned int sysctl_sched_batch_wakeup_granularity __read_mostly = 25000000UL; |
| 45 | 10000000000ULL/HZ; | ||
| 46 | 62 | ||
| 47 | /* | 63 | /* |
| 48 | * SCHED_OTHER wake-up granularity. | 64 | * SCHED_OTHER wake-up granularity. |
| @@ -52,12 +68,12 @@ unsigned int sysctl_sched_batch_wakeup_granularity __read_mostly = | |||
| 52 | * and reduces their over-scheduling. Synchronous workloads will still | 68 | * and reduces their over-scheduling. Synchronous workloads will still |
| 53 | * have immediate wakeup/sleep latencies. | 69 | * have immediate wakeup/sleep latencies. |
| 54 | */ | 70 | */ |
| 55 | unsigned int sysctl_sched_wakeup_granularity __read_mostly = 1000000000ULL/HZ; | 71 | unsigned int sysctl_sched_wakeup_granularity __read_mostly = 1000000UL; |
| 56 | 72 | ||
| 57 | unsigned int sysctl_sched_stat_granularity __read_mostly; | 73 | unsigned int sysctl_sched_stat_granularity __read_mostly; |
| 58 | 74 | ||
| 59 | /* | 75 | /* |
| 60 | * Initialized in sched_init_granularity(): | 76 | * Initialized in sched_init_granularity() [to 5 times the base granularity]: |
| 61 | */ | 77 | */ |
| 62 | unsigned int sysctl_sched_runtime_limit __read_mostly; | 78 | unsigned int sysctl_sched_runtime_limit __read_mostly; |
| 63 | 79 | ||
| @@ -186,6 +202,8 @@ __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 186 | update_load_add(&cfs_rq->load, se->load.weight); | 202 | update_load_add(&cfs_rq->load, se->load.weight); |
| 187 | cfs_rq->nr_running++; | 203 | cfs_rq->nr_running++; |
| 188 | se->on_rq = 1; | 204 | se->on_rq = 1; |
| 205 | |||
| 206 | schedstat_add(cfs_rq, wait_runtime, se->wait_runtime); | ||
| 189 | } | 207 | } |
| 190 | 208 | ||
| 191 | static inline void | 209 | static inline void |
| @@ -197,6 +215,8 @@ __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 197 | update_load_sub(&cfs_rq->load, se->load.weight); | 215 | update_load_sub(&cfs_rq->load, se->load.weight); |
| 198 | cfs_rq->nr_running--; | 216 | cfs_rq->nr_running--; |
| 199 | se->on_rq = 0; | 217 | se->on_rq = 0; |
| 218 | |||
| 219 | schedstat_add(cfs_rq, wait_runtime, -se->wait_runtime); | ||
| 200 | } | 220 | } |
| 201 | 221 | ||
| 202 | static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq) | 222 | static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq) |
| @@ -214,6 +234,49 @@ static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq) | |||
| 214 | */ | 234 | */ |
| 215 | 235 | ||
| 216 | /* | 236 | /* |
| 237 | * Calculate the preemption granularity needed to schedule every | ||
| 238 | * runnable task once per sysctl_sched_latency amount of time. | ||
| 239 | * (down to a sensible low limit on granularity) | ||
| 240 | * | ||
| 241 | * For example, if there are 2 tasks running and latency is 10 msecs, | ||
| 242 | * we switch tasks every 5 msecs. If we have 3 tasks running, we have | ||
| 243 | * to switch tasks every 3.33 msecs to get a 10 msecs observed latency | ||
| 244 | * for each task. We do finer and finer scheduling up to until we | ||
| 245 | * reach the minimum granularity value. | ||
| 246 | * | ||
| 247 | * To achieve this we use the following dynamic-granularity rule: | ||
| 248 | * | ||
| 249 | * gran = lat/nr - lat/nr/nr | ||
| 250 | * | ||
| 251 | * This comes out of the following equations: | ||
| 252 | * | ||
| 253 | * kA1 + gran = kB1 | ||
| 254 | * kB2 + gran = kA2 | ||
| 255 | * kA2 = kA1 | ||
| 256 | * kB2 = kB1 - d + d/nr | ||
| 257 | * lat = d * nr | ||
| 258 | * | ||
| 259 | * Where 'k' is key, 'A' is task A (waiting), 'B' is task B (running), | ||
| 260 | * '1' is start of time, '2' is end of time, 'd' is delay between | ||
| 261 | * 1 and 2 (during which task B was running), 'nr' is number of tasks | ||
| 262 | * running, 'lat' is the the period of each task. ('lat' is the | ||
| 263 | * sched_latency that we aim for.) | ||
| 264 | */ | ||
| 265 | static long | ||
| 266 | sched_granularity(struct cfs_rq *cfs_rq) | ||
| 267 | { | ||
| 268 | unsigned int gran = sysctl_sched_latency; | ||
| 269 | unsigned int nr = cfs_rq->nr_running; | ||
| 270 | |||
| 271 | if (nr > 1) { | ||
| 272 | gran = gran/nr - gran/nr/nr; | ||
| 273 | gran = max(gran, sysctl_sched_min_granularity); | ||
| 274 | } | ||
| 275 | |||
| 276 | return gran; | ||
| 277 | } | ||
| 278 | |||
| 279 | /* | ||
| 217 | * We rescale the rescheduling granularity of tasks according to their | 280 | * We rescale the rescheduling granularity of tasks according to their |
| 218 | * nice level, but only linearly, not exponentially: | 281 | * nice level, but only linearly, not exponentially: |
| 219 | */ | 282 | */ |
| @@ -240,7 +303,7 @@ niced_granularity(struct sched_entity *curr, unsigned long granularity) | |||
| 240 | /* | 303 | /* |
| 241 | * It will always fit into 'long': | 304 | * It will always fit into 'long': |
| 242 | */ | 305 | */ |
| 243 | return (long) (tmp >> WMULT_SHIFT); | 306 | return (long) (tmp >> (WMULT_SHIFT-NICE_0_SHIFT)); |
| 244 | } | 307 | } |
| 245 | 308 | ||
| 246 | static inline void | 309 | static inline void |
| @@ -303,10 +366,10 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr) | |||
| 303 | delta_fair = calc_delta_fair(delta_exec, lw); | 366 | delta_fair = calc_delta_fair(delta_exec, lw); |
| 304 | delta_mine = calc_delta_mine(delta_exec, curr->load.weight, lw); | 367 | delta_mine = calc_delta_mine(delta_exec, curr->load.weight, lw); |
| 305 | 368 | ||
| 306 | if (cfs_rq->sleeper_bonus > sysctl_sched_granularity) { | 369 | if (cfs_rq->sleeper_bonus > sysctl_sched_min_granularity) { |
| 307 | delta = min(cfs_rq->sleeper_bonus, (u64)delta_exec); | 370 | delta = min((u64)delta_mine, cfs_rq->sleeper_bonus); |
| 308 | delta = calc_delta_mine(delta, curr->load.weight, lw); | 371 | delta = min(delta, (unsigned long)( |
| 309 | delta = min((u64)delta, cfs_rq->sleeper_bonus); | 372 | (long)sysctl_sched_runtime_limit - curr->wait_runtime)); |
| 310 | cfs_rq->sleeper_bonus -= delta; | 373 | cfs_rq->sleeper_bonus -= delta; |
| 311 | delta_mine -= delta; | 374 | delta_mine -= delta; |
| 312 | } | 375 | } |
| @@ -438,6 +501,9 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 438 | { | 501 | { |
| 439 | unsigned long delta_fair; | 502 | unsigned long delta_fair; |
| 440 | 503 | ||
| 504 | if (unlikely(!se->wait_start_fair)) | ||
| 505 | return; | ||
| 506 | |||
| 441 | delta_fair = (unsigned long)min((u64)(2*sysctl_sched_runtime_limit), | 507 | delta_fair = (unsigned long)min((u64)(2*sysctl_sched_runtime_limit), |
| 442 | (u64)(cfs_rq->fair_clock - se->wait_start_fair)); | 508 | (u64)(cfs_rq->fair_clock - se->wait_start_fair)); |
| 443 | 509 | ||
| @@ -494,6 +560,13 @@ static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 494 | unsigned long load = cfs_rq->load.weight, delta_fair; | 560 | unsigned long load = cfs_rq->load.weight, delta_fair; |
| 495 | long prev_runtime; | 561 | long prev_runtime; |
| 496 | 562 | ||
| 563 | /* | ||
| 564 | * Do not boost sleepers if there's too much bonus 'in flight' | ||
| 565 | * already: | ||
| 566 | */ | ||
| 567 | if (unlikely(cfs_rq->sleeper_bonus > sysctl_sched_runtime_limit)) | ||
| 568 | return; | ||
| 569 | |||
| 497 | if (sysctl_sched_features & SCHED_FEAT_SLEEPER_LOAD_AVG) | 570 | if (sysctl_sched_features & SCHED_FEAT_SLEEPER_LOAD_AVG) |
| 498 | load = rq_of(cfs_rq)->cpu_load[2]; | 571 | load = rq_of(cfs_rq)->cpu_load[2]; |
| 499 | 572 | ||
| @@ -519,10 +592,6 @@ static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 519 | * Track the amount of bonus we've given to sleepers: | 592 | * Track the amount of bonus we've given to sleepers: |
| 520 | */ | 593 | */ |
| 521 | cfs_rq->sleeper_bonus += delta_fair; | 594 | cfs_rq->sleeper_bonus += delta_fair; |
| 522 | if (unlikely(cfs_rq->sleeper_bonus > sysctl_sched_runtime_limit)) | ||
| 523 | cfs_rq->sleeper_bonus = sysctl_sched_runtime_limit; | ||
| 524 | |||
| 525 | schedstat_add(cfs_rq, wait_runtime, se->wait_runtime); | ||
| 526 | } | 595 | } |
| 527 | 596 | ||
| 528 | static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) | 597 | static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) |
| @@ -570,6 +639,16 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 570 | 639 | ||
| 571 | se->block_start = 0; | 640 | se->block_start = 0; |
| 572 | se->sum_sleep_runtime += delta; | 641 | se->sum_sleep_runtime += delta; |
| 642 | |||
| 643 | /* | ||
| 644 | * Blocking time is in units of nanosecs, so shift by 20 to | ||
| 645 | * get a milliseconds-range estimation of the amount of | ||
| 646 | * time that the task spent sleeping: | ||
| 647 | */ | ||
| 648 | if (unlikely(prof_on == SLEEP_PROFILING)) { | ||
| 649 | profile_hits(SLEEP_PROFILING, (void *)get_wchan(tsk), | ||
| 650 | delta >> 20); | ||
| 651 | } | ||
| 573 | } | 652 | } |
| 574 | #endif | 653 | #endif |
| 575 | } | 654 | } |
| @@ -604,7 +683,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) | |||
| 604 | if (tsk->state & TASK_UNINTERRUPTIBLE) | 683 | if (tsk->state & TASK_UNINTERRUPTIBLE) |
| 605 | se->block_start = rq_of(cfs_rq)->clock; | 684 | se->block_start = rq_of(cfs_rq)->clock; |
| 606 | } | 685 | } |
| 607 | cfs_rq->wait_runtime -= se->wait_runtime; | ||
| 608 | #endif | 686 | #endif |
| 609 | } | 687 | } |
| 610 | __dequeue_entity(cfs_rq, se); | 688 | __dequeue_entity(cfs_rq, se); |
| @@ -618,11 +696,31 @@ __check_preempt_curr_fair(struct cfs_rq *cfs_rq, struct sched_entity *se, | |||
| 618 | struct sched_entity *curr, unsigned long granularity) | 696 | struct sched_entity *curr, unsigned long granularity) |
| 619 | { | 697 | { |
| 620 | s64 __delta = curr->fair_key - se->fair_key; | 698 | s64 __delta = curr->fair_key - se->fair_key; |
| 699 | unsigned long ideal_runtime, delta_exec; | ||
| 700 | |||
| 701 | /* | ||
| 702 | * ideal_runtime is compared against sum_exec_runtime, which is | ||
| 703 | * walltime, hence do not scale. | ||
| 704 | */ | ||
| 705 | ideal_runtime = max(sysctl_sched_latency / cfs_rq->nr_running, | ||
| 706 | (unsigned long)sysctl_sched_min_granularity); | ||
| 707 | |||
| 708 | /* | ||
| 709 | * If we executed more than what the latency constraint suggests, | ||
| 710 | * reduce the rescheduling granularity. This way the total latency | ||
| 711 | * of how much a task is not scheduled converges to | ||
| 712 | * sysctl_sched_latency: | ||
| 713 | */ | ||
| 714 | delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; | ||
| 715 | if (delta_exec > ideal_runtime) | ||
| 716 | granularity = 0; | ||
| 621 | 717 | ||
| 622 | /* | 718 | /* |
| 623 | * Take scheduling granularity into account - do not | 719 | * Take scheduling granularity into account - do not |
| 624 | * preempt the current task unless the best task has | 720 | * preempt the current task unless the best task has |
| 625 | * a larger than sched_granularity fairness advantage: | 721 | * a larger than sched_granularity fairness advantage: |
| 722 | * | ||
| 723 | * scale granularity as key space is in fair_clock. | ||
| 626 | */ | 724 | */ |
| 627 | if (__delta > niced_granularity(curr, granularity)) | 725 | if (__delta > niced_granularity(curr, granularity)) |
| 628 | resched_task(rq_of(cfs_rq)->curr); | 726 | resched_task(rq_of(cfs_rq)->curr); |
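The ideal_runtime logic added above is plain integer arithmetic over the latency target; condensed into a standalone helper (hypothetical name, all times in nanoseconds) the decision is:

    #include <linux/kernel.h>
    #include <linux/types.h>

    static unsigned long demo_preempt_granularity(u64 sum_exec_runtime,
    					      u64 prev_sum_exec_runtime,
    					      unsigned long latency,
    					      unsigned long min_granularity,
    					      unsigned long nr_running,
    					      unsigned long granularity)
    {
    	unsigned long ideal_runtime, delta_exec;

    	/* Fair share of the latency period, floored at the minimum. */
    	ideal_runtime = max(latency / nr_running, min_granularity);

    	/* Walltime used since this entity was last set as current. */
    	delta_exec = sum_exec_runtime - prev_sum_exec_runtime;

    	/* Overran the slice: preempt immediately on the next check. */
    	return delta_exec > ideal_runtime ? 0 : granularity;
    }

The stored prev_sum_exec_runtime that makes this delta possible is exactly what the one-line set_next_entity() change below adds.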
| @@ -641,6 +739,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 641 | update_stats_wait_end(cfs_rq, se); | 739 | update_stats_wait_end(cfs_rq, se); |
| 642 | update_stats_curr_start(cfs_rq, se); | 740 | update_stats_curr_start(cfs_rq, se); |
| 643 | set_cfs_rq_curr(cfs_rq, se); | 741 | set_cfs_rq_curr(cfs_rq, se); |
| 742 | se->prev_sum_exec_runtime = se->sum_exec_runtime; | ||
| 644 | } | 743 | } |
| 645 | 744 | ||
| 646 | static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) | 745 | static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) |
| @@ -686,7 +785,8 @@ static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) | |||
| 686 | if (next == curr) | 785 | if (next == curr) |
| 687 | return; | 786 | return; |
| 688 | 787 | ||
| 689 | __check_preempt_curr_fair(cfs_rq, next, curr, sysctl_sched_granularity); | 788 | __check_preempt_curr_fair(cfs_rq, next, curr, |
| 789 | sched_granularity(cfs_rq)); | ||
| 690 | } | 790 | } |
| 691 | 791 | ||
| 692 | /************************************************** | 792 | /************************************************** |
| @@ -815,19 +915,62 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep) | |||
| 815 | } | 915 | } |
| 816 | 916 | ||
| 817 | /* | 917 | /* |
| 818 | * sched_yield() support is very simple - we dequeue and enqueue | 918 | * sched_yield() support is very simple - we dequeue and enqueue. |
| 919 | * | ||
| 920 | * If compat_yield is turned on then we requeue to the end of the tree. | ||
| 819 | */ | 921 | */ |
| 820 | static void yield_task_fair(struct rq *rq, struct task_struct *p) | 922 | static void yield_task_fair(struct rq *rq, struct task_struct *p) |
| 821 | { | 923 | { |
| 822 | struct cfs_rq *cfs_rq = task_cfs_rq(p); | 924 | struct cfs_rq *cfs_rq = task_cfs_rq(p); |
| 925 | struct rb_node **link = &cfs_rq->tasks_timeline.rb_node; | ||
| 926 | struct sched_entity *rightmost, *se = &p->se; | ||
| 927 | struct rb_node *parent; | ||
| 823 | 928 | ||
| 824 | __update_rq_clock(rq); | ||
| 825 | /* | 929 | /* |
| 826 | * Dequeue and enqueue the task to update its | 930 | * Are we the only task in the tree? |
| 827 | * position within the tree: | 931 | */ |
| 932 | if (unlikely(cfs_rq->nr_running == 1)) | ||
| 933 | return; | ||
| 934 | |||
| 935 | if (likely(!sysctl_sched_compat_yield)) { | ||
| 936 | __update_rq_clock(rq); | ||
| 937 | /* | ||
| 938 | * Dequeue and enqueue the task to update its | ||
| 939 | * position within the tree: | ||
| 940 | */ | ||
| 941 | dequeue_entity(cfs_rq, &p->se, 0); | ||
| 942 | enqueue_entity(cfs_rq, &p->se, 0); | ||
| 943 | |||
| 944 | return; | ||
| 945 | } | ||
| 946 | /* | ||
| 947 | * Find the rightmost entry in the rbtree: | ||
| 948 | */ | ||
| 949 | do { | ||
| 950 | parent = *link; | ||
| 951 | link = &parent->rb_right; | ||
| 952 | } while (*link); | ||
| 953 | |||
| 954 | rightmost = rb_entry(parent, struct sched_entity, run_node); | ||
| 955 | /* | ||
| 956 | * Already in the rightmost position? | ||
| 957 | */ | ||
| 958 | if (unlikely(rightmost == se)) | ||
| 959 | return; | ||
| 960 | |||
| 961 | /* | ||
| 962 | * Minimally necessary key value to be last in the tree: | ||
| 828 | */ | 963 | */ |
| 829 | dequeue_entity(cfs_rq, &p->se, 0); | 964 | se->fair_key = rightmost->fair_key + 1; |
| 830 | enqueue_entity(cfs_rq, &p->se, 0); | 965 | |
| 966 | if (cfs_rq->rb_leftmost == &se->run_node) | ||
| 967 | cfs_rq->rb_leftmost = rb_next(&se->run_node); | ||
| 968 | /* | ||
| 969 | * Relink the task to the rightmost position: | ||
| 970 | */ | ||
| 971 | rb_erase(&se->run_node, &cfs_rq->tasks_timeline); | ||
| 972 | rb_link_node(&se->run_node, parent, link); | ||
| 973 | rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline); | ||
| 831 | } | 974 | } |
| 832 | 975 | ||
| 833 | /* | 976 | /* |
| @@ -1020,31 +1163,32 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr) | |||
| 1020 | static void task_new_fair(struct rq *rq, struct task_struct *p) | 1163 | static void task_new_fair(struct rq *rq, struct task_struct *p) |
| 1021 | { | 1164 | { |
| 1022 | struct cfs_rq *cfs_rq = task_cfs_rq(p); | 1165 | struct cfs_rq *cfs_rq = task_cfs_rq(p); |
| 1023 | struct sched_entity *se = &p->se; | 1166 | struct sched_entity *se = &p->se, *curr = cfs_rq_curr(cfs_rq); |
| 1024 | 1167 | ||
| 1025 | sched_info_queued(p); | 1168 | sched_info_queued(p); |
| 1026 | 1169 | ||
| 1170 | update_curr(cfs_rq); | ||
| 1027 | update_stats_enqueue(cfs_rq, se); | 1171 | update_stats_enqueue(cfs_rq, se); |
| 1028 | /* | 1172 | /* |
| 1029 | * Child runs first: we let it run before the parent | 1173 | * Child runs first: we let it run before the parent |
| 1030 | * until it reschedules once. We set up the key so that | 1174 | * until it reschedules once. We set up the key so that |
| 1031 | * it will preempt the parent: | 1175 | * it will preempt the parent: |
| 1032 | */ | 1176 | */ |
| 1033 | p->se.fair_key = current->se.fair_key - | 1177 | se->fair_key = curr->fair_key - |
| 1034 | niced_granularity(&rq->curr->se, sysctl_sched_granularity) - 1; | 1178 | niced_granularity(curr, sched_granularity(cfs_rq)) - 1; |
| 1035 | /* | 1179 | /* |
| 1036 | * The first wait is dominated by the child-runs-first logic, | 1180 | * The first wait is dominated by the child-runs-first logic, |
| 1037 | * so do not credit it with that waiting time yet: | 1181 | * so do not credit it with that waiting time yet: |
| 1038 | */ | 1182 | */ |
| 1039 | if (sysctl_sched_features & SCHED_FEAT_SKIP_INITIAL) | 1183 | if (sysctl_sched_features & SCHED_FEAT_SKIP_INITIAL) |
| 1040 | p->se.wait_start_fair = 0; | 1184 | se->wait_start_fair = 0; |
| 1041 | 1185 | ||
| 1042 | /* | 1186 | /* |
| 1043 | * The statistical average of wait_runtime is about | 1187 | * The statistical average of wait_runtime is about |
| 1044 | * -granularity/2, so initialize the task with that: | 1188 | * -granularity/2, so initialize the task with that: |
| 1045 | */ | 1189 | */ |
| 1046 | if (sysctl_sched_features & SCHED_FEAT_START_DEBIT) | 1190 | if (sysctl_sched_features & SCHED_FEAT_START_DEBIT) |
| 1047 | p->se.wait_runtime = -(sysctl_sched_granularity / 2); | 1191 | se->wait_runtime = -(sched_granularity(cfs_rq) / 2); |
| 1048 | 1192 | ||
| 1049 | __enqueue_entity(cfs_rq, se); | 1193 | __enqueue_entity(cfs_rq, se); |
| 1050 | } | 1194 | } |
| @@ -1057,7 +1201,7 @@ static void task_new_fair(struct rq *rq, struct task_struct *p) | |||
| 1057 | */ | 1201 | */ |
| 1058 | static void set_curr_task_fair(struct rq *rq) | 1202 | static void set_curr_task_fair(struct rq *rq) |
| 1059 | { | 1203 | { |
| 1060 | struct sched_entity *se = &rq->curr.se; | 1204 | struct sched_entity *se = &rq->curr->se; |
| 1061 | 1205 | ||
| 1062 | for_each_sched_entity(se) | 1206 | for_each_sched_entity(se) |
| 1063 | set_next_entity(cfs_rq_of(se), se); | 1207 | set_next_entity(cfs_rq_of(se), se); |
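The sched_compat_yield path added to yield_task_fair() above is, at its core, a standard "move this node to the rightmost position" operation on the rbtree. A self-contained sketch of that sequence on a hypothetical node type (the cfs_rq leftmost-cache update is left out):

    #include <linux/rbtree.h>
    #include <linux/types.h>

    struct demo_entity {
    	struct rb_node	run_node;
    	u64		key;
    };

    static void demo_move_to_rightmost(struct rb_root *root,
    				   struct demo_entity *se)
    {
    	struct rb_node **link = &root->rb_node, *parent = NULL;
    	struct demo_entity *rightmost;

    	/* Walk down the right spine to the current maximum. */
    	while (*link) {
    		parent = *link;
    		link = &parent->rb_right;
    	}
    	if (!parent)
    		return;			/* empty tree: nothing to do */

    	rightmost = rb_entry(parent, struct demo_entity, run_node);
    	if (rightmost == se)
    		return;			/* already the rightmost node */

    	/* Minimal key that sorts behind every other node. */
    	se->key = rightmost->key + 1;

    	rb_erase(&se->run_node, root);
    	rb_link_node(&se->run_node, parent, link);
    	rb_insert_color(&se->run_node, root);
    }

With compat_yield disabled, the function keeps the old behaviour: a dequeue/enqueue pair that merely refreshes the task's position instead of moving it behind every other runnable task.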
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index dcdcad632fd9..4b87476a02d0 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
| @@ -207,10 +207,15 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p) | |||
| 207 | return; | 207 | return; |
| 208 | 208 | ||
| 209 | p->time_slice = static_prio_timeslice(p->static_prio); | 209 | p->time_slice = static_prio_timeslice(p->static_prio); |
| 210 | set_tsk_need_resched(p); | ||
| 211 | 210 | ||
| 212 | /* put it at the end of the queue: */ | 211 | /* |
| 213 | requeue_task_rt(rq, p); | 212 | * Requeue to the end of queue if we are not the only element |
| 213 | * on the queue: | ||
| 214 | */ | ||
| 215 | if (p->run_list.prev != p->run_list.next) { | ||
| 216 | requeue_task_rt(rq, p); | ||
| 217 | set_tsk_need_resched(p); | ||
| 218 | } | ||
| 214 | } | 219 | } |
| 215 | 220 | ||
| 216 | static struct sched_class rt_sched_class __read_mostly = { | 221 | static struct sched_class rt_sched_class __read_mostly = { |
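The new requeue condition above relies on a property of the circular struct list_head: when a task is the only entry on its run list, its ->prev and ->next both point at the list head and therefore compare equal, so the requeue (and the resched) can be skipped. The same test as a standalone helper (hypothetical name):

    #include <linux/list.h>

    static inline int demo_only_entry_on_list(const struct list_head *entry)
    {
    	/* One element: prev and next both point at the list head. */
    	return entry->prev == entry->next;
    }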
diff --git a/kernel/signal.c b/kernel/signal.c index b27c01a66448..792952381092 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
| @@ -378,8 +378,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) | |||
| 378 | /* We only dequeue private signals from ourselves, we don't let | 378 | /* We only dequeue private signals from ourselves, we don't let |
| 379 | * signalfd steal them | 379 | * signalfd steal them |
| 380 | */ | 380 | */ |
| 381 | if (tsk == current) | 381 | signr = __dequeue_signal(&tsk->pending, mask, info); |
| 382 | signr = __dequeue_signal(&tsk->pending, mask, info); | ||
| 383 | if (!signr) { | 382 | if (!signr) { |
| 384 | signr = __dequeue_signal(&tsk->signal->shared_pending, | 383 | signr = __dequeue_signal(&tsk->signal->shared_pending, |
| 385 | mask, info); | 384 | mask, info); |
| @@ -407,8 +406,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) | |||
| 407 | } | 406 | } |
| 408 | } | 407 | } |
| 409 | } | 408 | } |
| 410 | if (likely(tsk == current)) | 409 | recalc_sigpending(); |
| 411 | recalc_sigpending(); | ||
| 412 | if (signr && unlikely(sig_kernel_stop(signr))) { | 410 | if (signr && unlikely(sig_kernel_stop(signr))) { |
| 413 | /* | 411 | /* |
| 414 | * Set a marker that we have dequeued a stop signal. Our | 412 | * Set a marker that we have dequeued a stop signal. Our |
| @@ -425,7 +423,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) | |||
| 425 | if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) | 423 | if (!(tsk->signal->flags & SIGNAL_GROUP_EXIT)) |
| 426 | tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; | 424 | tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; |
| 427 | } | 425 | } |
| 428 | if ( signr && | 426 | if (signr && |
| 429 | ((info->si_code & __SI_MASK) == __SI_TIMER) && | 427 | ((info->si_code & __SI_MASK) == __SI_TIMER) && |
| 430 | info->si_sys_private){ | 428 | info->si_sys_private){ |
| 431 | /* | 429 | /* |
| @@ -533,18 +531,18 @@ static int check_kill_permission(int sig, struct siginfo *info, | |||
| 533 | if (!valid_signal(sig)) | 531 | if (!valid_signal(sig)) |
| 534 | return error; | 532 | return error; |
| 535 | 533 | ||
| 536 | error = audit_signal_info(sig, t); /* Let audit system see the signal */ | 534 | if (info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info))) { |
| 537 | if (error) | 535 | error = audit_signal_info(sig, t); /* Let audit system see the signal */ |
| 538 | return error; | 536 | if (error) |
| 539 | 537 | return error; | |
| 540 | error = -EPERM; | 538 | error = -EPERM; |
| 541 | if ((info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info))) | 539 | if (((sig != SIGCONT) || |
| 542 | && ((sig != SIGCONT) || | 540 | (process_session(current) != process_session(t))) |
| 543 | (process_session(current) != process_session(t))) | 541 | && (current->euid ^ t->suid) && (current->euid ^ t->uid) |
| 544 | && (current->euid ^ t->suid) && (current->euid ^ t->uid) | 542 | && (current->uid ^ t->suid) && (current->uid ^ t->uid) |
| 545 | && (current->uid ^ t->suid) && (current->uid ^ t->uid) | 543 | && !capable(CAP_KILL)) |
| 546 | && !capable(CAP_KILL)) | ||
| 547 | return error; | 544 | return error; |
| 545 | } | ||
| 548 | 546 | ||
| 549 | return security_task_kill(t, info, sig, 0); | 547 | return security_task_kill(t, info, sig, 0); |
| 550 | } | 548 | } |
| @@ -1300,20 +1298,19 @@ struct sigqueue *sigqueue_alloc(void) | |||
| 1300 | void sigqueue_free(struct sigqueue *q) | 1298 | void sigqueue_free(struct sigqueue *q) |
| 1301 | { | 1299 | { |
| 1302 | unsigned long flags; | 1300 | unsigned long flags; |
| 1301 | spinlock_t *lock = ¤t->sighand->siglock; | ||
| 1302 | |||
| 1303 | BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); | 1303 | BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); |
| 1304 | /* | 1304 | /* |
| 1305 | * If the signal is still pending remove it from the | 1305 | * If the signal is still pending remove it from the |
| 1306 | * pending queue. | 1306 | * pending queue. We must hold ->siglock while testing |
| 1307 | * q->list to serialize with collect_signal(). | ||
| 1307 | */ | 1308 | */ |
| 1308 | if (unlikely(!list_empty(&q->list))) { | 1309 | spin_lock_irqsave(lock, flags); |
| 1309 | spinlock_t *lock = ¤t->sighand->siglock; | 1310 | if (!list_empty(&q->list)) |
| 1310 | read_lock(&tasklist_lock); | 1311 | list_del_init(&q->list); |
| 1311 | spin_lock_irqsave(lock, flags); | 1312 | spin_unlock_irqrestore(lock, flags); |
| 1312 | if (!list_empty(&q->list)) | 1313 | |
| 1313 | list_del_init(&q->list); | ||
| 1314 | spin_unlock_irqrestore(lock, flags); | ||
| 1315 | read_unlock(&tasklist_lock); | ||
| 1316 | } | ||
| 1317 | q->flags &= ~SIGQUEUE_PREALLOC; | 1314 | q->flags &= ~SIGQUEUE_PREALLOC; |
| 1318 | __sigqueue_free(q); | 1315 | __sigqueue_free(q); |
| 1319 | } | 1316 | } |
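The sigqueue_free() rework above replaces the unlocked list_empty() fast path with an unconditional lock/test/unlock sequence, so the emptiness check can no longer race with collect_signal(). The underlying pattern, sketched with a hypothetical item type:

    #include <linux/list.h>
    #include <linux/spinlock.h>

    struct demo_item {
    	struct list_head list;
    };

    static void demo_detach(struct demo_item *item, spinlock_t *lock)
    {
    	unsigned long flags;

    	/*
    	 * Both the emptiness test and the deletion must happen under
    	 * the lock; otherwise a concurrent consumer could unlink the
    	 * item between the two steps.
    	 */
    	spin_lock_irqsave(lock, flags);
    	if (!list_empty(&item->list))
    		list_del_init(&item->list);
    	spin_unlock_irqrestore(lock, flags);
    }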
diff --git a/kernel/softirq.c b/kernel/softirq.c index 0f546ddea43d..bd89bc4eb0b9 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
| @@ -271,8 +271,6 @@ asmlinkage void do_softirq(void) | |||
| 271 | local_irq_restore(flags); | 271 | local_irq_restore(flags); |
| 272 | } | 272 | } |
| 273 | 273 | ||
| 274 | EXPORT_SYMBOL(do_softirq); | ||
| 275 | |||
| 276 | #endif | 274 | #endif |
| 277 | 275 | ||
| 278 | /* | 276 | /* |
| @@ -332,8 +330,6 @@ inline fastcall void raise_softirq_irqoff(unsigned int nr) | |||
| 332 | wakeup_softirqd(); | 330 | wakeup_softirqd(); |
| 333 | } | 331 | } |
| 334 | 332 | ||
| 335 | EXPORT_SYMBOL(raise_softirq_irqoff); | ||
| 336 | |||
| 337 | void fastcall raise_softirq(unsigned int nr) | 333 | void fastcall raise_softirq(unsigned int nr) |
| 338 | { | 334 | { |
| 339 | unsigned long flags; | 335 | unsigned long flags; |
diff --git a/kernel/sys.c b/kernel/sys.c index 449b81b98b3d..8ae2e636eb1b 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
| @@ -32,6 +32,7 @@ | |||
| 32 | #include <linux/getcpu.h> | 32 | #include <linux/getcpu.h> |
| 33 | #include <linux/task_io_accounting_ops.h> | 33 | #include <linux/task_io_accounting_ops.h> |
| 34 | #include <linux/seccomp.h> | 34 | #include <linux/seccomp.h> |
| 35 | #include <linux/cpu.h> | ||
| 35 | 36 | ||
| 36 | #include <linux/compat.h> | 37 | #include <linux/compat.h> |
| 37 | #include <linux/syscalls.h> | 38 | #include <linux/syscalls.h> |
| @@ -878,6 +879,7 @@ void kernel_power_off(void) | |||
| 878 | kernel_shutdown_prepare(SYSTEM_POWER_OFF); | 879 | kernel_shutdown_prepare(SYSTEM_POWER_OFF); |
| 879 | if (pm_power_off_prepare) | 880 | if (pm_power_off_prepare) |
| 880 | pm_power_off_prepare(); | 881 | pm_power_off_prepare(); |
| 882 | disable_nonboot_cpus(); | ||
| 881 | sysdev_shutdown(); | 883 | sysdev_shutdown(); |
| 882 | printk(KERN_EMERG "Power down.\n"); | 884 | printk(KERN_EMERG "Power down.\n"); |
| 883 | machine_power_off(); | 885 | machine_power_off(); |
| @@ -1442,7 +1444,6 @@ asmlinkage long sys_times(struct tms __user * tbuf) | |||
| 1442 | * Auch. Had to add the 'did_exec' flag to conform completely to POSIX. | 1444 | * Auch. Had to add the 'did_exec' flag to conform completely to POSIX. |
| 1443 | * LBT 04.03.94 | 1445 | * LBT 04.03.94 |
| 1444 | */ | 1446 | */ |
| 1445 | |||
| 1446 | asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) | 1447 | asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) |
| 1447 | { | 1448 | { |
| 1448 | struct task_struct *p; | 1449 | struct task_struct *p; |
| @@ -1470,7 +1471,7 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) | |||
| 1470 | if (!thread_group_leader(p)) | 1471 | if (!thread_group_leader(p)) |
| 1471 | goto out; | 1472 | goto out; |
| 1472 | 1473 | ||
| 1473 | if (p->real_parent == group_leader) { | 1474 | if (p->real_parent->tgid == group_leader->tgid) { |
| 1474 | err = -EPERM; | 1475 | err = -EPERM; |
| 1475 | if (task_session(p) != task_session(group_leader)) | 1476 | if (task_session(p) != task_session(group_leader)) |
| 1476 | goto out; | 1477 | goto out; |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8bdb8c07e04f..6c97259e863e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -27,7 +27,6 @@ | |||
| 27 | #include <linux/capability.h> | 27 | #include <linux/capability.h> |
| 28 | #include <linux/ctype.h> | 28 | #include <linux/ctype.h> |
| 29 | #include <linux/utsname.h> | 29 | #include <linux/utsname.h> |
| 30 | #include <linux/capability.h> | ||
| 31 | #include <linux/smp_lock.h> | 30 | #include <linux/smp_lock.h> |
| 32 | #include <linux/fs.h> | 31 | #include <linux/fs.h> |
| 33 | #include <linux/init.h> | 32 | #include <linux/init.h> |
| @@ -223,8 +222,19 @@ static ctl_table kern_table[] = { | |||
| 223 | #ifdef CONFIG_SCHED_DEBUG | 222 | #ifdef CONFIG_SCHED_DEBUG |
| 224 | { | 223 | { |
| 225 | .ctl_name = CTL_UNNUMBERED, | 224 | .ctl_name = CTL_UNNUMBERED, |
| 226 | .procname = "sched_granularity_ns", | 225 | .procname = "sched_min_granularity_ns", |
| 227 | .data = &sysctl_sched_granularity, | 226 | .data = &sysctl_sched_min_granularity, |
| 227 | .maxlen = sizeof(unsigned int), | ||
| 228 | .mode = 0644, | ||
| 229 | .proc_handler = &proc_dointvec_minmax, | ||
| 230 | .strategy = &sysctl_intvec, | ||
| 231 | .extra1 = &min_sched_granularity_ns, | ||
| 232 | .extra2 = &max_sched_granularity_ns, | ||
| 233 | }, | ||
| 234 | { | ||
| 235 | .ctl_name = CTL_UNNUMBERED, | ||
| 236 | .procname = "sched_latency_ns", | ||
| 237 | .data = &sysctl_sched_latency, | ||
| 228 | .maxlen = sizeof(unsigned int), | 238 | .maxlen = sizeof(unsigned int), |
| 229 | .mode = 0644, | 239 | .mode = 0644, |
| 230 | .proc_handler = &proc_dointvec_minmax, | 240 | .proc_handler = &proc_dointvec_minmax, |
| @@ -284,6 +294,23 @@ static ctl_table kern_table[] = { | |||
| 284 | .mode = 0644, | 294 | .mode = 0644, |
| 285 | .proc_handler = &proc_dointvec, | 295 | .proc_handler = &proc_dointvec, |
| 286 | }, | 296 | }, |
| 297 | { | ||
| 298 | .ctl_name = CTL_UNNUMBERED, | ||
| 299 | .procname = "sched_features", | ||
| 300 | .data = &sysctl_sched_features, | ||
| 301 | .maxlen = sizeof(unsigned int), | ||
| 302 | .mode = 0644, | ||
| 303 | .proc_handler = &proc_dointvec, | ||
| 304 | }, | ||
| 305 | #endif | ||
| 306 | { | ||
| 307 | .ctl_name = CTL_UNNUMBERED, | ||
| 308 | .procname = "sched_compat_yield", | ||
| 309 | .data = &sysctl_sched_compat_yield, | ||
| 310 | .maxlen = sizeof(unsigned int), | ||
| 311 | .mode = 0644, | ||
| 312 | .proc_handler = &proc_dointvec, | ||
| 313 | }, | ||
| 287 | #ifdef CONFIG_PROVE_LOCKING | 314 | #ifdef CONFIG_PROVE_LOCKING |
| 288 | { | 315 | { |
| 289 | .ctl_name = CTL_UNNUMBERED, | 316 | .ctl_name = CTL_UNNUMBERED, |
| @@ -305,15 +332,6 @@ static ctl_table kern_table[] = { | |||
| 305 | }, | 332 | }, |
| 306 | #endif | 333 | #endif |
| 307 | { | 334 | { |
| 308 | .ctl_name = CTL_UNNUMBERED, | ||
| 309 | .procname = "sched_features", | ||
| 310 | .data = &sysctl_sched_features, | ||
| 311 | .maxlen = sizeof(unsigned int), | ||
| 312 | .mode = 0644, | ||
| 313 | .proc_handler = &proc_dointvec, | ||
| 314 | }, | ||
| 315 | #endif | ||
| 316 | { | ||
| 317 | .ctl_name = KERN_PANIC, | 335 | .ctl_name = KERN_PANIC, |
| 318 | .procname = "panic", | 336 | .procname = "panic", |
| 319 | .data = &panic_timeout, | 337 | .data = &panic_timeout, |
| @@ -1035,7 +1053,7 @@ static ctl_table vm_table[] = { | |||
| 1035 | .strategy = &sysctl_string, | 1053 | .strategy = &sysctl_string, |
| 1036 | }, | 1054 | }, |
| 1037 | #endif | 1055 | #endif |
| 1038 | #if defined(CONFIG_X86_32) || \ | 1056 | #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \ |
| 1039 | (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL)) | 1057 | (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL)) |
| 1040 | { | 1058 | { |
| 1041 | .ctl_name = VM_VDSO_ENABLED, | 1059 | .ctl_name = VM_VDSO_ENABLED, |
| @@ -1203,7 +1221,7 @@ static ctl_table fs_table[] = { | |||
| 1203 | }; | 1221 | }; |
| 1204 | 1222 | ||
| 1205 | static ctl_table debug_table[] = { | 1223 | static ctl_table debug_table[] = { |
| 1206 | #ifdef CONFIG_X86 | 1224 | #if defined(CONFIG_X86) || defined(CONFIG_PPC) |
| 1207 | { | 1225 | { |
| 1208 | .ctl_name = CTL_UNNUMBERED, | 1226 | .ctl_name = CTL_UNNUMBERED, |
| 1209 | .procname = "exception-trace", | 1227 | .procname = "exception-trace", |
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index f66351126544..8d53106a0a92 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig | |||
| @@ -23,3 +23,8 @@ config HIGH_RES_TIMERS | |||
| 23 | hardware is not capable then this option only increases | 23 | hardware is not capable then this option only increases |
| 24 | the size of the kernel image. | 24 | the size of the kernel image. |
| 25 | 25 | ||
| 26 | config GENERIC_CLOCKEVENTS_BUILD | ||
| 27 | bool | ||
| 28 | default y | ||
| 29 | depends on GENERIC_CLOCKEVENTS || GENERIC_CLOCKEVENTS_MIGR | ||
| 30 | |||
diff --git a/kernel/time/Makefile b/kernel/time/Makefile index 99b6034fc86b..905b0b50792d 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o | 1 | obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o |
| 2 | 2 | ||
| 3 | obj-$(CONFIG_GENERIC_CLOCKEVENTS) += clockevents.o | 3 | obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o |
| 4 | obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o | 4 | obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o |
| 5 | obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o | 5 | obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o |
| 6 | obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o | 6 | obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o |
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 41dd3105ce7f..822beebe664a 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c | |||
| @@ -194,6 +194,7 @@ void clockevents_exchange_device(struct clock_event_device *old, | |||
| 194 | local_irq_restore(flags); | 194 | local_irq_restore(flags); |
| 195 | } | 195 | } |
| 196 | 196 | ||
| 197 | #ifdef CONFIG_GENERIC_CLOCKEVENTS | ||
| 197 | /** | 198 | /** |
| 198 | * clockevents_notify - notification about relevant events | 199 | * clockevents_notify - notification about relevant events |
| 199 | */ | 200 | */ |
| @@ -222,4 +223,4 @@ void clockevents_notify(unsigned long reason, void *arg) | |||
| 222 | spin_unlock(&clockevents_lock); | 223 | spin_unlock(&clockevents_lock); |
| 223 | } | 224 | } |
| 224 | EXPORT_SYMBOL_GPL(clockevents_notify); | 225 | EXPORT_SYMBOL_GPL(clockevents_notify); |
| 225 | 226 | #endif | |
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index cd91237dbfe3..de6a2d6b3ebb 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c | |||
| @@ -226,7 +226,7 @@ static void sync_cmos_clock(unsigned long dummy) | |||
| 226 | 226 | ||
| 227 | static void notify_cmos_timer(void) | 227 | static void notify_cmos_timer(void) |
| 228 | { | 228 | { |
| 229 | if (no_sync_cmos_clock) | 229 | if (!no_sync_cmos_clock) |
| 230 | mod_timer(&sync_cmos_timer, jiffies + 1); | 230 | mod_timer(&sync_cmos_timer, jiffies + 1); |
| 231 | } | 231 | } |
| 232 | 232 | ||
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index db8e0f3d409b..fc3fc79b3d59 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c | |||
| @@ -64,8 +64,9 @@ static void tick_broadcast_start_periodic(struct clock_event_device *bc) | |||
| 64 | */ | 64 | */ |
| 65 | int tick_check_broadcast_device(struct clock_event_device *dev) | 65 | int tick_check_broadcast_device(struct clock_event_device *dev) |
| 66 | { | 66 | { |
| 67 | if (tick_broadcast_device.evtdev || | 67 | if ((tick_broadcast_device.evtdev && |
| 68 | (dev->features & CLOCK_EVT_FEAT_C3STOP)) | 68 | tick_broadcast_device.evtdev->rating >= dev->rating) || |
| 69 | (dev->features & CLOCK_EVT_FEAT_C3STOP)) | ||
| 69 | return 0; | 70 | return 0; |
| 70 | 71 | ||
| 71 | clockevents_exchange_device(NULL, dev); | 72 | clockevents_exchange_device(NULL, dev); |
| @@ -176,8 +177,6 @@ static void tick_do_periodic_broadcast(void) | |||
| 176 | */ | 177 | */ |
| 177 | static void tick_handle_periodic_broadcast(struct clock_event_device *dev) | 178 | static void tick_handle_periodic_broadcast(struct clock_event_device *dev) |
| 178 | { | 179 | { |
| 179 | dev->next_event.tv64 = KTIME_MAX; | ||
| 180 | |||
| 181 | tick_do_periodic_broadcast(); | 180 | tick_do_periodic_broadcast(); |
| 182 | 181 | ||
| 183 | /* | 182 | /* |
| @@ -218,26 +217,43 @@ static void tick_do_broadcast_on_off(void *why) | |||
| 218 | bc = tick_broadcast_device.evtdev; | 217 | bc = tick_broadcast_device.evtdev; |
| 219 | 218 | ||
| 220 | /* | 219 | /* |
| 221 | * Is the device in broadcast mode forever or is it not | 220 | * Is the device not affected by the powerstate ? |
| 222 | * affected by the powerstate ? | ||
| 223 | */ | 221 | */ |
| 224 | if (!dev || !tick_device_is_functional(dev) || | 222 | if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP)) |
| 225 | !(dev->features & CLOCK_EVT_FEAT_C3STOP)) | ||
| 226 | goto out; | 223 | goto out; |
| 227 | 224 | ||
| 228 | if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_ON) { | 225 | /* |
| 226 | * Defect device ? | ||
| 227 | */ | ||
| 228 | if (!tick_device_is_functional(dev)) { | ||
| 229 | /* | ||
| 230 | * AMD C1E wreckage fixup: | ||
| 231 | * | ||
| 232 | * Device was registered functional in the first | ||
| 233 | * place. Now the secondary CPU detected the C1E | ||
| 234 | * misfeature and notifies us to fix it up | ||
| 235 | */ | ||
| 236 | if (*reason != CLOCK_EVT_NOTIFY_BROADCAST_FORCE) | ||
| 237 | goto out; | ||
| 238 | } | ||
| 239 | |||
| 240 | switch (*reason) { | ||
| 241 | case CLOCK_EVT_NOTIFY_BROADCAST_ON: | ||
| 242 | case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: | ||
| 229 | if (!cpu_isset(cpu, tick_broadcast_mask)) { | 243 | if (!cpu_isset(cpu, tick_broadcast_mask)) { |
| 230 | cpu_set(cpu, tick_broadcast_mask); | 244 | cpu_set(cpu, tick_broadcast_mask); |
| 231 | if (td->mode == TICKDEV_MODE_PERIODIC) | 245 | if (td->mode == TICKDEV_MODE_PERIODIC) |
| 232 | clockevents_set_mode(dev, | 246 | clockevents_set_mode(dev, |
| 233 | CLOCK_EVT_MODE_SHUTDOWN); | 247 | CLOCK_EVT_MODE_SHUTDOWN); |
| 234 | } | 248 | } |
| 235 | } else { | 249 | break; |
| 250 | case CLOCK_EVT_NOTIFY_BROADCAST_OFF: | ||
| 236 | if (cpu_isset(cpu, tick_broadcast_mask)) { | 251 | if (cpu_isset(cpu, tick_broadcast_mask)) { |
| 237 | cpu_clear(cpu, tick_broadcast_mask); | 252 | cpu_clear(cpu, tick_broadcast_mask); |
| 238 | if (td->mode == TICKDEV_MODE_PERIODIC) | 253 | if (td->mode == TICKDEV_MODE_PERIODIC) |
| 239 | tick_setup_periodic(dev, 0); | 254 | tick_setup_periodic(dev, 0); |
| 240 | } | 255 | } |
| 256 | break; | ||
| 241 | } | 257 | } |
| 242 | 258 | ||
| 243 | if (cpus_empty(tick_broadcast_mask)) | 259 | if (cpus_empty(tick_broadcast_mask)) |
| @@ -383,11 +399,7 @@ static int tick_broadcast_set_event(ktime_t expires, int force) | |||
| 383 | int tick_resume_broadcast_oneshot(struct clock_event_device *bc) | 399 | int tick_resume_broadcast_oneshot(struct clock_event_device *bc) |
| 384 | { | 400 | { |
| 385 | clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); | 401 | clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); |
| 386 | 402 | return 0; | |
| 387 | if(!cpus_empty(tick_broadcast_oneshot_mask)) | ||
| 388 | tick_broadcast_set_event(ktime_get(), 1); | ||
| 389 | |||
| 390 | return cpu_isset(smp_processor_id(), tick_broadcast_oneshot_mask); | ||
| 391 | } | 403 | } |
| 392 | 404 | ||
| 393 | /* | 405 | /* |
| @@ -519,11 +531,9 @@ static void tick_broadcast_clear_oneshot(int cpu) | |||
| 519 | */ | 531 | */ |
| 520 | void tick_broadcast_setup_oneshot(struct clock_event_device *bc) | 532 | void tick_broadcast_setup_oneshot(struct clock_event_device *bc) |
| 521 | { | 533 | { |
| 522 | if (bc->mode != CLOCK_EVT_MODE_ONESHOT) { | 534 | bc->event_handler = tick_handle_oneshot_broadcast; |
| 523 | bc->event_handler = tick_handle_oneshot_broadcast; | 535 | clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); |
| 524 | clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT); | 536 | bc->next_event.tv64 = KTIME_MAX; |
| 525 | bc->next_event.tv64 = KTIME_MAX; | ||
| 526 | } | ||
| 527 | } | 537 | } |
| 528 | 538 | ||
| 529 | /* | 539 | /* |
| @@ -549,20 +559,17 @@ void tick_broadcast_switch_to_oneshot(void) | |||
| 549 | */ | 559 | */ |
| 550 | void tick_shutdown_broadcast_oneshot(unsigned int *cpup) | 560 | void tick_shutdown_broadcast_oneshot(unsigned int *cpup) |
| 551 | { | 561 | { |
| 552 | struct clock_event_device *bc; | ||
| 553 | unsigned long flags; | 562 | unsigned long flags; |
| 554 | unsigned int cpu = *cpup; | 563 | unsigned int cpu = *cpup; |
| 555 | 564 | ||
| 556 | spin_lock_irqsave(&tick_broadcast_lock, flags); | 565 | spin_lock_irqsave(&tick_broadcast_lock, flags); |
| 557 | 566 | ||
| 558 | bc = tick_broadcast_device.evtdev; | 567 | /* |
| 568 | * Clear the broadcast mask flag for the dead cpu, but do not | ||
| 569 | * stop the broadcast device! | ||
| 570 | */ | ||
| 559 | cpu_clear(cpu, tick_broadcast_oneshot_mask); | 571 | cpu_clear(cpu, tick_broadcast_oneshot_mask); |
| 560 | 572 | ||
| 561 | if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT) { | ||
| 562 | if (bc && cpus_empty(tick_broadcast_oneshot_mask)) | ||
| 563 | clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN); | ||
| 564 | } | ||
| 565 | |||
| 566 | spin_unlock_irqrestore(&tick_broadcast_lock, flags); | 573 | spin_unlock_irqrestore(&tick_broadcast_lock, flags); |
| 567 | } | 574 | } |
| 568 | 575 | ||
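Stripped of the locking, the C1E fixup and the periodic-mode reprogramming, the reworked on/off handler above reduces to a three-way switch over the notify reason; a skeleton of just that control flow (hypothetical helper):

    #include <linux/clockchips.h>
    #include <linux/cpumask.h>

    static void demo_broadcast_on_off(unsigned long reason, int cpu,
    				  cpumask_t *broadcast_mask)
    {
    	switch (reason) {
    	case CLOCK_EVT_NOTIFY_BROADCAST_ON:
    	case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
    		/* This cpu now depends on the broadcast device. */
    		cpu_set(cpu, *broadcast_mask);
    		break;
    	case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
    		/* Back to the local per-cpu tick device. */
    		cpu_clear(cpu, *broadcast_mask);
    		break;
    	}
    }

The matching tick-common.c hunk below routes the new CLOCK_EVT_NOTIFY_BROADCAST_FORCE reason into the same handler.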
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 77a21abc8716..1bea399a9ef0 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c | |||
| @@ -200,7 +200,7 @@ static int tick_check_new_device(struct clock_event_device *newdev) | |||
| 200 | 200 | ||
| 201 | cpu = smp_processor_id(); | 201 | cpu = smp_processor_id(); |
| 202 | if (!cpu_isset(cpu, newdev->cpumask)) | 202 | if (!cpu_isset(cpu, newdev->cpumask)) |
| 203 | goto out; | 203 | goto out_bc; |
| 204 | 204 | ||
| 205 | td = &per_cpu(tick_cpu_device, cpu); | 205 | td = &per_cpu(tick_cpu_device, cpu); |
| 206 | curdev = td->evtdev; | 206 | curdev = td->evtdev; |
| @@ -265,7 +265,7 @@ out_bc: | |||
| 265 | */ | 265 | */ |
| 266 | if (tick_check_broadcast_device(newdev)) | 266 | if (tick_check_broadcast_device(newdev)) |
| 267 | ret = NOTIFY_STOP; | 267 | ret = NOTIFY_STOP; |
| 268 | out: | 268 | |
| 269 | spin_unlock_irqrestore(&tick_device_lock, flags); | 269 | spin_unlock_irqrestore(&tick_device_lock, flags); |
| 270 | 270 | ||
| 271 | return ret; | 271 | return ret; |
| @@ -345,6 +345,7 @@ static int tick_notify(struct notifier_block *nb, unsigned long reason, | |||
| 345 | 345 | ||
| 346 | case CLOCK_EVT_NOTIFY_BROADCAST_ON: | 346 | case CLOCK_EVT_NOTIFY_BROADCAST_ON: |
| 347 | case CLOCK_EVT_NOTIFY_BROADCAST_OFF: | 347 | case CLOCK_EVT_NOTIFY_BROADCAST_OFF: |
| 348 | case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: | ||
| 348 | tick_broadcast_on_off(reason, dev); | 349 | tick_broadcast_on_off(reason, dev); |
| 349 | break; | 350 | break; |
| 350 | 351 | ||
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index b416995b9757..8c3fef1db09c 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
| @@ -160,6 +160,18 @@ void tick_nohz_stop_sched_tick(void) | |||
| 160 | cpu = smp_processor_id(); | 160 | cpu = smp_processor_id(); |
| 161 | ts = &per_cpu(tick_cpu_sched, cpu); | 161 | ts = &per_cpu(tick_cpu_sched, cpu); |
| 162 | 162 | ||
| 163 | /* | ||
| 164 | * If this cpu is offline and it is the one which updates | ||
| 165 | * jiffies, then give up the assignment and let it be taken by | ||
| 166 | * the cpu which runs the tick timer next. If we don't drop | ||
| 167 | * this here the jiffies might be stale and do_timer() never | ||
| 168 | * invoked. | ||
| 169 | */ | ||
| 170 | if (unlikely(!cpu_online(cpu))) { | ||
| 171 | if (cpu == tick_do_timer_cpu) | ||
| 172 | tick_do_timer_cpu = -1; | ||
| 173 | } | ||
| 174 | |||
| 163 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) | 175 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) |
| 164 | goto end; | 176 | goto end; |
| 165 | 177 | ||
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index acc417b5a9b7..4ad79f6bdec6 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
| @@ -217,6 +217,7 @@ static void change_clocksource(void) | |||
| 217 | } | 217 | } |
| 218 | #else | 218 | #else |
| 219 | static inline void change_clocksource(void) { } | 219 | static inline void change_clocksource(void) { } |
| 220 | static inline s64 __get_nsec_offset(void) { return 0; } | ||
| 220 | #endif | 221 | #endif |
| 221 | 222 | ||
| 222 | /** | 223 | /** |
| @@ -280,6 +281,8 @@ void __init timekeeping_init(void) | |||
| 280 | static int timekeeping_suspended; | 281 | static int timekeeping_suspended; |
| 281 | /* time in seconds when suspend began */ | 282 | /* time in seconds when suspend began */ |
| 282 | static unsigned long timekeeping_suspend_time; | 283 | static unsigned long timekeeping_suspend_time; |
| 284 | /* xtime offset when we went into suspend */ | ||
| 285 | static s64 timekeeping_suspend_nsecs; | ||
| 283 | 286 | ||
| 284 | /** | 287 | /** |
| 285 | * timekeeping_resume - Resumes the generic timekeeping subsystem. | 288 | * timekeeping_resume - Resumes the generic timekeeping subsystem. |
| @@ -305,6 +308,8 @@ static int timekeeping_resume(struct sys_device *dev) | |||
| 305 | wall_to_monotonic.tv_sec -= sleep_length; | 308 | wall_to_monotonic.tv_sec -= sleep_length; |
| 306 | total_sleep_time += sleep_length; | 309 | total_sleep_time += sleep_length; |
| 307 | } | 310 | } |
| 311 | /* Make sure that we have the correct xtime reference */ | ||
| 312 | timespec_add_ns(&xtime, timekeeping_suspend_nsecs); | ||
| 308 | /* re-base the last cycle value */ | 313 | /* re-base the last cycle value */ |
| 309 | clock->cycle_last = clocksource_read(clock); | 314 | clock->cycle_last = clocksource_read(clock); |
| 310 | clock->error = 0; | 315 | clock->error = 0; |
| @@ -325,9 +330,12 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) | |||
| 325 | { | 330 | { |
| 326 | unsigned long flags; | 331 | unsigned long flags; |
| 327 | 332 | ||
| 333 | timekeeping_suspend_time = read_persistent_clock(); | ||
| 334 | |||
| 328 | write_seqlock_irqsave(&xtime_lock, flags); | 335 | write_seqlock_irqsave(&xtime_lock, flags); |
| 336 | /* Get the current xtime offset */ | ||
| 337 | timekeeping_suspend_nsecs = __get_nsec_offset(); | ||
| 329 | timekeeping_suspended = 1; | 338 | timekeeping_suspended = 1; |
| 330 | timekeeping_suspend_time = read_persistent_clock(); | ||
| 331 | write_sequnlock_irqrestore(&xtime_lock, flags); | 339 | write_sequnlock_irqrestore(&xtime_lock, flags); |
| 332 | 340 | ||
| 333 | clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); | 341 | clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); |
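The timekeeping hunks above capture the not-yet-accumulated nanosecond offset before suspending and fold it back into xtime on resume, so the clock does not come back short by a partial update's worth of time. Reduced to its essentials (hypothetical helpers around the same timespec_add_ns() call):

    #include <linux/time.h>

    static s64 demo_suspend_nsecs;

    static void demo_note_offset_at_suspend(s64 nsec_offset)
    {
    	/* Nanoseconds read from the clocksource but not yet folded
    	 * into xtime when the box went to sleep. */
    	demo_suspend_nsecs = nsec_offset;
    }

    static void demo_restore_offset_at_resume(struct timespec *xtime)
    {
    	/* Add them back so the resumed xtime has the correct
    	 * reference again. */
    	timespec_add_ns(xtime, demo_suspend_nsecs);
    }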
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c index 3c38fb5eae1b..c36bb7ed0301 100644 --- a/kernel/time/timer_stats.c +++ b/kernel/time/timer_stats.c | |||
| @@ -327,8 +327,9 @@ static int tstats_show(struct seq_file *m, void *v) | |||
| 327 | ms = 1; | 327 | ms = 1; |
| 328 | 328 | ||
| 329 | if (events && period.tv_sec) | 329 | if (events && period.tv_sec) |
| 330 | seq_printf(m, "%ld total events, %ld.%ld events/sec\n", events, | 330 | seq_printf(m, "%ld total events, %ld.%03ld events/sec\n", |
| 331 | events / period.tv_sec, events * 1000 / ms); | 331 | events, events * 1000 / ms, |
| 332 | (events * 1000000 / ms) % 1000); | ||
| 332 | else | 333 | else |
| 333 | seq_printf(m, "%ld total events\n", events); | 334 | seq_printf(m, "%ld total events\n", events); |
| 334 | 335 | ||
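The reworked seq_printf() above prints events per second with a fixed three-digit fraction using only integer arithmetic. A worked example with hypothetical numbers, 1234 events over a 5000 ms period:

    /*
     *   integer part:     1234 * 1000    / 5000          = 246
     *   fractional part: (1234 * 1000000 / 5000) % 1000  = 800
     *
     * so "%ld.%03ld events/sec" renders as "246.800 events/sec"; the
     * %03ld width keeps leading zeroes that a plain %ld would drop
     * (a fraction of 7 prints as ".007", not a misleading ".7").
     */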
diff --git a/kernel/user.c b/kernel/user.c index e7d11cef6998..9ca2848fc356 100644 --- a/kernel/user.c +++ b/kernel/user.c | |||
| @@ -55,25 +55,22 @@ struct user_struct root_user = { | |||
| 55 | /* | 55 | /* |
| 56 | * These routines must be called with the uidhash spinlock held! | 56 | * These routines must be called with the uidhash spinlock held! |
| 57 | */ | 57 | */ |
| 58 | static inline void uid_hash_insert(struct user_struct *up, struct list_head *hashent) | 58 | static inline void uid_hash_insert(struct user_struct *up, struct hlist_head *hashent) |
| 59 | { | 59 | { |
| 60 | list_add(&up->uidhash_list, hashent); | 60 | hlist_add_head(&up->uidhash_node, hashent); |
| 61 | } | 61 | } |
| 62 | 62 | ||
| 63 | static inline void uid_hash_remove(struct user_struct *up) | 63 | static inline void uid_hash_remove(struct user_struct *up) |
| 64 | { | 64 | { |
| 65 | list_del(&up->uidhash_list); | 65 | hlist_del_init(&up->uidhash_node); |
| 66 | } | 66 | } |
| 67 | 67 | ||
| 68 | static inline struct user_struct *uid_hash_find(uid_t uid, struct list_head *hashent) | 68 | static inline struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent) |
| 69 | { | 69 | { |
| 70 | struct list_head *up; | 70 | struct user_struct *user; |
| 71 | 71 | struct hlist_node *h; | |
| 72 | list_for_each(up, hashent) { | ||
| 73 | struct user_struct *user; | ||
| 74 | |||
| 75 | user = list_entry(up, struct user_struct, uidhash_list); | ||
| 76 | 72 | ||
| 73 | hlist_for_each_entry(user, h, hashent, uidhash_node) { | ||
| 77 | if(user->uid == uid) { | 74 | if(user->uid == uid) { |
| 78 | atomic_inc(&user->__count); | 75 | atomic_inc(&user->__count); |
| 79 | return user; | 76 | return user; |
| @@ -122,7 +119,7 @@ void free_uid(struct user_struct *up) | |||
| 122 | 119 | ||
| 123 | struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) | 120 | struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) |
| 124 | { | 121 | { |
| 125 | struct list_head *hashent = uidhashentry(ns, uid); | 122 | struct hlist_head *hashent = uidhashentry(ns, uid); |
| 126 | struct user_struct *up; | 123 | struct user_struct *up; |
| 127 | 124 | ||
| 128 | spin_lock_irq(&uidhash_lock); | 125 | spin_lock_irq(&uidhash_lock); |
| @@ -202,6 +199,30 @@ void switch_uid(struct user_struct *new_user) | |||
| 202 | suid_keys(current); | 199 | suid_keys(current); |
| 203 | } | 200 | } |
| 204 | 201 | ||
| 202 | void release_uids(struct user_namespace *ns) | ||
| 203 | { | ||
| 204 | int i; | ||
| 205 | unsigned long flags; | ||
| 206 | struct hlist_head *head; | ||
| 207 | struct hlist_node *nd; | ||
| 208 | |||
| 209 | spin_lock_irqsave(&uidhash_lock, flags); | ||
| 210 | /* | ||
| 211 | * collapse the chains so that the user_struct-s will | ||
| 212 | * be still alive, but not in hashes. subsequent free_uid() | ||
| 213 | * will free them. | ||
| 214 | */ | ||
| 215 | for (i = 0; i < UIDHASH_SZ; i++) { | ||
| 216 | head = ns->uidhash_table + i; | ||
| 217 | while (!hlist_empty(head)) { | ||
| 218 | nd = head->first; | ||
| 219 | hlist_del_init(nd); | ||
| 220 | } | ||
| 221 | } | ||
| 222 | spin_unlock_irqrestore(&uidhash_lock, flags); | ||
| 223 | |||
| 224 | free_uid(ns->root_user); | ||
| 225 | } | ||
| 205 | 226 | ||
| 206 | static int __init uid_cache_init(void) | 227 | static int __init uid_cache_init(void) |
| 207 | { | 228 | { |
| @@ -211,7 +232,7 @@ static int __init uid_cache_init(void) | |||
| 211 | 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); | 232 | 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); |
| 212 | 233 | ||
| 213 | for(n = 0; n < UIDHASH_SZ; ++n) | 234 | for(n = 0; n < UIDHASH_SZ; ++n) |
| 214 | INIT_LIST_HEAD(init_user_ns.uidhash_table + n); | 235 | INIT_HLIST_HEAD(init_user_ns.uidhash_table + n); |
| 215 | 236 | ||
| 216 | /* Insert the root user immediately (init already runs as root) */ | 237 | /* Insert the root user immediately (init already runs as root) */ |
| 217 | spin_lock_irq(&uidhash_lock); | 238 | spin_lock_irq(&uidhash_lock); |
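The uid hash conversion above switches the buckets from struct list_head to the smaller struct hlist_head and walks them with hlist_for_each_entry(). A self-contained sketch of the lookup pattern on a hypothetical type (locking and refcounting omitted; note the 2.6-era macro still takes a separate iterator node):

    #include <linux/list.h>

    struct demo_user {
    	struct hlist_node node;
    	unsigned int uid;
    };

    static struct demo_user *demo_uid_hash_find(unsigned int uid,
    					    struct hlist_head *bucket)
    {
    	struct demo_user *user;
    	struct hlist_node *pos;

    	hlist_for_each_entry(user, pos, bucket, node) {
    		if (user->uid == uid)
    			return user;
    	}
    	return NULL;
    }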
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index d055d987850c..7af90fc4f0fd 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c | |||
| @@ -39,7 +39,7 @@ static struct user_namespace *clone_user_ns(struct user_namespace *old_ns) | |||
| 39 | kref_init(&ns->kref); | 39 | kref_init(&ns->kref); |
| 40 | 40 | ||
| 41 | for (n = 0; n < UIDHASH_SZ; ++n) | 41 | for (n = 0; n < UIDHASH_SZ; ++n) |
| 42 | INIT_LIST_HEAD(ns->uidhash_table + n); | 42 | INIT_HLIST_HEAD(ns->uidhash_table + n); |
| 43 | 43 | ||
| 44 | /* Insert new root user. */ | 44 | /* Insert new root user. */ |
| 45 | ns->root_user = alloc_uid(ns, 0); | 45 | ns->root_user = alloc_uid(ns, 0); |
| @@ -81,6 +81,7 @@ void free_user_ns(struct kref *kref) | |||
| 81 | struct user_namespace *ns; | 81 | struct user_namespace *ns; |
| 82 | 82 | ||
| 83 | ns = container_of(kref, struct user_namespace, kref); | 83 | ns = container_of(kref, struct user_namespace, kref); |
| 84 | release_uids(ns); | ||
| 84 | kfree(ns); | 85 | kfree(ns); |
| 85 | } | 86 | } |
| 86 | 87 | ||
diff --git a/kernel/utsname.c b/kernel/utsname.c index 9d8180a0f0d8..816d7b24fa03 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c | |||
| @@ -28,7 +28,9 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns) | |||
| 28 | if (!ns) | 28 | if (!ns) |
| 29 | return ERR_PTR(-ENOMEM); | 29 | return ERR_PTR(-ENOMEM); |
| 30 | 30 | ||
| 31 | down_read(&uts_sem); | ||
| 31 | memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); | 32 | memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); |
| 33 | up_read(&uts_sem); | ||
| 32 | kref_init(&ns->kref); | 34 | kref_init(&ns->kref); |
| 33 | return ns; | 35 | return ns; |
| 34 | } | 36 | } |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 58e5c152a6bb..e080d1d744cc 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -635,7 +635,7 @@ int keventd_up(void) | |||
| 635 | int current_is_keventd(void) | 635 | int current_is_keventd(void) |
| 636 | { | 636 | { |
| 637 | struct cpu_workqueue_struct *cwq; | 637 | struct cpu_workqueue_struct *cwq; |
| 638 | int cpu = smp_processor_id(); /* preempt-safe: keventd is per-cpu */ | 638 | int cpu = raw_smp_processor_id(); /* preempt-safe: keventd is per-cpu */ |
| 639 | int ret = 0; | 639 | int ret = 0; |
| 640 | 640 | ||
| 641 | BUG_ON(!keventd_wq); | 641 | BUG_ON(!keventd_wq); |
