| author | Ingo Molnar <mingo@elte.hu> | 2008-07-07 02:07:35 -0400 |
|---|---|---|
| committer | Ingo Molnar <mingo@elte.hu> | 2008-07-07 02:07:35 -0400 |
| commit | d763d5edf945eec47bd443b699f174976f0afc13 | |
| tree | 3e5cd46b9a783999716bf92176854f4f1215d930 /kernel | |
| parent | 790e2a290b499b0400254e6870ec27969065d122 | |
| parent | 1b40a895df6c7d5a80e71f65674060b03d84bbef | |
Merge branch 'linus' into tracing/mmiotrace
Diffstat (limited to 'kernel')
| Mode | Path | Lines changed |
|---|---|---|
| -rw-r--r-- | kernel/audit.c | 6 |
| -rw-r--r-- | kernel/auditfilter.c | 3 |
| -rw-r--r-- | kernel/capability.c | 132 |
| -rw-r--r-- | kernel/cgroup.c | 2 |
| -rw-r--r-- | kernel/cpuset.c | 20 |
| -rw-r--r-- | kernel/exit.c | 7 |
| -rw-r--r-- | kernel/futex.c | 93 |
| -rw-r--r-- | kernel/hrtimer.c | 8 |
| -rw-r--r-- | kernel/kgdb.c | 19 |
| -rw-r--r-- | kernel/kprobes.c | 15 |
| -rw-r--r-- | kernel/module.c | 18 |
| -rw-r--r-- | kernel/rcuclassic.c | 16 |
| -rw-r--r-- | kernel/rcupreempt.c | 2 |
| -rw-r--r-- | kernel/relay.c | 2 |
| -rw-r--r-- | kernel/sched.c | 521 |
| -rw-r--r-- | kernel/sched_clock.c | 18 |
| -rw-r--r-- | kernel/sched_debug.c | 5 |
| -rw-r--r-- | kernel/sched_fair.c | 254 |
| -rw-r--r-- | kernel/sched_rt.c | 70 |
| -rw-r--r-- | kernel/sched_stats.h | 7 |
| -rw-r--r-- | kernel/signal.c | 51 |
| -rw-r--r-- | kernel/softlockup.c | 16 |
| -rw-r--r-- | kernel/stop_machine.c | 7 |
| -rw-r--r-- | kernel/sys.c | 6 |
| -rw-r--r-- | kernel/workqueue.c | 2 |
25 files changed, 559 insertions, 741 deletions
diff --git a/kernel/audit.c b/kernel/audit.c index e8692a5748c2..e092f1c0ce30 100644 --- a/kernel/audit.c +++ b/kernel/audit.c
| @@ -738,7 +738,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
| 738 | if (!audit_enabled && msg_type != AUDIT_USER_AVC) | 738 | if (!audit_enabled && msg_type != AUDIT_USER_AVC) |
| 739 | return 0; | 739 | return 0; |
| 740 | 740 | ||
| 741 | err = audit_filter_user(&NETLINK_CB(skb), msg_type); | 741 | err = audit_filter_user(&NETLINK_CB(skb)); |
| 742 | if (err == 1) { | 742 | if (err == 1) { |
| 743 | err = 0; | 743 | err = 0; |
| 744 | if (msg_type == AUDIT_USER_TTY) { | 744 | if (msg_type == AUDIT_USER_TTY) { |
| @@ -779,7 +779,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
| 779 | } | 779 | } |
| 780 | /* fallthrough */ | 780 | /* fallthrough */ |
| 781 | case AUDIT_LIST: | 781 | case AUDIT_LIST: |
| 782 | err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid, | 782 | err = audit_receive_filter(msg_type, NETLINK_CB(skb).pid, |
| 783 | uid, seq, data, nlmsg_len(nlh), | 783 | uid, seq, data, nlmsg_len(nlh), |
| 784 | loginuid, sessionid, sid); | 784 | loginuid, sessionid, sid); |
| 785 | break; | 785 | break; |
| @@ -798,7 +798,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) | |||
| 798 | } | 798 | } |
| 799 | /* fallthrough */ | 799 | /* fallthrough */ |
| 800 | case AUDIT_LIST_RULES: | 800 | case AUDIT_LIST_RULES: |
| 801 | err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid, | 801 | err = audit_receive_filter(msg_type, NETLINK_CB(skb).pid, |
| 802 | uid, seq, data, nlmsg_len(nlh), | 802 | uid, seq, data, nlmsg_len(nlh), |
| 803 | loginuid, sessionid, sid); | 803 | loginuid, sessionid, sid); |
| 804 | break; | 804 | break; |
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 0e0bd27e6512..98c50cc671bb 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c
| @@ -1544,6 +1544,7 @@ static void audit_log_rule_change(uid_t loginuid, u32 sessionid, u32 sid, | |||
| 1544 | * @data: payload data | 1544 | * @data: payload data |
| 1545 | * @datasz: size of payload data | 1545 | * @datasz: size of payload data |
| 1546 | * @loginuid: loginuid of sender | 1546 | * @loginuid: loginuid of sender |
| 1547 | * @sessionid: sessionid for netlink audit message | ||
| 1547 | * @sid: SE Linux Security ID of sender | 1548 | * @sid: SE Linux Security ID of sender |
| 1548 | */ | 1549 | */ |
| 1549 | int audit_receive_filter(int type, int pid, int uid, int seq, void *data, | 1550 | int audit_receive_filter(int type, int pid, int uid, int seq, void *data, |
| @@ -1720,7 +1721,7 @@ static int audit_filter_user_rules(struct netlink_skb_parms *cb, | |||
| 1720 | return 1; | 1721 | return 1; |
| 1721 | } | 1722 | } |
| 1722 | 1723 | ||
| 1723 | int audit_filter_user(struct netlink_skb_parms *cb, int type) | 1724 | int audit_filter_user(struct netlink_skb_parms *cb) |
| 1724 | { | 1725 | { |
| 1725 | enum audit_state state = AUDIT_DISABLED; | 1726 | enum audit_state state = AUDIT_DISABLED; |
| 1726 | struct audit_entry *e; | 1727 | struct audit_entry *e; |
diff --git a/kernel/capability.c b/kernel/capability.c index 39e8193b41ea..901e0fdc3fff 100644 --- a/kernel/capability.c +++ b/kernel/capability.c
| @@ -53,11 +53,95 @@ static void warn_legacy_capability_use(void) | |||
| 53 | } | 53 | } |
| 54 | 54 | ||
| 55 | /* | 55 | /* |
| 56 | * Version 2 capabilities worked fine, but the linux/capability.h file | ||
| 57 | * that accompanied their introduction encouraged their use without | ||
| 58 | * the necessary user-space source code changes. As such, we have | ||
| 59 | * created a version 3 with equivalent functionality to version 2, but | ||
| 60 | * with a header change to protect legacy source code from using | ||
| 61 | * version 2 when it wanted to use version 1. If your system has code | ||
| 62 | * that trips the following warning, it is using version 2 specific | ||
| 63 | * capabilities and may be doing so insecurely. | ||
| 64 | * | ||
| 65 | * The remedy is to either upgrade your version of libcap (to 2.10+, | ||
| 66 | * if the application is linked against it), or recompile your | ||
| 67 | * application with modern kernel headers and this warning will go | ||
| 68 | * away. | ||
| 69 | */ | ||
| 70 | |||
| 71 | static void warn_deprecated_v2(void) | ||
| 72 | { | ||
| 73 | static int warned; | ||
| 74 | |||
| 75 | if (!warned) { | ||
| 76 | char name[sizeof(current->comm)]; | ||
| 77 | |||
| 78 | printk(KERN_INFO "warning: `%s' uses deprecated v2" | ||
| 79 | " capabilities in a way that may be insecure.\n", | ||
| 80 | get_task_comm(name, current)); | ||
| 81 | warned = 1; | ||
| 82 | } | ||
| 83 | } | ||
| 84 | |||
| 85 | /* | ||
| 86 | * Version check. Return the number of u32s in each capability flag | ||
| 87 | * array, or a negative value on error. | ||
| 88 | */ | ||
| 89 | static int cap_validate_magic(cap_user_header_t header, unsigned *tocopy) | ||
| 90 | { | ||
| 91 | __u32 version; | ||
| 92 | |||
| 93 | if (get_user(version, &header->version)) | ||
| 94 | return -EFAULT; | ||
| 95 | |||
| 96 | switch (version) { | ||
| 97 | case _LINUX_CAPABILITY_VERSION_1: | ||
| 98 | warn_legacy_capability_use(); | ||
| 99 | *tocopy = _LINUX_CAPABILITY_U32S_1; | ||
| 100 | break; | ||
| 101 | case _LINUX_CAPABILITY_VERSION_2: | ||
| 102 | warn_deprecated_v2(); | ||
| 103 | /* | ||
| 104 | * fall through - v3 is otherwise equivalent to v2. | ||
| 105 | */ | ||
| 106 | case _LINUX_CAPABILITY_VERSION_3: | ||
| 107 | *tocopy = _LINUX_CAPABILITY_U32S_3; | ||
| 108 | break; | ||
| 109 | default: | ||
| 110 | if (put_user((u32)_KERNEL_CAPABILITY_VERSION, &header->version)) | ||
| 111 | return -EFAULT; | ||
| 112 | return -EINVAL; | ||
| 113 | } | ||
| 114 | |||
| 115 | return 0; | ||
| 116 | } | ||
| 117 | |||
| 118 | /* | ||
| 56 | * For sys_getproccap() and sys_setproccap(), any of the three | 119 | * For sys_getproccap() and sys_setproccap(), any of the three |
| 57 | * capability set pointers may be NULL -- indicating that that set is | 120 | * capability set pointers may be NULL -- indicating that that set is |
| 58 | * uninteresting and/or not to be changed. | 121 | * uninteresting and/or not to be changed. |
| 59 | */ | 122 | */ |
| 60 | 123 | ||
| 124 | /* | ||
| 125 | * Atomically modify the effective capabilities returning the original | ||
| 126 | * value. No permission check is performed here - it is assumed that the | ||
| 127 | * caller is permitted to set the desired effective capabilities. | ||
| 128 | */ | ||
| 129 | kernel_cap_t cap_set_effective(const kernel_cap_t pE_new) | ||
| 130 | { | ||
| 131 | kernel_cap_t pE_old; | ||
| 132 | |||
| 133 | spin_lock(&task_capability_lock); | ||
| 134 | |||
| 135 | pE_old = current->cap_effective; | ||
| 136 | current->cap_effective = pE_new; | ||
| 137 | |||
| 138 | spin_unlock(&task_capability_lock); | ||
| 139 | |||
| 140 | return pE_old; | ||
| 141 | } | ||
| 142 | |||
| 143 | EXPORT_SYMBOL(cap_set_effective); | ||
| 144 | |||
| 61 | /** | 145 | /** |
| 62 | * sys_capget - get the capabilities of a given process. | 146 | * sys_capget - get the capabilities of a given process. |
| 63 | * @header: pointer to struct that contains capability version and | 147 | * @header: pointer to struct that contains capability version and |
| @@ -71,27 +155,13 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr) | |||
| 71 | { | 155 | { |
| 72 | int ret = 0; | 156 | int ret = 0; |
| 73 | pid_t pid; | 157 | pid_t pid; |
| 74 | __u32 version; | ||
| 75 | struct task_struct *target; | 158 | struct task_struct *target; |
| 76 | unsigned tocopy; | 159 | unsigned tocopy; |
| 77 | kernel_cap_t pE, pI, pP; | 160 | kernel_cap_t pE, pI, pP; |
| 78 | 161 | ||
| 79 | if (get_user(version, &header->version)) | 162 | ret = cap_validate_magic(header, &tocopy); |
| 80 | return -EFAULT; | 163 | if (ret != 0) |
| 81 | 164 | return ret; | |
| 82 | switch (version) { | ||
| 83 | case _LINUX_CAPABILITY_VERSION_1: | ||
| 84 | warn_legacy_capability_use(); | ||
| 85 | tocopy = _LINUX_CAPABILITY_U32S_1; | ||
| 86 | break; | ||
| 87 | case _LINUX_CAPABILITY_VERSION_2: | ||
| 88 | tocopy = _LINUX_CAPABILITY_U32S_2; | ||
| 89 | break; | ||
| 90 | default: | ||
| 91 | if (put_user(_LINUX_CAPABILITY_VERSION, &header->version)) | ||
| 92 | return -EFAULT; | ||
| 93 | return -EINVAL; | ||
| 94 | } | ||
| 95 | 165 | ||
| 96 | if (get_user(pid, &header->pid)) | 166 | if (get_user(pid, &header->pid)) |
| 97 | return -EFAULT; | 167 | return -EFAULT; |
| @@ -118,7 +188,7 @@ out: | |||
| 118 | spin_unlock(&task_capability_lock); | 188 | spin_unlock(&task_capability_lock); |
| 119 | 189 | ||
| 120 | if (!ret) { | 190 | if (!ret) { |
| 121 | struct __user_cap_data_struct kdata[_LINUX_CAPABILITY_U32S]; | 191 | struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S]; |
| 122 | unsigned i; | 192 | unsigned i; |
| 123 | 193 | ||
| 124 | for (i = 0; i < tocopy; i++) { | 194 | for (i = 0; i < tocopy; i++) { |
| @@ -128,7 +198,7 @@ out: | |||
| 128 | } | 198 | } |
| 129 | 199 | ||
| 130 | /* | 200 | /* |
| 131 | * Note, in the case, tocopy < _LINUX_CAPABILITY_U32S, | 201 | * Note, in the case, tocopy < _KERNEL_CAPABILITY_U32S, |
| 132 | * we silently drop the upper capabilities here. This | 202 | * we silently drop the upper capabilities here. This |
| 133 | * has the effect of making older libcap | 203 | * has the effect of making older libcap |
| 134 | * implementations implicitly drop upper capability | 204 | * implementations implicitly drop upper capability |
| @@ -240,30 +310,16 @@ static inline int cap_set_all(kernel_cap_t *effective, | |||
| 240 | */ | 310 | */ |
| 241 | asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) | 311 | asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) |
| 242 | { | 312 | { |
| 243 | struct __user_cap_data_struct kdata[_LINUX_CAPABILITY_U32S]; | 313 | struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S]; |
| 244 | unsigned i, tocopy; | 314 | unsigned i, tocopy; |
| 245 | kernel_cap_t inheritable, permitted, effective; | 315 | kernel_cap_t inheritable, permitted, effective; |
| 246 | __u32 version; | ||
| 247 | struct task_struct *target; | 316 | struct task_struct *target; |
| 248 | int ret; | 317 | int ret; |
| 249 | pid_t pid; | 318 | pid_t pid; |
| 250 | 319 | ||
| 251 | if (get_user(version, &header->version)) | 320 | ret = cap_validate_magic(header, &tocopy); |
| 252 | return -EFAULT; | 321 | if (ret != 0) |
| 253 | 322 | return ret; | |
| 254 | switch (version) { | ||
| 255 | case _LINUX_CAPABILITY_VERSION_1: | ||
| 256 | warn_legacy_capability_use(); | ||
| 257 | tocopy = _LINUX_CAPABILITY_U32S_1; | ||
| 258 | break; | ||
| 259 | case _LINUX_CAPABILITY_VERSION_2: | ||
| 260 | tocopy = _LINUX_CAPABILITY_U32S_2; | ||
| 261 | break; | ||
| 262 | default: | ||
| 263 | if (put_user(_LINUX_CAPABILITY_VERSION, &header->version)) | ||
| 264 | return -EFAULT; | ||
| 265 | return -EINVAL; | ||
| 266 | } | ||
| 267 | 323 | ||
| 268 | if (get_user(pid, &header->pid)) | 324 | if (get_user(pid, &header->pid)) |
| 269 | return -EFAULT; | 325 | return -EFAULT; |
| @@ -281,7 +337,7 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) | |||
| 281 | permitted.cap[i] = kdata[i].permitted; | 337 | permitted.cap[i] = kdata[i].permitted; |
| 282 | inheritable.cap[i] = kdata[i].inheritable; | 338 | inheritable.cap[i] = kdata[i].inheritable; |
| 283 | } | 339 | } |
| 284 | while (i < _LINUX_CAPABILITY_U32S) { | 340 | while (i < _KERNEL_CAPABILITY_U32S) { |
| 285 | effective.cap[i] = 0; | 341 | effective.cap[i] = 0; |
| 286 | permitted.cap[i] = 0; | 342 | permitted.cap[i] = 0; |
| 287 | inheritable.cap[i] = 0; | 343 | inheritable.cap[i] = 0; |
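The capability.c hunks above fold the header-version dispatch that used to be duplicated in sys_capget() and sys_capset() into one cap_validate_magic() helper, and introduce the v3 ABI while keeping v1 and v2 callers working. A rough userspace sketch of that dispatch is shown below; the CAP_* constants and validate_magic() name are illustrative stand-ins for the uapi definitions and the kernel's warning helpers, not the real headers.

```c
#include <errno.h>
#include <stdio.h>

/* Illustrative stand-ins for the uapi _LINUX_CAPABILITY_* definitions. */
#define CAP_VERSION_1 0x19980330u
#define CAP_VERSION_2 0x20071026u
#define CAP_VERSION_3 0x20080522u
#define CAP_U32S_1    1u	/* one u32 per capability set (v1)      */
#define CAP_U32S_3    2u	/* two u32s per capability set (v2/v3)  */

/*
 * Map a header version to the number of u32s to copy, or write back
 * the preferred version and fail for anything unknown.
 */
static int validate_magic(unsigned int *version, unsigned int *tocopy)
{
	switch (*version) {
	case CAP_VERSION_1:
		fprintf(stderr, "warning: legacy v1 capability use\n");
		*tocopy = CAP_U32S_1;
		break;
	case CAP_VERSION_2:
		fprintf(stderr, "warning: deprecated v2 capability use\n");
		/* fall through - v3 is otherwise equivalent to v2 */
	case CAP_VERSION_3:
		*tocopy = CAP_U32S_3;
		break;
	default:
		*version = CAP_VERSION_3;	/* tell the caller what is supported */
		return -EINVAL;
	}
	return 0;
}
```

The behavioural points the hunk cares about: v2 callers keep working but trigger a one-time deprecation warning, and an unknown version gets the kernel's preferred version written back before -EINVAL is returned.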
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index fbc6fc8949b4..15ac0e1e4f4d 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c
| @@ -2903,7 +2903,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys) | |||
| 2903 | cg = tsk->cgroups; | 2903 | cg = tsk->cgroups; |
| 2904 | parent = task_cgroup(tsk, subsys->subsys_id); | 2904 | parent = task_cgroup(tsk, subsys->subsys_id); |
| 2905 | 2905 | ||
| 2906 | snprintf(nodename, MAX_CGROUP_TYPE_NAMELEN, "node_%d", tsk->pid); | 2906 | snprintf(nodename, MAX_CGROUP_TYPE_NAMELEN, "%d", tsk->pid); |
| 2907 | 2907 | ||
| 2908 | /* Pin the hierarchy */ | 2908 | /* Pin the hierarchy */ |
| 2909 | atomic_inc(&parent->root->sb->s_active); | 2909 | atomic_inc(&parent->root->sb->s_active); |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 86ea9e34e326..9fceb97e989c 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c
| @@ -797,8 +797,10 @@ static int update_cpumask(struct cpuset *cs, char *buf) | |||
| 797 | retval = cpulist_parse(buf, trialcs.cpus_allowed); | 797 | retval = cpulist_parse(buf, trialcs.cpus_allowed); |
| 798 | if (retval < 0) | 798 | if (retval < 0) |
| 799 | return retval; | 799 | return retval; |
| 800 | |||
| 801 | if (!cpus_subset(trialcs.cpus_allowed, cpu_online_map)) | ||
| 802 | return -EINVAL; | ||
| 800 | } | 803 | } |
| 801 | cpus_and(trialcs.cpus_allowed, trialcs.cpus_allowed, cpu_online_map); | ||
| 802 | retval = validate_change(cs, &trialcs); | 804 | retval = validate_change(cs, &trialcs); |
| 803 | if (retval < 0) | 805 | if (retval < 0) |
| 804 | return retval; | 806 | return retval; |
| @@ -932,9 +934,11 @@ static int update_nodemask(struct cpuset *cs, char *buf) | |||
| 932 | retval = nodelist_parse(buf, trialcs.mems_allowed); | 934 | retval = nodelist_parse(buf, trialcs.mems_allowed); |
| 933 | if (retval < 0) | 935 | if (retval < 0) |
| 934 | goto done; | 936 | goto done; |
| 937 | |||
| 938 | if (!nodes_subset(trialcs.mems_allowed, | ||
| 939 | node_states[N_HIGH_MEMORY])) | ||
| 940 | return -EINVAL; | ||
| 935 | } | 941 | } |
| 936 | nodes_and(trialcs.mems_allowed, trialcs.mems_allowed, | ||
| 937 | node_states[N_HIGH_MEMORY]); | ||
| 938 | oldmem = cs->mems_allowed; | 942 | oldmem = cs->mems_allowed; |
| 939 | if (nodes_equal(oldmem, trialcs.mems_allowed)) { | 943 | if (nodes_equal(oldmem, trialcs.mems_allowed)) { |
| 940 | retval = 0; /* Too easy - nothing to do */ | 944 | retval = 0; /* Too easy - nothing to do */ |
| @@ -1033,8 +1037,8 @@ int current_cpuset_is_being_rebound(void) | |||
| 1033 | 1037 | ||
| 1034 | static int update_relax_domain_level(struct cpuset *cs, s64 val) | 1038 | static int update_relax_domain_level(struct cpuset *cs, s64 val) |
| 1035 | { | 1039 | { |
| 1036 | if ((int)val < 0) | 1040 | if (val < -1 || val >= SD_LV_MAX) |
| 1037 | val = -1; | 1041 | return -EINVAL; |
| 1038 | 1042 | ||
| 1039 | if (val != cs->relax_domain_level) { | 1043 | if (val != cs->relax_domain_level) { |
| 1040 | cs->relax_domain_level = val; | 1044 | cs->relax_domain_level = val; |
| @@ -1886,6 +1890,12 @@ static void common_cpu_mem_hotplug_unplug(void) | |||
| 1886 | top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; | 1890 | top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; |
| 1887 | scan_for_empty_cpusets(&top_cpuset); | 1891 | scan_for_empty_cpusets(&top_cpuset); |
| 1888 | 1892 | ||
| 1893 | /* | ||
| 1894 | * Scheduler destroys domains on hotplug events. | ||
| 1895 | * Rebuild them based on the current settings. | ||
| 1896 | */ | ||
| 1897 | rebuild_sched_domains(); | ||
| 1898 | |||
| 1889 | cgroup_unlock(); | 1899 | cgroup_unlock(); |
| 1890 | } | 1900 | } |
| 1891 | 1901 | ||
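The cpuset.c hunks above change update_cpumask() and update_nodemask() from silently intersecting a request with the online CPUs or memory nodes to rejecting any request that is not a subset of them, and update_relax_domain_level() now range-checks its argument instead of clamping it. A minimal sketch of that subset check follows, using a plain 64-bit mask and a hypothetical validate_request() helper instead of the kernel's cpumask/nodemask types.

```c
#include <errno.h>
#include <stdint.h>

/*
 * Reject a request that names any resource (CPU, memory node, ...)
 * outside the currently available set, instead of silently masking
 * the request down to whatever happens to be online.
 */
static int validate_request(uint64_t requested, uint64_t available)
{
	if (requested & ~available)
		return -EINVAL;		/* old behaviour: requested &= available */
	return 0;
}
```

Returning -EINVAL makes a request for an offline CPU or node visible to the writer of the cpuset file rather than being quietly dropped.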
diff --git a/kernel/exit.c b/kernel/exit.c index 1510f78a0ffa..8f6185e69b69 100644 --- a/kernel/exit.c +++ b/kernel/exit.c
| @@ -126,6 +126,12 @@ static void __exit_signal(struct task_struct *tsk) | |||
| 126 | 126 | ||
| 127 | __unhash_process(tsk); | 127 | __unhash_process(tsk); |
| 128 | 128 | ||
| 129 | /* | ||
| 130 | * Do this under ->siglock, we can race with another thread | ||
| 131 | * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals. | ||
| 132 | */ | ||
| 133 | flush_sigqueue(&tsk->pending); | ||
| 134 | |||
| 129 | tsk->signal = NULL; | 135 | tsk->signal = NULL; |
| 130 | tsk->sighand = NULL; | 136 | tsk->sighand = NULL; |
| 131 | spin_unlock(&sighand->siglock); | 137 | spin_unlock(&sighand->siglock); |
| @@ -133,7 +139,6 @@ static void __exit_signal(struct task_struct *tsk) | |||
| 133 | 139 | ||
| 134 | __cleanup_sighand(sighand); | 140 | __cleanup_sighand(sighand); |
| 135 | clear_tsk_thread_flag(tsk,TIF_SIGPENDING); | 141 | clear_tsk_thread_flag(tsk,TIF_SIGPENDING); |
| 136 | flush_sigqueue(&tsk->pending); | ||
| 137 | if (sig) { | 142 | if (sig) { |
| 138 | flush_sigqueue(&sig->shared_pending); | 143 | flush_sigqueue(&sig->shared_pending); |
| 139 | taskstats_tgid_free(sig); | 144 | taskstats_tgid_free(sig); |
diff --git a/kernel/futex.c b/kernel/futex.c index 449def8074fe..7d1136e97c14 100644 --- a/kernel/futex.c +++ b/kernel/futex.c
| @@ -1096,21 +1096,64 @@ static void unqueue_me_pi(struct futex_q *q) | |||
| 1096 | * private futexes. | 1096 | * private futexes. |
| 1097 | */ | 1097 | */ |
| 1098 | static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, | 1098 | static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, |
| 1099 | struct task_struct *newowner) | 1099 | struct task_struct *newowner, |
| 1100 | struct rw_semaphore *fshared) | ||
| 1100 | { | 1101 | { |
| 1101 | u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; | 1102 | u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; |
| 1102 | struct futex_pi_state *pi_state = q->pi_state; | 1103 | struct futex_pi_state *pi_state = q->pi_state; |
| 1104 | struct task_struct *oldowner = pi_state->owner; | ||
| 1103 | u32 uval, curval, newval; | 1105 | u32 uval, curval, newval; |
| 1104 | int ret; | 1106 | int ret, attempt = 0; |
| 1105 | 1107 | ||
| 1106 | /* Owner died? */ | 1108 | /* Owner died? */ |
| 1109 | if (!pi_state->owner) | ||
| 1110 | newtid |= FUTEX_OWNER_DIED; | ||
| 1111 | |||
| 1112 | /* | ||
| 1113 | * We are here either because we stole the rtmutex from the | ||
| 1114 | * pending owner or we are the pending owner which failed to | ||
| 1115 | * get the rtmutex. We have to replace the pending owner TID | ||
| 1116 | * in the user space variable. This must be atomic as we have | ||
| 1117 | * to preserve the owner died bit here. | ||
| 1118 | * | ||
| 1119 | * Note: We write the user space value _before_ changing the | ||
| 1120 | * pi_state because we can fault here. Imagine swapped out | ||
| 1121 | * pages or a fork, which was running right before we acquired | ||
| 1122 | * mmap_sem, that marked all the anonymous memory readonly for | ||
| 1123 | * cow. | ||
| 1124 | * | ||
| 1125 | * Modifying pi_state _before_ the user space value would | ||
| 1126 | * leave the pi_state in an inconsistent state when we fault | ||
| 1127 | * here, because we need to drop the hash bucket lock to | ||
| 1128 | * handle the fault. This might be observed in the PID check | ||
| 1129 | * in lookup_pi_state. | ||
| 1130 | */ | ||
| 1131 | retry: | ||
| 1132 | if (get_futex_value_locked(&uval, uaddr)) | ||
| 1133 | goto handle_fault; | ||
| 1134 | |||
| 1135 | while (1) { | ||
| 1136 | newval = (uval & FUTEX_OWNER_DIED) | newtid; | ||
| 1137 | |||
| 1138 | curval = cmpxchg_futex_value_locked(uaddr, uval, newval); | ||
| 1139 | |||
| 1140 | if (curval == -EFAULT) | ||
| 1141 | goto handle_fault; | ||
| 1142 | if (curval == uval) | ||
| 1143 | break; | ||
| 1144 | uval = curval; | ||
| 1145 | } | ||
| 1146 | |||
| 1147 | /* | ||
| 1148 | * We fixed up user space. Now we need to fix the pi_state | ||
| 1149 | * itself. | ||
| 1150 | */ | ||
| 1107 | if (pi_state->owner != NULL) { | 1151 | if (pi_state->owner != NULL) { |
| 1108 | spin_lock_irq(&pi_state->owner->pi_lock); | 1152 | spin_lock_irq(&pi_state->owner->pi_lock); |
| 1109 | WARN_ON(list_empty(&pi_state->list)); | 1153 | WARN_ON(list_empty(&pi_state->list)); |
| 1110 | list_del_init(&pi_state->list); | 1154 | list_del_init(&pi_state->list); |
| 1111 | spin_unlock_irq(&pi_state->owner->pi_lock); | 1155 | spin_unlock_irq(&pi_state->owner->pi_lock); |
| 1112 | } else | 1156 | } |
| 1113 | newtid |= FUTEX_OWNER_DIED; | ||
| 1114 | 1157 | ||
| 1115 | pi_state->owner = newowner; | 1158 | pi_state->owner = newowner; |
| 1116 | 1159 | ||
| @@ -1118,26 +1161,35 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, | |||
| 1118 | WARN_ON(!list_empty(&pi_state->list)); | 1161 | WARN_ON(!list_empty(&pi_state->list)); |
| 1119 | list_add(&pi_state->list, &newowner->pi_state_list); | 1162 | list_add(&pi_state->list, &newowner->pi_state_list); |
| 1120 | spin_unlock_irq(&newowner->pi_lock); | 1163 | spin_unlock_irq(&newowner->pi_lock); |
| 1164 | return 0; | ||
| 1121 | 1165 | ||
| 1122 | /* | 1166 | /* |
| 1123 | * We own it, so we have to replace the pending owner | 1167 | * To handle the page fault we need to drop the hash bucket |
| 1124 | * TID. This must be atomic as we have preserve the | 1168 | * lock here. That gives the other task (either the pending |
| 1125 | * owner died bit here. | 1169 | * owner itself or the task which stole the rtmutex) the |
| 1170 | * chance to try the fixup of the pi_state. So once we are | ||
| 1171 | * back from handling the fault we need to check the pi_state | ||
| 1172 | * after reacquiring the hash bucket lock and before trying to | ||
| 1173 | * do another fixup. When the fixup has been done already we | ||
| 1174 | * simply return. | ||
| 1126 | */ | 1175 | */ |
| 1127 | ret = get_futex_value_locked(&uval, uaddr); | 1176 | handle_fault: |
| 1177 | spin_unlock(q->lock_ptr); | ||
| 1128 | 1178 | ||
| 1129 | while (!ret) { | 1179 | ret = futex_handle_fault((unsigned long)uaddr, fshared, attempt++); |
| 1130 | newval = (uval & FUTEX_OWNER_DIED) | newtid; | ||
| 1131 | 1180 | ||
| 1132 | curval = cmpxchg_futex_value_locked(uaddr, uval, newval); | 1181 | spin_lock(q->lock_ptr); |
| 1133 | 1182 | ||
| 1134 | if (curval == -EFAULT) | 1183 | /* |
| 1135 | ret = -EFAULT; | 1184 | * Check if someone else fixed it for us: |
| 1136 | if (curval == uval) | 1185 | */ |
| 1137 | break; | 1186 | if (pi_state->owner != oldowner) |
| 1138 | uval = curval; | 1187 | return 0; |
| 1139 | } | 1188 | |
| 1140 | return ret; | 1189 | if (ret) |
| 1190 | return ret; | ||
| 1191 | |||
| 1192 | goto retry; | ||
| 1141 | } | 1193 | } |
| 1142 | 1194 | ||
| 1143 | /* | 1195 | /* |
| @@ -1507,7 +1559,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
| 1507 | * that case: | 1559 | * that case: |
| 1508 | */ | 1560 | */ |
| 1509 | if (q.pi_state->owner != curr) | 1561 | if (q.pi_state->owner != curr) |
| 1510 | ret = fixup_pi_state_owner(uaddr, &q, curr); | 1562 | ret = fixup_pi_state_owner(uaddr, &q, curr, fshared); |
| 1511 | } else { | 1563 | } else { |
| 1512 | /* | 1564 | /* |
| 1513 | * Catch the rare case, where the lock was released | 1565 | * Catch the rare case, where the lock was released |
| @@ -1539,7 +1591,8 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, | |||
| 1539 | int res; | 1591 | int res; |
| 1540 | 1592 | ||
| 1541 | owner = rt_mutex_owner(&q.pi_state->pi_mutex); | 1593 | owner = rt_mutex_owner(&q.pi_state->pi_mutex); |
| 1542 | res = fixup_pi_state_owner(uaddr, &q, owner); | 1594 | res = fixup_pi_state_owner(uaddr, &q, owner, |
| 1595 | fshared); | ||
| 1543 | 1596 | ||
| 1544 | /* propagate -EFAULT, if the fixup failed */ | 1597 | /* propagate -EFAULT, if the fixup failed */ |
| 1545 | if (res) | 1598 | if (res) |
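The futex.c hunk rewrites fixup_pi_state_owner() so that the user-space futex word is updated before the kernel-side pi_state, using a compare-and-swap loop that preserves the FUTEX_OWNER_DIED bit and can restart after a page fault. Below is a userspace sketch of just that update loop; set_new_owner() and the OWNER_DIED value are illustrative, GCC atomic builtins stand in for cmpxchg_futex_value_locked(), and the fault handling is reduced to comments.

```c
#include <stdint.h>

#define OWNER_DIED 0x40000000u	/* stand-in for FUTEX_OWNER_DIED */

/* Replace the owner TID stored in the futex word, keeping the died bit. */
static void set_new_owner(uint32_t *futex_word, uint32_t newtid)
{
	uint32_t uval = __atomic_load_n(futex_word, __ATOMIC_RELAXED);

	for (;;) {
		uint32_t newval = (uval & OWNER_DIED) | newtid;

		/*
		 * On failure 'uval' is reloaded with the current value and
		 * we simply retry.  In the kernel, a fault here instead
		 * drops the hash-bucket lock, resolves the fault, re-checks
		 * whether another task already fixed pi_state->owner, and
		 * only then retries.
		 */
		if (__atomic_compare_exchange_n(futex_word, &uval, newval,
						0, __ATOMIC_RELAXED,
						__ATOMIC_RELAXED))
			break;
	}
}
```

Writing user space first keeps pi_state consistent even when the fault path has to drop and retake the hash-bucket lock partway through.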
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 421be5fe5cc7..ab80515008f4 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c
| @@ -1003,10 +1003,18 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode) | |||
| 1003 | */ | 1003 | */ |
| 1004 | raise = timer->state == HRTIMER_STATE_PENDING; | 1004 | raise = timer->state == HRTIMER_STATE_PENDING; |
| 1005 | 1005 | ||
| 1006 | /* | ||
| 1007 | * We use preempt_disable to prevent this task from migrating after | ||
| 1008 | * setting up the softirq and raising it. Otherwise, if we migrate | ||
| 1009 | * we will raise the softirq on the wrong CPU. | ||
| 1010 | */ | ||
| 1011 | preempt_disable(); | ||
| 1012 | |||
| 1006 | unlock_hrtimer_base(timer, &flags); | 1013 | unlock_hrtimer_base(timer, &flags); |
| 1007 | 1014 | ||
| 1008 | if (raise) | 1015 | if (raise) |
| 1009 | hrtimer_raise_softirq(); | 1016 | hrtimer_raise_softirq(); |
| 1017 | preempt_enable(); | ||
| 1010 | 1018 | ||
| 1011 | return ret; | 1019 | return ret; |
| 1012 | } | 1020 | } |
diff --git a/kernel/kgdb.c b/kernel/kgdb.c index 14787de568b3..3ec23c3ec97f 100644 --- a/kernel/kgdb.c +++ b/kernel/kgdb.c
| @@ -52,6 +52,7 @@ | |||
| 52 | #include <asm/byteorder.h> | 52 | #include <asm/byteorder.h> |
| 53 | #include <asm/atomic.h> | 53 | #include <asm/atomic.h> |
| 54 | #include <asm/system.h> | 54 | #include <asm/system.h> |
| 55 | #include <asm/unaligned.h> | ||
| 55 | 56 | ||
| 56 | static int kgdb_break_asap; | 57 | static int kgdb_break_asap; |
| 57 | 58 | ||
| @@ -227,8 +228,6 @@ void __weak kgdb_disable_hw_debug(struct pt_regs *regs) | |||
| 227 | * GDB remote protocol parser: | 228 | * GDB remote protocol parser: |
| 228 | */ | 229 | */ |
| 229 | 230 | ||
| 230 | static const char hexchars[] = "0123456789abcdef"; | ||
| 231 | |||
| 232 | static int hex(char ch) | 231 | static int hex(char ch) |
| 233 | { | 232 | { |
| 234 | if ((ch >= 'a') && (ch <= 'f')) | 233 | if ((ch >= 'a') && (ch <= 'f')) |
| @@ -316,8 +315,8 @@ static void put_packet(char *buffer) | |||
| 316 | } | 315 | } |
| 317 | 316 | ||
| 318 | kgdb_io_ops->write_char('#'); | 317 | kgdb_io_ops->write_char('#'); |
| 319 | kgdb_io_ops->write_char(hexchars[checksum >> 4]); | 318 | kgdb_io_ops->write_char(hex_asc_hi(checksum)); |
| 320 | kgdb_io_ops->write_char(hexchars[checksum & 0xf]); | 319 | kgdb_io_ops->write_char(hex_asc_lo(checksum)); |
| 321 | if (kgdb_io_ops->flush) | 320 | if (kgdb_io_ops->flush) |
| 322 | kgdb_io_ops->flush(); | 321 | kgdb_io_ops->flush(); |
| 323 | 322 | ||
| @@ -478,8 +477,8 @@ static void error_packet(char *pkt, int error) | |||
| 478 | { | 477 | { |
| 479 | error = -error; | 478 | error = -error; |
| 480 | pkt[0] = 'E'; | 479 | pkt[0] = 'E'; |
| 481 | pkt[1] = hexchars[(error / 10)]; | 480 | pkt[1] = hex_asc[(error / 10)]; |
| 482 | pkt[2] = hexchars[(error % 10)]; | 481 | pkt[2] = hex_asc[(error % 10)]; |
| 483 | pkt[3] = '\0'; | 482 | pkt[3] = '\0'; |
| 484 | } | 483 | } |
| 485 | 484 | ||
| @@ -510,10 +509,7 @@ static void int_to_threadref(unsigned char *id, int value) | |||
| 510 | scan = (unsigned char *)id; | 509 | scan = (unsigned char *)id; |
| 511 | while (i--) | 510 | while (i--) |
| 512 | *scan++ = 0; | 511 | *scan++ = 0; |
| 513 | *scan++ = (value >> 24) & 0xff; | 512 | put_unaligned_be32(value, scan); |
| 514 | *scan++ = (value >> 16) & 0xff; | ||
| 515 | *scan++ = (value >> 8) & 0xff; | ||
| 516 | *scan++ = (value & 0xff); | ||
| 517 | } | 513 | } |
| 518 | 514 | ||
| 519 | static struct task_struct *getthread(struct pt_regs *regs, int tid) | 515 | static struct task_struct *getthread(struct pt_regs *regs, int tid) |
| @@ -1503,7 +1499,8 @@ int kgdb_nmicallback(int cpu, void *regs) | |||
| 1503 | return 1; | 1499 | return 1; |
| 1504 | } | 1500 | } |
| 1505 | 1501 | ||
| 1506 | void kgdb_console_write(struct console *co, const char *s, unsigned count) | 1502 | static void kgdb_console_write(struct console *co, const char *s, |
| 1503 | unsigned count) | ||
| 1507 | { | 1504 | { |
| 1508 | unsigned long flags; | 1505 | unsigned long flags; |
| 1509 | 1506 | ||
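The kgdb.c hunks drop the file-local hexchars[] table and the open-coded byte shuffling in int_to_threadref() in favour of the generic hex_asc helpers and put_unaligned_be32(). Their effect is roughly that of the small userspace helpers below; the names echo the kernel ones but the bodies are illustrative, not the kernel's implementations.

```c
#include <stdint.h>

static const char hex_asc[] = "0123456789abcdef";

/* hex_asc_hi()/hex_asc_lo(): one hex digit for each nibble of a byte */
static char hex_hi(uint8_t b) { return hex_asc[b >> 4]; }
static char hex_lo(uint8_t b) { return hex_asc[b & 0x0f]; }

/*
 * put_unaligned_be32(): store a 32-bit value big-endian, byte by byte,
 * so no alignment is assumed for the destination buffer.
 */
static void put_be32(uint32_t value, uint8_t *dst)
{
	dst[0] = value >> 24;
	dst[1] = value >> 16;
	dst[2] = value >> 8;
	dst[3] = value;
}
```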
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 1e0250cb9486..d4998f81e229 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c
| @@ -699,8 +699,9 @@ static int __register_kprobes(struct kprobe **kps, int num, | |||
| 699 | return -EINVAL; | 699 | return -EINVAL; |
| 700 | for (i = 0; i < num; i++) { | 700 | for (i = 0; i < num; i++) { |
| 701 | ret = __register_kprobe(kps[i], called_from); | 701 | ret = __register_kprobe(kps[i], called_from); |
| 702 | if (ret < 0 && i > 0) { | 702 | if (ret < 0) { |
| 703 | unregister_kprobes(kps, i); | 703 | if (i > 0) |
| 704 | unregister_kprobes(kps, i); | ||
| 704 | break; | 705 | break; |
| 705 | } | 706 | } |
| 706 | } | 707 | } |
| @@ -776,8 +777,9 @@ static int __register_jprobes(struct jprobe **jps, int num, | |||
| 776 | jp->kp.break_handler = longjmp_break_handler; | 777 | jp->kp.break_handler = longjmp_break_handler; |
| 777 | ret = __register_kprobe(&jp->kp, called_from); | 778 | ret = __register_kprobe(&jp->kp, called_from); |
| 778 | } | 779 | } |
| 779 | if (ret < 0 && i > 0) { | 780 | if (ret < 0) { |
| 780 | unregister_jprobes(jps, i); | 781 | if (i > 0) |
| 782 | unregister_jprobes(jps, i); | ||
| 781 | break; | 783 | break; |
| 782 | } | 784 | } |
| 783 | } | 785 | } |
| @@ -920,8 +922,9 @@ static int __register_kretprobes(struct kretprobe **rps, int num, | |||
| 920 | return -EINVAL; | 922 | return -EINVAL; |
| 921 | for (i = 0; i < num; i++) { | 923 | for (i = 0; i < num; i++) { |
| 922 | ret = __register_kretprobe(rps[i], called_from); | 924 | ret = __register_kretprobe(rps[i], called_from); |
| 923 | if (ret < 0 && i > 0) { | 925 | if (ret < 0) { |
| 924 | unregister_kretprobes(rps, i); | 926 | if (i > 0) |
| 927 | unregister_kretprobes(rps, i); | ||
| 925 | break; | 928 | break; |
| 926 | } | 929 | } |
| 927 | } | 930 | } |
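The three kprobes.c hunks fix the same bug in the bulk-registration helpers: when registering the very first probe failed (i == 0), the old `ret < 0 && i > 0` test was false, so the loop neither rolled back nor stopped and kept registering the remaining probes. The corrected pattern is sketched below with hypothetical register_one()/unregister_many() stand-ins for __register_kprobe() and unregister_kprobes().

```c
#include <stddef.h>

int register_one(void *item);			/* 0 on success, negative errno on failure */
void unregister_many(void **items, size_t n);	/* undo the first n registrations */

static int register_all(void **items, size_t num)
{
	size_t i;
	int ret = 0;

	for (i = 0; i < num; i++) {
		ret = register_one(items[i]);
		if (ret < 0) {
			if (i > 0)		/* roll back whatever already succeeded */
				unregister_many(items, i);
			break;			/* always stop at the first failure */
		}
	}
	return ret;
}
```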
diff --git a/kernel/module.c b/kernel/module.c index f5e9491ef7ac..5f80478b746d 100644 --- a/kernel/module.c +++ b/kernel/module.c
| @@ -1337,7 +1337,19 @@ out_unreg: | |||
| 1337 | kobject_put(&mod->mkobj.kobj); | 1337 | kobject_put(&mod->mkobj.kobj); |
| 1338 | return err; | 1338 | return err; |
| 1339 | } | 1339 | } |
| 1340 | #endif | 1340 | |
| 1341 | static void mod_sysfs_fini(struct module *mod) | ||
| 1342 | { | ||
| 1343 | kobject_put(&mod->mkobj.kobj); | ||
| 1344 | } | ||
| 1345 | |||
| 1346 | #else /* CONFIG_SYSFS */ | ||
| 1347 | |||
| 1348 | static void mod_sysfs_fini(struct module *mod) | ||
| 1349 | { | ||
| 1350 | } | ||
| 1351 | |||
| 1352 | #endif /* CONFIG_SYSFS */ | ||
| 1341 | 1353 | ||
| 1342 | static void mod_kobject_remove(struct module *mod) | 1354 | static void mod_kobject_remove(struct module *mod) |
| 1343 | { | 1355 | { |
| @@ -1345,7 +1357,7 @@ static void mod_kobject_remove(struct module *mod) | |||
| 1345 | module_param_sysfs_remove(mod); | 1357 | module_param_sysfs_remove(mod); |
| 1346 | kobject_put(mod->mkobj.drivers_dir); | 1358 | kobject_put(mod->mkobj.drivers_dir); |
| 1347 | kobject_put(mod->holders_dir); | 1359 | kobject_put(mod->holders_dir); |
| 1348 | kobject_put(&mod->mkobj.kobj); | 1360 | mod_sysfs_fini(mod); |
| 1349 | } | 1361 | } |
| 1350 | 1362 | ||
| 1351 | /* | 1363 | /* |
| @@ -1780,7 +1792,7 @@ static struct module *load_module(void __user *umod, | |||
| 1780 | 1792 | ||
| 1781 | /* Sanity checks against insmoding binaries or wrong arch, | 1793 | /* Sanity checks against insmoding binaries or wrong arch, |
| 1782 | weird elf version */ | 1794 | weird elf version */ |
| 1783 | if (memcmp(hdr->e_ident, ELFMAG, 4) != 0 | 1795 | if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0 |
| 1784 | || hdr->e_type != ET_REL | 1796 | || hdr->e_type != ET_REL |
| 1785 | || !elf_check_arch(hdr) | 1797 | || !elf_check_arch(hdr) |
| 1786 | || hdr->e_shentsize != sizeof(*sechdrs)) { | 1798 | || hdr->e_shentsize != sizeof(*sechdrs)) { |
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c index f4ffbd0f306f..a38895a5b8e2 100644 --- a/kernel/rcuclassic.c +++ b/kernel/rcuclassic.c
| @@ -89,8 +89,22 @@ static void force_quiescent_state(struct rcu_data *rdp, | |||
| 89 | /* | 89 | /* |
| 90 | * Don't send IPI to itself. With irqs disabled, | 90 | * Don't send IPI to itself. With irqs disabled, |
| 91 | * rdp->cpu is the current cpu. | 91 | * rdp->cpu is the current cpu. |
| 92 | * | ||
| 93 | * cpu_online_map is updated by the _cpu_down() | ||
| 94 | * using stop_machine_run(). Since we're in irqs disabled | ||
| 95 | * section, stop_machine_run() is not executing, hence | ||
| 96 | * the cpu_online_map is stable. | ||
| 97 | * | ||
| 98 | * However, a cpu might have been offlined _just_ before | ||
| 99 | * we disabled irqs while entering here. | ||
| 100 | * And rcu subsystem might not yet have handled the CPU_DEAD | ||
| 101 | * notification, leading to the offlined cpu's bit | ||
| 102 | * being set in the rcp->cpumask. | ||
| 103 | * | ||
| 104 | * Hence cpumask = (rcp->cpumask & cpu_online_map) to prevent | ||
| 105 | * sending smp_reschedule() to an offlined CPU. | ||
| 92 | */ | 106 | */ |
| 93 | cpumask = rcp->cpumask; | 107 | cpus_and(cpumask, rcp->cpumask, cpu_online_map); |
| 94 | cpu_clear(rdp->cpu, cpumask); | 108 | cpu_clear(rdp->cpu, cpumask); |
| 95 | for_each_cpu_mask(cpu, cpumask) | 109 | for_each_cpu_mask(cpu, cpumask) |
| 96 | smp_send_reschedule(cpu); | 110 | smp_send_reschedule(cpu); |
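The rcuclassic.c hunk's comment explains that rcp->cpumask can still contain a CPU that was offlined just before interrupts were disabled, because the CPU_DEAD notification may not have been processed yet; intersecting with cpu_online_map avoids sending a reschedule IPI to such a CPU. The guarded iteration reduces to the pattern below, sketched with 64-bit masks instead of cpumask_t and a hypothetical send_ipi() in place of smp_send_reschedule().

```c
#include <stdint.h>

void send_ipi(int cpu);		/* stand-in for smp_send_reschedule() */

/*
 * 'pending' may still have a bit set for a CPU that just went offline;
 * intersect it with the stable 'online' mask before signalling anyone.
 */
static void kick_cpus(uint64_t pending, uint64_t online, int self)
{
	uint64_t mask = pending & online;	/* was: mask = pending */
	int cpu;

	mask &= ~(UINT64_C(1) << self);		/* never IPI ourselves */

	for (cpu = 0; cpu < 64; cpu++)
		if (mask & (UINT64_C(1) << cpu))
			send_ipi(cpu);
}
```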
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index e1cdf196a515..5e02b7740702 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c
| @@ -217,8 +217,6 @@ long rcu_batches_completed(void) | |||
| 217 | } | 217 | } |
| 218 | EXPORT_SYMBOL_GPL(rcu_batches_completed); | 218 | EXPORT_SYMBOL_GPL(rcu_batches_completed); |
| 219 | 219 | ||
| 220 | EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); | ||
| 221 | |||
| 222 | void __rcu_read_lock(void) | 220 | void __rcu_read_lock(void) |
| 223 | { | 221 | { |
| 224 | int idx; | 222 | int idx; |
diff --git a/kernel/relay.c b/kernel/relay.c index bc24dcdc570f..7de644cdec43 100644 --- a/kernel/relay.c +++ b/kernel/relay.c
| @@ -1191,7 +1191,7 @@ static ssize_t relay_file_splice_read(struct file *in, | |||
| 1191 | ret = 0; | 1191 | ret = 0; |
| 1192 | spliced = 0; | 1192 | spliced = 0; |
| 1193 | 1193 | ||
| 1194 | while (len) { | 1194 | while (len && !spliced) { |
| 1195 | ret = subbuf_splice_actor(in, ppos, pipe, len, flags, &nonpad_ret); | 1195 | ret = subbuf_splice_actor(in, ppos, pipe, len, flags, &nonpad_ret); |
| 1196 | if (ret < 0) | 1196 | if (ret < 0) |
| 1197 | break; | 1197 | break; |
diff --git a/kernel/sched.c b/kernel/sched.c index e2e985eeee78..70cb127e3495 100644 --- a/kernel/sched.c +++ b/kernel/sched.c
| @@ -137,7 +137,7 @@ static inline void sg_inc_cpu_power(struct sched_group *sg, u32 val) | |||
| 137 | 137 | ||
| 138 | static inline int rt_policy(int policy) | 138 | static inline int rt_policy(int policy) |
| 139 | { | 139 | { |
| 140 | if (unlikely(policy == SCHED_FIFO) || unlikely(policy == SCHED_RR)) | 140 | if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR)) |
| 141 | return 1; | 141 | return 1; |
| 142 | return 0; | 142 | return 0; |
| 143 | } | 143 | } |
| @@ -313,12 +313,15 @@ static DEFINE_SPINLOCK(task_group_lock); | |||
| 313 | #endif | 313 | #endif |
| 314 | 314 | ||
| 315 | /* | 315 | /* |
| 316 | * A weight of 0, 1 or ULONG_MAX can cause arithmetics problems. | 316 | * A weight of 0 or 1 can cause arithmetics problems. |
| 317 | * A weight of a cfs_rq is the sum of weights of which entities | ||
| 318 | * are queued on this cfs_rq, so a weight of an entity should not be | ||
| 319 | * too large, so as the shares value of a task group. | ||
| 317 | * (The default weight is 1024 - so there's no practical | 320 | * (The default weight is 1024 - so there's no practical |
| 318 | * limitation from this.) | 321 | * limitation from this.) |
| 319 | */ | 322 | */ |
| 320 | #define MIN_SHARES 2 | 323 | #define MIN_SHARES 2 |
| 321 | #define MAX_SHARES (ULONG_MAX - 1) | 324 | #define MAX_SHARES (1UL << 18) |
| 322 | 325 | ||
| 323 | static int init_task_group_load = INIT_TASK_GROUP_LOAD; | 326 | static int init_task_group_load = INIT_TASK_GROUP_LOAD; |
| 324 | #endif | 327 | #endif |
| @@ -399,43 +402,6 @@ struct cfs_rq { | |||
| 399 | */ | 402 | */ |
| 400 | struct list_head leaf_cfs_rq_list; | 403 | struct list_head leaf_cfs_rq_list; |
| 401 | struct task_group *tg; /* group that "owns" this runqueue */ | 404 | struct task_group *tg; /* group that "owns" this runqueue */ |
| 402 | |||
| 403 | #ifdef CONFIG_SMP | ||
| 404 | unsigned long task_weight; | ||
| 405 | unsigned long shares; | ||
| 406 | /* | ||
| 407 | * We need space to build a sched_domain wide view of the full task | ||
| 408 | * group tree, in order to avoid depending on dynamic memory allocation | ||
| 409 | * during the load balancing we place this in the per cpu task group | ||
| 410 | * hierarchy. This limits the load balancing to one instance per cpu, | ||
| 411 | * but more should not be needed anyway. | ||
| 412 | */ | ||
| 413 | struct aggregate_struct { | ||
| 414 | /* | ||
| 415 | * load = weight(cpus) * f(tg) | ||
| 416 | * | ||
| 417 | * Where f(tg) is the recursive weight fraction assigned to | ||
| 418 | * this group. | ||
| 419 | */ | ||
| 420 | unsigned long load; | ||
| 421 | |||
| 422 | /* | ||
| 423 | * part of the group weight distributed to this span. | ||
| 424 | */ | ||
| 425 | unsigned long shares; | ||
| 426 | |||
| 427 | /* | ||
| 428 | * The sum of all runqueue weights within this span. | ||
| 429 | */ | ||
| 430 | unsigned long rq_weight; | ||
| 431 | |||
| 432 | /* | ||
| 433 | * Weight contributed by tasks; this is the part we can | ||
| 434 | * influence by moving tasks around. | ||
| 435 | */ | ||
| 436 | unsigned long task_weight; | ||
| 437 | } aggregate; | ||
| 438 | #endif | ||
| 439 | #endif | 405 | #endif |
| 440 | }; | 406 | }; |
| 441 | 407 | ||
| @@ -1180,6 +1146,7 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer) | |||
| 1180 | return HRTIMER_NORESTART; | 1146 | return HRTIMER_NORESTART; |
| 1181 | } | 1147 | } |
| 1182 | 1148 | ||
| 1149 | #ifdef CONFIG_SMP | ||
| 1183 | static void hotplug_hrtick_disable(int cpu) | 1150 | static void hotplug_hrtick_disable(int cpu) |
| 1184 | { | 1151 | { |
| 1185 | struct rq *rq = cpu_rq(cpu); | 1152 | struct rq *rq = cpu_rq(cpu); |
| @@ -1235,6 +1202,7 @@ static void init_hrtick(void) | |||
| 1235 | { | 1202 | { |
| 1236 | hotcpu_notifier(hotplug_hrtick, 0); | 1203 | hotcpu_notifier(hotplug_hrtick, 0); |
| 1237 | } | 1204 | } |
| 1205 | #endif /* CONFIG_SMP */ | ||
| 1238 | 1206 | ||
| 1239 | static void init_rq_hrtick(struct rq *rq) | 1207 | static void init_rq_hrtick(struct rq *rq) |
| 1240 | { | 1208 | { |
| @@ -1387,17 +1355,19 @@ static void __resched_task(struct task_struct *p, int tif_bit) | |||
| 1387 | */ | 1355 | */ |
| 1388 | #define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y)) | 1356 | #define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y)) |
| 1389 | 1357 | ||
| 1390 | /* | ||
| 1391 | * delta *= weight / lw | ||
| 1392 | */ | ||
| 1393 | static unsigned long | 1358 | static unsigned long |
| 1394 | calc_delta_mine(unsigned long delta_exec, unsigned long weight, | 1359 | calc_delta_mine(unsigned long delta_exec, unsigned long weight, |
| 1395 | struct load_weight *lw) | 1360 | struct load_weight *lw) |
| 1396 | { | 1361 | { |
| 1397 | u64 tmp; | 1362 | u64 tmp; |
| 1398 | 1363 | ||
| 1399 | if (!lw->inv_weight) | 1364 | if (!lw->inv_weight) { |
| 1400 | lw->inv_weight = 1 + (WMULT_CONST-lw->weight/2)/(lw->weight+1); | 1365 | if (BITS_PER_LONG > 32 && unlikely(lw->weight >= WMULT_CONST)) |
| 1366 | lw->inv_weight = 1; | ||
| 1367 | else | ||
| 1368 | lw->inv_weight = 1 + (WMULT_CONST-lw->weight/2) | ||
| 1369 | / (lw->weight+1); | ||
| 1370 | } | ||
| 1401 | 1371 | ||
| 1402 | tmp = (u64)delta_exec * weight; | 1372 | tmp = (u64)delta_exec * weight; |
| 1403 | /* | 1373 | /* |
| @@ -1412,6 +1382,12 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight, | |||
| 1412 | return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); | 1382 | return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); |
| 1413 | } | 1383 | } |
| 1414 | 1384 | ||
| 1385 | static inline unsigned long | ||
| 1386 | calc_delta_fair(unsigned long delta_exec, struct load_weight *lw) | ||
| 1387 | { | ||
| 1388 | return calc_delta_mine(delta_exec, NICE_0_LOAD, lw); | ||
| 1389 | } | ||
| 1390 | |||
| 1415 | static inline void update_load_add(struct load_weight *lw, unsigned long inc) | 1391 | static inline void update_load_add(struct load_weight *lw, unsigned long inc) |
| 1416 | { | 1392 | { |
| 1417 | lw->weight += inc; | 1393 | lw->weight += inc; |
| @@ -1524,326 +1500,6 @@ static unsigned long source_load(int cpu, int type); | |||
| 1524 | static unsigned long target_load(int cpu, int type); | 1500 | static unsigned long target_load(int cpu, int type); |
| 1525 | static unsigned long cpu_avg_load_per_task(int cpu); | 1501 | static unsigned long cpu_avg_load_per_task(int cpu); |
| 1526 | static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); | 1502 | static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); |
| 1527 | |||
| 1528 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 1529 | |||
| 1530 | /* | ||
| 1531 | * Group load balancing. | ||
| 1532 | * | ||
| 1533 | * We calculate a few balance domain wide aggregate numbers; load and weight. | ||
| 1534 | * Given the pictures below, and assuming each item has equal weight: | ||
| 1535 | * | ||
| 1536 | * root 1 - thread | ||
| 1537 | * / | \ A - group | ||
| 1538 | * A 1 B | ||
| 1539 | * /|\ / \ | ||
| 1540 | * C 2 D 3 4 | ||
| 1541 | * | | | ||
| 1542 | * 5 6 | ||
| 1543 | * | ||
| 1544 | * load: | ||
| 1545 | * A and B get 1/3-rd of the total load. C and D get 1/3-rd of A's 1/3-rd, | ||
| 1546 | * which equals 1/9-th of the total load. | ||
| 1547 | * | ||
| 1548 | * shares: | ||
| 1549 | * The weight of this group on the selected cpus. | ||
| 1550 | * | ||
| 1551 | * rq_weight: | ||
| 1552 | * Direct sum of all the cpu's their rq weight, e.g. A would get 3 while | ||
| 1553 | * B would get 2. | ||
| 1554 | * | ||
| 1555 | * task_weight: | ||
| 1556 | * Part of the rq_weight contributed by tasks; all groups except B would | ||
| 1557 | * get 1, B gets 2. | ||
| 1558 | */ | ||
| 1559 | |||
| 1560 | static inline struct aggregate_struct * | ||
| 1561 | aggregate(struct task_group *tg, struct sched_domain *sd) | ||
| 1562 | { | ||
| 1563 | return &tg->cfs_rq[sd->first_cpu]->aggregate; | ||
| 1564 | } | ||
| 1565 | |||
| 1566 | typedef void (*aggregate_func)(struct task_group *, struct sched_domain *); | ||
| 1567 | |||
| 1568 | /* | ||
| 1569 | * Iterate the full tree, calling @down when first entering a node and @up when | ||
| 1570 | * leaving it for the final time. | ||
| 1571 | */ | ||
| 1572 | static | ||
| 1573 | void aggregate_walk_tree(aggregate_func down, aggregate_func up, | ||
| 1574 | struct sched_domain *sd) | ||
| 1575 | { | ||
| 1576 | struct task_group *parent, *child; | ||
| 1577 | |||
| 1578 | rcu_read_lock(); | ||
| 1579 | parent = &root_task_group; | ||
| 1580 | down: | ||
| 1581 | (*down)(parent, sd); | ||
| 1582 | list_for_each_entry_rcu(child, &parent->children, siblings) { | ||
| 1583 | parent = child; | ||
| 1584 | goto down; | ||
| 1585 | |||
| 1586 | up: | ||
| 1587 | continue; | ||
| 1588 | } | ||
| 1589 | (*up)(parent, sd); | ||
| 1590 | |||
| 1591 | child = parent; | ||
| 1592 | parent = parent->parent; | ||
| 1593 | if (parent) | ||
| 1594 | goto up; | ||
| 1595 | rcu_read_unlock(); | ||
| 1596 | } | ||
| 1597 | |||
| 1598 | /* | ||
| 1599 | * Calculate the aggregate runqueue weight. | ||
| 1600 | */ | ||
| 1601 | static | ||
| 1602 | void aggregate_group_weight(struct task_group *tg, struct sched_domain *sd) | ||
| 1603 | { | ||
| 1604 | unsigned long rq_weight = 0; | ||
| 1605 | unsigned long task_weight = 0; | ||
| 1606 | int i; | ||
| 1607 | |||
| 1608 | for_each_cpu_mask(i, sd->span) { | ||
| 1609 | rq_weight += tg->cfs_rq[i]->load.weight; | ||
| 1610 | task_weight += tg->cfs_rq[i]->task_weight; | ||
| 1611 | } | ||
| 1612 | |||
| 1613 | aggregate(tg, sd)->rq_weight = rq_weight; | ||
| 1614 | aggregate(tg, sd)->task_weight = task_weight; | ||
| 1615 | } | ||
| 1616 | |||
| 1617 | /* | ||
| 1618 | * Compute the weight of this group on the given cpus. | ||
| 1619 | */ | ||
| 1620 | static | ||
| 1621 | void aggregate_group_shares(struct task_group *tg, struct sched_domain *sd) | ||
| 1622 | { | ||
| 1623 | unsigned long shares = 0; | ||
| 1624 | int i; | ||
| 1625 | |||
| 1626 | for_each_cpu_mask(i, sd->span) | ||
| 1627 | shares += tg->cfs_rq[i]->shares; | ||
| 1628 | |||
| 1629 | if ((!shares && aggregate(tg, sd)->rq_weight) || shares > tg->shares) | ||
| 1630 | shares = tg->shares; | ||
| 1631 | |||
| 1632 | aggregate(tg, sd)->shares = shares; | ||
| 1633 | } | ||
| 1634 | |||
| 1635 | /* | ||
| 1636 | * Compute the load fraction assigned to this group, relies on the aggregate | ||
| 1637 | * weight and this group's parent's load, i.e. top-down. | ||
| 1638 | */ | ||
| 1639 | static | ||
| 1640 | void aggregate_group_load(struct task_group *tg, struct sched_domain *sd) | ||
| 1641 | { | ||
| 1642 | unsigned long load; | ||
| 1643 | |||
| 1644 | if (!tg->parent) { | ||
| 1645 | int i; | ||
| 1646 | |||
| 1647 | load = 0; | ||
| 1648 | for_each_cpu_mask(i, sd->span) | ||
| 1649 | load += cpu_rq(i)->load.weight; | ||
| 1650 | |||
| 1651 | } else { | ||
| 1652 | load = aggregate(tg->parent, sd)->load; | ||
| 1653 | |||
| 1654 | /* | ||
| 1655 | * shares is our weight in the parent's rq so | ||
| 1656 | * shares/parent->rq_weight gives our fraction of the load | ||
| 1657 | */ | ||
| 1658 | load *= aggregate(tg, sd)->shares; | ||
| 1659 | load /= aggregate(tg->parent, sd)->rq_weight + 1; | ||
| 1660 | } | ||
| 1661 | |||
| 1662 | aggregate(tg, sd)->load = load; | ||
| 1663 | } | ||
| 1664 | |||
| 1665 | static void __set_se_shares(struct sched_entity *se, unsigned long shares); | ||
| 1666 | |||
| 1667 | /* | ||
| 1668 | * Calculate and set the cpu's group shares. | ||
| 1669 | */ | ||
| 1670 | static void | ||
| 1671 | __update_group_shares_cpu(struct task_group *tg, struct sched_domain *sd, | ||
| 1672 | int tcpu) | ||
| 1673 | { | ||
| 1674 | int boost = 0; | ||
| 1675 | unsigned long shares; | ||
| 1676 | unsigned long rq_weight; | ||
| 1677 | |||
| 1678 | if (!tg->se[tcpu]) | ||
| 1679 | return; | ||
| 1680 | |||
| 1681 | rq_weight = tg->cfs_rq[tcpu]->load.weight; | ||
| 1682 | |||
| 1683 | /* | ||
| 1684 | * If there are currently no tasks on the cpu pretend there is one of | ||
| 1685 | * average load so that when a new task gets to run here it will not | ||
| 1686 | * get delayed by group starvation. | ||
| 1687 | */ | ||
| 1688 | if (!rq_weight) { | ||
| 1689 | boost = 1; | ||
| 1690 | rq_weight = NICE_0_LOAD; | ||
| 1691 | } | ||
| 1692 | |||
| 1693 | /* | ||
| 1694 | * \Sum shares * rq_weight | ||
| 1695 | * shares = ----------------------- | ||
| 1696 | * \Sum rq_weight | ||
| 1697 | * | ||
| 1698 | */ | ||
| 1699 | shares = aggregate(tg, sd)->shares * rq_weight; | ||
| 1700 | shares /= aggregate(tg, sd)->rq_weight + 1; | ||
| 1701 | |||
| 1702 | /* | ||
| 1703 | * record the actual number of shares, not the boosted amount. | ||
| 1704 | */ | ||
| 1705 | tg->cfs_rq[tcpu]->shares = boost ? 0 : shares; | ||
| 1706 | |||
| 1707 | if (shares < MIN_SHARES) | ||
| 1708 | shares = MIN_SHARES; | ||
| 1709 | else if (shares > MAX_SHARES) | ||
| 1710 | shares = MAX_SHARES; | ||
| 1711 | |||
| 1712 | __set_se_shares(tg->se[tcpu], shares); | ||
| 1713 | } | ||
| 1714 | |||
| 1715 | /* | ||
| 1716 | * Re-adjust the weights on the cpu the task came from and on the cpu the | ||
| 1717 | * task went to. | ||
| 1718 | */ | ||
| 1719 | static void | ||
| 1720 | __move_group_shares(struct task_group *tg, struct sched_domain *sd, | ||
| 1721 | int scpu, int dcpu) | ||
| 1722 | { | ||
| 1723 | unsigned long shares; | ||
| 1724 | |||
| 1725 | shares = tg->cfs_rq[scpu]->shares + tg->cfs_rq[dcpu]->shares; | ||
| 1726 | |||
| 1727 | __update_group_shares_cpu(tg, sd, scpu); | ||
| 1728 | __update_group_shares_cpu(tg, sd, dcpu); | ||
| 1729 | |||
| 1730 | /* | ||
| 1731 | * ensure we never loose shares due to rounding errors in the | ||
| 1732 | * above redistribution. | ||
| 1733 | */ | ||
| 1734 | shares -= tg->cfs_rq[scpu]->shares + tg->cfs_rq[dcpu]->shares; | ||
| 1735 | if (shares) | ||
| 1736 | tg->cfs_rq[dcpu]->shares += shares; | ||
| 1737 | } | ||
| 1738 | |||
| 1739 | /* | ||
| 1740 | * Because changing a group's shares changes the weight of the super-group | ||
| 1741 | * we need to walk up the tree and change all shares until we hit the root. | ||
| 1742 | */ | ||
| 1743 | static void | ||
| 1744 | move_group_shares(struct task_group *tg, struct sched_domain *sd, | ||
| 1745 | int scpu, int dcpu) | ||
| 1746 | { | ||
| 1747 | while (tg) { | ||
| 1748 | __move_group_shares(tg, sd, scpu, dcpu); | ||
| 1749 | tg = tg->parent; | ||
| 1750 | } | ||
| 1751 | } | ||
| 1752 | |||
| 1753 | static | ||
| 1754 | void aggregate_group_set_shares(struct task_group *tg, struct sched_domain *sd) | ||
| 1755 | { | ||
| 1756 | unsigned long shares = aggregate(tg, sd)->shares; | ||
| 1757 | int i; | ||
| 1758 | |||
| 1759 | for_each_cpu_mask(i, sd->span) { | ||
| 1760 | struct rq *rq = cpu_rq(i); | ||
| 1761 | unsigned long flags; | ||
| 1762 | |||
| 1763 | spin_lock_irqsave(&rq->lock, flags); | ||
| 1764 | __update_group_shares_cpu(tg, sd, i); | ||
| 1765 | spin_unlock_irqrestore(&rq->lock, flags); | ||
| 1766 | } | ||
| 1767 | |||
| 1768 | aggregate_group_shares(tg, sd); | ||
| 1769 | |||
| 1770 | /* | ||
| 1771 | * ensure we never loose shares due to rounding errors in the | ||
| 1772 | * above redistribution. | ||
| 1773 | */ | ||
| 1774 | shares -= aggregate(tg, sd)->shares; | ||
| 1775 | if (shares) { | ||
| 1776 | tg->cfs_rq[sd->first_cpu]->shares += shares; | ||
| 1777 | aggregate(tg, sd)->shares += shares; | ||
| 1778 | } | ||
| 1779 | } | ||
| 1780 | |||
| 1781 | /* | ||
| 1782 | * Calculate the accumulative weight and recursive load of each task group | ||
| 1783 | * while walking down the tree. | ||
| 1784 | */ | ||
| 1785 | static | ||
| 1786 | void aggregate_get_down(struct task_group *tg, struct sched_domain *sd) | ||
| 1787 | { | ||
| 1788 | aggregate_group_weight(tg, sd); | ||
| 1789 | aggregate_group_shares(tg, sd); | ||
| 1790 | aggregate_group_load(tg, sd); | ||
| 1791 | } | ||
| 1792 | |||
| 1793 | /* | ||
| 1794 | * Rebalance the cpu shares while walking back up the tree. | ||
| 1795 | */ | ||
| 1796 | static | ||
| 1797 | void aggregate_get_up(struct task_group *tg, struct sched_domain *sd) | ||
| 1798 | { | ||
| 1799 | aggregate_group_set_shares(tg, sd); | ||
| 1800 | } | ||
| 1801 | |||
| 1802 | static DEFINE_PER_CPU(spinlock_t, aggregate_lock); | ||
| 1803 | |||
| 1804 | static void __init init_aggregate(void) | ||
| 1805 | { | ||
| 1806 | int i; | ||
| 1807 | |||
| 1808 | for_each_possible_cpu(i) | ||
| 1809 | spin_lock_init(&per_cpu(aggregate_lock, i)); | ||
| 1810 | } | ||
| 1811 | |||
| 1812 | static int get_aggregate(struct sched_domain *sd) | ||
| 1813 | { | ||
| 1814 | if (!spin_trylock(&per_cpu(aggregate_lock, sd->first_cpu))) | ||
| 1815 | return 0; | ||
| 1816 | |||
| 1817 | aggregate_walk_tree(aggregate_get_down, aggregate_get_up, sd); | ||
| 1818 | return 1; | ||
| 1819 | } | ||
| 1820 | |||
| 1821 | static void put_aggregate(struct sched_domain *sd) | ||
| 1822 | { | ||
| 1823 | spin_unlock(&per_cpu(aggregate_lock, sd->first_cpu)); | ||
| 1824 | } | ||
| 1825 | |||
| 1826 | static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) | ||
| 1827 | { | ||
| 1828 | cfs_rq->shares = shares; | ||
| 1829 | } | ||
| 1830 | |||
| 1831 | #else | ||
| 1832 | |||
| 1833 | static inline void init_aggregate(void) | ||
| 1834 | { | ||
| 1835 | } | ||
| 1836 | |||
| 1837 | static inline int get_aggregate(struct sched_domain *sd) | ||
| 1838 | { | ||
| 1839 | return 0; | ||
| 1840 | } | ||
| 1841 | |||
| 1842 | static inline void put_aggregate(struct sched_domain *sd) | ||
| 1843 | { | ||
| 1844 | } | ||
| 1845 | #endif | ||
| 1846 | |||
| 1847 | #else /* CONFIG_SMP */ | 1503 | #else /* CONFIG_SMP */ |
| 1848 | 1504 | ||
| 1849 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1505 | #ifdef CONFIG_FAIR_GROUP_SCHED |
| @@ -1864,14 +1520,26 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) | |||
| 1864 | 1520 | ||
| 1865 | #define sched_class_highest (&rt_sched_class) | 1521 | #define sched_class_highest (&rt_sched_class) |
| 1866 | 1522 | ||
| 1867 | static void inc_nr_running(struct rq *rq) | 1523 | static inline void inc_load(struct rq *rq, const struct task_struct *p) |
| 1524 | { | ||
| 1525 | update_load_add(&rq->load, p->se.load.weight); | ||
| 1526 | } | ||
| 1527 | |||
| 1528 | static inline void dec_load(struct rq *rq, const struct task_struct *p) | ||
| 1529 | { | ||
| 1530 | update_load_sub(&rq->load, p->se.load.weight); | ||
| 1531 | } | ||
| 1532 | |||
| 1533 | static void inc_nr_running(struct task_struct *p, struct rq *rq) | ||
| 1868 | { | 1534 | { |
| 1869 | rq->nr_running++; | 1535 | rq->nr_running++; |
| 1536 | inc_load(rq, p); | ||
| 1870 | } | 1537 | } |
| 1871 | 1538 | ||
| 1872 | static void dec_nr_running(struct rq *rq) | 1539 | static void dec_nr_running(struct task_struct *p, struct rq *rq) |
| 1873 | { | 1540 | { |
| 1874 | rq->nr_running--; | 1541 | rq->nr_running--; |
| 1542 | dec_load(rq, p); | ||
| 1875 | } | 1543 | } |
| 1876 | 1544 | ||
| 1877 | static void set_load_weight(struct task_struct *p) | 1545 | static void set_load_weight(struct task_struct *p) |
| @@ -1963,7 +1631,7 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup) | |||
| 1963 | rq->nr_uninterruptible--; | 1631 | rq->nr_uninterruptible--; |
| 1964 | 1632 | ||
| 1965 | enqueue_task(rq, p, wakeup); | 1633 | enqueue_task(rq, p, wakeup); |
| 1966 | inc_nr_running(rq); | 1634 | inc_nr_running(p, rq); |
| 1967 | } | 1635 | } |
| 1968 | 1636 | ||
| 1969 | /* | 1637 | /* |
| @@ -1975,7 +1643,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep) | |||
| 1975 | rq->nr_uninterruptible++; | 1643 | rq->nr_uninterruptible++; |
| 1976 | 1644 | ||
| 1977 | dequeue_task(rq, p, sleep); | 1645 | dequeue_task(rq, p, sleep); |
| 1978 | dec_nr_running(rq); | 1646 | dec_nr_running(p, rq); |
| 1979 | } | 1647 | } |
| 1980 | 1648 | ||
| 1981 | /** | 1649 | /** |
| @@ -2631,7 +2299,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
| 2631 | * management (if any): | 2299 | * management (if any): |
| 2632 | */ | 2300 | */ |
| 2633 | p->sched_class->task_new(rq, p); | 2301 | p->sched_class->task_new(rq, p); |
| 2634 | inc_nr_running(rq); | 2302 | inc_nr_running(p, rq); |
| 2635 | } | 2303 | } |
| 2636 | trace_mark(kernel_sched_wakeup_new, | 2304 | trace_mark(kernel_sched_wakeup_new, |
| 2637 | "pid %d state %ld ## rq %p task %p rq->curr %p", | 2305 | "pid %d state %ld ## rq %p task %p rq->curr %p", |
| @@ -3630,12 +3298,9 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
| 3630 | unsigned long imbalance; | 3298 | unsigned long imbalance; |
| 3631 | struct rq *busiest; | 3299 | struct rq *busiest; |
| 3632 | unsigned long flags; | 3300 | unsigned long flags; |
| 3633 | int unlock_aggregate; | ||
| 3634 | 3301 | ||
| 3635 | cpus_setall(*cpus); | 3302 | cpus_setall(*cpus); |
| 3636 | 3303 | ||
| 3637 | unlock_aggregate = get_aggregate(sd); | ||
| 3638 | |||
| 3639 | /* | 3304 | /* |
| 3640 | * When power savings policy is enabled for the parent domain, idle | 3305 | * When power savings policy is enabled for the parent domain, idle |
| 3641 | * sibling can pick up load irrespective of busy siblings. In this case, | 3306 | * sibling can pick up load irrespective of busy siblings. In this case, |
| @@ -3751,9 +3416,8 @@ redo: | |||
| 3751 | 3416 | ||
| 3752 | if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && | 3417 | if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && |
| 3753 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | 3418 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) |
| 3754 | ld_moved = -1; | 3419 | return -1; |
| 3755 | 3420 | return ld_moved; | |
| 3756 | goto out; | ||
| 3757 | 3421 | ||
| 3758 | out_balanced: | 3422 | out_balanced: |
| 3759 | schedstat_inc(sd, lb_balanced[idle]); | 3423 | schedstat_inc(sd, lb_balanced[idle]); |
| @@ -3768,13 +3432,8 @@ out_one_pinned: | |||
| 3768 | 3432 | ||
| 3769 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && | 3433 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && |
| 3770 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | 3434 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) |
| 3771 | ld_moved = -1; | 3435 | return -1; |
| 3772 | else | 3436 | return 0; |
| 3773 | ld_moved = 0; | ||
| 3774 | out: | ||
| 3775 | if (unlock_aggregate) | ||
| 3776 | put_aggregate(sd); | ||
| 3777 | return ld_moved; | ||
| 3778 | } | 3437 | } |
| 3779 | 3438 | ||
| 3780 | /* | 3439 | /* |
| @@ -4481,7 +4140,7 @@ static inline void schedule_debug(struct task_struct *prev) | |||
| 4481 | * schedule() atomically, we ignore that path for now. | 4140 | * schedule() atomically, we ignore that path for now. |
| 4482 | * Otherwise, whine if we are scheduling when we should not be. | 4141 | * Otherwise, whine if we are scheduling when we should not be. |
| 4483 | */ | 4142 | */ |
| 4484 | if (unlikely(in_atomic_preempt_off()) && unlikely(!prev->exit_state)) | 4143 | if (unlikely(in_atomic_preempt_off() && !prev->exit_state)) |
| 4485 | __schedule_bug(prev); | 4144 | __schedule_bug(prev); |
| 4486 | 4145 | ||
| 4487 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); | 4146 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); |
| @@ -4561,12 +4220,10 @@ need_resched_nonpreemptible: | |||
| 4561 | clear_tsk_need_resched(prev); | 4220 | clear_tsk_need_resched(prev); |
| 4562 | 4221 | ||
| 4563 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { | 4222 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { |
| 4564 | if (unlikely((prev->state & TASK_INTERRUPTIBLE) && | 4223 | if (unlikely(signal_pending_state(prev->state, prev))) |
| 4565 | signal_pending(prev))) { | ||
| 4566 | prev->state = TASK_RUNNING; | 4224 | prev->state = TASK_RUNNING; |
| 4567 | } else { | 4225 | else |
| 4568 | deactivate_task(rq, prev, 1); | 4226 | deactivate_task(rq, prev, 1); |
| 4569 | } | ||
| 4570 | switch_count = &prev->nvcsw; | 4227 | switch_count = &prev->nvcsw; |
| 4571 | } | 4228 | } |
| 4572 | 4229 | ||
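In the `schedule()` hunk above, the open-coded `TASK_INTERRUPTIBLE && signal_pending()` test becomes a single `signal_pending_state()` call. A rough userspace approximation of what such a helper boils down to; the constants, stubs, and the simplified single-argument signature are illustrative, not the kernel's:

```c
/* Approximate model of the check schedule() now delegates to
 * signal_pending_state(): a task about to block stays runnable if the
 * state it is entering can be interrupted by a pending signal. */
#include <stdbool.h>
#include <stdio.h>

#define TASK_INTERRUPTIBLE 0x01
#define TASK_WAKEKILL      0x80

static bool signal_pending(void)       { return true;  } /* stub */
static bool fatal_signal_pending(void) { return false; } /* stub */

static bool signal_pending_state(long state)
{
	if (!(state & (TASK_INTERRUPTIBLE | TASK_WAKEKILL)))
		return false;
	if (!signal_pending())
		return false;
	/* only fatal signals break a killable (non-interruptible) sleep */
	return (state & TASK_INTERRUPTIBLE) || fatal_signal_pending();
}

int main(void)
{
	printf("interruptible sleep interrupted: %d\n",
	       signal_pending_state(TASK_INTERRUPTIBLE));
	printf("killable sleep interrupted:      %d\n",
	       signal_pending_state(TASK_WAKEKILL));
	return 0;
}
```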
| @@ -4792,22 +4449,20 @@ do_wait_for_common(struct completion *x, long timeout, int state) | |||
| 4792 | signal_pending(current)) || | 4449 | signal_pending(current)) || |
| 4793 | (state == TASK_KILLABLE && | 4450 | (state == TASK_KILLABLE && |
| 4794 | fatal_signal_pending(current))) { | 4451 | fatal_signal_pending(current))) { |
| 4795 | __remove_wait_queue(&x->wait, &wait); | 4452 | timeout = -ERESTARTSYS; |
| 4796 | return -ERESTARTSYS; | 4453 | break; |
| 4797 | } | 4454 | } |
| 4798 | __set_current_state(state); | 4455 | __set_current_state(state); |
| 4799 | spin_unlock_irq(&x->wait.lock); | 4456 | spin_unlock_irq(&x->wait.lock); |
| 4800 | timeout = schedule_timeout(timeout); | 4457 | timeout = schedule_timeout(timeout); |
| 4801 | spin_lock_irq(&x->wait.lock); | 4458 | spin_lock_irq(&x->wait.lock); |
| 4802 | if (!timeout) { | 4459 | } while (!x->done && timeout); |
| 4803 | __remove_wait_queue(&x->wait, &wait); | ||
| 4804 | return timeout; | ||
| 4805 | } | ||
| 4806 | } while (!x->done); | ||
| 4807 | __remove_wait_queue(&x->wait, &wait); | 4460 | __remove_wait_queue(&x->wait, &wait); |
| 4461 | if (!x->done) | ||
| 4462 | return timeout; | ||
| 4808 | } | 4463 | } |
| 4809 | x->done--; | 4464 | x->done--; |
| 4810 | return timeout; | 4465 | return timeout ?: 1; |
| 4811 | } | 4466 | } |
| 4812 | 4467 | ||
| 4813 | static long __sched | 4468 | static long __sched |
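The `do_wait_for_common()` rework above funnels every exit path through a single `__remove_wait_queue()` and makes a successful wait return non-zero even when the clock ran out on the same pass that `done` became true (`timeout ?: 1`). A toy model of that control flow, with a decrementing counter standing in for `schedule_timeout()`:

```c
/* Minimal model of the reshaped wait loop: all early exits fall through
 * one cleanup point, and a completed wait never reports 0. Purely
 * illustrative; the "completion" logic below is invented. */
#include <stdio.h>

static long do_wait(int *done, long timeout, int interrupted)
{
	if (!*done) {
		do {
			if (interrupted) {
				timeout = -1;   /* -ERESTARTSYS in the kernel */
				break;
			}
			timeout--;              /* stand-in for schedule_timeout() */
			if (timeout % 3 == 0)   /* pretend completion arrives */
				*done = 1;
		} while (!*done && timeout);
		/* single cleanup point: __remove_wait_queue() goes here */
		if (!*done)
			return timeout;         /* 0 (expired) or -ERESTARTSYS */
	}
	(*done)--;
	return timeout ? timeout : 1;           /* the kernel writes "timeout ?: 1" */
}

int main(void)
{
	int done = 0;
	printf("normal: %ld\n", do_wait(&done, 4, 0));   /* remaining time, > 0 */
	done = 0;
	printf("late:   %ld\n", do_wait(&done, 2, 0));   /* expired but done -> 1 */
	return 0;
}
```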
| @@ -4982,8 +4637,10 @@ void set_user_nice(struct task_struct *p, long nice) | |||
| 4982 | goto out_unlock; | 4637 | goto out_unlock; |
| 4983 | } | 4638 | } |
| 4984 | on_rq = p->se.on_rq; | 4639 | on_rq = p->se.on_rq; |
| 4985 | if (on_rq) | 4640 | if (on_rq) { |
| 4986 | dequeue_task(rq, p, 0); | 4641 | dequeue_task(rq, p, 0); |
| 4642 | dec_load(rq, p); | ||
| 4643 | } | ||
| 4987 | 4644 | ||
| 4988 | p->static_prio = NICE_TO_PRIO(nice); | 4645 | p->static_prio = NICE_TO_PRIO(nice); |
| 4989 | set_load_weight(p); | 4646 | set_load_weight(p); |
| @@ -4993,6 +4650,7 @@ void set_user_nice(struct task_struct *p, long nice) | |||
| 4993 | 4650 | ||
| 4994 | if (on_rq) { | 4651 | if (on_rq) { |
| 4995 | enqueue_task(rq, p, 0); | 4652 | enqueue_task(rq, p, 0); |
| 4653 | inc_load(rq, p); | ||
| 4996 | /* | 4654 | /* |
| 4997 | * If the task increased its priority or is running and | 4655 | * If the task increased its priority or is running and |
| 4998 | * lowered its priority, then reschedule its CPU: | 4656 | * lowered its priority, then reschedule its CPU: |
| @@ -6280,6 +5938,7 @@ static void migrate_dead_tasks(unsigned int dead_cpu) | |||
| 6280 | next = pick_next_task(rq, rq->curr); | 5938 | next = pick_next_task(rq, rq->curr); |
| 6281 | if (!next) | 5939 | if (!next) |
| 6282 | break; | 5940 | break; |
| 5941 | next->sched_class->put_prev_task(rq, next); | ||
| 6283 | migrate_dead(dead_cpu, next); | 5942 | migrate_dead(dead_cpu, next); |
| 6284 | 5943 | ||
| 6285 | } | 5944 | } |
| @@ -7270,7 +6929,12 @@ static int default_relax_domain_level = -1; | |||
| 7270 | 6929 | ||
| 7271 | static int __init setup_relax_domain_level(char *str) | 6930 | static int __init setup_relax_domain_level(char *str) |
| 7272 | { | 6931 | { |
| 7273 | default_relax_domain_level = simple_strtoul(str, NULL, 0); | 6932 | unsigned long val; |
| 6933 | |||
| 6934 | val = simple_strtoul(str, NULL, 0); | ||
| 6935 | if (val < SD_LV_MAX) | ||
| 6936 | default_relax_domain_level = val; | ||
| 6937 | |||
| 7274 | return 1; | 6938 | return 1; |
| 7275 | } | 6939 | } |
| 7276 | __setup("relax_domain_level=", setup_relax_domain_level); | 6940 | __setup("relax_domain_level=", setup_relax_domain_level); |
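`setup_relax_domain_level()` now validates the parsed value against `SD_LV_MAX` before committing it. A small sketch of that parse-then-validate pattern; the limit and default below are stand-in values:

```c
/* Sketch of the hardening pattern above: parse a boot parameter into a
 * temporary, check it against the legal range, and only then overwrite
 * the default. SD_LV_MAX here is an illustrative limit. */
#include <stdio.h>
#include <stdlib.h>

#define SD_LV_MAX 9

static int default_relax_domain_level = -1;

static int setup_relax_domain_level(const char *str)
{
	unsigned long val = strtoul(str, NULL, 0);

	if (val < SD_LV_MAX)                 /* reject out-of-range input */
		default_relax_domain_level = (int)val;
	return 1;
}

int main(void)
{
	setup_relax_domain_level("99");      /* ignored: out of range */
	printf("after '99': %d\n", default_relax_domain_level);
	setup_relax_domain_level("2");
	printf("after '2':  %d\n", default_relax_domain_level);
	return 0;
}
```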
| @@ -7367,7 +7031,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
| 7367 | SD_INIT(sd, ALLNODES); | 7031 | SD_INIT(sd, ALLNODES); |
| 7368 | set_domain_attribute(sd, attr); | 7032 | set_domain_attribute(sd, attr); |
| 7369 | sd->span = *cpu_map; | 7033 | sd->span = *cpu_map; |
| 7370 | sd->first_cpu = first_cpu(sd->span); | ||
| 7371 | cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask); | 7034 | cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask); |
| 7372 | p = sd; | 7035 | p = sd; |
| 7373 | sd_allnodes = 1; | 7036 | sd_allnodes = 1; |
| @@ -7378,7 +7041,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
| 7378 | SD_INIT(sd, NODE); | 7041 | SD_INIT(sd, NODE); |
| 7379 | set_domain_attribute(sd, attr); | 7042 | set_domain_attribute(sd, attr); |
| 7380 | sched_domain_node_span(cpu_to_node(i), &sd->span); | 7043 | sched_domain_node_span(cpu_to_node(i), &sd->span); |
| 7381 | sd->first_cpu = first_cpu(sd->span); | ||
| 7382 | sd->parent = p; | 7044 | sd->parent = p; |
| 7383 | if (p) | 7045 | if (p) |
| 7384 | p->child = sd; | 7046 | p->child = sd; |
| @@ -7390,7 +7052,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
| 7390 | SD_INIT(sd, CPU); | 7052 | SD_INIT(sd, CPU); |
| 7391 | set_domain_attribute(sd, attr); | 7053 | set_domain_attribute(sd, attr); |
| 7392 | sd->span = *nodemask; | 7054 | sd->span = *nodemask; |
| 7393 | sd->first_cpu = first_cpu(sd->span); | ||
| 7394 | sd->parent = p; | 7055 | sd->parent = p; |
| 7395 | if (p) | 7056 | if (p) |
| 7396 | p->child = sd; | 7057 | p->child = sd; |
| @@ -7402,7 +7063,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
| 7402 | SD_INIT(sd, MC); | 7063 | SD_INIT(sd, MC); |
| 7403 | set_domain_attribute(sd, attr); | 7064 | set_domain_attribute(sd, attr); |
| 7404 | sd->span = cpu_coregroup_map(i); | 7065 | sd->span = cpu_coregroup_map(i); |
| 7405 | sd->first_cpu = first_cpu(sd->span); | ||
| 7406 | cpus_and(sd->span, sd->span, *cpu_map); | 7066 | cpus_and(sd->span, sd->span, *cpu_map); |
| 7407 | sd->parent = p; | 7067 | sd->parent = p; |
| 7408 | p->child = sd; | 7068 | p->child = sd; |
| @@ -7415,7 +7075,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map, | |||
| 7415 | SD_INIT(sd, SIBLING); | 7075 | SD_INIT(sd, SIBLING); |
| 7416 | set_domain_attribute(sd, attr); | 7076 | set_domain_attribute(sd, attr); |
| 7417 | sd->span = per_cpu(cpu_sibling_map, i); | 7077 | sd->span = per_cpu(cpu_sibling_map, i); |
| 7418 | sd->first_cpu = first_cpu(sd->span); | ||
| 7419 | cpus_and(sd->span, sd->span, *cpu_map); | 7078 | cpus_and(sd->span, sd->span, *cpu_map); |
| 7420 | sd->parent = p; | 7079 | sd->parent = p; |
| 7421 | p->child = sd; | 7080 | p->child = sd; |
| @@ -7619,8 +7278,8 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
| 7619 | 7278 | ||
| 7620 | static cpumask_t *doms_cur; /* current sched domains */ | 7279 | static cpumask_t *doms_cur; /* current sched domains */ |
| 7621 | static int ndoms_cur; /* number of sched domains in 'doms_cur' */ | 7280 | static int ndoms_cur; /* number of sched domains in 'doms_cur' */ |
| 7622 | static struct sched_domain_attr *dattr_cur; /* attributes of custom domains | 7281 | static struct sched_domain_attr *dattr_cur; |
| 7623 | in 'doms_cur' */ | 7282 | /* attributes of custom domains in 'doms_cur' */ |
| 7624 | 7283 | ||
| 7625 | /* | 7284 | /* |
| 7626 | * Special case: If a kmalloc of a doms_cur partition (array of | 7285 | * Special case: If a kmalloc of a doms_cur partition (array of |
| @@ -7634,6 +7293,18 @@ void __attribute__((weak)) arch_update_cpu_topology(void) | |||
| 7634 | } | 7293 | } |
| 7635 | 7294 | ||
| 7636 | /* | 7295 | /* |
| 7296 | * Free current domain masks. | ||
| 7297 | * Called after all cpus are attached to NULL domain. | ||
| 7298 | */ | ||
| 7299 | static void free_sched_domains(void) | ||
| 7300 | { | ||
| 7301 | ndoms_cur = 0; | ||
| 7302 | if (doms_cur != &fallback_doms) | ||
| 7303 | kfree(doms_cur); | ||
| 7304 | doms_cur = &fallback_doms; | ||
| 7305 | } | ||
| 7306 | |||
| 7307 | /* | ||
| 7637 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. | 7308 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. |
| 7638 | * For now this just excludes isolated cpus, but could be used to | 7309 | * For now this just excludes isolated cpus, but could be used to |
| 7639 | * exclude other special cases in the future. | 7310 | * exclude other special cases in the future. |
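The new `free_sched_domains()` resets the partition count and frees the current mask array only when it is not the static fallback. The same pattern in plain userspace C, with `cpumask_t` reduced to an integer:

```c
/* Userspace sketch of free_sched_domains(): doms_cur either references a
 * dynamically allocated partition array or a static fallback, so the free
 * path must reset the count and only free the dynamic case. */
#include <stdio.h>
#include <stdlib.h>

typedef unsigned long cpumask_t;             /* simplified stand-in */

static cpumask_t fallback_doms;
static cpumask_t *doms_cur = &fallback_doms;
static int ndoms_cur;

static void free_sched_domains(void)
{
	ndoms_cur = 0;
	if (doms_cur != &fallback_doms)      /* never free the static mask */
		free(doms_cur);
	doms_cur = &fallback_doms;
}

int main(void)
{
	doms_cur = calloc(4, sizeof(*doms_cur));
	ndoms_cur = 4;
	free_sched_domains();
	printf("ndoms_cur=%d using_fallback=%d\n",
	       ndoms_cur, doms_cur == &fallback_doms);
	return 0;
}
```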
| @@ -7780,6 +7451,7 @@ int arch_reinit_sched_domains(void) | |||
| 7780 | get_online_cpus(); | 7451 | get_online_cpus(); |
| 7781 | mutex_lock(&sched_domains_mutex); | 7452 | mutex_lock(&sched_domains_mutex); |
| 7782 | detach_destroy_domains(&cpu_online_map); | 7453 | detach_destroy_domains(&cpu_online_map); |
| 7454 | free_sched_domains(); | ||
| 7783 | err = arch_init_sched_domains(&cpu_online_map); | 7455 | err = arch_init_sched_domains(&cpu_online_map); |
| 7784 | mutex_unlock(&sched_domains_mutex); | 7456 | mutex_unlock(&sched_domains_mutex); |
| 7785 | put_online_cpus(); | 7457 | put_online_cpus(); |
| @@ -7865,6 +7537,7 @@ static int update_sched_domains(struct notifier_block *nfb, | |||
| 7865 | case CPU_DOWN_PREPARE: | 7537 | case CPU_DOWN_PREPARE: |
| 7866 | case CPU_DOWN_PREPARE_FROZEN: | 7538 | case CPU_DOWN_PREPARE_FROZEN: |
| 7867 | detach_destroy_domains(&cpu_online_map); | 7539 | detach_destroy_domains(&cpu_online_map); |
| 7540 | free_sched_domains(); | ||
| 7868 | return NOTIFY_OK; | 7541 | return NOTIFY_OK; |
| 7869 | 7542 | ||
| 7870 | case CPU_UP_CANCELED: | 7543 | case CPU_UP_CANCELED: |
| @@ -7883,8 +7556,16 @@ static int update_sched_domains(struct notifier_block *nfb, | |||
| 7883 | return NOTIFY_DONE; | 7556 | return NOTIFY_DONE; |
| 7884 | } | 7557 | } |
| 7885 | 7558 | ||
| 7559 | #ifndef CONFIG_CPUSETS | ||
| 7560 | /* | ||
| 7561 | * Create default domain partitioning if cpusets are disabled. | ||
| 7562 | * Otherwise we let cpusets rebuild the domains based on the | ||
| 7563 | * current setup. | ||
| 7564 | */ | ||
| 7565 | |||
| 7886 | /* The hotplug lock is already held by cpu_up/cpu_down */ | 7566 | /* The hotplug lock is already held by cpu_up/cpu_down */ |
| 7887 | arch_init_sched_domains(&cpu_online_map); | 7567 | arch_init_sched_domains(&cpu_online_map); |
| 7568 | #endif | ||
| 7888 | 7569 | ||
| 7889 | return NOTIFY_OK; | 7570 | return NOTIFY_OK; |
| 7890 | } | 7571 | } |
| @@ -8024,7 +7705,6 @@ static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq, | |||
| 8024 | else | 7705 | else |
| 8025 | rt_se->rt_rq = parent->my_q; | 7706 | rt_se->rt_rq = parent->my_q; |
| 8026 | 7707 | ||
| 8027 | rt_se->rt_rq = &rq->rt; | ||
| 8028 | rt_se->my_q = rt_rq; | 7708 | rt_se->my_q = rt_rq; |
| 8029 | rt_se->parent = parent; | 7709 | rt_se->parent = parent; |
| 8030 | INIT_LIST_HEAD(&rt_se->run_list); | 7710 | INIT_LIST_HEAD(&rt_se->run_list); |
| @@ -8085,7 +7765,6 @@ void __init sched_init(void) | |||
| 8085 | } | 7765 | } |
| 8086 | 7766 | ||
| 8087 | #ifdef CONFIG_SMP | 7767 | #ifdef CONFIG_SMP |
| 8088 | init_aggregate(); | ||
| 8089 | init_defrootdomain(); | 7768 | init_defrootdomain(); |
| 8090 | #endif | 7769 | #endif |
| 8091 | 7770 | ||
| @@ -8650,11 +8329,14 @@ void sched_move_task(struct task_struct *tsk) | |||
| 8650 | #endif | 8329 | #endif |
| 8651 | 8330 | ||
| 8652 | #ifdef CONFIG_FAIR_GROUP_SCHED | 8331 | #ifdef CONFIG_FAIR_GROUP_SCHED |
| 8653 | static void __set_se_shares(struct sched_entity *se, unsigned long shares) | 8332 | static void set_se_shares(struct sched_entity *se, unsigned long shares) |
| 8654 | { | 8333 | { |
| 8655 | struct cfs_rq *cfs_rq = se->cfs_rq; | 8334 | struct cfs_rq *cfs_rq = se->cfs_rq; |
| 8335 | struct rq *rq = cfs_rq->rq; | ||
| 8656 | int on_rq; | 8336 | int on_rq; |
| 8657 | 8337 | ||
| 8338 | spin_lock_irq(&rq->lock); | ||
| 8339 | |||
| 8658 | on_rq = se->on_rq; | 8340 | on_rq = se->on_rq; |
| 8659 | if (on_rq) | 8341 | if (on_rq) |
| 8660 | dequeue_entity(cfs_rq, se, 0); | 8342 | dequeue_entity(cfs_rq, se, 0); |
| @@ -8664,17 +8346,8 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares) | |||
| 8664 | 8346 | ||
| 8665 | if (on_rq) | 8347 | if (on_rq) |
| 8666 | enqueue_entity(cfs_rq, se, 0); | 8348 | enqueue_entity(cfs_rq, se, 0); |
| 8667 | } | ||
| 8668 | 8349 | ||
| 8669 | static void set_se_shares(struct sched_entity *se, unsigned long shares) | 8350 | spin_unlock_irq(&rq->lock); |
| 8670 | { | ||
| 8671 | struct cfs_rq *cfs_rq = se->cfs_rq; | ||
| 8672 | struct rq *rq = cfs_rq->rq; | ||
| 8673 | unsigned long flags; | ||
| 8674 | |||
| 8675 | spin_lock_irqsave(&rq->lock, flags); | ||
| 8676 | __set_se_shares(se, shares); | ||
| 8677 | spin_unlock_irqrestore(&rq->lock, flags); | ||
| 8678 | } | 8351 | } |
| 8679 | 8352 | ||
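`set_se_shares()` now performs the dequeue/re-weight/requeue dance inside one `rq->lock` section instead of going through a separate `__set_se_shares()` helper. A sketch of why that ordering matters, with a pthread mutex standing in for the runqueue spinlock and much-simplified accounting:

```c
/* Sketch of the dequeue/modify/requeue pattern: an entity's weight must
 * not change while it sits on the queue, or the queue's aggregate load
 * would go stale. All types here are illustrative. */
#include <pthread.h>
#include <stdio.h>

struct entity { unsigned long weight; int on_rq; };
struct runq   { pthread_mutex_t lock; unsigned long load; };

static void dequeue(struct runq *rq, struct entity *se) { rq->load -= se->weight; se->on_rq = 0; }
static void enqueue(struct runq *rq, struct entity *se) { rq->load += se->weight; se->on_rq = 1; }

static void set_shares(struct runq *rq, struct entity *se, unsigned long shares)
{
	pthread_mutex_lock(&rq->lock);
	int on_rq = se->on_rq;
	if (on_rq)
		dequeue(rq, se);
	se->weight = shares;                 /* safe: not accounted anywhere now */
	if (on_rq)
		enqueue(rq, se);
	pthread_mutex_unlock(&rq->lock);
}

int main(void)
{
	struct runq rq = { PTHREAD_MUTEX_INITIALIZER, 0 };
	struct entity se = { 1024, 0 };

	enqueue(&rq, &se);
	set_shares(&rq, &se, 2048);
	printf("load=%lu weight=%lu\n", rq.load, se.weight); /* 2048 2048 */
	return 0;
}
```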
| 8680 | static DEFINE_MUTEX(shares_mutex); | 8353 | static DEFINE_MUTEX(shares_mutex); |
| @@ -8713,13 +8386,8 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) | |||
| 8713 | * w/o tripping rebalance_share or load_balance_fair. | 8386 | * w/o tripping rebalance_share or load_balance_fair. |
| 8714 | */ | 8387 | */ |
| 8715 | tg->shares = shares; | 8388 | tg->shares = shares; |
| 8716 | for_each_possible_cpu(i) { | 8389 | for_each_possible_cpu(i) |
| 8717 | /* | ||
| 8718 | * force a rebalance | ||
| 8719 | */ | ||
| 8720 | cfs_rq_set_shares(tg->cfs_rq[i], 0); | ||
| 8721 | set_se_shares(tg->se[i], shares); | 8390 | set_se_shares(tg->se[i], shares); |
| 8722 | } | ||
| 8723 | 8391 | ||
| 8724 | /* | 8392 | /* |
| 8725 | * Enable load balance activity on this group, by inserting it back on | 8393 | * Enable load balance activity on this group, by inserting it back on |
| @@ -8758,7 +8426,7 @@ static unsigned long to_ratio(u64 period, u64 runtime) | |||
| 8758 | #ifdef CONFIG_CGROUP_SCHED | 8426 | #ifdef CONFIG_CGROUP_SCHED |
| 8759 | static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) | 8427 | static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) |
| 8760 | { | 8428 | { |
| 8761 | struct task_group *tgi, *parent = tg->parent; | 8429 | struct task_group *tgi, *parent = tg ? tg->parent : NULL; |
| 8762 | unsigned long total = 0; | 8430 | unsigned long total = 0; |
| 8763 | 8431 | ||
| 8764 | if (!parent) { | 8432 | if (!parent) { |
| @@ -8885,6 +8553,9 @@ int sched_group_set_rt_period(struct task_group *tg, long rt_period_us) | |||
| 8885 | rt_period = (u64)rt_period_us * NSEC_PER_USEC; | 8553 | rt_period = (u64)rt_period_us * NSEC_PER_USEC; |
| 8886 | rt_runtime = tg->rt_bandwidth.rt_runtime; | 8554 | rt_runtime = tg->rt_bandwidth.rt_runtime; |
| 8887 | 8555 | ||
| 8556 | if (rt_period == 0) | ||
| 8557 | return -EINVAL; | ||
| 8558 | |||
| 8888 | return tg_set_bandwidth(tg, rt_period, rt_runtime); | 8559 | return tg_set_bandwidth(tg, rt_period, rt_runtime); |
| 8889 | } | 8560 | } |
| 8890 | 8561 | ||
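The `rt_period == 0` check added to `sched_group_set_rt_period()` guards the later arithmetic that uses the period as a divisor. A hedged sketch; the constants and the ratio formula below are illustrative only:

```c
/* Sketch of the rt_period validation: a zero period would later be used
 * as a divisor when turning (period, runtime) into a ratio, so it is
 * rejected up front with -EINVAL. */
#include <stdio.h>

#define EINVAL 22
#define NSEC_PER_USEC 1000ULL

static int tg_set_bandwidth(unsigned long long rt_period,
			    unsigned long long rt_runtime)
{
	/* to_ratio()-style use of the period as a divisor */
	printf("ratio = %llu/1024\n", (rt_runtime << 10) / rt_period);
	return 0;
}

static int sched_group_set_rt_period(long rt_period_us,
				     unsigned long long rt_runtime)
{
	unsigned long long rt_period = (unsigned long long)rt_period_us * NSEC_PER_USEC;

	if (rt_period == 0)
		return -EINVAL;              /* avoid a divide-by-zero later */
	return tg_set_bandwidth(rt_period, rt_runtime);
}

int main(void)
{
	printf("zero period  -> %d\n", sched_group_set_rt_period(0, 950000));
	printf("valid period -> %d\n", sched_group_set_rt_period(1000000, 950000000ULL));
	return 0;
}
```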
diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c index 9c597e37f7de..ce05271219ab 100644 --- a/kernel/sched_clock.c +++ b/kernel/sched_clock.c | |||
| @@ -59,22 +59,26 @@ static inline struct sched_clock_data *cpu_sdc(int cpu) | |||
| 59 | return &per_cpu(sched_clock_data, cpu); | 59 | return &per_cpu(sched_clock_data, cpu); |
| 60 | } | 60 | } |
| 61 | 61 | ||
| 62 | static __read_mostly int sched_clock_running; | ||
| 63 | |||
| 62 | void sched_clock_init(void) | 64 | void sched_clock_init(void) |
| 63 | { | 65 | { |
| 64 | u64 ktime_now = ktime_to_ns(ktime_get()); | 66 | u64 ktime_now = ktime_to_ns(ktime_get()); |
| 65 | u64 now = 0; | 67 | unsigned long now_jiffies = jiffies; |
| 66 | int cpu; | 68 | int cpu; |
| 67 | 69 | ||
| 68 | for_each_possible_cpu(cpu) { | 70 | for_each_possible_cpu(cpu) { |
| 69 | struct sched_clock_data *scd = cpu_sdc(cpu); | 71 | struct sched_clock_data *scd = cpu_sdc(cpu); |
| 70 | 72 | ||
| 71 | scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; | 73 | scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; |
| 72 | scd->prev_jiffies = jiffies; | 74 | scd->prev_jiffies = now_jiffies; |
| 73 | scd->prev_raw = now; | 75 | scd->prev_raw = 0; |
| 74 | scd->tick_raw = now; | 76 | scd->tick_raw = 0; |
| 75 | scd->tick_gtod = ktime_now; | 77 | scd->tick_gtod = ktime_now; |
| 76 | scd->clock = ktime_now; | 78 | scd->clock = ktime_now; |
| 77 | } | 79 | } |
| 80 | |||
| 81 | sched_clock_running = 1; | ||
| 78 | } | 82 | } |
| 79 | 83 | ||
| 80 | /* | 84 | /* |
| @@ -136,6 +140,9 @@ u64 sched_clock_cpu(int cpu) | |||
| 136 | struct sched_clock_data *scd = cpu_sdc(cpu); | 140 | struct sched_clock_data *scd = cpu_sdc(cpu); |
| 137 | u64 now, clock; | 141 | u64 now, clock; |
| 138 | 142 | ||
| 143 | if (unlikely(!sched_clock_running)) | ||
| 144 | return 0ull; | ||
| 145 | |||
| 139 | WARN_ON_ONCE(!irqs_disabled()); | 146 | WARN_ON_ONCE(!irqs_disabled()); |
| 140 | now = sched_clock(); | 147 | now = sched_clock(); |
| 141 | 148 | ||
| @@ -174,6 +181,9 @@ void sched_clock_tick(void) | |||
| 174 | struct sched_clock_data *scd = this_scd(); | 181 | struct sched_clock_data *scd = this_scd(); |
| 175 | u64 now, now_gtod; | 182 | u64 now, now_gtod; |
| 176 | 183 | ||
| 184 | if (unlikely(!sched_clock_running)) | ||
| 185 | return; | ||
| 186 | |||
| 177 | WARN_ON_ONCE(!irqs_disabled()); | 187 | WARN_ON_ONCE(!irqs_disabled()); |
| 178 | 188 | ||
| 179 | now = sched_clock(); | 189 | now = sched_clock(); |
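sched_clock.c gains a `sched_clock_running` flag so that very early callers see a harmless constant instead of half-initialised per-CPU data. The guard pattern in miniature, with a fake raw clock in place of the hardware counter:

```c
/* Sketch of the sched_clock_running guard: until the clock data has been
 * initialised, callers get a safe 0 instead of reading uninitialised state. */
#include <stdio.h>

static int sched_clock_running;
static unsigned long long clock_base;

static unsigned long long raw_clock(void) { return 123456789ULL; } /* stub */

static void sched_clock_init(void)
{
	clock_base = raw_clock();
	sched_clock_running = 1;             /* set last, after all state is ready */
}

static unsigned long long sched_clock_cpu(void)
{
	if (!sched_clock_running)            /* too early in boot */
		return 0ULL;
	return raw_clock() - clock_base;
}

int main(void)
{
	printf("before init: %llu\n", sched_clock_cpu());   /* always 0 */
	sched_clock_init();
	printf("after init:  %llu\n", sched_clock_cpu());   /* now a real offset */
	return 0;
}
```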
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 5f06118fbc31..8bb713040ac9 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c | |||
| @@ -167,11 +167,6 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) | |||
| 167 | #endif | 167 | #endif |
| 168 | SEQ_printf(m, " .%-30s: %ld\n", "nr_spread_over", | 168 | SEQ_printf(m, " .%-30s: %ld\n", "nr_spread_over", |
| 169 | cfs_rq->nr_spread_over); | 169 | cfs_rq->nr_spread_over); |
| 170 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 171 | #ifdef CONFIG_SMP | ||
| 172 | SEQ_printf(m, " .%-30s: %lu\n", "shares", cfs_rq->shares); | ||
| 173 | #endif | ||
| 174 | #endif | ||
| 175 | } | 170 | } |
| 176 | 171 | ||
| 177 | static void print_cpu(struct seq_file *m, int cpu) | 172 | static void print_cpu(struct seq_file *m, int cpu) |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index e24ecd39c4b8..08ae848b71d4 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
| @@ -334,34 +334,6 @@ int sched_nr_latency_handler(struct ctl_table *table, int write, | |||
| 334 | #endif | 334 | #endif |
| 335 | 335 | ||
| 336 | /* | 336 | /* |
| 337 | * delta *= w / rw | ||
| 338 | */ | ||
| 339 | static inline unsigned long | ||
| 340 | calc_delta_weight(unsigned long delta, struct sched_entity *se) | ||
| 341 | { | ||
| 342 | for_each_sched_entity(se) { | ||
| 343 | delta = calc_delta_mine(delta, | ||
| 344 | se->load.weight, &cfs_rq_of(se)->load); | ||
| 345 | } | ||
| 346 | |||
| 347 | return delta; | ||
| 348 | } | ||
| 349 | |||
| 350 | /* | ||
| 351 | * delta *= rw / w | ||
| 352 | */ | ||
| 353 | static inline unsigned long | ||
| 354 | calc_delta_fair(unsigned long delta, struct sched_entity *se) | ||
| 355 | { | ||
| 356 | for_each_sched_entity(se) { | ||
| 357 | delta = calc_delta_mine(delta, | ||
| 358 | cfs_rq_of(se)->load.weight, &se->load); | ||
| 359 | } | ||
| 360 | |||
| 361 | return delta; | ||
| 362 | } | ||
| 363 | |||
| 364 | /* | ||
| 365 | * The idea is to set a period in which each task runs once. | 337 | * The idea is to set a period in which each task runs once. |
| 366 | * | 338 | * |
| 367 | * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch | 339 | * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch |
| @@ -390,54 +362,47 @@ static u64 __sched_period(unsigned long nr_running) | |||
| 390 | */ | 362 | */ |
| 391 | static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) | 363 | static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) |
| 392 | { | 364 | { |
| 393 | return calc_delta_weight(__sched_period(cfs_rq->nr_running), se); | 365 | u64 slice = __sched_period(cfs_rq->nr_running); |
| 366 | |||
| 367 | for_each_sched_entity(se) { | ||
| 368 | cfs_rq = cfs_rq_of(se); | ||
| 369 | |||
| 370 | slice *= se->load.weight; | ||
| 371 | do_div(slice, cfs_rq->load.weight); | ||
| 372 | } | ||
| 373 | |||
| 374 | |||
| 375 | return slice; | ||
| 394 | } | 376 | } |
| 395 | 377 | ||
| 396 | /* | 378 | /* |
| 397 | * We calculate the vruntime slice of a to be inserted task | 379 | * We calculate the vruntime slice of a to be inserted task |
| 398 | * | 380 | * |
| 399 | * vs = s*rw/w = p | 381 | * vs = s/w = p/rw |
| 400 | */ | 382 | */ |
| 401 | static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se) | 383 | static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se) |
| 402 | { | 384 | { |
| 403 | unsigned long nr_running = cfs_rq->nr_running; | 385 | unsigned long nr_running = cfs_rq->nr_running; |
| 386 | unsigned long weight; | ||
| 387 | u64 vslice; | ||
| 404 | 388 | ||
| 405 | if (!se->on_rq) | 389 | if (!se->on_rq) |
| 406 | nr_running++; | 390 | nr_running++; |
| 407 | 391 | ||
| 408 | return __sched_period(nr_running); | 392 | vslice = __sched_period(nr_running); |
| 409 | } | ||
| 410 | |||
| 411 | /* | ||
| 412 | * The goal of calc_delta_asym() is to be asymmetrically around NICE_0_LOAD, in | ||
| 413 | * that it favours >=0 over <0. | ||
| 414 | * | ||
| 415 | * -20 | | ||
| 416 | * | | ||
| 417 | * 0 --------+------- | ||
| 418 | * .' | ||
| 419 | * 19 .' | ||
| 420 | * | ||
| 421 | */ | ||
| 422 | static unsigned long | ||
| 423 | calc_delta_asym(unsigned long delta, struct sched_entity *se) | ||
| 424 | { | ||
| 425 | struct load_weight lw = { | ||
| 426 | .weight = NICE_0_LOAD, | ||
| 427 | .inv_weight = 1UL << (WMULT_SHIFT-NICE_0_SHIFT) | ||
| 428 | }; | ||
| 429 | 393 | ||
| 430 | for_each_sched_entity(se) { | 394 | for_each_sched_entity(se) { |
| 431 | struct load_weight *se_lw = &se->load; | 395 | cfs_rq = cfs_rq_of(se); |
| 432 | 396 | ||
| 433 | if (se->load.weight < NICE_0_LOAD) | 397 | weight = cfs_rq->load.weight; |
| 434 | se_lw = &lw; | 398 | if (!se->on_rq) |
| 399 | weight += se->load.weight; | ||
| 435 | 400 | ||
| 436 | delta = calc_delta_mine(delta, | 401 | vslice *= NICE_0_LOAD; |
| 437 | cfs_rq_of(se)->load.weight, se_lw); | 402 | do_div(vslice, weight); |
| 438 | } | 403 | } |
| 439 | 404 | ||
| 440 | return delta; | 405 | return vslice; |
| 441 | } | 406 | } |
| 442 | 407 | ||
| 443 | /* | 408 | /* |
| @@ -454,7 +419,11 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, | |||
| 454 | 419 | ||
| 455 | curr->sum_exec_runtime += delta_exec; | 420 | curr->sum_exec_runtime += delta_exec; |
| 456 | schedstat_add(cfs_rq, exec_clock, delta_exec); | 421 | schedstat_add(cfs_rq, exec_clock, delta_exec); |
| 457 | delta_exec_weighted = calc_delta_fair(delta_exec, curr); | 422 | delta_exec_weighted = delta_exec; |
| 423 | if (unlikely(curr->load.weight != NICE_0_LOAD)) { | ||
| 424 | delta_exec_weighted = calc_delta_fair(delta_exec_weighted, | ||
| 425 | &curr->load); | ||
| 426 | } | ||
| 458 | curr->vruntime += delta_exec_weighted; | 427 | curr->vruntime += delta_exec_weighted; |
| 459 | } | 428 | } |
| 460 | 429 | ||
| @@ -541,27 +510,10 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 541 | * Scheduling class queueing methods: | 510 | * Scheduling class queueing methods: |
| 542 | */ | 511 | */ |
| 543 | 512 | ||
| 544 | #if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED | ||
| 545 | static void | ||
| 546 | add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight) | ||
| 547 | { | ||
| 548 | cfs_rq->task_weight += weight; | ||
| 549 | } | ||
| 550 | #else | ||
| 551 | static inline void | ||
| 552 | add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight) | ||
| 553 | { | ||
| 554 | } | ||
| 555 | #endif | ||
| 556 | |||
| 557 | static void | 513 | static void |
| 558 | account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) | 514 | account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) |
| 559 | { | 515 | { |
| 560 | update_load_add(&cfs_rq->load, se->load.weight); | 516 | update_load_add(&cfs_rq->load, se->load.weight); |
| 561 | if (!parent_entity(se)) | ||
| 562 | inc_cpu_load(rq_of(cfs_rq), se->load.weight); | ||
| 563 | if (entity_is_task(se)) | ||
| 564 | add_cfs_task_weight(cfs_rq, se->load.weight); | ||
| 565 | cfs_rq->nr_running++; | 517 | cfs_rq->nr_running++; |
| 566 | se->on_rq = 1; | 518 | se->on_rq = 1; |
| 567 | list_add(&se->group_node, &cfs_rq->tasks); | 519 | list_add(&se->group_node, &cfs_rq->tasks); |
| @@ -571,10 +523,6 @@ static void | |||
| 571 | account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) | 523 | account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) |
| 572 | { | 524 | { |
| 573 | update_load_sub(&cfs_rq->load, se->load.weight); | 525 | update_load_sub(&cfs_rq->load, se->load.weight); |
| 574 | if (!parent_entity(se)) | ||
| 575 | dec_cpu_load(rq_of(cfs_rq), se->load.weight); | ||
| 576 | if (entity_is_task(se)) | ||
| 577 | add_cfs_task_weight(cfs_rq, -se->load.weight); | ||
| 578 | cfs_rq->nr_running--; | 526 | cfs_rq->nr_running--; |
| 579 | se->on_rq = 0; | 527 | se->on_rq = 0; |
| 580 | list_del_init(&se->group_node); | 528 | list_del_init(&se->group_node); |
| @@ -661,17 +609,8 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) | |||
| 661 | 609 | ||
| 662 | if (!initial) { | 610 | if (!initial) { |
| 663 | /* sleeps up to a single latency don't count. */ | 611 | /* sleeps up to a single latency don't count. */ |
| 664 | if (sched_feat(NEW_FAIR_SLEEPERS)) { | 612 | if (sched_feat(NEW_FAIR_SLEEPERS)) |
| 665 | unsigned long thresh = sysctl_sched_latency; | 613 | vruntime -= sysctl_sched_latency; |
| 666 | |||
| 667 | /* | ||
| 668 | * convert the sleeper threshold into virtual time | ||
| 669 | */ | ||
| 670 | if (sched_feat(NORMALIZED_SLEEPER)) | ||
| 671 | thresh = calc_delta_fair(thresh, se); | ||
| 672 | |||
| 673 | vruntime -= thresh; | ||
| 674 | } | ||
| 675 | 614 | ||
| 676 | /* ensure we never gain time by being placed backwards. */ | 615 | /* ensure we never gain time by being placed backwards. */ |
| 677 | vruntime = max_vruntime(se->vruntime, vruntime); | 616 | vruntime = max_vruntime(se->vruntime, vruntime); |
| @@ -1057,16 +996,27 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq, | |||
| 1057 | struct task_struct *curr = this_rq->curr; | 996 | struct task_struct *curr = this_rq->curr; |
| 1058 | unsigned long tl = this_load; | 997 | unsigned long tl = this_load; |
| 1059 | unsigned long tl_per_task; | 998 | unsigned long tl_per_task; |
| 999 | int balanced; | ||
| 1060 | 1000 | ||
| 1061 | if (!(this_sd->flags & SD_WAKE_AFFINE)) | 1001 | if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS)) |
| 1062 | return 0; | 1002 | return 0; |
| 1063 | 1003 | ||
| 1064 | /* | 1004 | /* |
| 1005 | * If sync wakeup then subtract the (maximum possible) | ||
| 1006 | * effect of the currently running task from the load | ||
| 1007 | * of the current CPU: | ||
| 1008 | */ | ||
| 1009 | if (sync) | ||
| 1010 | tl -= current->se.load.weight; | ||
| 1011 | |||
| 1012 | balanced = 100*(tl + p->se.load.weight) <= imbalance*load; | ||
| 1013 | |||
| 1014 | /* | ||
| 1065 | * If the currently running task will sleep within | 1015 | * If the currently running task will sleep within |
| 1066 | * a reasonable amount of time then attract this newly | 1016 | * a reasonable amount of time then attract this newly |
| 1067 | * woken task: | 1017 | * woken task: |
| 1068 | */ | 1018 | */ |
| 1069 | if (sync && curr->sched_class == &fair_sched_class) { | 1019 | if (sync && balanced && curr->sched_class == &fair_sched_class) { |
| 1070 | if (curr->se.avg_overlap < sysctl_sched_migration_cost && | 1020 | if (curr->se.avg_overlap < sysctl_sched_migration_cost && |
| 1071 | p->se.avg_overlap < sysctl_sched_migration_cost) | 1021 | p->se.avg_overlap < sysctl_sched_migration_cost) |
| 1072 | return 1; | 1022 | return 1; |
| @@ -1075,16 +1025,8 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq, | |||
| 1075 | schedstat_inc(p, se.nr_wakeups_affine_attempts); | 1025 | schedstat_inc(p, se.nr_wakeups_affine_attempts); |
| 1076 | tl_per_task = cpu_avg_load_per_task(this_cpu); | 1026 | tl_per_task = cpu_avg_load_per_task(this_cpu); |
| 1077 | 1027 | ||
| 1078 | /* | ||
| 1079 | * If sync wakeup then subtract the (maximum possible) | ||
| 1080 | * effect of the currently running task from the load | ||
| 1081 | * of the current CPU: | ||
| 1082 | */ | ||
| 1083 | if (sync) | ||
| 1084 | tl -= current->se.load.weight; | ||
| 1085 | |||
| 1086 | if ((tl <= load && tl + target_load(prev_cpu, idx) <= tl_per_task) || | 1028 | if ((tl <= load && tl + target_load(prev_cpu, idx) <= tl_per_task) || |
| 1087 | 100*(tl + p->se.load.weight) <= imbalance*load) { | 1029 | balanced) { |
| 1088 | /* | 1030 | /* |
| 1089 | * This domain has SD_WAKE_AFFINE and | 1031 | * This domain has SD_WAKE_AFFINE and |
| 1090 | * p is cache cold in this domain, and | 1032 | * p is cache cold in this domain, and |
| @@ -1169,10 +1111,11 @@ static unsigned long wakeup_gran(struct sched_entity *se) | |||
| 1169 | unsigned long gran = sysctl_sched_wakeup_granularity; | 1111 | unsigned long gran = sysctl_sched_wakeup_granularity; |
| 1170 | 1112 | ||
| 1171 | /* | 1113 | /* |
| 1172 | * More easily preempt - nice tasks, while not making it harder for | 1114 | * More easily preempt - nice tasks, while not making |
| 1173 | * + nice tasks. | 1115 | * it harder for + nice tasks. |
| 1174 | */ | 1116 | */ |
| 1175 | gran = calc_delta_asym(sysctl_sched_wakeup_granularity, se); | 1117 | if (unlikely(se->load.weight > NICE_0_LOAD)) |
| 1118 | gran = calc_delta_fair(gran, &se->load); | ||
| 1176 | 1119 | ||
| 1177 | return gran; | 1120 | return gran; |
| 1178 | } | 1121 | } |
| @@ -1366,90 +1309,75 @@ static struct task_struct *load_balance_next_fair(void *arg) | |||
| 1366 | return __load_balance_iterator(cfs_rq, cfs_rq->balance_iterator); | 1309 | return __load_balance_iterator(cfs_rq, cfs_rq->balance_iterator); |
| 1367 | } | 1310 | } |
| 1368 | 1311 | ||
| 1369 | static unsigned long | 1312 | #ifdef CONFIG_FAIR_GROUP_SCHED |
| 1370 | __load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | 1313 | static int cfs_rq_best_prio(struct cfs_rq *cfs_rq) |
| 1371 | unsigned long max_load_move, struct sched_domain *sd, | ||
| 1372 | enum cpu_idle_type idle, int *all_pinned, int *this_best_prio, | ||
| 1373 | struct cfs_rq *cfs_rq) | ||
| 1374 | { | 1314 | { |
| 1375 | struct rq_iterator cfs_rq_iterator; | 1315 | struct sched_entity *curr; |
| 1316 | struct task_struct *p; | ||
| 1376 | 1317 | ||
| 1377 | cfs_rq_iterator.start = load_balance_start_fair; | 1318 | if (!cfs_rq->nr_running || !first_fair(cfs_rq)) |
| 1378 | cfs_rq_iterator.next = load_balance_next_fair; | 1319 | return MAX_PRIO; |
| 1379 | cfs_rq_iterator.arg = cfs_rq; | 1320 | |
| 1321 | curr = cfs_rq->curr; | ||
| 1322 | if (!curr) | ||
| 1323 | curr = __pick_next_entity(cfs_rq); | ||
| 1324 | |||
| 1325 | p = task_of(curr); | ||
| 1380 | 1326 | ||
| 1381 | return balance_tasks(this_rq, this_cpu, busiest, | 1327 | return p->prio; |
| 1382 | max_load_move, sd, idle, all_pinned, | ||
| 1383 | this_best_prio, &cfs_rq_iterator); | ||
| 1384 | } | 1328 | } |
| 1329 | #endif | ||
| 1385 | 1330 | ||
| 1386 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 1387 | static unsigned long | 1331 | static unsigned long |
| 1388 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | 1332 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, |
| 1389 | unsigned long max_load_move, | 1333 | unsigned long max_load_move, |
| 1390 | struct sched_domain *sd, enum cpu_idle_type idle, | 1334 | struct sched_domain *sd, enum cpu_idle_type idle, |
| 1391 | int *all_pinned, int *this_best_prio) | 1335 | int *all_pinned, int *this_best_prio) |
| 1392 | { | 1336 | { |
| 1337 | struct cfs_rq *busy_cfs_rq; | ||
| 1393 | long rem_load_move = max_load_move; | 1338 | long rem_load_move = max_load_move; |
| 1394 | int busiest_cpu = cpu_of(busiest); | 1339 | struct rq_iterator cfs_rq_iterator; |
| 1395 | struct task_group *tg; | ||
| 1396 | |||
| 1397 | rcu_read_lock(); | ||
| 1398 | list_for_each_entry(tg, &task_groups, list) { | ||
| 1399 | long imbalance; | ||
| 1400 | unsigned long this_weight, busiest_weight; | ||
| 1401 | long rem_load, max_load, moved_load; | ||
| 1402 | |||
| 1403 | /* | ||
| 1404 | * empty group | ||
| 1405 | */ | ||
| 1406 | if (!aggregate(tg, sd)->task_weight) | ||
| 1407 | continue; | ||
| 1408 | |||
| 1409 | rem_load = rem_load_move * aggregate(tg, sd)->rq_weight; | ||
| 1410 | rem_load /= aggregate(tg, sd)->load + 1; | ||
| 1411 | |||
| 1412 | this_weight = tg->cfs_rq[this_cpu]->task_weight; | ||
| 1413 | busiest_weight = tg->cfs_rq[busiest_cpu]->task_weight; | ||
| 1414 | 1340 | ||
| 1415 | imbalance = (busiest_weight - this_weight) / 2; | 1341 | cfs_rq_iterator.start = load_balance_start_fair; |
| 1342 | cfs_rq_iterator.next = load_balance_next_fair; | ||
| 1416 | 1343 | ||
| 1417 | if (imbalance < 0) | 1344 | for_each_leaf_cfs_rq(busiest, busy_cfs_rq) { |
| 1418 | imbalance = busiest_weight; | 1345 | #ifdef CONFIG_FAIR_GROUP_SCHED |
| 1346 | struct cfs_rq *this_cfs_rq; | ||
| 1347 | long imbalance; | ||
| 1348 | unsigned long maxload; | ||
| 1419 | 1349 | ||
| 1420 | max_load = max(rem_load, imbalance); | 1350 | this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu); |
| 1421 | moved_load = __load_balance_fair(this_rq, this_cpu, busiest, | ||
| 1422 | max_load, sd, idle, all_pinned, this_best_prio, | ||
| 1423 | tg->cfs_rq[busiest_cpu]); | ||
| 1424 | 1351 | ||
| 1425 | if (!moved_load) | 1352 | imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight; |
| 1353 | /* Don't pull if this_cfs_rq has more load than busy_cfs_rq */ | ||
| 1354 | if (imbalance <= 0) | ||
| 1426 | continue; | 1355 | continue; |
| 1427 | 1356 | ||
| 1428 | move_group_shares(tg, sd, busiest_cpu, this_cpu); | 1357 | /* Don't pull more than imbalance/2 */ |
| 1358 | imbalance /= 2; | ||
| 1359 | maxload = min(rem_load_move, imbalance); | ||
| 1429 | 1360 | ||
| 1430 | moved_load *= aggregate(tg, sd)->load; | 1361 | *this_best_prio = cfs_rq_best_prio(this_cfs_rq); |
| 1431 | moved_load /= aggregate(tg, sd)->rq_weight + 1; | 1362 | #else |
| 1363 | # define maxload rem_load_move | ||
| 1364 | #endif | ||
| 1365 | /* | ||
| 1366 | * pass busy_cfs_rq argument into | ||
| 1367 | * load_balance_[start|next]_fair iterators | ||
| 1368 | */ | ||
| 1369 | cfs_rq_iterator.arg = busy_cfs_rq; | ||
| 1370 | rem_load_move -= balance_tasks(this_rq, this_cpu, busiest, | ||
| 1371 | maxload, sd, idle, all_pinned, | ||
| 1372 | this_best_prio, | ||
| 1373 | &cfs_rq_iterator); | ||
| 1432 | 1374 | ||
| 1433 | rem_load_move -= moved_load; | 1375 | if (rem_load_move <= 0) |
| 1434 | if (rem_load_move < 0) | ||
| 1435 | break; | 1376 | break; |
| 1436 | } | 1377 | } |
| 1437 | rcu_read_unlock(); | ||
| 1438 | 1378 | ||
| 1439 | return max_load_move - rem_load_move; | 1379 | return max_load_move - rem_load_move; |
| 1440 | } | 1380 | } |
| 1441 | #else | ||
| 1442 | static unsigned long | ||
| 1443 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
| 1444 | unsigned long max_load_move, | ||
| 1445 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
| 1446 | int *all_pinned, int *this_best_prio) | ||
| 1447 | { | ||
| 1448 | return __load_balance_fair(this_rq, this_cpu, busiest, | ||
| 1449 | max_load_move, sd, idle, all_pinned, | ||
| 1450 | this_best_prio, &busiest->cfs); | ||
| 1451 | } | ||
| 1452 | #endif | ||
| 1453 | 1381 | ||
| 1454 | static int | 1382 | static int |
| 1455 | move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | 1383 | move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, |
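`load_balance_fair()` goes back to iterating the leaf cfs_rqs of the busiest CPU, pulling at most half of each per-queue imbalance and never more than the remaining budget. A simplified, self-contained model of that loop; the `pull` step stands in for `balance_tasks()` and all numbers are invented:

```c
/* Simplified model of the reverted group load-balance loop: walk each
 * busy queue, compute the weight imbalance against the local queue, and
 * pull at most half of it, bounded by how much may still be moved. */
#include <stdio.h>

struct cfs { long weight; };

static long load_balance_fair(struct cfs *busy, struct cfs *local,
			      int n, long max_load_move)
{
	long rem = max_load_move;

	for (int i = 0; i < n && rem > 0; i++) {
		long imbalance = busy[i].weight - local[i].weight;

		if (imbalance <= 0)          /* don't pull from a lighter queue */
			continue;
		imbalance /= 2;              /* never pull more than half */
		long pull = rem < imbalance ? rem : imbalance;

		busy[i].weight  -= pull;     /* stand-in for balance_tasks() */
		local[i].weight += pull;
		rem             -= pull;
	}
	return max_load_move - rem;          /* how much load actually moved */
}

int main(void)
{
	struct cfs busy[]  = { { 3072 }, { 1024 } };
	struct cfs local[] = { { 1024 }, { 2048 } };

	printf("moved %ld\n", load_balance_fair(busy, local, 2, 4096));
	return 0;
}
```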
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 060e87b0cb1c..0f3c19197fa4 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
| @@ -250,7 +250,8 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) | |||
| 250 | if (rt_rq->rt_time || rt_rq->rt_nr_running) | 250 | if (rt_rq->rt_time || rt_rq->rt_nr_running) |
| 251 | idle = 0; | 251 | idle = 0; |
| 252 | spin_unlock(&rt_rq->rt_runtime_lock); | 252 | spin_unlock(&rt_rq->rt_runtime_lock); |
| 253 | } | 253 | } else if (rt_rq->rt_nr_running) |
| 254 | idle = 0; | ||
| 254 | 255 | ||
| 255 | if (enqueue) | 256 | if (enqueue) |
| 256 | sched_rt_rq_enqueue(rt_rq); | 257 | sched_rt_rq_enqueue(rt_rq); |
| @@ -449,13 +450,19 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |||
| 449 | #endif | 450 | #endif |
| 450 | } | 451 | } |
| 451 | 452 | ||
| 452 | static void enqueue_rt_entity(struct sched_rt_entity *rt_se) | 453 | static void __enqueue_rt_entity(struct sched_rt_entity *rt_se) |
| 453 | { | 454 | { |
| 454 | struct rt_rq *rt_rq = rt_rq_of_se(rt_se); | 455 | struct rt_rq *rt_rq = rt_rq_of_se(rt_se); |
| 455 | struct rt_prio_array *array = &rt_rq->active; | 456 | struct rt_prio_array *array = &rt_rq->active; |
| 456 | struct rt_rq *group_rq = group_rt_rq(rt_se); | 457 | struct rt_rq *group_rq = group_rt_rq(rt_se); |
| 457 | 458 | ||
| 458 | if (group_rq && rt_rq_throttled(group_rq)) | 459 | /* |
| 460 | * Don't enqueue the group if it's throttled, or when empty. | ||
| 461 | * The latter is a consequence of the former when a child group | ||
| 462 | * gets throttled and the current group doesn't have any other | ||
| 463 | * active members. | ||
| 464 | */ | ||
| 465 | if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) | ||
| 459 | return; | 466 | return; |
| 460 | 467 | ||
| 461 | list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se)); | 468 | list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se)); |
| @@ -464,7 +471,7 @@ static void enqueue_rt_entity(struct sched_rt_entity *rt_se) | |||
| 464 | inc_rt_tasks(rt_se, rt_rq); | 471 | inc_rt_tasks(rt_se, rt_rq); |
| 465 | } | 472 | } |
| 466 | 473 | ||
| 467 | static void dequeue_rt_entity(struct sched_rt_entity *rt_se) | 474 | static void __dequeue_rt_entity(struct sched_rt_entity *rt_se) |
| 468 | { | 475 | { |
| 469 | struct rt_rq *rt_rq = rt_rq_of_se(rt_se); | 476 | struct rt_rq *rt_rq = rt_rq_of_se(rt_se); |
| 470 | struct rt_prio_array *array = &rt_rq->active; | 477 | struct rt_prio_array *array = &rt_rq->active; |
| @@ -480,11 +487,10 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se) | |||
| 480 | * Because the prio of an upper entry depends on the lower | 487 | * Because the prio of an upper entry depends on the lower |
| 481 | * entries, we must remove entries top - down. | 488 | * entries, we must remove entries top - down. |
| 482 | */ | 489 | */ |
| 483 | static void dequeue_rt_stack(struct task_struct *p) | 490 | static void dequeue_rt_stack(struct sched_rt_entity *rt_se) |
| 484 | { | 491 | { |
| 485 | struct sched_rt_entity *rt_se, *back = NULL; | 492 | struct sched_rt_entity *back = NULL; |
| 486 | 493 | ||
| 487 | rt_se = &p->rt; | ||
| 488 | for_each_sched_rt_entity(rt_se) { | 494 | for_each_sched_rt_entity(rt_se) { |
| 489 | rt_se->back = back; | 495 | rt_se->back = back; |
| 490 | back = rt_se; | 496 | back = rt_se; |
| @@ -492,7 +498,26 @@ static void dequeue_rt_stack(struct task_struct *p) | |||
| 492 | 498 | ||
| 493 | for (rt_se = back; rt_se; rt_se = rt_se->back) { | 499 | for (rt_se = back; rt_se; rt_se = rt_se->back) { |
| 494 | if (on_rt_rq(rt_se)) | 500 | if (on_rt_rq(rt_se)) |
| 495 | dequeue_rt_entity(rt_se); | 501 | __dequeue_rt_entity(rt_se); |
| 502 | } | ||
| 503 | } | ||
| 504 | |||
| 505 | static void enqueue_rt_entity(struct sched_rt_entity *rt_se) | ||
| 506 | { | ||
| 507 | dequeue_rt_stack(rt_se); | ||
| 508 | for_each_sched_rt_entity(rt_se) | ||
| 509 | __enqueue_rt_entity(rt_se); | ||
| 510 | } | ||
| 511 | |||
| 512 | static void dequeue_rt_entity(struct sched_rt_entity *rt_se) | ||
| 513 | { | ||
| 514 | dequeue_rt_stack(rt_se); | ||
| 515 | |||
| 516 | for_each_sched_rt_entity(rt_se) { | ||
| 517 | struct rt_rq *rt_rq = group_rt_rq(rt_se); | ||
| 518 | |||
| 519 | if (rt_rq && rt_rq->rt_nr_running) | ||
| 520 | __enqueue_rt_entity(rt_se); | ||
| 496 | } | 521 | } |
| 497 | } | 522 | } |
| 498 | 523 | ||
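The RT hunks above split the work: `dequeue_rt_stack()` tears the whole parent chain down top-down, `enqueue_rt_entity()` rebuilds it bottom-up, and `dequeue_rt_entity()` re-adds only groups that still have runnable tasks. A toy model of that ordering, with an array standing in for the task's chain of group entities:

```c
/* Illustrative model of the enqueue/dequeue split: tear the chain down
 * top-down, rebuild it bottom-up, and skip groups that ended up empty.
 * The struct and its fields are invented for the example. */
#include <stdio.h>

struct rt_se { int on_rq; int group_nr_running; };   /* leaf has no group */

static void dequeue_rt_stack(struct rt_se *se, int depth)
{
	for (int i = depth - 1; i >= 0; i--)   /* top - down */
		if (se[i].on_rq)
			se[i].on_rq = 0;
}

static void enqueue_rt_entity(struct rt_se *se, int depth)
{
	dequeue_rt_stack(se, depth);
	for (int i = 0; i < depth; i++)        /* bottom - up */
		se[i].on_rq = 1;
}

static void dequeue_rt_entity(struct rt_se *se, int depth)
{
	dequeue_rt_stack(se, depth);
	for (int i = 1; i < depth; i++)        /* re-add non-empty groups only */
		if (se[i].group_nr_running)
			se[i].on_rq = 1;
}

int main(void)
{
	/* chain[0] = task, chain[1] = its group, which has one other runnable task */
	struct rt_se chain[] = { { 0, 0 }, { 0, 1 } };

	enqueue_rt_entity(chain, 2);
	dequeue_rt_entity(chain, 2);
	printf("task on_rq=%d group on_rq=%d\n", chain[0].on_rq, chain[1].on_rq);
	return 0;
}
```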
| @@ -506,36 +531,15 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup) | |||
| 506 | if (wakeup) | 531 | if (wakeup) |
| 507 | rt_se->timeout = 0; | 532 | rt_se->timeout = 0; |
| 508 | 533 | ||
| 509 | dequeue_rt_stack(p); | 534 | enqueue_rt_entity(rt_se); |
| 510 | |||
| 511 | /* | ||
| 512 | * enqueue everybody, bottom - up. | ||
| 513 | */ | ||
| 514 | for_each_sched_rt_entity(rt_se) | ||
| 515 | enqueue_rt_entity(rt_se); | ||
| 516 | |||
| 517 | inc_cpu_load(rq, p->se.load.weight); | ||
| 518 | } | 535 | } |
| 519 | 536 | ||
| 520 | static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) | 537 | static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) |
| 521 | { | 538 | { |
| 522 | struct sched_rt_entity *rt_se = &p->rt; | 539 | struct sched_rt_entity *rt_se = &p->rt; |
| 523 | struct rt_rq *rt_rq; | ||
| 524 | 540 | ||
| 525 | update_curr_rt(rq); | 541 | update_curr_rt(rq); |
| 526 | 542 | dequeue_rt_entity(rt_se); | |
| 527 | dequeue_rt_stack(p); | ||
| 528 | |||
| 529 | /* | ||
| 530 | * re-enqueue all non-empty rt_rq entities. | ||
| 531 | */ | ||
| 532 | for_each_sched_rt_entity(rt_se) { | ||
| 533 | rt_rq = group_rt_rq(rt_se); | ||
| 534 | if (rt_rq && rt_rq->rt_nr_running) | ||
| 535 | enqueue_rt_entity(rt_se); | ||
| 536 | } | ||
| 537 | |||
| 538 | dec_cpu_load(rq, p->se.load.weight); | ||
| 539 | } | 543 | } |
| 540 | 544 | ||
| 541 | /* | 545 | /* |
| @@ -546,8 +550,10 @@ static | |||
| 546 | void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se) | 550 | void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se) |
| 547 | { | 551 | { |
| 548 | struct rt_prio_array *array = &rt_rq->active; | 552 | struct rt_prio_array *array = &rt_rq->active; |
| 553 | struct list_head *queue = array->queue + rt_se_prio(rt_se); | ||
| 549 | 554 | ||
| 550 | list_move_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se)); | 555 | if (on_rt_rq(rt_se)) |
| 556 | list_move_tail(&rt_se->run_list, queue); | ||
| 551 | } | 557 | } |
| 552 | 558 | ||
| 553 | static void requeue_task_rt(struct rq *rq, struct task_struct *p) | 559 | static void requeue_task_rt(struct rq *rq, struct task_struct *p) |
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index 5bae2e0c3ff2..80179ef7450e 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h | |||
| @@ -67,6 +67,7 @@ static int show_schedstat(struct seq_file *seq, void *v) | |||
| 67 | preempt_enable(); | 67 | preempt_enable(); |
| 68 | #endif | 68 | #endif |
| 69 | } | 69 | } |
| 70 | kfree(mask_str); | ||
| 70 | return 0; | 71 | return 0; |
| 71 | } | 72 | } |
| 72 | 73 | ||
| @@ -197,6 +198,9 @@ static inline void sched_info_queued(struct task_struct *t) | |||
| 197 | /* | 198 | /* |
| 198 | * Called when a process ceases being the active-running process, either | 199 | * Called when a process ceases being the active-running process, either |
| 199 | * voluntarily or involuntarily. Now we can calculate how long we ran. | 200 | * voluntarily or involuntarily. Now we can calculate how long we ran. |
| 201 | * Also, if the process is still in the TASK_RUNNING state, call | ||
| 202 | * sched_info_queued() to mark that it has now again started waiting on | ||
| 203 | * the runqueue. | ||
| 200 | */ | 204 | */ |
| 201 | static inline void sched_info_depart(struct task_struct *t) | 205 | static inline void sched_info_depart(struct task_struct *t) |
| 202 | { | 206 | { |
| @@ -205,6 +209,9 @@ static inline void sched_info_depart(struct task_struct *t) | |||
| 205 | 209 | ||
| 206 | t->sched_info.cpu_time += delta; | 210 | t->sched_info.cpu_time += delta; |
| 207 | rq_sched_info_depart(task_rq(t), delta); | 211 | rq_sched_info_depart(task_rq(t), delta); |
| 212 | |||
| 213 | if (t->state == TASK_RUNNING) | ||
| 214 | sched_info_queued(t); | ||
| 208 | } | 215 | } |
| 209 | 216 | ||
| 210 | /* | 217 | /* |
diff --git a/kernel/signal.c b/kernel/signal.c index 72bb4f51f963..6c0958e52ea7 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
| @@ -231,6 +231,40 @@ void flush_signals(struct task_struct *t) | |||
| 231 | spin_unlock_irqrestore(&t->sighand->siglock, flags); | 231 | spin_unlock_irqrestore(&t->sighand->siglock, flags); |
| 232 | } | 232 | } |
| 233 | 233 | ||
| 234 | static void __flush_itimer_signals(struct sigpending *pending) | ||
| 235 | { | ||
| 236 | sigset_t signal, retain; | ||
| 237 | struct sigqueue *q, *n; | ||
| 238 | |||
| 239 | signal = pending->signal; | ||
| 240 | sigemptyset(&retain); | ||
| 241 | |||
| 242 | list_for_each_entry_safe(q, n, &pending->list, list) { | ||
| 243 | int sig = q->info.si_signo; | ||
| 244 | |||
| 245 | if (likely(q->info.si_code != SI_TIMER)) { | ||
| 246 | sigaddset(&retain, sig); | ||
| 247 | } else { | ||
| 248 | sigdelset(&signal, sig); | ||
| 249 | list_del_init(&q->list); | ||
| 250 | __sigqueue_free(q); | ||
| 251 | } | ||
| 252 | } | ||
| 253 | |||
| 254 | sigorsets(&pending->signal, &signal, &retain); | ||
| 255 | } | ||
| 256 | |||
| 257 | void flush_itimer_signals(void) | ||
| 258 | { | ||
| 259 | struct task_struct *tsk = current; | ||
| 260 | unsigned long flags; | ||
| 261 | |||
| 262 | spin_lock_irqsave(&tsk->sighand->siglock, flags); | ||
| 263 | __flush_itimer_signals(&tsk->pending); | ||
| 264 | __flush_itimer_signals(&tsk->signal->shared_pending); | ||
| 265 | spin_unlock_irqrestore(&tsk->sighand->siglock, flags); | ||
| 266 | } | ||
| 267 | |||
| 234 | void ignore_signals(struct task_struct *t) | 268 | void ignore_signals(struct task_struct *t) |
| 235 | { | 269 | { |
| 236 | int i; | 270 | int i; |
| @@ -1240,17 +1274,22 @@ void sigqueue_free(struct sigqueue *q) | |||
| 1240 | 1274 | ||
| 1241 | BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); | 1275 | BUG_ON(!(q->flags & SIGQUEUE_PREALLOC)); |
| 1242 | /* | 1276 | /* |
| 1243 | * If the signal is still pending remove it from the | 1277 | * We must hold ->siglock while testing q->list |
| 1244 | * pending queue. We must hold ->siglock while testing | 1278 | * to serialize with collect_signal() or with |
| 1245 | * q->list to serialize with collect_signal(). | 1279 | * __exit_signal()->flush_sigqueue(). |
| 1246 | */ | 1280 | */ |
| 1247 | spin_lock_irqsave(lock, flags); | 1281 | spin_lock_irqsave(lock, flags); |
| 1282 | q->flags &= ~SIGQUEUE_PREALLOC; | ||
| 1283 | /* | ||
| 1284 | * If it is queued it will be freed when dequeued, | ||
| 1285 | * like the "regular" sigqueue. | ||
| 1286 | */ | ||
| 1248 | if (!list_empty(&q->list)) | 1287 | if (!list_empty(&q->list)) |
| 1249 | list_del_init(&q->list); | 1288 | q = NULL; |
| 1250 | spin_unlock_irqrestore(lock, flags); | 1289 | spin_unlock_irqrestore(lock, flags); |
| 1251 | 1290 | ||
| 1252 | q->flags &= ~SIGQUEUE_PREALLOC; | 1291 | if (q) |
| 1253 | __sigqueue_free(q); | 1292 | __sigqueue_free(q); |
| 1254 | } | 1293 | } |
| 1255 | 1294 | ||
| 1256 | int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) | 1295 | int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group) |
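The `sigqueue_free()` change clears `SIGQUEUE_PREALLOC` while `->siglock` is held and leaves a still-queued entry alone, so whoever dequeues it later frees it like a regular sigqueue. A userspace sketch of that ownership hand-off; a pthread mutex and a boolean stand in for the real locking and list state:

```c
/* Sketch of the race fix: the PREALLOC flag is cleared under the lock,
 * and a queued entry is not freed here at all -- the dequeue path will
 * free it now that the flag is gone. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define SIGQUEUE_PREALLOC 0x1

struct sigqueue { int flags; bool queued; };

static pthread_mutex_t siglock = PTHREAD_MUTEX_INITIALIZER;

static void sigqueue_free(struct sigqueue *q)
{
	pthread_mutex_lock(&siglock);
	q->flags &= ~SIGQUEUE_PREALLOC;  /* a future dequeue will free it */
	bool still_queued = q->queued;
	pthread_mutex_unlock(&siglock);

	if (!still_queued)               /* only free when nobody else will */
		free(q);
}

int main(void)
{
	struct sigqueue *q = malloc(sizeof(*q));

	q->flags = SIGQUEUE_PREALLOC;
	q->queued = false;               /* not pending: freed immediately */
	sigqueue_free(q);
	puts("freed unqueued entry without touching the pending list");
	return 0;
}
```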
diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 01b6522fd92b..a272d78185eb 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c | |||
| @@ -49,12 +49,17 @@ static unsigned long get_timestamp(int this_cpu) | |||
| 49 | return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */ | 49 | return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */ |
| 50 | } | 50 | } |
| 51 | 51 | ||
| 52 | void touch_softlockup_watchdog(void) | 52 | static void __touch_softlockup_watchdog(void) |
| 53 | { | 53 | { |
| 54 | int this_cpu = raw_smp_processor_id(); | 54 | int this_cpu = raw_smp_processor_id(); |
| 55 | 55 | ||
| 56 | __raw_get_cpu_var(touch_timestamp) = get_timestamp(this_cpu); | 56 | __raw_get_cpu_var(touch_timestamp) = get_timestamp(this_cpu); |
| 57 | } | 57 | } |
| 58 | |||
| 59 | void touch_softlockup_watchdog(void) | ||
| 60 | { | ||
| 61 | __raw_get_cpu_var(touch_timestamp) = 0; | ||
| 62 | } | ||
| 58 | EXPORT_SYMBOL(touch_softlockup_watchdog); | 63 | EXPORT_SYMBOL(touch_softlockup_watchdog); |
| 59 | 64 | ||
| 60 | void touch_all_softlockup_watchdogs(void) | 65 | void touch_all_softlockup_watchdogs(void) |
| @@ -80,7 +85,7 @@ void softlockup_tick(void) | |||
| 80 | unsigned long now; | 85 | unsigned long now; |
| 81 | 86 | ||
| 82 | if (touch_timestamp == 0) { | 87 | if (touch_timestamp == 0) { |
| 83 | touch_softlockup_watchdog(); | 88 | __touch_softlockup_watchdog(); |
| 84 | return; | 89 | return; |
| 85 | } | 90 | } |
| 86 | 91 | ||
| @@ -95,7 +100,7 @@ void softlockup_tick(void) | |||
| 95 | 100 | ||
| 96 | /* do not print during early bootup: */ | 101 | /* do not print during early bootup: */ |
| 97 | if (unlikely(system_state != SYSTEM_RUNNING)) { | 102 | if (unlikely(system_state != SYSTEM_RUNNING)) { |
| 98 | touch_softlockup_watchdog(); | 103 | __touch_softlockup_watchdog(); |
| 99 | return; | 104 | return; |
| 100 | } | 105 | } |
| 101 | 106 | ||
| @@ -115,6 +120,7 @@ void softlockup_tick(void) | |||
| 115 | printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n", | 120 | printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n", |
| 116 | this_cpu, now - touch_timestamp, | 121 | this_cpu, now - touch_timestamp, |
| 117 | current->comm, task_pid_nr(current)); | 122 | current->comm, task_pid_nr(current)); |
| 123 | print_modules(); | ||
| 118 | if (regs) | 124 | if (regs) |
| 119 | show_regs(regs); | 125 | show_regs(regs); |
| 120 | else | 126 | else |
| @@ -214,7 +220,7 @@ static int watchdog(void *__bind_cpu) | |||
| 214 | sched_setscheduler(current, SCHED_FIFO, ¶m); | 220 | sched_setscheduler(current, SCHED_FIFO, ¶m); |
| 215 | 221 | ||
| 216 | /* initialize timestamp */ | 222 | /* initialize timestamp */ |
| 217 | touch_softlockup_watchdog(); | 223 | __touch_softlockup_watchdog(); |
| 218 | 224 | ||
| 219 | set_current_state(TASK_INTERRUPTIBLE); | 225 | set_current_state(TASK_INTERRUPTIBLE); |
| 220 | /* | 226 | /* |
| @@ -223,7 +229,7 @@ static int watchdog(void *__bind_cpu) | |||
| 223 | * debug-printout triggers in softlockup_tick(). | 229 | * debug-printout triggers in softlockup_tick(). |
| 224 | */ | 230 | */ |
| 225 | while (!kthread_should_stop()) { | 231 | while (!kthread_should_stop()) { |
| 226 | touch_softlockup_watchdog(); | 232 | __touch_softlockup_watchdog(); |
| 227 | schedule(); | 233 | schedule(); |
| 228 | 234 | ||
| 229 | if (kthread_should_stop()) | 235 | if (kthread_should_stop()) |
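softlockup.c now distinguishes the external `touch_softlockup_watchdog()`, which only marks the per-CPU timestamp as stale, from the internal `__touch_softlockup_watchdog()`, which re-reads the clock so the timestamp is always taken on the CPU that owns it. A compact model of that hand-off, with a counter as the clock and invented thresholds:

```c
/* Sketch of the watchdog-touch split: external callers zero the
 * timestamp; the tick path notices and resynchronises it itself. */
#include <stdio.h>

static unsigned long fake_clock = 100;
static unsigned long touch_timestamp;

static void __touch_softlockup_watchdog(void) { touch_timestamp = fake_clock; }
static void touch_softlockup_watchdog(void)   { touch_timestamp = 0; }

static void softlockup_tick(void)
{
	if (touch_timestamp == 0) {          /* externally touched: resync */
		__touch_softlockup_watchdog();
		return;
	}
	if (fake_clock - touch_timestamp > 60)
		printf("BUG: soft lockup (%lus without a touch)\n",
		       fake_clock - touch_timestamp);
}

int main(void)
{
	__touch_softlockup_watchdog();       /* watchdog thread initialises */
	fake_clock += 90;
	softlockup_tick();                   /* would report a lockup ... */
	touch_softlockup_watchdog();         /* ... unless someone touched it */
	softlockup_tick();
	printf("timestamp resynced to %lu\n", touch_timestamp);
	return 0;
}
```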
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 0101aeef7ed7..b7350bbfb076 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
| @@ -62,8 +62,7 @@ static int stopmachine(void *cpu) | |||
| 62 | * help our sisters onto their CPUs. */ | 62 | * help our sisters onto their CPUs. */ |
| 63 | if (!prepared && !irqs_disabled) | 63 | if (!prepared && !irqs_disabled) |
| 64 | yield(); | 64 | yield(); |
| 65 | else | 65 | cpu_relax(); |
| 66 | cpu_relax(); | ||
| 67 | } | 66 | } |
| 68 | 67 | ||
| 69 | /* Ack: we are exiting. */ | 68 | /* Ack: we are exiting. */ |
| @@ -106,8 +105,10 @@ static int stop_machine(void) | |||
| 106 | } | 105 | } |
| 107 | 106 | ||
| 108 | /* Wait for them all to come to life. */ | 107 | /* Wait for them all to come to life. */ |
| 109 | while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) | 108 | while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) { |
| 110 | yield(); | 109 | yield(); |
| 110 | cpu_relax(); | ||
| 111 | } | ||
| 111 | 112 | ||
| 112 | /* If some failed, kill them all. */ | 113 | /* If some failed, kill them all. */ |
| 113 | if (ret < 0) { | 114 | if (ret < 0) { |
diff --git a/kernel/sys.c b/kernel/sys.c index 895d2d4c9493..14e97282eb6c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
| @@ -1652,7 +1652,7 @@ asmlinkage long sys_umask(int mask) | |||
| 1652 | asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, | 1652 | asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, |
| 1653 | unsigned long arg4, unsigned long arg5) | 1653 | unsigned long arg4, unsigned long arg5) |
| 1654 | { | 1654 | { |
| 1655 | long uninitialized_var(error); | 1655 | long error = 0; |
| 1656 | 1656 | ||
| 1657 | if (security_task_prctl(option, arg2, arg3, arg4, arg5, &error)) | 1657 | if (security_task_prctl(option, arg2, arg3, arg4, arg5, &error)) |
| 1658 | return error; | 1658 | return error; |
| @@ -1701,9 +1701,7 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, | |||
| 1701 | error = PR_TIMING_STATISTICAL; | 1701 | error = PR_TIMING_STATISTICAL; |
| 1702 | break; | 1702 | break; |
| 1703 | case PR_SET_TIMING: | 1703 | case PR_SET_TIMING: |
| 1704 | if (arg2 == PR_TIMING_STATISTICAL) | 1704 | if (arg2 != PR_TIMING_STATISTICAL) |
| 1705 | error = 0; | ||
| 1706 | else | ||
| 1707 | error = -EINVAL; | 1705 | error = -EINVAL; |
| 1708 | break; | 1706 | break; |
| 1709 | 1707 | ||
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 29fc39f1029c..ce7799540c91 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -13,7 +13,7 @@ | |||
| 13 | * Kai Petzke <wpp@marie.physik.tu-berlin.de> | 13 | * Kai Petzke <wpp@marie.physik.tu-berlin.de> |
| 14 | * Theodore Ts'o <tytso@mit.edu> | 14 | * Theodore Ts'o <tytso@mit.edu> |
| 15 | * | 15 | * |
| 16 | * Made to use alloc_percpu by Christoph Lameter <clameter@sgi.com>. | 16 | * Made to use alloc_percpu by Christoph Lameter. |
| 17 | */ | 17 | */ |
| 18 | 18 | ||
| 19 | #include <linux/module.h> | 19 | #include <linux/module.h> |
