author    Jens Axboe <axboe@kernel.dk>    2013-12-31 11:51:02 -0500
committer Jens Axboe <axboe@kernel.dk>    2013-12-31 11:51:02 -0500
commit    b28bc9b38c52f63f43e3fd875af982f2240a2859 (patch)
tree      76cdb7b52b58f5685993cc15ed81d1c903023358 /kernel
parent    8d30726912cb39c3a3ebde06214d54861f8fdde2 (diff)
parent    802eee95bde72fd0cd0f3a5b2098375a487d1eda (diff)
Merge tag 'v3.13-rc6' into for-3.14/core
Needed to bring blk-mq up to date, since changes have been going in
since for-3.14/core was established.
Fix up merge issues related to the immutable biovec changes.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Conflicts:
        block/blk-flush.c
        fs/btrfs/check-integrity.c
        fs/btrfs/extent_io.c
        fs/btrfs/scrub.c
        fs/logfs/dev_bdev.c
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/.gitignore               |   1
-rw-r--r--  kernel/Makefile                 |   7
-rw-r--r--  kernel/bounds.c                 |   2
-rw-r--r--  kernel/cgroup.c                 |  85
-rw-r--r--  kernel/cpuset.c                 |   8
-rw-r--r--  kernel/events/core.c            |  29
-rw-r--r--  kernel/extable.c                |   4
-rw-r--r--  kernel/fork.c                   |   1
-rw-r--r--  kernel/freezer.c                |   6
-rw-r--r--  kernel/futex.c                  |   7
-rw-r--r--  kernel/irq/pm.c                 |   2
-rw-r--r--  kernel/kexec.c                  |   5
-rw-r--r--  kernel/padata.c                 |   9
-rw-r--r--  kernel/power/console.c          |   1
-rw-r--r--  kernel/rcu/tree_plugin.h        |   4
-rw-r--r--  kernel/reboot.c                 |   2
-rw-r--r--  kernel/sched/core.c             |  10
-rw-r--r--  kernel/sched/fair.c             | 178
-rw-r--r--  kernel/sched/rt.c               |  14
-rw-r--r--  kernel/system_certificates.S    |  14
-rw-r--r--  kernel/system_keyring.c         |   4
-rw-r--r--  kernel/time/tick-common.c       |  15
-rw-r--r--  kernel/time/tick-sched.c        |  25
-rw-r--r--  kernel/time/timekeeping.c       |   2
-rw-r--r--  kernel/timer.c                  |   5
-rw-r--r--  kernel/trace/ftrace.c           |  66
-rw-r--r--  kernel/trace/trace_event_perf.c |   8
-rw-r--r--  kernel/trace/trace_events.c     |   3
-rw-r--r--  kernel/trace/trace_syscalls.c   |  10
-rw-r--r--  kernel/user.c                   |   6
-rw-r--r--  kernel/workqueue.c              |  82
31 files changed, 375 insertions(+), 240 deletions(-)
diff --git a/kernel/.gitignore b/kernel/.gitignore
index b3097bde4e9c..790d83c7d160 100644
--- a/kernel/.gitignore
+++ b/kernel/.gitignore
@@ -5,3 +5,4 @@ config_data.h
 config_data.gz
 timeconst.h
 hz.bc
+x509_certificate_list
diff --git a/kernel/Makefile b/kernel/Makefile
index bbaf7d59c1bb..bc010ee272b6 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -137,9 +137,10 @@ $(obj)/timeconst.h: $(obj)/hz.bc $(src)/timeconst.bc FORCE
 ###############################################################################
 ifeq ($(CONFIG_SYSTEM_TRUSTED_KEYRING),y)
 X509_CERTIFICATES-y := $(wildcard *.x509) $(wildcard $(srctree)/*.x509)
-X509_CERTIFICATES-$(CONFIG_MODULE_SIG) += signing_key.x509
-X509_CERTIFICATES := $(sort $(foreach CERT,$(X509_CERTIFICATES-y), \
+X509_CERTIFICATES-$(CONFIG_MODULE_SIG) += $(objtree)/signing_key.x509
+X509_CERTIFICATES-raw := $(sort $(foreach CERT,$(X509_CERTIFICATES-y), \
                                 $(or $(realpath $(CERT)),$(CERT))))
+X509_CERTIFICATES := $(subst $(realpath $(objtree))/,,$(X509_CERTIFICATES-raw))
 
 ifeq ($(X509_CERTIFICATES),)
 $(warning *** No X.509 certificates found ***)
@@ -164,9 +165,9 @@ $(obj)/x509_certificate_list: $(X509_CERTIFICATES) $(obj)/.x509.list
 targets += $(obj)/.x509.list
 $(obj)/.x509.list:
         @echo $(X509_CERTIFICATES) >$@
+endif
 
 clean-files := x509_certificate_list .x509.list
-endif
 
 ifeq ($(CONFIG_MODULE_SIG),y)
 ###############################################################################
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 5253204afdca..9fd4246b04b8 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -22,6 +22,6 @@ void foo(void)
 #ifdef CONFIG_SMP
         DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS));
 #endif
-        DEFINE(BLOATED_SPINLOCKS, sizeof(spinlock_t) > sizeof(int));
+        DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t));
         /* End of constants */
 }
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4c62513fe19f..bc1dcabe9217 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -90,6 +90,14 @@ static DEFINE_MUTEX(cgroup_mutex);
 static DEFINE_MUTEX(cgroup_root_mutex);
 
 /*
+ * cgroup destruction makes heavy use of work items and there can be a lot
+ * of concurrent destructions.  Use a separate workqueue so that cgroup
+ * destruction work items don't end up filling up max_active of system_wq
+ * which may lead to deadlock.
+ */
+static struct workqueue_struct *cgroup_destroy_wq;
+
+/*
  * Generate an array of cgroup subsystem pointers. At boot time, this is
  * populated with the built in subsystems, and modular subsystems are
  * registered after that. The mutable section of this array is protected by
@@ -191,6 +199,7 @@ static void cgroup_destroy_css_killed(struct cgroup *cgrp);
 static int cgroup_destroy_locked(struct cgroup *cgrp);
 static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
                               bool is_add);
+static int cgroup_file_release(struct inode *inode, struct file *file);
 
 /**
  * cgroup_css - obtain a cgroup's css for the specified subsystem
@@ -871,7 +880,7 @@ static void cgroup_free_rcu(struct rcu_head *head)
         struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head);
 
         INIT_WORK(&cgrp->destroy_work, cgroup_free_fn);
-        schedule_work(&cgrp->destroy_work);
+        queue_work(cgroup_destroy_wq, &cgrp->destroy_work);
 }
 
 static void cgroup_diput(struct dentry *dentry, struct inode *inode)
@@ -881,6 +890,16 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
                 struct cgroup *cgrp = dentry->d_fsdata;
 
                 BUG_ON(!(cgroup_is_dead(cgrp)));
+
+                /*
+                 * XXX: cgrp->id is only used to look up css's.  As cgroup
+                 * and css's lifetimes will be decoupled, it should be made
+                 * per-subsystem and moved to css->id so that lookups are
+                 * successful until the target css is released.
+                 */
+                idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
+                cgrp->id = -1;
+
                 call_rcu(&cgrp->rcu_head, cgroup_free_rcu);
         } else {
                 struct cfent *cfe = __d_cfe(dentry);
@@ -2421,7 +2440,7 @@ static const struct file_operations cgroup_seqfile_operations = {
         .read = seq_read,
         .write = cgroup_file_write,
         .llseek = seq_lseek,
-        .release = single_release,
+        .release = cgroup_file_release,
 };
 
 static int cgroup_file_open(struct inode *inode, struct file *file)
@@ -2482,6 +2501,8 @@ static int cgroup_file_release(struct inode *inode, struct file *file)
                 ret = cft->release(inode, file);
         if (css->ss)
                 css_put(css);
+        if (file->f_op == &cgroup_seqfile_operations)
+                single_release(inode, file);
         return ret;
 }
 
@@ -4249,7 +4270,7 @@ static void css_free_rcu_fn(struct rcu_head *rcu_head)
          * css_put().  dput() requires process context which we don't have.
          */
         INIT_WORK(&css->destroy_work, css_free_work_fn);
-        schedule_work(&css->destroy_work);
+        queue_work(cgroup_destroy_wq, &css->destroy_work);
 }
 
 static void css_release(struct percpu_ref *ref)
@@ -4257,6 +4278,7 @@ static void css_release(struct percpu_ref *ref)
         struct cgroup_subsys_state *css =
                 container_of(ref, struct cgroup_subsys_state, refcnt);
 
+        rcu_assign_pointer(css->cgroup->subsys[css->ss->subsys_id], NULL);
         call_rcu(&css->rcu_head, css_free_rcu_fn);
 }
 
@@ -4415,14 +4437,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
         list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
         root->number_of_cgroups++;
 
-        /* each css holds a ref to the cgroup's dentry and the parent css */
-        for_each_root_subsys(root, ss) {
-                struct cgroup_subsys_state *css = css_ar[ss->subsys_id];
-
-                dget(dentry);
-                css_get(css->parent);
-        }
-
         /* hold a ref to the parent's dentry */
         dget(parent->dentry);
 
@@ -4434,6 +4448,13 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
                 if (err)
                         goto err_destroy;
 
+                /* each css holds a ref to the cgroup's dentry and parent css */
+                dget(dentry);
+                css_get(css->parent);
+
+                /* mark it consumed for error path */
+                css_ar[ss->subsys_id] = NULL;
+
                 if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
                     parent->parent) {
                         pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
@@ -4480,6 +4501,14 @@ err_free_cgrp:
         return err;
 
 err_destroy:
+        for_each_root_subsys(root, ss) {
+                struct cgroup_subsys_state *css = css_ar[ss->subsys_id];
+
+                if (css) {
+                        percpu_ref_cancel_init(&css->refcnt);
+                        ss->css_free(css);
+                }
+        }
         cgroup_destroy_locked(cgrp);
         mutex_unlock(&cgroup_mutex);
         mutex_unlock(&dentry->d_inode->i_mutex);
@@ -4539,7 +4568,7 @@ static void css_killed_ref_fn(struct percpu_ref *ref)
                 container_of(ref, struct cgroup_subsys_state, refcnt);
 
         INIT_WORK(&css->destroy_work, css_killed_work_fn);
-        schedule_work(&css->destroy_work);
+        queue_work(cgroup_destroy_wq, &css->destroy_work);
 }
 
 /**
@@ -4641,8 +4670,12 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
          * will be invoked to perform the rest of destruction once the
          * percpu refs of all css's are confirmed to be killed.
          */
-        for_each_root_subsys(cgrp->root, ss)
-                kill_css(cgroup_css(cgrp, ss));
+        for_each_root_subsys(cgrp->root, ss) {
+                struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
+
+                if (css)
+                        kill_css(css);
+        }
 
         /*
          * Mark @cgrp dead.  This prevents further task migration and child
@@ -4711,14 +4744,6 @@ static void cgroup_destroy_css_killed(struct cgroup *cgrp)
         /* delete this cgroup from parent->children */
         list_del_rcu(&cgrp->sibling);
 
-        /*
-         * We should remove the cgroup object from idr before its grace
-         * period starts, so we won't be looking up a cgroup while the
-         * cgroup is being freed.
-         */
-        idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
-        cgrp->id = -1;
-
         dput(d);
 
         set_bit(CGRP_RELEASABLE, &parent->flags);
@@ -5063,6 +5088,22 @@ out:
         return err;
 }
 
+static int __init cgroup_wq_init(void)
+{
+        /*
+         * There isn't much point in executing destruction path in
+         * parallel.  Good chunk is serialized with cgroup_mutex anyway.
+         * Use 1 for @max_active.
+         *
+         * We would prefer to do this in cgroup_init() above, but that
+         * is called before init_workqueues(): so leave this until after.
+         */
+        cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
+        BUG_ON(!cgroup_destroy_wq);
+        return 0;
+}
+core_initcall(cgroup_wq_init);
+
 /*
  * proc_cgroup_show()
  *  - Print task's cgroup paths into seq_file, one line for each hierarchy
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 6bf981e13c43..4772034b4b17 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1033,8 +1033,10 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk,
         need_loop = task_has_mempolicy(tsk) ||
                         !nodes_intersects(*newmems, tsk->mems_allowed);
 
-        if (need_loop)
+        if (need_loop) {
+                local_irq_disable();
                 write_seqcount_begin(&tsk->mems_allowed_seq);
+        }
 
         nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
         mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
@@ -1042,8 +1044,10 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk,
         mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2);
         tsk->mems_allowed = *newmems;
 
-        if (need_loop)
+        if (need_loop) {
                 write_seqcount_end(&tsk->mems_allowed_seq);
+                local_irq_enable();
+        }
 
         task_unlock(tsk);
 }
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d724e7757cd1..f5744010a8d2 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1396,6 +1396,8 @@ event_sched_out(struct perf_event *event,
         if (event->state != PERF_EVENT_STATE_ACTIVE)
                 return;
 
+        perf_pmu_disable(event->pmu);
+
         event->state = PERF_EVENT_STATE_INACTIVE;
         if (event->pending_disable) {
                 event->pending_disable = 0;
@@ -1412,6 +1414,8 @@ event_sched_out(struct perf_event *event,
                 ctx->nr_freq--;
         if (event->attr.exclusive || !cpuctx->active_oncpu)
                 cpuctx->exclusive = 0;
+
+        perf_pmu_enable(event->pmu);
 }
 
 static void
@@ -1652,6 +1656,7 @@ event_sched_in(struct perf_event *event,
                  struct perf_event_context *ctx)
 {
         u64 tstamp = perf_event_time(event);
+        int ret = 0;
 
         if (event->state <= PERF_EVENT_STATE_OFF)
                 return 0;
@@ -1674,10 +1679,13 @@
          */
         smp_wmb();
 
+        perf_pmu_disable(event->pmu);
+
         if (event->pmu->add(event, PERF_EF_START)) {
                 event->state = PERF_EVENT_STATE_INACTIVE;
                 event->oncpu = -1;
-                return -EAGAIN;
+                ret = -EAGAIN;
+                goto out;
         }
 
         event->tstamp_running += tstamp - event->tstamp_stopped;
@@ -1693,7 +1701,10 @@ event_sched_in(struct perf_event *event,
         if (event->attr.exclusive)
                 cpuctx->exclusive = 1;
 
-        return 0;
+out:
+        perf_pmu_enable(event->pmu);
+
+        return ret;
 }
 
 static int
@@ -2743,6 +2754,8 @@ static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx,
                 if (!event_filter_match(event))
                         continue;
 
+                perf_pmu_disable(event->pmu);
+
                 hwc = &event->hw;
 
                 if (hwc->interrupts == MAX_INTERRUPTS) {
@@ -2752,7 +2765,7 @@ static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx,
                 }
 
                 if (!event->attr.freq || !event->attr.sample_freq)
-                        continue;
+                        goto next;
 
                 /*
                  * stop the event and update event->count
@@ -2774,6 +2787,8 @@ static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx,
                         perf_adjust_period(event, period, delta, false);
 
                 event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0);
+        next:
+                perf_pmu_enable(event->pmu);
         }
 
         perf_pmu_enable(ctx->pmu);
@@ -5680,11 +5695,6 @@ static void swevent_hlist_put(struct perf_event *event)
 {
         int cpu;
 
-        if (event->cpu != -1) {
-                swevent_hlist_put_cpu(event, event->cpu);
-                return;
-        }
-
         for_each_possible_cpu(cpu)
                 swevent_hlist_put_cpu(event, cpu);
 }
@@ -5718,9 +5728,6 @@ static int swevent_hlist_get(struct perf_event *event)
         int err;
         int cpu, failed_cpu;
 
-        if (event->cpu != -1)
-                return swevent_hlist_get_cpu(event, event->cpu);
-
         get_online_cpus();
         for_each_possible_cpu(cpu) {
                 err = swevent_hlist_get_cpu(event, cpu);
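The perf_pmu_disable()/perf_pmu_enable() brackets added above nest: only the outermost pair actually touches the hardware. A rough sketch of that reference-counted pattern (a hypothetical toy_pmu type; the kernel tracks the count per PMU, per CPU):

#include <assert.h>

struct toy_pmu {
        int disable_count;
        void (*hw_disable)(struct toy_pmu *);
        void (*hw_enable)(struct toy_pmu *);
};

static void toy_pmu_disable(struct toy_pmu *pmu)
{
        if (pmu->disable_count++ == 0)
                pmu->hw_disable(pmu);   /* hardware stopped on first disable */
}

static void toy_pmu_enable(struct toy_pmu *pmu)
{
        assert(pmu->disable_count > 0);
        if (--pmu->disable_count == 0)
                pmu->hw_enable(pmu);    /* restarted at the outermost enable */
}

Because the count nests, the per-event brackets added here can sit inside the existing perf_pmu_disable(ctx->pmu) without re-enabling the PMU early.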
diff --git a/kernel/extable.c b/kernel/extable.c
index 832cb28105bb..763faf037ec1 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -61,7 +61,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
 static inline int init_kernel_text(unsigned long addr)
 {
         if (addr >= (unsigned long)_sinittext &&
-            addr <= (unsigned long)_einittext)
+            addr < (unsigned long)_einittext)
                 return 1;
         return 0;
 }
@@ -69,7 +69,7 @@ static inline int init_kernel_text(unsigned long addr)
 int core_kernel_text(unsigned long addr)
 {
         if (addr >= (unsigned long)_stext &&
-            addr <= (unsigned long)_etext)
+            addr < (unsigned long)_etext)
                 return 1;
 
         if (system_state == SYSTEM_BOOTING &&
diff --git a/kernel/fork.c b/kernel/fork.c
index 728d5be9548c..5721f0e3f2da 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -537,6 +537,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
         spin_lock_init(&mm->page_table_lock);
         mm_init_aio(mm);
         mm_init_owner(mm, p);
+        clear_tlb_flush_pending(mm);
 
         if (likely(!mm_alloc_pgd(mm))) {
                 mm->def_flags = 0;
diff --git a/kernel/freezer.c b/kernel/freezer.c
index b462fa197517..aa6a8aadb911 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -19,6 +19,12 @@ EXPORT_SYMBOL(system_freezing_cnt);
 bool pm_freezing;
 bool pm_nosig_freezing;
 
+/*
+ * Temporary export for the deadlock workaround in ata_scsi_hotplug().
+ * Remove once the hack becomes unnecessary.
+ */
+EXPORT_SYMBOL_GPL(pm_freezing);
+
 /* protects freezing and frozen transitions */
 static DEFINE_SPINLOCK(freezer_lock);
 
diff --git a/kernel/futex.c b/kernel/futex.c
index 80ba086f021d..f6ff0191ecf7 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -251,6 +251,9 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
                 return -EINVAL;
         address -= key->both.offset;
 
+        if (unlikely(!access_ok(rw, uaddr, sizeof(u32))))
+                return -EFAULT;
+
         /*
          * PROCESS_PRIVATE futexes are fast.
          * As the mm cannot disappear under us and the 'key' only needs
@@ -259,8 +262,6 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
          * but access_ok() should be faster than find_vma()
          */
         if (!fshared) {
-                if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
-                        return -EFAULT;
                 key->private.mm = mm;
                 key->private.address = address;
                 get_futex_key_refs(key);
@@ -288,7 +289,7 @@ again:
         put_page(page);
         /* serialize against __split_huge_page_splitting() */
         local_irq_disable();
-        if (likely(__get_user_pages_fast(address, 1, 1, &page) == 1)) {
+        if (likely(__get_user_pages_fast(address, 1, !ro, &page) == 1)) {
                 page_head = compound_head(page);
                 /*
                  * page_head is valid pointer but we must pin
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index cb228bf21760..abcd6ca86cb7 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -50,7 +50,7 @@ static void resume_irqs(bool want_early)
                 bool is_early = desc->action &&
                         desc->action->flags & IRQF_EARLY_RESUME;
 
-                if (is_early != want_early)
+                if (!is_early && want_early)
                         continue;
 
                 raw_spin_lock_irqsave(&desc->lock, flags);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 490afc03627e..9c970167e402 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -47,6 +47,9 @@ u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
 size_t vmcoreinfo_size;
 size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
 
+/* Flag to indicate we are going to kexec a new kernel */
+bool kexec_in_progress = false;
+
 /* Location of the reserved area for the crash kernel */
 struct resource crashk_res = {
         .name  = "Crash kernel",
@@ -1675,7 +1678,9 @@ int kernel_kexec(void)
         } else
 #endif
         {
+                kexec_in_progress = true;
                 kernel_restart_prepare(NULL);
+                migrate_to_reboot_cpu();
                 printk(KERN_EMERG "Starting new kernel\n");
                 machine_shutdown();
         }
diff --git a/kernel/padata.c b/kernel/padata.c
index 07af2c95dcfe..2abd25d79cc8 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -46,6 +46,7 @@ static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
 
 static int padata_cpu_hash(struct parallel_data *pd)
 {
+        unsigned int seq_nr;
         int cpu_index;
 
         /*
@@ -53,10 +54,8 @@ static int padata_cpu_hash(struct parallel_data *pd)
          * seq_nr mod. number of cpus in use.
          */
 
-        spin_lock(&pd->seq_lock);
-        cpu_index =  pd->seq_nr % cpumask_weight(pd->cpumask.pcpu);
-        pd->seq_nr++;
-        spin_unlock(&pd->seq_lock);
+        seq_nr = atomic_inc_return(&pd->seq_nr);
+        cpu_index = seq_nr % cpumask_weight(pd->cpumask.pcpu);
 
         return padata_index_to_cpu(pd, cpu_index);
 }
@@ -429,7 +428,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
         padata_init_pqueues(pd);
         padata_init_squeues(pd);
         setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd);
-        pd->seq_nr = 0;
+        atomic_set(&pd->seq_nr, -1);
         atomic_set(&pd->reorder_objects, 0);
         atomic_set(&pd->refcnt, 0);
         pd->pinst = pinst;
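The padata change swaps a spinlock-protected counter for an atomic one: atomic_inc_return() hands every caller a unique ticket, and the modulo maps tickets round-robin onto the usable CPUs (seq_nr is seeded to -1 so the first ticket is 0). A standalone sketch of the same idea, with illustrative names:

#include <stdatomic.h>

static atomic_uint padata_seq;  /* C11 fetch_add returns the old value, so
                                 * starting at 0 matches the kernel's -1 seed
                                 * followed by atomic_inc_return() */

static int pick_round_robin_cpu(const int *cpus, unsigned ncpus)
{
        unsigned ticket = atomic_fetch_add(&padata_seq, 1);
        return cpus[ticket % ncpus];    /* spread tickets over usable CPUs */
}

The single atomic add replaces a lock/modify/unlock sequence, removing a contended spinlock from the parallel submission path.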
diff --git a/kernel/power/console.c b/kernel/power/console.c
index 463aa6736751..eacb8bd8cab4 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -81,6 +81,7 @@ void pm_vt_switch_unregister(struct device *dev)
         list_for_each_entry(tmp, &pm_vt_switch_list, head) {
                 if (tmp->dev == dev) {
                         list_del(&tmp->head);
+                        kfree(tmp);
                         break;
                 }
         }
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 6abb03dff5c0..08a765232432 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1632,7 +1632,7 @@ module_param(rcu_idle_gp_delay, int, 0644);
 static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY;
 module_param(rcu_idle_lazy_gp_delay, int, 0644);
 
-extern int tick_nohz_enabled;
+extern int tick_nohz_active;
 
 /*
  * Try to advance callbacks for all flavors of RCU on the current CPU, but
@@ -1729,7 +1729,7 @@ static void rcu_prepare_for_idle(int cpu)
         int tne;
 
         /* Handle nohz enablement switches conservatively. */
-        tne = ACCESS_ONCE(tick_nohz_enabled);
+        tne = ACCESS_ONCE(tick_nohz_active);
         if (tne != rdtp->tick_nohz_enabled_snap) {
                 if (rcu_cpu_has_callbacks(cpu, NULL))
                         invoke_rcu_core(); /* force nohz to see update. */
diff --git a/kernel/reboot.c b/kernel/reboot.c
index f813b3474646..662c83fc16b7 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -104,7 +104,7 @@ int unregister_reboot_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL(unregister_reboot_notifier);
 
-static void migrate_to_reboot_cpu(void)
+void migrate_to_reboot_cpu(void)
 {
         /* The boot cpu is always logical cpu 0 */
         int cpu = reboot_cpu;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c1808606ee5f..a88f4a485c5e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2660,6 +2660,7 @@ asmlinkage void __sched notrace preempt_schedule(void)
         } while (need_resched());
 }
 EXPORT_SYMBOL(preempt_schedule);
+#endif /* CONFIG_PREEMPT */
 
 /*
  * this is the entry point to schedule() from kernel preemption
@@ -2693,8 +2694,6 @@ asmlinkage void __sched preempt_schedule_irq(void)
         exception_exit(prev_state);
 }
 
-#endif /* CONFIG_PREEMPT */
-
 int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
                           void *key)
 {
@@ -4762,7 +4761,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
                 cpumask_clear_cpu(rq->cpu, old_rd->span);
 
                 /*
-                 * If we dont want to free the old_rt yet then
+                 * If we dont want to free the old_rd yet then
                  * set old_rd to NULL to skip the freeing later
                  * in this function:
                  */
@@ -4903,6 +4902,7 @@ DEFINE_PER_CPU(struct sched_domain *, sd_asym);
 static void update_top_cache_domain(int cpu)
 {
         struct sched_domain *sd;
+        struct sched_domain *busy_sd = NULL;
         int id = cpu;
         int size = 1;
 
@@ -4910,8 +4910,9 @@ static void update_top_cache_domain(int cpu)
         if (sd) {
                 id = cpumask_first(sched_domain_span(sd));
                 size = cpumask_weight(sched_domain_span(sd));
-                rcu_assign_pointer(per_cpu(sd_busy, cpu), sd->parent);
+                busy_sd = sd->parent; /* sd_busy */
         }
+        rcu_assign_pointer(per_cpu(sd_busy, cpu), busy_sd);
 
         rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
         per_cpu(sd_llc_size, cpu) = size;
@@ -5112,6 +5113,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
                  * die on a /0 trap.
                  */
                 sg->sgp->power = SCHED_POWER_SCALE * cpumask_weight(sg_span);
+                sg->sgp->power_orig = sg->sgp->power;
 
                 /*
                  * Make sure the first group of this domain contains the
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e8b652ebe027..c7395d97e4cb 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -178,59 +178,61 @@ void sched_init_granularity(void)
         update_sysctl();
 }
 
-#if BITS_PER_LONG == 32
-# define WMULT_CONST    (~0UL)
-#else
-# define WMULT_CONST    (1UL << 32)
-#endif
-
+#define WMULT_CONST     (~0U)
 #define WMULT_SHIFT     32
 
-/*
- * Shift right and round:
- */
-#define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
+static void __update_inv_weight(struct load_weight *lw)
+{
+        unsigned long w;
+
+        if (likely(lw->inv_weight))
+                return;
+
+        w = scale_load_down(lw->weight);
+
+        if (BITS_PER_LONG > 32 && unlikely(w >= WMULT_CONST))
+                lw->inv_weight = 1;
+        else if (unlikely(!w))
+                lw->inv_weight = WMULT_CONST;
+        else
+                lw->inv_weight = WMULT_CONST / w;
+}
 
 /*
- * delta *= weight / lw
+ * delta_exec * weight / lw.weight
+ *   OR
+ * (delta_exec * (weight * lw->inv_weight)) >> WMULT_SHIFT
+ *
+ * Either weight := NICE_0_LOAD and lw \e prio_to_wmult[], in which case
+ * we're guaranteed shift stays positive because inv_weight is guaranteed to
+ * fit 32 bits, and NICE_0_LOAD gives another 10 bits; therefore shift >= 22.
+ *
+ * Or, weight =< lw.weight (because lw.weight is the runqueue weight), thus
+ * weight/lw.weight <= 1, and therefore our shift will also be positive.
  */
-static unsigned long
-calc_delta_mine(unsigned long delta_exec, unsigned long weight,
-                struct load_weight *lw)
+static u64 __calc_delta(u64 delta_exec, unsigned long weight, struct load_weight *lw)
 {
-        u64 tmp;
+        u64 fact = scale_load_down(weight);
+        int shift = WMULT_SHIFT;
 
-        /*
-         * weight can be less than 2^SCHED_LOAD_RESOLUTION for task group sched
-         * entities since MIN_SHARES = 2. Treat weight as 1 if less than
-         * 2^SCHED_LOAD_RESOLUTION.
-         */
-        if (likely(weight > (1UL << SCHED_LOAD_RESOLUTION)))
-                tmp = (u64)delta_exec * scale_load_down(weight);
-        else
-                tmp = (u64)delta_exec;
+        __update_inv_weight(lw);
 
-        if (!lw->inv_weight) {
-                unsigned long w = scale_load_down(lw->weight);
-
-                if (BITS_PER_LONG > 32 && unlikely(w >= WMULT_CONST))
-                        lw->inv_weight = 1;
-                else if (unlikely(!w))
-                        lw->inv_weight = WMULT_CONST;
-                else
-                        lw->inv_weight = WMULT_CONST / w;
+        if (unlikely(fact >> 32)) {
+                while (fact >> 32) {
+                        fact >>= 1;
+                        shift--;
+                }
         }
 
-        /*
-         * Check whether we'd overflow the 64-bit multiplication:
-         */
-        if (unlikely(tmp > WMULT_CONST))
-                tmp = SRR(SRR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
-                        WMULT_SHIFT/2);
-        else
-                tmp = SRR(tmp * lw->inv_weight, WMULT_SHIFT);
+        /* hint to use a 32x32->64 mul */
+        fact = (u64)(u32)fact * lw->inv_weight;
 
-        return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
+        while (fact >> 32) {
+                fact >>= 1;
+                shift--;
+        }
+
+        return mul_u64_u32_shr(delta_exec, fact, shift);
 }
 
 
@@ -443,7 +445,7 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse)
 #endif  /* CONFIG_FAIR_GROUP_SCHED */
 
 static __always_inline
-void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec);
+void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec);
 
 /**************************************************************
  * Scheduling class tree data structure manipulation methods:
@@ -612,11 +614,10 @@ int sched_proc_update_handler(struct ctl_table *table, int write,
 /*
  * delta /= w
  */
-static inline unsigned long
-calc_delta_fair(unsigned long delta, struct sched_entity *se)
+static inline u64 calc_delta_fair(u64 delta, struct sched_entity *se)
 {
         if (unlikely(se->load.weight != NICE_0_LOAD))
-                delta = calc_delta_mine(delta, NICE_0_LOAD, &se->load);
+                delta = __calc_delta(delta, NICE_0_LOAD, &se->load);
 
         return delta;
 }
@@ -665,7 +666,7 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
                         update_load_add(&lw, se->load.weight);
                         load = &lw;
                 }
-                slice = calc_delta_mine(slice, se->load.weight, load);
+                slice = __calc_delta(slice, se->load.weight, load);
         }
         return slice;
 }
@@ -703,47 +704,32 @@ void init_task_runnable_average(struct task_struct *p)
 #endif
 
 /*
- * Update the current task's runtime statistics. Skip current tasks that
- * are not in our scheduling class.
+ * Update the current task's runtime statistics.
  */
-static inline void
-__update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
-              unsigned long delta_exec)
-{
-        unsigned long delta_exec_weighted;
-
-        schedstat_set(curr->statistics.exec_max,
-                        max((u64)delta_exec, curr->statistics.exec_max));
-
-        curr->sum_exec_runtime += delta_exec;
-        schedstat_add(cfs_rq, exec_clock, delta_exec);
-        delta_exec_weighted = calc_delta_fair(delta_exec, curr);
-
-        curr->vruntime += delta_exec_weighted;
-        update_min_vruntime(cfs_rq);
-}
-
 static void update_curr(struct cfs_rq *cfs_rq)
 {
         struct sched_entity *curr = cfs_rq->curr;
         u64 now = rq_clock_task(rq_of(cfs_rq));
-        unsigned long delta_exec;
+        u64 delta_exec;
 
         if (unlikely(!curr))
                 return;
 
-        /*
-         * Get the amount of time the current task was running
-         * since the last time we changed load (this cannot
-         * overflow on 32 bits):
-         */
-        delta_exec = (unsigned long)(now - curr->exec_start);
-        if (!delta_exec)
+        delta_exec = now - curr->exec_start;
+        if (unlikely((s64)delta_exec <= 0))
                 return;
 
-        __update_curr(cfs_rq, curr, delta_exec);
         curr->exec_start = now;
 
+        schedstat_set(curr->statistics.exec_max,
+                      max(delta_exec, curr->statistics.exec_max));
+
+        curr->sum_exec_runtime += delta_exec;
+        schedstat_add(cfs_rq, exec_clock, delta_exec);
+
+        curr->vruntime += calc_delta_fair(delta_exec, curr);
+        update_min_vruntime(cfs_rq);
+
         if (entity_is_task(curr)) {
                 struct task_struct *curtask = task_of(curr);
 
@@ -1752,6 +1738,13 @@ void task_numa_work(struct callback_head *work)
                     (vma->vm_file && (vma->vm_flags & (VM_READ|VM_WRITE)) == (VM_READ)))
                         continue;
 
+                /*
+                 * Skip inaccessible VMAs to avoid any confusion between
+                 * PROT_NONE and NUMA hinting ptes
+                 */
+                if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
+                        continue;
+
                 do {
                         start = max(start, vma->vm_start);
                         end = ALIGN(start + (pages << PAGE_SHIFT), HPAGE_SIZE);
@@ -3015,8 +3008,7 @@ static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq)
         }
 }
 
-static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
-                                     unsigned long delta_exec)
+static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
 {
         /* dock delta_exec before expiring quota (as it could span periods) */
         cfs_rq->runtime_remaining -= delta_exec;
@@ -3034,7 +3026,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
 }
 
 static __always_inline
-void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, unsigned long delta_exec)
+void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
 {
         if (!cfs_bandwidth_used() || !cfs_rq->runtime_enabled)
                 return;
@@ -3574,8 +3566,7 @@ static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
         return rq_clock_task(rq_of(cfs_rq));
 }
 
-static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
-                                   unsigned long delta_exec) {}
+static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) {}
 static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
 static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
@@ -5379,10 +5370,31 @@ void update_group_power(struct sched_domain *sd, int cpu)
                  */
 
                 for_each_cpu(cpu, sched_group_cpus(sdg)) {
-                        struct sched_group *sg = cpu_rq(cpu)->sd->groups;
+                        struct sched_group_power *sgp;
+                        struct rq *rq = cpu_rq(cpu);
+
+                        /*
+                         * build_sched_domains() -> init_sched_groups_power()
+                         * gets here before we've attached the domains to the
+                         * runqueues.
+                         *
+                         * Use power_of(), which is set irrespective of domains
+                         * in update_cpu_power().
+                         *
+                         * This avoids power/power_orig from being 0 and
+                         * causing divide-by-zero issues on boot.
+                         *
+                         * Runtime updates will correct power_orig.
+                         */
+                        if (unlikely(!rq->sd)) {
+                                power_orig += power_of(cpu);
+                                power += power_of(cpu);
+                                continue;
+                        }
 
-                        power_orig += sg->sgp->power_orig;
-                        power += sg->sgp->power;
+                        sgp = rq->sd->groups->sgp;
+                        power_orig += sgp->power_orig;
+                        power += sgp->power;
                 }
         } else  {
                 /*
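The __calc_delta() rework above replaces a branchy 64-bit overflow dance with fixed-point arithmetic: delta_exec * weight / lw.weight becomes a multiply by a precomputed 32-bit inverse (~0U / weight) followed by a right shift, with every normalisation of fact into 32 bits compensated in the shift. A userspace rendering of the scheme (a sketch; a GCC/Clang __uint128_t stands in for the kernel's mul_u64_u32_shr(), and the kernel's load-scaling is omitted):

#include <stdint.h>

static uint64_t calc_delta(uint64_t delta_exec, uint64_t weight,
                           uint64_t lw_weight)
{
        uint32_t inv_weight;
        uint64_t fact = weight;
        int shift = 32;

        if (lw_weight >= ~0U)
                inv_weight = 1;         /* inverse would underflow: saturate */
        else if (lw_weight == 0)
                inv_weight = ~0U;
        else
                inv_weight = (uint32_t)(~0U / lw_weight);

        while (fact >> 32) {            /* squeeze fact into 32 bits ... */
                fact >>= 1;
                shift--;                /* ... folding the loss into the shift */
        }

        fact = (uint64_t)(uint32_t)fact * inv_weight;   /* 32x32->64 multiply */

        while (fact >> 32) {
                fact >>= 1;
                shift--;
        }

        /* equivalent of mul_u64_u32_shr(delta_exec, fact, shift) */
        return (uint64_t)(((__uint128_t)delta_exec * (uint32_t)fact) >> shift);
}

Since inv_weight approximates 2^32 / lw_weight, the final shift by 32 cancels it out, yielding delta_exec * weight / lw_weight with no 64-bit division on the hot path.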
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 7d57275fc396..1c4065575fa2 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -901,6 +901,13 @@ inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
 {
         struct rq *rq = rq_of_rt_rq(rt_rq);
 
+#ifdef CONFIG_RT_GROUP_SCHED
+        /*
+         * Change rq's cpupri only if rt_rq is the top queue.
+         */
+        if (&rq->rt != rt_rq)
+                return;
+#endif
         if (rq->online && prio < prev_prio)
                 cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
 }
@@ -910,6 +917,13 @@ dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
 {
         struct rq *rq = rq_of_rt_rq(rt_rq);
 
+#ifdef CONFIG_RT_GROUP_SCHED
+        /*
+         * Change rq's cpupri only if rt_rq is the top queue.
+         */
+        if (&rq->rt != rt_rq)
+                return;
+#endif
         if (rq->online && rt_rq->highest_prio.curr != prev_prio)
                 cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
 }
diff --git a/kernel/system_certificates.S b/kernel/system_certificates.S
index 4aef390671cb..3e9868d47535 100644
--- a/kernel/system_certificates.S
+++ b/kernel/system_certificates.S
@@ -3,8 +3,18 @@
 
         __INITRODATA
 
+        .align 8
         .globl VMLINUX_SYMBOL(system_certificate_list)
 VMLINUX_SYMBOL(system_certificate_list):
+__cert_list_start:
         .incbin "kernel/x509_certificate_list"
-        .globl VMLINUX_SYMBOL(system_certificate_list_end)
-VMLINUX_SYMBOL(system_certificate_list_end):
+__cert_list_end:
+
+        .align 8
+        .globl VMLINUX_SYMBOL(system_certificate_list_size)
+VMLINUX_SYMBOL(system_certificate_list_size):
+#ifdef CONFIG_64BIT
+        .quad __cert_list_end - __cert_list_start
+#else
+        .long __cert_list_end - __cert_list_start
+#endif
diff --git a/kernel/system_keyring.c b/kernel/system_keyring.c
index 564dd93430a2..52ebc70263f4 100644
--- a/kernel/system_keyring.c
+++ b/kernel/system_keyring.c
@@ -22,7 +22,7 @@ struct key *system_trusted_keyring;
 EXPORT_SYMBOL_GPL(system_trusted_keyring);
 
 extern __initconst const u8 system_certificate_list[];
-extern __initconst const u8 system_certificate_list_end[];
+extern __initconst const unsigned long system_certificate_list_size;
 
 /*
  * Load the compiled-in keys
@@ -60,8 +60,8 @@ static __init int load_system_certificate_list(void)
 
         pr_notice("Loading compiled-in X.509 certificates\n");
 
-        end = system_certificate_list_end;
         p = system_certificate_list;
+        end = p + system_certificate_list_size;
         while (p < end) {
                 /* Each cert begins with an ASN.1 SEQUENCE tag and must be more
                  * than 256 bytes in size.
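load_system_certificate_list() walks the blob by parsing each certificate's ASN.1 header rather than relying on an end-marker symbol: every cert is a DER SEQUENCE (tag 0x30) and, being larger than 256 bytes, always uses the long-form length 0x82 followed by a 16-bit big-endian size. A small sketch of that walk (an illustrative helper, not the kernel function):

#include <stddef.h>
#include <stdint.h>

/* Measure the certificate at p and return the start of the next one,
 * or NULL when the blob is exhausted or malformed. */
static const uint8_t *next_cert(const uint8_t *p, const uint8_t *end,
                                size_t *cert_len)
{
        if (end - p < 4 || p[0] != 0x30 || p[1] != 0x82)
                return NULL;                    /* not a long-form SEQUENCE */
        *cert_len = 4 + (((size_t)p[2] << 8) | p[3]);   /* header + payload */
        return p + *cert_len <= end ? p + *cert_len : NULL;
}

Each (p, *cert_len) window then gets loaded as one key, much as the while (p < end) loop above feeds each certificate to key_create_or_update().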
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 64522ecdfe0e..162b03ab0ad2 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -33,6 +33,21 @@ DEFINE_PER_CPU(struct tick_device, tick_cpu_device);
  */
 ktime_t tick_next_period;
 ktime_t tick_period;
+
+/*
+ * tick_do_timer_cpu is a timer core internal variable which holds the CPU NR
+ * which is responsible for calling do_timer(), i.e. the timekeeping stuff. This
+ * variable has two functions:
+ *
+ * 1) Prevent a thundering herd issue of a gazillion of CPUs trying to grab the
+ *    timekeeping lock all at once. Only the CPU which is assigned to do the
+ *    update is handling it.
+ *
+ * 2) Hand off the duty in the NOHZ idle case by setting the value to
+ *    TICK_DO_TIMER_NONE, i.e. a non existing CPU. So the next cpu which looks
+ *    at it will take over and keep the time keeping alive.  The handover
+ *    procedure also covers cpu hotplug.
+ */
 int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;
 
 /*
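The comment block added above describes a token-passing protocol: one CPU owns the do_timer() duty, and when it goes NOHZ-idle it drops the token so the next CPU to take a tick claims it. A simplified sketch of such a hand-off with C11 atomics (the kernel relies on tick ordering and jiffies locking rather than this exact CAS scheme):

#include <stdatomic.h>

#define DO_TIMER_NONE (-1)

static atomic_int do_timer_cpu = 0;     /* boot CPU starts as the owner */

static void do_timekeeping_update(void) { /* advance jiffies, etc. */ }

/* Called from every CPU's tick interrupt. */
static void tick_handler(int this_cpu)
{
        int owner = atomic_load(&do_timer_cpu);

        if (owner == DO_TIMER_NONE &&
            atomic_compare_exchange_strong(&do_timer_cpu, &owner, this_cpu))
                owner = this_cpu;               /* duty was dropped: claim it */

        if (owner == this_cpu)
                do_timekeeping_update();        /* only the owner touches the clock */
}

/* Called before a CPU stops its tick to go NOHZ-idle. */
static void tick_nohz_drop_duty(int this_cpu)
{
        int expected = this_cpu;
        atomic_compare_exchange_strong(&do_timer_cpu, &expected, DO_TIMER_NONE);
}

Only the owner ever writes the clock, which is what prevents the thundering herd on the timekeeping lock that the comment mentions.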
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 3612fc77f834..ea20f7d1ac2c 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -361,8 +361,8 @@ void __init tick_nohz_init(void) | |||
361 | /* | 361 | /* |
362 | * NO HZ enabled ? | 362 | * NO HZ enabled ? |
363 | */ | 363 | */ |
364 | int tick_nohz_enabled __read_mostly = 1; | 364 | static int tick_nohz_enabled __read_mostly = 1; |
365 | 365 | int tick_nohz_active __read_mostly; | |
366 | /* | 366 | /* |
367 | * Enable / Disable tickless mode | 367 | * Enable / Disable tickless mode |
368 | */ | 368 | */ |
@@ -465,7 +465,7 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time) | |||
465 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 465 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
466 | ktime_t now, idle; | 466 | ktime_t now, idle; |
467 | 467 | ||
468 | if (!tick_nohz_enabled) | 468 | if (!tick_nohz_active) |
469 | return -1; | 469 | return -1; |
470 | 470 | ||
471 | now = ktime_get(); | 471 | now = ktime_get(); |
@@ -506,7 +506,7 @@ u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time) | |||
506 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); | 506 | struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); |
507 | ktime_t now, iowait; | 507 | ktime_t now, iowait; |
508 | 508 | ||
509 | if (!tick_nohz_enabled) | 509 | if (!tick_nohz_active) |
510 | return -1; | 510 | return -1; |
511 | 511 | ||
512 | now = ktime_get(); | 512 | now = ktime_get(); |
@@ -711,8 +711,10 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) | |||
711 | return false; | 711 | return false; |
712 | } | 712 | } |
713 | 713 | ||
714 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) | 714 | if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) { |
715 | ts->sleep_length = (ktime_t) { .tv64 = NSEC_PER_SEC/HZ }; | ||
715 | return false; | 716 | return false; |
717 | } | ||
716 | 718 | ||
717 | if (need_resched()) | 719 | if (need_resched()) |
718 | return false; | 720 | return false; |
@@ -799,11 +801,6 @@ void tick_nohz_idle_enter(void) | |||
799 | local_irq_disable(); | 801 | local_irq_disable(); |
800 | 802 | ||
801 | ts = &__get_cpu_var(tick_cpu_sched); | 803 | ts = &__get_cpu_var(tick_cpu_sched); |
802 | /* | ||
803 | * set ts->inidle unconditionally. even if the system did not | ||
804 | * switch to nohz mode the cpu frequency governers rely on the | ||
805 | * update of the idle time accounting in tick_nohz_start_idle(). | ||
806 | */ | ||
807 | ts->inidle = 1; | 804 | ts->inidle = 1; |
808 | __tick_nohz_idle_enter(ts); | 805 | __tick_nohz_idle_enter(ts); |
809 | 806 | ||
@@ -973,7 +970,7 @@ static void tick_nohz_switch_to_nohz(void) | |||
973 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); | 970 | struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); |
974 | ktime_t next; | 971 | ktime_t next; |
975 | 972 | ||
976 | if (!tick_nohz_enabled) | 973 | if (!tick_nohz_active) |
977 | return; | 974 | return; |
978 | 975 | ||
979 | local_irq_disable(); | 976 | local_irq_disable(); |
@@ -981,7 +978,7 @@ static void tick_nohz_switch_to_nohz(void) | |||
981 | local_irq_enable(); | 978 | local_irq_enable(); |
982 | return; | 979 | return; |
983 | } | 980 | } |
984 | 981 | tick_nohz_active = 1; | |
985 | ts->nohz_mode = NOHZ_MODE_LOWRES; | 982 | ts->nohz_mode = NOHZ_MODE_LOWRES; |
986 | 983 | ||
987 | /* | 984 | /* |
@@ -1139,8 +1136,10 @@ void tick_setup_sched_timer(void) | |||
1139 | } | 1136 | } |
1140 | 1137 | ||
1141 | #ifdef CONFIG_NO_HZ_COMMON | 1138 | #ifdef CONFIG_NO_HZ_COMMON |
1142 | if (tick_nohz_enabled) | 1139 | if (tick_nohz_enabled) { |
1143 | ts->nohz_mode = NOHZ_MODE_HIGHRES; | 1140 | ts->nohz_mode = NOHZ_MODE_HIGHRES; |
1141 | tick_nohz_active = 1; | ||
1142 | } | ||
1144 | #endif | 1143 | #endif |
1145 | } | 1144 | } |
1146 | #endif /* HIGH_RES_TIMERS */ | 1145 | #endif /* HIGH_RES_TIMERS */ |
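This rework splits one flag into two: tick_nohz_enabled records the user's request (boot parameter), while the new tick_nohz_active records whether NOHZ mode actually engaged. A hedged sketch of the resulting guard, with names taken from the diff but the body reduced to the essentials:

/* Sketch (not verbatim): the stats read path after the split. */
u64 get_cpu_idle_time_us_sketch(int cpu, u64 *last_update_time)
{
	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);

	/*
	 * Idle accounting is only maintained once NOHZ has actually
	 * engaged, so the guard tests tick_nohz_active rather than the
	 * mere user request in tick_nohz_enabled.
	 */
	if (!tick_nohz_active)
		return -1;

	return ktime_to_us(ts->idle_sleeptime);	/* details elided */
}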
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3abf53418b67..87b4f00284c9 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1347,7 +1347,7 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk) | |||
1347 | tk->xtime_nsec -= remainder; | 1347 | tk->xtime_nsec -= remainder; |
1348 | tk->xtime_nsec += 1ULL << tk->shift; | 1348 | tk->xtime_nsec += 1ULL << tk->shift; |
1349 | tk->ntp_error += remainder << tk->ntp_error_shift; | 1349 | tk->ntp_error += remainder << tk->ntp_error_shift; |
1350 | 1350 | tk->ntp_error -= (1ULL << tk->shift) << tk->ntp_error_shift; | |
1351 | } | 1351 | } |
1352 | #else | 1352 | #else |
1353 | #define old_vsyscall_fixup(tk) | 1353 | #define old_vsyscall_fixup(tk) |
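The one-line addition balances the books for the NTP error term: rounding xtime_nsec up to the next whole shifted nanosecond must not be mistaken for clock drift. A sketch of the complete fixup, with field names as in struct timekeeper and the net effect spelled out in the final comment:

static inline void old_vsyscall_fixup_sketch(struct timekeeper *tk)
{
	u64 remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1);

	/* Round xtime_nsec up to the next multiple of 2^shift ... */
	tk->xtime_nsec -= remainder;
	tk->xtime_nsec += 1ULL << tk->shift;

	/* ... and cancel the rounding out of the NTP error accumulator. */
	tk->ntp_error += remainder << tk->ntp_error_shift;
	tk->ntp_error -= (1ULL << tk->shift) << tk->ntp_error_shift;
	/* net: ntp_error += (remainder - 2^shift) << ntp_error_shift */
}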
diff --git a/kernel/timer.c b/kernel/timer.c
index 6582b82fa966..accfd241b9e5 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1518,9 +1518,8 @@ static int init_timers_cpu(int cpu) | |||
1518 | /* | 1518 | /* |
1519 | * The APs use this path later in boot | 1519 | * The APs use this path later in boot |
1520 | */ | 1520 | */ |
1521 | base = kmalloc_node(sizeof(*base), | 1521 | base = kzalloc_node(sizeof(*base), GFP_KERNEL, |
1522 | GFP_KERNEL | __GFP_ZERO, | 1522 | cpu_to_node(cpu)); |
1523 | cpu_to_node(cpu)); | ||
1524 | if (!base) | 1523 | if (!base) |
1525 | return -ENOMEM; | 1524 | return -ENOMEM; |
1526 | 1525 | ||
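A pure readability cleanup; the two spellings below allocate the same zeroed, node-local memory:

/* Before: open-coded zeroing flag. */
base = kmalloc_node(sizeof(*base), GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu));

/* After: kzalloc_node() implies __GFP_ZERO. */
base = kzalloc_node(sizeof(*base), GFP_KERNEL, cpu_to_node(cpu));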
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 22fa55696760..72a0f81dc5a8 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -367,9 +367,6 @@ static int remove_ftrace_list_ops(struct ftrace_ops **list, | |||
367 | 367 | ||
368 | static int __register_ftrace_function(struct ftrace_ops *ops) | 368 | static int __register_ftrace_function(struct ftrace_ops *ops) |
369 | { | 369 | { |
370 | if (unlikely(ftrace_disabled)) | ||
371 | return -ENODEV; | ||
372 | |||
373 | if (FTRACE_WARN_ON(ops == &global_ops)) | 370 | if (FTRACE_WARN_ON(ops == &global_ops)) |
374 | return -EINVAL; | 371 | return -EINVAL; |
375 | 372 | ||
@@ -428,9 +425,6 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) | |||
428 | { | 425 | { |
429 | int ret; | 426 | int ret; |
430 | 427 | ||
431 | if (ftrace_disabled) | ||
432 | return -ENODEV; | ||
433 | |||
434 | if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED))) | 428 | if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED))) |
435 | return -EBUSY; | 429 | return -EBUSY; |
436 | 430 | ||
@@ -781,7 +775,7 @@ static int ftrace_profile_init(void) | |||
781 | int cpu; | 775 | int cpu; |
782 | int ret = 0; | 776 | int ret = 0; |
783 | 777 | ||
784 | for_each_online_cpu(cpu) { | 778 | for_each_possible_cpu(cpu) { |
785 | ret = ftrace_profile_init_cpu(cpu); | 779 | ret = ftrace_profile_init_cpu(cpu); |
786 | if (ret) | 780 | if (ret) |
787 | break; | 781 | break; |
@@ -2088,10 +2082,15 @@ static void ftrace_startup_enable(int command) | |||
2088 | static int ftrace_startup(struct ftrace_ops *ops, int command) | 2082 | static int ftrace_startup(struct ftrace_ops *ops, int command) |
2089 | { | 2083 | { |
2090 | bool hash_enable = true; | 2084 | bool hash_enable = true; |
2085 | int ret; | ||
2091 | 2086 | ||
2092 | if (unlikely(ftrace_disabled)) | 2087 | if (unlikely(ftrace_disabled)) |
2093 | return -ENODEV; | 2088 | return -ENODEV; |
2094 | 2089 | ||
2090 | ret = __register_ftrace_function(ops); | ||
2091 | if (ret) | ||
2092 | return ret; | ||
2093 | |||
2095 | ftrace_start_up++; | 2094 | ftrace_start_up++; |
2096 | command |= FTRACE_UPDATE_CALLS; | 2095 | command |= FTRACE_UPDATE_CALLS; |
2097 | 2096 | ||
@@ -2113,12 +2112,17 @@ static int ftrace_startup(struct ftrace_ops *ops, int command) | |||
2113 | return 0; | 2112 | return 0; |
2114 | } | 2113 | } |
2115 | 2114 | ||
2116 | static void ftrace_shutdown(struct ftrace_ops *ops, int command) | 2115 | static int ftrace_shutdown(struct ftrace_ops *ops, int command) |
2117 | { | 2116 | { |
2118 | bool hash_disable = true; | 2117 | bool hash_disable = true; |
2118 | int ret; | ||
2119 | 2119 | ||
2120 | if (unlikely(ftrace_disabled)) | 2120 | if (unlikely(ftrace_disabled)) |
2121 | return; | 2121 | return -ENODEV; |
2122 | |||
2123 | ret = __unregister_ftrace_function(ops); | ||
2124 | if (ret) | ||
2125 | return ret; | ||
2122 | 2126 | ||
2123 | ftrace_start_up--; | 2127 | ftrace_start_up--; |
2124 | /* | 2128 | /* |
@@ -2153,9 +2157,10 @@ static void ftrace_shutdown(struct ftrace_ops *ops, int command) | |||
2153 | } | 2157 | } |
2154 | 2158 | ||
2155 | if (!command || !ftrace_enabled) | 2159 | if (!command || !ftrace_enabled) |
2156 | return; | 2160 | return 0; |
2157 | 2161 | ||
2158 | ftrace_run_update_code(command); | 2162 | ftrace_run_update_code(command); |
2163 | return 0; | ||
2159 | } | 2164 | } |
2160 | 2165 | ||
2161 | static void ftrace_startup_sysctl(void) | 2166 | static void ftrace_startup_sysctl(void) |
@@ -3060,16 +3065,13 @@ static void __enable_ftrace_function_probe(void) | |||
3060 | if (i == FTRACE_FUNC_HASHSIZE) | 3065 | if (i == FTRACE_FUNC_HASHSIZE) |
3061 | return; | 3066 | return; |
3062 | 3067 | ||
3063 | ret = __register_ftrace_function(&trace_probe_ops); | 3068 | ret = ftrace_startup(&trace_probe_ops, 0); |
3064 | if (!ret) | ||
3065 | ret = ftrace_startup(&trace_probe_ops, 0); | ||
3066 | 3069 | ||
3067 | ftrace_probe_registered = 1; | 3070 | ftrace_probe_registered = 1; |
3068 | } | 3071 | } |
3069 | 3072 | ||
3070 | static void __disable_ftrace_function_probe(void) | 3073 | static void __disable_ftrace_function_probe(void) |
3071 | { | 3074 | { |
3072 | int ret; | ||
3073 | int i; | 3075 | int i; |
3074 | 3076 | ||
3075 | if (!ftrace_probe_registered) | 3077 | if (!ftrace_probe_registered) |
@@ -3082,9 +3084,7 @@ static void __disable_ftrace_function_probe(void) | |||
3082 | } | 3084 | } |
3083 | 3085 | ||
3084 | /* no more funcs left */ | 3086 | /* no more funcs left */ |
3085 | ret = __unregister_ftrace_function(&trace_probe_ops); | 3087 | ftrace_shutdown(&trace_probe_ops, 0); |
3086 | if (!ret) | ||
3087 | ftrace_shutdown(&trace_probe_ops, 0); | ||
3088 | 3088 | ||
3089 | ftrace_probe_registered = 0; | 3089 | ftrace_probe_registered = 0; |
3090 | } | 3090 | } |
@@ -4366,12 +4366,15 @@ core_initcall(ftrace_nodyn_init); | |||
4366 | static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; } | 4366 | static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; } |
4367 | static inline void ftrace_startup_enable(int command) { } | 4367 | static inline void ftrace_startup_enable(int command) { } |
4368 | /* Keep as macros so we do not need to define the commands */ | 4368 | /* Keep as macros so we do not need to define the commands */ |
4369 | # define ftrace_startup(ops, command) \ | 4369 | # define ftrace_startup(ops, command) \ |
4370 | ({ \ | 4370 | ({ \ |
4371 | (ops)->flags |= FTRACE_OPS_FL_ENABLED; \ | 4371 | int ___ret = __register_ftrace_function(ops); \ |
4372 | 0; \ | 4372 | if (!___ret) \ |
4373 | (ops)->flags |= FTRACE_OPS_FL_ENABLED; \ | ||
4374 | ___ret; \ | ||
4373 | }) | 4375 | }) |
4374 | # define ftrace_shutdown(ops, command) do { } while (0) | 4376 | # define ftrace_shutdown(ops, command) __unregister_ftrace_function(ops) |
4377 | |||
4375 | # define ftrace_startup_sysctl() do { } while (0) | 4378 | # define ftrace_startup_sysctl() do { } while (0) |
4376 | # define ftrace_shutdown_sysctl() do { } while (0) | 4379 | # define ftrace_shutdown_sysctl() do { } while (0) |
4377 | 4380 | ||
@@ -4780,9 +4783,7 @@ int register_ftrace_function(struct ftrace_ops *ops) | |||
4780 | 4783 | ||
4781 | mutex_lock(&ftrace_lock); | 4784 | mutex_lock(&ftrace_lock); |
4782 | 4785 | ||
4783 | ret = __register_ftrace_function(ops); | 4786 | ret = ftrace_startup(ops, 0); |
4784 | if (!ret) | ||
4785 | ret = ftrace_startup(ops, 0); | ||
4786 | 4787 | ||
4787 | mutex_unlock(&ftrace_lock); | 4788 | mutex_unlock(&ftrace_lock); |
4788 | 4789 | ||
@@ -4801,9 +4802,7 @@ int unregister_ftrace_function(struct ftrace_ops *ops) | |||
4801 | int ret; | 4802 | int ret; |
4802 | 4803 | ||
4803 | mutex_lock(&ftrace_lock); | 4804 | mutex_lock(&ftrace_lock); |
4804 | ret = __unregister_ftrace_function(ops); | 4805 | ret = ftrace_shutdown(ops, 0); |
4805 | if (!ret) | ||
4806 | ftrace_shutdown(ops, 0); | ||
4807 | mutex_unlock(&ftrace_lock); | 4806 | mutex_unlock(&ftrace_lock); |
4808 | 4807 | ||
4809 | return ret; | 4808 | return ret; |
@@ -4997,6 +4996,13 @@ ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state, | |||
4997 | return NOTIFY_DONE; | 4996 | return NOTIFY_DONE; |
4998 | } | 4997 | } |
4999 | 4998 | ||
4999 | /* Just a placeholder for function graph */ | ||
5000 | static struct ftrace_ops fgraph_ops __read_mostly = { | ||
5001 | .func = ftrace_stub, | ||
5002 | .flags = FTRACE_OPS_FL_STUB | FTRACE_OPS_FL_GLOBAL | | ||
5003 | FTRACE_OPS_FL_RECURSION_SAFE, | ||
5004 | }; | ||
5005 | |||
5000 | int register_ftrace_graph(trace_func_graph_ret_t retfunc, | 5006 | int register_ftrace_graph(trace_func_graph_ret_t retfunc, |
5001 | trace_func_graph_ent_t entryfunc) | 5007 | trace_func_graph_ent_t entryfunc) |
5002 | { | 5008 | { |
@@ -5023,7 +5029,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, | |||
5023 | ftrace_graph_return = retfunc; | 5029 | ftrace_graph_return = retfunc; |
5024 | ftrace_graph_entry = entryfunc; | 5030 | ftrace_graph_entry = entryfunc; |
5025 | 5031 | ||
5026 | ret = ftrace_startup(&global_ops, FTRACE_START_FUNC_RET); | 5032 | ret = ftrace_startup(&fgraph_ops, FTRACE_START_FUNC_RET); |
5027 | 5033 | ||
5028 | out: | 5034 | out: |
5029 | mutex_unlock(&ftrace_lock); | 5035 | mutex_unlock(&ftrace_lock); |
@@ -5040,7 +5046,7 @@ void unregister_ftrace_graph(void) | |||
5040 | ftrace_graph_active--; | 5046 | ftrace_graph_active--; |
5041 | ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; | 5047 | ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; |
5042 | ftrace_graph_entry = ftrace_graph_entry_stub; | 5048 | ftrace_graph_entry = ftrace_graph_entry_stub; |
5043 | ftrace_shutdown(&global_ops, FTRACE_STOP_FUNC_RET); | 5049 | ftrace_shutdown(&fgraph_ops, FTRACE_STOP_FUNC_RET); |
5044 | unregister_pm_notifier(&ftrace_suspend_notifier); | 5050 | unregister_pm_notifier(&ftrace_suspend_notifier); |
5045 | unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); | 5051 | unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); |
5046 | 5052 | ||
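The net effect of the ftrace.c changes is a calling-convention cleanup: ftrace_startup() now registers the ops itself, and ftrace_shutdown() unregisters it (returning int so failures propagate). Callers shrink from the old two-step dance to a single call, as the diff to register_ftrace_function() above shows:

int register_ftrace_function(struct ftrace_ops *ops)
{
	int ret;

	mutex_lock(&ftrace_lock);

	/*
	 * Old pattern:
	 *	ret = __register_ftrace_function(ops);
	 *	if (!ret)
	 *		ret = ftrace_startup(ops, 0);
	 *
	 * New pattern -- registration folded into startup:
	 */
	ret = ftrace_startup(ops, 0);

	mutex_unlock(&ftrace_lock);
	return ret;
}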
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 78e27e3b52ac..e854f420e033 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -24,6 +24,12 @@ static int total_ref_count; | |||
24 | static int perf_trace_event_perm(struct ftrace_event_call *tp_event, | 24 | static int perf_trace_event_perm(struct ftrace_event_call *tp_event, |
25 | struct perf_event *p_event) | 25 | struct perf_event *p_event) |
26 | { | 26 | { |
27 | if (tp_event->perf_perm) { | ||
28 | int ret = tp_event->perf_perm(tp_event, p_event); | ||
29 | if (ret) | ||
30 | return ret; | ||
31 | } | ||
32 | |||
27 | /* The ftrace function trace is allowed only for root. */ | 33 | /* The ftrace function trace is allowed only for root. */ |
28 | if (ftrace_event_is_function(tp_event) && | 34 | if (ftrace_event_is_function(tp_event) && |
29 | perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) | 35 | perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) |
@@ -173,7 +179,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event, | |||
173 | int perf_trace_init(struct perf_event *p_event) | 179 | int perf_trace_init(struct perf_event *p_event) |
174 | { | 180 | { |
175 | struct ftrace_event_call *tp_event; | 181 | struct ftrace_event_call *tp_event; |
176 | int event_id = p_event->attr.config; | 182 | u64 event_id = p_event->attr.config; |
177 | int ret = -EINVAL; | 183 | int ret = -EINVAL; |
178 | 184 | ||
179 | mutex_lock(&event_mutex); | 185 | mutex_lock(&event_mutex); |
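The new perf_perm hook lets an individual event veto perf access before the generic paranoia checks run. A hypothetical callback (the name and policy are invented for illustration; only the signature comes from the diff):

/* Hypothetical: restrict one event to CAP_SYS_ADMIN unconditionally. */
static int my_event_perf_perm(struct ftrace_event_call *call,
			      struct perf_event *p_event)
{
	return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
}

/* Wired up at event definition time (illustrative):
 *	my_event_call.perf_perm = my_event_perf_perm;
 */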
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index f919a2e21bf3..a11800ae96de 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2314,6 +2314,9 @@ int event_trace_del_tracer(struct trace_array *tr) | |||
2314 | /* Disable any running events */ | 2314 | /* Disable any running events */ |
2315 | __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0); | 2315 | __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0); |
2316 | 2316 | ||
2317 | /* Accesses to events are within rcu_read_lock_sched() */ | ||
2318 | synchronize_sched(); | ||
2319 | |||
2317 | down_write(&trace_event_sem); | 2320 | down_write(&trace_event_sem); |
2318 | __trace_remove_event_dirs(tr); | 2321 | __trace_remove_event_dirs(tr); |
2319 | debugfs_remove_recursive(tr->event_dir); | 2322 | debugfs_remove_recursive(tr->event_dir); |
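The added synchronize_sched() enforces the classic RCU teardown order: stop publishing, wait out a grace period, then free. A sketch of that ordering, assuming readers use rcu_read_lock_sched() as the new comment states:

/* Writer side (teardown); the order of these steps is the point. */
__ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0); /* 1. disable  */
synchronize_sched();	/* 2. wait for in-flight sched-RCU readers      */
__trace_remove_event_dirs(tr);	/* 3. now safe to tear down            */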
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index e4b6d11bdf78..ea90eb5f6f17 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -431,11 +431,6 @@ static void unreg_event_syscall_enter(struct ftrace_event_file *file, | |||
431 | if (!tr->sys_refcount_enter) | 431 | if (!tr->sys_refcount_enter) |
432 | unregister_trace_sys_enter(ftrace_syscall_enter, tr); | 432 | unregister_trace_sys_enter(ftrace_syscall_enter, tr); |
433 | mutex_unlock(&syscall_trace_lock); | 433 | mutex_unlock(&syscall_trace_lock); |
434 | /* | ||
435 | * Callers expect the event to be completely disabled on | ||
436 | * return, so wait for current handlers to finish. | ||
437 | */ | ||
438 | synchronize_sched(); | ||
439 | } | 434 | } |
440 | 435 | ||
441 | static int reg_event_syscall_exit(struct ftrace_event_file *file, | 436 | static int reg_event_syscall_exit(struct ftrace_event_file *file, |
@@ -474,11 +469,6 @@ static void unreg_event_syscall_exit(struct ftrace_event_file *file, | |||
474 | if (!tr->sys_refcount_exit) | 469 | if (!tr->sys_refcount_exit) |
475 | unregister_trace_sys_exit(ftrace_syscall_exit, tr); | 470 | unregister_trace_sys_exit(ftrace_syscall_exit, tr); |
476 | mutex_unlock(&syscall_trace_lock); | 471 | mutex_unlock(&syscall_trace_lock); |
477 | /* | ||
478 | * Callers expect the event to be completely disabled on | ||
479 | * return, so wait for current handlers to finish. | ||
480 | */ | ||
481 | synchronize_sched(); | ||
482 | } | 472 | } |
483 | 473 | ||
484 | static int __init init_syscall_trace(struct ftrace_event_call *call) | 474 | static int __init init_syscall_trace(struct ftrace_event_call *call) |
diff --git a/kernel/user.c b/kernel/user.c
index a3a0dbfda329..c006131beb77 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -51,9 +51,9 @@ struct user_namespace init_user_ns = { | |||
51 | .owner = GLOBAL_ROOT_UID, | 51 | .owner = GLOBAL_ROOT_UID, |
52 | .group = GLOBAL_ROOT_GID, | 52 | .group = GLOBAL_ROOT_GID, |
53 | .proc_inum = PROC_USER_INIT_INO, | 53 | .proc_inum = PROC_USER_INIT_INO, |
54 | #ifdef CONFIG_KEYS_KERBEROS_CACHE | 54 | #ifdef CONFIG_PERSISTENT_KEYRINGS |
55 | .krb_cache_register_sem = | 55 | .persistent_keyring_register_sem = |
56 | __RWSEM_INITIALIZER(init_user_ns.krb_cache_register_sem), | 56 | __RWSEM_INITIALIZER(init_user_ns.persistent_keyring_register_sem), |
57 | #endif | 57 | #endif |
58 | }; | 58 | }; |
59 | EXPORT_SYMBOL_GPL(init_user_ns); | 59 | EXPORT_SYMBOL_GPL(init_user_ns); |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 987293d03ebc..b010eac595d2 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -305,6 +305,9 @@ static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER); | |||
305 | /* I: attributes used when instantiating standard unbound pools on demand */ | 305 | /* I: attributes used when instantiating standard unbound pools on demand */ |
306 | static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS]; | 306 | static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS]; |
307 | 307 | ||
308 | /* I: attributes used when instantiating ordered pools on demand */ | ||
309 | static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS]; | ||
310 | |||
308 | struct workqueue_struct *system_wq __read_mostly; | 311 | struct workqueue_struct *system_wq __read_mostly; |
309 | EXPORT_SYMBOL(system_wq); | 312 | EXPORT_SYMBOL(system_wq); |
310 | struct workqueue_struct *system_highpri_wq __read_mostly; | 313 | struct workqueue_struct *system_highpri_wq __read_mostly; |
@@ -518,14 +521,21 @@ static inline void debug_work_activate(struct work_struct *work) { } | |||
518 | static inline void debug_work_deactivate(struct work_struct *work) { } | 521 | static inline void debug_work_deactivate(struct work_struct *work) { } |
519 | #endif | 522 | #endif |
520 | 523 | ||
521 | /* allocate ID and assign it to @pool */ | 524 | /** |
525 | * worker_pool_assign_id - allocate ID and assign it to @pool | ||
526 | * @pool: the pool pointer of interest | ||
527 | * | ||
528 | * Returns 0 if an ID in [0, WORK_OFFQ_POOL_NONE) is allocated and assigned | ||
529 | * successfully, -errno on failure. | ||
530 | */ | ||
522 | static int worker_pool_assign_id(struct worker_pool *pool) | 531 | static int worker_pool_assign_id(struct worker_pool *pool) |
523 | { | 532 | { |
524 | int ret; | 533 | int ret; |
525 | 534 | ||
526 | lockdep_assert_held(&wq_pool_mutex); | 535 | lockdep_assert_held(&wq_pool_mutex); |
527 | 536 | ||
528 | ret = idr_alloc(&worker_pool_idr, pool, 0, 0, GFP_KERNEL); | 537 | ret = idr_alloc(&worker_pool_idr, pool, 0, WORK_OFFQ_POOL_NONE, |
538 | GFP_KERNEL); | ||
529 | if (ret >= 0) { | 539 | if (ret >= 0) { |
530 | pool->id = ret; | 540 | pool->id = ret; |
531 | return 0; | 541 | return 0; |
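idr_alloc() treats its end argument as an exclusive upper bound (when positive), so passing WORK_OFFQ_POOL_NONE guarantees at allocation time that every pool ID fits in the OFFQ pool-ID bit field; that is what makes the BUILD_BUG_ON removed from init_workqueues() below redundant. The call shape:

/* Allocate an ID in [0, WORK_OFFQ_POOL_NONE); fail rather than overflow. */
ret = idr_alloc(&worker_pool_idr, pool, 0, WORK_OFFQ_POOL_NONE, GFP_KERNEL);
if (ret >= 0)
	pool->id = ret;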
@@ -1320,7 +1330,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, | |||
1320 | 1330 | ||
1321 | debug_work_activate(work); | 1331 | debug_work_activate(work); |
1322 | 1332 | ||
1323 | /* if dying, only works from the same workqueue are allowed */ | 1333 | /* if draining, only works from the same workqueue are allowed */ |
1324 | if (unlikely(wq->flags & __WQ_DRAINING) && | 1334 | if (unlikely(wq->flags & __WQ_DRAINING) && |
1325 | WARN_ON_ONCE(!is_chained_work(wq))) | 1335 | WARN_ON_ONCE(!is_chained_work(wq))) |
1326 | return; | 1336 | return; |
@@ -1736,16 +1746,17 @@ static struct worker *create_worker(struct worker_pool *pool) | |||
1736 | if (IS_ERR(worker->task)) | 1746 | if (IS_ERR(worker->task)) |
1737 | goto fail; | 1747 | goto fail; |
1738 | 1748 | ||
1749 | set_user_nice(worker->task, pool->attrs->nice); | ||
1750 | |||
1751 | /* prevent userland from meddling with cpumask of workqueue workers */ | ||
1752 | worker->task->flags |= PF_NO_SETAFFINITY; | ||
1753 | |||
1739 | /* | 1754 | /* |
1740 | * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any | 1755 | * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any |
1741 | * online CPUs. It'll be re-applied when any of the CPUs come up. | 1756 | * online CPUs. It'll be re-applied when any of the CPUs come up. |
1742 | */ | 1757 | */ |
1743 | set_user_nice(worker->task, pool->attrs->nice); | ||
1744 | set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask); | 1758 | set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask); |
1745 | 1759 | ||
1746 | /* prevent userland from meddling with cpumask of workqueue workers */ | ||
1747 | worker->task->flags |= PF_NO_SETAFFINITY; | ||
1748 | |||
1749 | /* | 1760 | /* |
1750 | * The caller is responsible for ensuring %POOL_DISASSOCIATED | 1761 | * The caller is responsible for ensuring %POOL_DISASSOCIATED |
1751 | * remains stable across this function. See the comments above the | 1762 | * remains stable across this function. See the comments above the |
@@ -2840,19 +2851,6 @@ already_gone: | |||
2840 | return false; | 2851 | return false; |
2841 | } | 2852 | } |
2842 | 2853 | ||
2843 | static bool __flush_work(struct work_struct *work) | ||
2844 | { | ||
2845 | struct wq_barrier barr; | ||
2846 | |||
2847 | if (start_flush_work(work, &barr)) { | ||
2848 | wait_for_completion(&barr.done); | ||
2849 | destroy_work_on_stack(&barr.work); | ||
2850 | return true; | ||
2851 | } else { | ||
2852 | return false; | ||
2853 | } | ||
2854 | } | ||
2855 | |||
2856 | /** | 2854 | /** |
2857 | * flush_work - wait for a work to finish executing the last queueing instance | 2855 | * flush_work - wait for a work to finish executing the last queueing instance |
2858 | * @work: the work to flush | 2856 | * @work: the work to flush |
@@ -2866,10 +2864,18 @@ static bool __flush_work(struct work_struct *work) | |||
2866 | */ | 2864 | */ |
2867 | bool flush_work(struct work_struct *work) | 2865 | bool flush_work(struct work_struct *work) |
2868 | { | 2866 | { |
2867 | struct wq_barrier barr; | ||
2868 | |||
2869 | lock_map_acquire(&work->lockdep_map); | 2869 | lock_map_acquire(&work->lockdep_map); |
2870 | lock_map_release(&work->lockdep_map); | 2870 | lock_map_release(&work->lockdep_map); |
2871 | 2871 | ||
2872 | return __flush_work(work); | 2872 | if (start_flush_work(work, &barr)) { |
2873 | wait_for_completion(&barr.done); | ||
2874 | destroy_work_on_stack(&barr.work); | ||
2875 | return true; | ||
2876 | } else { | ||
2877 | return false; | ||
2878 | } | ||
2873 | } | 2879 | } |
2874 | EXPORT_SYMBOL_GPL(flush_work); | 2880 | EXPORT_SYMBOL_GPL(flush_work); |
2875 | 2881 | ||
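With __flush_work() gone, flush_work() is the single entry point again, and its return value says whether there was anything to wait for. A usage sketch:

/* Sketch: synchronize against the last queued instance of a work item. */
static void example_quiesce(struct work_struct *work)
{
	if (flush_work(work))
		pr_debug("waited for a pending/running instance\n");
	else
		pr_debug("work was already idle\n");
}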
@@ -4106,7 +4112,7 @@ out_unlock: | |||
4106 | static int alloc_and_link_pwqs(struct workqueue_struct *wq) | 4112 | static int alloc_and_link_pwqs(struct workqueue_struct *wq) |
4107 | { | 4113 | { |
4108 | bool highpri = wq->flags & WQ_HIGHPRI; | 4114 | bool highpri = wq->flags & WQ_HIGHPRI; |
4109 | int cpu; | 4115 | int cpu, ret; |
4110 | 4116 | ||
4111 | if (!(wq->flags & WQ_UNBOUND)) { | 4117 | if (!(wq->flags & WQ_UNBOUND)) { |
4112 | wq->cpu_pwqs = alloc_percpu(struct pool_workqueue); | 4118 | wq->cpu_pwqs = alloc_percpu(struct pool_workqueue); |
@@ -4126,6 +4132,13 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq) | |||
4126 | mutex_unlock(&wq->mutex); | 4132 | mutex_unlock(&wq->mutex); |
4127 | } | 4133 | } |
4128 | return 0; | 4134 | return 0; |
4135 | } else if (wq->flags & __WQ_ORDERED) { | ||
4136 | ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]); | ||
4137 | /* there should be only a single pwq for the ordering guarantee */ | ||
4138 | WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node || | ||
4139 | wq->pwqs.prev != &wq->dfl_pwq->pwqs_node), | ||
4140 | "ordering guarantee broken for workqueue %s\n", wq->name); | ||
4141 | return ret; | ||
4129 | } else { | 4142 | } else { |
4130 | return apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]); | 4143 | return apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]); |
4131 | } | 4144 | } |
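With this branch in place, ordered workqueues get their pool_workqueues from the new ordered_wq_attrs instead of the unbound defaults. From the caller's point of view nothing changes; an ordered queue is still created the usual way (the queue name below is illustrative):

static struct workqueue_struct *example_wq;

static int __init example_init(void)
{
	/* One work item executes at a time, in queueing order. */
	example_wq = alloc_ordered_workqueue("example_ordered", 0);
	return example_wq ? 0 : -ENOMEM;
}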
@@ -4814,14 +4827,7 @@ long work_on_cpu(int cpu, long (*fn)(void *), void *arg) | |||
4814 | 4827 | ||
4815 | INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn); | 4828 | INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn); |
4816 | schedule_work_on(cpu, &wfc.work); | 4829 | schedule_work_on(cpu, &wfc.work); |
4817 | 4830 | flush_work(&wfc.work); | |
4818 | /* | ||
4819 | * The work item is on-stack and can't lead to deadlock through | ||
4820 | * flushing. Use __flush_work() to avoid spurious lockdep warnings | ||
4821 | * when work_on_cpu()s are nested. | ||
4822 | */ | ||
4823 | __flush_work(&wfc.work); | ||
4824 | |||
4825 | return wfc.ret; | 4831 | return wfc.ret; |
4826 | } | 4832 | } |
4827 | EXPORT_SYMBOL_GPL(work_on_cpu); | 4833 | EXPORT_SYMBOL_GPL(work_on_cpu); |
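The on-stack work item in work_on_cpu() can now be flushed with plain flush_work(), since the lockdep false positive that motivated __flush_work() is gone. Typical usage is unchanged; a sketch with a made-up payload:

/* work_on_cpu() runs fn(arg) synchronously on @cpu and returns its result. */
static long which_cpu_fn(void *arg)
{
	return smp_processor_id();	/* illustrative payload */
}

static long run_on_cpu1(void)
{
	return work_on_cpu(1, which_cpu_fn, NULL);	/* blocks until done */
}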
@@ -5009,10 +5015,6 @@ static int __init init_workqueues(void) | |||
5009 | int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL }; | 5015 | int std_nice[NR_STD_WORKER_POOLS] = { 0, HIGHPRI_NICE_LEVEL }; |
5010 | int i, cpu; | 5016 | int i, cpu; |
5011 | 5017 | ||
5012 | /* make sure we have enough bits for OFFQ pool ID */ | ||
5013 | BUILD_BUG_ON((1LU << (BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT)) < | ||
5014 | WORK_CPU_END * NR_STD_WORKER_POOLS); | ||
5015 | |||
5016 | WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long)); | 5018 | WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long)); |
5017 | 5019 | ||
5018 | pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC); | 5020 | pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC); |
@@ -5051,13 +5053,23 @@ static int __init init_workqueues(void) | |||
5051 | } | 5053 | } |
5052 | } | 5054 | } |
5053 | 5055 | ||
5054 | /* create default unbound wq attrs */ | 5056 | /* create default unbound and ordered wq attrs */ |
5055 | for (i = 0; i < NR_STD_WORKER_POOLS; i++) { | 5057 | for (i = 0; i < NR_STD_WORKER_POOLS; i++) { |
5056 | struct workqueue_attrs *attrs; | 5058 | struct workqueue_attrs *attrs; |
5057 | 5059 | ||
5058 | BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL))); | 5060 | BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL))); |
5059 | attrs->nice = std_nice[i]; | 5061 | attrs->nice = std_nice[i]; |
5060 | unbound_std_wq_attrs[i] = attrs; | 5062 | unbound_std_wq_attrs[i] = attrs; |
5063 | |||
5064 | /* | ||
5065 | * An ordered wq should have only one pwq as ordering is | ||
5066 | * guaranteed by max_active, which is enforced by pwqs. | ||
5067 | * Turn off NUMA so that dfl_pwq is used for all nodes. | ||
5068 | */ | ||
5069 | BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL))); | ||
5070 | attrs->nice = std_nice[i]; | ||
5071 | attrs->no_numa = true; | ||
5072 | ordered_wq_attrs[i] = attrs; | ||
5061 | } | 5073 | } |
5062 | 5074 | ||
5063 | system_wq = alloc_workqueue("events", 0, 0); | 5075 | system_wq = alloc_workqueue("events", 0, 0); |
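This is the companion to the alloc_and_link_pwqs() change above: ordered attrs are the unbound defaults with NUMA turned off, because per-node pwqs would let work items on different nodes run concurrently and break the max_active=1 ordering guarantee. A sketch of the invariant an ordered queue must keep (assuming the internal pwqs list of struct workqueue_struct):

/* Sketch: an ordered wq must own exactly one pool_workqueue. */
static void assert_single_pwq(struct workqueue_struct *wq)
{
	WARN(!list_is_singular(&wq->pwqs),
	     "ordering guarantee broken for workqueue %s\n", wq->name);
}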