path: root/kernel
author	Thomas Gleixner <tglx@linutronix.de>	2016-01-12 05:01:12 -0500
committer	Thomas Gleixner <tglx@linutronix.de>	2016-01-12 05:01:12 -0500
commit	1f16f116b01c110db20ab808562c8b8bc3ee3d6e (patch)
tree	44db563f64cf5f8d62af8f99a61e2b248c44ea3a /kernel
parent	03724ac3d48f8f0e3caf1d30fa134f8fd96c94e2 (diff)
parent	f9eccf24615672896dc13251410c3f2f33a14f95 (diff)
Merge branches 'clockevents/4.4-fixes' and 'clockevents/4.5-fixes' of http://git.linaro.org/people/daniel.lezcano/linux into timers/urgent
Pull in fixes from Daniel Lezcano:

 - Fix the vt8500 timer leading to a system lock up when dealing with a
   too small delta (Roman Volkov)

 - Select CLKSRC_MMIO when the fsl_ftm_timer is enabled with COMPILE_TEST
   (Daniel Lezcano)

 - Prevent compiling timers that use the 'iomem' API when the architecture
   does not have HAS_IOMEM set (Richard Weinberger)
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/bpf/arraymap.c             10
-rw-r--r--  kernel/bpf/hashtab.c              34
-rw-r--r--  kernel/bpf/inode.c                 6
-rw-r--r--  kernel/bpf/syscall.c              40
-rw-r--r--  kernel/bpf/verifier.c              3
-rw-r--r--  kernel/cgroup.c                   99
-rw-r--r--  kernel/cgroup_freezer.c           23
-rw-r--r--  kernel/cgroup_pids.c              77
-rw-r--r--  kernel/cpuset.c                   33
-rw-r--r--  kernel/events/callchain.c          2
-rw-r--r--  kernel/events/core.c              90
-rw-r--r--  kernel/events/ring_buffer.c        2
-rw-r--r--  kernel/events/uprobes.c            2
-rw-r--r--  kernel/fork.c                      9
-rw-r--r--  kernel/irq_work.c                  2
-rw-r--r--  kernel/jump_label.c                2
-rw-r--r--  kernel/locking/lockdep.c           2
-rw-r--r--  kernel/locking/lockdep_proc.c      2
-rw-r--r--  kernel/locking/osq_lock.c          8
-rw-r--r--  kernel/pid.c                       4
-rw-r--r--  kernel/sched/clock.c               2
-rw-r--r--  kernel/sched/core.c               48
-rw-r--r--  kernel/sched/cputime.c             3
-rw-r--r--  kernel/sched/fair.c                2
-rw-r--r--  kernel/sched/rt.c                  2
-rw-r--r--  kernel/sched/sched.h               3
-rw-r--r--  kernel/sched/wait.c               28
-rw-r--r--  kernel/stop_machine.c              4
-rw-r--r--  kernel/time/posix-clock.c          4
-rw-r--r--  kernel/trace/ring_buffer.c        17
-rw-r--r--  kernel/trace/trace_event_perf.c    2
-rw-r--r--  kernel/trace/trace_events.c       16
32 files changed, 363 insertions, 218 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 3f4c99e06c6b..b0799bced518 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -28,11 +28,17 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	    attr->value_size == 0)
 		return ERR_PTR(-EINVAL);
 
+	if (attr->value_size >= 1 << (KMALLOC_SHIFT_MAX - 1))
+		/* if value_size is bigger, the user space won't be able to
+		 * access the elements.
+		 */
+		return ERR_PTR(-E2BIG);
+
 	elem_size = round_up(attr->value_size, 8);
 
 	/* check round_up into zero and u32 overflow */
 	if (elem_size == 0 ||
-	    attr->max_entries > (U32_MAX - sizeof(*array)) / elem_size)
+	    attr->max_entries > (U32_MAX - PAGE_SIZE - sizeof(*array)) / elem_size)
 		return ERR_PTR(-ENOMEM);
 
 	array_size = sizeof(*array) + attr->max_entries * elem_size;
@@ -105,7 +111,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
 		/* all elements already exist */
 		return -EEXIST;
 
-	memcpy(array->value + array->elem_size * index, value, array->elem_size);
+	memcpy(array->value + array->elem_size * index, value, map->value_size);
 	return 0;
 }
 
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 19909b22b4f8..34777b3746fa 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -64,12 +64,35 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 		 */
 		goto free_htab;
 
-	err = -ENOMEM;
+	if (htab->map.value_size >= (1 << (KMALLOC_SHIFT_MAX - 1)) -
+	    MAX_BPF_STACK - sizeof(struct htab_elem))
+		/* if value_size is bigger, the user space won't be able to
+		 * access the elements via bpf syscall. This check also makes
+		 * sure that the elem_size doesn't overflow and it's
+		 * kmalloc-able later in htab_map_update_elem()
+		 */
+		goto free_htab;
+
+	htab->elem_size = sizeof(struct htab_elem) +
+			  round_up(htab->map.key_size, 8) +
+			  htab->map.value_size;
+
 	/* prevent zero size kmalloc and check for u32 overflow */
 	if (htab->n_buckets == 0 ||
 	    htab->n_buckets > U32_MAX / sizeof(struct hlist_head))
 		goto free_htab;
 
+	if ((u64) htab->n_buckets * sizeof(struct hlist_head) +
+	    (u64) htab->elem_size * htab->map.max_entries >=
+	    U32_MAX - PAGE_SIZE)
+		/* make sure page count doesn't overflow */
+		goto free_htab;
+
+	htab->map.pages = round_up(htab->n_buckets * sizeof(struct hlist_head) +
+				   htab->elem_size * htab->map.max_entries,
+				   PAGE_SIZE) >> PAGE_SHIFT;
+
+	err = -ENOMEM;
 	htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct hlist_head),
 				      GFP_USER | __GFP_NOWARN);
 
@@ -85,13 +108,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	raw_spin_lock_init(&htab->lock);
 	htab->count = 0;
 
-	htab->elem_size = sizeof(struct htab_elem) +
-			  round_up(htab->map.key_size, 8) +
-			  htab->map.value_size;
-
-	htab->map.pages = round_up(htab->n_buckets * sizeof(struct hlist_head) +
-				   htab->elem_size * htab->map.max_entries,
-				   PAGE_SIZE) >> PAGE_SHIFT;
 	return &htab->map;
 
 free_htab:
@@ -222,7 +238,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	WARN_ON_ONCE(!rcu_read_lock_held());
 
 	/* allocate new element outside of lock */
-	l_new = kmalloc(htab->elem_size, GFP_ATOMIC);
+	l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
 	if (!l_new)
 		return -ENOMEM;
 
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index be6d726e31c9..5a8a797d50b7 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -34,7 +34,7 @@ static void *bpf_any_get(void *raw, enum bpf_type type)
 		atomic_inc(&((struct bpf_prog *)raw)->aux->refcnt);
 		break;
 	case BPF_TYPE_MAP:
-		atomic_inc(&((struct bpf_map *)raw)->refcnt);
+		bpf_map_inc(raw, true);
 		break;
 	default:
 		WARN_ON_ONCE(1);
@@ -51,7 +51,7 @@ static void bpf_any_put(void *raw, enum bpf_type type)
 		bpf_prog_put(raw);
 		break;
 	case BPF_TYPE_MAP:
-		bpf_map_put(raw);
+		bpf_map_put_with_uref(raw);
 		break;
 	default:
 		WARN_ON_ONCE(1);
@@ -64,7 +64,7 @@ static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type)
 	void *raw;
 
 	*type = BPF_TYPE_MAP;
-	raw = bpf_map_get(ufd);
+	raw = bpf_map_get_with_uref(ufd);
 	if (IS_ERR(raw)) {
 		*type = BPF_TYPE_PROG;
 		raw = bpf_prog_get(ufd);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0d3313d02a7e..3b39550d8485 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -82,6 +82,14 @@ static void bpf_map_free_deferred(struct work_struct *work)
 	map->ops->map_free(map);
 }
 
+static void bpf_map_put_uref(struct bpf_map *map)
+{
+	if (atomic_dec_and_test(&map->usercnt)) {
+		if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
+			bpf_fd_array_map_clear(map);
+	}
+}
+
 /* decrement map refcnt and schedule it for freeing via workqueue
  * (unrelying map implementation ops->map_free() might sleep)
  */
@@ -93,17 +101,15 @@ void bpf_map_put(struct bpf_map *map)
 	}
 }
 
-static int bpf_map_release(struct inode *inode, struct file *filp)
+void bpf_map_put_with_uref(struct bpf_map *map)
 {
-	struct bpf_map *map = filp->private_data;
-
-	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
-		/* prog_array stores refcnt-ed bpf_prog pointers
-		 * release them all when user space closes prog_array_fd
-		 */
-		bpf_fd_array_map_clear(map);
-
+	bpf_map_put_uref(map);
 	bpf_map_put(map);
+}
+
+static int bpf_map_release(struct inode *inode, struct file *filp)
+{
+	bpf_map_put_with_uref(filp->private_data);
 	return 0;
 }
 
@@ -142,6 +148,7 @@ static int map_create(union bpf_attr *attr)
 		return PTR_ERR(map);
 
 	atomic_set(&map->refcnt, 1);
+	atomic_set(&map->usercnt, 1);
 
 	err = bpf_map_charge_memlock(map);
 	if (err)
@@ -174,7 +181,14 @@ struct bpf_map *__bpf_map_get(struct fd f)
 	return f.file->private_data;
 }
 
-struct bpf_map *bpf_map_get(u32 ufd)
+void bpf_map_inc(struct bpf_map *map, bool uref)
+{
+	atomic_inc(&map->refcnt);
+	if (uref)
+		atomic_inc(&map->usercnt);
+}
+
+struct bpf_map *bpf_map_get_with_uref(u32 ufd)
 {
 	struct fd f = fdget(ufd);
 	struct bpf_map *map;
@@ -183,7 +197,7 @@ struct bpf_map *bpf_map_get(u32 ufd)
 	if (IS_ERR(map))
 		return map;
 
-	atomic_inc(&map->refcnt);
+	bpf_map_inc(map, true);
 	fdput(f);
 
 	return map;
@@ -226,7 +240,7 @@ static int map_lookup_elem(union bpf_attr *attr)
 		goto free_key;
 
 	err = -ENOMEM;
-	value = kmalloc(map->value_size, GFP_USER);
+	value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN);
 	if (!value)
 		goto free_key;
 
@@ -285,7 +299,7 @@ static int map_update_elem(union bpf_attr *attr)
 		goto free_key;
 
 	err = -ENOMEM;
-	value = kmalloc(map->value_size, GFP_USER);
+	value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN);
 	if (!value)
 		goto free_key;
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c6073056badf..a7945d10b378 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2021,8 +2021,7 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
 			 * will be used by the valid program until it's unloaded
 			 * and all maps are released in free_bpf_prog_info()
 			 */
-			atomic_inc(&map->refcnt);
-
+			bpf_map_inc(map, false);
 			fdput(f);
 next_insn:
 			insn++;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index f1603c153890..470f6536b9e8 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -98,6 +98,12 @@ static DEFINE_SPINLOCK(css_set_lock);
 static DEFINE_SPINLOCK(cgroup_idr_lock);
 
 /*
+ * Protects cgroup_file->kn for !self csses. It synchronizes notifications
+ * against file removal/re-creation across css hiding.
+ */
+static DEFINE_SPINLOCK(cgroup_file_kn_lock);
+
+/*
  * Protects cgroup_subsys->release_agent_path. Modifying it also requires
  * cgroup_mutex. Reading requires either cgroup_mutex or this spinlock.
  */
@@ -754,9 +760,11 @@ static void put_css_set_locked(struct css_set *cset)
 	if (!atomic_dec_and_test(&cset->refcount))
 		return;
 
-	/* This css_set is dead. unlink it and release cgroup refcounts */
-	for_each_subsys(ss, ssid)
+	/* This css_set is dead. unlink it and release cgroup and css refs */
+	for_each_subsys(ss, ssid) {
 		list_del(&cset->e_cset_node[ssid]);
+		css_put(cset->subsys[ssid]);
+	}
 	hash_del(&cset->hlist);
 	css_set_count--;
 
@@ -1056,9 +1064,13 @@ static struct css_set *find_css_set(struct css_set *old_cset,
 	key = css_set_hash(cset->subsys);
 	hash_add(css_set_table, &cset->hlist, key);
 
-	for_each_subsys(ss, ssid)
+	for_each_subsys(ss, ssid) {
+		struct cgroup_subsys_state *css = cset->subsys[ssid];
+
 		list_add_tail(&cset->e_cset_node[ssid],
-			      &cset->subsys[ssid]->cgroup->e_csets[ssid]);
+			      &css->cgroup->e_csets[ssid]);
+		css_get(css);
+	}
 
 	spin_unlock_bh(&css_set_lock);
 
@@ -1393,6 +1405,16 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
 	char name[CGROUP_FILE_NAME_MAX];
 
 	lockdep_assert_held(&cgroup_mutex);
+
+	if (cft->file_offset) {
+		struct cgroup_subsys_state *css = cgroup_css(cgrp, cft->ss);
+		struct cgroup_file *cfile = (void *)css + cft->file_offset;
+
+		spin_lock_irq(&cgroup_file_kn_lock);
+		cfile->kn = NULL;
+		spin_unlock_irq(&cgroup_file_kn_lock);
+	}
+
 	kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
 }
 
@@ -1856,7 +1878,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 
 	INIT_LIST_HEAD(&cgrp->self.sibling);
 	INIT_LIST_HEAD(&cgrp->self.children);
-	INIT_LIST_HEAD(&cgrp->self.files);
 	INIT_LIST_HEAD(&cgrp->cset_links);
 	INIT_LIST_HEAD(&cgrp->pidlists);
 	mutex_init(&cgrp->pidlist_mutex);
@@ -2216,6 +2237,9 @@ struct cgroup_taskset {
 	struct list_head	src_csets;
 	struct list_head	dst_csets;
 
+	/* the subsys currently being processed */
+	int			ssid;
+
 	/*
 	 * Fields for cgroup_taskset_*() iteration.
 	 *
@@ -2278,25 +2302,29 @@ static void cgroup_taskset_add(struct task_struct *task,
 /**
  * cgroup_taskset_first - reset taskset and return the first task
  * @tset: taskset of interest
+ * @dst_cssp: output variable for the destination css
  *
  * @tset iteration is initialized and the first task is returned.
  */
-struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset)
+struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
+					 struct cgroup_subsys_state **dst_cssp)
 {
 	tset->cur_cset = list_first_entry(tset->csets, struct css_set, mg_node);
 	tset->cur_task = NULL;
 
-	return cgroup_taskset_next(tset);
+	return cgroup_taskset_next(tset, dst_cssp);
 }
 
 /**
  * cgroup_taskset_next - iterate to the next task in taskset
 * @tset: taskset of interest
+ * @dst_cssp: output variable for the destination css
 *
 * Return the next task in @tset. Iteration must have been initialized
 * with cgroup_taskset_first().
 */
-struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
+struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
+					struct cgroup_subsys_state **dst_cssp)
 {
 	struct css_set *cset = tset->cur_cset;
 	struct task_struct *task = tset->cur_task;
@@ -2311,6 +2339,18 @@ struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset)
 	if (&task->cg_list != &cset->mg_tasks) {
 		tset->cur_cset = cset;
 		tset->cur_task = task;
+
+		/*
+		 * This function may be called both before and
+		 * after cgroup_taskset_migrate(). The two cases
+		 * can be distinguished by looking at whether @cset
+		 * has its ->mg_dst_cset set.
+		 */
+		if (cset->mg_dst_cset)
+			*dst_cssp = cset->mg_dst_cset->subsys[tset->ssid];
+		else
+			*dst_cssp = cset->subsys[tset->ssid];
+
 		return task;
 	}
 
@@ -2346,7 +2386,8 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
 	/* check that we can legitimately attach to the cgroup */
 	for_each_e_css(css, i, dst_cgrp) {
 		if (css->ss->can_attach) {
-			ret = css->ss->can_attach(css, tset);
+			tset->ssid = i;
+			ret = css->ss->can_attach(tset);
 			if (ret) {
 				failed_css = css;
 				goto out_cancel_attach;
@@ -2379,9 +2420,12 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
 	 */
 	tset->csets = &tset->dst_csets;
 
-	for_each_e_css(css, i, dst_cgrp)
-		if (css->ss->attach)
-			css->ss->attach(css, tset);
+	for_each_e_css(css, i, dst_cgrp) {
+		if (css->ss->attach) {
+			tset->ssid = i;
+			css->ss->attach(tset);
+		}
+	}
 
 	ret = 0;
 	goto out_release_tset;
@@ -2390,8 +2434,10 @@ out_cancel_attach:
 	for_each_e_css(css, i, dst_cgrp) {
 		if (css == failed_css)
 			break;
-		if (css->ss->cancel_attach)
-			css->ss->cancel_attach(css, tset);
+		if (css->ss->cancel_attach) {
+			tset->ssid = i;
+			css->ss->cancel_attach(tset);
+		}
 	}
 out_release_tset:
 	spin_lock_bh(&css_set_lock);
@@ -3313,9 +3359,9 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
 	if (cft->file_offset) {
 		struct cgroup_file *cfile = (void *)css + cft->file_offset;
 
-		kernfs_get(kn);
+		spin_lock_irq(&cgroup_file_kn_lock);
 		cfile->kn = kn;
-		list_add(&cfile->node, &css->files);
+		spin_unlock_irq(&cgroup_file_kn_lock);
 	}
 
 	return 0;
@@ -3553,6 +3599,22 @@ int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
 }
 
 /**
+ * cgroup_file_notify - generate a file modified event for a cgroup_file
+ * @cfile: target cgroup_file
+ *
+ * @cfile must have been obtained by setting cftype->file_offset.
+ */
+void cgroup_file_notify(struct cgroup_file *cfile)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&cgroup_file_kn_lock, flags);
+	if (cfile->kn)
+		kernfs_notify(cfile->kn);
+	spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
+}
+
+/**
  * cgroup_task_count - count the number of tasks in a cgroup.
  * @cgrp: the cgroup in question
  *
@@ -4613,13 +4675,9 @@ static void css_free_work_fn(struct work_struct *work)
 		container_of(work, struct cgroup_subsys_state, destroy_work);
 	struct cgroup_subsys *ss = css->ss;
 	struct cgroup *cgrp = css->cgroup;
-	struct cgroup_file *cfile;
 
 	percpu_ref_exit(&css->refcnt);
 
-	list_for_each_entry(cfile, &css->files, node)
-		kernfs_put(cfile->kn);
-
 	if (ss) {
 		/* css free path */
 		int id = css->id;
@@ -4724,7 +4782,6 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
 	css->ss = ss;
 	INIT_LIST_HEAD(&css->sibling);
 	INIT_LIST_HEAD(&css->children);
-	INIT_LIST_HEAD(&css->files);
 	css->serial_nr = css_serial_nr_next++;
 
 	if (cgroup_parent(cgrp)) {
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index f1b30ad5dc6d..2d3df82c54f2 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -155,12 +155,10 @@ static void freezer_css_free(struct cgroup_subsys_state *css)
  * @freezer->lock. freezer_attach() makes the new tasks conform to the
  * current state and all following state changes can see the new tasks.
  */
-static void freezer_attach(struct cgroup_subsys_state *new_css,
-			   struct cgroup_taskset *tset)
+static void freezer_attach(struct cgroup_taskset *tset)
 {
-	struct freezer *freezer = css_freezer(new_css);
 	struct task_struct *task;
-	bool clear_frozen = false;
+	struct cgroup_subsys_state *new_css;
 
 	mutex_lock(&freezer_mutex);
 
@@ -174,22 +172,21 @@ static void freezer_attach(struct cgroup_subsys_state *new_css,
 	 * current state before executing the following - !frozen tasks may
 	 * be visible in a FROZEN cgroup and frozen tasks in a THAWED one.
 	 */
-	cgroup_taskset_for_each(task, tset) {
+	cgroup_taskset_for_each(task, new_css, tset) {
+		struct freezer *freezer = css_freezer(new_css);
+
 		if (!(freezer->state & CGROUP_FREEZING)) {
 			__thaw_task(task);
 		} else {
 			freeze_task(task);
-			freezer->state &= ~CGROUP_FROZEN;
-			clear_frozen = true;
+			/* clear FROZEN and propagate upwards */
+			while (freezer && (freezer->state & CGROUP_FROZEN)) {
+				freezer->state &= ~CGROUP_FROZEN;
+				freezer = parent_freezer(freezer);
+			}
 		}
 	}
 
-	/* propagate FROZEN clearing upwards */
-	while (clear_frozen && (freezer = parent_freezer(freezer))) {
-		freezer->state &= ~CGROUP_FROZEN;
-		clear_frozen = freezer->state & CGROUP_FREEZING;
-	}
-
 	mutex_unlock(&freezer_mutex);
 }
 
diff --git a/kernel/cgroup_pids.c b/kernel/cgroup_pids.c
index cdd8df4e991c..b50d5a167fda 100644
--- a/kernel/cgroup_pids.c
+++ b/kernel/cgroup_pids.c
@@ -106,7 +106,7 @@ static void pids_uncharge(struct pids_cgroup *pids, int num)
 {
 	struct pids_cgroup *p;
 
-	for (p = pids; p; p = parent_pids(p))
+	for (p = pids; parent_pids(p); p = parent_pids(p))
 		pids_cancel(p, num);
 }
 
@@ -123,7 +123,7 @@ static void pids_charge(struct pids_cgroup *pids, int num)
 {
 	struct pids_cgroup *p;
 
-	for (p = pids; p; p = parent_pids(p))
+	for (p = pids; parent_pids(p); p = parent_pids(p))
 		atomic64_add(num, &p->counter);
 }
 
@@ -140,7 +140,7 @@ static int pids_try_charge(struct pids_cgroup *pids, int num)
 {
 	struct pids_cgroup *p, *q;
 
-	for (p = pids; p; p = parent_pids(p)) {
+	for (p = pids; parent_pids(p); p = parent_pids(p)) {
 		int64_t new = atomic64_add_return(num, &p->counter);
 
 		/*
@@ -162,13 +162,13 @@ revert:
 	return -EAGAIN;
 }
 
-static int pids_can_attach(struct cgroup_subsys_state *css,
-			   struct cgroup_taskset *tset)
+static int pids_can_attach(struct cgroup_taskset *tset)
 {
-	struct pids_cgroup *pids = css_pids(css);
 	struct task_struct *task;
+	struct cgroup_subsys_state *dst_css;
 
-	cgroup_taskset_for_each(task, tset) {
+	cgroup_taskset_for_each(task, dst_css, tset) {
+		struct pids_cgroup *pids = css_pids(dst_css);
 		struct cgroup_subsys_state *old_css;
 		struct pids_cgroup *old_pids;
 
@@ -187,13 +187,13 @@ static int pids_can_attach(struct cgroup_subsys_state *css,
 	return 0;
 }
 
-static void pids_cancel_attach(struct cgroup_subsys_state *css,
-			       struct cgroup_taskset *tset)
+static void pids_cancel_attach(struct cgroup_taskset *tset)
 {
-	struct pids_cgroup *pids = css_pids(css);
 	struct task_struct *task;
+	struct cgroup_subsys_state *dst_css;
 
-	cgroup_taskset_for_each(task, tset) {
+	cgroup_taskset_for_each(task, dst_css, tset) {
+		struct pids_cgroup *pids = css_pids(dst_css);
 		struct cgroup_subsys_state *old_css;
 		struct pids_cgroup *old_pids;
 
@@ -205,65 +205,28 @@ static void pids_cancel_attach(struct cgroup_subsys_state *css,
 	}
 }
 
+/*
+ * task_css_check(true) in pids_can_fork() and pids_cancel_fork() relies
+ * on threadgroup_change_begin() held by the copy_process().
+ */
 static int pids_can_fork(struct task_struct *task, void **priv_p)
 {
 	struct cgroup_subsys_state *css;
 	struct pids_cgroup *pids;
-	int err;
 
-	/*
-	 * Use the "current" task_css for the pids subsystem as the tentative
-	 * css. It is possible we will charge the wrong hierarchy, in which
-	 * case we will forcefully revert/reapply the charge on the right
-	 * hierarchy after it is committed to the task proper.
-	 */
-	css = task_get_css(current, pids_cgrp_id);
+	css = task_css_check(current, pids_cgrp_id, true);
 	pids = css_pids(css);
-
-	err = pids_try_charge(pids, 1);
-	if (err)
-		goto err_css_put;
-
-	*priv_p = css;
-	return 0;
-
-err_css_put:
-	css_put(css);
-	return err;
+	return pids_try_charge(pids, 1);
 }
 
 static void pids_cancel_fork(struct task_struct *task, void *priv)
 {
-	struct cgroup_subsys_state *css = priv;
-	struct pids_cgroup *pids = css_pids(css);
-
-	pids_uncharge(pids, 1);
-	css_put(css);
-}
-
-static void pids_fork(struct task_struct *task, void *priv)
-{
 	struct cgroup_subsys_state *css;
-	struct cgroup_subsys_state *old_css = priv;
 	struct pids_cgroup *pids;
-	struct pids_cgroup *old_pids = css_pids(old_css);
 
-	css = task_get_css(task, pids_cgrp_id);
+	css = task_css_check(current, pids_cgrp_id, true);
 	pids = css_pids(css);
-
-	/*
-	 * If the association has changed, we have to revert and reapply the
-	 * charge/uncharge on the wrong hierarchy to the current one. Since
-	 * the association can only change due to an organisation event, its
-	 * okay for us to ignore the limit in this case.
-	 */
-	if (pids != old_pids) {
-		pids_uncharge(old_pids, 1);
-		pids_charge(pids, 1);
-	}
-
-	css_put(css);
-	css_put(old_css);
+	pids_uncharge(pids, 1);
 }
 
 static void pids_free(struct task_struct *task)
@@ -335,6 +298,7 @@ static struct cftype pids_files[] = {
 	{
 		.name = "current",
 		.read_s64 = pids_current_read,
+		.flags = CFTYPE_NOT_ON_ROOT,
 	},
 	{ }	/* terminate */
 };
@@ -346,7 +310,6 @@ struct cgroup_subsys pids_cgrp_subsys = {
 	.cancel_attach	= pids_cancel_attach,
 	.can_fork	= pids_can_fork,
 	.cancel_fork	= pids_cancel_fork,
-	.fork		= pids_fork,
 	.free		= pids_free,
 	.legacy_cftypes	= pids_files,
 	.dfl_cftypes	= pids_files,
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 10ae73611d80..02a8ea5c9963 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1429,15 +1429,16 @@ static int fmeter_getrate(struct fmeter *fmp)
 static struct cpuset *cpuset_attach_old_cs;
 
 /* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */
-static int cpuset_can_attach(struct cgroup_subsys_state *css,
-			     struct cgroup_taskset *tset)
+static int cpuset_can_attach(struct cgroup_taskset *tset)
 {
-	struct cpuset *cs = css_cs(css);
+	struct cgroup_subsys_state *css;
+	struct cpuset *cs;
 	struct task_struct *task;
 	int ret;
 
 	/* used later by cpuset_attach() */
-	cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset));
+	cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css));
+	cs = css_cs(css);
 
 	mutex_lock(&cpuset_mutex);
 
@@ -1447,7 +1448,7 @@ static int cpuset_can_attach(struct cgroup_subsys_state *css,
 	    (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
 		goto out_unlock;
 
-	cgroup_taskset_for_each(task, tset) {
+	cgroup_taskset_for_each(task, css, tset) {
 		ret = task_can_attach(task, cs->cpus_allowed);
 		if (ret)
 			goto out_unlock;
@@ -1467,9 +1468,14 @@ out_unlock:
 	return ret;
 }
 
-static void cpuset_cancel_attach(struct cgroup_subsys_state *css,
-				 struct cgroup_taskset *tset)
+static void cpuset_cancel_attach(struct cgroup_taskset *tset)
 {
+	struct cgroup_subsys_state *css;
+	struct cpuset *cs;
+
+	cgroup_taskset_first(tset, &css);
+	cs = css_cs(css);
+
 	mutex_lock(&cpuset_mutex);
 	css_cs(css)->attach_in_progress--;
 	mutex_unlock(&cpuset_mutex);
@@ -1482,16 +1488,19 @@ static void cpuset_cancel_attach(struct cgroup_subsys_state *css,
  */
 static cpumask_var_t cpus_attach;
 
-static void cpuset_attach(struct cgroup_subsys_state *css,
-			  struct cgroup_taskset *tset)
+static void cpuset_attach(struct cgroup_taskset *tset)
 {
 	/* static buf protected by cpuset_mutex */
 	static nodemask_t cpuset_attach_nodemask_to;
 	struct task_struct *task;
 	struct task_struct *leader;
-	struct cpuset *cs = css_cs(css);
+	struct cgroup_subsys_state *css;
+	struct cpuset *cs;
 	struct cpuset *oldcs = cpuset_attach_old_cs;
 
+	cgroup_taskset_first(tset, &css);
+	cs = css_cs(css);
+
 	mutex_lock(&cpuset_mutex);
 
 	/* prepare for attach */
@@ -1502,7 +1511,7 @@ static void cpuset_attach(struct cgroup_subsys_state *css,
 
 	guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
 
-	cgroup_taskset_for_each(task, tset) {
+	cgroup_taskset_for_each(task, css, tset) {
 		/*
 		 * can_attach beforehand should guarantee that this doesn't
 		 * fail. TODO: have a better way to handle failure here
@@ -1518,7 +1527,7 @@ static void cpuset_attach(struct cgroup_subsys_state *css,
 	 * sleep and should be moved outside migration path proper.
 	 */
 	cpuset_attach_nodemask_to = cs->effective_mems;
-	cgroup_taskset_for_each_leader(leader, tset) {
+	cgroup_taskset_for_each_leader(leader, css, tset) {
 		struct mm_struct *mm = get_task_mm(leader);
 
 		if (mm) {
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index d659487254d5..9c418002b8c1 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -3,7 +3,7 @@
  *
  * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
  * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
- * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
  * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
  *
  * For licensing details see kernel-base/COPYING
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 36babfd20648..ef2d6ea10736 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3,7 +3,7 @@
  *
  * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
  * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
- * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
  * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
  *
  * For licensing details see kernel-base/COPYING
@@ -435,7 +435,7 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
 	if (!is_cgroup_event(event))
 		return;
 
-	cgrp = perf_cgroup_from_task(current);
+	cgrp = perf_cgroup_from_task(current, event->ctx);
 	/*
 	 * Do not update time when cgroup is not active
 	 */
@@ -458,7 +458,7 @@ perf_cgroup_set_timestamp(struct task_struct *task,
 	if (!task || !ctx->nr_cgroups)
 		return;
 
-	cgrp = perf_cgroup_from_task(task);
+	cgrp = perf_cgroup_from_task(task, ctx);
 	info = this_cpu_ptr(cgrp->info);
 	info->timestamp = ctx->timestamp;
 }
@@ -489,7 +489,6 @@ static void perf_cgroup_switch(struct task_struct *task, int mode)
 	 * we reschedule only in the presence of cgroup
 	 * constrained events.
 	 */
-	rcu_read_lock();
 
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
 		cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
@@ -522,8 +521,10 @@ static void perf_cgroup_switch(struct task_struct *task, int mode)
 				 * set cgrp before ctxsw in to allow
 				 * event_filter_match() to not have to pass
 				 * task around
+				 * we pass the cpuctx->ctx to perf_cgroup_from_task()
+				 * because cgorup events are only per-cpu
 				 */
-				cpuctx->cgrp = perf_cgroup_from_task(task);
+				cpuctx->cgrp = perf_cgroup_from_task(task, &cpuctx->ctx);
 				cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
 			}
 			perf_pmu_enable(cpuctx->ctx.pmu);
@@ -531,8 +532,6 @@ static void perf_cgroup_switch(struct task_struct *task, int mode)
 		}
 	}
 
-	rcu_read_unlock();
-
 	local_irq_restore(flags);
 }
 
@@ -542,17 +541,20 @@ static inline void perf_cgroup_sched_out(struct task_struct *task,
 	struct perf_cgroup *cgrp1;
 	struct perf_cgroup *cgrp2 = NULL;
 
+	rcu_read_lock();
 	/*
 	 * we come here when we know perf_cgroup_events > 0
+	 * we do not need to pass the ctx here because we know
+	 * we are holding the rcu lock
 	 */
-	cgrp1 = perf_cgroup_from_task(task);
+	cgrp1 = perf_cgroup_from_task(task, NULL);
 
 	/*
 	 * next is NULL when called from perf_event_enable_on_exec()
 	 * that will systematically cause a cgroup_switch()
 	 */
 	if (next)
-		cgrp2 = perf_cgroup_from_task(next);
+		cgrp2 = perf_cgroup_from_task(next, NULL);
 
 	/*
 	 * only schedule out current cgroup events if we know
@@ -561,6 +563,8 @@ static inline void perf_cgroup_sched_out(struct task_struct *task,
 	 */
 	if (cgrp1 != cgrp2)
 		perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
+
+	rcu_read_unlock();
 }
 
 static inline void perf_cgroup_sched_in(struct task_struct *prev,
@@ -569,13 +573,16 @@ static inline void perf_cgroup_sched_in(struct task_struct *prev,
 	struct perf_cgroup *cgrp1;
 	struct perf_cgroup *cgrp2 = NULL;
 
+	rcu_read_lock();
 	/*
 	 * we come here when we know perf_cgroup_events > 0
+	 * we do not need to pass the ctx here because we know
+	 * we are holding the rcu lock
 	 */
-	cgrp1 = perf_cgroup_from_task(task);
+	cgrp1 = perf_cgroup_from_task(task, NULL);
 
 	/* prev can never be NULL */
-	cgrp2 = perf_cgroup_from_task(prev);
+	cgrp2 = perf_cgroup_from_task(prev, NULL);
 
 	/*
 	 * only need to schedule in cgroup events if we are changing
@@ -584,6 +591,8 @@ static inline void perf_cgroup_sched_in(struct task_struct *prev,
 	 */
 	if (cgrp1 != cgrp2)
 		perf_cgroup_switch(task, PERF_CGROUP_SWIN);
+
+	rcu_read_unlock();
 }
 
 static inline int perf_cgroup_connect(int fd, struct perf_event *event,
@@ -4216,7 +4225,14 @@ retry:
 		goto retry;
 	}
 
-	__perf_event_period(&pe);
+	if (event->attr.freq) {
+		event->attr.sample_freq = value;
+	} else {
+		event->attr.sample_period = value;
+		event->hw.sample_period = value;
+	}
+
+	local64_set(&event->hw.period_left, 0);
 	raw_spin_unlock_irq(&ctx->lock);
 
 	return 0;
@@ -5667,6 +5683,17 @@ perf_event_aux_ctx(struct perf_event_context *ctx,
 }
 
 static void
+perf_event_aux_task_ctx(perf_event_aux_output_cb output, void *data,
+			struct perf_event_context *task_ctx)
+{
+	rcu_read_lock();
+	preempt_disable();
+	perf_event_aux_ctx(task_ctx, output, data);
+	preempt_enable();
+	rcu_read_unlock();
+}
+
+static void
 perf_event_aux(perf_event_aux_output_cb output, void *data,
 	       struct perf_event_context *task_ctx)
 {
@@ -5675,14 +5702,23 @@ perf_event_aux(perf_event_aux_output_cb output, void *data,
 	struct pmu *pmu;
 	int ctxn;
 
+	/*
+	 * If we have task_ctx != NULL we only notify
+	 * the task context itself. The task_ctx is set
+	 * only for EXIT events before releasing task
+	 * context.
+	 */
+	if (task_ctx) {
+		perf_event_aux_task_ctx(output, data, task_ctx);
+		return;
+	}
+
 	rcu_read_lock();
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
 		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
 		if (cpuctx->unique_pmu != pmu)
 			goto next;
 		perf_event_aux_ctx(&cpuctx->ctx, output, data);
-		if (task_ctx)
-			goto next;
 		ctxn = pmu->task_ctx_nr;
 		if (ctxn < 0)
 			goto next;
@@ -5692,12 +5728,6 @@ perf_event_aux(perf_event_aux_output_cb output, void *data,
 next:
 		put_cpu_ptr(pmu->pmu_cpu_context);
 	}
-
-	if (task_ctx) {
-		preempt_disable();
-		perf_event_aux_ctx(task_ctx, output, data);
-		preempt_enable();
-	}
 	rcu_read_unlock();
 }
 
@@ -8787,10 +8817,8 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 	struct perf_event_context *child_ctx, *clone_ctx = NULL;
 	unsigned long flags;
 
-	if (likely(!child->perf_event_ctxp[ctxn])) {
-		perf_event_task(child, NULL, 0);
+	if (likely(!child->perf_event_ctxp[ctxn]))
 		return;
-	}
 
 	local_irq_save(flags);
 	/*
@@ -8874,6 +8902,14 @@ void perf_event_exit_task(struct task_struct *child)
 
 	for_each_task_context_nr(ctxn)
 		perf_event_exit_task_context(child, ctxn);
+
+	/*
+	 * The perf_event_exit_task_context calls perf_event_task
+	 * with child's task_ctx, which generates EXIT events for
+	 * child contexts and sets child->perf_event_ctxp[] to NULL.
+	 * At this point we need to send EXIT events to cpu contexts.
+	 */
+	perf_event_task(child, NULL, 0);
 }
 
 static void perf_free_event(struct perf_event *event,
@@ -9452,16 +9488,18 @@ static void perf_cgroup_css_free(struct cgroup_subsys_state *css)
 static int __perf_cgroup_move(void *info)
 {
 	struct task_struct *task = info;
+	rcu_read_lock();
 	perf_cgroup_switch(task, PERF_CGROUP_SWOUT | PERF_CGROUP_SWIN);
+	rcu_read_unlock();
 	return 0;
 }
 
-static void perf_cgroup_attach(struct cgroup_subsys_state *css,
-			       struct cgroup_taskset *tset)
+static void perf_cgroup_attach(struct cgroup_taskset *tset)
 {
 	struct task_struct *task;
+	struct cgroup_subsys_state *css;
 
-	cgroup_taskset_for_each(task, tset)
+	cgroup_taskset_for_each(task, css, tset)
 		task_function_call(task, __perf_cgroup_move, task);
 }
 
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index b5d1ea79c595..adfdc0536117 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -3,7 +3,7 @@
  *
  * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
  * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
- * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
  * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
  *
 * For licensing details see kernel-base/COPYING
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 4e5e9798aa0c..7dad84913abf 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -19,7 +19,7 @@
  * Authors:
  *	Srikar Dronamraju
  *	Jim Keniston
- * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra
  */
 
 #include <linux/kernel.h>
diff --git a/kernel/fork.c b/kernel/fork.c
index f97f2c449f5c..fce002ee3ddf 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1368,8 +1368,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->real_start_time = ktime_get_boot_ns();
 	p->io_context = NULL;
 	p->audit_context = NULL;
-	if (clone_flags & CLONE_THREAD)
-		threadgroup_change_begin(current);
+	threadgroup_change_begin(current);
 	cgroup_fork(p);
 #ifdef CONFIG_NUMA
 	p->mempolicy = mpol_dup(p->mempolicy);
@@ -1610,8 +1609,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	proc_fork_connector(p);
 	cgroup_post_fork(p, cgrp_ss_priv);
-	if (clone_flags & CLONE_THREAD)
-		threadgroup_change_end(current);
+	threadgroup_change_end(current);
 	perf_event_fork(p);
 
 	trace_task_newtask(p, clone_flags);
@@ -1652,8 +1650,7 @@ bad_fork_cleanup_policy:
 	mpol_put(p->mempolicy);
 bad_fork_cleanup_threadgroup_lock:
 #endif
-	if (clone_flags & CLONE_THREAD)
-		threadgroup_change_end(current);
+	threadgroup_change_end(current);
 	delayacct_tsk_free(p);
 bad_fork_cleanup_count:
 	atomic_dec(&p->cred->user->processes);
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index cbf9fb899d92..bcf107ce0854 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra
  *
  * Provides a framework for enqueueing and running callbacks from hardirq
  * context. The enqueueing is NMI-safe.
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index f7dd15d537f9..05254eeb4b4e 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -2,7 +2,7 @@
  * jump label support
  *
  * Copyright (C) 2009 Jason Baron <jbaron@redhat.com>
- * Copyright (C) 2011 Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2011 Peter Zijlstra
  *
  */
 #include <linux/memory.h>
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index deae3907ac1e..60ace56618f6 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -6,7 +6,7 @@
  * Started by Ingo Molnar:
  *
  * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
- * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
  *
  * this code maps all the lock dependencies as they occur in a live kernel
  * and will warn about the following classes of locking bugs:
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
index d83d798bef95..dbb61a302548 100644
--- a/kernel/locking/lockdep_proc.c
+++ b/kernel/locking/lockdep_proc.c
@@ -6,7 +6,7 @@
  * Started by Ingo Molnar:
  *
  * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
- * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
  *
  * Code for /proc/lockdep and /proc/lockdep_stats:
  *
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
index d092a0c9c2d4..05a37857ab55 100644
--- a/kernel/locking/osq_lock.c
+++ b/kernel/locking/osq_lock.c
@@ -93,10 +93,12 @@ bool osq_lock(struct optimistic_spin_queue *lock)
 	node->cpu = curr;
 
 	/*
-	 * ACQUIRE semantics, pairs with corresponding RELEASE
-	 * in unlock() uncontended, or fastpath.
+	 * We need both ACQUIRE (pairs with corresponding RELEASE in
+	 * unlock() uncontended, or fastpath) and RELEASE (to publish
+	 * the node fields we just initialised) semantics when updating
+	 * the lock tail.
 	 */
-	old = atomic_xchg_acquire(&lock->tail, curr);
+	old = atomic_xchg(&lock->tail, curr);
 	if (old == OSQ_UNLOCKED_VAL)
 		return true;
 
diff --git a/kernel/pid.c b/kernel/pid.c
index ca368793808e..78b3d9f80d44 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -467,7 +467,7 @@ struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
 	rcu_read_lock();
 	if (type != PIDTYPE_PID)
 		task = task->group_leader;
-	pid = get_pid(task->pids[type].pid);
+	pid = get_pid(rcu_dereference(task->pids[type].pid));
 	rcu_read_unlock();
 	return pid;
 }
@@ -528,7 +528,7 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
 	if (likely(pid_alive(task))) {
 		if (type != PIDTYPE_PID)
 			task = task->group_leader;
-		nr = pid_nr_ns(task->pids[type].pid, ns);
+		nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns);
 	}
 	rcu_read_unlock();
 
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index c0a205101c23..caf4041f5b0a 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -1,7 +1,7 @@
 /*
  * sched_clock for unstable cpu clocks
  *
- * Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra
  *
  * Updates and enhancements:
  *   Copyright (C) 2008 Red Hat, Inc. Steven Rostedt <srostedt@redhat.com>
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4d568ac9319e..732e993b564b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1947,13 +1947,38 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
1947 1947
1948#ifdef CONFIG_SMP 1948#ifdef CONFIG_SMP
1949 /* 1949 /*
1950 * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
1951 * possible to, falsely, observe p->on_cpu == 0.
1952 *
1953 * One must be running (->on_cpu == 1) in order to remove oneself
1954 * from the runqueue.
1955 *
1956 * [S] ->on_cpu = 1; [L] ->on_rq
1957 * UNLOCK rq->lock
1958 * RMB
1959 * LOCK rq->lock
1960 * [S] ->on_rq = 0; [L] ->on_cpu
1961 *
1962 * Pairs with the full barrier implied in the UNLOCK+LOCK on rq->lock
1963 * from the consecutive calls to schedule(); the first switching to our
1964 * task, the second putting it to sleep.
1965 */
1966 smp_rmb();
1967
1968 /*
1950 * If the owning (remote) cpu is still in the middle of schedule() with 1969 * If the owning (remote) cpu is still in the middle of schedule() with
1951 * this task as prev, wait until its done referencing the task. 1970 * this task as prev, wait until its done referencing the task.
1952 */ 1971 */
1953 while (p->on_cpu) 1972 while (p->on_cpu)
1954 cpu_relax(); 1973 cpu_relax();
1955 /* 1974 /*
1956 * Pairs with the smp_wmb() in finish_lock_switch(). 1975 * Combined with the control dependency above, we have an effective
1976 * smp_load_acquire() without the need for full barriers.
1977 *
1978 * Pairs with the smp_store_release() in finish_lock_switch().
1979 *
1980 * This ensures that tasks getting woken will be fully ordered against
1981 * their previous state and preserve Program Order.
1957 */ 1982 */
1958 smp_rmb(); 1983 smp_rmb();
1959 1984
@@ -2039,7 +2064,6 @@ out:
2039 */ 2064 */
2040int wake_up_process(struct task_struct *p) 2065int wake_up_process(struct task_struct *p)
2041{ 2066{
2042 WARN_ON(task_is_stopped_or_traced(p));
2043 return try_to_wake_up(p, TASK_NORMAL, 0); 2067 return try_to_wake_up(p, TASK_NORMAL, 0);
2044} 2068}
2045EXPORT_SYMBOL(wake_up_process); 2069EXPORT_SYMBOL(wake_up_process);
@@ -5847,13 +5871,13 @@ static int init_rootdomain(struct root_domain *rd)
 {
         memset(rd, 0, sizeof(*rd));
 
-        if (!alloc_cpumask_var(&rd->span, GFP_KERNEL))
+        if (!zalloc_cpumask_var(&rd->span, GFP_KERNEL))
                 goto out;
-        if (!alloc_cpumask_var(&rd->online, GFP_KERNEL))
+        if (!zalloc_cpumask_var(&rd->online, GFP_KERNEL))
                 goto free_span;
-        if (!alloc_cpumask_var(&rd->dlo_mask, GFP_KERNEL))
+        if (!zalloc_cpumask_var(&rd->dlo_mask, GFP_KERNEL))
                 goto free_online;
-        if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
+        if (!zalloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
                 goto free_dlo_mask;
 
         init_dl_bw(&rd->dl_bw);
@@ -8217,12 +8241,12 @@ static void cpu_cgroup_fork(struct task_struct *task, void *private)
         sched_move_task(task);
 }
 
-static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css,
-                                 struct cgroup_taskset *tset)
+static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
 {
         struct task_struct *task;
+        struct cgroup_subsys_state *css;
 
-        cgroup_taskset_for_each(task, tset) {
+        cgroup_taskset_for_each(task, css, tset) {
 #ifdef CONFIG_RT_GROUP_SCHED
                 if (!sched_rt_can_attach(css_tg(css), task))
                         return -EINVAL;
@@ -8235,12 +8259,12 @@ static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css,
         return 0;
 }
 
-static void cpu_cgroup_attach(struct cgroup_subsys_state *css,
-                              struct cgroup_taskset *tset)
+static void cpu_cgroup_attach(struct cgroup_taskset *tset)
 {
         struct task_struct *task;
+        struct cgroup_subsys_state *css;
 
-        cgroup_taskset_for_each(task, tset)
+        cgroup_taskset_for_each(task, css, tset)
                 sched_move_task(task);
 }
 
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 26a54461bf59..05de80b48586 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -788,6 +788,9 @@ cputime_t task_gtime(struct task_struct *t)
         unsigned int seq;
         cputime_t gtime;
 
+        if (!context_tracking_is_enabled())
+                return t->gtime;
+
         do {
                 seq = read_seqbegin(&t->vtime_seqlock);
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f04fda8f669c..90e26b11deaa 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -17,7 +17,7 @@
  *  Copyright (C) 2007, Thomas Gleixner <tglx@linutronix.de>
  *
  *  Adaptive scheduling granularity, math enhancements by Peter Zijlstra
- *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
  */
 
 #include <linux/latencytop.h>
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index e3cc16312046..8ec86abe0ea1 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -64,7 +64,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
         raw_spin_unlock(&rt_b->rt_runtime_lock);
 }
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && defined(HAVE_RT_PUSH_IPI)
 static void push_irq_work_func(struct irq_work *work);
 #endif
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index efd3bfc7e347..b242775bf670 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1073,6 +1073,9 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
          * We must ensure this doesn't happen until the switch is completely
          * finished.
          *
+         * In particular, the load of prev->state in finish_task_switch() must
+         * happen before this.
+         *
          * Pairs with the control dependency and rmb in try_to_wake_up().
          */
         smp_store_release(&prev->on_cpu, 0);
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 052e02672d12..f15d6b6a538a 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -392,7 +392,7 @@ __wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q,
         do {
                 prepare_to_wait(wq, &q->wait, mode);
                 if (test_bit(q->key.bit_nr, q->key.flags))
-                        ret = (*action)(&q->key);
+                        ret = (*action)(&q->key, mode);
         } while (test_bit(q->key.bit_nr, q->key.flags) && !ret);
         finish_wait(wq, &q->wait);
         return ret;
@@ -431,7 +431,7 @@ __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
                 prepare_to_wait_exclusive(wq, &q->wait, mode);
                 if (!test_bit(q->key.bit_nr, q->key.flags))
                         continue;
-                ret = action(&q->key);
+                ret = action(&q->key, mode);
                 if (!ret)
                         continue;
                 abort_exclusive_wait(wq, &q->wait, mode, &q->key);
@@ -581,44 +581,44 @@ void wake_up_atomic_t(atomic_t *p)
 }
 EXPORT_SYMBOL(wake_up_atomic_t);
 
-__sched int bit_wait(struct wait_bit_key *word)
+__sched int bit_wait(struct wait_bit_key *word, int mode)
 {
-        if (signal_pending_state(current->state, current))
-                return 1;
         schedule();
+        if (signal_pending_state(mode, current))
+                return -EINTR;
         return 0;
 }
 EXPORT_SYMBOL(bit_wait);
 
-__sched int bit_wait_io(struct wait_bit_key *word)
+__sched int bit_wait_io(struct wait_bit_key *word, int mode)
 {
-        if (signal_pending_state(current->state, current))
-                return 1;
         io_schedule();
+        if (signal_pending_state(mode, current))
+                return -EINTR;
         return 0;
 }
 EXPORT_SYMBOL(bit_wait_io);
 
-__sched int bit_wait_timeout(struct wait_bit_key *word)
+__sched int bit_wait_timeout(struct wait_bit_key *word, int mode)
 {
         unsigned long now = READ_ONCE(jiffies);
-        if (signal_pending_state(current->state, current))
-                return 1;
         if (time_after_eq(now, word->timeout))
                 return -EAGAIN;
         schedule_timeout(word->timeout - now);
+        if (signal_pending_state(mode, current))
+                return -EINTR;
         return 0;
 }
 EXPORT_SYMBOL_GPL(bit_wait_timeout);
 
-__sched int bit_wait_io_timeout(struct wait_bit_key *word)
+__sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode)
 {
         unsigned long now = READ_ONCE(jiffies);
-        if (signal_pending_state(current->state, current))
-                return 1;
         if (time_after_eq(now, word->timeout))
                 return -EAGAIN;
         io_schedule_timeout(word->timeout - now);
+        if (signal_pending_state(mode, current))
+                return -EINTR;
         return 0;
 }
 EXPORT_SYMBOL_GPL(bit_wait_io_timeout);
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 867bc20e1ef1..a3bbaee77c58 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -531,7 +531,7 @@ static int __init cpu_stop_init(void)
 }
 early_initcall(cpu_stop_init);
 
-#ifdef CONFIG_STOP_MACHINE
+#if defined(CONFIG_SMP) || defined(CONFIG_HOTPLUG_CPU)
 
 static int __stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
 {
@@ -631,4 +631,4 @@ int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
         return ret ?: done.ret;
 }
 
-#endif /* CONFIG_STOP_MACHINE */
+#endif /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
index ce033c7aa2e8..9cff0ab82b63 100644
--- a/kernel/time/posix-clock.c
+++ b/kernel/time/posix-clock.c
@@ -69,10 +69,10 @@ static ssize_t posix_clock_read(struct file *fp, char __user *buf,
 static unsigned int posix_clock_poll(struct file *fp, poll_table *wait)
 {
         struct posix_clock *clk = get_posix_clock(fp);
-        int result = 0;
+        unsigned int result = 0;
 
         if (!clk)
-                return -ENODEV;
+                return POLLERR;
 
         if (clk->ops.poll)
                 result = clk->ops.poll(clk, fp, wait);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 75f1d05ea82d..9c6045a27ba3 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1887,12 +1887,6 @@ rb_event_index(struct ring_buffer_event *event)
         return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE;
 }
 
-static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
-{
-        cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp;
-        cpu_buffer->reader_page->read = 0;
-}
-
 static void rb_inc_iter(struct ring_buffer_iter *iter)
 {
         struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
@@ -2803,8 +2797,11 @@ rb_reserve_next_event(struct ring_buffer *buffer,
 
         event = __rb_reserve_next(cpu_buffer, &info);
 
-        if (unlikely(PTR_ERR(event) == -EAGAIN))
+        if (unlikely(PTR_ERR(event) == -EAGAIN)) {
+                if (info.add_timestamp)
+                        info.length -= RB_LEN_TIME_EXTEND;
                 goto again;
+        }
 
         if (!event)
                 goto out_fail;
@@ -3626,7 +3623,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 
         /* Finally update the reader page to the new head */
         cpu_buffer->reader_page = reader;
-        rb_reset_reader_page(cpu_buffer);
+        cpu_buffer->reader_page->read = 0;
 
         if (overwrite != cpu_buffer->last_overrun) {
                 cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun;
@@ -3636,6 +3633,10 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
                 goto again;
 
  out:
+        /* Update the read_stamp on the first event */
+        if (reader && reader->read == 0)
+                cpu_buffer->read_stamp = reader->page->time_stamp;
+
         arch_spin_unlock(&cpu_buffer->lock);
         local_irq_restore(flags);
 
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index abfc903e741e..cc9f7a9319be 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -1,7 +1,7 @@
 /*
  * trace event based perf event profiling/tracing
  *
- * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
+ * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra
  * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
  */
 
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 6bbc5f652355..4f6ef6912e00 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -582,6 +582,12 @@ static void __ftrace_clear_event_pids(struct trace_array *tr)
         unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr);
         unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr);
 
+        unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, tr);
+        unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, tr);
+
+        unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr);
+        unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr);
+
         list_for_each_entry(file, &tr->events, list) {
                 clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
         }
@@ -1729,6 +1735,16 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
                                                  tr, INT_MAX);
                 register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
                                                  tr, 0);
+
+                register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre,
+                                                     tr, INT_MAX);
+                register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post,
+                                                     tr, 0);
+
+                register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre,
+                                                 tr, INT_MAX);
+                register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post,
+                                                 tr, 0);
         }
 
         /*