author     Thomas Gleixner <tglx@linutronix.de>    2016-01-12 05:01:12 -0500
committer  Thomas Gleixner <tglx@linutronix.de>    2016-01-12 05:01:12 -0500
commit     1f16f116b01c110db20ab808562c8b8bc3ee3d6e (patch)
tree       44db563f64cf5f8d62af8f99a61e2b248c44ea3a /kernel
parent     03724ac3d48f8f0e3caf1d30fa134f8fd96c94e2 (diff)
parent     f9eccf24615672896dc13251410c3f2f33a14f95 (diff)
Merge branches 'clockevents/4.4-fixes' and 'clockevents/4.5-fixes' of http://git.linaro.org/people/daniel.lezcano/linux into timers/urgent
Pull in fixes from Daniel Lezcano:
- Fix the vt8500 timer, which could lock up the system when programmed with
  too small a delta (Roman Volkov); a generic sketch of the usual guard
  against this class of bug follows below
- Select CLKSRC_MMIO when the fsl_ftm_timer is enabled with COMPILE_TEST
  (Daniel Lezcano)
- Do not compile timers that use the 'iomem' API when the architecture does
  not have HAS_IOMEM set (Richard Weinberger)
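For context on the first fix: on many SoC timers the match register is compared against a free-running counter, so if the requested delta is smaller than the time it takes to program the hardware, the match value is already in the past, the interrupt never fires, and the CPU waits forever for a tick. The sketch below shows the usual guard for this. It is purely illustrative, not the actual vt8500 patch; the register offsets and the `timer_base` pointer are hypothetical.

```c
#include <linux/clockchips.h>
#include <linux/io.h>

/* Hypothetical MMIO layout, for illustration only. */
#define TIMER_COUNT_REG		0x00
#define TIMER_MATCH_REG		0x04

static void __iomem *timer_base;

static int example_timer_set_next_event(unsigned long cycles,
					struct clock_event_device *evt)
{
	u32 now = readl(timer_base + TIMER_COUNT_REG);
	u32 deadline = now + cycles;

	writel(deadline, timer_base + TIMER_MATCH_REG);

	/*
	 * If the counter has already passed the match value by the time the
	 * write lands, the interrupt will never fire. Returning -ETIME lets
	 * the clockevents core retry with a larger delta (at least the
	 * device's min_delta) instead of waiting forever on a missed match.
	 */
	if ((s32)(deadline - readl(timer_base + TIMER_COUNT_REG)) <= 0)
		return -ETIME;

	return 0;
}
```

Drivers can also sidestep the problem by registering with a sufficiently large minimum delta via clockevents_config_and_register().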
Diffstat (limited to 'kernel')
32 files changed, 363 insertions, 218 deletions
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 3f4c99e06c6b..b0799bced518 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -28,11 +28,17 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) | |||
28 | attr->value_size == 0) | 28 | attr->value_size == 0) |
29 | return ERR_PTR(-EINVAL); | 29 | return ERR_PTR(-EINVAL); |
30 | 30 | ||
31 | if (attr->value_size >= 1 << (KMALLOC_SHIFT_MAX - 1)) | ||
32 | /* if value_size is bigger, the user space won't be able to | ||
33 | * access the elements. | ||
34 | */ | ||
35 | return ERR_PTR(-E2BIG); | ||
36 | |||
31 | elem_size = round_up(attr->value_size, 8); | 37 | elem_size = round_up(attr->value_size, 8); |
32 | 38 | ||
33 | /* check round_up into zero and u32 overflow */ | 39 | /* check round_up into zero and u32 overflow */ |
34 | if (elem_size == 0 || | 40 | if (elem_size == 0 || |
35 | attr->max_entries > (U32_MAX - sizeof(*array)) / elem_size) | 41 | attr->max_entries > (U32_MAX - PAGE_SIZE - sizeof(*array)) / elem_size) |
36 | return ERR_PTR(-ENOMEM); | 42 | return ERR_PTR(-ENOMEM); |
37 | 43 | ||
38 | array_size = sizeof(*array) + attr->max_entries * elem_size; | 44 | array_size = sizeof(*array) + attr->max_entries * elem_size; |
@@ -105,7 +111,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, | |||
105 | /* all elements already exist */ | 111 | /* all elements already exist */ |
106 | return -EEXIST; | 112 | return -EEXIST; |
107 | 113 | ||
108 | memcpy(array->value + array->elem_size * index, value, array->elem_size); | 114 | memcpy(array->value + array->elem_size * index, value, map->value_size); |
109 | return 0; | 115 | return 0; |
110 | } | 116 | } |
111 | 117 | ||
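The new value_size bound above is user-visible: creating an array map whose values are too large to be copied in and out through the bpf(2) syscall now fails cleanly with E2BIG instead of attempting an oversized kmalloc. A minimal user-space sketch of that behaviour follows; the bpf_create_map() wrapper is a local helper written for this example, and the exact threshold depends on KMALLOC_SHIFT_MAX for the running kernel (2 MiB on a typical x86_64 configuration).

```c
/* Illustrative user-space sketch -- not part of this patch. */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

#ifndef __NR_bpf
#define __NR_bpf 321	/* x86_64; other architectures use different numbers */
#endif

static int bpf_create_map(enum bpf_map_type type, unsigned int key_size,
			  unsigned int value_size, unsigned int max_entries)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_type = type;
	attr.key_size = key_size;
	attr.value_size = value_size;
	attr.max_entries = max_entries;

	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}

int main(void)
{
	/* An 8 MiB value exceeds the new KMALLOC_SHIFT_MAX based limit. */
	int fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8 << 20, 1);

	if (fd < 0)
		printf("map creation failed: %s\n", strerror(errno)); /* E2BIG */
	else
		close(fd);
	return 0;
}
```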
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 19909b22b4f8..34777b3746fa 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -64,12 +64,35 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) | |||
64 | */ | 64 | */ |
65 | goto free_htab; | 65 | goto free_htab; |
66 | 66 | ||
67 | err = -ENOMEM; | 67 | if (htab->map.value_size >= (1 << (KMALLOC_SHIFT_MAX - 1)) - |
68 | MAX_BPF_STACK - sizeof(struct htab_elem)) | ||
69 | /* if value_size is bigger, the user space won't be able to | ||
70 | * access the elements via bpf syscall. This check also makes | ||
71 | * sure that the elem_size doesn't overflow and it's | ||
72 | * kmalloc-able later in htab_map_update_elem() | ||
73 | */ | ||
74 | goto free_htab; | ||
75 | |||
76 | htab->elem_size = sizeof(struct htab_elem) + | ||
77 | round_up(htab->map.key_size, 8) + | ||
78 | htab->map.value_size; | ||
79 | |||
68 | /* prevent zero size kmalloc and check for u32 overflow */ | 80 | /* prevent zero size kmalloc and check for u32 overflow */ |
69 | if (htab->n_buckets == 0 || | 81 | if (htab->n_buckets == 0 || |
70 | htab->n_buckets > U32_MAX / sizeof(struct hlist_head)) | 82 | htab->n_buckets > U32_MAX / sizeof(struct hlist_head)) |
71 | goto free_htab; | 83 | goto free_htab; |
72 | 84 | ||
85 | if ((u64) htab->n_buckets * sizeof(struct hlist_head) + | ||
86 | (u64) htab->elem_size * htab->map.max_entries >= | ||
87 | U32_MAX - PAGE_SIZE) | ||
88 | /* make sure page count doesn't overflow */ | ||
89 | goto free_htab; | ||
90 | |||
91 | htab->map.pages = round_up(htab->n_buckets * sizeof(struct hlist_head) + | ||
92 | htab->elem_size * htab->map.max_entries, | ||
93 | PAGE_SIZE) >> PAGE_SHIFT; | ||
94 | |||
95 | err = -ENOMEM; | ||
73 | htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct hlist_head), | 96 | htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct hlist_head), |
74 | GFP_USER | __GFP_NOWARN); | 97 | GFP_USER | __GFP_NOWARN); |
75 | 98 | ||
@@ -85,13 +108,6 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) | |||
85 | raw_spin_lock_init(&htab->lock); | 108 | raw_spin_lock_init(&htab->lock); |
86 | htab->count = 0; | 109 | htab->count = 0; |
87 | 110 | ||
88 | htab->elem_size = sizeof(struct htab_elem) + | ||
89 | round_up(htab->map.key_size, 8) + | ||
90 | htab->map.value_size; | ||
91 | |||
92 | htab->map.pages = round_up(htab->n_buckets * sizeof(struct hlist_head) + | ||
93 | htab->elem_size * htab->map.max_entries, | ||
94 | PAGE_SIZE) >> PAGE_SHIFT; | ||
95 | return &htab->map; | 111 | return &htab->map; |
96 | 112 | ||
97 | free_htab: | 113 | free_htab: |
@@ -222,7 +238,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, | |||
222 | WARN_ON_ONCE(!rcu_read_lock_held()); | 238 | WARN_ON_ONCE(!rcu_read_lock_held()); |
223 | 239 | ||
224 | /* allocate new element outside of lock */ | 240 | /* allocate new element outside of lock */ |
225 | l_new = kmalloc(htab->elem_size, GFP_ATOMIC); | 241 | l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN); |
226 | if (!l_new) | 242 | if (!l_new) |
227 | return -ENOMEM; | 243 | return -ENOMEM; |
228 | 244 | ||
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index be6d726e31c9..5a8a797d50b7 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -34,7 +34,7 @@ static void *bpf_any_get(void *raw, enum bpf_type type) | |||
34 | atomic_inc(&((struct bpf_prog *)raw)->aux->refcnt); | 34 | atomic_inc(&((struct bpf_prog *)raw)->aux->refcnt); |
35 | break; | 35 | break; |
36 | case BPF_TYPE_MAP: | 36 | case BPF_TYPE_MAP: |
37 | atomic_inc(&((struct bpf_map *)raw)->refcnt); | 37 | bpf_map_inc(raw, true); |
38 | break; | 38 | break; |
39 | default: | 39 | default: |
40 | WARN_ON_ONCE(1); | 40 | WARN_ON_ONCE(1); |
@@ -51,7 +51,7 @@ static void bpf_any_put(void *raw, enum bpf_type type) | |||
51 | bpf_prog_put(raw); | 51 | bpf_prog_put(raw); |
52 | break; | 52 | break; |
53 | case BPF_TYPE_MAP: | 53 | case BPF_TYPE_MAP: |
54 | bpf_map_put(raw); | 54 | bpf_map_put_with_uref(raw); |
55 | break; | 55 | break; |
56 | default: | 56 | default: |
57 | WARN_ON_ONCE(1); | 57 | WARN_ON_ONCE(1); |
@@ -64,7 +64,7 @@ static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type) | |||
64 | void *raw; | 64 | void *raw; |
65 | 65 | ||
66 | *type = BPF_TYPE_MAP; | 66 | *type = BPF_TYPE_MAP; |
67 | raw = bpf_map_get(ufd); | 67 | raw = bpf_map_get_with_uref(ufd); |
68 | if (IS_ERR(raw)) { | 68 | if (IS_ERR(raw)) { |
69 | *type = BPF_TYPE_PROG; | 69 | *type = BPF_TYPE_PROG; |
70 | raw = bpf_prog_get(ufd); | 70 | raw = bpf_prog_get(ufd); |
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0d3313d02a7e..3b39550d8485 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -82,6 +82,14 @@ static void bpf_map_free_deferred(struct work_struct *work) | |||
82 | map->ops->map_free(map); | 82 | map->ops->map_free(map); |
83 | } | 83 | } |
84 | 84 | ||
85 | static void bpf_map_put_uref(struct bpf_map *map) | ||
86 | { | ||
87 | if (atomic_dec_and_test(&map->usercnt)) { | ||
88 | if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) | ||
89 | bpf_fd_array_map_clear(map); | ||
90 | } | ||
91 | } | ||
92 | |||
85 | /* decrement map refcnt and schedule it for freeing via workqueue | 93 | /* decrement map refcnt and schedule it for freeing via workqueue |
86 | * (unrelying map implementation ops->map_free() might sleep) | 94 | * (unrelying map implementation ops->map_free() might sleep) |
87 | */ | 95 | */ |
@@ -93,17 +101,15 @@ void bpf_map_put(struct bpf_map *map) | |||
93 | } | 101 | } |
94 | } | 102 | } |
95 | 103 | ||
96 | static int bpf_map_release(struct inode *inode, struct file *filp) | 104 | void bpf_map_put_with_uref(struct bpf_map *map) |
97 | { | 105 | { |
98 | struct bpf_map *map = filp->private_data; | 106 | bpf_map_put_uref(map); |
99 | |||
100 | if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) | ||
101 | /* prog_array stores refcnt-ed bpf_prog pointers | ||
102 | * release them all when user space closes prog_array_fd | ||
103 | */ | ||
104 | bpf_fd_array_map_clear(map); | ||
105 | |||
106 | bpf_map_put(map); | 107 | bpf_map_put(map); |
108 | } | ||
109 | |||
110 | static int bpf_map_release(struct inode *inode, struct file *filp) | ||
111 | { | ||
112 | bpf_map_put_with_uref(filp->private_data); | ||
107 | return 0; | 113 | return 0; |
108 | } | 114 | } |
109 | 115 | ||
@@ -142,6 +148,7 @@ static int map_create(union bpf_attr *attr) | |||
142 | return PTR_ERR(map); | 148 | return PTR_ERR(map); |
143 | 149 | ||
144 | atomic_set(&map->refcnt, 1); | 150 | atomic_set(&map->refcnt, 1); |
151 | atomic_set(&map->usercnt, 1); | ||
145 | 152 | ||
146 | err = bpf_map_charge_memlock(map); | 153 | err = bpf_map_charge_memlock(map); |
147 | if (err) | 154 | if (err) |
@@ -174,7 +181,14 @@ struct bpf_map *__bpf_map_get(struct fd f) | |||
174 | return f.file->private_data; | 181 | return f.file->private_data; |
175 | } | 182 | } |
176 | 183 | ||
177 | struct bpf_map *bpf_map_get(u32 ufd) | 184 | void bpf_map_inc(struct bpf_map *map, bool uref) |
185 | { | ||
186 | atomic_inc(&map->refcnt); | ||
187 | if (uref) | ||
188 | atomic_inc(&map->usercnt); | ||
189 | } | ||
190 | |||
191 | struct bpf_map *bpf_map_get_with_uref(u32 ufd) | ||
178 | { | 192 | { |
179 | struct fd f = fdget(ufd); | 193 | struct fd f = fdget(ufd); |
180 | struct bpf_map *map; | 194 | struct bpf_map *map; |
@@ -183,7 +197,7 @@ struct bpf_map *bpf_map_get(u32 ufd) | |||
183 | if (IS_ERR(map)) | 197 | if (IS_ERR(map)) |
184 | return map; | 198 | return map; |
185 | 199 | ||
186 | atomic_inc(&map->refcnt); | 200 | bpf_map_inc(map, true); |
187 | fdput(f); | 201 | fdput(f); |
188 | 202 | ||
189 | return map; | 203 | return map; |
@@ -226,7 +240,7 @@ static int map_lookup_elem(union bpf_attr *attr) | |||
226 | goto free_key; | 240 | goto free_key; |
227 | 241 | ||
228 | err = -ENOMEM; | 242 | err = -ENOMEM; |
229 | value = kmalloc(map->value_size, GFP_USER); | 243 | value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN); |
230 | if (!value) | 244 | if (!value) |
231 | goto free_key; | 245 | goto free_key; |
232 | 246 | ||
@@ -285,7 +299,7 @@ static int map_update_elem(union bpf_attr *attr) | |||
285 | goto free_key; | 299 | goto free_key; |
286 | 300 | ||
287 | err = -ENOMEM; | 301 | err = -ENOMEM; |
288 | value = kmalloc(map->value_size, GFP_USER); | 302 | value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN); |
289 | if (!value) | 303 | if (!value) |
290 | goto free_key; | 304 | goto free_key; |
291 | 305 | ||
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c6073056badf..a7945d10b378 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2021,8 +2021,7 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env) | |||
2021 | * will be used by the valid program until it's unloaded | 2021 | * will be used by the valid program until it's unloaded |
2022 | * and all maps are released in free_bpf_prog_info() | 2022 | * and all maps are released in free_bpf_prog_info() |
2023 | */ | 2023 | */ |
2024 | atomic_inc(&map->refcnt); | 2024 | bpf_map_inc(map, false); |
2025 | |||
2026 | fdput(f); | 2025 | fdput(f); |
2027 | next_insn: | 2026 | next_insn: |
2028 | insn++; | 2027 | insn++; |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index f1603c153890..470f6536b9e8 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -98,6 +98,12 @@ static DEFINE_SPINLOCK(css_set_lock); | |||
98 | static DEFINE_SPINLOCK(cgroup_idr_lock); | 98 | static DEFINE_SPINLOCK(cgroup_idr_lock); |
99 | 99 | ||
100 | /* | 100 | /* |
101 | * Protects cgroup_file->kn for !self csses. It synchronizes notifications | ||
102 | * against file removal/re-creation across css hiding. | ||
103 | */ | ||
104 | static DEFINE_SPINLOCK(cgroup_file_kn_lock); | ||
105 | |||
106 | /* | ||
101 | * Protects cgroup_subsys->release_agent_path. Modifying it also requires | 107 | * Protects cgroup_subsys->release_agent_path. Modifying it also requires |
102 | * cgroup_mutex. Reading requires either cgroup_mutex or this spinlock. | 108 | * cgroup_mutex. Reading requires either cgroup_mutex or this spinlock. |
103 | */ | 109 | */ |
@@ -754,9 +760,11 @@ static void put_css_set_locked(struct css_set *cset) | |||
754 | if (!atomic_dec_and_test(&cset->refcount)) | 760 | if (!atomic_dec_and_test(&cset->refcount)) |
755 | return; | 761 | return; |
756 | 762 | ||
757 | /* This css_set is dead. unlink it and release cgroup refcounts */ | 763 | /* This css_set is dead. unlink it and release cgroup and css refs */ |
758 | for_each_subsys(ss, ssid) | 764 | for_each_subsys(ss, ssid) { |
759 | list_del(&cset->e_cset_node[ssid]); | 765 | list_del(&cset->e_cset_node[ssid]); |
766 | css_put(cset->subsys[ssid]); | ||
767 | } | ||
760 | hash_del(&cset->hlist); | 768 | hash_del(&cset->hlist); |
761 | css_set_count--; | 769 | css_set_count--; |
762 | 770 | ||
@@ -1056,9 +1064,13 @@ static struct css_set *find_css_set(struct css_set *old_cset, | |||
1056 | key = css_set_hash(cset->subsys); | 1064 | key = css_set_hash(cset->subsys); |
1057 | hash_add(css_set_table, &cset->hlist, key); | 1065 | hash_add(css_set_table, &cset->hlist, key); |
1058 | 1066 | ||
1059 | for_each_subsys(ss, ssid) | 1067 | for_each_subsys(ss, ssid) { |
1068 | struct cgroup_subsys_state *css = cset->subsys[ssid]; | ||
1069 | |||
1060 | list_add_tail(&cset->e_cset_node[ssid], | 1070 | list_add_tail(&cset->e_cset_node[ssid], |
1061 | &cset->subsys[ssid]->cgroup->e_csets[ssid]); | 1071 | &css->cgroup->e_csets[ssid]); |
1072 | css_get(css); | ||
1073 | } | ||
1062 | 1074 | ||
1063 | spin_unlock_bh(&css_set_lock); | 1075 | spin_unlock_bh(&css_set_lock); |
1064 | 1076 | ||
@@ -1393,6 +1405,16 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) | |||
1393 | char name[CGROUP_FILE_NAME_MAX]; | 1405 | char name[CGROUP_FILE_NAME_MAX]; |
1394 | 1406 | ||
1395 | lockdep_assert_held(&cgroup_mutex); | 1407 | lockdep_assert_held(&cgroup_mutex); |
1408 | |||
1409 | if (cft->file_offset) { | ||
1410 | struct cgroup_subsys_state *css = cgroup_css(cgrp, cft->ss); | ||
1411 | struct cgroup_file *cfile = (void *)css + cft->file_offset; | ||
1412 | |||
1413 | spin_lock_irq(&cgroup_file_kn_lock); | ||
1414 | cfile->kn = NULL; | ||
1415 | spin_unlock_irq(&cgroup_file_kn_lock); | ||
1416 | } | ||
1417 | |||
1396 | kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name)); | 1418 | kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name)); |
1397 | } | 1419 | } |
1398 | 1420 | ||
@@ -1856,7 +1878,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) | |||
1856 | 1878 | ||
1857 | INIT_LIST_HEAD(&cgrp->self.sibling); | 1879 | INIT_LIST_HEAD(&cgrp->self.sibling); |
1858 | INIT_LIST_HEAD(&cgrp->self.children); | 1880 | INIT_LIST_HEAD(&cgrp->self.children); |
1859 | INIT_LIST_HEAD(&cgrp->self.files); | ||
1860 | INIT_LIST_HEAD(&cgrp->cset_links); | 1881 | INIT_LIST_HEAD(&cgrp->cset_links); |
1861 | INIT_LIST_HEAD(&cgrp->pidlists); | 1882 | INIT_LIST_HEAD(&cgrp->pidlists); |
1862 | mutex_init(&cgrp->pidlist_mutex); | 1883 | mutex_init(&cgrp->pidlist_mutex); |
@@ -2216,6 +2237,9 @@ struct cgroup_taskset { | |||
2216 | struct list_head src_csets; | 2237 | struct list_head src_csets; |
2217 | struct list_head dst_csets; | 2238 | struct list_head dst_csets; |
2218 | 2239 | ||
2240 | /* the subsys currently being processed */ | ||
2241 | int ssid; | ||
2242 | |||
2219 | /* | 2243 | /* |
2220 | * Fields for cgroup_taskset_*() iteration. | 2244 | * Fields for cgroup_taskset_*() iteration. |
2221 | * | 2245 | * |
@@ -2278,25 +2302,29 @@ static void cgroup_taskset_add(struct task_struct *task, | |||
2278 | /** | 2302 | /** |
2279 | * cgroup_taskset_first - reset taskset and return the first task | 2303 | * cgroup_taskset_first - reset taskset and return the first task |
2280 | * @tset: taskset of interest | 2304 | * @tset: taskset of interest |
2305 | * @dst_cssp: output variable for the destination css | ||
2281 | * | 2306 | * |
2282 | * @tset iteration is initialized and the first task is returned. | 2307 | * @tset iteration is initialized and the first task is returned. |
2283 | */ | 2308 | */ |
2284 | struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset) | 2309 | struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset, |
2310 | struct cgroup_subsys_state **dst_cssp) | ||
2285 | { | 2311 | { |
2286 | tset->cur_cset = list_first_entry(tset->csets, struct css_set, mg_node); | 2312 | tset->cur_cset = list_first_entry(tset->csets, struct css_set, mg_node); |
2287 | tset->cur_task = NULL; | 2313 | tset->cur_task = NULL; |
2288 | 2314 | ||
2289 | return cgroup_taskset_next(tset); | 2315 | return cgroup_taskset_next(tset, dst_cssp); |
2290 | } | 2316 | } |
2291 | 2317 | ||
2292 | /** | 2318 | /** |
2293 | * cgroup_taskset_next - iterate to the next task in taskset | 2319 | * cgroup_taskset_next - iterate to the next task in taskset |
2294 | * @tset: taskset of interest | 2320 | * @tset: taskset of interest |
2321 | * @dst_cssp: output variable for the destination css | ||
2295 | * | 2322 | * |
2296 | * Return the next task in @tset. Iteration must have been initialized | 2323 | * Return the next task in @tset. Iteration must have been initialized |
2297 | * with cgroup_taskset_first(). | 2324 | * with cgroup_taskset_first(). |
2298 | */ | 2325 | */ |
2299 | struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset) | 2326 | struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset, |
2327 | struct cgroup_subsys_state **dst_cssp) | ||
2300 | { | 2328 | { |
2301 | struct css_set *cset = tset->cur_cset; | 2329 | struct css_set *cset = tset->cur_cset; |
2302 | struct task_struct *task = tset->cur_task; | 2330 | struct task_struct *task = tset->cur_task; |
@@ -2311,6 +2339,18 @@ struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset) | |||
2311 | if (&task->cg_list != &cset->mg_tasks) { | 2339 | if (&task->cg_list != &cset->mg_tasks) { |
2312 | tset->cur_cset = cset; | 2340 | tset->cur_cset = cset; |
2313 | tset->cur_task = task; | 2341 | tset->cur_task = task; |
2342 | |||
2343 | /* | ||
2344 | * This function may be called both before and | ||
2345 | * after cgroup_taskset_migrate(). The two cases | ||
2346 | * can be distinguished by looking at whether @cset | ||
2347 | * has its ->mg_dst_cset set. | ||
2348 | */ | ||
2349 | if (cset->mg_dst_cset) | ||
2350 | *dst_cssp = cset->mg_dst_cset->subsys[tset->ssid]; | ||
2351 | else | ||
2352 | *dst_cssp = cset->subsys[tset->ssid]; | ||
2353 | |||
2314 | return task; | 2354 | return task; |
2315 | } | 2355 | } |
2316 | 2356 | ||
@@ -2346,7 +2386,8 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset, | |||
2346 | /* check that we can legitimately attach to the cgroup */ | 2386 | /* check that we can legitimately attach to the cgroup */ |
2347 | for_each_e_css(css, i, dst_cgrp) { | 2387 | for_each_e_css(css, i, dst_cgrp) { |
2348 | if (css->ss->can_attach) { | 2388 | if (css->ss->can_attach) { |
2349 | ret = css->ss->can_attach(css, tset); | 2389 | tset->ssid = i; |
2390 | ret = css->ss->can_attach(tset); | ||
2350 | if (ret) { | 2391 | if (ret) { |
2351 | failed_css = css; | 2392 | failed_css = css; |
2352 | goto out_cancel_attach; | 2393 | goto out_cancel_attach; |
@@ -2379,9 +2420,12 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset, | |||
2379 | */ | 2420 | */ |
2380 | tset->csets = &tset->dst_csets; | 2421 | tset->csets = &tset->dst_csets; |
2381 | 2422 | ||
2382 | for_each_e_css(css, i, dst_cgrp) | 2423 | for_each_e_css(css, i, dst_cgrp) { |
2383 | if (css->ss->attach) | 2424 | if (css->ss->attach) { |
2384 | css->ss->attach(css, tset); | 2425 | tset->ssid = i; |
2426 | css->ss->attach(tset); | ||
2427 | } | ||
2428 | } | ||
2385 | 2429 | ||
2386 | ret = 0; | 2430 | ret = 0; |
2387 | goto out_release_tset; | 2431 | goto out_release_tset; |
@@ -2390,8 +2434,10 @@ out_cancel_attach: | |||
2390 | for_each_e_css(css, i, dst_cgrp) { | 2434 | for_each_e_css(css, i, dst_cgrp) { |
2391 | if (css == failed_css) | 2435 | if (css == failed_css) |
2392 | break; | 2436 | break; |
2393 | if (css->ss->cancel_attach) | 2437 | if (css->ss->cancel_attach) { |
2394 | css->ss->cancel_attach(css, tset); | 2438 | tset->ssid = i; |
2439 | css->ss->cancel_attach(tset); | ||
2440 | } | ||
2395 | } | 2441 | } |
2396 | out_release_tset: | 2442 | out_release_tset: |
2397 | spin_lock_bh(&css_set_lock); | 2443 | spin_lock_bh(&css_set_lock); |
@@ -3313,9 +3359,9 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp, | |||
3313 | if (cft->file_offset) { | 3359 | if (cft->file_offset) { |
3314 | struct cgroup_file *cfile = (void *)css + cft->file_offset; | 3360 | struct cgroup_file *cfile = (void *)css + cft->file_offset; |
3315 | 3361 | ||
3316 | kernfs_get(kn); | 3362 | spin_lock_irq(&cgroup_file_kn_lock); |
3317 | cfile->kn = kn; | 3363 | cfile->kn = kn; |
3318 | list_add(&cfile->node, &css->files); | 3364 | spin_unlock_irq(&cgroup_file_kn_lock); |
3319 | } | 3365 | } |
3320 | 3366 | ||
3321 | return 0; | 3367 | return 0; |
@@ -3553,6 +3599,22 @@ int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) | |||
3553 | } | 3599 | } |
3554 | 3600 | ||
3555 | /** | 3601 | /** |
3602 | * cgroup_file_notify - generate a file modified event for a cgroup_file | ||
3603 | * @cfile: target cgroup_file | ||
3604 | * | ||
3605 | * @cfile must have been obtained by setting cftype->file_offset. | ||
3606 | */ | ||
3607 | void cgroup_file_notify(struct cgroup_file *cfile) | ||
3608 | { | ||
3609 | unsigned long flags; | ||
3610 | |||
3611 | spin_lock_irqsave(&cgroup_file_kn_lock, flags); | ||
3612 | if (cfile->kn) | ||
3613 | kernfs_notify(cfile->kn); | ||
3614 | spin_unlock_irqrestore(&cgroup_file_kn_lock, flags); | ||
3615 | } | ||
3616 | |||
3617 | /** | ||
3556 | * cgroup_task_count - count the number of tasks in a cgroup. | 3618 | * cgroup_task_count - count the number of tasks in a cgroup. |
3557 | * @cgrp: the cgroup in question | 3619 | * @cgrp: the cgroup in question |
3558 | * | 3620 | * |
@@ -4613,13 +4675,9 @@ static void css_free_work_fn(struct work_struct *work) | |||
4613 | container_of(work, struct cgroup_subsys_state, destroy_work); | 4675 | container_of(work, struct cgroup_subsys_state, destroy_work); |
4614 | struct cgroup_subsys *ss = css->ss; | 4676 | struct cgroup_subsys *ss = css->ss; |
4615 | struct cgroup *cgrp = css->cgroup; | 4677 | struct cgroup *cgrp = css->cgroup; |
4616 | struct cgroup_file *cfile; | ||
4617 | 4678 | ||
4618 | percpu_ref_exit(&css->refcnt); | 4679 | percpu_ref_exit(&css->refcnt); |
4619 | 4680 | ||
4620 | list_for_each_entry(cfile, &css->files, node) | ||
4621 | kernfs_put(cfile->kn); | ||
4622 | |||
4623 | if (ss) { | 4681 | if (ss) { |
4624 | /* css free path */ | 4682 | /* css free path */ |
4625 | int id = css->id; | 4683 | int id = css->id; |
@@ -4724,7 +4782,6 @@ static void init_and_link_css(struct cgroup_subsys_state *css, | |||
4724 | css->ss = ss; | 4782 | css->ss = ss; |
4725 | INIT_LIST_HEAD(&css->sibling); | 4783 | INIT_LIST_HEAD(&css->sibling); |
4726 | INIT_LIST_HEAD(&css->children); | 4784 | INIT_LIST_HEAD(&css->children); |
4727 | INIT_LIST_HEAD(&css->files); | ||
4728 | css->serial_nr = css_serial_nr_next++; | 4785 | css->serial_nr = css_serial_nr_next++; |
4729 | 4786 | ||
4730 | if (cgroup_parent(cgrp)) { | 4787 | if (cgroup_parent(cgrp)) { |
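The cgroup_file_kn_lock / cgroup_file_notify() machinery added above gives controllers a safe way to poke a control file's kernfs node even while that file may be concurrently removed. Below is a hedged sketch of the intended usage pattern; the "events" file, the example_css structure, and the example_* functions are hypothetical and only illustrate how .file_offset ties a cftype to the cgroup_file embedded in a controller's css.

```c
#include <linux/cgroup.h>
#include <linux/seq_file.h>
#include <linux/stddef.h>

/*
 * Hypothetical controller state. The css must be the first member so that
 * .file_offset, which cgroup_add_file() applies relative to the css pointer,
 * lands on events_file.
 */
struct example_css {
	struct cgroup_subsys_state	css;
	struct cgroup_file		events_file;
};

static int example_events_show(struct seq_file *sf, void *v)
{
	seq_puts(sf, "example 0\n");
	return 0;
}

static struct cftype example_files[] = {
	{
		.name		= "events",
		.file_offset	= offsetof(struct example_css, events_file),
		.seq_show	= example_events_show,
	},
	{ }	/* terminate */
};

/* Called whenever the state reported by the "events" file changes. */
static void example_state_changed(struct example_css *ex)
{
	/*
	 * Safe against concurrent file removal: cgroup_rm_file() clears
	 * cfile->kn under cgroup_file_kn_lock, and cgroup_file_notify()
	 * checks it under the same lock before calling kernfs_notify().
	 */
	cgroup_file_notify(&ex->events_file);
}
```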
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index f1b30ad5dc6d..2d3df82c54f2 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -155,12 +155,10 @@ static void freezer_css_free(struct cgroup_subsys_state *css) | |||
155 | * @freezer->lock. freezer_attach() makes the new tasks conform to the | 155 | * @freezer->lock. freezer_attach() makes the new tasks conform to the |
156 | * current state and all following state changes can see the new tasks. | 156 | * current state and all following state changes can see the new tasks. |
157 | */ | 157 | */ |
158 | static void freezer_attach(struct cgroup_subsys_state *new_css, | 158 | static void freezer_attach(struct cgroup_taskset *tset) |
159 | struct cgroup_taskset *tset) | ||
160 | { | 159 | { |
161 | struct freezer *freezer = css_freezer(new_css); | ||
162 | struct task_struct *task; | 160 | struct task_struct *task; |
163 | bool clear_frozen = false; | 161 | struct cgroup_subsys_state *new_css; |
164 | 162 | ||
165 | mutex_lock(&freezer_mutex); | 163 | mutex_lock(&freezer_mutex); |
166 | 164 | ||
@@ -174,22 +172,21 @@ static void freezer_attach(struct cgroup_subsys_state *new_css, | |||
174 | * current state before executing the following - !frozen tasks may | 172 | * current state before executing the following - !frozen tasks may |
175 | * be visible in a FROZEN cgroup and frozen tasks in a THAWED one. | 173 | * be visible in a FROZEN cgroup and frozen tasks in a THAWED one. |
176 | */ | 174 | */ |
177 | cgroup_taskset_for_each(task, tset) { | 175 | cgroup_taskset_for_each(task, new_css, tset) { |
176 | struct freezer *freezer = css_freezer(new_css); | ||
177 | |||
178 | if (!(freezer->state & CGROUP_FREEZING)) { | 178 | if (!(freezer->state & CGROUP_FREEZING)) { |
179 | __thaw_task(task); | 179 | __thaw_task(task); |
180 | } else { | 180 | } else { |
181 | freeze_task(task); | 181 | freeze_task(task); |
182 | freezer->state &= ~CGROUP_FROZEN; | 182 | /* clear FROZEN and propagate upwards */ |
183 | clear_frozen = true; | 183 | while (freezer && (freezer->state & CGROUP_FROZEN)) { |
184 | freezer->state &= ~CGROUP_FROZEN; | ||
185 | freezer = parent_freezer(freezer); | ||
186 | } | ||
184 | } | 187 | } |
185 | } | 188 | } |
186 | 189 | ||
187 | /* propagate FROZEN clearing upwards */ | ||
188 | while (clear_frozen && (freezer = parent_freezer(freezer))) { | ||
189 | freezer->state &= ~CGROUP_FROZEN; | ||
190 | clear_frozen = freezer->state & CGROUP_FREEZING; | ||
191 | } | ||
192 | |||
193 | mutex_unlock(&freezer_mutex); | 190 | mutex_unlock(&freezer_mutex); |
194 | } | 191 | } |
195 | 192 | ||
diff --git a/kernel/cgroup_pids.c b/kernel/cgroup_pids.c
index cdd8df4e991c..b50d5a167fda 100644
--- a/kernel/cgroup_pids.c
+++ b/kernel/cgroup_pids.c
@@ -106,7 +106,7 @@ static void pids_uncharge(struct pids_cgroup *pids, int num) | |||
106 | { | 106 | { |
107 | struct pids_cgroup *p; | 107 | struct pids_cgroup *p; |
108 | 108 | ||
109 | for (p = pids; p; p = parent_pids(p)) | 109 | for (p = pids; parent_pids(p); p = parent_pids(p)) |
110 | pids_cancel(p, num); | 110 | pids_cancel(p, num); |
111 | } | 111 | } |
112 | 112 | ||
@@ -123,7 +123,7 @@ static void pids_charge(struct pids_cgroup *pids, int num) | |||
123 | { | 123 | { |
124 | struct pids_cgroup *p; | 124 | struct pids_cgroup *p; |
125 | 125 | ||
126 | for (p = pids; p; p = parent_pids(p)) | 126 | for (p = pids; parent_pids(p); p = parent_pids(p)) |
127 | atomic64_add(num, &p->counter); | 127 | atomic64_add(num, &p->counter); |
128 | } | 128 | } |
129 | 129 | ||
@@ -140,7 +140,7 @@ static int pids_try_charge(struct pids_cgroup *pids, int num) | |||
140 | { | 140 | { |
141 | struct pids_cgroup *p, *q; | 141 | struct pids_cgroup *p, *q; |
142 | 142 | ||
143 | for (p = pids; p; p = parent_pids(p)) { | 143 | for (p = pids; parent_pids(p); p = parent_pids(p)) { |
144 | int64_t new = atomic64_add_return(num, &p->counter); | 144 | int64_t new = atomic64_add_return(num, &p->counter); |
145 | 145 | ||
146 | /* | 146 | /* |
@@ -162,13 +162,13 @@ revert: | |||
162 | return -EAGAIN; | 162 | return -EAGAIN; |
163 | } | 163 | } |
164 | 164 | ||
165 | static int pids_can_attach(struct cgroup_subsys_state *css, | 165 | static int pids_can_attach(struct cgroup_taskset *tset) |
166 | struct cgroup_taskset *tset) | ||
167 | { | 166 | { |
168 | struct pids_cgroup *pids = css_pids(css); | ||
169 | struct task_struct *task; | 167 | struct task_struct *task; |
168 | struct cgroup_subsys_state *dst_css; | ||
170 | 169 | ||
171 | cgroup_taskset_for_each(task, tset) { | 170 | cgroup_taskset_for_each(task, dst_css, tset) { |
171 | struct pids_cgroup *pids = css_pids(dst_css); | ||
172 | struct cgroup_subsys_state *old_css; | 172 | struct cgroup_subsys_state *old_css; |
173 | struct pids_cgroup *old_pids; | 173 | struct pids_cgroup *old_pids; |
174 | 174 | ||
@@ -187,13 +187,13 @@ static int pids_can_attach(struct cgroup_subsys_state *css, | |||
187 | return 0; | 187 | return 0; |
188 | } | 188 | } |
189 | 189 | ||
190 | static void pids_cancel_attach(struct cgroup_subsys_state *css, | 190 | static void pids_cancel_attach(struct cgroup_taskset *tset) |
191 | struct cgroup_taskset *tset) | ||
192 | { | 191 | { |
193 | struct pids_cgroup *pids = css_pids(css); | ||
194 | struct task_struct *task; | 192 | struct task_struct *task; |
193 | struct cgroup_subsys_state *dst_css; | ||
195 | 194 | ||
196 | cgroup_taskset_for_each(task, tset) { | 195 | cgroup_taskset_for_each(task, dst_css, tset) { |
196 | struct pids_cgroup *pids = css_pids(dst_css); | ||
197 | struct cgroup_subsys_state *old_css; | 197 | struct cgroup_subsys_state *old_css; |
198 | struct pids_cgroup *old_pids; | 198 | struct pids_cgroup *old_pids; |
199 | 199 | ||
@@ -205,65 +205,28 @@ static void pids_cancel_attach(struct cgroup_subsys_state *css, | |||
205 | } | 205 | } |
206 | } | 206 | } |
207 | 207 | ||
208 | /* | ||
209 | * task_css_check(true) in pids_can_fork() and pids_cancel_fork() relies | ||
210 | * on threadgroup_change_begin() held by the copy_process(). | ||
211 | */ | ||
208 | static int pids_can_fork(struct task_struct *task, void **priv_p) | 212 | static int pids_can_fork(struct task_struct *task, void **priv_p) |
209 | { | 213 | { |
210 | struct cgroup_subsys_state *css; | 214 | struct cgroup_subsys_state *css; |
211 | struct pids_cgroup *pids; | 215 | struct pids_cgroup *pids; |
212 | int err; | ||
213 | 216 | ||
214 | /* | 217 | css = task_css_check(current, pids_cgrp_id, true); |
215 | * Use the "current" task_css for the pids subsystem as the tentative | ||
216 | * css. It is possible we will charge the wrong hierarchy, in which | ||
217 | * case we will forcefully revert/reapply the charge on the right | ||
218 | * hierarchy after it is committed to the task proper. | ||
219 | */ | ||
220 | css = task_get_css(current, pids_cgrp_id); | ||
221 | pids = css_pids(css); | 218 | pids = css_pids(css); |
222 | 219 | return pids_try_charge(pids, 1); | |
223 | err = pids_try_charge(pids, 1); | ||
224 | if (err) | ||
225 | goto err_css_put; | ||
226 | |||
227 | *priv_p = css; | ||
228 | return 0; | ||
229 | |||
230 | err_css_put: | ||
231 | css_put(css); | ||
232 | return err; | ||
233 | } | 220 | } |
234 | 221 | ||
235 | static void pids_cancel_fork(struct task_struct *task, void *priv) | 222 | static void pids_cancel_fork(struct task_struct *task, void *priv) |
236 | { | 223 | { |
237 | struct cgroup_subsys_state *css = priv; | ||
238 | struct pids_cgroup *pids = css_pids(css); | ||
239 | |||
240 | pids_uncharge(pids, 1); | ||
241 | css_put(css); | ||
242 | } | ||
243 | |||
244 | static void pids_fork(struct task_struct *task, void *priv) | ||
245 | { | ||
246 | struct cgroup_subsys_state *css; | 224 | struct cgroup_subsys_state *css; |
247 | struct cgroup_subsys_state *old_css = priv; | ||
248 | struct pids_cgroup *pids; | 225 | struct pids_cgroup *pids; |
249 | struct pids_cgroup *old_pids = css_pids(old_css); | ||
250 | 226 | ||
251 | css = task_get_css(task, pids_cgrp_id); | 227 | css = task_css_check(current, pids_cgrp_id, true); |
252 | pids = css_pids(css); | 228 | pids = css_pids(css); |
253 | 229 | pids_uncharge(pids, 1); | |
254 | /* | ||
255 | * If the association has changed, we have to revert and reapply the | ||
256 | * charge/uncharge on the wrong hierarchy to the current one. Since | ||
257 | * the association can only change due to an organisation event, its | ||
258 | * okay for us to ignore the limit in this case. | ||
259 | */ | ||
260 | if (pids != old_pids) { | ||
261 | pids_uncharge(old_pids, 1); | ||
262 | pids_charge(pids, 1); | ||
263 | } | ||
264 | |||
265 | css_put(css); | ||
266 | css_put(old_css); | ||
267 | } | 230 | } |
268 | 231 | ||
269 | static void pids_free(struct task_struct *task) | 232 | static void pids_free(struct task_struct *task) |
@@ -335,6 +298,7 @@ static struct cftype pids_files[] = { | |||
335 | { | 298 | { |
336 | .name = "current", | 299 | .name = "current", |
337 | .read_s64 = pids_current_read, | 300 | .read_s64 = pids_current_read, |
301 | .flags = CFTYPE_NOT_ON_ROOT, | ||
338 | }, | 302 | }, |
339 | { } /* terminate */ | 303 | { } /* terminate */ |
340 | }; | 304 | }; |
@@ -346,7 +310,6 @@ struct cgroup_subsys pids_cgrp_subsys = { | |||
346 | .cancel_attach = pids_cancel_attach, | 310 | .cancel_attach = pids_cancel_attach, |
347 | .can_fork = pids_can_fork, | 311 | .can_fork = pids_can_fork, |
348 | .cancel_fork = pids_cancel_fork, | 312 | .cancel_fork = pids_cancel_fork, |
349 | .fork = pids_fork, | ||
350 | .free = pids_free, | 313 | .free = pids_free, |
351 | .legacy_cftypes = pids_files, | 314 | .legacy_cftypes = pids_files, |
352 | .dfl_cftypes = pids_files, | 315 | .dfl_cftypes = pids_files, |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 10ae73611d80..02a8ea5c9963 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1429,15 +1429,16 @@ static int fmeter_getrate(struct fmeter *fmp) | |||
1429 | static struct cpuset *cpuset_attach_old_cs; | 1429 | static struct cpuset *cpuset_attach_old_cs; |
1430 | 1430 | ||
1431 | /* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */ | 1431 | /* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */ |
1432 | static int cpuset_can_attach(struct cgroup_subsys_state *css, | 1432 | static int cpuset_can_attach(struct cgroup_taskset *tset) |
1433 | struct cgroup_taskset *tset) | ||
1434 | { | 1433 | { |
1435 | struct cpuset *cs = css_cs(css); | 1434 | struct cgroup_subsys_state *css; |
1435 | struct cpuset *cs; | ||
1436 | struct task_struct *task; | 1436 | struct task_struct *task; |
1437 | int ret; | 1437 | int ret; |
1438 | 1438 | ||
1439 | /* used later by cpuset_attach() */ | 1439 | /* used later by cpuset_attach() */ |
1440 | cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset)); | 1440 | cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css)); |
1441 | cs = css_cs(css); | ||
1441 | 1442 | ||
1442 | mutex_lock(&cpuset_mutex); | 1443 | mutex_lock(&cpuset_mutex); |
1443 | 1444 | ||
@@ -1447,7 +1448,7 @@ static int cpuset_can_attach(struct cgroup_subsys_state *css, | |||
1447 | (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))) | 1448 | (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))) |
1448 | goto out_unlock; | 1449 | goto out_unlock; |
1449 | 1450 | ||
1450 | cgroup_taskset_for_each(task, tset) { | 1451 | cgroup_taskset_for_each(task, css, tset) { |
1451 | ret = task_can_attach(task, cs->cpus_allowed); | 1452 | ret = task_can_attach(task, cs->cpus_allowed); |
1452 | if (ret) | 1453 | if (ret) |
1453 | goto out_unlock; | 1454 | goto out_unlock; |
@@ -1467,9 +1468,14 @@ out_unlock: | |||
1467 | return ret; | 1468 | return ret; |
1468 | } | 1469 | } |
1469 | 1470 | ||
1470 | static void cpuset_cancel_attach(struct cgroup_subsys_state *css, | 1471 | static void cpuset_cancel_attach(struct cgroup_taskset *tset) |
1471 | struct cgroup_taskset *tset) | ||
1472 | { | 1472 | { |
1473 | struct cgroup_subsys_state *css; | ||
1474 | struct cpuset *cs; | ||
1475 | |||
1476 | cgroup_taskset_first(tset, &css); | ||
1477 | cs = css_cs(css); | ||
1478 | |||
1473 | mutex_lock(&cpuset_mutex); | 1479 | mutex_lock(&cpuset_mutex); |
1474 | css_cs(css)->attach_in_progress--; | 1480 | css_cs(css)->attach_in_progress--; |
1475 | mutex_unlock(&cpuset_mutex); | 1481 | mutex_unlock(&cpuset_mutex); |
@@ -1482,16 +1488,19 @@ static void cpuset_cancel_attach(struct cgroup_subsys_state *css, | |||
1482 | */ | 1488 | */ |
1483 | static cpumask_var_t cpus_attach; | 1489 | static cpumask_var_t cpus_attach; |
1484 | 1490 | ||
1485 | static void cpuset_attach(struct cgroup_subsys_state *css, | 1491 | static void cpuset_attach(struct cgroup_taskset *tset) |
1486 | struct cgroup_taskset *tset) | ||
1487 | { | 1492 | { |
1488 | /* static buf protected by cpuset_mutex */ | 1493 | /* static buf protected by cpuset_mutex */ |
1489 | static nodemask_t cpuset_attach_nodemask_to; | 1494 | static nodemask_t cpuset_attach_nodemask_to; |
1490 | struct task_struct *task; | 1495 | struct task_struct *task; |
1491 | struct task_struct *leader; | 1496 | struct task_struct *leader; |
1492 | struct cpuset *cs = css_cs(css); | 1497 | struct cgroup_subsys_state *css; |
1498 | struct cpuset *cs; | ||
1493 | struct cpuset *oldcs = cpuset_attach_old_cs; | 1499 | struct cpuset *oldcs = cpuset_attach_old_cs; |
1494 | 1500 | ||
1501 | cgroup_taskset_first(tset, &css); | ||
1502 | cs = css_cs(css); | ||
1503 | |||
1495 | mutex_lock(&cpuset_mutex); | 1504 | mutex_lock(&cpuset_mutex); |
1496 | 1505 | ||
1497 | /* prepare for attach */ | 1506 | /* prepare for attach */ |
@@ -1502,7 +1511,7 @@ static void cpuset_attach(struct cgroup_subsys_state *css, | |||
1502 | 1511 | ||
1503 | guarantee_online_mems(cs, &cpuset_attach_nodemask_to); | 1512 | guarantee_online_mems(cs, &cpuset_attach_nodemask_to); |
1504 | 1513 | ||
1505 | cgroup_taskset_for_each(task, tset) { | 1514 | cgroup_taskset_for_each(task, css, tset) { |
1506 | /* | 1515 | /* |
1507 | * can_attach beforehand should guarantee that this doesn't | 1516 | * can_attach beforehand should guarantee that this doesn't |
1508 | * fail. TODO: have a better way to handle failure here | 1517 | * fail. TODO: have a better way to handle failure here |
@@ -1518,7 +1527,7 @@ static void cpuset_attach(struct cgroup_subsys_state *css, | |||
1518 | * sleep and should be moved outside migration path proper. | 1527 | * sleep and should be moved outside migration path proper. |
1519 | */ | 1528 | */ |
1520 | cpuset_attach_nodemask_to = cs->effective_mems; | 1529 | cpuset_attach_nodemask_to = cs->effective_mems; |
1521 | cgroup_taskset_for_each_leader(leader, tset) { | 1530 | cgroup_taskset_for_each_leader(leader, css, tset) { |
1522 | struct mm_struct *mm = get_task_mm(leader); | 1531 | struct mm_struct *mm = get_task_mm(leader); |
1523 | 1532 | ||
1524 | if (mm) { | 1533 | if (mm) { |
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index d659487254d5..9c418002b8c1 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -3,7 +3,7 @@ | |||
3 | * | 3 | * |
4 | * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> | 4 | * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> |
5 | * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar | 5 | * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar |
6 | * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | 6 | * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra |
7 | * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> | 7 | * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> |
8 | * | 8 | * |
9 | * For licensing details see kernel-base/COPYING | 9 | * For licensing details see kernel-base/COPYING |
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 36babfd20648..ef2d6ea10736 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3,7 +3,7 @@ | |||
3 | * | 3 | * |
4 | * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> | 4 | * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> |
5 | * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar | 5 | * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar |
6 | * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | 6 | * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra |
7 | * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> | 7 | * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> |
8 | * | 8 | * |
9 | * For licensing details see kernel-base/COPYING | 9 | * For licensing details see kernel-base/COPYING |
@@ -435,7 +435,7 @@ static inline void update_cgrp_time_from_event(struct perf_event *event) | |||
435 | if (!is_cgroup_event(event)) | 435 | if (!is_cgroup_event(event)) |
436 | return; | 436 | return; |
437 | 437 | ||
438 | cgrp = perf_cgroup_from_task(current); | 438 | cgrp = perf_cgroup_from_task(current, event->ctx); |
439 | /* | 439 | /* |
440 | * Do not update time when cgroup is not active | 440 | * Do not update time when cgroup is not active |
441 | */ | 441 | */ |
@@ -458,7 +458,7 @@ perf_cgroup_set_timestamp(struct task_struct *task, | |||
458 | if (!task || !ctx->nr_cgroups) | 458 | if (!task || !ctx->nr_cgroups) |
459 | return; | 459 | return; |
460 | 460 | ||
461 | cgrp = perf_cgroup_from_task(task); | 461 | cgrp = perf_cgroup_from_task(task, ctx); |
462 | info = this_cpu_ptr(cgrp->info); | 462 | info = this_cpu_ptr(cgrp->info); |
463 | info->timestamp = ctx->timestamp; | 463 | info->timestamp = ctx->timestamp; |
464 | } | 464 | } |
@@ -489,7 +489,6 @@ static void perf_cgroup_switch(struct task_struct *task, int mode) | |||
489 | * we reschedule only in the presence of cgroup | 489 | * we reschedule only in the presence of cgroup |
490 | * constrained events. | 490 | * constrained events. |
491 | */ | 491 | */ |
492 | rcu_read_lock(); | ||
493 | 492 | ||
494 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 493 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
495 | cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); | 494 | cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); |
@@ -522,8 +521,10 @@ static void perf_cgroup_switch(struct task_struct *task, int mode) | |||
522 | * set cgrp before ctxsw in to allow | 521 | * set cgrp before ctxsw in to allow |
523 | * event_filter_match() to not have to pass | 522 | * event_filter_match() to not have to pass |
524 | * task around | 523 | * task around |
524 | * we pass the cpuctx->ctx to perf_cgroup_from_task() | ||
525 | * because cgorup events are only per-cpu | ||
525 | */ | 526 | */ |
526 | cpuctx->cgrp = perf_cgroup_from_task(task); | 527 | cpuctx->cgrp = perf_cgroup_from_task(task, &cpuctx->ctx); |
527 | cpu_ctx_sched_in(cpuctx, EVENT_ALL, task); | 528 | cpu_ctx_sched_in(cpuctx, EVENT_ALL, task); |
528 | } | 529 | } |
529 | perf_pmu_enable(cpuctx->ctx.pmu); | 530 | perf_pmu_enable(cpuctx->ctx.pmu); |
@@ -531,8 +532,6 @@ static void perf_cgroup_switch(struct task_struct *task, int mode) | |||
531 | } | 532 | } |
532 | } | 533 | } |
533 | 534 | ||
534 | rcu_read_unlock(); | ||
535 | |||
536 | local_irq_restore(flags); | 535 | local_irq_restore(flags); |
537 | } | 536 | } |
538 | 537 | ||
@@ -542,17 +541,20 @@ static inline void perf_cgroup_sched_out(struct task_struct *task, | |||
542 | struct perf_cgroup *cgrp1; | 541 | struct perf_cgroup *cgrp1; |
543 | struct perf_cgroup *cgrp2 = NULL; | 542 | struct perf_cgroup *cgrp2 = NULL; |
544 | 543 | ||
544 | rcu_read_lock(); | ||
545 | /* | 545 | /* |
546 | * we come here when we know perf_cgroup_events > 0 | 546 | * we come here when we know perf_cgroup_events > 0 |
547 | * we do not need to pass the ctx here because we know | ||
548 | * we are holding the rcu lock | ||
547 | */ | 549 | */ |
548 | cgrp1 = perf_cgroup_from_task(task); | 550 | cgrp1 = perf_cgroup_from_task(task, NULL); |
549 | 551 | ||
550 | /* | 552 | /* |
551 | * next is NULL when called from perf_event_enable_on_exec() | 553 | * next is NULL when called from perf_event_enable_on_exec() |
552 | * that will systematically cause a cgroup_switch() | 554 | * that will systematically cause a cgroup_switch() |
553 | */ | 555 | */ |
554 | if (next) | 556 | if (next) |
555 | cgrp2 = perf_cgroup_from_task(next); | 557 | cgrp2 = perf_cgroup_from_task(next, NULL); |
556 | 558 | ||
557 | /* | 559 | /* |
558 | * only schedule out current cgroup events if we know | 560 | * only schedule out current cgroup events if we know |
@@ -561,6 +563,8 @@ static inline void perf_cgroup_sched_out(struct task_struct *task, | |||
561 | */ | 563 | */ |
562 | if (cgrp1 != cgrp2) | 564 | if (cgrp1 != cgrp2) |
563 | perf_cgroup_switch(task, PERF_CGROUP_SWOUT); | 565 | perf_cgroup_switch(task, PERF_CGROUP_SWOUT); |
566 | |||
567 | rcu_read_unlock(); | ||
564 | } | 568 | } |
565 | 569 | ||
566 | static inline void perf_cgroup_sched_in(struct task_struct *prev, | 570 | static inline void perf_cgroup_sched_in(struct task_struct *prev, |
@@ -569,13 +573,16 @@ static inline void perf_cgroup_sched_in(struct task_struct *prev, | |||
569 | struct perf_cgroup *cgrp1; | 573 | struct perf_cgroup *cgrp1; |
570 | struct perf_cgroup *cgrp2 = NULL; | 574 | struct perf_cgroup *cgrp2 = NULL; |
571 | 575 | ||
576 | rcu_read_lock(); | ||
572 | /* | 577 | /* |
573 | * we come here when we know perf_cgroup_events > 0 | 578 | * we come here when we know perf_cgroup_events > 0 |
579 | * we do not need to pass the ctx here because we know | ||
580 | * we are holding the rcu lock | ||
574 | */ | 581 | */ |
575 | cgrp1 = perf_cgroup_from_task(task); | 582 | cgrp1 = perf_cgroup_from_task(task, NULL); |
576 | 583 | ||
577 | /* prev can never be NULL */ | 584 | /* prev can never be NULL */ |
578 | cgrp2 = perf_cgroup_from_task(prev); | 585 | cgrp2 = perf_cgroup_from_task(prev, NULL); |
579 | 586 | ||
580 | /* | 587 | /* |
581 | * only need to schedule in cgroup events if we are changing | 588 | * only need to schedule in cgroup events if we are changing |
@@ -584,6 +591,8 @@ static inline void perf_cgroup_sched_in(struct task_struct *prev, | |||
584 | */ | 591 | */ |
585 | if (cgrp1 != cgrp2) | 592 | if (cgrp1 != cgrp2) |
586 | perf_cgroup_switch(task, PERF_CGROUP_SWIN); | 593 | perf_cgroup_switch(task, PERF_CGROUP_SWIN); |
594 | |||
595 | rcu_read_unlock(); | ||
587 | } | 596 | } |
588 | 597 | ||
589 | static inline int perf_cgroup_connect(int fd, struct perf_event *event, | 598 | static inline int perf_cgroup_connect(int fd, struct perf_event *event, |
@@ -4216,7 +4225,14 @@ retry: | |||
4216 | goto retry; | 4225 | goto retry; |
4217 | } | 4226 | } |
4218 | 4227 | ||
4219 | __perf_event_period(&pe); | 4228 | if (event->attr.freq) { |
4229 | event->attr.sample_freq = value; | ||
4230 | } else { | ||
4231 | event->attr.sample_period = value; | ||
4232 | event->hw.sample_period = value; | ||
4233 | } | ||
4234 | |||
4235 | local64_set(&event->hw.period_left, 0); | ||
4220 | raw_spin_unlock_irq(&ctx->lock); | 4236 | raw_spin_unlock_irq(&ctx->lock); |
4221 | 4237 | ||
4222 | return 0; | 4238 | return 0; |
@@ -5667,6 +5683,17 @@ perf_event_aux_ctx(struct perf_event_context *ctx, | |||
5667 | } | 5683 | } |
5668 | 5684 | ||
5669 | static void | 5685 | static void |
5686 | perf_event_aux_task_ctx(perf_event_aux_output_cb output, void *data, | ||
5687 | struct perf_event_context *task_ctx) | ||
5688 | { | ||
5689 | rcu_read_lock(); | ||
5690 | preempt_disable(); | ||
5691 | perf_event_aux_ctx(task_ctx, output, data); | ||
5692 | preempt_enable(); | ||
5693 | rcu_read_unlock(); | ||
5694 | } | ||
5695 | |||
5696 | static void | ||
5670 | perf_event_aux(perf_event_aux_output_cb output, void *data, | 5697 | perf_event_aux(perf_event_aux_output_cb output, void *data, |
5671 | struct perf_event_context *task_ctx) | 5698 | struct perf_event_context *task_ctx) |
5672 | { | 5699 | { |
@@ -5675,14 +5702,23 @@ perf_event_aux(perf_event_aux_output_cb output, void *data, | |||
5675 | struct pmu *pmu; | 5702 | struct pmu *pmu; |
5676 | int ctxn; | 5703 | int ctxn; |
5677 | 5704 | ||
5705 | /* | ||
5706 | * If we have task_ctx != NULL we only notify | ||
5707 | * the task context itself. The task_ctx is set | ||
5708 | * only for EXIT events before releasing task | ||
5709 | * context. | ||
5710 | */ | ||
5711 | if (task_ctx) { | ||
5712 | perf_event_aux_task_ctx(output, data, task_ctx); | ||
5713 | return; | ||
5714 | } | ||
5715 | |||
5678 | rcu_read_lock(); | 5716 | rcu_read_lock(); |
5679 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 5717 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
5680 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); | 5718 | cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); |
5681 | if (cpuctx->unique_pmu != pmu) | 5719 | if (cpuctx->unique_pmu != pmu) |
5682 | goto next; | 5720 | goto next; |
5683 | perf_event_aux_ctx(&cpuctx->ctx, output, data); | 5721 | perf_event_aux_ctx(&cpuctx->ctx, output, data); |
5684 | if (task_ctx) | ||
5685 | goto next; | ||
5686 | ctxn = pmu->task_ctx_nr; | 5722 | ctxn = pmu->task_ctx_nr; |
5687 | if (ctxn < 0) | 5723 | if (ctxn < 0) |
5688 | goto next; | 5724 | goto next; |
@@ -5692,12 +5728,6 @@ perf_event_aux(perf_event_aux_output_cb output, void *data, | |||
5692 | next: | 5728 | next: |
5693 | put_cpu_ptr(pmu->pmu_cpu_context); | 5729 | put_cpu_ptr(pmu->pmu_cpu_context); |
5694 | } | 5730 | } |
5695 | |||
5696 | if (task_ctx) { | ||
5697 | preempt_disable(); | ||
5698 | perf_event_aux_ctx(task_ctx, output, data); | ||
5699 | preempt_enable(); | ||
5700 | } | ||
5701 | rcu_read_unlock(); | 5731 | rcu_read_unlock(); |
5702 | } | 5732 | } |
5703 | 5733 | ||
@@ -8787,10 +8817,8 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) | |||
8787 | struct perf_event_context *child_ctx, *clone_ctx = NULL; | 8817 | struct perf_event_context *child_ctx, *clone_ctx = NULL; |
8788 | unsigned long flags; | 8818 | unsigned long flags; |
8789 | 8819 | ||
8790 | if (likely(!child->perf_event_ctxp[ctxn])) { | 8820 | if (likely(!child->perf_event_ctxp[ctxn])) |
8791 | perf_event_task(child, NULL, 0); | ||
8792 | return; | 8821 | return; |
8793 | } | ||
8794 | 8822 | ||
8795 | local_irq_save(flags); | 8823 | local_irq_save(flags); |
8796 | /* | 8824 | /* |
@@ -8874,6 +8902,14 @@ void perf_event_exit_task(struct task_struct *child) | |||
8874 | 8902 | ||
8875 | for_each_task_context_nr(ctxn) | 8903 | for_each_task_context_nr(ctxn) |
8876 | perf_event_exit_task_context(child, ctxn); | 8904 | perf_event_exit_task_context(child, ctxn); |
8905 | |||
8906 | /* | ||
8907 | * The perf_event_exit_task_context calls perf_event_task | ||
8908 | * with child's task_ctx, which generates EXIT events for | ||
8909 | * child contexts and sets child->perf_event_ctxp[] to NULL. | ||
8910 | * At this point we need to send EXIT events to cpu contexts. | ||
8911 | */ | ||
8912 | perf_event_task(child, NULL, 0); | ||
8877 | } | 8913 | } |
8878 | 8914 | ||
8879 | static void perf_free_event(struct perf_event *event, | 8915 | static void perf_free_event(struct perf_event *event, |
@@ -9452,16 +9488,18 @@ static void perf_cgroup_css_free(struct cgroup_subsys_state *css) | |||
9452 | static int __perf_cgroup_move(void *info) | 9488 | static int __perf_cgroup_move(void *info) |
9453 | { | 9489 | { |
9454 | struct task_struct *task = info; | 9490 | struct task_struct *task = info; |
9491 | rcu_read_lock(); | ||
9455 | perf_cgroup_switch(task, PERF_CGROUP_SWOUT | PERF_CGROUP_SWIN); | 9492 | perf_cgroup_switch(task, PERF_CGROUP_SWOUT | PERF_CGROUP_SWIN); |
9493 | rcu_read_unlock(); | ||
9456 | return 0; | 9494 | return 0; |
9457 | } | 9495 | } |
9458 | 9496 | ||
9459 | static void perf_cgroup_attach(struct cgroup_subsys_state *css, | 9497 | static void perf_cgroup_attach(struct cgroup_taskset *tset) |
9460 | struct cgroup_taskset *tset) | ||
9461 | { | 9498 | { |
9462 | struct task_struct *task; | 9499 | struct task_struct *task; |
9500 | struct cgroup_subsys_state *css; | ||
9463 | 9501 | ||
9464 | cgroup_taskset_for_each(task, tset) | 9502 | cgroup_taskset_for_each(task, css, tset) |
9465 | task_function_call(task, __perf_cgroup_move, task); | 9503 | task_function_call(task, __perf_cgroup_move, task); |
9466 | } | 9504 | } |
9467 | 9505 | ||
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index b5d1ea79c595..adfdc0536117 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -3,7 +3,7 @@ | |||
3 | * | 3 | * |
4 | * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> | 4 | * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> |
5 | * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar | 5 | * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar |
6 | * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | 6 | * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra |
7 | * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> | 7 | * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> |
8 | * | 8 | * |
9 | * For licensing details see kernel-base/COPYING | 9 | * For licensing details see kernel-base/COPYING |
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 4e5e9798aa0c..7dad84913abf 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -19,7 +19,7 @@ | |||
19 | * Authors: | 19 | * Authors: |
20 | * Srikar Dronamraju | 20 | * Srikar Dronamraju |
21 | * Jim Keniston | 21 | * Jim Keniston |
22 | * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | 22 | * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra |
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/kernel.h> | 25 | #include <linux/kernel.h> |
diff --git a/kernel/fork.c b/kernel/fork.c
index f97f2c449f5c..fce002ee3ddf 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1368,8 +1368,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1368 | p->real_start_time = ktime_get_boot_ns(); | 1368 | p->real_start_time = ktime_get_boot_ns(); |
1369 | p->io_context = NULL; | 1369 | p->io_context = NULL; |
1370 | p->audit_context = NULL; | 1370 | p->audit_context = NULL; |
1371 | if (clone_flags & CLONE_THREAD) | 1371 | threadgroup_change_begin(current); |
1372 | threadgroup_change_begin(current); | ||
1373 | cgroup_fork(p); | 1372 | cgroup_fork(p); |
1374 | #ifdef CONFIG_NUMA | 1373 | #ifdef CONFIG_NUMA |
1375 | p->mempolicy = mpol_dup(p->mempolicy); | 1374 | p->mempolicy = mpol_dup(p->mempolicy); |
@@ -1610,8 +1609,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1610 | 1609 | ||
1611 | proc_fork_connector(p); | 1610 | proc_fork_connector(p); |
1612 | cgroup_post_fork(p, cgrp_ss_priv); | 1611 | cgroup_post_fork(p, cgrp_ss_priv); |
1613 | if (clone_flags & CLONE_THREAD) | 1612 | threadgroup_change_end(current); |
1614 | threadgroup_change_end(current); | ||
1615 | perf_event_fork(p); | 1613 | perf_event_fork(p); |
1616 | 1614 | ||
1617 | trace_task_newtask(p, clone_flags); | 1615 | trace_task_newtask(p, clone_flags); |
@@ -1652,8 +1650,7 @@ bad_fork_cleanup_policy: | |||
1652 | mpol_put(p->mempolicy); | 1650 | mpol_put(p->mempolicy); |
1653 | bad_fork_cleanup_threadgroup_lock: | 1651 | bad_fork_cleanup_threadgroup_lock: |
1654 | #endif | 1652 | #endif |
1655 | if (clone_flags & CLONE_THREAD) | 1653 | threadgroup_change_end(current); |
1656 | threadgroup_change_end(current); | ||
1657 | delayacct_tsk_free(p); | 1654 | delayacct_tsk_free(p); |
1658 | bad_fork_cleanup_count: | 1655 | bad_fork_cleanup_count: |
1659 | atomic_dec(&p->cred->user->processes); | 1656 | atomic_dec(&p->cred->user->processes); |
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index cbf9fb899d92..bcf107ce0854 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | 2 | * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra |
3 | * | 3 | * |
4 | * Provides a framework for enqueueing and running callbacks from hardirq | 4 | * Provides a framework for enqueueing and running callbacks from hardirq |
5 | * context. The enqueueing is NMI-safe. | 5 | * context. The enqueueing is NMI-safe. |
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index f7dd15d537f9..05254eeb4b4e 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -2,7 +2,7 @@ | |||
2 | * jump label support | 2 | * jump label support |
3 | * | 3 | * |
4 | * Copyright (C) 2009 Jason Baron <jbaron@redhat.com> | 4 | * Copyright (C) 2009 Jason Baron <jbaron@redhat.com> |
5 | * Copyright (C) 2011 Peter Zijlstra <pzijlstr@redhat.com> | 5 | * Copyright (C) 2011 Peter Zijlstra |
6 | * | 6 | * |
7 | */ | 7 | */ |
8 | #include <linux/memory.h> | 8 | #include <linux/memory.h> |
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index deae3907ac1e..60ace56618f6 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c | |||
@@ -6,7 +6,7 @@ | |||
6 | * Started by Ingo Molnar: | 6 | * Started by Ingo Molnar: |
7 | * | 7 | * |
8 | * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | 8 | * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> |
9 | * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | 9 | * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra |
10 | * | 10 | * |
11 | * this code maps all the lock dependencies as they occur in a live kernel | 11 | * this code maps all the lock dependencies as they occur in a live kernel |
12 | * and will warn about the following classes of locking bugs: | 12 | * and will warn about the following classes of locking bugs: |
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c index d83d798bef95..dbb61a302548 100644 --- a/kernel/locking/lockdep_proc.c +++ b/kernel/locking/lockdep_proc.c | |||
@@ -6,7 +6,7 @@ | |||
6 | * Started by Ingo Molnar: | 6 | * Started by Ingo Molnar: |
7 | * | 7 | * |
8 | * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | 8 | * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> |
9 | * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | 9 | * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra |
10 | * | 10 | * |
11 | * Code for /proc/lockdep and /proc/lockdep_stats: | 11 | * Code for /proc/lockdep and /proc/lockdep_stats: |
12 | * | 12 | * |
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c index d092a0c9c2d4..05a37857ab55 100644 --- a/kernel/locking/osq_lock.c +++ b/kernel/locking/osq_lock.c | |||
@@ -93,10 +93,12 @@ bool osq_lock(struct optimistic_spin_queue *lock) | |||
93 | node->cpu = curr; | 93 | node->cpu = curr; |
94 | 94 | ||
95 | /* | 95 | /* |
96 | * ACQUIRE semantics, pairs with corresponding RELEASE | 96 | * We need both ACQUIRE (pairs with corresponding RELEASE in |
97 | * in unlock() uncontended, or fastpath. | 97 | * unlock() uncontended, or fastpath) and RELEASE (to publish |
98 | * the node fields we just initialised) semantics when updating | ||
99 | * the lock tail. | ||
98 | */ | 100 | */ |
99 | old = atomic_xchg_acquire(&lock->tail, curr); | 101 | old = atomic_xchg(&lock->tail, curr); |
100 | if (old == OSQ_UNLOCKED_VAL) | 102 | if (old == OSQ_UNLOCKED_VAL) |
101 | return true; | 103 | return true; |
102 | 104 | ||
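The osq_lock() hunk swaps atomic_xchg_acquire() back for the fully ordered atomic_xchg() on lock->tail: as the expanded comment says, the exchange must not only ACQUIRE the previous tail but also RELEASE (publish) the node fields initialised just above. A rough userspace analogue of that distinction in C11 atomics, purely to illustrate the orderings involved (the kernel's atomic_xchg() is stronger still, a full barrier):

#include <stdatomic.h>

struct spin_node { int cpu; _Atomic(struct spin_node *) next; };

static _Atomic(struct spin_node *) tail;

/* acquire-only: our earlier plain stores to *me (the node init) may still be
 * reordered after the exchange, so another CPU that sees the new tail could
 * read stale node fields. */
static struct spin_node *swap_tail_acquire(struct spin_node *me)
{
	return atomic_exchange_explicit(&tail, me, memory_order_acquire);
}

/* acquire + release: the exchange also publishes the node initialisation,
 * which is what queueing onto the lock requires. */
static struct spin_node *swap_tail_acq_rel(struct spin_node *me)
{
	return atomic_exchange_explicit(&tail, me, memory_order_acq_rel);
}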
diff --git a/kernel/pid.c b/kernel/pid.c index ca368793808e..78b3d9f80d44 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
@@ -467,7 +467,7 @@ struct pid *get_task_pid(struct task_struct *task, enum pid_type type) | |||
467 | rcu_read_lock(); | 467 | rcu_read_lock(); |
468 | if (type != PIDTYPE_PID) | 468 | if (type != PIDTYPE_PID) |
469 | task = task->group_leader; | 469 | task = task->group_leader; |
470 | pid = get_pid(task->pids[type].pid); | 470 | pid = get_pid(rcu_dereference(task->pids[type].pid)); |
471 | rcu_read_unlock(); | 471 | rcu_read_unlock(); |
472 | return pid; | 472 | return pid; |
473 | } | 473 | } |
@@ -528,7 +528,7 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, | |||
528 | if (likely(pid_alive(task))) { | 528 | if (likely(pid_alive(task))) { |
529 | if (type != PIDTYPE_PID) | 529 | if (type != PIDTYPE_PID) |
530 | task = task->group_leader; | 530 | task = task->group_leader; |
531 | nr = pid_nr_ns(task->pids[type].pid, ns); | 531 | nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns); |
532 | } | 532 | } |
533 | rcu_read_unlock(); | 533 | rcu_read_unlock(); |
534 | 534 | ||
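Both pid.c hunks wrap the task->pids[type].pid load in rcu_dereference(): the struct pid pointer can change under a concurrent exec or exit, so it may only be loaded with RCU conventions inside the rcu_read_lock() section. The read-side pattern the fix follows, mirroring get_task_pid():

#include <linux/pid.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>

/* Illustrative copy of the fixed pattern in get_task_pid(). */
static struct pid *grab_task_pid(struct task_struct *task, enum pid_type type)
{
	struct pid *pid;

	rcu_read_lock();
	if (type != PIDTYPE_PID)
		task = task->group_leader;
	/* rcu_dereference() pairs with whoever publishes a new struct pid
	 * for this slot; get_pid() tolerates a NULL result. */
	pid = get_pid(rcu_dereference(task->pids[type].pid));
	rcu_read_unlock();

	return pid;	/* caller drops the reference with put_pid() */
}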
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index c0a205101c23..caf4041f5b0a 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * sched_clock for unstable cpu clocks | 2 | * sched_clock for unstable cpu clocks |
3 | * | 3 | * |
4 | * Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | 4 | * Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra |
5 | * | 5 | * |
6 | * Updates and enhancements: | 6 | * Updates and enhancements: |
7 | * Copyright (C) 2008 Red Hat, Inc. Steven Rostedt <srostedt@redhat.com> | 7 | * Copyright (C) 2008 Red Hat, Inc. Steven Rostedt <srostedt@redhat.com> |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4d568ac9319e..732e993b564b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -1947,13 +1947,38 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) | |||
1947 | 1947 | ||
1948 | #ifdef CONFIG_SMP | 1948 | #ifdef CONFIG_SMP |
1949 | /* | 1949 | /* |
1950 | * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be | ||
1951 | * possible to, falsely, observe p->on_cpu == 0. | ||
1952 | * | ||
1953 | * One must be running (->on_cpu == 1) in order to remove oneself | ||
1954 | * from the runqueue. | ||
1955 | * | ||
1956 | * [S] ->on_cpu = 1; [L] ->on_rq | ||
1957 | * UNLOCK rq->lock | ||
1958 | * RMB | ||
1959 | * LOCK rq->lock | ||
1960 | * [S] ->on_rq = 0; [L] ->on_cpu | ||
1961 | * | ||
1962 | * Pairs with the full barrier implied in the UNLOCK+LOCK on rq->lock | ||
1963 | * from the consecutive calls to schedule(); the first switching to our | ||
1964 | * task, the second putting it to sleep. | ||
1965 | */ | ||
1966 | smp_rmb(); | ||
1967 | |||
1968 | /* | ||
1950 | * If the owning (remote) cpu is still in the middle of schedule() with | 1969 | * If the owning (remote) cpu is still in the middle of schedule() with |
1951 | * this task as prev, wait until its done referencing the task. | 1970 | * this task as prev, wait until its done referencing the task. |
1952 | */ | 1971 | */ |
1953 | while (p->on_cpu) | 1972 | while (p->on_cpu) |
1954 | cpu_relax(); | 1973 | cpu_relax(); |
1955 | /* | 1974 | /* |
1956 | * Pairs with the smp_wmb() in finish_lock_switch(). | 1975 | * Combined with the control dependency above, we have an effective |
1976 | * smp_load_acquire() without the need for full barriers. | ||
1977 | * | ||
1978 | * Pairs with the smp_store_release() in finish_lock_switch(). | ||
1979 | * | ||
1980 | * This ensures that tasks getting woken will be fully ordered against | ||
1981 | * their previous state and preserve Program Order. | ||
1957 | */ | 1982 | */ |
1958 | smp_rmb(); | 1983 | smp_rmb(); |
1959 | 1984 | ||
@@ -2039,7 +2064,6 @@ out: | |||
2039 | */ | 2064 | */ |
2040 | int wake_up_process(struct task_struct *p) | 2065 | int wake_up_process(struct task_struct *p) |
2041 | { | 2066 | { |
2042 | WARN_ON(task_is_stopped_or_traced(p)); | ||
2043 | return try_to_wake_up(p, TASK_NORMAL, 0); | 2067 | return try_to_wake_up(p, TASK_NORMAL, 0); |
2044 | } | 2068 | } |
2045 | EXPORT_SYMBOL(wake_up_process); | 2069 | EXPORT_SYMBOL(wake_up_process); |
@@ -5847,13 +5871,13 @@ static int init_rootdomain(struct root_domain *rd) | |||
5847 | { | 5871 | { |
5848 | memset(rd, 0, sizeof(*rd)); | 5872 | memset(rd, 0, sizeof(*rd)); |
5849 | 5873 | ||
5850 | if (!alloc_cpumask_var(&rd->span, GFP_KERNEL)) | 5874 | if (!zalloc_cpumask_var(&rd->span, GFP_KERNEL)) |
5851 | goto out; | 5875 | goto out; |
5852 | if (!alloc_cpumask_var(&rd->online, GFP_KERNEL)) | 5876 | if (!zalloc_cpumask_var(&rd->online, GFP_KERNEL)) |
5853 | goto free_span; | 5877 | goto free_span; |
5854 | if (!alloc_cpumask_var(&rd->dlo_mask, GFP_KERNEL)) | 5878 | if (!zalloc_cpumask_var(&rd->dlo_mask, GFP_KERNEL)) |
5855 | goto free_online; | 5879 | goto free_online; |
5856 | if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL)) | 5880 | if (!zalloc_cpumask_var(&rd->rto_mask, GFP_KERNEL)) |
5857 | goto free_dlo_mask; | 5881 | goto free_dlo_mask; |
5858 | 5882 | ||
5859 | init_dl_bw(&rd->dl_bw); | 5883 | init_dl_bw(&rd->dl_bw); |
@@ -8217,12 +8241,12 @@ static void cpu_cgroup_fork(struct task_struct *task, void *private) | |||
8217 | sched_move_task(task); | 8241 | sched_move_task(task); |
8218 | } | 8242 | } |
8219 | 8243 | ||
8220 | static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css, | 8244 | static int cpu_cgroup_can_attach(struct cgroup_taskset *tset) |
8221 | struct cgroup_taskset *tset) | ||
8222 | { | 8245 | { |
8223 | struct task_struct *task; | 8246 | struct task_struct *task; |
8247 | struct cgroup_subsys_state *css; | ||
8224 | 8248 | ||
8225 | cgroup_taskset_for_each(task, tset) { | 8249 | cgroup_taskset_for_each(task, css, tset) { |
8226 | #ifdef CONFIG_RT_GROUP_SCHED | 8250 | #ifdef CONFIG_RT_GROUP_SCHED |
8227 | if (!sched_rt_can_attach(css_tg(css), task)) | 8251 | if (!sched_rt_can_attach(css_tg(css), task)) |
8228 | return -EINVAL; | 8252 | return -EINVAL; |
@@ -8235,12 +8259,12 @@ static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css, | |||
8235 | return 0; | 8259 | return 0; |
8236 | } | 8260 | } |
8237 | 8261 | ||
8238 | static void cpu_cgroup_attach(struct cgroup_subsys_state *css, | 8262 | static void cpu_cgroup_attach(struct cgroup_taskset *tset) |
8239 | struct cgroup_taskset *tset) | ||
8240 | { | 8263 | { |
8241 | struct task_struct *task; | 8264 | struct task_struct *task; |
8265 | struct cgroup_subsys_state *css; | ||
8242 | 8266 | ||
8243 | cgroup_taskset_for_each(task, tset) | 8267 | cgroup_taskset_for_each(task, css, tset) |
8244 | sched_move_task(task); | 8268 | sched_move_task(task); |
8245 | } | 8269 | } |
8246 | 8270 | ||
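Among the sched/core.c changes, cpu_cgroup_can_attach() and cpu_cgroup_attach() are converted to the reworked cgroup migration API: the callbacks now take only the taskset, and cgroup_taskset_for_each() hands back the destination css for each task. The shape any converted ->can_attach() ends up with, schematically (may_join_group() is a placeholder for a controller-specific check such as sched_rt_can_attach()):

#include <linux/cgroup.h>
#include <linux/errno.h>
#include <linux/sched.h>

extern bool may_join_group(struct cgroup_subsys_state *css,
			   struct task_struct *task);	/* placeholder */

/* Schematic ->can_attach() after the API change. */
static int example_can_attach(struct cgroup_taskset *tset)
{
	struct task_struct *task;
	struct cgroup_subsys_state *css;

	cgroup_taskset_for_each(task, css, tset) {
		if (!may_join_group(css, task))
			return -EINVAL;
	}
	return 0;
}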
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 26a54461bf59..05de80b48586 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c | |||
@@ -788,6 +788,9 @@ cputime_t task_gtime(struct task_struct *t) | |||
788 | unsigned int seq; | 788 | unsigned int seq; |
789 | cputime_t gtime; | 789 | cputime_t gtime; |
790 | 790 | ||
791 | if (!context_tracking_is_enabled()) | ||
792 | return t->gtime; | ||
793 | |||
791 | do { | 794 | do { |
792 | seq = read_seqbegin(&t->vtime_seqlock); | 795 | seq = read_seqbegin(&t->vtime_seqlock); |
793 | 796 | ||
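task_gtime() gains a fast path: when context tracking is disabled there is no vtime accounting, t->gtime only changes from the tick, and the seqlock retry loop can be skipped entirely. The resulting shape, abridged (the vtime/guest-time adjustment inside the loop is elided here):

#include <linux/context_tracking.h>
#include <linux/sched.h>

/* Abridged sketch of task_gtime() after the fix. */
static cputime_t task_gtime_sketch(struct task_struct *t)
{
	unsigned int seq;
	cputime_t gtime;

	if (!context_tracking_is_enabled())
		return t->gtime;		/* no vtime accounting: a plain read suffices */

	do {
		seq = read_seqbegin(&t->vtime_seqlock);
		gtime = t->gtime;
		/* vtime/guest-time adjustment elided */
	} while (read_seqretry(&t->vtime_seqlock, seq));

	return gtime;
}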
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f04fda8f669c..90e26b11deaa 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
@@ -17,7 +17,7 @@ | |||
17 | * Copyright (C) 2007, Thomas Gleixner <tglx@linutronix.de> | 17 | * Copyright (C) 2007, Thomas Gleixner <tglx@linutronix.de> |
18 | * | 18 | * |
19 | * Adaptive scheduling granularity, math enhancements by Peter Zijlstra | 19 | * Adaptive scheduling granularity, math enhancements by Peter Zijlstra |
20 | * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | 20 | * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra |
21 | */ | 21 | */ |
22 | 22 | ||
23 | #include <linux/latencytop.h> | 23 | #include <linux/latencytop.h> |
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index e3cc16312046..8ec86abe0ea1 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c | |||
@@ -64,7 +64,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) | |||
64 | raw_spin_unlock(&rt_b->rt_runtime_lock); | 64 | raw_spin_unlock(&rt_b->rt_runtime_lock); |
65 | } | 65 | } |
66 | 66 | ||
67 | #ifdef CONFIG_SMP | 67 | #if defined(CONFIG_SMP) && defined(HAVE_RT_PUSH_IPI) |
68 | static void push_irq_work_func(struct irq_work *work); | 68 | static void push_irq_work_func(struct irq_work *work); |
69 | #endif | 69 | #endif |
70 | 70 | ||
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index efd3bfc7e347..b242775bf670 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
@@ -1073,6 +1073,9 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | |||
1073 | * We must ensure this doesn't happen until the switch is completely | 1073 | * We must ensure this doesn't happen until the switch is completely |
1074 | * finished. | 1074 | * finished. |
1075 | * | 1075 | * |
1076 | * In particular, the load of prev->state in finish_task_switch() must | ||
1077 | * happen before this. | ||
1078 | * | ||
1076 | * Pairs with the control dependency and rmb in try_to_wake_up(). | 1079 | * Pairs with the control dependency and rmb in try_to_wake_up(). |
1077 | */ | 1080 | */ |
1078 | smp_store_release(&prev->on_cpu, 0); | 1081 | smp_store_release(&prev->on_cpu, 0); |
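The new comments in try_to_wake_up() and finish_lock_switch() describe one pairing: the CPU switching the task out publishes its final stores (including prev->state) with smp_store_release(&prev->on_cpu, 0), and the waker's spin on ->on_cpu followed by smp_rmb() acts as the matching acquire. Boiled down to the two halves (sketch only, SMP build assumed):

#include <linux/sched.h>
#include <asm/barrier.h>

/* Previous CPU, end of the context switch (cf. finish_lock_switch()): */
static void prev_cpu_publishes(struct task_struct *prev)
{
	/* every earlier store, notably to prev->state, is ordered before this */
	smp_store_release(&prev->on_cpu, 0);
}

/* Waker, before touching the task's state (cf. try_to_wake_up()): */
static void waker_waits(struct task_struct *p)
{
	while (p->on_cpu)	/* control dependency on the loaded value */
		cpu_relax();
	smp_rmb();		/* together: an effective smp_load_acquire(&p->on_cpu) */
	/* later loads of p->state etc. now see the switching CPU's stores */
}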
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 052e02672d12..f15d6b6a538a 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c | |||
@@ -392,7 +392,7 @@ __wait_on_bit(wait_queue_head_t *wq, struct wait_bit_queue *q, | |||
392 | do { | 392 | do { |
393 | prepare_to_wait(wq, &q->wait, mode); | 393 | prepare_to_wait(wq, &q->wait, mode); |
394 | if (test_bit(q->key.bit_nr, q->key.flags)) | 394 | if (test_bit(q->key.bit_nr, q->key.flags)) |
395 | ret = (*action)(&q->key); | 395 | ret = (*action)(&q->key, mode); |
396 | } while (test_bit(q->key.bit_nr, q->key.flags) && !ret); | 396 | } while (test_bit(q->key.bit_nr, q->key.flags) && !ret); |
397 | finish_wait(wq, &q->wait); | 397 | finish_wait(wq, &q->wait); |
398 | return ret; | 398 | return ret; |
@@ -431,7 +431,7 @@ __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q, | |||
431 | prepare_to_wait_exclusive(wq, &q->wait, mode); | 431 | prepare_to_wait_exclusive(wq, &q->wait, mode); |
432 | if (!test_bit(q->key.bit_nr, q->key.flags)) | 432 | if (!test_bit(q->key.bit_nr, q->key.flags)) |
433 | continue; | 433 | continue; |
434 | ret = action(&q->key); | 434 | ret = action(&q->key, mode); |
435 | if (!ret) | 435 | if (!ret) |
436 | continue; | 436 | continue; |
437 | abort_exclusive_wait(wq, &q->wait, mode, &q->key); | 437 | abort_exclusive_wait(wq, &q->wait, mode, &q->key); |
@@ -581,44 +581,44 @@ void wake_up_atomic_t(atomic_t *p) | |||
581 | } | 581 | } |
582 | EXPORT_SYMBOL(wake_up_atomic_t); | 582 | EXPORT_SYMBOL(wake_up_atomic_t); |
583 | 583 | ||
584 | __sched int bit_wait(struct wait_bit_key *word) | 584 | __sched int bit_wait(struct wait_bit_key *word, int mode) |
585 | { | 585 | { |
586 | if (signal_pending_state(current->state, current)) | ||
587 | return 1; | ||
588 | schedule(); | 586 | schedule(); |
587 | if (signal_pending_state(mode, current)) | ||
588 | return -EINTR; | ||
589 | return 0; | 589 | return 0; |
590 | } | 590 | } |
591 | EXPORT_SYMBOL(bit_wait); | 591 | EXPORT_SYMBOL(bit_wait); |
592 | 592 | ||
593 | __sched int bit_wait_io(struct wait_bit_key *word) | 593 | __sched int bit_wait_io(struct wait_bit_key *word, int mode) |
594 | { | 594 | { |
595 | if (signal_pending_state(current->state, current)) | ||
596 | return 1; | ||
597 | io_schedule(); | 595 | io_schedule(); |
596 | if (signal_pending_state(mode, current)) | ||
597 | return -EINTR; | ||
598 | return 0; | 598 | return 0; |
599 | } | 599 | } |
600 | EXPORT_SYMBOL(bit_wait_io); | 600 | EXPORT_SYMBOL(bit_wait_io); |
601 | 601 | ||
602 | __sched int bit_wait_timeout(struct wait_bit_key *word) | 602 | __sched int bit_wait_timeout(struct wait_bit_key *word, int mode) |
603 | { | 603 | { |
604 | unsigned long now = READ_ONCE(jiffies); | 604 | unsigned long now = READ_ONCE(jiffies); |
605 | if (signal_pending_state(current->state, current)) | ||
606 | return 1; | ||
607 | if (time_after_eq(now, word->timeout)) | 605 | if (time_after_eq(now, word->timeout)) |
608 | return -EAGAIN; | 606 | return -EAGAIN; |
609 | schedule_timeout(word->timeout - now); | 607 | schedule_timeout(word->timeout - now); |
608 | if (signal_pending_state(mode, current)) | ||
609 | return -EINTR; | ||
610 | return 0; | 610 | return 0; |
611 | } | 611 | } |
612 | EXPORT_SYMBOL_GPL(bit_wait_timeout); | 612 | EXPORT_SYMBOL_GPL(bit_wait_timeout); |
613 | 613 | ||
614 | __sched int bit_wait_io_timeout(struct wait_bit_key *word) | 614 | __sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode) |
615 | { | 615 | { |
616 | unsigned long now = READ_ONCE(jiffies); | 616 | unsigned long now = READ_ONCE(jiffies); |
617 | if (signal_pending_state(current->state, current)) | ||
618 | return 1; | ||
619 | if (time_after_eq(now, word->timeout)) | 617 | if (time_after_eq(now, word->timeout)) |
620 | return -EAGAIN; | 618 | return -EAGAIN; |
621 | io_schedule_timeout(word->timeout - now); | 619 | io_schedule_timeout(word->timeout - now); |
620 | if (signal_pending_state(mode, current)) | ||
621 | return -EINTR; | ||
622 | return 0; | 622 | return 0; |
623 | } | 623 | } |
624 | EXPORT_SYMBOL_GPL(bit_wait_io_timeout); | 624 | EXPORT_SYMBOL_GPL(bit_wait_io_timeout); |
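The bit_wait*() helpers change signature to take the wait mode and now check for a pending signal after scheduling, returning -EINTR rather than 1; __wait_on_bit() and __wait_on_bit_lock() pass the mode through to the action. A caller supplying its own action follows the same shape, e.g. (illustrative; MY_FLAG_BIT and the flags word are placeholders):

#include <linux/sched.h>
#include <linux/wait.h>

/* An action matching the new wait_bit callback signature:
 * int (*action)(struct wait_bit_key *word, int mode). */
static __sched int my_bit_wait(struct wait_bit_key *word, int mode)
{
	schedule();
	if (signal_pending_state(mode, current))
		return -EINTR;		/* was "return 1" before this series */
	return 0;
}

/* usage, roughly:
 *	err = wait_on_bit_action(&flags, MY_FLAG_BIT, my_bit_wait,
 *				 TASK_INTERRUPTIBLE);
 */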
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 867bc20e1ef1..a3bbaee77c58 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
@@ -531,7 +531,7 @@ static int __init cpu_stop_init(void) | |||
531 | } | 531 | } |
532 | early_initcall(cpu_stop_init); | 532 | early_initcall(cpu_stop_init); |
533 | 533 | ||
534 | #ifdef CONFIG_STOP_MACHINE | 534 | #if defined(CONFIG_SMP) || defined(CONFIG_HOTPLUG_CPU) |
535 | 535 | ||
536 | static int __stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) | 536 | static int __stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) |
537 | { | 537 | { |
@@ -631,4 +631,4 @@ int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, | |||
631 | return ret ?: done.ret; | 631 | return ret ?: done.ret; |
632 | } | 632 | } |
633 | 633 | ||
634 | #endif /* CONFIG_STOP_MACHINE */ | 634 | #endif /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */ |
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index ce033c7aa2e8..9cff0ab82b63 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c | |||
@@ -69,10 +69,10 @@ static ssize_t posix_clock_read(struct file *fp, char __user *buf, | |||
69 | static unsigned int posix_clock_poll(struct file *fp, poll_table *wait) | 69 | static unsigned int posix_clock_poll(struct file *fp, poll_table *wait) |
70 | { | 70 | { |
71 | struct posix_clock *clk = get_posix_clock(fp); | 71 | struct posix_clock *clk = get_posix_clock(fp); |
72 | int result = 0; | 72 | unsigned int result = 0; |
73 | 73 | ||
74 | if (!clk) | 74 | if (!clk) |
75 | return -ENODEV; | 75 | return POLLERR; |
76 | 76 | ||
77 | if (clk->ops.poll) | 77 | if (clk->ops.poll) |
78 | result = clk->ops.poll(clk, fp, wait); | 78 | result = clk->ops.poll(clk, fp, wait); |
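posix_clock_poll() now keeps the result in an unsigned int and reports a vanished clock as POLLERR, because a ->poll() handler returns a mask of poll events, never a negative errno. The same rule applies to any poll handler; a sketch with placeholder device names:

#include <linux/fs.h>
#include <linux/poll.h>

struct mydev {				/* placeholder device */
	wait_queue_head_t readq;
	bool have_data;
};

static unsigned int mydev_poll(struct file *fp, poll_table *wait)
{
	struct mydev *dev = fp->private_data;
	unsigned int mask = 0;		/* a mask, never a -errno */

	if (!dev)
		return POLLERR;		/* device gone: signal an error event */

	poll_wait(fp, &dev->readq, wait);
	if (dev->have_data)
		mask |= POLLIN | POLLRDNORM;

	return mask;
}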
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 75f1d05ea82d..9c6045a27ba3 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -1887,12 +1887,6 @@ rb_event_index(struct ring_buffer_event *event) | |||
1887 | return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE; | 1887 | return (addr & ~PAGE_MASK) - BUF_PAGE_HDR_SIZE; |
1888 | } | 1888 | } |
1889 | 1889 | ||
1890 | static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | ||
1891 | { | ||
1892 | cpu_buffer->read_stamp = cpu_buffer->reader_page->page->time_stamp; | ||
1893 | cpu_buffer->reader_page->read = 0; | ||
1894 | } | ||
1895 | |||
1896 | static void rb_inc_iter(struct ring_buffer_iter *iter) | 1890 | static void rb_inc_iter(struct ring_buffer_iter *iter) |
1897 | { | 1891 | { |
1898 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; | 1892 | struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; |
@@ -2803,8 +2797,11 @@ rb_reserve_next_event(struct ring_buffer *buffer, | |||
2803 | 2797 | ||
2804 | event = __rb_reserve_next(cpu_buffer, &info); | 2798 | event = __rb_reserve_next(cpu_buffer, &info); |
2805 | 2799 | ||
2806 | if (unlikely(PTR_ERR(event) == -EAGAIN)) | 2800 | if (unlikely(PTR_ERR(event) == -EAGAIN)) { |
2801 | if (info.add_timestamp) | ||
2802 | info.length -= RB_LEN_TIME_EXTEND; | ||
2807 | goto again; | 2803 | goto again; |
2804 | } | ||
2808 | 2805 | ||
2809 | if (!event) | 2806 | if (!event) |
2810 | goto out_fail; | 2807 | goto out_fail; |
@@ -3626,7 +3623,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | |||
3626 | 3623 | ||
3627 | /* Finally update the reader page to the new head */ | 3624 | /* Finally update the reader page to the new head */ |
3628 | cpu_buffer->reader_page = reader; | 3625 | cpu_buffer->reader_page = reader; |
3629 | rb_reset_reader_page(cpu_buffer); | 3626 | cpu_buffer->reader_page->read = 0; |
3630 | 3627 | ||
3631 | if (overwrite != cpu_buffer->last_overrun) { | 3628 | if (overwrite != cpu_buffer->last_overrun) { |
3632 | cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun; | 3629 | cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun; |
@@ -3636,6 +3633,10 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) | |||
3636 | goto again; | 3633 | goto again; |
3637 | 3634 | ||
3638 | out: | 3635 | out: |
3636 | /* Update the read_stamp on the first event */ | ||
3637 | if (reader && reader->read == 0) | ||
3638 | cpu_buffer->read_stamp = reader->page->time_stamp; | ||
3639 | |||
3639 | arch_spin_unlock(&cpu_buffer->lock); | 3640 | arch_spin_unlock(&cpu_buffer->lock); |
3640 | local_irq_restore(flags); | 3641 | local_irq_restore(flags); |
3641 | 3642 | ||
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index abfc903e741e..cc9f7a9319be 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * trace event based perf event profiling/tracing | 2 | * trace event based perf event profiling/tracing |
3 | * | 3 | * |
4 | * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com> | 4 | * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra |
5 | * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com> | 5 | * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com> |
6 | */ | 6 | */ |
7 | 7 | ||
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 6bbc5f652355..4f6ef6912e00 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
@@ -582,6 +582,12 @@ static void __ftrace_clear_event_pids(struct trace_array *tr) | |||
582 | unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr); | 582 | unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr); |
583 | unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr); | 583 | unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr); |
584 | 584 | ||
585 | unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, tr); | ||
586 | unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, tr); | ||
587 | |||
588 | unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr); | ||
589 | unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr); | ||
590 | |||
585 | list_for_each_entry(file, &tr->events, list) { | 591 | list_for_each_entry(file, &tr->events, list) { |
586 | clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags); | 592 | clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags); |
587 | } | 593 | } |
@@ -1729,6 +1735,16 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf, | |||
1729 | tr, INT_MAX); | 1735 | tr, INT_MAX); |
1730 | register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, | 1736 | register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, |
1731 | tr, 0); | 1737 | tr, 0); |
1738 | |||
1739 | register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, | ||
1740 | tr, INT_MAX); | ||
1741 | register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, | ||
1742 | tr, 0); | ||
1743 | |||
1744 | register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre, | ||
1745 | tr, INT_MAX); | ||
1746 | register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post, | ||
1747 | tr, 0); | ||
1732 | } | 1748 | } |
1733 | 1749 | ||
1734 | /* | 1750 | /* |