Diffstat (limited to 'kernel')
105 files changed, 4913 insertions, 2555 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index dc5c77544fd6..a59481a3fa6c 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -57,7 +57,6 @@ obj-$(CONFIG_UTS_NS) += utsname.o | |||
57 | obj-$(CONFIG_USER_NS) += user_namespace.o | 57 | obj-$(CONFIG_USER_NS) += user_namespace.o |
58 | obj-$(CONFIG_PID_NS) += pid_namespace.o | 58 | obj-$(CONFIG_PID_NS) += pid_namespace.o |
59 | obj-$(CONFIG_IKCONFIG) += configs.o | 59 | obj-$(CONFIG_IKCONFIG) += configs.o |
60 | obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o | ||
61 | obj-$(CONFIG_SMP) += stop_machine.o | 60 | obj-$(CONFIG_SMP) += stop_machine.o |
62 | obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o | 61 | obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o |
63 | obj-$(CONFIG_AUDIT) += audit.o auditfilter.o | 62 | obj-$(CONFIG_AUDIT) += audit.o auditfilter.o |
@@ -86,7 +85,7 @@ obj-$(CONFIG_RING_BUFFER) += trace/ | |||
86 | obj-$(CONFIG_TRACEPOINTS) += trace/ | 85 | obj-$(CONFIG_TRACEPOINTS) += trace/ |
87 | obj-$(CONFIG_IRQ_WORK) += irq_work.o | 86 | obj-$(CONFIG_IRQ_WORK) += irq_work.o |
88 | obj-$(CONFIG_CPU_PM) += cpu_pm.o | 87 | obj-$(CONFIG_CPU_PM) += cpu_pm.o |
89 | obj-$(CONFIG_NET) += bpf/ | 88 | obj-$(CONFIG_BPF) += bpf/ |
90 | 89 | ||
91 | obj-$(CONFIG_PERF_EVENTS) += events/ | 90 | obj-$(CONFIG_PERF_EVENTS) += events/ |
92 | 91 | ||
diff --git a/kernel/audit.c b/kernel/audit.c index 80983df92cd4..1f37f15117e5 100644 --- a/kernel/audit.c +++ b/kernel/audit.c | |||
@@ -499,7 +499,6 @@ static int kauditd_thread(void *dummy) | |||
499 | set_freezable(); | 499 | set_freezable(); |
500 | while (!kthread_should_stop()) { | 500 | while (!kthread_should_stop()) { |
501 | struct sk_buff *skb; | 501 | struct sk_buff *skb; |
502 | DECLARE_WAITQUEUE(wait, current); | ||
503 | 502 | ||
504 | flush_hold_queue(); | 503 | flush_hold_queue(); |
505 | 504 | ||
@@ -514,16 +513,8 @@ static int kauditd_thread(void *dummy) | |||
514 | audit_printk_skb(skb); | 513 | audit_printk_skb(skb); |
515 | continue; | 514 | continue; |
516 | } | 515 | } |
517 | set_current_state(TASK_INTERRUPTIBLE); | ||
518 | add_wait_queue(&kauditd_wait, &wait); | ||
519 | 516 | ||
520 | if (!skb_queue_len(&audit_skb_queue)) { | 517 | wait_event_freezable(kauditd_wait, skb_queue_len(&audit_skb_queue)); |
521 | try_to_freeze(); | ||
522 | schedule(); | ||
523 | } | ||
524 | |||
525 | __set_current_state(TASK_RUNNING); | ||
526 | remove_wait_queue(&kauditd_wait, &wait); | ||
527 | } | 518 | } |
528 | return 0; | 519 | return 0; |
529 | } | 520 | } |
@@ -739,7 +730,7 @@ static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature | |||
739 | 730 | ||
740 | ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_FEATURE_CHANGE); | 731 | ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_FEATURE_CHANGE); |
741 | audit_log_task_info(ab, current); | 732 | audit_log_task_info(ab, current); |
742 | audit_log_format(ab, "feature=%s old=%u new=%u old_lock=%u new_lock=%u res=%d", | 733 | audit_log_format(ab, " feature=%s old=%u new=%u old_lock=%u new_lock=%u res=%d", |
743 | audit_feature_names[which], !!old_feature, !!new_feature, | 734 | audit_feature_names[which], !!old_feature, !!new_feature, |
744 | !!old_lock, !!new_lock, res); | 735 | !!old_lock, !!new_lock, res); |
745 | audit_log_end(ab); | 736 | audit_log_end(ab); |
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index e242e3a9864a..80f29e015570 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c | |||
@@ -154,6 +154,7 @@ static struct audit_chunk *alloc_chunk(int count) | |||
154 | chunk->owners[i].index = i; | 154 | chunk->owners[i].index = i; |
155 | } | 155 | } |
156 | fsnotify_init_mark(&chunk->mark, audit_tree_destroy_watch); | 156 | fsnotify_init_mark(&chunk->mark, audit_tree_destroy_watch); |
157 | chunk->mark.mask = FS_IN_IGNORED; | ||
157 | return chunk; | 158 | return chunk; |
158 | } | 159 | } |
159 | 160 | ||
diff --git a/kernel/auditsc.c b/kernel/auditsc.c index e420a0c41b5f..c75522a83678 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c | |||
@@ -1897,6 +1897,11 @@ out: | |||
1897 | audit_copy_inode(n, dentry, inode); | 1897 | audit_copy_inode(n, dentry, inode); |
1898 | } | 1898 | } |
1899 | 1899 | ||
1900 | void __audit_file(const struct file *file) | ||
1901 | { | ||
1902 | __audit_inode(NULL, file->f_path.dentry, 0); | ||
1903 | } | ||
1904 | |||
1900 | /** | 1905 | /** |
1901 | * __audit_inode_child - collect inode info for created/removed objects | 1906 | * __audit_inode_child - collect inode info for created/removed objects |
1902 | * @parent: inode of dentry parent | 1907 | * @parent: inode of dentry parent |
@@ -2373,7 +2378,7 @@ int __audit_log_bprm_fcaps(struct linux_binprm *bprm, | |||
2373 | ax->d.next = context->aux; | 2378 | ax->d.next = context->aux; |
2374 | context->aux = (void *)ax; | 2379 | context->aux = (void *)ax; |
2375 | 2380 | ||
2376 | dentry = dget(bprm->file->f_dentry); | 2381 | dentry = dget(bprm->file->f_path.dentry); |
2377 | get_vfs_caps_from_disk(dentry, &vcaps); | 2382 | get_vfs_caps_from_disk(dentry, &vcaps); |
2378 | dput(dentry); | 2383 | dput(dentry); |
2379 | 2384 | ||
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 45427239f375..a5ae60f0b0a2 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile | |||
@@ -1,5 +1,5 @@ | |||
1 | obj-y := core.o syscall.o verifier.o | 1 | obj-y := core.o |
2 | 2 | obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o hashtab.o arraymap.o helpers.o | |
3 | ifdef CONFIG_TEST_BPF | 3 | ifdef CONFIG_TEST_BPF |
4 | obj-y += test_stub.o | 4 | obj-$(CONFIG_BPF_SYSCALL) += test_stub.o |
5 | endif | 5 | endif |
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c new file mode 100644 index 000000000000..9eb4d8a7cd87 --- /dev/null +++ b/kernel/bpf/arraymap.c | |||
@@ -0,0 +1,156 @@ | |||
1 | /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com | ||
2 | * | ||
3 | * This program is free software; you can redistribute it and/or | ||
4 | * modify it under the terms of version 2 of the GNU General Public | ||
5 | * License as published by the Free Software Foundation. | ||
6 | * | ||
7 | * This program is distributed in the hope that it will be useful, but | ||
8 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
10 | * General Public License for more details. | ||
11 | */ | ||
12 | #include <linux/bpf.h> | ||
13 | #include <linux/err.h> | ||
14 | #include <linux/vmalloc.h> | ||
15 | #include <linux/slab.h> | ||
16 | #include <linux/mm.h> | ||
17 | |||
18 | struct bpf_array { | ||
19 | struct bpf_map map; | ||
20 | u32 elem_size; | ||
21 | char value[0] __aligned(8); | ||
22 | }; | ||
23 | |||
24 | /* Called from syscall */ | ||
25 | static struct bpf_map *array_map_alloc(union bpf_attr *attr) | ||
26 | { | ||
27 | struct bpf_array *array; | ||
28 | u32 elem_size, array_size; | ||
29 | |||
30 | /* check sanity of attributes */ | ||
31 | if (attr->max_entries == 0 || attr->key_size != 4 || | ||
32 | attr->value_size == 0) | ||
33 | return ERR_PTR(-EINVAL); | ||
34 | |||
35 | elem_size = round_up(attr->value_size, 8); | ||
36 | |||
37 | /* check round_up into zero and u32 overflow */ | ||
38 | if (elem_size == 0 || | ||
39 | attr->max_entries > (U32_MAX - sizeof(*array)) / elem_size) | ||
40 | return ERR_PTR(-ENOMEM); | ||
41 | |||
42 | array_size = sizeof(*array) + attr->max_entries * elem_size; | ||
43 | |||
44 | /* allocate all map elements and zero-initialize them */ | ||
45 | array = kzalloc(array_size, GFP_USER | __GFP_NOWARN); | ||
46 | if (!array) { | ||
47 | array = vzalloc(array_size); | ||
48 | if (!array) | ||
49 | return ERR_PTR(-ENOMEM); | ||
50 | } | ||
51 | |||
52 | /* copy mandatory map attributes */ | ||
53 | array->map.key_size = attr->key_size; | ||
54 | array->map.value_size = attr->value_size; | ||
55 | array->map.max_entries = attr->max_entries; | ||
56 | |||
57 | array->elem_size = elem_size; | ||
58 | |||
59 | return &array->map; | ||
60 | } | ||
61 | |||
62 | /* Called from syscall or from eBPF program */ | ||
63 | static void *array_map_lookup_elem(struct bpf_map *map, void *key) | ||
64 | { | ||
65 | struct bpf_array *array = container_of(map, struct bpf_array, map); | ||
66 | u32 index = *(u32 *)key; | ||
67 | |||
68 | if (index >= array->map.max_entries) | ||
69 | return NULL; | ||
70 | |||
71 | return array->value + array->elem_size * index; | ||
72 | } | ||
73 | |||
74 | /* Called from syscall */ | ||
75 | static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key) | ||
76 | { | ||
77 | struct bpf_array *array = container_of(map, struct bpf_array, map); | ||
78 | u32 index = *(u32 *)key; | ||
79 | u32 *next = (u32 *)next_key; | ||
80 | |||
81 | if (index >= array->map.max_entries) { | ||
82 | *next = 0; | ||
83 | return 0; | ||
84 | } | ||
85 | |||
86 | if (index == array->map.max_entries - 1) | ||
87 | return -ENOENT; | ||
88 | |||
89 | *next = index + 1; | ||
90 | return 0; | ||
91 | } | ||
92 | |||
93 | /* Called from syscall or from eBPF program */ | ||
94 | static int array_map_update_elem(struct bpf_map *map, void *key, void *value, | ||
95 | u64 map_flags) | ||
96 | { | ||
97 | struct bpf_array *array = container_of(map, struct bpf_array, map); | ||
98 | u32 index = *(u32 *)key; | ||
99 | |||
100 | if (map_flags > BPF_EXIST) | ||
101 | /* unknown flags */ | ||
102 | return -EINVAL; | ||
103 | |||
104 | if (index >= array->map.max_entries) | ||
105 | /* all elements were pre-allocated, cannot insert a new one */ | ||
106 | return -E2BIG; | ||
107 | |||
108 | if (map_flags == BPF_NOEXIST) | ||
109 | /* all elements already exist */ | ||
110 | return -EEXIST; | ||
111 | |||
112 | memcpy(array->value + array->elem_size * index, value, array->elem_size); | ||
113 | return 0; | ||
114 | } | ||
115 | |||
116 | /* Called from syscall or from eBPF program */ | ||
117 | static int array_map_delete_elem(struct bpf_map *map, void *key) | ||
118 | { | ||
119 | return -EINVAL; | ||
120 | } | ||
121 | |||
122 | /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ | ||
123 | static void array_map_free(struct bpf_map *map) | ||
124 | { | ||
125 | struct bpf_array *array = container_of(map, struct bpf_array, map); | ||
126 | |||
127 | /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, | ||
128 | * so the programs (there can be more than one using this map) were | ||
129 | * disconnected from events. Wait for outstanding programs to complete | ||
130 | * and free the array | ||
131 | */ | ||
132 | synchronize_rcu(); | ||
133 | |||
134 | kvfree(array); | ||
135 | } | ||
136 | |||
137 | static struct bpf_map_ops array_ops = { | ||
138 | .map_alloc = array_map_alloc, | ||
139 | .map_free = array_map_free, | ||
140 | .map_get_next_key = array_map_get_next_key, | ||
141 | .map_lookup_elem = array_map_lookup_elem, | ||
142 | .map_update_elem = array_map_update_elem, | ||
143 | .map_delete_elem = array_map_delete_elem, | ||
144 | }; | ||
145 | |||
146 | static struct bpf_map_type_list tl = { | ||
147 | .ops = &array_ops, | ||
148 | .type = BPF_MAP_TYPE_ARRAY, | ||
149 | }; | ||
150 | |||
151 | static int __init register_array_map(void) | ||
152 | { | ||
153 | bpf_register_map_type(&tl); | ||
154 | return 0; | ||
155 | } | ||
156 | late_initcall(register_array_map); | ||
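The array map above is driven entirely through the bpf(2) syscall handled in kernel/bpf/syscall.c. Below is a minimal userspace sketch, not part of the patch, assuming __NR_bpf is defined on the build host and that the uapi union bpf_attr carries the map_type/key_size/value_size/max_entries and map_fd/key/value/flags fields used by this series; sys_bpf() is a hypothetical wrapper.

/* Hypothetical userspace sketch: create a BPF_MAP_TYPE_ARRAY, update one
 * slot and read it back. Array keys must be 4 bytes and all elements are
 * pre-allocated, so a BPF_NOEXIST update would fail with -EEXIST.
 */
#include <linux/bpf.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

static int sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
{
	return syscall(__NR_bpf, cmd, attr, size);
}

int main(void)
{
	union bpf_attr attr;
	__u32 key = 3;
	__u64 value = 42, out = 0;
	int map_fd;

	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_ARRAY;
	attr.key_size = sizeof(key);		/* must be 4, see array_map_alloc() */
	attr.value_size = sizeof(value);
	attr.max_entries = 16;
	map_fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
	if (map_fd < 0)
		return 1;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.key = (__u64)(unsigned long)&key;
	attr.value = (__u64)(unsigned long)&value;
	attr.flags = BPF_ANY;			/* create or overwrite */
	if (sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)))
		return 1;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.key = (__u64)(unsigned long)&key;
	attr.value = (__u64)(unsigned long)&out;
	if (sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)))
		return 1;

	printf("index %u -> %llu\n", key, (unsigned long long)out);
	return 0;
}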
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index f0c30c59b317..d6594e457a25 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c | |||
@@ -655,3 +655,12 @@ void bpf_prog_free(struct bpf_prog *fp) | |||
655 | schedule_work(&aux->work); | 655 | schedule_work(&aux->work); |
656 | } | 656 | } |
657 | EXPORT_SYMBOL_GPL(bpf_prog_free); | 657 | EXPORT_SYMBOL_GPL(bpf_prog_free); |
658 | |||
659 | /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call | ||
660 | * skb_copy_bits(), so provide a weak definition of it for NET-less config. | ||
661 | */ | ||
662 | int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to, | ||
663 | int len) | ||
664 | { | ||
665 | return -EFAULT; | ||
666 | } | ||
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c new file mode 100644 index 000000000000..b3ba43674310 --- /dev/null +++ b/kernel/bpf/hashtab.c | |||
@@ -0,0 +1,367 @@ | |||
1 | /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com | ||
2 | * | ||
3 | * This program is free software; you can redistribute it and/or | ||
4 | * modify it under the terms of version 2 of the GNU General Public | ||
5 | * License as published by the Free Software Foundation. | ||
6 | * | ||
7 | * This program is distributed in the hope that it will be useful, but | ||
8 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
10 | * General Public License for more details. | ||
11 | */ | ||
12 | #include <linux/bpf.h> | ||
13 | #include <linux/jhash.h> | ||
14 | #include <linux/filter.h> | ||
15 | #include <linux/vmalloc.h> | ||
16 | |||
17 | struct bpf_htab { | ||
18 | struct bpf_map map; | ||
19 | struct hlist_head *buckets; | ||
20 | spinlock_t lock; | ||
21 | u32 count; /* number of elements in this hashtable */ | ||
22 | u32 n_buckets; /* number of hash buckets */ | ||
23 | u32 elem_size; /* size of each element in bytes */ | ||
24 | }; | ||
25 | |||
26 | /* each htab element is struct htab_elem + key + value */ | ||
27 | struct htab_elem { | ||
28 | struct hlist_node hash_node; | ||
29 | struct rcu_head rcu; | ||
30 | u32 hash; | ||
31 | char key[0] __aligned(8); | ||
32 | }; | ||
33 | |||
34 | /* Called from syscall */ | ||
35 | static struct bpf_map *htab_map_alloc(union bpf_attr *attr) | ||
36 | { | ||
37 | struct bpf_htab *htab; | ||
38 | int err, i; | ||
39 | |||
40 | htab = kzalloc(sizeof(*htab), GFP_USER); | ||
41 | if (!htab) | ||
42 | return ERR_PTR(-ENOMEM); | ||
43 | |||
44 | /* mandatory map attributes */ | ||
45 | htab->map.key_size = attr->key_size; | ||
46 | htab->map.value_size = attr->value_size; | ||
47 | htab->map.max_entries = attr->max_entries; | ||
48 | |||
49 | /* check sanity of attributes. | ||
50 | * value_size == 0 may be allowed in the future to use map as a set | ||
51 | */ | ||
52 | err = -EINVAL; | ||
53 | if (htab->map.max_entries == 0 || htab->map.key_size == 0 || | ||
54 | htab->map.value_size == 0) | ||
55 | goto free_htab; | ||
56 | |||
57 | /* hash table size must be power of 2 */ | ||
58 | htab->n_buckets = roundup_pow_of_two(htab->map.max_entries); | ||
59 | |||
60 | err = -E2BIG; | ||
61 | if (htab->map.key_size > MAX_BPF_STACK) | ||
62 | /* eBPF programs initialize keys on stack, so they cannot be | ||
63 | * larger than max stack size | ||
64 | */ | ||
65 | goto free_htab; | ||
66 | |||
67 | err = -ENOMEM; | ||
68 | /* prevent zero size kmalloc and check for u32 overflow */ | ||
69 | if (htab->n_buckets == 0 || | ||
70 | htab->n_buckets > U32_MAX / sizeof(struct hlist_head)) | ||
71 | goto free_htab; | ||
72 | |||
73 | htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct hlist_head), | ||
74 | GFP_USER | __GFP_NOWARN); | ||
75 | |||
76 | if (!htab->buckets) { | ||
77 | htab->buckets = vmalloc(htab->n_buckets * sizeof(struct hlist_head)); | ||
78 | if (!htab->buckets) | ||
79 | goto free_htab; | ||
80 | } | ||
81 | |||
82 | for (i = 0; i < htab->n_buckets; i++) | ||
83 | INIT_HLIST_HEAD(&htab->buckets[i]); | ||
84 | |||
85 | spin_lock_init(&htab->lock); | ||
86 | htab->count = 0; | ||
87 | |||
88 | htab->elem_size = sizeof(struct htab_elem) + | ||
89 | round_up(htab->map.key_size, 8) + | ||
90 | htab->map.value_size; | ||
91 | return &htab->map; | ||
92 | |||
93 | free_htab: | ||
94 | kfree(htab); | ||
95 | return ERR_PTR(err); | ||
96 | } | ||
97 | |||
98 | static inline u32 htab_map_hash(const void *key, u32 key_len) | ||
99 | { | ||
100 | return jhash(key, key_len, 0); | ||
101 | } | ||
102 | |||
103 | static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash) | ||
104 | { | ||
105 | return &htab->buckets[hash & (htab->n_buckets - 1)]; | ||
106 | } | ||
107 | |||
108 | static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash, | ||
109 | void *key, u32 key_size) | ||
110 | { | ||
111 | struct htab_elem *l; | ||
112 | |||
113 | hlist_for_each_entry_rcu(l, head, hash_node) | ||
114 | if (l->hash == hash && !memcmp(&l->key, key, key_size)) | ||
115 | return l; | ||
116 | |||
117 | return NULL; | ||
118 | } | ||
119 | |||
120 | /* Called from syscall or from eBPF program */ | ||
121 | static void *htab_map_lookup_elem(struct bpf_map *map, void *key) | ||
122 | { | ||
123 | struct bpf_htab *htab = container_of(map, struct bpf_htab, map); | ||
124 | struct hlist_head *head; | ||
125 | struct htab_elem *l; | ||
126 | u32 hash, key_size; | ||
127 | |||
128 | /* Must be called with rcu_read_lock. */ | ||
129 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
130 | |||
131 | key_size = map->key_size; | ||
132 | |||
133 | hash = htab_map_hash(key, key_size); | ||
134 | |||
135 | head = select_bucket(htab, hash); | ||
136 | |||
137 | l = lookup_elem_raw(head, hash, key, key_size); | ||
138 | |||
139 | if (l) | ||
140 | return l->key + round_up(map->key_size, 8); | ||
141 | |||
142 | return NULL; | ||
143 | } | ||
144 | |||
145 | /* Called from syscall */ | ||
146 | static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) | ||
147 | { | ||
148 | struct bpf_htab *htab = container_of(map, struct bpf_htab, map); | ||
149 | struct hlist_head *head; | ||
150 | struct htab_elem *l, *next_l; | ||
151 | u32 hash, key_size; | ||
152 | int i; | ||
153 | |||
154 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
155 | |||
156 | key_size = map->key_size; | ||
157 | |||
158 | hash = htab_map_hash(key, key_size); | ||
159 | |||
160 | head = select_bucket(htab, hash); | ||
161 | |||
162 | /* lookup the key */ | ||
163 | l = lookup_elem_raw(head, hash, key, key_size); | ||
164 | |||
165 | if (!l) { | ||
166 | i = 0; | ||
167 | goto find_first_elem; | ||
168 | } | ||
169 | |||
170 | /* key was found, get next key in the same bucket */ | ||
171 | next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)), | ||
172 | struct htab_elem, hash_node); | ||
173 | |||
174 | if (next_l) { | ||
175 | /* if next elem in this hash list is non-zero, just return it */ | ||
176 | memcpy(next_key, next_l->key, key_size); | ||
177 | return 0; | ||
178 | } | ||
179 | |||
180 | /* no more elements in this hash list, go to the next bucket */ | ||
181 | i = hash & (htab->n_buckets - 1); | ||
182 | i++; | ||
183 | |||
184 | find_first_elem: | ||
185 | /* iterate over buckets */ | ||
186 | for (; i < htab->n_buckets; i++) { | ||
187 | head = select_bucket(htab, i); | ||
188 | |||
189 | /* pick first element in the bucket */ | ||
190 | next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)), | ||
191 | struct htab_elem, hash_node); | ||
192 | if (next_l) { | ||
193 | /* if it's not empty, just return it */ | ||
194 | memcpy(next_key, next_l->key, key_size); | ||
195 | return 0; | ||
196 | } | ||
197 | } | ||
198 | |||
199 | /* iterated over all buckets and all elements */ | ||
200 | return -ENOENT; | ||
201 | } | ||
202 | |||
203 | /* Called from syscall or from eBPF program */ | ||
204 | static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, | ||
205 | u64 map_flags) | ||
206 | { | ||
207 | struct bpf_htab *htab = container_of(map, struct bpf_htab, map); | ||
208 | struct htab_elem *l_new, *l_old; | ||
209 | struct hlist_head *head; | ||
210 | unsigned long flags; | ||
211 | u32 key_size; | ||
212 | int ret; | ||
213 | |||
214 | if (map_flags > BPF_EXIST) | ||
215 | /* unknown flags */ | ||
216 | return -EINVAL; | ||
217 | |||
218 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
219 | |||
220 | /* allocate new element outside of lock */ | ||
221 | l_new = kmalloc(htab->elem_size, GFP_ATOMIC); | ||
222 | if (!l_new) | ||
223 | return -ENOMEM; | ||
224 | |||
225 | key_size = map->key_size; | ||
226 | |||
227 | memcpy(l_new->key, key, key_size); | ||
228 | memcpy(l_new->key + round_up(key_size, 8), value, map->value_size); | ||
229 | |||
230 | l_new->hash = htab_map_hash(l_new->key, key_size); | ||
231 | |||
232 | /* bpf_map_update_elem() can be called in_irq() */ | ||
233 | spin_lock_irqsave(&htab->lock, flags); | ||
234 | |||
235 | head = select_bucket(htab, l_new->hash); | ||
236 | |||
237 | l_old = lookup_elem_raw(head, l_new->hash, key, key_size); | ||
238 | |||
239 | if (!l_old && unlikely(htab->count >= map->max_entries)) { | ||
240 | /* if elem with this 'key' doesn't exist and we've reached | ||
241 | * max_entries limit, fail insertion of new elem | ||
242 | */ | ||
243 | ret = -E2BIG; | ||
244 | goto err; | ||
245 | } | ||
246 | |||
247 | if (l_old && map_flags == BPF_NOEXIST) { | ||
248 | /* elem already exists */ | ||
249 | ret = -EEXIST; | ||
250 | goto err; | ||
251 | } | ||
252 | |||
253 | if (!l_old && map_flags == BPF_EXIST) { | ||
254 | /* elem doesn't exist, cannot update it */ | ||
255 | ret = -ENOENT; | ||
256 | goto err; | ||
257 | } | ||
258 | |||
259 | /* add new element to the head of the list, so that concurrent | ||
260 | * search will find it before old elem | ||
261 | */ | ||
262 | hlist_add_head_rcu(&l_new->hash_node, head); | ||
263 | if (l_old) { | ||
264 | hlist_del_rcu(&l_old->hash_node); | ||
265 | kfree_rcu(l_old, rcu); | ||
266 | } else { | ||
267 | htab->count++; | ||
268 | } | ||
269 | spin_unlock_irqrestore(&htab->lock, flags); | ||
270 | |||
271 | return 0; | ||
272 | err: | ||
273 | spin_unlock_irqrestore(&htab->lock, flags); | ||
274 | kfree(l_new); | ||
275 | return ret; | ||
276 | } | ||
277 | |||
278 | /* Called from syscall or from eBPF program */ | ||
279 | static int htab_map_delete_elem(struct bpf_map *map, void *key) | ||
280 | { | ||
281 | struct bpf_htab *htab = container_of(map, struct bpf_htab, map); | ||
282 | struct hlist_head *head; | ||
283 | struct htab_elem *l; | ||
284 | unsigned long flags; | ||
285 | u32 hash, key_size; | ||
286 | int ret = -ENOENT; | ||
287 | |||
288 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
289 | |||
290 | key_size = map->key_size; | ||
291 | |||
292 | hash = htab_map_hash(key, key_size); | ||
293 | |||
294 | spin_lock_irqsave(&htab->lock, flags); | ||
295 | |||
296 | head = select_bucket(htab, hash); | ||
297 | |||
298 | l = lookup_elem_raw(head, hash, key, key_size); | ||
299 | |||
300 | if (l) { | ||
301 | hlist_del_rcu(&l->hash_node); | ||
302 | htab->count--; | ||
303 | kfree_rcu(l, rcu); | ||
304 | ret = 0; | ||
305 | } | ||
306 | |||
307 | spin_unlock_irqrestore(&htab->lock, flags); | ||
308 | return ret; | ||
309 | } | ||
310 | |||
311 | static void delete_all_elements(struct bpf_htab *htab) | ||
312 | { | ||
313 | int i; | ||
314 | |||
315 | for (i = 0; i < htab->n_buckets; i++) { | ||
316 | struct hlist_head *head = select_bucket(htab, i); | ||
317 | struct hlist_node *n; | ||
318 | struct htab_elem *l; | ||
319 | |||
320 | hlist_for_each_entry_safe(l, n, head, hash_node) { | ||
321 | hlist_del_rcu(&l->hash_node); | ||
322 | htab->count--; | ||
323 | kfree(l); | ||
324 | } | ||
325 | } | ||
326 | } | ||
327 | |||
328 | /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ | ||
329 | static void htab_map_free(struct bpf_map *map) | ||
330 | { | ||
331 | struct bpf_htab *htab = container_of(map, struct bpf_htab, map); | ||
332 | |||
333 | /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, | ||
334 | * so the programs (there can be more than one using this map) were | ||
335 | * disconnected from events. Wait for outstanding critical sections in | ||
336 | * these programs to complete | ||
337 | */ | ||
338 | synchronize_rcu(); | ||
339 | |||
340 | /* some of kfree_rcu() callbacks for elements of this map may not have | ||
341 | * executed. It's ok. Proceed to free residual elements and map itself | ||
342 | */ | ||
343 | delete_all_elements(htab); | ||
344 | kvfree(htab->buckets); | ||
345 | kfree(htab); | ||
346 | } | ||
347 | |||
348 | static struct bpf_map_ops htab_ops = { | ||
349 | .map_alloc = htab_map_alloc, | ||
350 | .map_free = htab_map_free, | ||
351 | .map_get_next_key = htab_map_get_next_key, | ||
352 | .map_lookup_elem = htab_map_lookup_elem, | ||
353 | .map_update_elem = htab_map_update_elem, | ||
354 | .map_delete_elem = htab_map_delete_elem, | ||
355 | }; | ||
356 | |||
357 | static struct bpf_map_type_list tl = { | ||
358 | .ops = &htab_ops, | ||
359 | .type = BPF_MAP_TYPE_HASH, | ||
360 | }; | ||
361 | |||
362 | static int __init register_htab_map(void) | ||
363 | { | ||
364 | bpf_register_map_type(&tl); | ||
365 | return 0; | ||
366 | } | ||
367 | late_initcall(register_htab_map); | ||
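For the hash map the interesting additions are the BPF_NOEXIST/BPF_EXIST update flags and key iteration via BPF_MAP_GET_NEXT_KEY. A sketch under the same assumptions as the array-map example above (sys_bpf() is the same hypothetical wrapper):

/* Sketch only: hash maps take arbitrary key sizes (up to MAX_BPF_STACK)
 * and allocate elements on demand, so the update flags are meaningful.
 */
static int hash_map_demo(void)
{
	union bpf_attr attr;
	__u64 key = 0x1234, next_key, value = 1;
	int map_fd;

	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_HASH;
	attr.key_size = sizeof(key);
	attr.value_size = sizeof(value);
	attr.max_entries = 128;
	map_fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
	if (map_fd < 0)
		return -1;

	/* insert only if the key is not present yet */
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.key = (__u64)(unsigned long)&key;
	attr.value = (__u64)(unsigned long)&value;
	attr.flags = BPF_NOEXIST;
	sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));	/* succeeds */
	sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));	/* fails: -EEXIST */

	/* walk all keys; htab_map_get_next_key() returns -ENOENT at the end */
	memset(&attr, 0, sizeof(attr));
	attr.map_fd = map_fd;
	attr.key = (__u64)(unsigned long)&key;
	attr.next_key = (__u64)(unsigned long)&next_key;
	while (sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr)) == 0)
		key = next_key;		/* a real caller would use next_key here */

	return 0;
}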
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c new file mode 100644 index 000000000000..9e3414d85459 --- /dev/null +++ b/kernel/bpf/helpers.c | |||
@@ -0,0 +1,89 @@ | |||
1 | /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com | ||
2 | * | ||
3 | * This program is free software; you can redistribute it and/or | ||
4 | * modify it under the terms of version 2 of the GNU General Public | ||
5 | * License as published by the Free Software Foundation. | ||
6 | * | ||
7 | * This program is distributed in the hope that it will be useful, but | ||
8 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
10 | * General Public License for more details. | ||
11 | */ | ||
12 | #include <linux/bpf.h> | ||
13 | #include <linux/rcupdate.h> | ||
14 | |||
15 | /* If a kernel subsystem allows eBPF programs to call this function, | ||
16 | * inside its own verifier_ops->get_func_proto() callback it should return | ||
17 | * bpf_map_lookup_elem_proto, so that verifier can properly check the arguments | ||
18 | * | ||
19 | * Different map implementations will rely on rcu in map methods | ||
20 | * lookup/update/delete, therefore eBPF programs must run under rcu lock | ||
21 | * if program is allowed to access maps, so check rcu_read_lock_held in | ||
22 | * all three functions. | ||
23 | */ | ||
24 | static u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) | ||
25 | { | ||
26 | /* verifier checked that R1 contains a valid pointer to bpf_map | ||
27 | * and R2 points to a program stack and map->key_size bytes were | ||
28 | * initialized | ||
29 | */ | ||
30 | struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; | ||
31 | void *key = (void *) (unsigned long) r2; | ||
32 | void *value; | ||
33 | |||
34 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
35 | |||
36 | value = map->ops->map_lookup_elem(map, key); | ||
37 | |||
38 | /* lookup() returns either pointer to element value or NULL | ||
39 | * which is the meaning of PTR_TO_MAP_VALUE_OR_NULL type | ||
40 | */ | ||
41 | return (unsigned long) value; | ||
42 | } | ||
43 | |||
44 | struct bpf_func_proto bpf_map_lookup_elem_proto = { | ||
45 | .func = bpf_map_lookup_elem, | ||
46 | .gpl_only = false, | ||
47 | .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, | ||
48 | .arg1_type = ARG_CONST_MAP_PTR, | ||
49 | .arg2_type = ARG_PTR_TO_MAP_KEY, | ||
50 | }; | ||
51 | |||
52 | static u64 bpf_map_update_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) | ||
53 | { | ||
54 | struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; | ||
55 | void *key = (void *) (unsigned long) r2; | ||
56 | void *value = (void *) (unsigned long) r3; | ||
57 | |||
58 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
59 | |||
60 | return map->ops->map_update_elem(map, key, value, r4); | ||
61 | } | ||
62 | |||
63 | struct bpf_func_proto bpf_map_update_elem_proto = { | ||
64 | .func = bpf_map_update_elem, | ||
65 | .gpl_only = false, | ||
66 | .ret_type = RET_INTEGER, | ||
67 | .arg1_type = ARG_CONST_MAP_PTR, | ||
68 | .arg2_type = ARG_PTR_TO_MAP_KEY, | ||
69 | .arg3_type = ARG_PTR_TO_MAP_VALUE, | ||
70 | .arg4_type = ARG_ANYTHING, | ||
71 | }; | ||
72 | |||
73 | static u64 bpf_map_delete_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) | ||
74 | { | ||
75 | struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; | ||
76 | void *key = (void *) (unsigned long) r2; | ||
77 | |||
78 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
79 | |||
80 | return map->ops->map_delete_elem(map, key); | ||
81 | } | ||
82 | |||
83 | struct bpf_func_proto bpf_map_delete_elem_proto = { | ||
84 | .func = bpf_map_delete_elem, | ||
85 | .gpl_only = false, | ||
86 | .ret_type = RET_INTEGER, | ||
87 | .arg1_type = ARG_CONST_MAP_PTR, | ||
88 | .arg2_type = ARG_PTR_TO_MAP_KEY, | ||
89 | }; | ||
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ba61c8c16032..088ac0b1b106 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c | |||
@@ -169,7 +169,7 @@ static int map_lookup_elem(union bpf_attr *attr) | |||
169 | if (copy_from_user(key, ukey, map->key_size) != 0) | 169 | if (copy_from_user(key, ukey, map->key_size) != 0) |
170 | goto free_key; | 170 | goto free_key; |
171 | 171 | ||
172 | err = -ESRCH; | 172 | err = -ENOENT; |
173 | rcu_read_lock(); | 173 | rcu_read_lock(); |
174 | value = map->ops->map_lookup_elem(map, key); | 174 | value = map->ops->map_lookup_elem(map, key); |
175 | if (!value) | 175 | if (!value) |
@@ -190,7 +190,7 @@ err_put: | |||
190 | return err; | 190 | return err; |
191 | } | 191 | } |
192 | 192 | ||
193 | #define BPF_MAP_UPDATE_ELEM_LAST_FIELD value | 193 | #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags |
194 | 194 | ||
195 | static int map_update_elem(union bpf_attr *attr) | 195 | static int map_update_elem(union bpf_attr *attr) |
196 | { | 196 | { |
@@ -231,7 +231,7 @@ static int map_update_elem(union bpf_attr *attr) | |||
231 | * therefore all map accessors rely on this fact, so do the same here | 231 | * therefore all map accessors rely on this fact, so do the same here |
232 | */ | 232 | */ |
233 | rcu_read_lock(); | 233 | rcu_read_lock(); |
234 | err = map->ops->map_update_elem(map, key, value); | 234 | err = map->ops->map_update_elem(map, key, value, attr->flags); |
235 | rcu_read_unlock(); | 235 | rcu_read_unlock(); |
236 | 236 | ||
237 | free_value: | 237 | free_value: |
diff --git a/kernel/bpf/test_stub.c b/kernel/bpf/test_stub.c index fcaddff4003e..0ceae1e6e8b5 100644 --- a/kernel/bpf/test_stub.c +++ b/kernel/bpf/test_stub.c | |||
@@ -18,26 +18,18 @@ struct bpf_context { | |||
18 | u64 arg2; | 18 | u64 arg2; |
19 | }; | 19 | }; |
20 | 20 | ||
21 | static u64 test_func(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) | ||
22 | { | ||
23 | return 0; | ||
24 | } | ||
25 | |||
26 | static struct bpf_func_proto test_funcs[] = { | ||
27 | [BPF_FUNC_unspec] = { | ||
28 | .func = test_func, | ||
29 | .gpl_only = true, | ||
30 | .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, | ||
31 | .arg1_type = ARG_CONST_MAP_PTR, | ||
32 | .arg2_type = ARG_PTR_TO_MAP_KEY, | ||
33 | }, | ||
34 | }; | ||
35 | |||
36 | static const struct bpf_func_proto *test_func_proto(enum bpf_func_id func_id) | 21 | static const struct bpf_func_proto *test_func_proto(enum bpf_func_id func_id) |
37 | { | 22 | { |
38 | if (func_id < 0 || func_id >= ARRAY_SIZE(test_funcs)) | 23 | switch (func_id) { |
24 | case BPF_FUNC_map_lookup_elem: | ||
25 | return &bpf_map_lookup_elem_proto; | ||
26 | case BPF_FUNC_map_update_elem: | ||
27 | return &bpf_map_update_elem_proto; | ||
28 | case BPF_FUNC_map_delete_elem: | ||
29 | return &bpf_map_delete_elem_proto; | ||
30 | default: | ||
39 | return NULL; | 31 | return NULL; |
40 | return &test_funcs[func_id]; | 32 | } |
41 | } | 33 | } |
42 | 34 | ||
43 | static const struct bpf_context_access { | 35 | static const struct bpf_context_access { |
@@ -78,38 +70,8 @@ static struct bpf_prog_type_list tl_prog = { | |||
78 | .type = BPF_PROG_TYPE_UNSPEC, | 70 | .type = BPF_PROG_TYPE_UNSPEC, |
79 | }; | 71 | }; |
80 | 72 | ||
81 | static struct bpf_map *test_map_alloc(union bpf_attr *attr) | ||
82 | { | ||
83 | struct bpf_map *map; | ||
84 | |||
85 | map = kzalloc(sizeof(*map), GFP_USER); | ||
86 | if (!map) | ||
87 | return ERR_PTR(-ENOMEM); | ||
88 | |||
89 | map->key_size = attr->key_size; | ||
90 | map->value_size = attr->value_size; | ||
91 | map->max_entries = attr->max_entries; | ||
92 | return map; | ||
93 | } | ||
94 | |||
95 | static void test_map_free(struct bpf_map *map) | ||
96 | { | ||
97 | kfree(map); | ||
98 | } | ||
99 | |||
100 | static struct bpf_map_ops test_map_ops = { | ||
101 | .map_alloc = test_map_alloc, | ||
102 | .map_free = test_map_free, | ||
103 | }; | ||
104 | |||
105 | static struct bpf_map_type_list tl_map = { | ||
106 | .ops = &test_map_ops, | ||
107 | .type = BPF_MAP_TYPE_UNSPEC, | ||
108 | }; | ||
109 | |||
110 | static int __init register_test_ops(void) | 73 | static int __init register_test_ops(void) |
111 | { | 74 | { |
112 | bpf_register_map_type(&tl_map); | ||
113 | bpf_register_prog_type(&tl_prog); | 75 | bpf_register_prog_type(&tl_prog); |
114 | return 0; | 76 | return 0; |
115 | } | 77 | } |
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 801f5f3b9307..a28e09c7825d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c | |||
@@ -153,22 +153,19 @@ struct reg_state { | |||
153 | 153 | ||
154 | enum bpf_stack_slot_type { | 154 | enum bpf_stack_slot_type { |
155 | STACK_INVALID, /* nothing was stored in this stack slot */ | 155 | STACK_INVALID, /* nothing was stored in this stack slot */ |
156 | STACK_SPILL, /* 1st byte of register spilled into stack */ | 156 | STACK_SPILL, /* register spilled into stack */ |
157 | STACK_SPILL_PART, /* other 7 bytes of register spill */ | ||
158 | STACK_MISC /* BPF program wrote some data into this slot */ | 157 | STACK_MISC /* BPF program wrote some data into this slot */ |
159 | }; | 158 | }; |
160 | 159 | ||
161 | struct bpf_stack_slot { | 160 | #define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ |
162 | enum bpf_stack_slot_type stype; | ||
163 | struct reg_state reg_st; | ||
164 | }; | ||
165 | 161 | ||
166 | /* state of the program: | 162 | /* state of the program: |
167 | * type of all registers and stack info | 163 | * type of all registers and stack info |
168 | */ | 164 | */ |
169 | struct verifier_state { | 165 | struct verifier_state { |
170 | struct reg_state regs[MAX_BPF_REG]; | 166 | struct reg_state regs[MAX_BPF_REG]; |
171 | struct bpf_stack_slot stack[MAX_BPF_STACK]; | 167 | u8 stack_slot_type[MAX_BPF_STACK]; |
168 | struct reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE]; | ||
172 | }; | 169 | }; |
173 | 170 | ||
174 | /* linked list of verifier states used to prune search */ | 171 | /* linked list of verifier states used to prune search */ |
@@ -259,10 +256,10 @@ static void print_verifier_state(struct verifier_env *env) | |||
259 | env->cur_state.regs[i].map_ptr->key_size, | 256 | env->cur_state.regs[i].map_ptr->key_size, |
260 | env->cur_state.regs[i].map_ptr->value_size); | 257 | env->cur_state.regs[i].map_ptr->value_size); |
261 | } | 258 | } |
262 | for (i = 0; i < MAX_BPF_STACK; i++) { | 259 | for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { |
263 | if (env->cur_state.stack[i].stype == STACK_SPILL) | 260 | if (env->cur_state.stack_slot_type[i] == STACK_SPILL) |
264 | verbose(" fp%d=%s", -MAX_BPF_STACK + i, | 261 | verbose(" fp%d=%s", -MAX_BPF_STACK + i, |
265 | reg_type_str[env->cur_state.stack[i].reg_st.type]); | 262 | reg_type_str[env->cur_state.spilled_regs[i / BPF_REG_SIZE].type]); |
266 | } | 263 | } |
267 | verbose("\n"); | 264 | verbose("\n"); |
268 | } | 265 | } |
@@ -539,8 +536,10 @@ static int bpf_size_to_bytes(int bpf_size) | |||
539 | static int check_stack_write(struct verifier_state *state, int off, int size, | 536 | static int check_stack_write(struct verifier_state *state, int off, int size, |
540 | int value_regno) | 537 | int value_regno) |
541 | { | 538 | { |
542 | struct bpf_stack_slot *slot; | ||
543 | int i; | 539 | int i; |
540 | /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, | ||
541 | * so it's aligned access and [off, off + size) are within stack limits | ||
542 | */ | ||
544 | 543 | ||
545 | if (value_regno >= 0 && | 544 | if (value_regno >= 0 && |
546 | (state->regs[value_regno].type == PTR_TO_MAP_VALUE || | 545 | (state->regs[value_regno].type == PTR_TO_MAP_VALUE || |
@@ -548,30 +547,24 @@ static int check_stack_write(struct verifier_state *state, int off, int size, | |||
548 | state->regs[value_regno].type == PTR_TO_CTX)) { | 547 | state->regs[value_regno].type == PTR_TO_CTX)) { |
549 | 548 | ||
550 | /* register containing pointer is being spilled into stack */ | 549 | /* register containing pointer is being spilled into stack */ |
551 | if (size != 8) { | 550 | if (size != BPF_REG_SIZE) { |
552 | verbose("invalid size of register spill\n"); | 551 | verbose("invalid size of register spill\n"); |
553 | return -EACCES; | 552 | return -EACCES; |
554 | } | 553 | } |
555 | 554 | ||
556 | slot = &state->stack[MAX_BPF_STACK + off]; | ||
557 | slot->stype = STACK_SPILL; | ||
558 | /* save register state */ | 555 | /* save register state */ |
559 | slot->reg_st = state->regs[value_regno]; | 556 | state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] = |
560 | for (i = 1; i < 8; i++) { | 557 | state->regs[value_regno]; |
561 | slot = &state->stack[MAX_BPF_STACK + off + i]; | ||
562 | slot->stype = STACK_SPILL_PART; | ||
563 | slot->reg_st.type = UNKNOWN_VALUE; | ||
564 | slot->reg_st.map_ptr = NULL; | ||
565 | } | ||
566 | } else { | ||
567 | 558 | ||
559 | for (i = 0; i < BPF_REG_SIZE; i++) | ||
560 | state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL; | ||
561 | } else { | ||
568 | /* regular write of data into stack */ | 562 | /* regular write of data into stack */ |
569 | for (i = 0; i < size; i++) { | 563 | state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] = |
570 | slot = &state->stack[MAX_BPF_STACK + off + i]; | 564 | (struct reg_state) {}; |
571 | slot->stype = STACK_MISC; | 565 | |
572 | slot->reg_st.type = UNKNOWN_VALUE; | 566 | for (i = 0; i < size; i++) |
573 | slot->reg_st.map_ptr = NULL; | 567 | state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC; |
574 | } | ||
575 | } | 568 | } |
576 | return 0; | 569 | return 0; |
577 | } | 570 | } |
@@ -579,19 +572,18 @@ static int check_stack_write(struct verifier_state *state, int off, int size, | |||
579 | static int check_stack_read(struct verifier_state *state, int off, int size, | 572 | static int check_stack_read(struct verifier_state *state, int off, int size, |
580 | int value_regno) | 573 | int value_regno) |
581 | { | 574 | { |
575 | u8 *slot_type; | ||
582 | int i; | 576 | int i; |
583 | struct bpf_stack_slot *slot; | ||
584 | 577 | ||
585 | slot = &state->stack[MAX_BPF_STACK + off]; | 578 | slot_type = &state->stack_slot_type[MAX_BPF_STACK + off]; |
586 | 579 | ||
587 | if (slot->stype == STACK_SPILL) { | 580 | if (slot_type[0] == STACK_SPILL) { |
588 | if (size != 8) { | 581 | if (size != BPF_REG_SIZE) { |
589 | verbose("invalid size of register spill\n"); | 582 | verbose("invalid size of register spill\n"); |
590 | return -EACCES; | 583 | return -EACCES; |
591 | } | 584 | } |
592 | for (i = 1; i < 8; i++) { | 585 | for (i = 1; i < BPF_REG_SIZE; i++) { |
593 | if (state->stack[MAX_BPF_STACK + off + i].stype != | 586 | if (slot_type[i] != STACK_SPILL) { |
594 | STACK_SPILL_PART) { | ||
595 | verbose("corrupted spill memory\n"); | 587 | verbose("corrupted spill memory\n"); |
596 | return -EACCES; | 588 | return -EACCES; |
597 | } | 589 | } |
@@ -599,12 +591,12 @@ static int check_stack_read(struct verifier_state *state, int off, int size, | |||
599 | 591 | ||
600 | if (value_regno >= 0) | 592 | if (value_regno >= 0) |
601 | /* restore register state from stack */ | 593 | /* restore register state from stack */ |
602 | state->regs[value_regno] = slot->reg_st; | 594 | state->regs[value_regno] = |
595 | state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE]; | ||
603 | return 0; | 596 | return 0; |
604 | } else { | 597 | } else { |
605 | for (i = 0; i < size; i++) { | 598 | for (i = 0; i < size; i++) { |
606 | if (state->stack[MAX_BPF_STACK + off + i].stype != | 599 | if (slot_type[i] != STACK_MISC) { |
607 | STACK_MISC) { | ||
608 | verbose("invalid read from stack off %d+%d size %d\n", | 600 | verbose("invalid read from stack off %d+%d size %d\n", |
609 | off, i, size); | 601 | off, i, size); |
610 | return -EACCES; | 602 | return -EACCES; |
@@ -747,7 +739,7 @@ static int check_stack_boundary(struct verifier_env *env, | |||
747 | } | 739 | } |
748 | 740 | ||
749 | for (i = 0; i < access_size; i++) { | 741 | for (i = 0; i < access_size; i++) { |
750 | if (state->stack[MAX_BPF_STACK + off + i].stype != STACK_MISC) { | 742 | if (state->stack_slot_type[MAX_BPF_STACK + off + i] != STACK_MISC) { |
751 | verbose("invalid indirect read from stack off %d+%d size %d\n", | 743 | verbose("invalid indirect read from stack off %d+%d size %d\n", |
752 | off, i, access_size); | 744 | off, i, access_size); |
753 | return -EACCES; | 745 | return -EACCES; |
@@ -1180,6 +1172,70 @@ static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn) | |||
1180 | return 0; | 1172 | return 0; |
1181 | } | 1173 | } |
1182 | 1174 | ||
1175 | /* verify safety of LD_ABS|LD_IND instructions: | ||
1176 | * - they can only appear in the programs where ctx == skb | ||
1177 | * - since they are wrappers of function calls, they scratch R1-R5 registers, | ||
1178 | * preserve R6-R9, and store return value into R0 | ||
1179 | * | ||
1180 | * Implicit input: | ||
1181 | * ctx == skb == R6 == CTX | ||
1182 | * | ||
1183 | * Explicit input: | ||
1184 | * SRC == any register | ||
1185 | * IMM == 32-bit immediate | ||
1186 | * | ||
1187 | * Output: | ||
1188 | * R0 - 8/16/32-bit skb data converted to cpu endianness | ||
1189 | */ | ||
1190 | static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn) | ||
1191 | { | ||
1192 | struct reg_state *regs = env->cur_state.regs; | ||
1193 | u8 mode = BPF_MODE(insn->code); | ||
1194 | struct reg_state *reg; | ||
1195 | int i, err; | ||
1196 | |||
1197 | if (env->prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) { | ||
1198 | verbose("BPF_LD_ABS|IND instructions are only allowed in socket filters\n"); | ||
1199 | return -EINVAL; | ||
1200 | } | ||
1201 | |||
1202 | if (insn->dst_reg != BPF_REG_0 || insn->off != 0 || | ||
1203 | (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) { | ||
1204 | verbose("BPF_LD_ABS uses reserved fields\n"); | ||
1205 | return -EINVAL; | ||
1206 | } | ||
1207 | |||
1208 | /* check whether implicit source operand (register R6) is readable */ | ||
1209 | err = check_reg_arg(regs, BPF_REG_6, SRC_OP); | ||
1210 | if (err) | ||
1211 | return err; | ||
1212 | |||
1213 | if (regs[BPF_REG_6].type != PTR_TO_CTX) { | ||
1214 | verbose("at the time of BPF_LD_ABS|IND R6 != pointer to skb\n"); | ||
1215 | return -EINVAL; | ||
1216 | } | ||
1217 | |||
1218 | if (mode == BPF_IND) { | ||
1219 | /* check explicit source operand */ | ||
1220 | err = check_reg_arg(regs, insn->src_reg, SRC_OP); | ||
1221 | if (err) | ||
1222 | return err; | ||
1223 | } | ||
1224 | |||
1225 | /* reset caller saved regs to unreadable */ | ||
1226 | for (i = 0; i < CALLER_SAVED_REGS; i++) { | ||
1227 | reg = regs + caller_saved[i]; | ||
1228 | reg->type = NOT_INIT; | ||
1229 | reg->imm = 0; | ||
1230 | } | ||
1231 | |||
1232 | /* mark destination R0 register as readable, since it contains | ||
1233 | * the value fetched from the packet | ||
1234 | */ | ||
1235 | regs[BPF_REG_0].type = UNKNOWN_VALUE; | ||
1236 | return 0; | ||
1237 | } | ||
1238 | |||
1183 | /* non-recursive DFS pseudo code | 1239 | /* non-recursive DFS pseudo code |
1184 | * 1 procedure DFS-iterative(G,v): | 1240 | * 1 procedure DFS-iterative(G,v): |
1185 | * 2 label v as discovered | 1241 | * 2 label v as discovered |
@@ -1409,19 +1465,41 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur) | |||
1409 | if (memcmp(&old->regs[i], &cur->regs[i], | 1465 | if (memcmp(&old->regs[i], &cur->regs[i], |
1410 | sizeof(old->regs[0])) != 0) { | 1466 | sizeof(old->regs[0])) != 0) { |
1411 | if (old->regs[i].type == NOT_INIT || | 1467 | if (old->regs[i].type == NOT_INIT || |
1412 | old->regs[i].type == UNKNOWN_VALUE) | 1468 | (old->regs[i].type == UNKNOWN_VALUE && |
1469 | cur->regs[i].type != NOT_INIT)) | ||
1413 | continue; | 1470 | continue; |
1414 | return false; | 1471 | return false; |
1415 | } | 1472 | } |
1416 | } | 1473 | } |
1417 | 1474 | ||
1418 | for (i = 0; i < MAX_BPF_STACK; i++) { | 1475 | for (i = 0; i < MAX_BPF_STACK; i++) { |
1419 | if (memcmp(&old->stack[i], &cur->stack[i], | 1476 | if (old->stack_slot_type[i] == STACK_INVALID) |
1420 | sizeof(old->stack[0])) != 0) { | 1477 | continue; |
1421 | if (old->stack[i].stype == STACK_INVALID) | 1478 | if (old->stack_slot_type[i] != cur->stack_slot_type[i]) |
1422 | continue; | 1479 | /* Ex: old explored (safe) state has STACK_SPILL in |
1480 | * this stack slot, but current has STACK_MISC -> | ||
1481 | * these verifier states are not equivalent, | ||
1482 | * return false to continue verification of this path | ||
1483 | */ | ||
1423 | return false; | 1484 | return false; |
1424 | } | 1485 | if (i % BPF_REG_SIZE) |
1486 | continue; | ||
1487 | if (memcmp(&old->spilled_regs[i / BPF_REG_SIZE], | ||
1488 | &cur->spilled_regs[i / BPF_REG_SIZE], | ||
1489 | sizeof(old->spilled_regs[0]))) | ||
1490 | /* when explored and current stack slot types are | ||
1491 | * the same, check that stored pointer types | ||
1492 | * are the same as well. | ||
1493 | * Ex: explored safe path could have stored | ||
1494 | * (struct reg_state) {.type = PTR_TO_STACK, .imm = -8} | ||
1495 | * but current path has stored: | ||
1496 | * (struct reg_state) {.type = PTR_TO_STACK, .imm = -16} | ||
1497 | * such verifier states are not equivalent. | ||
1498 | * return false to continue verification of this path | ||
1499 | */ | ||
1500 | return false; | ||
1501 | else | ||
1502 | continue; | ||
1425 | } | 1503 | } |
1426 | return true; | 1504 | return true; |
1427 | } | 1505 | } |
@@ -1663,8 +1741,10 @@ process_bpf_exit: | |||
1663 | u8 mode = BPF_MODE(insn->code); | 1741 | u8 mode = BPF_MODE(insn->code); |
1664 | 1742 | ||
1665 | if (mode == BPF_ABS || mode == BPF_IND) { | 1743 | if (mode == BPF_ABS || mode == BPF_IND) { |
1666 | verbose("LD_ABS is not supported yet\n"); | 1744 | err = check_ld_abs(env, insn); |
1667 | return -EINVAL; | 1745 | if (err) |
1746 | return err; | ||
1747 | |||
1668 | } else if (mode == BPF_IMM) { | 1748 | } else if (mode == BPF_IMM) { |
1669 | err = check_ld_imm(env, insn); | 1749 | err = check_ld_imm(env, insn); |
1670 | if (err) | 1750 | if (err) |
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 5664985c46a0..937ecdfdf258 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c | |||
@@ -107,46 +107,6 @@ void context_tracking_user_enter(void) | |||
107 | } | 107 | } |
108 | NOKPROBE_SYMBOL(context_tracking_user_enter); | 108 | NOKPROBE_SYMBOL(context_tracking_user_enter); |
109 | 109 | ||
110 | #ifdef CONFIG_PREEMPT | ||
111 | /** | ||
112 | * preempt_schedule_context - preempt_schedule called by tracing | ||
113 | * | ||
114 | * The tracing infrastructure uses preempt_enable_notrace to prevent | ||
115 | * recursion and tracing preempt enabling caused by the tracing | ||
116 | * infrastructure itself. But as tracing can happen in areas coming | ||
117 | * from userspace or just about to enter userspace, a preempt enable | ||
118 | * can occur before user_exit() is called. This will cause the scheduler | ||
119 | * to be called when the system is still in usermode. | ||
120 | * | ||
121 | * To prevent this, the preempt_enable_notrace will use this function | ||
122 | * instead of preempt_schedule() to exit user context if needed before | ||
123 | * calling the scheduler. | ||
124 | */ | ||
125 | asmlinkage __visible void __sched notrace preempt_schedule_context(void) | ||
126 | { | ||
127 | enum ctx_state prev_ctx; | ||
128 | |||
129 | if (likely(!preemptible())) | ||
130 | return; | ||
131 | |||
132 | /* | ||
133 | * Need to disable preemption in case user_exit() is traced | ||
134 | * and the tracer calls preempt_enable_notrace() causing | ||
135 | * an infinite recursion. | ||
136 | */ | ||
137 | preempt_disable_notrace(); | ||
138 | prev_ctx = exception_enter(); | ||
139 | preempt_enable_no_resched_notrace(); | ||
140 | |||
141 | preempt_schedule(); | ||
142 | |||
143 | preempt_disable_notrace(); | ||
144 | exception_exit(prev_ctx); | ||
145 | preempt_enable_notrace(); | ||
146 | } | ||
147 | EXPORT_SYMBOL_GPL(preempt_schedule_context); | ||
148 | #endif /* CONFIG_PREEMPT */ | ||
149 | |||
150 | /** | 110 | /** |
151 | * context_tracking_user_exit - Inform the context tracking that the CPU is | 111 | * context_tracking_user_exit - Inform the context tracking that the CPU is |
152 | * exiting userspace mode and entering the kernel. | 112 | * exiting userspace mode and entering the kernel. |
diff --git a/kernel/cpu.c b/kernel/cpu.c index 356450f09c1f..5d220234b3ca 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -64,6 +64,8 @@ static struct { | |||
64 | * an ongoing cpu hotplug operation. | 64 | * an ongoing cpu hotplug operation. |
65 | */ | 65 | */ |
66 | int refcount; | 66 | int refcount; |
67 | /* And allows lockless put_online_cpus(). */ | ||
68 | atomic_t puts_pending; | ||
67 | 69 | ||
68 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 70 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
69 | struct lockdep_map dep_map; | 71 | struct lockdep_map dep_map; |
@@ -84,6 +86,16 @@ static struct { | |||
84 | #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map) | 86 | #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map) |
85 | #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map) | 87 | #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map) |
86 | 88 | ||
89 | static void apply_puts_pending(int max) | ||
90 | { | ||
91 | int delta; | ||
92 | |||
93 | if (atomic_read(&cpu_hotplug.puts_pending) >= max) { | ||
94 | delta = atomic_xchg(&cpu_hotplug.puts_pending, 0); | ||
95 | cpu_hotplug.refcount -= delta; | ||
96 | } | ||
97 | } | ||
98 | |||
87 | void get_online_cpus(void) | 99 | void get_online_cpus(void) |
88 | { | 100 | { |
89 | might_sleep(); | 101 | might_sleep(); |
@@ -91,6 +103,7 @@ void get_online_cpus(void) | |||
91 | return; | 103 | return; |
92 | cpuhp_lock_acquire_read(); | 104 | cpuhp_lock_acquire_read(); |
93 | mutex_lock(&cpu_hotplug.lock); | 105 | mutex_lock(&cpu_hotplug.lock); |
106 | apply_puts_pending(65536); | ||
94 | cpu_hotplug.refcount++; | 107 | cpu_hotplug.refcount++; |
95 | mutex_unlock(&cpu_hotplug.lock); | 108 | mutex_unlock(&cpu_hotplug.lock); |
96 | } | 109 | } |
@@ -103,6 +116,7 @@ bool try_get_online_cpus(void) | |||
103 | if (!mutex_trylock(&cpu_hotplug.lock)) | 116 | if (!mutex_trylock(&cpu_hotplug.lock)) |
104 | return false; | 117 | return false; |
105 | cpuhp_lock_acquire_tryread(); | 118 | cpuhp_lock_acquire_tryread(); |
119 | apply_puts_pending(65536); | ||
106 | cpu_hotplug.refcount++; | 120 | cpu_hotplug.refcount++; |
107 | mutex_unlock(&cpu_hotplug.lock); | 121 | mutex_unlock(&cpu_hotplug.lock); |
108 | return true; | 122 | return true; |
@@ -113,7 +127,11 @@ void put_online_cpus(void) | |||
113 | { | 127 | { |
114 | if (cpu_hotplug.active_writer == current) | 128 | if (cpu_hotplug.active_writer == current) |
115 | return; | 129 | return; |
116 | mutex_lock(&cpu_hotplug.lock); | 130 | if (!mutex_trylock(&cpu_hotplug.lock)) { |
131 | atomic_inc(&cpu_hotplug.puts_pending); | ||
132 | cpuhp_lock_release(); | ||
133 | return; | ||
134 | } | ||
117 | 135 | ||
118 | if (WARN_ON(!cpu_hotplug.refcount)) | 136 | if (WARN_ON(!cpu_hotplug.refcount)) |
119 | cpu_hotplug.refcount++; /* try to fix things up */ | 137 | cpu_hotplug.refcount++; /* try to fix things up */ |
@@ -155,6 +173,7 @@ void cpu_hotplug_begin(void) | |||
155 | cpuhp_lock_acquire(); | 173 | cpuhp_lock_acquire(); |
156 | for (;;) { | 174 | for (;;) { |
157 | mutex_lock(&cpu_hotplug.lock); | 175 | mutex_lock(&cpu_hotplug.lock); |
176 | apply_puts_pending(1); | ||
158 | if (likely(!cpu_hotplug.refcount)) | 177 | if (likely(!cpu_hotplug.refcount)) |
159 | break; | 178 | break; |
160 | __set_current_state(TASK_UNINTERRUPTIBLE); | 179 | __set_current_state(TASK_UNINTERRUPTIBLE); |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 1f107c74087b..723cfc9d0ad7 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -506,6 +506,16 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) | |||
506 | goto out; | 506 | goto out; |
507 | } | 507 | } |
508 | 508 | ||
509 | /* | ||
510 | * We can't shrink if we won't have enough room for SCHED_DEADLINE | ||
511 | * tasks. | ||
512 | */ | ||
513 | ret = -EBUSY; | ||
514 | if (is_cpu_exclusive(cur) && | ||
515 | !cpuset_cpumask_can_shrink(cur->cpus_allowed, | ||
516 | trial->cpus_allowed)) | ||
517 | goto out; | ||
518 | |||
509 | ret = 0; | 519 | ret = 0; |
510 | out: | 520 | out: |
511 | rcu_read_unlock(); | 521 | rcu_read_unlock(); |
@@ -1429,17 +1439,8 @@ static int cpuset_can_attach(struct cgroup_subsys_state *css, | |||
1429 | goto out_unlock; | 1439 | goto out_unlock; |
1430 | 1440 | ||
1431 | cgroup_taskset_for_each(task, tset) { | 1441 | cgroup_taskset_for_each(task, tset) { |
1432 | /* | 1442 | ret = task_can_attach(task, cs->cpus_allowed); |
1433 | * Kthreads which disallow setaffinity shouldn't be moved | 1443 | if (ret) |
1434 | * to a new cpuset; we don't want to change their cpu | ||
1435 | * affinity and isolating such threads by their set of | ||
1436 | * allowed nodes is unnecessary. Thus, cpusets are not | ||
1437 | * applicable for such threads. This prevents checking for | ||
1438 | * success of set_cpus_allowed_ptr() on all attached tasks | ||
1439 | * before cpus_allowed may be changed. | ||
1440 | */ | ||
1441 | ret = -EINVAL; | ||
1442 | if (task->flags & PF_NO_SETAFFINITY) | ||
1443 | goto out_unlock; | 1444 | goto out_unlock; |
1444 | ret = security_task_setscheduler(task); | 1445 | ret = security_task_setscheduler(task); |
1445 | if (ret) | 1446 | if (ret) |
diff --git a/kernel/events/core.c b/kernel/events/core.c index 1425d07018de..113b837470cd 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -614,7 +614,7 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event, | |||
614 | if (!f.file) | 614 | if (!f.file) |
615 | return -EBADF; | 615 | return -EBADF; |
616 | 616 | ||
617 | css = css_tryget_online_from_dir(f.file->f_dentry, | 617 | css = css_tryget_online_from_dir(f.file->f_path.dentry, |
618 | &perf_event_cgrp_subsys); | 618 | &perf_event_cgrp_subsys); |
619 | if (IS_ERR(css)) { | 619 | if (IS_ERR(css)) { |
620 | ret = PTR_ERR(css); | 620 | ret = PTR_ERR(css); |
@@ -1562,8 +1562,10 @@ static void perf_remove_from_context(struct perf_event *event, bool detach_group | |||
1562 | 1562 | ||
1563 | if (!task) { | 1563 | if (!task) { |
1564 | /* | 1564 | /* |
1565 | * Per cpu events are removed via an smp call and | 1565 | * Per cpu events are removed via an smp call. The removal can |
1566 | * the removal is always successful. | 1566 | * fail if the CPU is currently offline, but in that case we |
1567 | * already called __perf_remove_from_context from | ||
1568 | * perf_event_exit_cpu. | ||
1567 | */ | 1569 | */ |
1568 | cpu_function_call(event->cpu, __perf_remove_from_context, &re); | 1570 | cpu_function_call(event->cpu, __perf_remove_from_context, &re); |
1569 | return; | 1571 | return; |
@@ -4458,7 +4460,7 @@ perf_output_sample_regs(struct perf_output_handle *handle, | |||
4458 | } | 4460 | } |
4459 | } | 4461 | } |
4460 | 4462 | ||
4461 | static void perf_sample_regs_user(struct perf_regs_user *regs_user, | 4463 | static void perf_sample_regs_user(struct perf_regs *regs_user, |
4462 | struct pt_regs *regs) | 4464 | struct pt_regs *regs) |
4463 | { | 4465 | { |
4464 | if (!user_mode(regs)) { | 4466 | if (!user_mode(regs)) { |
@@ -4469,11 +4471,22 @@ static void perf_sample_regs_user(struct perf_regs_user *regs_user, | |||
4469 | } | 4471 | } |
4470 | 4472 | ||
4471 | if (regs) { | 4473 | if (regs) { |
4472 | regs_user->regs = regs; | ||
4473 | regs_user->abi = perf_reg_abi(current); | 4474 | regs_user->abi = perf_reg_abi(current); |
4475 | regs_user->regs = regs; | ||
4476 | } else { | ||
4477 | regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE; | ||
4478 | regs_user->regs = NULL; | ||
4474 | } | 4479 | } |
4475 | } | 4480 | } |
4476 | 4481 | ||
4482 | static void perf_sample_regs_intr(struct perf_regs *regs_intr, | ||
4483 | struct pt_regs *regs) | ||
4484 | { | ||
4485 | regs_intr->regs = regs; | ||
4486 | regs_intr->abi = perf_reg_abi(current); | ||
4487 | } | ||
4488 | |||
4489 | |||
4477 | /* | 4490 | /* |
4478 | * Get remaining task size from user stack pointer. | 4491 | * Get remaining task size from user stack pointer. |
4479 | * | 4492 | * |
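Editor note: the hunk above replaces struct perf_regs_user with the more general struct perf_regs so the same shape describes both user-level and interrupt-level register dumps. A sketch of the structure as implied by the code above; the exact definition lives in include/linux/perf_regs.h and is assumed here.

#include <linux/types.h>
#include <linux/ptrace.h>

/* Sketch of the structure implied by the hunk above. */
struct perf_regs {
        __u64           abi;    /* PERF_SAMPLE_REGS_ABI_NONE/32/64 */
        struct pt_regs  *regs;  /* NULL when abi is ..._ABI_NONE */
};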
@@ -4855,6 +4868,23 @@ void perf_output_sample(struct perf_output_handle *handle, | |||
4855 | if (sample_type & PERF_SAMPLE_TRANSACTION) | 4868 | if (sample_type & PERF_SAMPLE_TRANSACTION) |
4856 | perf_output_put(handle, data->txn); | 4869 | perf_output_put(handle, data->txn); |
4857 | 4870 | ||
4871 | if (sample_type & PERF_SAMPLE_REGS_INTR) { | ||
4872 | u64 abi = data->regs_intr.abi; | ||
4873 | /* | ||
4874 | * If there are no regs to dump, notice it through | ||
4875 | * first u64 being zero (PERF_SAMPLE_REGS_ABI_NONE). | ||
4876 | */ | ||
4877 | perf_output_put(handle, abi); | ||
4878 | |||
4879 | if (abi) { | ||
4880 | u64 mask = event->attr.sample_regs_intr; | ||
4881 | |||
4882 | perf_output_sample_regs(handle, | ||
4883 | data->regs_intr.regs, | ||
4884 | mask); | ||
4885 | } | ||
4886 | } | ||
4887 | |||
4858 | if (!event->attr.watermark) { | 4888 | if (!event->attr.watermark) { |
4859 | int wakeup_events = event->attr.wakeup_events; | 4889 | int wakeup_events = event->attr.wakeup_events; |
4860 | 4890 | ||
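Editor note: for consumers, the PERF_SAMPLE_REGS_INTR block emitted above is one u64 ABI word followed by one u64 per bit set in attr.sample_regs_intr, in increasing bit order. A hedged userspace parsing sketch; the function name and the surrounding record walking are hypothetical.

#include <stdint.h>
#include <stdio.h>

/* Parse the REGS_INTR block at 'p'; 'mask' is attr.sample_regs_intr. */
static const uint64_t *parse_regs_intr(const uint64_t *p, uint64_t mask)
{
        uint64_t abi = *p++;

        if (abi == 0)           /* PERF_SAMPLE_REGS_ABI_NONE: no regs dumped */
                return p;

        for (int bit = 0; bit < 64; bit++) {
                if (mask & (1ULL << bit))
                        printf("reg%-2d = 0x%016llx\n",
                               bit, (unsigned long long)*p++);
        }
        return p;
}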
@@ -4920,12 +4950,13 @@ void perf_prepare_sample(struct perf_event_header *header, | |||
4920 | header->size += size; | 4950 | header->size += size; |
4921 | } | 4951 | } |
4922 | 4952 | ||
4953 | if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER)) | ||
4954 | perf_sample_regs_user(&data->regs_user, regs); | ||
4955 | |||
4923 | if (sample_type & PERF_SAMPLE_REGS_USER) { | 4956 | if (sample_type & PERF_SAMPLE_REGS_USER) { |
4924 | /* regs dump ABI info */ | 4957 | /* regs dump ABI info */ |
4925 | int size = sizeof(u64); | 4958 | int size = sizeof(u64); |
4926 | 4959 | ||
4927 | perf_sample_regs_user(&data->regs_user, regs); | ||
4928 | |||
4929 | if (data->regs_user.regs) { | 4960 | if (data->regs_user.regs) { |
4930 | u64 mask = event->attr.sample_regs_user; | 4961 | u64 mask = event->attr.sample_regs_user; |
4931 | size += hweight64(mask) * sizeof(u64); | 4962 | size += hweight64(mask) * sizeof(u64); |
@@ -4941,15 +4972,11 @@ void perf_prepare_sample(struct perf_event_header *header, | |||
4941 | * in case new sample type is added, because we could eat | 4972 | * in case new sample type is added, because we could eat |
4942 | * up the rest of the sample size. | 4973 | * up the rest of the sample size. |
4943 | */ | 4974 | */ |
4944 | struct perf_regs_user *uregs = &data->regs_user; | ||
4945 | u16 stack_size = event->attr.sample_stack_user; | 4975 | u16 stack_size = event->attr.sample_stack_user; |
4946 | u16 size = sizeof(u64); | 4976 | u16 size = sizeof(u64); |
4947 | 4977 | ||
4948 | if (!uregs->abi) | ||
4949 | perf_sample_regs_user(uregs, regs); | ||
4950 | |||
4951 | stack_size = perf_sample_ustack_size(stack_size, header->size, | 4978 | stack_size = perf_sample_ustack_size(stack_size, header->size, |
4952 | uregs->regs); | 4979 | data->regs_user.regs); |
4953 | 4980 | ||
4954 | /* | 4981 | /* |
4955 | * If there is something to dump, add space for the dump | 4982 | * If there is something to dump, add space for the dump |
@@ -4962,6 +4989,21 @@ void perf_prepare_sample(struct perf_event_header *header, | |||
4962 | data->stack_user_size = stack_size; | 4989 | data->stack_user_size = stack_size; |
4963 | header->size += size; | 4990 | header->size += size; |
4964 | } | 4991 | } |
4992 | |||
4993 | if (sample_type & PERF_SAMPLE_REGS_INTR) { | ||
4994 | /* regs dump ABI info */ | ||
4995 | int size = sizeof(u64); | ||
4996 | |||
4997 | perf_sample_regs_intr(&data->regs_intr, regs); | ||
4998 | |||
4999 | if (data->regs_intr.regs) { | ||
5000 | u64 mask = event->attr.sample_regs_intr; | ||
5001 | |||
5002 | size += hweight64(mask) * sizeof(u64); | ||
5003 | } | ||
5004 | |||
5005 | header->size += size; | ||
5006 | } | ||
4965 | } | 5007 | } |
4966 | 5008 | ||
4967 | static void perf_event_output(struct perf_event *event, | 5009 | static void perf_event_output(struct perf_event *event, |
@@ -6071,11 +6113,6 @@ static int perf_swevent_init(struct perf_event *event) | |||
6071 | return 0; | 6113 | return 0; |
6072 | } | 6114 | } |
6073 | 6115 | ||
6074 | static int perf_swevent_event_idx(struct perf_event *event) | ||
6075 | { | ||
6076 | return 0; | ||
6077 | } | ||
6078 | |||
6079 | static struct pmu perf_swevent = { | 6116 | static struct pmu perf_swevent = { |
6080 | .task_ctx_nr = perf_sw_context, | 6117 | .task_ctx_nr = perf_sw_context, |
6081 | 6118 | ||
@@ -6085,8 +6122,6 @@ static struct pmu perf_swevent = { | |||
6085 | .start = perf_swevent_start, | 6122 | .start = perf_swevent_start, |
6086 | .stop = perf_swevent_stop, | 6123 | .stop = perf_swevent_stop, |
6087 | .read = perf_swevent_read, | 6124 | .read = perf_swevent_read, |
6088 | |||
6089 | .event_idx = perf_swevent_event_idx, | ||
6090 | }; | 6125 | }; |
6091 | 6126 | ||
6092 | #ifdef CONFIG_EVENT_TRACING | 6127 | #ifdef CONFIG_EVENT_TRACING |
@@ -6204,8 +6239,6 @@ static struct pmu perf_tracepoint = { | |||
6204 | .start = perf_swevent_start, | 6239 | .start = perf_swevent_start, |
6205 | .stop = perf_swevent_stop, | 6240 | .stop = perf_swevent_stop, |
6206 | .read = perf_swevent_read, | 6241 | .read = perf_swevent_read, |
6207 | |||
6208 | .event_idx = perf_swevent_event_idx, | ||
6209 | }; | 6242 | }; |
6210 | 6243 | ||
6211 | static inline void perf_tp_register(void) | 6244 | static inline void perf_tp_register(void) |
@@ -6431,8 +6464,6 @@ static struct pmu perf_cpu_clock = { | |||
6431 | .start = cpu_clock_event_start, | 6464 | .start = cpu_clock_event_start, |
6432 | .stop = cpu_clock_event_stop, | 6465 | .stop = cpu_clock_event_stop, |
6433 | .read = cpu_clock_event_read, | 6466 | .read = cpu_clock_event_read, |
6434 | |||
6435 | .event_idx = perf_swevent_event_idx, | ||
6436 | }; | 6467 | }; |
6437 | 6468 | ||
6438 | /* | 6469 | /* |
@@ -6511,8 +6542,6 @@ static struct pmu perf_task_clock = { | |||
6511 | .start = task_clock_event_start, | 6542 | .start = task_clock_event_start, |
6512 | .stop = task_clock_event_stop, | 6543 | .stop = task_clock_event_stop, |
6513 | .read = task_clock_event_read, | 6544 | .read = task_clock_event_read, |
6514 | |||
6515 | .event_idx = perf_swevent_event_idx, | ||
6516 | }; | 6545 | }; |
6517 | 6546 | ||
6518 | static void perf_pmu_nop_void(struct pmu *pmu) | 6547 | static void perf_pmu_nop_void(struct pmu *pmu) |
@@ -6542,7 +6571,7 @@ static void perf_pmu_cancel_txn(struct pmu *pmu) | |||
6542 | 6571 | ||
6543 | static int perf_event_idx_default(struct perf_event *event) | 6572 | static int perf_event_idx_default(struct perf_event *event) |
6544 | { | 6573 | { |
6545 | return event->hw.idx + 1; | 6574 | return 0; |
6546 | } | 6575 | } |
6547 | 6576 | ||
6548 | /* | 6577 | /* |
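Editor note: dropping the per-PMU event_idx stubs and making perf_event_idx_default() return 0 means these events report index 0 in the self-monitoring mmap page, i.e. no userspace counter instruction (such as rdpmc on x86) may be used for them. A hedged userspace sketch of the check a reader performs; field names follow the perf_event_mmap_page ABI, and a full reader would also use the lock/seq protocol.

#include <stdbool.h>
#include <linux/perf_event.h>

/* True if this event can be read directly in userspace. */
static bool can_read_in_userspace(const struct perf_event_mmap_page *pc)
{
        /*
         * ->index is what the PMU's event_idx callback reported;
         * 0 means "no userspace counter access for this event",
         * which is now the default for software events.
         */
        return pc->cap_user_rdpmc && pc->index != 0;
}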
@@ -7162,6 +7191,8 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, | |||
7162 | ret = -EINVAL; | 7191 | ret = -EINVAL; |
7163 | } | 7192 | } |
7164 | 7193 | ||
7194 | if (attr->sample_type & PERF_SAMPLE_REGS_INTR) | ||
7195 | ret = perf_reg_validate(attr->sample_regs_intr); | ||
7165 | out: | 7196 | out: |
7166 | return ret; | 7197 | return ret; |
7167 | 7198 | ||
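Editor note: with the validation above in place, userspace can request interrupt-time register dumps. A hedged example of opening such an event with perf_event_open(); it assumes headers that already carry PERF_SAMPLE_REGS_INTR and the sample_regs_intr field, and the register mask is purely illustrative (which bits are valid is architecture specific).

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size          = sizeof(attr);
        attr.type          = PERF_TYPE_HARDWARE;
        attr.config        = PERF_COUNT_HW_CPU_CYCLES;
        attr.sample_period = 100000;
        attr.sample_type   = PERF_SAMPLE_IP | PERF_SAMPLE_REGS_INTR;
        /* Illustrative mask only; validated by perf_reg_validate(). */
        attr.sample_regs_intr = 0x3f;

        /* May require privileges depending on perf_event_paranoid. */
        int fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }
        close(fd);
        return 0;
}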
@@ -8130,7 +8161,7 @@ static void perf_pmu_rotate_stop(struct pmu *pmu) | |||
8130 | 8161 | ||
8131 | static void __perf_event_exit_context(void *__info) | 8162 | static void __perf_event_exit_context(void *__info) |
8132 | { | 8163 | { |
8133 | struct remove_event re = { .detach_group = false }; | 8164 | struct remove_event re = { .detach_group = true }; |
8134 | struct perf_event_context *ctx = __info; | 8165 | struct perf_event_context *ctx = __info; |
8135 | 8166 | ||
8136 | perf_pmu_rotate_stop(ctx->pmu); | 8167 | perf_pmu_rotate_stop(ctx->pmu); |
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index 1559fb0b9296..9803a6600d49 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c | |||
@@ -605,11 +605,6 @@ static void hw_breakpoint_stop(struct perf_event *bp, int flags) | |||
605 | bp->hw.state = PERF_HES_STOPPED; | 605 | bp->hw.state = PERF_HES_STOPPED; |
606 | } | 606 | } |
607 | 607 | ||
608 | static int hw_breakpoint_event_idx(struct perf_event *bp) | ||
609 | { | ||
610 | return 0; | ||
611 | } | ||
612 | |||
613 | static struct pmu perf_breakpoint = { | 608 | static struct pmu perf_breakpoint = { |
614 | .task_ctx_nr = perf_sw_context, /* could eventually get its own */ | 609 | .task_ctx_nr = perf_sw_context, /* could eventually get its own */ |
615 | 610 | ||
@@ -619,8 +614,6 @@ static struct pmu perf_breakpoint = { | |||
619 | .start = hw_breakpoint_start, | 614 | .start = hw_breakpoint_start, |
620 | .stop = hw_breakpoint_stop, | 615 | .stop = hw_breakpoint_stop, |
621 | .read = hw_breakpoint_pmu_read, | 616 | .read = hw_breakpoint_pmu_read, |
622 | |||
623 | .event_idx = hw_breakpoint_event_idx, | ||
624 | }; | 617 | }; |
625 | 618 | ||
626 | int __init init_hw_breakpoint(void) | 619 | int __init init_hw_breakpoint(void) |
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 1d0af8a2c646..ed8f2cde34c5 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c | |||
@@ -1640,7 +1640,6 @@ bool uprobe_deny_signal(void) | |||
1640 | if (__fatal_signal_pending(t) || arch_uprobe_xol_was_trapped(t)) { | 1640 | if (__fatal_signal_pending(t) || arch_uprobe_xol_was_trapped(t)) { |
1641 | utask->state = UTASK_SSTEP_TRAPPED; | 1641 | utask->state = UTASK_SSTEP_TRAPPED; |
1642 | set_tsk_thread_flag(t, TIF_UPROBE); | 1642 | set_tsk_thread_flag(t, TIF_UPROBE); |
1643 | set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); | ||
1644 | } | 1643 | } |
1645 | } | 1644 | } |
1646 | 1645 | ||
diff --git a/kernel/exit.c b/kernel/exit.c index 5d30019ff953..8714e5ded8b4 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -118,13 +118,10 @@ static void __exit_signal(struct task_struct *tsk) | |||
118 | } | 118 | } |
119 | 119 | ||
120 | /* | 120 | /* |
121 | * Accumulate here the counters for all threads but the group leader | 121 | * Accumulate here the counters for all threads as they die. We could |
122 | * as they die, so they can be added into the process-wide totals | 122 | * skip the group leader because it is the last user of signal_struct, |
123 | * when those are taken. The group leader stays around as a zombie as | 123 | * but we want to avoid the race with thread_group_cputime() which can |
124 | * long as there are other threads. When it gets reaped, the exit.c | 124 | * see the empty ->thread_head list. |
125 | * code will add its counts into these totals. We won't ever get here | ||
126 | * for the group leader, since it will have been the last reference on | ||
127 | * the signal_struct. | ||
128 | */ | 125 | */ |
129 | task_cputime(tsk, &utime, &stime); | 126 | task_cputime(tsk, &utime, &stime); |
130 | write_seqlock(&sig->stats_lock); | 127 | write_seqlock(&sig->stats_lock); |
@@ -462,6 +459,44 @@ static void exit_mm(struct task_struct *tsk) | |||
462 | clear_thread_flag(TIF_MEMDIE); | 459 | clear_thread_flag(TIF_MEMDIE); |
463 | } | 460 | } |
464 | 461 | ||
462 | static struct task_struct *find_alive_thread(struct task_struct *p) | ||
463 | { | ||
464 | struct task_struct *t; | ||
465 | |||
466 | for_each_thread(p, t) { | ||
467 | if (!(t->flags & PF_EXITING)) | ||
468 | return t; | ||
469 | } | ||
470 | return NULL; | ||
471 | } | ||
472 | |||
473 | static struct task_struct *find_child_reaper(struct task_struct *father) | ||
474 | __releases(&tasklist_lock) | ||
475 | __acquires(&tasklist_lock) | ||
476 | { | ||
477 | struct pid_namespace *pid_ns = task_active_pid_ns(father); | ||
478 | struct task_struct *reaper = pid_ns->child_reaper; | ||
479 | |||
480 | if (likely(reaper != father)) | ||
481 | return reaper; | ||
482 | |||
483 | reaper = find_alive_thread(father); | ||
484 | if (reaper) { | ||
485 | pid_ns->child_reaper = reaper; | ||
486 | return reaper; | ||
487 | } | ||
488 | |||
489 | write_unlock_irq(&tasklist_lock); | ||
490 | if (unlikely(pid_ns == &init_pid_ns)) { | ||
491 | panic("Attempted to kill init! exitcode=0x%08x\n", | ||
492 | father->signal->group_exit_code ?: father->exit_code); | ||
493 | } | ||
494 | zap_pid_ns_processes(pid_ns); | ||
495 | write_lock_irq(&tasklist_lock); | ||
496 | |||
497 | return father; | ||
498 | } | ||
499 | |||
465 | /* | 500 | /* |
466 | * When we die, we re-parent all our children, and try to: | 501 | * When we die, we re-parent all our children, and try to: |
467 | * 1. give them to another thread in our thread group, if such a member exists | 502 | * 1. give them to another thread in our thread group, if such a member exists |
@@ -469,58 +504,36 @@ static void exit_mm(struct task_struct *tsk) | |||
469 | * child_subreaper for its children (like a service manager) | 504 | * child_subreaper for its children (like a service manager) |
470 | * 3. give it to the init process (PID 1) in our pid namespace | 505 | * 3. give it to the init process (PID 1) in our pid namespace |
471 | */ | 506 | */ |
472 | static struct task_struct *find_new_reaper(struct task_struct *father) | 507 | static struct task_struct *find_new_reaper(struct task_struct *father, |
473 | __releases(&tasklist_lock) | 508 | struct task_struct *child_reaper) |
474 | __acquires(&tasklist_lock) | ||
475 | { | 509 | { |
476 | struct pid_namespace *pid_ns = task_active_pid_ns(father); | 510 | struct task_struct *thread, *reaper; |
477 | struct task_struct *thread; | ||
478 | 511 | ||
479 | thread = father; | 512 | thread = find_alive_thread(father); |
480 | while_each_thread(father, thread) { | 513 | if (thread) |
481 | if (thread->flags & PF_EXITING) | ||
482 | continue; | ||
483 | if (unlikely(pid_ns->child_reaper == father)) | ||
484 | pid_ns->child_reaper = thread; | ||
485 | return thread; | 514 | return thread; |
486 | } | ||
487 | |||
488 | if (unlikely(pid_ns->child_reaper == father)) { | ||
489 | write_unlock_irq(&tasklist_lock); | ||
490 | if (unlikely(pid_ns == &init_pid_ns)) { | ||
491 | panic("Attempted to kill init! exitcode=0x%08x\n", | ||
492 | father->signal->group_exit_code ?: | ||
493 | father->exit_code); | ||
494 | } | ||
495 | |||
496 | zap_pid_ns_processes(pid_ns); | ||
497 | write_lock_irq(&tasklist_lock); | ||
498 | } else if (father->signal->has_child_subreaper) { | ||
499 | struct task_struct *reaper; | ||
500 | 515 | ||
516 | if (father->signal->has_child_subreaper) { | ||
501 | /* | 517 | /* |
502 | * Find the first ancestor marked as child_subreaper. | 518 | * Find the first ->is_child_subreaper ancestor in our pid_ns. |
503 | * Note that the code below checks same_thread_group(reaper, | 519 | * We start from father to ensure we can not look into another |
504 | * pid_ns->child_reaper). This is what we need to DTRT in a | 520 | * namespace, this is safe because all its threads are dead. |
505 | * PID namespace. However we still need the check above, see | ||
506 | * http://marc.info/?l=linux-kernel&m=131385460420380 | ||
507 | */ | 521 | */ |
508 | for (reaper = father->real_parent; | 522 | for (reaper = father; |
509 | reaper != &init_task; | 523 | !same_thread_group(reaper, child_reaper); |
510 | reaper = reaper->real_parent) { | 524 | reaper = reaper->real_parent) { |
511 | if (same_thread_group(reaper, pid_ns->child_reaper)) | 525 | /* call_usermodehelper() descendants need this check */ |
526 | if (reaper == &init_task) | ||
512 | break; | 527 | break; |
513 | if (!reaper->signal->is_child_subreaper) | 528 | if (!reaper->signal->is_child_subreaper) |
514 | continue; | 529 | continue; |
515 | thread = reaper; | 530 | thread = find_alive_thread(reaper); |
516 | do { | 531 | if (thread) |
517 | if (!(thread->flags & PF_EXITING)) | 532 | return thread; |
518 | return reaper; | ||
519 | } while_each_thread(reaper, thread); | ||
520 | } | 533 | } |
521 | } | 534 | } |
522 | 535 | ||
523 | return pid_ns->child_reaper; | 536 | return child_reaper; |
524 | } | 537 | } |
525 | 538 | ||
526 | /* | 539 | /* |
@@ -529,15 +542,7 @@ static struct task_struct *find_new_reaper(struct task_struct *father) | |||
529 | static void reparent_leader(struct task_struct *father, struct task_struct *p, | 542 | static void reparent_leader(struct task_struct *father, struct task_struct *p, |
530 | struct list_head *dead) | 543 | struct list_head *dead) |
531 | { | 544 | { |
532 | list_move_tail(&p->sibling, &p->real_parent->children); | 545 | if (unlikely(p->exit_state == EXIT_DEAD)) |
533 | |||
534 | if (p->exit_state == EXIT_DEAD) | ||
535 | return; | ||
536 | /* | ||
537 | * If this is a threaded reparent there is no need to | ||
538 | * notify anyone anything has happened. | ||
539 | */ | ||
540 | if (same_thread_group(p->real_parent, father)) | ||
541 | return; | 546 | return; |
542 | 547 | ||
543 | /* We don't want people slaying init. */ | 548 | /* We don't want people slaying init. */ |
@@ -548,49 +553,53 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p, | |||
548 | p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { | 553 | p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) { |
549 | if (do_notify_parent(p, p->exit_signal)) { | 554 | if (do_notify_parent(p, p->exit_signal)) { |
550 | p->exit_state = EXIT_DEAD; | 555 | p->exit_state = EXIT_DEAD; |
551 | list_move_tail(&p->sibling, dead); | 556 | list_add(&p->ptrace_entry, dead); |
552 | } | 557 | } |
553 | } | 558 | } |
554 | 559 | ||
555 | kill_orphaned_pgrp(p, father); | 560 | kill_orphaned_pgrp(p, father); |
556 | } | 561 | } |
557 | 562 | ||
558 | static void forget_original_parent(struct task_struct *father) | 563 | /* |
564 | * This does two things: | ||
565 | * | ||
566 | * A. Make init inherit all the child processes | ||
567 | * B. Check to see if any process groups have become orphaned | ||
568 | * as a result of our exiting, and if they have any stopped | ||
569 | * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) | ||
570 | */ | ||
571 | static void forget_original_parent(struct task_struct *father, | ||
572 | struct list_head *dead) | ||
559 | { | 573 | { |
560 | struct task_struct *p, *n, *reaper; | 574 | struct task_struct *p, *t, *reaper; |
561 | LIST_HEAD(dead_children); | ||
562 | 575 | ||
563 | write_lock_irq(&tasklist_lock); | 576 | if (unlikely(!list_empty(&father->ptraced))) |
564 | /* | 577 | exit_ptrace(father, dead); |
565 | * Note that exit_ptrace() and find_new_reaper() might | ||
566 | * drop tasklist_lock and reacquire it. | ||
567 | */ | ||
568 | exit_ptrace(father); | ||
569 | reaper = find_new_reaper(father); | ||
570 | 578 | ||
571 | list_for_each_entry_safe(p, n, &father->children, sibling) { | 579 | /* Can drop and reacquire tasklist_lock */ |
572 | struct task_struct *t = p; | 580 | reaper = find_child_reaper(father); |
581 | if (list_empty(&father->children)) | ||
582 | return; | ||
573 | 583 | ||
574 | do { | 584 | reaper = find_new_reaper(father, reaper); |
585 | list_for_each_entry(p, &father->children, sibling) { | ||
586 | for_each_thread(p, t) { | ||
575 | t->real_parent = reaper; | 587 | t->real_parent = reaper; |
576 | if (t->parent == father) { | 588 | BUG_ON((!t->ptrace) != (t->parent == father)); |
577 | BUG_ON(t->ptrace); | 589 | if (likely(!t->ptrace)) |
578 | t->parent = t->real_parent; | 590 | t->parent = t->real_parent; |
579 | } | ||
580 | if (t->pdeath_signal) | 591 | if (t->pdeath_signal) |
581 | group_send_sig_info(t->pdeath_signal, | 592 | group_send_sig_info(t->pdeath_signal, |
582 | SEND_SIG_NOINFO, t); | 593 | SEND_SIG_NOINFO, t); |
583 | } while_each_thread(p, t); | 594 | } |
584 | reparent_leader(father, p, &dead_children); | 595 | /* |
585 | } | 596 | * If this is a threaded reparent there is no need to |
586 | write_unlock_irq(&tasklist_lock); | 597 | * notify anyone anything has happened. |
587 | 598 | */ | |
588 | BUG_ON(!list_empty(&father->children)); | 599 | if (!same_thread_group(reaper, father)) |
589 | 600 | reparent_leader(father, p, dead); | |
590 | list_for_each_entry_safe(p, n, &dead_children, sibling) { | ||
591 | list_del_init(&p->sibling); | ||
592 | release_task(p); | ||
593 | } | 601 | } |
602 | list_splice_tail_init(&father->children, &reaper->children); | ||
594 | } | 603 | } |
595 | 604 | ||
596 | /* | 605 | /* |
@@ -600,18 +609,12 @@ static void forget_original_parent(struct task_struct *father) | |||
600 | static void exit_notify(struct task_struct *tsk, int group_dead) | 609 | static void exit_notify(struct task_struct *tsk, int group_dead) |
601 | { | 610 | { |
602 | bool autoreap; | 611 | bool autoreap; |
603 | 612 | struct task_struct *p, *n; | |
604 | /* | 613 | LIST_HEAD(dead); |
605 | * This does two things: | ||
606 | * | ||
607 | * A. Make init inherit all the child processes | ||
608 | * B. Check to see if any process groups have become orphaned | ||
609 | * as a result of our exiting, and if they have any stopped | ||
610 | * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2) | ||
611 | */ | ||
612 | forget_original_parent(tsk); | ||
613 | 614 | ||
614 | write_lock_irq(&tasklist_lock); | 615 | write_lock_irq(&tasklist_lock); |
616 | forget_original_parent(tsk, &dead); | ||
617 | |||
615 | if (group_dead) | 618 | if (group_dead) |
616 | kill_orphaned_pgrp(tsk->group_leader, NULL); | 619 | kill_orphaned_pgrp(tsk->group_leader, NULL); |
617 | 620 | ||
@@ -629,15 +632,18 @@ static void exit_notify(struct task_struct *tsk, int group_dead) | |||
629 | } | 632 | } |
630 | 633 | ||
631 | tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE; | 634 | tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE; |
635 | if (tsk->exit_state == EXIT_DEAD) | ||
636 | list_add(&tsk->ptrace_entry, &dead); | ||
632 | 637 | ||
633 | /* mt-exec, de_thread() is waiting for group leader */ | 638 | /* mt-exec, de_thread() is waiting for group leader */ |
634 | if (unlikely(tsk->signal->notify_count < 0)) | 639 | if (unlikely(tsk->signal->notify_count < 0)) |
635 | wake_up_process(tsk->signal->group_exit_task); | 640 | wake_up_process(tsk->signal->group_exit_task); |
636 | write_unlock_irq(&tasklist_lock); | 641 | write_unlock_irq(&tasklist_lock); |
637 | 642 | ||
638 | /* If the process is dead, release it - nobody will wait for it */ | 643 | list_for_each_entry_safe(p, n, &dead, ptrace_entry) { |
639 | if (autoreap) | 644 | list_del_init(&p->ptrace_entry); |
640 | release_task(tsk); | 645 | release_task(p); |
646 | } | ||
641 | } | 647 | } |
642 | 648 | ||
643 | #ifdef CONFIG_DEBUG_STACK_USAGE | 649 | #ifdef CONFIG_DEBUG_STACK_USAGE |
@@ -982,8 +988,7 @@ static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p, | |||
982 | */ | 988 | */ |
983 | static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) | 989 | static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) |
984 | { | 990 | { |
985 | unsigned long state; | 991 | int state, retval, status; |
986 | int retval, status, traced; | ||
987 | pid_t pid = task_pid_vnr(p); | 992 | pid_t pid = task_pid_vnr(p); |
988 | uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p)); | 993 | uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p)); |
989 | struct siginfo __user *infop; | 994 | struct siginfo __user *infop; |
@@ -997,6 +1002,8 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) | |||
997 | 1002 | ||
998 | get_task_struct(p); | 1003 | get_task_struct(p); |
999 | read_unlock(&tasklist_lock); | 1004 | read_unlock(&tasklist_lock); |
1005 | sched_annotate_sleep(); | ||
1006 | |||
1000 | if ((exit_code & 0x7f) == 0) { | 1007 | if ((exit_code & 0x7f) == 0) { |
1001 | why = CLD_EXITED; | 1008 | why = CLD_EXITED; |
1002 | status = exit_code >> 8; | 1009 | status = exit_code >> 8; |
@@ -1006,21 +1013,25 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) | |||
1006 | } | 1013 | } |
1007 | return wait_noreap_copyout(wo, p, pid, uid, why, status); | 1014 | return wait_noreap_copyout(wo, p, pid, uid, why, status); |
1008 | } | 1015 | } |
1009 | |||
1010 | traced = ptrace_reparented(p); | ||
1011 | /* | 1016 | /* |
1012 | * Move the task's state to DEAD/TRACE, only one thread can do this. | 1017 | * Move the task's state to DEAD/TRACE, only one thread can do this. |
1013 | */ | 1018 | */ |
1014 | state = traced && thread_group_leader(p) ? EXIT_TRACE : EXIT_DEAD; | 1019 | state = (ptrace_reparented(p) && thread_group_leader(p)) ? |
1020 | EXIT_TRACE : EXIT_DEAD; | ||
1015 | if (cmpxchg(&p->exit_state, EXIT_ZOMBIE, state) != EXIT_ZOMBIE) | 1021 | if (cmpxchg(&p->exit_state, EXIT_ZOMBIE, state) != EXIT_ZOMBIE) |
1016 | return 0; | 1022 | return 0; |
1017 | /* | 1023 | /* |
1018 | * It can be ptraced but not reparented, check | 1024 | * We own this thread, nobody else can reap it. |
1019 | * thread_group_leader() to filter out sub-threads. | ||
1020 | */ | 1025 | */ |
1021 | if (likely(!traced) && thread_group_leader(p)) { | 1026 | read_unlock(&tasklist_lock); |
1022 | struct signal_struct *psig; | 1027 | sched_annotate_sleep(); |
1023 | struct signal_struct *sig; | 1028 | |
1029 | /* | ||
1030 | * Check thread_group_leader() to exclude the traced sub-threads. | ||
1031 | */ | ||
1032 | if (state == EXIT_DEAD && thread_group_leader(p)) { | ||
1033 | struct signal_struct *sig = p->signal; | ||
1034 | struct signal_struct *psig = current->signal; | ||
1024 | unsigned long maxrss; | 1035 | unsigned long maxrss; |
1025 | cputime_t tgutime, tgstime; | 1036 | cputime_t tgutime, tgstime; |
1026 | 1037 | ||
@@ -1032,21 +1043,20 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) | |||
1032 | * accumulate in the parent's signal_struct c* fields. | 1043 | * accumulate in the parent's signal_struct c* fields. |
1033 | * | 1044 | * |
1034 | * We don't bother to take a lock here to protect these | 1045 | * We don't bother to take a lock here to protect these |
1035 | * p->signal fields, because they are only touched by | 1046 | * p->signal fields because the whole thread group is dead |
1036 | * __exit_signal, which runs with tasklist_lock | 1047 | * and nobody can change them. |
1037 | * write-locked anyway, and so is excluded here. We do | 1048 | * |
1038 | * need to protect the access to parent->signal fields, | 1049 | * psig->stats_lock also protects us from our sub-threads |
1039 | * as other threads in the parent group can be right | 1050 | * which can reap other children at the same time. Until |
1040 | * here reaping other children at the same time. | 1051 | * we change k_getrusage()-like users to rely on this lock |
1052 | * we have to take ->siglock as well. | ||
1041 | * | 1053 | * |
1042 | * We use thread_group_cputime_adjusted() to get times for | 1054 | * We use thread_group_cputime_adjusted() to get times for |
1043 | * the thread group, which consolidates times for all threads | 1055 | * the thread group, which consolidates times for all threads |
1044 | * in the group including the group leader. | 1056 | * in the group including the group leader. |
1045 | */ | 1057 | */ |
1046 | thread_group_cputime_adjusted(p, &tgutime, &tgstime); | 1058 | thread_group_cputime_adjusted(p, &tgutime, &tgstime); |
1047 | spin_lock_irq(&p->real_parent->sighand->siglock); | 1059 | spin_lock_irq(¤t->sighand->siglock); |
1048 | psig = p->real_parent->signal; | ||
1049 | sig = p->signal; | ||
1050 | write_seqlock(&psig->stats_lock); | 1060 | write_seqlock(&psig->stats_lock); |
1051 | psig->cutime += tgutime + sig->cutime; | 1061 | psig->cutime += tgutime + sig->cutime; |
1052 | psig->cstime += tgstime + sig->cstime; | 1062 | psig->cstime += tgstime + sig->cstime; |
@@ -1071,15 +1081,9 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) | |||
1071 | task_io_accounting_add(&psig->ioac, &p->ioac); | 1081 | task_io_accounting_add(&psig->ioac, &p->ioac); |
1072 | task_io_accounting_add(&psig->ioac, &sig->ioac); | 1082 | task_io_accounting_add(&psig->ioac, &sig->ioac); |
1073 | write_sequnlock(&psig->stats_lock); | 1083 | write_sequnlock(&psig->stats_lock); |
1074 | spin_unlock_irq(&p->real_parent->sighand->siglock); | 1084 | spin_unlock_irq(¤t->sighand->siglock); |
1075 | } | 1085 | } |
1076 | 1086 | ||
1077 | /* | ||
1078 | * Now we are sure this task is interesting, and no other | ||
1079 | * thread can reap it because we its state == DEAD/TRACE. | ||
1080 | */ | ||
1081 | read_unlock(&tasklist_lock); | ||
1082 | |||
1083 | retval = wo->wo_rusage | 1087 | retval = wo->wo_rusage |
1084 | ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; | 1088 | ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; |
1085 | status = (p->signal->flags & SIGNAL_GROUP_EXIT) | 1089 | status = (p->signal->flags & SIGNAL_GROUP_EXIT) |
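Editor note: the rewritten comments above lean on psig->stats_lock: the reaper updates the c* totals under write_seqlock() (plus siglock for now), while lockless readers retry on a sequence mismatch. A minimal sketch of the matching read side, assuming only that stats_lock is a seqlock_t and reusing the field names from the hunk:

#include <linux/seqlock.h>
#include <linux/sched.h>

/* Read the child-time totals consistently against concurrent reapers (sketch). */
static void read_child_times(struct signal_struct *sig,
                             cputime_t *cutime, cputime_t *cstime)
{
        unsigned int seq;

        do {
                seq = read_seqbegin(&sig->stats_lock);
                *cutime = sig->cutime;
                *cstime = sig->cstime;
        } while (read_seqretry(&sig->stats_lock, seq));
}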
@@ -1210,6 +1214,7 @@ unlock_sig: | |||
1210 | pid = task_pid_vnr(p); | 1214 | pid = task_pid_vnr(p); |
1211 | why = ptrace ? CLD_TRAPPED : CLD_STOPPED; | 1215 | why = ptrace ? CLD_TRAPPED : CLD_STOPPED; |
1212 | read_unlock(&tasklist_lock); | 1216 | read_unlock(&tasklist_lock); |
1217 | sched_annotate_sleep(); | ||
1213 | 1218 | ||
1214 | if (unlikely(wo->wo_flags & WNOWAIT)) | 1219 | if (unlikely(wo->wo_flags & WNOWAIT)) |
1215 | return wait_noreap_copyout(wo, p, pid, uid, why, exit_code); | 1220 | return wait_noreap_copyout(wo, p, pid, uid, why, exit_code); |
@@ -1272,6 +1277,7 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p) | |||
1272 | pid = task_pid_vnr(p); | 1277 | pid = task_pid_vnr(p); |
1273 | get_task_struct(p); | 1278 | get_task_struct(p); |
1274 | read_unlock(&tasklist_lock); | 1279 | read_unlock(&tasklist_lock); |
1280 | sched_annotate_sleep(); | ||
1275 | 1281 | ||
1276 | if (!wo->wo_info) { | 1282 | if (!wo->wo_info) { |
1277 | retval = wo->wo_rusage | 1283 | retval = wo->wo_rusage |
diff --git a/kernel/extable.c b/kernel/extable.c index d8a6446adbcb..c98f926277a8 100644 --- a/kernel/extable.c +++ b/kernel/extable.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/ftrace.h> | 18 | #include <linux/ftrace.h> |
19 | #include <linux/memory.h> | 19 | #include <linux/memory.h> |
20 | #include <linux/module.h> | 20 | #include <linux/module.h> |
21 | #include <linux/ftrace.h> | ||
21 | #include <linux/mutex.h> | 22 | #include <linux/mutex.h> |
22 | #include <linux/init.h> | 23 | #include <linux/init.h> |
23 | 24 | ||
@@ -102,6 +103,8 @@ int __kernel_text_address(unsigned long addr) | |||
102 | return 1; | 103 | return 1; |
103 | if (is_module_text_address(addr)) | 104 | if (is_module_text_address(addr)) |
104 | return 1; | 105 | return 1; |
106 | if (is_ftrace_trampoline(addr)) | ||
107 | return 1; | ||
105 | /* | 108 | /* |
106 | * There might be init symbols in saved stacktraces. | 109 | * There might be init symbols in saved stacktraces. |
107 | * Give those symbols a chance to be printed in | 110 | * Give those symbols a chance to be printed in |
@@ -119,7 +122,9 @@ int kernel_text_address(unsigned long addr) | |||
119 | { | 122 | { |
120 | if (core_kernel_text(addr)) | 123 | if (core_kernel_text(addr)) |
121 | return 1; | 124 | return 1; |
122 | return is_module_text_address(addr); | 125 | if (is_module_text_address(addr)) |
126 | return 1; | ||
127 | return is_ftrace_trampoline(addr); | ||
123 | } | 128 | } |
124 | 129 | ||
125 | /* | 130 | /* |
diff --git a/kernel/fork.c b/kernel/fork.c index 9b7d746d6d62..9ca84189cfc2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -1022,11 +1022,14 @@ void __cleanup_sighand(struct sighand_struct *sighand) | |||
1022 | { | 1022 | { |
1023 | if (atomic_dec_and_test(&sighand->count)) { | 1023 | if (atomic_dec_and_test(&sighand->count)) { |
1024 | signalfd_cleanup(sighand); | 1024 | signalfd_cleanup(sighand); |
1025 | /* | ||
1026 | * sighand_cachep is SLAB_DESTROY_BY_RCU so we can free it | ||
1027 | * without an RCU grace period, see __lock_task_sighand(). | ||
1028 | */ | ||
1025 | kmem_cache_free(sighand_cachep, sighand); | 1029 | kmem_cache_free(sighand_cachep, sighand); |
1026 | } | 1030 | } |
1027 | } | 1031 | } |
1028 | 1032 | ||
1029 | |||
1030 | /* | 1033 | /* |
1031 | * Initialize POSIX timer handling for a thread group. | 1034 | * Initialize POSIX timer handling for a thread group. |
1032 | */ | 1035 | */ |
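Editor note: the new comment above points at the SLAB_DESTROY_BY_RCU contract: freed sighand_structs may be reused immediately, so lookups must revalidate the pointer after taking the lock. A simplified sketch of that pattern in the style of __lock_task_sighand(); irq disabling and other details of the real helper are omitted.

#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/spinlock.h>

/* SLAB_DESTROY_BY_RCU lookup pattern (sketch of __lock_task_sighand()). */
static struct sighand_struct *lock_sighand_sketch(struct task_struct *tsk)
{
        struct sighand_struct *sighand;

        rcu_read_lock();
        for (;;) {
                sighand = rcu_dereference(tsk->sighand);
                if (!sighand)
                        break;
                spin_lock(&sighand->siglock);
                /*
                 * The memory cannot go back to the page allocator under us,
                 * but the object may have been recycled for another task;
                 * recheck the pointer after taking the lock.
                 */
                if (likely(sighand == tsk->sighand))
                        break;
                spin_unlock(&sighand->siglock);
        }
        rcu_read_unlock();
        return sighand;         /* returned locked, or NULL */
}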
diff --git a/kernel/futex.c b/kernel/futex.c index f3a3a071283c..63678b573d61 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -143,9 +143,8 @@ | |||
143 | * | 143 | * |
144 | * Where (A) orders the waiters increment and the futex value read through | 144 | * Where (A) orders the waiters increment and the futex value read through |
145 | * atomic operations (see hb_waiters_inc) and where (B) orders the write | 145 | * atomic operations (see hb_waiters_inc) and where (B) orders the write |
146 | * to futex and the waiters read -- this is done by the barriers in | 146 | * to futex and the waiters read -- this is done by the barriers for both |
147 | * get_futex_key_refs(), through either ihold or atomic_inc, depending on the | 147 | * shared and private futexes in get_futex_key_refs(). |
148 | * futex type. | ||
149 | * | 148 | * |
150 | * This yields the following case (where X:=waiters, Y:=futex): | 149 | * This yields the following case (where X:=waiters, Y:=futex): |
151 | * | 150 | * |
@@ -344,13 +343,20 @@ static void get_futex_key_refs(union futex_key *key) | |||
344 | futex_get_mm(key); /* implies MB (B) */ | 343 | futex_get_mm(key); /* implies MB (B) */ |
345 | break; | 344 | break; |
346 | default: | 345 | default: |
346 | /* | ||
347 | * Private futexes do not hold reference on an inode or | ||
348 | * mm, therefore the only purpose of calling get_futex_key_refs | ||
349 | * is because we need the barrier for the lockless waiter check. | ||
350 | */ | ||
347 | smp_mb(); /* explicit MB (B) */ | 351 | smp_mb(); /* explicit MB (B) */ |
348 | } | 352 | } |
349 | } | 353 | } |
350 | 354 | ||
351 | /* | 355 | /* |
352 | * Drop a reference to the resource addressed by a key. | 356 | * Drop a reference to the resource addressed by a key. |
353 | * The hash bucket spinlock must not be held. | 357 | * The hash bucket spinlock must not be held. This is |
358 | * a no-op for private futexes, see comment in the get | ||
359 | * counterpart. | ||
354 | */ | 360 | */ |
355 | static void drop_futex_key_refs(union futex_key *key) | 361 | static void drop_futex_key_refs(union futex_key *key) |
356 | { | 362 | { |
@@ -641,8 +647,14 @@ static struct futex_pi_state * alloc_pi_state(void) | |||
641 | return pi_state; | 647 | return pi_state; |
642 | } | 648 | } |
643 | 649 | ||
650 | /* | ||
651 | * Must be called with the hb lock held. | ||
652 | */ | ||
644 | static void free_pi_state(struct futex_pi_state *pi_state) | 653 | static void free_pi_state(struct futex_pi_state *pi_state) |
645 | { | 654 | { |
655 | if (!pi_state) | ||
656 | return; | ||
657 | |||
646 | if (!atomic_dec_and_test(&pi_state->refcount)) | 658 | if (!atomic_dec_and_test(&pi_state->refcount)) |
647 | return; | 659 | return; |
648 | 660 | ||
@@ -1521,15 +1533,6 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, | |||
1521 | } | 1533 | } |
1522 | 1534 | ||
1523 | retry: | 1535 | retry: |
1524 | if (pi_state != NULL) { | ||
1525 | /* | ||
1526 | * We will have to lookup the pi_state again, so free this one | ||
1527 | * to keep the accounting correct. | ||
1528 | */ | ||
1529 | free_pi_state(pi_state); | ||
1530 | pi_state = NULL; | ||
1531 | } | ||
1532 | |||
1533 | ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ); | 1536 | ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ); |
1534 | if (unlikely(ret != 0)) | 1537 | if (unlikely(ret != 0)) |
1535 | goto out; | 1538 | goto out; |
@@ -1619,6 +1622,8 @@ retry_private: | |||
1619 | case 0: | 1622 | case 0: |
1620 | break; | 1623 | break; |
1621 | case -EFAULT: | 1624 | case -EFAULT: |
1625 | free_pi_state(pi_state); | ||
1626 | pi_state = NULL; | ||
1622 | double_unlock_hb(hb1, hb2); | 1627 | double_unlock_hb(hb1, hb2); |
1623 | hb_waiters_dec(hb2); | 1628 | hb_waiters_dec(hb2); |
1624 | put_futex_key(&key2); | 1629 | put_futex_key(&key2); |
@@ -1634,6 +1639,8 @@ retry_private: | |||
1634 | * exit to complete. | 1639 | * exit to complete. |
1635 | * - The user space value changed. | 1640 | * - The user space value changed. |
1636 | */ | 1641 | */ |
1642 | free_pi_state(pi_state); | ||
1643 | pi_state = NULL; | ||
1637 | double_unlock_hb(hb1, hb2); | 1644 | double_unlock_hb(hb1, hb2); |
1638 | hb_waiters_dec(hb2); | 1645 | hb_waiters_dec(hb2); |
1639 | put_futex_key(&key2); | 1646 | put_futex_key(&key2); |
@@ -1710,6 +1717,7 @@ retry_private: | |||
1710 | } | 1717 | } |
1711 | 1718 | ||
1712 | out_unlock: | 1719 | out_unlock: |
1720 | free_pi_state(pi_state); | ||
1713 | double_unlock_hb(hb1, hb2); | 1721 | double_unlock_hb(hb1, hb2); |
1714 | hb_waiters_dec(hb2); | 1722 | hb_waiters_dec(hb2); |
1715 | 1723 | ||
@@ -1727,8 +1735,6 @@ out_put_keys: | |||
1727 | out_put_key1: | 1735 | out_put_key1: |
1728 | put_futex_key(&key1); | 1736 | put_futex_key(&key1); |
1729 | out: | 1737 | out: |
1730 | if (pi_state != NULL) | ||
1731 | free_pi_state(pi_state); | ||
1732 | return ret ? ret : task_count; | 1738 | return ret ? ret : task_count; |
1733 | } | 1739 | } |
1734 | 1740 | ||
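Editor note: the reworded futex comments above are about the pairing between the futex-word store on the waker side and the waiters check on the wait side, for which get_futex_key_refs() supplies the barrier even on private futexes. A userspace illustration of the protocol that pairing protects; minimal, with no timeout or error handling.

#include <stdint.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/futex.h>

static int futex(uint32_t *uaddr, int op, uint32_t val)
{
        return syscall(SYS_futex, uaddr, op, val, NULL, NULL, 0);
}

static uint32_t flag;   /* the futex word */

static void waiter(void)
{
        /* Sleep only if the flag is still 0 when the kernel rechecks it. */
        while (__atomic_load_n(&flag, __ATOMIC_ACQUIRE) == 0)
                futex(&flag, FUTEX_WAIT_PRIVATE, 0);
}

static void waker(void)
{
        __atomic_store_n(&flag, 1, __ATOMIC_RELEASE);
        /* The kernel-side barrier (B) orders this store vs. the waiters check. */
        futex(&flag, FUTEX_WAKE_PRIVATE, 1);
}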
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig index cf66c5c8458e..3b7408759bdf 100644 --- a/kernel/gcov/Kconfig +++ b/kernel/gcov/Kconfig | |||
@@ -35,7 +35,7 @@ config GCOV_KERNEL | |||
35 | config GCOV_PROFILE_ALL | 35 | config GCOV_PROFILE_ALL |
36 | bool "Profile entire Kernel" | 36 | bool "Profile entire Kernel" |
37 | depends on GCOV_KERNEL | 37 | depends on GCOV_KERNEL |
38 | depends on SUPERH || S390 || X86 || PPC || MICROBLAZE || ARM | 38 | depends on SUPERH || S390 || X86 || PPC || MICROBLAZE || ARM || ARM64 |
39 | default n | 39 | default n |
40 | ---help--- | 40 | ---help--- |
41 | This option activates profiling for the entire kernel. | 41 | This option activates profiling for the entire kernel. |
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 225086b2652e..9a76e3beda54 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig | |||
@@ -55,6 +55,21 @@ config GENERIC_IRQ_CHIP | |||
55 | config IRQ_DOMAIN | 55 | config IRQ_DOMAIN |
56 | bool | 56 | bool |
57 | 57 | ||
58 | # Support for hierarchical irq domains | ||
59 | config IRQ_DOMAIN_HIERARCHY | ||
60 | bool | ||
61 | select IRQ_DOMAIN | ||
62 | |||
63 | # Generic MSI interrupt support | ||
64 | config GENERIC_MSI_IRQ | ||
65 | bool | ||
66 | |||
67 | # Generic MSI hierarchical interrupt domain support | ||
68 | config GENERIC_MSI_IRQ_DOMAIN | ||
69 | bool | ||
70 | select IRQ_DOMAIN_HIERARCHY | ||
71 | select GENERIC_MSI_IRQ | ||
72 | |||
58 | config HANDLE_DOMAIN_IRQ | 73 | config HANDLE_DOMAIN_IRQ |
59 | bool | 74 | bool |
60 | 75 | ||
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index fff17381f0af..d12123526e2b 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile | |||
@@ -6,3 +6,4 @@ obj-$(CONFIG_IRQ_DOMAIN) += irqdomain.o | |||
6 | obj-$(CONFIG_PROC_FS) += proc.o | 6 | obj-$(CONFIG_PROC_FS) += proc.o |
7 | obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o | 7 | obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o |
8 | obj-$(CONFIG_PM_SLEEP) += pm.o | 8 | obj-$(CONFIG_PM_SLEEP) += pm.o |
9 | obj-$(CONFIG_GENERIC_MSI_IRQ) += msi.o | ||
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index e5202f00cabc..6f1c7a566b95 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/interrupt.h> | 16 | #include <linux/interrupt.h> |
17 | #include <linux/kernel_stat.h> | 17 | #include <linux/kernel_stat.h> |
18 | #include <linux/irqdomain.h> | ||
18 | 19 | ||
19 | #include <trace/events/irq.h> | 20 | #include <trace/events/irq.h> |
20 | 21 | ||
@@ -178,6 +179,7 @@ int irq_startup(struct irq_desc *desc, bool resend) | |||
178 | irq_state_clr_disabled(desc); | 179 | irq_state_clr_disabled(desc); |
179 | desc->depth = 0; | 180 | desc->depth = 0; |
180 | 181 | ||
182 | irq_domain_activate_irq(&desc->irq_data); | ||
181 | if (desc->irq_data.chip->irq_startup) { | 183 | if (desc->irq_data.chip->irq_startup) { |
182 | ret = desc->irq_data.chip->irq_startup(&desc->irq_data); | 184 | ret = desc->irq_data.chip->irq_startup(&desc->irq_data); |
183 | irq_state_clr_masked(desc); | 185 | irq_state_clr_masked(desc); |
@@ -199,6 +201,7 @@ void irq_shutdown(struct irq_desc *desc) | |||
199 | desc->irq_data.chip->irq_disable(&desc->irq_data); | 201 | desc->irq_data.chip->irq_disable(&desc->irq_data); |
200 | else | 202 | else |
201 | desc->irq_data.chip->irq_mask(&desc->irq_data); | 203 | desc->irq_data.chip->irq_mask(&desc->irq_data); |
204 | irq_domain_deactivate_irq(&desc->irq_data); | ||
202 | irq_state_set_masked(desc); | 205 | irq_state_set_masked(desc); |
203 | } | 206 | } |
204 | 207 | ||
@@ -728,7 +731,30 @@ __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, | |||
728 | if (!handle) { | 731 | if (!handle) { |
729 | handle = handle_bad_irq; | 732 | handle = handle_bad_irq; |
730 | } else { | 733 | } else { |
731 | if (WARN_ON(desc->irq_data.chip == &no_irq_chip)) | 734 | struct irq_data *irq_data = &desc->irq_data; |
735 | #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY | ||
736 | /* | ||
737 | * With hierarchical domains we might run into a | ||
738 | * situation where the outermost chip is not yet set | ||
739 | * up, but the inner chips are there. Instead of | ||
740 | * bailing we install the handler, but obviously we | ||
741 | * cannot enable/startup the interrupt at this point. | ||
742 | */ | ||
743 | while (irq_data) { | ||
744 | if (irq_data->chip != &no_irq_chip) | ||
745 | break; | ||
746 | /* | ||
747 | * Bail out if the outer chip is not set up | ||
748 | * and the interrupt is supposed to be started |
749 | * right away. | ||
750 | */ | ||
751 | if (WARN_ON(is_chained)) | ||
752 | goto out; | ||
753 | /* Try the parent */ | ||
754 | irq_data = irq_data->parent_data; | ||
755 | } | ||
756 | #endif | ||
757 | if (WARN_ON(!irq_data || irq_data->chip == &no_irq_chip)) | ||
732 | goto out; | 758 | goto out; |
733 | } | 759 | } |
734 | 760 | ||
@@ -847,3 +873,105 @@ void irq_cpu_offline(void) | |||
847 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 873 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
848 | } | 874 | } |
849 | } | 875 | } |
876 | |||
877 | #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY | ||
878 | /** | ||
879 | * irq_chip_ack_parent - Acknowledge the parent interrupt | ||
880 | * @data: Pointer to interrupt specific data | ||
881 | */ | ||
882 | void irq_chip_ack_parent(struct irq_data *data) | ||
883 | { | ||
884 | data = data->parent_data; | ||
885 | data->chip->irq_ack(data); | ||
886 | } | ||
887 | |||
888 | /** | ||
889 | * irq_chip_mask_parent - Mask the parent interrupt | ||
890 | * @data: Pointer to interrupt specific data | ||
891 | */ | ||
892 | void irq_chip_mask_parent(struct irq_data *data) | ||
893 | { | ||
894 | data = data->parent_data; | ||
895 | data->chip->irq_mask(data); | ||
896 | } | ||
897 | |||
898 | /** | ||
899 | * irq_chip_unmask_parent - Unmask the parent interrupt | ||
900 | * @data: Pointer to interrupt specific data | ||
901 | */ | ||
902 | void irq_chip_unmask_parent(struct irq_data *data) | ||
903 | { | ||
904 | data = data->parent_data; | ||
905 | data->chip->irq_unmask(data); | ||
906 | } | ||
907 | |||
908 | /** | ||
909 | * irq_chip_eoi_parent - Invoke EOI on the parent interrupt | ||
910 | * @data: Pointer to interrupt specific data | ||
911 | */ | ||
912 | void irq_chip_eoi_parent(struct irq_data *data) | ||
913 | { | ||
914 | data = data->parent_data; | ||
915 | data->chip->irq_eoi(data); | ||
916 | } | ||
917 | |||
918 | /** | ||
919 | * irq_chip_set_affinity_parent - Set affinity on the parent interrupt | ||
920 | * @data: Pointer to interrupt specific data | ||
921 | * @dest: The affinity mask to set | ||
922 | * @force: Flag to enforce setting (disable online checks) | ||
923 | * | ||
924 | * Conditional, as the underlying parent chip might not implement it. |
925 | */ | ||
926 | int irq_chip_set_affinity_parent(struct irq_data *data, | ||
927 | const struct cpumask *dest, bool force) | ||
928 | { | ||
929 | data = data->parent_data; | ||
930 | if (data->chip->irq_set_affinity) | ||
931 | return data->chip->irq_set_affinity(data, dest, force); | ||
932 | |||
933 | return -ENOSYS; | ||
934 | } | ||
935 | |||
936 | /** | ||
937 | * irq_chip_retrigger_hierarchy - Retrigger an interrupt in hardware | ||
938 | * @data: Pointer to interrupt specific data | ||
939 | * | ||
940 | * Iterate through the domain hierarchy of the interrupt and check | ||
941 | * whether a hw retrigger function exists. If yes, invoke it. | ||
942 | */ | ||
943 | int irq_chip_retrigger_hierarchy(struct irq_data *data) | ||
944 | { | ||
945 | for (data = data->parent_data; data; data = data->parent_data) | ||
946 | if (data->chip && data->chip->irq_retrigger) | ||
947 | return data->chip->irq_retrigger(data); | ||
948 | |||
949 | return -ENOSYS; | ||
950 | } | ||
951 | #endif | ||
952 | |||
953 | /** | ||
954 | * irq_chip_compose_msi_msg - Compose MSI message for an irq chip |
955 | * @data: Pointer to interrupt specific data | ||
956 | * @msg: Pointer to the MSI message | ||
957 | * | ||
958 | * For hierarchical domains we find the first chip in the hierarchy | ||
959 | * which implements the irq_compose_msi_msg callback. For non | ||
960 | * hierarchical we use the top level chip. | ||
961 | */ | ||
962 | int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) | ||
963 | { | ||
964 | struct irq_data *pos = NULL; | ||
965 | |||
966 | #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY | ||
967 | for (; data; data = data->parent_data) | ||
968 | #endif | ||
969 | if (data->chip && data->chip->irq_compose_msi_msg) | ||
970 | pos = data; | ||
971 | if (!pos) | ||
972 | return -ENOSYS; | ||
973 | |||
974 | pos->chip->irq_compose_msi_msg(pos, msg); | ||
975 | |||
976 | return 0; | ||
977 | } | ||
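Editor note: the new irq_chip_*_parent() helpers let the outer chip of a stacked irq domain simply forward operations to its parent. A hedged sketch of such an outer chip wired through these helpers; the chip itself is hypothetical and a real driver would only fill in the callbacks its hardware needs.

#include <linux/irq.h>

/* Hypothetical outer chip in a stacked (hierarchical) irq domain. */
static struct irq_chip example_stacked_chip = {
        .name             = "EXAMPLE-STACKED",
        .irq_ack          = irq_chip_ack_parent,
        .irq_mask         = irq_chip_mask_parent,
        .irq_unmask       = irq_chip_unmask_parent,
        .irq_eoi          = irq_chip_eoi_parent,
        .irq_set_affinity = irq_chip_set_affinity_parent,
        .irq_retrigger    = irq_chip_retrigger_hierarchy,
};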
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index cf80e7b0ddab..61024e8abdef 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c | |||
@@ -39,7 +39,7 @@ void irq_gc_mask_disable_reg(struct irq_data *d) | |||
39 | u32 mask = d->mask; | 39 | u32 mask = d->mask; |
40 | 40 | ||
41 | irq_gc_lock(gc); | 41 | irq_gc_lock(gc); |
42 | irq_reg_writel(mask, gc->reg_base + ct->regs.disable); | 42 | irq_reg_writel(gc, mask, ct->regs.disable); |
43 | *ct->mask_cache &= ~mask; | 43 | *ct->mask_cache &= ~mask; |
44 | irq_gc_unlock(gc); | 44 | irq_gc_unlock(gc); |
45 | } | 45 | } |
@@ -59,7 +59,7 @@ void irq_gc_mask_set_bit(struct irq_data *d) | |||
59 | 59 | ||
60 | irq_gc_lock(gc); | 60 | irq_gc_lock(gc); |
61 | *ct->mask_cache |= mask; | 61 | *ct->mask_cache |= mask; |
62 | irq_reg_writel(*ct->mask_cache, gc->reg_base + ct->regs.mask); | 62 | irq_reg_writel(gc, *ct->mask_cache, ct->regs.mask); |
63 | irq_gc_unlock(gc); | 63 | irq_gc_unlock(gc); |
64 | } | 64 | } |
65 | EXPORT_SYMBOL_GPL(irq_gc_mask_set_bit); | 65 | EXPORT_SYMBOL_GPL(irq_gc_mask_set_bit); |
@@ -79,7 +79,7 @@ void irq_gc_mask_clr_bit(struct irq_data *d) | |||
79 | 79 | ||
80 | irq_gc_lock(gc); | 80 | irq_gc_lock(gc); |
81 | *ct->mask_cache &= ~mask; | 81 | *ct->mask_cache &= ~mask; |
82 | irq_reg_writel(*ct->mask_cache, gc->reg_base + ct->regs.mask); | 82 | irq_reg_writel(gc, *ct->mask_cache, ct->regs.mask); |
83 | irq_gc_unlock(gc); | 83 | irq_gc_unlock(gc); |
84 | } | 84 | } |
85 | EXPORT_SYMBOL_GPL(irq_gc_mask_clr_bit); | 85 | EXPORT_SYMBOL_GPL(irq_gc_mask_clr_bit); |
@@ -98,7 +98,7 @@ void irq_gc_unmask_enable_reg(struct irq_data *d) | |||
98 | u32 mask = d->mask; | 98 | u32 mask = d->mask; |
99 | 99 | ||
100 | irq_gc_lock(gc); | 100 | irq_gc_lock(gc); |
101 | irq_reg_writel(mask, gc->reg_base + ct->regs.enable); | 101 | irq_reg_writel(gc, mask, ct->regs.enable); |
102 | *ct->mask_cache |= mask; | 102 | *ct->mask_cache |= mask; |
103 | irq_gc_unlock(gc); | 103 | irq_gc_unlock(gc); |
104 | } | 104 | } |
@@ -114,7 +114,7 @@ void irq_gc_ack_set_bit(struct irq_data *d) | |||
114 | u32 mask = d->mask; | 114 | u32 mask = d->mask; |
115 | 115 | ||
116 | irq_gc_lock(gc); | 116 | irq_gc_lock(gc); |
117 | irq_reg_writel(mask, gc->reg_base + ct->regs.ack); | 117 | irq_reg_writel(gc, mask, ct->regs.ack); |
118 | irq_gc_unlock(gc); | 118 | irq_gc_unlock(gc); |
119 | } | 119 | } |
120 | EXPORT_SYMBOL_GPL(irq_gc_ack_set_bit); | 120 | EXPORT_SYMBOL_GPL(irq_gc_ack_set_bit); |
@@ -130,7 +130,7 @@ void irq_gc_ack_clr_bit(struct irq_data *d) | |||
130 | u32 mask = ~d->mask; | 130 | u32 mask = ~d->mask; |
131 | 131 | ||
132 | irq_gc_lock(gc); | 132 | irq_gc_lock(gc); |
133 | irq_reg_writel(mask, gc->reg_base + ct->regs.ack); | 133 | irq_reg_writel(gc, mask, ct->regs.ack); |
134 | irq_gc_unlock(gc); | 134 | irq_gc_unlock(gc); |
135 | } | 135 | } |
136 | 136 | ||
@@ -145,8 +145,8 @@ void irq_gc_mask_disable_reg_and_ack(struct irq_data *d) | |||
145 | u32 mask = d->mask; | 145 | u32 mask = d->mask; |
146 | 146 | ||
147 | irq_gc_lock(gc); | 147 | irq_gc_lock(gc); |
148 | irq_reg_writel(mask, gc->reg_base + ct->regs.mask); | 148 | irq_reg_writel(gc, mask, ct->regs.mask); |
149 | irq_reg_writel(mask, gc->reg_base + ct->regs.ack); | 149 | irq_reg_writel(gc, mask, ct->regs.ack); |
150 | irq_gc_unlock(gc); | 150 | irq_gc_unlock(gc); |
151 | } | 151 | } |
152 | 152 | ||
@@ -161,7 +161,7 @@ void irq_gc_eoi(struct irq_data *d) | |||
161 | u32 mask = d->mask; | 161 | u32 mask = d->mask; |
162 | 162 | ||
163 | irq_gc_lock(gc); | 163 | irq_gc_lock(gc); |
164 | irq_reg_writel(mask, gc->reg_base + ct->regs.eoi); | 164 | irq_reg_writel(gc, mask, ct->regs.eoi); |
165 | irq_gc_unlock(gc); | 165 | irq_gc_unlock(gc); |
166 | } | 166 | } |
167 | 167 | ||
@@ -191,6 +191,16 @@ int irq_gc_set_wake(struct irq_data *d, unsigned int on) | |||
191 | return 0; | 191 | return 0; |
192 | } | 192 | } |
193 | 193 | ||
194 | static u32 irq_readl_be(void __iomem *addr) | ||
195 | { | ||
196 | return ioread32be(addr); | ||
197 | } | ||
198 | |||
199 | static void irq_writel_be(u32 val, void __iomem *addr) | ||
200 | { | ||
201 | iowrite32be(val, addr); | ||
202 | } | ||
203 | |||
194 | static void | 204 | static void |
195 | irq_init_generic_chip(struct irq_chip_generic *gc, const char *name, | 205 | irq_init_generic_chip(struct irq_chip_generic *gc, const char *name, |
196 | int num_ct, unsigned int irq_base, | 206 | int num_ct, unsigned int irq_base, |
@@ -245,7 +255,7 @@ irq_gc_init_mask_cache(struct irq_chip_generic *gc, enum irq_gc_flags flags) | |||
245 | } | 255 | } |
246 | ct[i].mask_cache = mskptr; | 256 | ct[i].mask_cache = mskptr; |
247 | if (flags & IRQ_GC_INIT_MASK_CACHE) | 257 | if (flags & IRQ_GC_INIT_MASK_CACHE) |
248 | *mskptr = irq_reg_readl(gc->reg_base + mskreg); | 258 | *mskptr = irq_reg_readl(gc, mskreg); |
249 | } | 259 | } |
250 | } | 260 | } |
251 | 261 | ||
@@ -300,7 +310,13 @@ int irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, | |||
300 | dgc->gc[i] = gc = tmp; | 310 | dgc->gc[i] = gc = tmp; |
301 | irq_init_generic_chip(gc, name, num_ct, i * irqs_per_chip, | 311 | irq_init_generic_chip(gc, name, num_ct, i * irqs_per_chip, |
302 | NULL, handler); | 312 | NULL, handler); |
313 | |||
303 | gc->domain = d; | 314 | gc->domain = d; |
315 | if (gcflags & IRQ_GC_BE_IO) { | ||
316 | gc->reg_readl = &irq_readl_be; | ||
317 | gc->reg_writel = &irq_writel_be; | ||
318 | } | ||
319 | |||
304 | raw_spin_lock_irqsave(&gc_lock, flags); | 320 | raw_spin_lock_irqsave(&gc_lock, flags); |
305 | list_add_tail(&gc->list, &gc_list); | 321 | list_add_tail(&gc->list, &gc_list); |
306 | raw_spin_unlock_irqrestore(&gc_lock, flags); | 322 | raw_spin_unlock_irqrestore(&gc_lock, flags); |
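Editor note: with irq_reg_readl()/irq_reg_writel() now taking the generic chip, a driver for a big-endian controller opts into the ioread32be()/iowrite32be() accessors by passing IRQ_GC_BE_IO at allocation time. A hedged init sketch; the domain, register offsets and handler choice are placeholders, not taken from any in-tree driver.

#include <linux/irq.h>
#include <linux/irqdomain.h>

/* Hypothetical init for a big-endian interrupt controller (sketch). */
static int example_gc_init(struct irq_domain *domain, void __iomem *regs)
{
        struct irq_chip_generic *gc;
        int ret;

        ret = irq_alloc_domain_generic_chips(domain, 32, 1, "example-be",
                                             handle_level_irq, 0, 0,
                                             IRQ_GC_INIT_MASK_CACHE |
                                             IRQ_GC_BE_IO);
        if (ret)
                return ret;

        gc = irq_get_domain_generic_chip(domain, 0);
        gc->reg_base = regs;
        /* The mask/ack writes below now go through iowrite32be(). */
        gc->chip_types[0].regs.mask = 0x04;     /* placeholder offsets */
        gc->chip_types[0].regs.ack  = 0x08;
        gc->chip_types[0].chip.irq_mask = irq_gc_mask_set_bit;
        gc->chip_types[0].chip.irq_ack  = irq_gc_ack_set_bit;
        return 0;
}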
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 6534ff6ce02e..7fac311057b8 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c | |||
@@ -23,6 +23,10 @@ static DEFINE_MUTEX(irq_domain_mutex); | |||
23 | static DEFINE_MUTEX(revmap_trees_mutex); | 23 | static DEFINE_MUTEX(revmap_trees_mutex); |
24 | static struct irq_domain *irq_default_domain; | 24 | static struct irq_domain *irq_default_domain; |
25 | 25 | ||
26 | static int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, | ||
27 | irq_hw_number_t hwirq, int node); | ||
28 | static void irq_domain_check_hierarchy(struct irq_domain *domain); | ||
29 | |||
26 | /** | 30 | /** |
27 | * __irq_domain_add() - Allocate a new irq_domain data structure | 31 | * __irq_domain_add() - Allocate a new irq_domain data structure |
28 | * @of_node: optional device-tree node of the interrupt controller | 32 | * @of_node: optional device-tree node of the interrupt controller |
@@ -30,7 +34,7 @@ static struct irq_domain *irq_default_domain; | |||
30 | * @hwirq_max: Maximum number of interrupts supported by controller | 34 | * @hwirq_max: Maximum number of interrupts supported by controller |
31 | * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no | 35 | * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no |
32 | * direct mapping | 36 | * direct mapping |
33 | * @ops: map/unmap domain callbacks | 37 | * @ops: domain callbacks |
34 | * @host_data: Controller private data pointer | 38 | * @host_data: Controller private data pointer |
35 | * | 39 | * |
36 | * Allocates and initializes an irq_domain structure. | 40 |
@@ -56,6 +60,7 @@ struct irq_domain *__irq_domain_add(struct device_node *of_node, int size, | |||
56 | domain->hwirq_max = hwirq_max; | 60 | domain->hwirq_max = hwirq_max; |
57 | domain->revmap_size = size; | 61 | domain->revmap_size = size; |
58 | domain->revmap_direct_max_irq = direct_max; | 62 | domain->revmap_direct_max_irq = direct_max; |
63 | irq_domain_check_hierarchy(domain); | ||
59 | 64 | ||
60 | mutex_lock(&irq_domain_mutex); | 65 | mutex_lock(&irq_domain_mutex); |
61 | list_add(&domain->link, &irq_domain_list); | 66 | list_add(&domain->link, &irq_domain_list); |
@@ -109,7 +114,7 @@ EXPORT_SYMBOL_GPL(irq_domain_remove); | |||
109 | * @first_irq: first number of irq block assigned to the domain, | 114 | * @first_irq: first number of irq block assigned to the domain, |
110 | * pass zero to assign irqs on-the-fly. If first_irq is non-zero, then | 115 | * pass zero to assign irqs on-the-fly. If first_irq is non-zero, then |
111 | * pre-map all of the irqs in the domain to virqs starting at first_irq. | 116 | * pre-map all of the irqs in the domain to virqs starting at first_irq. |
112 | * @ops: map/unmap domain callbacks | 117 | * @ops: domain callbacks |
113 | * @host_data: Controller private data pointer | 118 | * @host_data: Controller private data pointer |
114 | * | 119 | * |
115 | * Allocates an irq_domain, and optionally if first_irq is positive then also | 120 | * Allocates an irq_domain, and optionally if first_irq is positive then also |
@@ -174,10 +179,8 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, | |||
174 | 179 | ||
175 | domain = __irq_domain_add(of_node, first_hwirq + size, | 180 | domain = __irq_domain_add(of_node, first_hwirq + size, |
176 | first_hwirq + size, 0, ops, host_data); | 181 | first_hwirq + size, 0, ops, host_data); |
177 | if (!domain) | 182 | if (domain) |
178 | return NULL; | 183 | irq_domain_associate_many(domain, first_irq, first_hwirq, size); |
179 | |||
180 | irq_domain_associate_many(domain, first_irq, first_hwirq, size); | ||
181 | 184 | ||
182 | return domain; | 185 | return domain; |
183 | } | 186 | } |
@@ -388,7 +391,6 @@ EXPORT_SYMBOL_GPL(irq_create_direct_mapping); | |||
388 | unsigned int irq_create_mapping(struct irq_domain *domain, | 391 | unsigned int irq_create_mapping(struct irq_domain *domain, |
389 | irq_hw_number_t hwirq) | 392 | irq_hw_number_t hwirq) |
390 | { | 393 | { |
391 | unsigned int hint; | ||
392 | int virq; | 394 | int virq; |
393 | 395 | ||
394 | pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq); | 396 | pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq); |
@@ -410,12 +412,8 @@ unsigned int irq_create_mapping(struct irq_domain *domain, | |||
410 | } | 412 | } |
411 | 413 | ||
412 | /* Allocate a virtual interrupt number */ | 414 | /* Allocate a virtual interrupt number */ |
413 | hint = hwirq % nr_irqs; | 415 | virq = irq_domain_alloc_descs(-1, 1, hwirq, |
414 | if (hint == 0) | 416 | of_node_to_nid(domain->of_node)); |
415 | hint++; | ||
416 | virq = irq_alloc_desc_from(hint, of_node_to_nid(domain->of_node)); | ||
417 | if (virq <= 0) | ||
418 | virq = irq_alloc_desc_from(1, of_node_to_nid(domain->of_node)); | ||
419 | if (virq <= 0) { | 417 | if (virq <= 0) { |
420 | pr_debug("-> virq allocation failed\n"); | 418 | pr_debug("-> virq allocation failed\n"); |
421 | return 0; | 419 | return 0; |
@@ -471,7 +469,7 @@ unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data) | |||
471 | struct irq_domain *domain; | 469 | struct irq_domain *domain; |
472 | irq_hw_number_t hwirq; | 470 | irq_hw_number_t hwirq; |
473 | unsigned int type = IRQ_TYPE_NONE; | 471 | unsigned int type = IRQ_TYPE_NONE; |
474 | unsigned int virq; | 472 | int virq; |
475 | 473 | ||
476 | domain = irq_data->np ? irq_find_host(irq_data->np) : irq_default_domain; | 474 | domain = irq_data->np ? irq_find_host(irq_data->np) : irq_default_domain; |
477 | if (!domain) { | 475 | if (!domain) { |
@@ -489,10 +487,24 @@ unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data) | |||
489 | return 0; | 487 | return 0; |
490 | } | 488 | } |
491 | 489 | ||
492 | /* Create mapping */ | 490 | if (irq_domain_is_hierarchy(domain)) { |
493 | virq = irq_create_mapping(domain, hwirq); | 491 | /* |
494 | if (!virq) | 492 | * If we've already configured this interrupt, |
495 | return virq; | 493 | * don't do it again, or hell will break loose. |
494 | */ | ||
495 | virq = irq_find_mapping(domain, hwirq); | ||
496 | if (virq) | ||
497 | return virq; | ||
498 | |||
499 | virq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, irq_data); | ||
500 | if (virq <= 0) | ||
501 | return 0; | ||
502 | } else { | ||
503 | /* Create mapping */ | ||
504 | virq = irq_create_mapping(domain, hwirq); | ||
505 | if (!virq) | ||
506 | return virq; | ||
507 | } | ||
496 | 508 | ||
497 | /* Set type if specified and different than the current one */ | 509 | /* Set type if specified and different than the current one */ |
498 | if (type != IRQ_TYPE_NONE && | 510 | if (type != IRQ_TYPE_NONE && |
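The hunk above only changes how irq_create_of_mapping() satisfies the request internally: hierarchical domains go through irq_domain_alloc_irqs(), everything else still uses irq_create_mapping(). A consumer keeps resolving its device-tree interrupt the usual way; a minimal sketch (np is assumed to be the device's DT node, nothing here is taken from the patch):

#include <linux/errno.h>
#include <linux/of.h>
#include <linux/of_irq.h>

/* Resolve a device's first DT interrupt specifier to a Linux virq. */
static int my_get_irq(struct device_node *np)
{
	unsigned int virq = irq_of_parse_and_map(np, 0);

	/* 0 means the mapping (or hierarchical allocation) failed */
	return virq ? (int)virq : -EINVAL;
}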
@@ -540,8 +552,8 @@ unsigned int irq_find_mapping(struct irq_domain *domain, | |||
540 | return 0; | 552 | return 0; |
541 | 553 | ||
542 | if (hwirq < domain->revmap_direct_max_irq) { | 554 | if (hwirq < domain->revmap_direct_max_irq) { |
543 | data = irq_get_irq_data(hwirq); | 555 | data = irq_domain_get_irq_data(domain, hwirq); |
544 | if (data && (data->domain == domain) && (data->hwirq == hwirq)) | 556 | if (data && data->hwirq == hwirq) |
545 | return hwirq; | 557 | return hwirq; |
546 | } | 558 | } |
547 | 559 | ||
@@ -709,3 +721,518 @@ const struct irq_domain_ops irq_domain_simple_ops = { | |||
709 | .xlate = irq_domain_xlate_onetwocell, | 721 | .xlate = irq_domain_xlate_onetwocell, |
710 | }; | 722 | }; |
711 | EXPORT_SYMBOL_GPL(irq_domain_simple_ops); | 723 | EXPORT_SYMBOL_GPL(irq_domain_simple_ops); |
724 | |||
725 | static int irq_domain_alloc_descs(int virq, unsigned int cnt, | ||
726 | irq_hw_number_t hwirq, int node) | ||
727 | { | ||
728 | unsigned int hint; | ||
729 | |||
730 | if (virq >= 0) { | ||
731 | virq = irq_alloc_descs(virq, virq, cnt, node); | ||
732 | } else { | ||
733 | hint = hwirq % nr_irqs; | ||
734 | if (hint == 0) | ||
735 | hint++; | ||
736 | virq = irq_alloc_descs_from(hint, cnt, node); | ||
737 | if (virq <= 0 && hint > 1) | ||
738 | virq = irq_alloc_descs_from(1, cnt, node); | ||
739 | } | ||
740 | |||
741 | return virq; | ||
742 | } | ||
743 | |||
744 | #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY | ||
745 | /** | ||
746 | * irq_domain_add_hierarchy - Add an irqdomain into the hierarchy | ||
747 | * @parent: Parent irq domain to associate with the new domain | ||
748 | * @flags: Irq domain flags associated to the domain | ||
749 | * @size: Size of the domain. See below | ||
750 | * @node: Optional device-tree node of the interrupt controller | ||
751 | * @ops: Pointer to the interrupt domain callbacks | ||
752 | * @host_data: Controller private data pointer | ||
753 | * | ||
754 | * If @size is 0 a tree domain is created, otherwise a linear domain. | ||
755 | * | ||
756 | * If successful the parent is associated to the new domain and the | ||
757 | * domain flags are set. | ||
758 | * Returns pointer to IRQ domain, or NULL on failure. | ||
759 | */ | ||
760 | struct irq_domain *irq_domain_add_hierarchy(struct irq_domain *parent, | ||
761 | unsigned int flags, | ||
762 | unsigned int size, | ||
763 | struct device_node *node, | ||
764 | const struct irq_domain_ops *ops, | ||
765 | void *host_data) | ||
766 | { | ||
767 | struct irq_domain *domain; | ||
768 | |||
769 | if (size) | ||
770 | domain = irq_domain_add_linear(node, size, ops, host_data); | ||
771 | else | ||
772 | domain = irq_domain_add_tree(node, ops, host_data); | ||
773 | if (domain) { | ||
774 | domain->parent = parent; | ||
775 | domain->flags |= flags; | ||
776 | } | ||
777 | |||
778 | return domain; | ||
779 | } | ||
780 | |||
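As a rough illustration of how a stacked interrupt controller driver would use irq_domain_add_hierarchy() (every my_* name below is hypothetical, not part of this patch): the driver's .alloc callback configures its own level with irq_domain_set_hwirq_and_chip() and forwards the rest of the work to the parent with irq_domain_alloc_irqs_parent():

#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/of.h>

static struct irq_chip my_chip = {
	.name		= "MYCHIP",
	/* the chip callbacks would typically forward to the parent,
	 * e.g. via irq_chip_mask_parent()/irq_chip_unmask_parent() */
};

static int my_domain_alloc(struct irq_domain *d, unsigned int virq,
			   unsigned int nr_irqs, void *arg)
{
	/* irq_create_of_mapping() passes the of_phandle_args as @arg */
	struct of_phandle_args *spec = arg;
	irq_hw_number_t hwirq = spec->args[0];
	unsigned int i;

	for (i = 0; i < nr_irqs; i++)
		irq_domain_set_hwirq_and_chip(d, virq + i, hwirq + i,
					      &my_chip, NULL);

	/* let the parent domain allocate its part of the hierarchy */
	return irq_domain_alloc_irqs_parent(d, virq, nr_irqs, arg);
}

static const struct irq_domain_ops my_domain_ops = {
	.alloc	= my_domain_alloc,
	.free	= irq_domain_free_irqs_common,
};

static struct irq_domain *my_create_domain(struct irq_domain *parent,
					   struct device_node *node)
{
	/* size > 0 selects a linear revmap, size == 0 a radix-tree one */
	return irq_domain_add_hierarchy(parent, 0, 32, node,
					&my_domain_ops, NULL);
}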
781 | static void irq_domain_insert_irq(int virq) | ||
782 | { | ||
783 | struct irq_data *data; | ||
784 | |||
785 | for (data = irq_get_irq_data(virq); data; data = data->parent_data) { | ||
786 | struct irq_domain *domain = data->domain; | ||
787 | irq_hw_number_t hwirq = data->hwirq; | ||
788 | |||
789 | if (hwirq < domain->revmap_size) { | ||
790 | domain->linear_revmap[hwirq] = virq; | ||
791 | } else { | ||
792 | mutex_lock(&revmap_trees_mutex); | ||
793 | radix_tree_insert(&domain->revmap_tree, hwirq, data); | ||
794 | mutex_unlock(&revmap_trees_mutex); | ||
795 | } | ||
796 | |||
797 | /* If not already assigned, give the domain the chip's name */ | ||
798 | if (!domain->name && data->chip) | ||
799 | domain->name = data->chip->name; | ||
800 | } | ||
801 | |||
802 | irq_clear_status_flags(virq, IRQ_NOREQUEST); | ||
803 | } | ||
804 | |||
805 | static void irq_domain_remove_irq(int virq) | ||
806 | { | ||
807 | struct irq_data *data; | ||
808 | |||
809 | irq_set_status_flags(virq, IRQ_NOREQUEST); | ||
810 | irq_set_chip_and_handler(virq, NULL, NULL); | ||
811 | synchronize_irq(virq); | ||
812 | smp_mb(); | ||
813 | |||
814 | for (data = irq_get_irq_data(virq); data; data = data->parent_data) { | ||
815 | struct irq_domain *domain = data->domain; | ||
816 | irq_hw_number_t hwirq = data->hwirq; | ||
817 | |||
818 | if (hwirq < domain->revmap_size) { | ||
819 | domain->linear_revmap[hwirq] = 0; | ||
820 | } else { | ||
821 | mutex_lock(&revmap_trees_mutex); | ||
822 | radix_tree_delete(&domain->revmap_tree, hwirq); | ||
823 | mutex_unlock(&revmap_trees_mutex); | ||
824 | } | ||
825 | } | ||
826 | } | ||
827 | |||
828 | static struct irq_data *irq_domain_insert_irq_data(struct irq_domain *domain, | ||
829 | struct irq_data *child) | ||
830 | { | ||
831 | struct irq_data *irq_data; | ||
832 | |||
833 | irq_data = kzalloc_node(sizeof(*irq_data), GFP_KERNEL, child->node); | ||
834 | if (irq_data) { | ||
835 | child->parent_data = irq_data; | ||
836 | irq_data->irq = child->irq; | ||
837 | irq_data->node = child->node; | ||
838 | irq_data->domain = domain; | ||
839 | } | ||
840 | |||
841 | return irq_data; | ||
842 | } | ||
843 | |||
844 | static void irq_domain_free_irq_data(unsigned int virq, unsigned int nr_irqs) | ||
845 | { | ||
846 | struct irq_data *irq_data, *tmp; | ||
847 | int i; | ||
848 | |||
849 | for (i = 0; i < nr_irqs; i++) { | ||
850 | irq_data = irq_get_irq_data(virq + i); | ||
851 | tmp = irq_data->parent_data; | ||
852 | irq_data->parent_data = NULL; | ||
853 | irq_data->domain = NULL; | ||
854 | |||
855 | while (tmp) { | ||
856 | irq_data = tmp; | ||
857 | tmp = tmp->parent_data; | ||
858 | kfree(irq_data); | ||
859 | } | ||
860 | } | ||
861 | } | ||
862 | |||
863 | static int irq_domain_alloc_irq_data(struct irq_domain *domain, | ||
864 | unsigned int virq, unsigned int nr_irqs) | ||
865 | { | ||
866 | struct irq_data *irq_data; | ||
867 | struct irq_domain *parent; | ||
868 | int i; | ||
869 | |||
870 | /* The outermost irq_data is embedded in struct irq_desc */ | ||
871 | for (i = 0; i < nr_irqs; i++) { | ||
872 | irq_data = irq_get_irq_data(virq + i); | ||
873 | irq_data->domain = domain; | ||
874 | |||
875 | for (parent = domain->parent; parent; parent = parent->parent) { | ||
876 | irq_data = irq_domain_insert_irq_data(parent, irq_data); | ||
877 | if (!irq_data) { | ||
878 | irq_domain_free_irq_data(virq, i + 1); | ||
879 | return -ENOMEM; | ||
880 | } | ||
881 | } | ||
882 | } | ||
883 | |||
884 | return 0; | ||
885 | } | ||
886 | |||
887 | /** | ||
888 | * irq_domain_get_irq_data - Get irq_data associated with @virq and @domain | ||
889 | * @domain: domain to match | ||
890 | * @virq: IRQ number to get irq_data | ||
891 | */ | ||
892 | struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, | ||
893 | unsigned int virq) | ||
894 | { | ||
895 | struct irq_data *irq_data; | ||
896 | |||
897 | for (irq_data = irq_get_irq_data(virq); irq_data; | ||
898 | irq_data = irq_data->parent_data) | ||
899 | if (irq_data->domain == domain) | ||
900 | return irq_data; | ||
901 | |||
902 | return NULL; | ||
903 | } | ||
904 | |||
905 | /** | ||
906 | * irq_domain_set_hwirq_and_chip - Set hwirq and irqchip of @virq at @domain | ||
907 | * @domain: Interrupt domain to match | ||
908 | * @virq: IRQ number | ||
909 | * @hwirq: The hwirq number | ||
910 | * @chip: The associated interrupt chip | ||
911 | * @chip_data: The associated chip data | ||
912 | */ | ||
913 | int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq, | ||
914 | irq_hw_number_t hwirq, struct irq_chip *chip, | ||
915 | void *chip_data) | ||
916 | { | ||
917 | struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq); | ||
918 | |||
919 | if (!irq_data) | ||
920 | return -ENOENT; | ||
921 | |||
922 | irq_data->hwirq = hwirq; | ||
923 | irq_data->chip = chip ? chip : &no_irq_chip; | ||
924 | irq_data->chip_data = chip_data; | ||
925 | |||
926 | return 0; | ||
927 | } | ||
928 | |||
929 | /** | ||
930 | * irq_domain_set_info - Set the complete data for a @virq in @domain | ||
931 | * @domain: Interrupt domain to match | ||
932 | * @virq: IRQ number | ||
933 | * @hwirq: The hardware interrupt number | ||
934 | * @chip: The associated interrupt chip | ||
935 | * @chip_data: The associated interrupt chip data | ||
936 | * @handler: The interrupt flow handler | ||
937 | * @handler_data: The interrupt flow handler data | ||
938 | * @handler_name: The interrupt handler name | ||
939 | */ | ||
940 | void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, | ||
941 | irq_hw_number_t hwirq, struct irq_chip *chip, | ||
942 | void *chip_data, irq_flow_handler_t handler, | ||
943 | void *handler_data, const char *handler_name) | ||
944 | { | ||
945 | irq_domain_set_hwirq_and_chip(domain, virq, hwirq, chip, chip_data); | ||
946 | __irq_set_handler(virq, handler, 0, handler_name); | ||
947 | irq_set_handler_data(virq, handler_data); | ||
948 | } | ||
949 | |||
950 | /** | ||
951 | * irq_domain_reset_irq_data - Clear hwirq, chip and chip_data in @irq_data | ||
952 | * @irq_data: The pointer to irq_data | ||
953 | */ | ||
954 | void irq_domain_reset_irq_data(struct irq_data *irq_data) | ||
955 | { | ||
956 | irq_data->hwirq = 0; | ||
957 | irq_data->chip = &no_irq_chip; | ||
958 | irq_data->chip_data = NULL; | ||
959 | } | ||
960 | |||
961 | /** | ||
962 | * irq_domain_free_irqs_common - Clear irq_data and free the parent | ||
963 | * @domain: Interrupt domain to match | ||
964 | * @virq: IRQ number to start with | ||
965 | * @nr_irqs: The number of irqs to free | ||
966 | */ | ||
967 | void irq_domain_free_irqs_common(struct irq_domain *domain, unsigned int virq, | ||
968 | unsigned int nr_irqs) | ||
969 | { | ||
970 | struct irq_data *irq_data; | ||
971 | int i; | ||
972 | |||
973 | for (i = 0; i < nr_irqs; i++) { | ||
974 | irq_data = irq_domain_get_irq_data(domain, virq + i); | ||
975 | if (irq_data) | ||
976 | irq_domain_reset_irq_data(irq_data); | ||
977 | } | ||
978 | irq_domain_free_irqs_parent(domain, virq, nr_irqs); | ||
979 | } | ||
980 | |||
981 | /** | ||
982 | * irq_domain_free_irqs_top - Clear handler and handler data, clear irqdata and free parent | ||
983 | * @domain: Interrupt domain to match | ||
984 | * @virq: IRQ number to start with | ||
985 | * @nr_irqs: The number of irqs to free | ||
986 | */ | ||
987 | void irq_domain_free_irqs_top(struct irq_domain *domain, unsigned int virq, | ||
988 | unsigned int nr_irqs) | ||
989 | { | ||
990 | int i; | ||
991 | |||
992 | for (i = 0; i < nr_irqs; i++) { | ||
993 | irq_set_handler_data(virq + i, NULL); | ||
994 | irq_set_handler(virq + i, NULL); | ||
995 | } | ||
996 | irq_domain_free_irqs_common(domain, virq, nr_irqs); | ||
997 | } | ||
998 | |||
999 | static bool irq_domain_is_auto_recursive(struct irq_domain *domain) | ||
1000 | { | ||
1001 | return domain->flags & IRQ_DOMAIN_FLAG_AUTO_RECURSIVE; | ||
1002 | } | ||
1003 | |||
1004 | static void irq_domain_free_irqs_recursive(struct irq_domain *domain, | ||
1005 | unsigned int irq_base, | ||
1006 | unsigned int nr_irqs) | ||
1007 | { | ||
1008 | domain->ops->free(domain, irq_base, nr_irqs); | ||
1009 | if (irq_domain_is_auto_recursive(domain)) { | ||
1010 | BUG_ON(!domain->parent); | ||
1011 | irq_domain_free_irqs_recursive(domain->parent, irq_base, | ||
1012 | nr_irqs); | ||
1013 | } | ||
1014 | } | ||
1015 | |||
1016 | static int irq_domain_alloc_irqs_recursive(struct irq_domain *domain, | ||
1017 | unsigned int irq_base, | ||
1018 | unsigned int nr_irqs, void *arg) | ||
1019 | { | ||
1020 | int ret = 0; | ||
1021 | struct irq_domain *parent = domain->parent; | ||
1022 | bool recursive = irq_domain_is_auto_recursive(domain); | ||
1023 | |||
1024 | BUG_ON(recursive && !parent); | ||
1025 | if (recursive) | ||
1026 | ret = irq_domain_alloc_irqs_recursive(parent, irq_base, | ||
1027 | nr_irqs, arg); | ||
1028 | if (ret >= 0) | ||
1029 | ret = domain->ops->alloc(domain, irq_base, nr_irqs, arg); | ||
1030 | if (ret < 0 && recursive) | ||
1031 | irq_domain_free_irqs_recursive(parent, irq_base, nr_irqs); | ||
1032 | |||
1033 | return ret; | ||
1034 | } | ||
1035 | |||
1036 | /** | ||
1037 | * __irq_domain_alloc_irqs - Allocate IRQs from domain | ||
1038 | * @domain: domain to allocate from | ||
1039 | * @irq_base: allocate the specified IRQ number if irq_base >= 0 | ||
1040 | * @nr_irqs: number of IRQs to allocate | ||
1041 | * @node: NUMA node id for memory allocation | ||
1042 | * @arg: domain specific argument | ||
1043 | * @realloc: IRQ descriptors have already been allocated if true | ||
1044 | * | ||
1045 | * Allocate IRQ numbers and initialize all data structures to support | ||
1046 | * hierarchy IRQ domains. | ||
1047 | * Parameter @realloc is mainly to support legacy IRQs. | ||
1048 | * Returns an error code or the allocated IRQ number. | ||
1049 | * | ||
1050 | * The whole process of setting up an IRQ has been split into two steps. | ||
1051 | * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ | ||
1052 | * descriptors and the required hardware resources. The second step, | ||
1053 | * irq_domain_activate_irq(), is to program the hardware with preallocated | ||
1054 | * resources. In this way, it's easier to roll back when failing to | ||
1055 | * allocate resources. | ||
1056 | */ | ||
1057 | int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, | ||
1058 | unsigned int nr_irqs, int node, void *arg, | ||
1059 | bool realloc) | ||
1060 | { | ||
1061 | int i, ret, virq; | ||
1062 | |||
1063 | if (domain == NULL) { | ||
1064 | domain = irq_default_domain; | ||
1065 | if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n")) | ||
1066 | return -EINVAL; | ||
1067 | } | ||
1068 | |||
1069 | if (!domain->ops->alloc) { | ||
1070 | pr_debug("domain->ops->alloc() is NULL\n"); | ||
1071 | return -ENOSYS; | ||
1072 | } | ||
1073 | |||
1074 | if (realloc && irq_base >= 0) { | ||
1075 | virq = irq_base; | ||
1076 | } else { | ||
1077 | virq = irq_domain_alloc_descs(irq_base, nr_irqs, 0, node); | ||
1078 | if (virq < 0) { | ||
1079 | pr_debug("cannot allocate IRQ(base %d, count %d)\n", | ||
1080 | irq_base, nr_irqs); | ||
1081 | return virq; | ||
1082 | } | ||
1083 | } | ||
1084 | |||
1085 | if (irq_domain_alloc_irq_data(domain, virq, nr_irqs)) { | ||
1086 | pr_debug("cannot allocate memory for IRQ%d\n", virq); | ||
1087 | ret = -ENOMEM; | ||
1088 | goto out_free_desc; | ||
1089 | } | ||
1090 | |||
1091 | mutex_lock(&irq_domain_mutex); | ||
1092 | ret = irq_domain_alloc_irqs_recursive(domain, virq, nr_irqs, arg); | ||
1093 | if (ret < 0) { | ||
1094 | mutex_unlock(&irq_domain_mutex); | ||
1095 | goto out_free_irq_data; | ||
1096 | } | ||
1097 | for (i = 0; i < nr_irqs; i++) | ||
1098 | irq_domain_insert_irq(virq + i); | ||
1099 | mutex_unlock(&irq_domain_mutex); | ||
1100 | |||
1101 | return virq; | ||
1102 | |||
1103 | out_free_irq_data: | ||
1104 | irq_domain_free_irq_data(virq, nr_irqs); | ||
1105 | out_free_desc: | ||
1106 | irq_free_descs(virq, nr_irqs); | ||
1107 | return ret; | ||
1108 | } | ||
1109 | |||
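A condensed sketch of the two steps described in the comment above, assuming domain is a hierarchical domain and arg is whatever its .alloc callback expects; in real code the activate step is normally driven by the core from irq_startup() rather than open-coded:

#include <linux/errno.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/numa.h>

static int my_setup_irq(struct irq_domain *domain, void *arg)
{
	int virq;

	/* step 1: allocate the descriptor, the irq_data stack and the
	 * hardware resources needed along the hierarchy */
	virq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, arg);
	if (virq <= 0)
		return virq ? virq : -ENOSPC;

	/* step 2: program the interrupt controllers in the stack */
	irq_domain_activate_irq(irq_get_irq_data(virq));

	return virq;
}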
1110 | /** | ||
1111 | * irq_domain_free_irqs - Free IRQ number and associated data structures | ||
1112 | * @virq: base IRQ number | ||
1113 | * @nr_irqs: number of IRQs to free | ||
1114 | */ | ||
1115 | void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs) | ||
1116 | { | ||
1117 | struct irq_data *data = irq_get_irq_data(virq); | ||
1118 | int i; | ||
1119 | |||
1120 | if (WARN(!data || !data->domain || !data->domain->ops->free, | ||
1121 | "NULL pointer, cannot free irq\n")) | ||
1122 | return; | ||
1123 | |||
1124 | mutex_lock(&irq_domain_mutex); | ||
1125 | for (i = 0; i < nr_irqs; i++) | ||
1126 | irq_domain_remove_irq(virq + i); | ||
1127 | irq_domain_free_irqs_recursive(data->domain, virq, nr_irqs); | ||
1128 | mutex_unlock(&irq_domain_mutex); | ||
1129 | |||
1130 | irq_domain_free_irq_data(virq, nr_irqs); | ||
1131 | irq_free_descs(virq, nr_irqs); | ||
1132 | } | ||
1133 | |||
1134 | /** | ||
1135 | * irq_domain_alloc_irqs_parent - Allocate interrupts from parent domain | ||
1136 | * @irq_base: Base IRQ number | ||
1137 | * @nr_irqs: Number of IRQs to allocate | ||
1138 | * @arg: Allocation data (arch/domain specific) | ||
1139 | * | ||
1140 | * Check whether the domain has been set up as recursive. If not, | ||
1141 | * allocate through the parent domain. | ||
1142 | */ | ||
1143 | int irq_domain_alloc_irqs_parent(struct irq_domain *domain, | ||
1144 | unsigned int irq_base, unsigned int nr_irqs, | ||
1145 | void *arg) | ||
1146 | { | ||
1147 | /* irq_domain_alloc_irqs_recursive() has called parent's alloc() */ | ||
1148 | if (irq_domain_is_auto_recursive(domain)) | ||
1149 | return 0; | ||
1150 | |||
1151 | domain = domain->parent; | ||
1152 | if (domain) | ||
1153 | return irq_domain_alloc_irqs_recursive(domain, irq_base, | ||
1154 | nr_irqs, arg); | ||
1155 | return -ENOSYS; | ||
1156 | } | ||
1157 | |||
1158 | /** | ||
1159 | * irq_domain_free_irqs_parent - Free interrupts from parent domain | ||
1160 | * @irq_base: Base IRQ number | ||
1161 | * @nr_irqs: Number of IRQs to free | ||
1162 | * | ||
1163 | * Check whether the domain has been set up as recursive. If not, | ||
1164 | * free through the parent domain. | ||
1165 | */ | ||
1166 | void irq_domain_free_irqs_parent(struct irq_domain *domain, | ||
1167 | unsigned int irq_base, unsigned int nr_irqs) | ||
1168 | { | ||
1169 | /* irq_domain_free_irqs_recursive() will call parent's free */ | ||
1170 | if (!irq_domain_is_auto_recursive(domain) && domain->parent) | ||
1171 | irq_domain_free_irqs_recursive(domain->parent, irq_base, | ||
1172 | nr_irqs); | ||
1173 | } | ||
1174 | |||
1175 | /** | ||
1176 | * irq_domain_activate_irq - Call domain_ops->activate recursively to activate | ||
1177 | * interrupt | ||
1178 | * @irq_data: outermost irq_data associated with interrupt | ||
1179 | * | ||
1180 | * This is the second setup step: it calls domain_ops->activate to program the | ||
1181 | * interrupt controllers, so that the interrupt can actually be delivered. | ||
1182 | */ | ||
1183 | void irq_domain_activate_irq(struct irq_data *irq_data) | ||
1184 | { | ||
1185 | if (irq_data && irq_data->domain) { | ||
1186 | struct irq_domain *domain = irq_data->domain; | ||
1187 | |||
1188 | if (irq_data->parent_data) | ||
1189 | irq_domain_activate_irq(irq_data->parent_data); | ||
1190 | if (domain->ops->activate) | ||
1191 | domain->ops->activate(domain, irq_data); | ||
1192 | } | ||
1193 | } | ||
1194 | |||
1195 | /** | ||
1196 | * irq_domain_deactivate_irq - Call domain_ops->deactivate recursively to | ||
1197 | * deactivate interrupt | ||
1198 | * @irq_data: outermost irq_data associated with interrupt | ||
1199 | * | ||
1200 | * It calls domain_ops->deactivate to program interrupt controllers to disable | ||
1201 | * interrupt delivery. | ||
1202 | */ | ||
1203 | void irq_domain_deactivate_irq(struct irq_data *irq_data) | ||
1204 | { | ||
1205 | if (irq_data && irq_data->domain) { | ||
1206 | struct irq_domain *domain = irq_data->domain; | ||
1207 | |||
1208 | if (domain->ops->deactivate) | ||
1209 | domain->ops->deactivate(domain, irq_data); | ||
1210 | if (irq_data->parent_data) | ||
1211 | irq_domain_deactivate_irq(irq_data->parent_data); | ||
1212 | } | ||
1213 | } | ||
1214 | |||
1215 | static void irq_domain_check_hierarchy(struct irq_domain *domain) | ||
1216 | { | ||
1217 | /* Hierarchy irq_domains must implement callback alloc() */ | ||
1218 | if (domain->ops->alloc) | ||
1219 | domain->flags |= IRQ_DOMAIN_FLAG_HIERARCHY; | ||
1220 | } | ||
1221 | #else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ | ||
1222 | /** | ||
1223 | * irq_domain_get_irq_data - Get irq_data associated with @virq and @domain | ||
1224 | * @domain: domain to match | ||
1225 | * @virq: IRQ number to get irq_data | ||
1226 | */ | ||
1227 | struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, | ||
1228 | unsigned int virq) | ||
1229 | { | ||
1230 | struct irq_data *irq_data = irq_get_irq_data(virq); | ||
1231 | |||
1232 | return (irq_data && irq_data->domain == domain) ? irq_data : NULL; | ||
1233 | } | ||
1234 | |||
1235 | static void irq_domain_check_hierarchy(struct irq_domain *domain) | ||
1236 | { | ||
1237 | } | ||
1238 | #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ | ||
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 0a9104b4608b..80692373abd6 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
@@ -183,6 +183,7 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, | |||
183 | ret = chip->irq_set_affinity(data, mask, force); | 183 | ret = chip->irq_set_affinity(data, mask, force); |
184 | switch (ret) { | 184 | switch (ret) { |
185 | case IRQ_SET_MASK_OK: | 185 | case IRQ_SET_MASK_OK: |
186 | case IRQ_SET_MASK_OK_DONE: | ||
186 | cpumask_copy(data->affinity, mask); | 187 | cpumask_copy(data->affinity, mask); |
187 | case IRQ_SET_MASK_OK_NOCOPY: | 188 | case IRQ_SET_MASK_OK_NOCOPY: |
188 | irq_set_thread_affinity(desc); | 189 | irq_set_thread_affinity(desc); |
@@ -600,6 +601,7 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, | |||
600 | 601 | ||
601 | switch (ret) { | 602 | switch (ret) { |
602 | case IRQ_SET_MASK_OK: | 603 | case IRQ_SET_MASK_OK: |
604 | case IRQ_SET_MASK_OK_DONE: | ||
603 | irqd_clear(&desc->irq_data, IRQD_TRIGGER_MASK); | 605 | irqd_clear(&desc->irq_data, IRQD_TRIGGER_MASK); |
604 | irqd_set(&desc->irq_data, flags); | 606 | irqd_set(&desc->irq_data, flags); |
605 | 607 | ||
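IRQ_SET_MASK_OK_DONE is a new return value for .irq_set_affinity callbacks: the core treats it exactly like IRQ_SET_MASK_OK (the mask is still copied above), but stacked users, such as msi_domain_set_affinity() in the new msi.c below, read it as "the parent already did everything, no follow-up such as rewriting the MSI message is needed". A hedged sketch of a parent chip using it (the function name is made up):

#include <linux/irq.h>

static int my_parent_set_affinity(struct irq_data *data,
				  const struct cpumask *mask, bool force)
{
	/* ... reprogram the interrupt routing in hardware here ... */

	/*
	 * Behaves like IRQ_SET_MASK_OK for the core, but tells callers
	 * stacked on top that no further action is required.
	 */
	return IRQ_SET_MASK_OK_DONE;
}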
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c new file mode 100644 index 000000000000..3e18163f336f --- /dev/null +++ b/kernel/irq/msi.c | |||
@@ -0,0 +1,330 @@ | |||
1 | /* | ||
2 | * linux/kernel/irq/msi.c | ||
3 | * | ||
4 | * Copyright (C) 2014 Intel Corp. | ||
5 | * Author: Jiang Liu <jiang.liu@linux.intel.com> | ||
6 | * | ||
7 | * This file is licensed under GPLv2. | ||
8 | * | ||
9 | * This file contains common code to support Message Signalled Interrupts for | ||
10 | * PCI-compatible and non-PCI-compatible devices. | ||
11 | */ | ||
12 | #include <linux/types.h> | ||
13 | #include <linux/device.h> | ||
14 | #include <linux/irq.h> | ||
15 | #include <linux/irqdomain.h> | ||
16 | #include <linux/msi.h> | ||
17 | |||
18 | /* Temporary solution for building, will be removed later */ | ||
19 | #include <linux/pci.h> | ||
20 | |||
21 | void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg) | ||
22 | { | ||
23 | *msg = entry->msg; | ||
24 | } | ||
25 | |||
26 | void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg) | ||
27 | { | ||
28 | struct msi_desc *entry = irq_get_msi_desc(irq); | ||
29 | |||
30 | __get_cached_msi_msg(entry, msg); | ||
31 | } | ||
32 | EXPORT_SYMBOL_GPL(get_cached_msi_msg); | ||
33 | |||
34 | #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN | ||
35 | static inline void irq_chip_write_msi_msg(struct irq_data *data, | ||
36 | struct msi_msg *msg) | ||
37 | { | ||
38 | data->chip->irq_write_msi_msg(data, msg); | ||
39 | } | ||
40 | |||
41 | /** | ||
42 | * msi_domain_set_affinity - Generic affinity setter function for MSI domains | ||
43 | * @irq_data: The irq data associated to the interrupt | ||
44 | * @mask: The affinity mask to set | ||
45 | * @force: Flag to enforce setting (disable online checks) | ||
46 | * | ||
47 | * Intended to be used by MSI interrupt controllers which are | ||
48 | * implemented with hierarchical domains. | ||
49 | */ | ||
50 | int msi_domain_set_affinity(struct irq_data *irq_data, | ||
51 | const struct cpumask *mask, bool force) | ||
52 | { | ||
53 | struct irq_data *parent = irq_data->parent_data; | ||
54 | struct msi_msg msg; | ||
55 | int ret; | ||
56 | |||
57 | ret = parent->chip->irq_set_affinity(parent, mask, force); | ||
58 | if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) { | ||
59 | BUG_ON(irq_chip_compose_msi_msg(irq_data, &msg)); | ||
60 | irq_chip_write_msi_msg(irq_data, &msg); | ||
61 | } | ||
62 | |||
63 | return ret; | ||
64 | } | ||
65 | |||
66 | static void msi_domain_activate(struct irq_domain *domain, | ||
67 | struct irq_data *irq_data) | ||
68 | { | ||
69 | struct msi_msg msg; | ||
70 | |||
71 | BUG_ON(irq_chip_compose_msi_msg(irq_data, &msg)); | ||
72 | irq_chip_write_msi_msg(irq_data, &msg); | ||
73 | } | ||
74 | |||
75 | static void msi_domain_deactivate(struct irq_domain *domain, | ||
76 | struct irq_data *irq_data) | ||
77 | { | ||
78 | struct msi_msg msg; | ||
79 | |||
80 | memset(&msg, 0, sizeof(msg)); | ||
81 | irq_chip_write_msi_msg(irq_data, &msg); | ||
82 | } | ||
83 | |||
84 | static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq, | ||
85 | unsigned int nr_irqs, void *arg) | ||
86 | { | ||
87 | struct msi_domain_info *info = domain->host_data; | ||
88 | struct msi_domain_ops *ops = info->ops; | ||
89 | irq_hw_number_t hwirq = ops->get_hwirq(info, arg); | ||
90 | int i, ret; | ||
91 | |||
92 | if (irq_find_mapping(domain, hwirq) > 0) | ||
93 | return -EEXIST; | ||
94 | |||
95 | ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); | ||
96 | if (ret < 0) | ||
97 | return ret; | ||
98 | |||
99 | for (i = 0; i < nr_irqs; i++) { | ||
100 | ret = ops->msi_init(domain, info, virq + i, hwirq + i, arg); | ||
101 | if (ret < 0) { | ||
102 | if (ops->msi_free) { | ||
103 | for (i--; i > 0; i--) | ||
104 | ops->msi_free(domain, info, virq + i); | ||
105 | } | ||
106 | irq_domain_free_irqs_top(domain, virq, nr_irqs); | ||
107 | return ret; | ||
108 | } | ||
109 | } | ||
110 | |||
111 | return 0; | ||
112 | } | ||
113 | |||
114 | static void msi_domain_free(struct irq_domain *domain, unsigned int virq, | ||
115 | unsigned int nr_irqs) | ||
116 | { | ||
117 | struct msi_domain_info *info = domain->host_data; | ||
118 | int i; | ||
119 | |||
120 | if (info->ops->msi_free) { | ||
121 | for (i = 0; i < nr_irqs; i++) | ||
122 | info->ops->msi_free(domain, info, virq + i); | ||
123 | } | ||
124 | irq_domain_free_irqs_top(domain, virq, nr_irqs); | ||
125 | } | ||
126 | |||
127 | static struct irq_domain_ops msi_domain_ops = { | ||
128 | .alloc = msi_domain_alloc, | ||
129 | .free = msi_domain_free, | ||
130 | .activate = msi_domain_activate, | ||
131 | .deactivate = msi_domain_deactivate, | ||
132 | }; | ||
133 | |||
134 | #ifdef GENERIC_MSI_DOMAIN_OPS | ||
135 | static irq_hw_number_t msi_domain_ops_get_hwirq(struct msi_domain_info *info, | ||
136 | msi_alloc_info_t *arg) | ||
137 | { | ||
138 | return arg->hwirq; | ||
139 | } | ||
140 | |||
141 | static int msi_domain_ops_prepare(struct irq_domain *domain, struct device *dev, | ||
142 | int nvec, msi_alloc_info_t *arg) | ||
143 | { | ||
144 | memset(arg, 0, sizeof(*arg)); | ||
145 | return 0; | ||
146 | } | ||
147 | |||
148 | static void msi_domain_ops_set_desc(msi_alloc_info_t *arg, | ||
149 | struct msi_desc *desc) | ||
150 | { | ||
151 | arg->desc = desc; | ||
152 | } | ||
153 | #else | ||
154 | #define msi_domain_ops_get_hwirq NULL | ||
155 | #define msi_domain_ops_prepare NULL | ||
156 | #define msi_domain_ops_set_desc NULL | ||
157 | #endif /* !GENERIC_MSI_DOMAIN_OPS */ | ||
158 | |||
159 | static int msi_domain_ops_init(struct irq_domain *domain, | ||
160 | struct msi_domain_info *info, | ||
161 | unsigned int virq, irq_hw_number_t hwirq, | ||
162 | msi_alloc_info_t *arg) | ||
163 | { | ||
164 | irq_domain_set_hwirq_and_chip(domain, virq, hwirq, info->chip, | ||
165 | info->chip_data); | ||
166 | if (info->handler && info->handler_name) { | ||
167 | __irq_set_handler(virq, info->handler, 0, info->handler_name); | ||
168 | if (info->handler_data) | ||
169 | irq_set_handler_data(virq, info->handler_data); | ||
170 | } | ||
171 | return 0; | ||
172 | } | ||
173 | |||
174 | static int msi_domain_ops_check(struct irq_domain *domain, | ||
175 | struct msi_domain_info *info, | ||
176 | struct device *dev) | ||
177 | { | ||
178 | return 0; | ||
179 | } | ||
180 | |||
181 | static struct msi_domain_ops msi_domain_ops_default = { | ||
182 | .get_hwirq = msi_domain_ops_get_hwirq, | ||
183 | .msi_init = msi_domain_ops_init, | ||
184 | .msi_check = msi_domain_ops_check, | ||
185 | .msi_prepare = msi_domain_ops_prepare, | ||
186 | .set_desc = msi_domain_ops_set_desc, | ||
187 | }; | ||
188 | |||
189 | static void msi_domain_update_dom_ops(struct msi_domain_info *info) | ||
190 | { | ||
191 | struct msi_domain_ops *ops = info->ops; | ||
192 | |||
193 | if (ops == NULL) { | ||
194 | info->ops = &msi_domain_ops_default; | ||
195 | return; | ||
196 | } | ||
197 | |||
198 | if (ops->get_hwirq == NULL) | ||
199 | ops->get_hwirq = msi_domain_ops_default.get_hwirq; | ||
200 | if (ops->msi_init == NULL) | ||
201 | ops->msi_init = msi_domain_ops_default.msi_init; | ||
202 | if (ops->msi_check == NULL) | ||
203 | ops->msi_check = msi_domain_ops_default.msi_check; | ||
204 | if (ops->msi_prepare == NULL) | ||
205 | ops->msi_prepare = msi_domain_ops_default.msi_prepare; | ||
206 | if (ops->set_desc == NULL) | ||
207 | ops->set_desc = msi_domain_ops_default.set_desc; | ||
208 | } | ||
209 | |||
210 | static void msi_domain_update_chip_ops(struct msi_domain_info *info) | ||
211 | { | ||
212 | struct irq_chip *chip = info->chip; | ||
213 | |||
214 | BUG_ON(!chip); | ||
215 | if (!chip->irq_mask) | ||
216 | chip->irq_mask = pci_msi_mask_irq; | ||
217 | if (!chip->irq_unmask) | ||
218 | chip->irq_unmask = pci_msi_unmask_irq; | ||
219 | if (!chip->irq_set_affinity) | ||
220 | chip->irq_set_affinity = msi_domain_set_affinity; | ||
221 | } | ||
222 | |||
223 | /** | ||
224 | * msi_create_irq_domain - Create an MSI interrupt domain | ||
225 | * @node: Optional device-tree node of the interrupt controller | ||
226 | * @info: MSI domain info | ||
227 | * @parent: Parent irq domain | ||
228 | */ | ||
229 | struct irq_domain *msi_create_irq_domain(struct device_node *node, | ||
230 | struct msi_domain_info *info, | ||
231 | struct irq_domain *parent) | ||
232 | { | ||
233 | if (info->flags & MSI_FLAG_USE_DEF_DOM_OPS) | ||
234 | msi_domain_update_dom_ops(info); | ||
235 | if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS) | ||
236 | msi_domain_update_chip_ops(info); | ||
237 | |||
238 | return irq_domain_add_hierarchy(parent, 0, 0, node, &msi_domain_ops, | ||
239 | info); | ||
240 | } | ||
241 | |||
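A rough sketch of how an interrupt controller driver would build on msi_create_irq_domain(); the my_* names are hypothetical, and the two flags let the default domain and chip ops above fill in every operation not supplied explicitly:

#include <linux/irqdomain.h>
#include <linux/msi.h>
#include <linux/of.h>

/* writes the address/data pair into the device's MSI registers */
static void my_write_msi_msg(struct irq_data *data, struct msi_msg *msg)
{
	/* program msg->address_lo/address_hi and msg->data here */
}

static struct irq_chip my_msi_chip = {
	.name			= "my-MSI",
	.irq_write_msi_msg	= my_write_msi_msg,
};

static struct msi_domain_info my_msi_info = {
	.flags	= MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS,
	.chip	= &my_msi_chip,
};

static struct irq_domain *my_init_msi(struct device_node *node,
				      struct irq_domain *parent)
{
	return msi_create_irq_domain(node, &my_msi_info, parent);
}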
242 | /** | ||
243 | * msi_domain_alloc_irqs - Allocate interrupts from an MSI interrupt domain | ||
244 | * @domain: The domain to allocate from | ||
245 | * @dev: Pointer to device struct of the device for which the interrupts | ||
246 | * are allocated | ||
247 | * @nvec: The number of interrupts to allocate | ||
248 | * | ||
249 | * Returns 0 on success or an error code. | ||
250 | */ | ||
251 | int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, | ||
252 | int nvec) | ||
253 | { | ||
254 | struct msi_domain_info *info = domain->host_data; | ||
255 | struct msi_domain_ops *ops = info->ops; | ||
256 | msi_alloc_info_t arg; | ||
257 | struct msi_desc *desc; | ||
258 | int i, ret, virq = -1; | ||
259 | |||
260 | ret = ops->msi_check(domain, info, dev); | ||
261 | if (ret == 0) | ||
262 | ret = ops->msi_prepare(domain, dev, nvec, &arg); | ||
263 | if (ret) | ||
264 | return ret; | ||
265 | |||
266 | for_each_msi_entry(desc, dev) { | ||
267 | ops->set_desc(&arg, desc); | ||
268 | if (info->flags & MSI_FLAG_IDENTITY_MAP) | ||
269 | virq = (int)ops->get_hwirq(info, &arg); | ||
270 | else | ||
271 | virq = -1; | ||
272 | |||
273 | virq = __irq_domain_alloc_irqs(domain, virq, desc->nvec_used, | ||
274 | dev_to_node(dev), &arg, false); | ||
275 | if (virq < 0) { | ||
276 | ret = -ENOSPC; | ||
277 | if (ops->handle_error) | ||
278 | ret = ops->handle_error(domain, desc, ret); | ||
279 | if (ops->msi_finish) | ||
280 | ops->msi_finish(&arg, ret); | ||
281 | return ret; | ||
282 | } | ||
283 | |||
284 | for (i = 0; i < desc->nvec_used; i++) | ||
285 | irq_set_msi_desc_off(virq, i, desc); | ||
286 | } | ||
287 | |||
288 | if (ops->msi_finish) | ||
289 | ops->msi_finish(&arg, 0); | ||
290 | |||
291 | for_each_msi_entry(desc, dev) { | ||
292 | if (desc->nvec_used == 1) | ||
293 | dev_dbg(dev, "irq %d for MSI\n", virq); | ||
294 | else | ||
295 | dev_dbg(dev, "irq [%d-%d] for MSI\n", | ||
296 | virq, virq + desc->nvec_used - 1); | ||
297 | } | ||
298 | |||
299 | return 0; | ||
300 | } | ||
301 | |||
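Seen from the bus or device side, allocation and teardown then reduce to a pair of calls; this assumes the msi_desc entries for dev have already been attached (a sketch, not taken from the patch):

#include <linux/device.h>
#include <linux/irqdomain.h>
#include <linux/msi.h>

static int my_enable_msi(struct irq_domain *msi_domain,
			 struct device *dev, int nvec)
{
	/* allocates virqs for every msi_desc already attached to dev */
	return msi_domain_alloc_irqs(msi_domain, dev, nvec);
}

static void my_disable_msi(struct irq_domain *msi_domain, struct device *dev)
{
	/* releases the virqs and clears desc->irq again */
	msi_domain_free_irqs(msi_domain, dev);
}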
302 | /** | ||
303 | * msi_domain_free_irqs - Free interrupts from an MSI interrupt @domain associated to @dev | ||
304 | * @domain: The domain managing the interrupts | ||
305 | * @dev: Pointer to device struct of the device for which the interrupts | ||
306 | * are freed | ||
307 | */ | ||
308 | void msi_domain_free_irqs(struct irq_domain *domain, struct device *dev) | ||
309 | { | ||
310 | struct msi_desc *desc; | ||
311 | |||
312 | for_each_msi_entry(desc, dev) { | ||
313 | irq_domain_free_irqs(desc->irq, desc->nvec_used); | ||
314 | desc->irq = 0; | ||
315 | } | ||
316 | } | ||
317 | |||
318 | /** | ||
319 | * msi_get_domain_info - Get the MSI interrupt domain info for @domain | ||
320 | * @domain: The interrupt domain to retrieve data from | ||
321 | * | ||
322 | * Returns the pointer to the msi_domain_info stored in | ||
323 | * @domain->host_data. | ||
324 | */ | ||
325 | struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain) | ||
326 | { | ||
327 | return (struct msi_domain_info *)domain->host_data; | ||
328 | } | ||
329 | |||
330 | #endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */ | ||
diff --git a/kernel/kmod.c b/kernel/kmod.c index 8637e041a247..2777f40a9c7b 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c | |||
@@ -47,13 +47,6 @@ extern int max_threads; | |||
47 | 47 | ||
48 | static struct workqueue_struct *khelper_wq; | 48 | static struct workqueue_struct *khelper_wq; |
49 | 49 | ||
50 | /* | ||
51 | * kmod_thread_locker is used for deadlock avoidance. There is no explicit | ||
52 | * locking to protect this global - it is private to the singleton khelper | ||
53 | * thread and should only ever be modified by that thread. | ||
54 | */ | ||
55 | static const struct task_struct *kmod_thread_locker; | ||
56 | |||
57 | #define CAP_BSET (void *)1 | 50 | #define CAP_BSET (void *)1 |
58 | #define CAP_PI (void *)2 | 51 | #define CAP_PI (void *)2 |
59 | 52 | ||
@@ -196,6 +189,27 @@ int __request_module(bool wait, const char *fmt, ...) | |||
196 | EXPORT_SYMBOL(__request_module); | 189 | EXPORT_SYMBOL(__request_module); |
197 | #endif /* CONFIG_MODULES */ | 190 | #endif /* CONFIG_MODULES */ |
198 | 191 | ||
192 | static void call_usermodehelper_freeinfo(struct subprocess_info *info) | ||
193 | { | ||
194 | if (info->cleanup) | ||
195 | (*info->cleanup)(info); | ||
196 | kfree(info); | ||
197 | } | ||
198 | |||
199 | static void umh_complete(struct subprocess_info *sub_info) | ||
200 | { | ||
201 | struct completion *comp = xchg(&sub_info->complete, NULL); | ||
202 | /* | ||
203 | * See call_usermodehelper_exec(). If xchg() returns NULL | ||
204 | * we own sub_info, the UMH_KILLABLE caller has gone away | ||
205 | * or the caller used UMH_NO_WAIT. | ||
206 | */ | ||
207 | if (comp) | ||
208 | complete(comp); | ||
209 | else | ||
210 | call_usermodehelper_freeinfo(sub_info); | ||
211 | } | ||
212 | |||
199 | /* | 213 | /* |
200 | * This is the task which runs the usermode application | 214 | * This is the task which runs the usermode application |
201 | */ | 215 | */ |
@@ -221,7 +235,7 @@ static int ____call_usermodehelper(void *data) | |||
221 | retval = -ENOMEM; | 235 | retval = -ENOMEM; |
222 | new = prepare_kernel_cred(current); | 236 | new = prepare_kernel_cred(current); |
223 | if (!new) | 237 | if (!new) |
224 | goto fail; | 238 | goto out; |
225 | 239 | ||
226 | spin_lock(&umh_sysctl_lock); | 240 | spin_lock(&umh_sysctl_lock); |
227 | new->cap_bset = cap_intersect(usermodehelper_bset, new->cap_bset); | 241 | new->cap_bset = cap_intersect(usermodehelper_bset, new->cap_bset); |
@@ -233,7 +247,7 @@ static int ____call_usermodehelper(void *data) | |||
233 | retval = sub_info->init(sub_info, new); | 247 | retval = sub_info->init(sub_info, new); |
234 | if (retval) { | 248 | if (retval) { |
235 | abort_creds(new); | 249 | abort_creds(new); |
236 | goto fail; | 250 | goto out; |
237 | } | 251 | } |
238 | } | 252 | } |
239 | 253 | ||
@@ -242,42 +256,16 @@ static int ____call_usermodehelper(void *data) | |||
242 | retval = do_execve(getname_kernel(sub_info->path), | 256 | retval = do_execve(getname_kernel(sub_info->path), |
243 | (const char __user *const __user *)sub_info->argv, | 257 | (const char __user *const __user *)sub_info->argv, |
244 | (const char __user *const __user *)sub_info->envp); | 258 | (const char __user *const __user *)sub_info->envp); |
259 | out: | ||
260 | sub_info->retval = retval; | ||
261 | /* wait_for_helper() will call umh_complete() if UMH_WAIT_PROC. */ | ||
262 | if (!(sub_info->wait & UMH_WAIT_PROC)) | ||
263 | umh_complete(sub_info); | ||
245 | if (!retval) | 264 | if (!retval) |
246 | return 0; | 265 | return 0; |
247 | |||
248 | /* Exec failed? */ | ||
249 | fail: | ||
250 | sub_info->retval = retval; | ||
251 | do_exit(0); | 266 | do_exit(0); |
252 | } | 267 | } |
253 | 268 | ||
254 | static int call_helper(void *data) | ||
255 | { | ||
256 | /* Worker thread started blocking khelper thread. */ | ||
257 | kmod_thread_locker = current; | ||
258 | return ____call_usermodehelper(data); | ||
259 | } | ||
260 | |||
261 | static void call_usermodehelper_freeinfo(struct subprocess_info *info) | ||
262 | { | ||
263 | if (info->cleanup) | ||
264 | (*info->cleanup)(info); | ||
265 | kfree(info); | ||
266 | } | ||
267 | |||
268 | static void umh_complete(struct subprocess_info *sub_info) | ||
269 | { | ||
270 | struct completion *comp = xchg(&sub_info->complete, NULL); | ||
271 | /* | ||
272 | * See call_usermodehelper_exec(). If xchg() returns NULL | ||
273 | * we own sub_info, the UMH_KILLABLE caller has gone away. | ||
274 | */ | ||
275 | if (comp) | ||
276 | complete(comp); | ||
277 | else | ||
278 | call_usermodehelper_freeinfo(sub_info); | ||
279 | } | ||
280 | |||
281 | /* Keventd can't block, but this (a child) can. */ | 269 | /* Keventd can't block, but this (a child) can. */ |
282 | static int wait_for_helper(void *data) | 270 | static int wait_for_helper(void *data) |
283 | { | 271 | { |
@@ -320,34 +308,17 @@ static void __call_usermodehelper(struct work_struct *work) | |||
320 | { | 308 | { |
321 | struct subprocess_info *sub_info = | 309 | struct subprocess_info *sub_info = |
322 | container_of(work, struct subprocess_info, work); | 310 | container_of(work, struct subprocess_info, work); |
323 | int wait = sub_info->wait & ~UMH_KILLABLE; | ||
324 | pid_t pid; | 311 | pid_t pid; |
325 | 312 | ||
326 | /* CLONE_VFORK: wait until the usermode helper has execve'd | 313 | if (sub_info->wait & UMH_WAIT_PROC) |
327 | * successfully We need the data structures to stay around | ||
328 | * until that is done. */ | ||
329 | if (wait == UMH_WAIT_PROC) | ||
330 | pid = kernel_thread(wait_for_helper, sub_info, | 314 | pid = kernel_thread(wait_for_helper, sub_info, |
331 | CLONE_FS | CLONE_FILES | SIGCHLD); | 315 | CLONE_FS | CLONE_FILES | SIGCHLD); |
332 | else { | 316 | else |
333 | pid = kernel_thread(call_helper, sub_info, | 317 | pid = kernel_thread(____call_usermodehelper, sub_info, |
334 | CLONE_VFORK | SIGCHLD); | 318 | SIGCHLD); |
335 | /* Worker thread stopped blocking khelper thread. */ | ||
336 | kmod_thread_locker = NULL; | ||
337 | } | ||
338 | |||
339 | switch (wait) { | ||
340 | case UMH_NO_WAIT: | ||
341 | call_usermodehelper_freeinfo(sub_info); | ||
342 | break; | ||
343 | 319 | ||
344 | case UMH_WAIT_PROC: | 320 | if (pid < 0) { |
345 | if (pid > 0) | 321 | sub_info->retval = pid; |
346 | break; | ||
347 | /* FALLTHROUGH */ | ||
348 | case UMH_WAIT_EXEC: | ||
349 | if (pid < 0) | ||
350 | sub_info->retval = pid; | ||
351 | umh_complete(sub_info); | 322 | umh_complete(sub_info); |
352 | } | 323 | } |
353 | } | 324 | } |
@@ -578,17 +549,11 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) | |||
578 | goto out; | 549 | goto out; |
579 | } | 550 | } |
580 | /* | 551 | /* |
581 | * Worker thread must not wait for khelper thread at below | 552 | * Set the completion pointer only if there is a waiter. |
582 | * wait_for_completion() if the thread was created with CLONE_VFORK | 553 | * This makes it possible to use umh_complete to free |
583 | * flag, for khelper thread is already waiting for the thread at | 554 | * the data structure in case of UMH_NO_WAIT. |
584 | * wait_for_completion() in do_fork(). | ||
585 | */ | 555 | */ |
586 | if (wait != UMH_NO_WAIT && current == kmod_thread_locker) { | 556 | sub_info->complete = (wait == UMH_NO_WAIT) ? NULL : &done; |
587 | retval = -EBUSY; | ||
588 | goto out; | ||
589 | } | ||
590 | |||
591 | sub_info->complete = &done; | ||
592 | sub_info->wait = wait; | 557 | sub_info->wait = wait; |
593 | 558 | ||
594 | queue_work(khelper_wq, &sub_info->work); | 559 | queue_work(khelper_wq, &sub_info->work); |
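For reference, the two wait modes most affected by this cleanup, as seen by a caller (the helper path is made up for the example):

#include <linux/kmod.h>

static int my_run_helper(void)
{
	char *argv[] = { "/sbin/my-helper", "start", NULL };	/* hypothetical */
	static char *envp[] = {
		"HOME=/",
		"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
		NULL
	};

	/* fire and forget: no completion is armed, so umh_complete()
	 * simply frees the subprocess_info */
	call_usermodehelper(argv[0], argv, envp, UMH_NO_WAIT);

	/* block until the helper has been exec'd (or has failed to) */
	return call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
}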
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 317eb8ad28dd..06f58309fed2 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
@@ -915,7 +915,7 @@ static struct kprobe *alloc_aggr_kprobe(struct kprobe *p) | |||
915 | #ifdef CONFIG_KPROBES_ON_FTRACE | 915 | #ifdef CONFIG_KPROBES_ON_FTRACE |
916 | static struct ftrace_ops kprobe_ftrace_ops __read_mostly = { | 916 | static struct ftrace_ops kprobe_ftrace_ops __read_mostly = { |
917 | .func = kprobe_ftrace_handler, | 917 | .func = kprobe_ftrace_handler, |
918 | .flags = FTRACE_OPS_FL_SAVE_REGS, | 918 | .flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY, |
919 | }; | 919 | }; |
920 | static int kprobe_ftrace_enabled; | 920 | static int kprobe_ftrace_enabled; |
921 | 921 | ||
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index dadbf88c22c4..454195194d4a 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c | |||
@@ -378,8 +378,14 @@ done: | |||
378 | * reschedule now, before we try-lock the mutex. This avoids getting | 378 | * reschedule now, before we try-lock the mutex. This avoids getting |
379 | * scheduled out right after we obtained the mutex. | 379 | * scheduled out right after we obtained the mutex. |
380 | */ | 380 | */ |
381 | if (need_resched()) | 381 | if (need_resched()) { |
382 | /* | ||
383 | * We _should_ have TASK_RUNNING here, but just in case | ||
384 | * we do not, make it so, otherwise we might get stuck. | ||
385 | */ | ||
386 | __set_current_state(TASK_RUNNING); | ||
382 | schedule_preempt_disabled(); | 387 | schedule_preempt_disabled(); |
388 | } | ||
383 | 389 | ||
384 | return false; | 390 | return false; |
385 | } | 391 | } |
diff --git a/kernel/module.c b/kernel/module.c index 88cec1ddb1e3..e52a8739361a 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -3097,6 +3097,32 @@ static int may_init_module(void) | |||
3097 | } | 3097 | } |
3098 | 3098 | ||
3099 | /* | 3099 | /* |
3100 | * Can't use wait_event_interruptible() because our condition | ||
3101 | * 'finished_loading()' contains a blocking primitive itself (mutex_lock). | ||
3102 | */ | ||
3103 | static int wait_finished_loading(struct module *mod) | ||
3104 | { | ||
3105 | DEFINE_WAIT_FUNC(wait, woken_wake_function); | ||
3106 | int ret = 0; | ||
3107 | |||
3108 | add_wait_queue(&module_wq, &wait); | ||
3109 | for (;;) { | ||
3110 | if (finished_loading(mod->name)) | ||
3111 | break; | ||
3112 | |||
3113 | if (signal_pending(current)) { | ||
3114 | ret = -ERESTARTSYS; | ||
3115 | break; | ||
3116 | } | ||
3117 | |||
3118 | wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); | ||
3119 | } | ||
3120 | remove_wait_queue(&module_wq, &wait); | ||
3121 | |||
3122 | return ret; | ||
3123 | } | ||
3124 | |||
3125 | /* | ||
3100 | * We try to place it in the list now to make sure it's unique before | 3126 | * We try to place it in the list now to make sure it's unique before |
3101 | * we dedicate too many resources. In particular, temporary percpu | 3127 | * we dedicate too many resources. In particular, temporary percpu |
3102 | * memory exhaustion. | 3128 | * memory exhaustion. |
@@ -3116,8 +3142,8 @@ again: | |||
3116 | || old->state == MODULE_STATE_UNFORMED) { | 3142 | || old->state == MODULE_STATE_UNFORMED) { |
3117 | /* Wait in case it fails to load. */ | 3143 | /* Wait in case it fails to load. */ |
3118 | mutex_unlock(&module_mutex); | 3144 | mutex_unlock(&module_mutex); |
3119 | err = wait_event_interruptible(module_wq, | 3145 | |
3120 | finished_loading(mod->name)); | 3146 | err = wait_finished_loading(mod); |
3121 | if (err) | 3147 | if (err) |
3122 | goto out_unlocked; | 3148 | goto out_unlocked; |
3123 | goto again; | 3149 | goto again; |
diff --git a/kernel/panic.c b/kernel/panic.c index d09dc5c32c67..4d8d6f906dec 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
@@ -33,6 +33,7 @@ static int pause_on_oops; | |||
33 | static int pause_on_oops_flag; | 33 | static int pause_on_oops_flag; |
34 | static DEFINE_SPINLOCK(pause_on_oops_lock); | 34 | static DEFINE_SPINLOCK(pause_on_oops_lock); |
35 | static bool crash_kexec_post_notifiers; | 35 | static bool crash_kexec_post_notifiers; |
36 | int panic_on_warn __read_mostly; | ||
36 | 37 | ||
37 | int panic_timeout = CONFIG_PANIC_TIMEOUT; | 38 | int panic_timeout = CONFIG_PANIC_TIMEOUT; |
38 | EXPORT_SYMBOL_GPL(panic_timeout); | 39 | EXPORT_SYMBOL_GPL(panic_timeout); |
@@ -244,6 +245,7 @@ static const struct tnt tnts[] = { | |||
244 | * 'I' - Working around severe firmware bug. | 245 | * 'I' - Working around severe firmware bug. |
245 | * 'O' - Out-of-tree module has been loaded. | 246 | * 'O' - Out-of-tree module has been loaded. |
246 | * 'E' - Unsigned module has been loaded. | 247 | * 'E' - Unsigned module has been loaded. |
248 | * 'L' - A soft lockup has previously occurred. | ||
247 | * | 249 | * |
248 | * The string is overwritten by the next call to print_tainted(). | 250 | * The string is overwritten by the next call to print_tainted(). |
249 | */ | 251 | */ |
@@ -427,6 +429,17 @@ static void warn_slowpath_common(const char *file, int line, void *caller, | |||
427 | if (args) | 429 | if (args) |
428 | vprintk(args->fmt, args->args); | 430 | vprintk(args->fmt, args->args); |
429 | 431 | ||
432 | if (panic_on_warn) { | ||
433 | /* | ||
434 | * This thread may hit another WARN() in the panic path. | ||
435 | * Resetting this prevents additional WARN() from panicking the | ||
436 | * system on this thread. Other threads are blocked by the | ||
437 | * panic_mutex in panic(). | ||
438 | */ | ||
439 | panic_on_warn = 0; | ||
440 | panic("panic_on_warn set ...\n"); | ||
441 | } | ||
442 | |||
430 | print_modules(); | 443 | print_modules(); |
431 | dump_stack(); | 444 | dump_stack(); |
432 | print_oops_end_marker(); | 445 | print_oops_end_marker(); |
@@ -484,6 +497,7 @@ EXPORT_SYMBOL(__stack_chk_fail); | |||
484 | 497 | ||
485 | core_param(panic, panic_timeout, int, 0644); | 498 | core_param(panic, panic_timeout, int, 0644); |
486 | core_param(pause_on_oops, pause_on_oops, int, 0644); | 499 | core_param(pause_on_oops, pause_on_oops, int, 0644); |
500 | core_param(panic_on_warn, panic_on_warn, int, 0644); | ||
487 | 501 | ||
488 | static int __init setup_crash_kexec_post_notifiers(char *s) | 502 | static int __init setup_crash_kexec_post_notifiers(char *s) |
489 | { | 503 | { |
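Because panic_on_warn is registered with core_param() above, it can be switched on from the kernel command line, for example by booting with:

	panic_on_warn=1

after which the first WARN() that fires will panic the machine; the handler clears the flag first, so WARN()s hit later on the panic path itself cannot recurse.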
diff --git a/kernel/pid.c b/kernel/pid.c index 9b9a26698144..82430c858d69 100644 --- a/kernel/pid.c +++ b/kernel/pid.c | |||
@@ -341,6 +341,8 @@ out: | |||
341 | 341 | ||
342 | out_unlock: | 342 | out_unlock: |
343 | spin_unlock_irq(&pidmap_lock); | 343 | spin_unlock_irq(&pidmap_lock); |
344 | put_pid_ns(ns); | ||
345 | |||
344 | out_free: | 346 | out_free: |
345 | while (++i <= ns->level) | 347 | while (++i <= ns->level) |
346 | free_pidmap(pid->numbers + i); | 348 | free_pidmap(pid->numbers + i); |
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index db95d8eb761b..bc6d6a89b6e6 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c | |||
@@ -190,7 +190,11 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) | |||
190 | /* Don't allow any more processes into the pid namespace */ | 190 | /* Don't allow any more processes into the pid namespace */ |
191 | disable_pid_allocation(pid_ns); | 191 | disable_pid_allocation(pid_ns); |
192 | 192 | ||
193 | /* Ignore SIGCHLD causing any terminated children to autoreap */ | 193 | /* |
194 | * Ignore SIGCHLD causing any terminated children to autoreap. | ||
195 | * This speeds up the namespace shutdown, plus see the comment | ||
196 | * below. | ||
197 | */ | ||
194 | spin_lock_irq(&me->sighand->siglock); | 198 | spin_lock_irq(&me->sighand->siglock); |
195 | me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN; | 199 | me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN; |
196 | spin_unlock_irq(&me->sighand->siglock); | 200 | spin_unlock_irq(&me->sighand->siglock); |
@@ -223,15 +227,31 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) | |||
223 | } | 227 | } |
224 | read_unlock(&tasklist_lock); | 228 | read_unlock(&tasklist_lock); |
225 | 229 | ||
226 | /* Firstly reap the EXIT_ZOMBIE children we may have. */ | 230 | /* |
231 | * Reap the EXIT_ZOMBIE children we had before we ignored SIGCHLD. | ||
232 | * sys_wait4() will also block until our children traced from the | ||
233 | * parent namespace are detached and become EXIT_DEAD. | ||
234 | */ | ||
227 | do { | 235 | do { |
228 | clear_thread_flag(TIF_SIGPENDING); | 236 | clear_thread_flag(TIF_SIGPENDING); |
229 | rc = sys_wait4(-1, NULL, __WALL, NULL); | 237 | rc = sys_wait4(-1, NULL, __WALL, NULL); |
230 | } while (rc != -ECHILD); | 238 | } while (rc != -ECHILD); |
231 | 239 | ||
232 | /* | 240 | /* |
233 | * sys_wait4() above can't reap the TASK_DEAD children. | 241 | * sys_wait4() above can't reap the EXIT_DEAD children but we do not |
234 | * Make sure they all go away, see free_pid(). | 242 | * really care, we could reparent them to the global init. We could |
243 | * exit and reap ->child_reaper even if it is not the last thread in | ||
244 | * this pid_ns, free_pid(nr_hashed == 0) calls proc_cleanup_work(), | ||
245 | * pid_ns can not go away until proc_kill_sb() drops the reference. | ||
246 | * | ||
247 | * But this ns can also have other tasks injected by setns()+fork(). | ||
248 | * Again, ignoring the user visible semantics we do not really need | ||
249 | * to wait until they are all reaped, but they can be reparented to | ||
250 | * us and thus we need to ensure that pid->child_reaper stays valid | ||
251 | * until they all go away. See free_pid()->wake_up_process(). | ||
252 | * | ||
253 | * We rely on ignored SIGCHLD, an injected zombie must be autoreaped | ||
254 | * if reparented. | ||
235 | */ | 255 | */ |
236 | for (;;) { | 256 | for (;;) { |
237 | set_current_state(TASK_UNINTERRUPTIBLE); | 257 | set_current_state(TASK_UNINTERRUPTIBLE); |
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index bbef57f5bdfd..6e7708c2c21f 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig | |||
@@ -94,6 +94,7 @@ config PM_STD_PARTITION | |||
94 | config PM_SLEEP | 94 | config PM_SLEEP |
95 | def_bool y | 95 | def_bool y |
96 | depends on SUSPEND || HIBERNATE_CALLBACKS | 96 | depends on SUSPEND || HIBERNATE_CALLBACKS |
97 | select PM_RUNTIME | ||
97 | 98 | ||
98 | config PM_SLEEP_SMP | 99 | config PM_SLEEP_SMP |
99 | def_bool y | 100 | def_bool y |
@@ -131,7 +132,6 @@ config PM_WAKELOCKS_GC | |||
131 | 132 | ||
132 | config PM_RUNTIME | 133 | config PM_RUNTIME |
133 | bool "Run-time PM core functionality" | 134 | bool "Run-time PM core functionality" |
134 | depends on !IA64_HP_SIM | ||
135 | ---help--- | 135 | ---help--- |
136 | Enable functionality allowing I/O devices to be put into energy-saving | 136 | Enable functionality allowing I/O devices to be put into energy-saving |
137 | (low power) states at run time (or autosuspended) after a specified | 137 | (low power) states at run time (or autosuspended) after a specified |
@@ -298,14 +298,9 @@ config PM_GENERIC_DOMAINS_SLEEP | |||
298 | def_bool y | 298 | def_bool y |
299 | depends on PM_SLEEP && PM_GENERIC_DOMAINS | 299 | depends on PM_SLEEP && PM_GENERIC_DOMAINS |
300 | 300 | ||
301 | config PM_GENERIC_DOMAINS_RUNTIME | ||
302 | def_bool y | ||
303 | depends on PM_RUNTIME && PM_GENERIC_DOMAINS | ||
304 | |||
305 | config PM_GENERIC_DOMAINS_OF | 301 | config PM_GENERIC_DOMAINS_OF |
306 | def_bool y | 302 | def_bool y |
307 | depends on PM_GENERIC_DOMAINS && OF | 303 | depends on PM_GENERIC_DOMAINS && OF |
308 | 304 | ||
309 | config CPU_PM | 305 | config CPU_PM |
310 | bool | 306 | bool |
311 | depends on SUSPEND || CPU_IDLE | ||
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index a9dfa79b6bab..2329daae5255 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/syscore_ops.h> | 28 | #include <linux/syscore_ops.h> |
29 | #include <linux/ctype.h> | 29 | #include <linux/ctype.h> |
30 | #include <linux/genhd.h> | 30 | #include <linux/genhd.h> |
31 | #include <linux/ktime.h> | ||
31 | #include <trace/events/power.h> | 32 | #include <trace/events/power.h> |
32 | 33 | ||
33 | #include "power.h" | 34 | #include "power.h" |
@@ -232,20 +233,17 @@ static void platform_recover(int platform_mode) | |||
232 | * @nr_pages: Number of memory pages processed between @start and @stop. | 233 | * @nr_pages: Number of memory pages processed between @start and @stop. |
233 | * @msg: Additional diagnostic message to print. | 234 | * @msg: Additional diagnostic message to print. |
234 | */ | 235 | */ |
235 | void swsusp_show_speed(struct timeval *start, struct timeval *stop, | 236 | void swsusp_show_speed(ktime_t start, ktime_t stop, |
236 | unsigned nr_pages, char *msg) | 237 | unsigned nr_pages, char *msg) |
237 | { | 238 | { |
239 | ktime_t diff; | ||
238 | u64 elapsed_centisecs64; | 240 | u64 elapsed_centisecs64; |
239 | unsigned int centisecs; | 241 | unsigned int centisecs; |
240 | unsigned int k; | 242 | unsigned int k; |
241 | unsigned int kps; | 243 | unsigned int kps; |
242 | 244 | ||
243 | elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start); | 245 | diff = ktime_sub(stop, start); |
244 | /* | 246 | elapsed_centisecs64 = ktime_divns(diff, 10*NSEC_PER_MSEC); |
245 | * If "(s64)elapsed_centisecs64 < 0", it will print long elapsed time, | ||
246 | * it is obvious enough for what went wrong. | ||
247 | */ | ||
248 | do_div(elapsed_centisecs64, NSEC_PER_SEC / 100); | ||
249 | centisecs = elapsed_centisecs64; | 247 | centisecs = elapsed_centisecs64; |
250 | if (centisecs == 0) | 248 | if (centisecs == 0) |
251 | centisecs = 1; /* avoid div-by-zero */ | 249 | centisecs = 1; /* avoid div-by-zero */ |
@@ -502,8 +500,14 @@ int hibernation_restore(int platform_mode) | |||
502 | error = dpm_suspend_start(PMSG_QUIESCE); | 500 | error = dpm_suspend_start(PMSG_QUIESCE); |
503 | if (!error) { | 501 | if (!error) { |
504 | error = resume_target_kernel(platform_mode); | 502 | error = resume_target_kernel(platform_mode); |
505 | dpm_resume_end(PMSG_RECOVER); | 503 | /* |
504 | * The above should either succeed and jump to the new kernel, | ||
505 | * or return with an error. Otherwise things are just | ||
506 | * undefined, so let's be paranoid. | ||
507 | */ | ||
508 | BUG_ON(!error); | ||
506 | } | 509 | } |
510 | dpm_resume_end(PMSG_RECOVER); | ||
507 | pm_restore_gfp_mask(); | 511 | pm_restore_gfp_mask(); |
508 | resume_console(); | 512 | resume_console(); |
509 | pm_restore_console(); | 513 | pm_restore_console(); |
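The conversion above follows the usual ktime pattern; a minimal sketch of how an interval is measured and reported now (names are illustrative, not from the patch):

#include <linux/ktime.h>
#include <linux/printk.h>

static void my_timed_section(void)
{
	ktime_t start, stop;
	u64 centisecs;

	start = ktime_get();
	/* ... the work being timed, e.g. preallocating image memory ... */
	stop = ktime_get();

	/* elapsed centiseconds, as swsusp_show_speed() computes them */
	centisecs = ktime_divns(ktime_sub(stop, start), 10 * NSEC_PER_MSEC);
	pr_info("took %llu.%02llu s\n", centisecs / 100, centisecs % 100);
}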
diff --git a/kernel/power/power.h b/kernel/power/power.h index 2df883a9d3cb..ce9b8328a689 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h | |||
@@ -174,8 +174,7 @@ extern int hib_wait_on_bio_chain(struct bio **bio_chain); | |||
174 | 174 | ||
175 | struct timeval; | 175 | struct timeval; |
176 | /* kernel/power/swsusp.c */ | 176 | /* kernel/power/swsusp.c */ |
177 | extern void swsusp_show_speed(struct timeval *, struct timeval *, | 177 | extern void swsusp_show_speed(ktime_t, ktime_t, unsigned int, char *); |
178 | unsigned int, char *); | ||
179 | 178 | ||
180 | #ifdef CONFIG_SUSPEND | 179 | #ifdef CONFIG_SUSPEND |
181 | /* kernel/power/suspend.c */ | 180 | /* kernel/power/suspend.c */ |
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 791a61892bb5..0c40c16174b4 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/list.h> | 28 | #include <linux/list.h> |
29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
30 | #include <linux/compiler.h> | 30 | #include <linux/compiler.h> |
31 | #include <linux/ktime.h> | ||
31 | 32 | ||
32 | #include <asm/uaccess.h> | 33 | #include <asm/uaccess.h> |
33 | #include <asm/mmu_context.h> | 34 | #include <asm/mmu_context.h> |
@@ -1576,11 +1577,11 @@ int hibernate_preallocate_memory(void) | |||
1576 | struct zone *zone; | 1577 | struct zone *zone; |
1577 | unsigned long saveable, size, max_size, count, highmem, pages = 0; | 1578 | unsigned long saveable, size, max_size, count, highmem, pages = 0; |
1578 | unsigned long alloc, save_highmem, pages_highmem, avail_normal; | 1579 | unsigned long alloc, save_highmem, pages_highmem, avail_normal; |
1579 | struct timeval start, stop; | 1580 | ktime_t start, stop; |
1580 | int error; | 1581 | int error; |
1581 | 1582 | ||
1582 | printk(KERN_INFO "PM: Preallocating image memory... "); | 1583 | printk(KERN_INFO "PM: Preallocating image memory... "); |
1583 | do_gettimeofday(&start); | 1584 | start = ktime_get(); |
1584 | 1585 | ||
1585 | error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY); | 1586 | error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY); |
1586 | if (error) | 1587 | if (error) |
@@ -1709,9 +1710,9 @@ int hibernate_preallocate_memory(void) | |||
1709 | free_unnecessary_pages(); | 1710 | free_unnecessary_pages(); |
1710 | 1711 | ||
1711 | out: | 1712 | out: |
1712 | do_gettimeofday(&stop); | 1713 | stop = ktime_get(); |
1713 | printk(KERN_CONT "done (allocated %lu pages)\n", pages); | 1714 | printk(KERN_CONT "done (allocated %lu pages)\n", pages); |
1714 | swsusp_show_speed(&start, &stop, pages, "Allocated"); | 1715 | swsusp_show_speed(start, stop, pages, "Allocated"); |
1715 | 1716 | ||
1716 | return 0; | 1717 | return 0; |
1717 | 1718 | ||
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 4ca9a33ff620..c347e3ce3a55 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c | |||
@@ -146,7 +146,7 @@ static int platform_suspend_prepare(suspend_state_t state) | |||
146 | 146 | ||
147 | static int platform_suspend_prepare_late(suspend_state_t state) | 147 | static int platform_suspend_prepare_late(suspend_state_t state) |
148 | { | 148 | { |
149 | return state == PM_SUSPEND_FREEZE && freeze_ops->prepare ? | 149 | return state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->prepare ? |
150 | freeze_ops->prepare() : 0; | 150 | freeze_ops->prepare() : 0; |
151 | } | 151 | } |
152 | 152 | ||
@@ -164,7 +164,7 @@ static void platform_resume_noirq(suspend_state_t state) | |||
164 | 164 | ||
165 | static void platform_resume_early(suspend_state_t state) | 165 | static void platform_resume_early(suspend_state_t state) |
166 | { | 166 | { |
167 | if (state == PM_SUSPEND_FREEZE && freeze_ops->restore) | 167 | if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->restore) |
168 | freeze_ops->restore(); | 168 | freeze_ops->restore(); |
169 | } | 169 | } |
170 | 170 | ||
diff --git a/kernel/power/swap.c b/kernel/power/swap.c index aaa3261dea5d..570aff817543 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/atomic.h> | 30 | #include <linux/atomic.h> |
31 | #include <linux/kthread.h> | 31 | #include <linux/kthread.h> |
32 | #include <linux/crc32.h> | 32 | #include <linux/crc32.h> |
33 | #include <linux/ktime.h> | ||
33 | 34 | ||
34 | #include "power.h" | 35 | #include "power.h" |
35 | 36 | ||
@@ -445,8 +446,8 @@ static int save_image(struct swap_map_handle *handle, | |||
445 | int nr_pages; | 446 | int nr_pages; |
446 | int err2; | 447 | int err2; |
447 | struct bio *bio; | 448 | struct bio *bio; |
448 | struct timeval start; | 449 | ktime_t start; |
449 | struct timeval stop; | 450 | ktime_t stop; |
450 | 451 | ||
451 | printk(KERN_INFO "PM: Saving image data pages (%u pages)...\n", | 452 | printk(KERN_INFO "PM: Saving image data pages (%u pages)...\n", |
452 | nr_to_write); | 453 | nr_to_write); |
@@ -455,7 +456,7 @@ static int save_image(struct swap_map_handle *handle, | |||
455 | m = 1; | 456 | m = 1; |
456 | nr_pages = 0; | 457 | nr_pages = 0; |
457 | bio = NULL; | 458 | bio = NULL; |
458 | do_gettimeofday(&start); | 459 | start = ktime_get(); |
459 | while (1) { | 460 | while (1) { |
460 | ret = snapshot_read_next(snapshot); | 461 | ret = snapshot_read_next(snapshot); |
461 | if (ret <= 0) | 462 | if (ret <= 0) |
@@ -469,12 +470,12 @@ static int save_image(struct swap_map_handle *handle, | |||
469 | nr_pages++; | 470 | nr_pages++; |
470 | } | 471 | } |
471 | err2 = hib_wait_on_bio_chain(&bio); | 472 | err2 = hib_wait_on_bio_chain(&bio); |
472 | do_gettimeofday(&stop); | 473 | stop = ktime_get(); |
473 | if (!ret) | 474 | if (!ret) |
474 | ret = err2; | 475 | ret = err2; |
475 | if (!ret) | 476 | if (!ret) |
476 | printk(KERN_INFO "PM: Image saving done.\n"); | 477 | printk(KERN_INFO "PM: Image saving done.\n"); |
477 | swsusp_show_speed(&start, &stop, nr_to_write, "Wrote"); | 478 | swsusp_show_speed(start, stop, nr_to_write, "Wrote"); |
478 | return ret; | 479 | return ret; |
479 | } | 480 | } |
480 | 481 | ||
@@ -580,8 +581,8 @@ static int save_image_lzo(struct swap_map_handle *handle, | |||
580 | int nr_pages; | 581 | int nr_pages; |
581 | int err2; | 582 | int err2; |
582 | struct bio *bio; | 583 | struct bio *bio; |
583 | struct timeval start; | 584 | ktime_t start; |
584 | struct timeval stop; | 585 | ktime_t stop; |
585 | size_t off; | 586 | size_t off; |
586 | unsigned thr, run_threads, nr_threads; | 587 | unsigned thr, run_threads, nr_threads; |
587 | unsigned char *page = NULL; | 588 | unsigned char *page = NULL; |
@@ -674,7 +675,7 @@ static int save_image_lzo(struct swap_map_handle *handle, | |||
674 | m = 1; | 675 | m = 1; |
675 | nr_pages = 0; | 676 | nr_pages = 0; |
676 | bio = NULL; | 677 | bio = NULL; |
677 | do_gettimeofday(&start); | 678 | start = ktime_get(); |
678 | for (;;) { | 679 | for (;;) { |
679 | for (thr = 0; thr < nr_threads; thr++) { | 680 | for (thr = 0; thr < nr_threads; thr++) { |
680 | for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) { | 681 | for (off = 0; off < LZO_UNC_SIZE; off += PAGE_SIZE) { |
@@ -759,12 +760,12 @@ static int save_image_lzo(struct swap_map_handle *handle, | |||
759 | 760 | ||
760 | out_finish: | 761 | out_finish: |
761 | err2 = hib_wait_on_bio_chain(&bio); | 762 | err2 = hib_wait_on_bio_chain(&bio); |
762 | do_gettimeofday(&stop); | 763 | stop = ktime_get(); |
763 | if (!ret) | 764 | if (!ret) |
764 | ret = err2; | 765 | ret = err2; |
765 | if (!ret) | 766 | if (!ret) |
766 | printk(KERN_INFO "PM: Image saving done.\n"); | 767 | printk(KERN_INFO "PM: Image saving done.\n"); |
767 | swsusp_show_speed(&start, &stop, nr_to_write, "Wrote"); | 768 | swsusp_show_speed(start, stop, nr_to_write, "Wrote"); |
768 | out_clean: | 769 | out_clean: |
769 | if (crc) { | 770 | if (crc) { |
770 | if (crc->thr) | 771 | if (crc->thr) |
@@ -965,8 +966,8 @@ static int load_image(struct swap_map_handle *handle, | |||
965 | { | 966 | { |
966 | unsigned int m; | 967 | unsigned int m; |
967 | int ret = 0; | 968 | int ret = 0; |
968 | struct timeval start; | 969 | ktime_t start; |
969 | struct timeval stop; | 970 | ktime_t stop; |
970 | struct bio *bio; | 971 | struct bio *bio; |
971 | int err2; | 972 | int err2; |
972 | unsigned nr_pages; | 973 | unsigned nr_pages; |
@@ -978,7 +979,7 @@ static int load_image(struct swap_map_handle *handle, | |||
978 | m = 1; | 979 | m = 1; |
979 | nr_pages = 0; | 980 | nr_pages = 0; |
980 | bio = NULL; | 981 | bio = NULL; |
981 | do_gettimeofday(&start); | 982 | start = ktime_get(); |
982 | for ( ; ; ) { | 983 | for ( ; ; ) { |
983 | ret = snapshot_write_next(snapshot); | 984 | ret = snapshot_write_next(snapshot); |
984 | if (ret <= 0) | 985 | if (ret <= 0) |
@@ -996,7 +997,7 @@ static int load_image(struct swap_map_handle *handle, | |||
996 | nr_pages++; | 997 | nr_pages++; |
997 | } | 998 | } |
998 | err2 = hib_wait_on_bio_chain(&bio); | 999 | err2 = hib_wait_on_bio_chain(&bio); |
999 | do_gettimeofday(&stop); | 1000 | stop = ktime_get(); |
1000 | if (!ret) | 1001 | if (!ret) |
1001 | ret = err2; | 1002 | ret = err2; |
1002 | if (!ret) { | 1003 | if (!ret) { |
@@ -1005,7 +1006,7 @@ static int load_image(struct swap_map_handle *handle, | |||
1005 | if (!snapshot_image_loaded(snapshot)) | 1006 | if (!snapshot_image_loaded(snapshot)) |
1006 | ret = -ENODATA; | 1007 | ret = -ENODATA; |
1007 | } | 1008 | } |
1008 | swsusp_show_speed(&start, &stop, nr_to_read, "Read"); | 1009 | swsusp_show_speed(start, stop, nr_to_read, "Read"); |
1009 | return ret; | 1010 | return ret; |
1010 | } | 1011 | } |
1011 | 1012 | ||
@@ -1067,8 +1068,8 @@ static int load_image_lzo(struct swap_map_handle *handle, | |||
1067 | int ret = 0; | 1068 | int ret = 0; |
1068 | int eof = 0; | 1069 | int eof = 0; |
1069 | struct bio *bio; | 1070 | struct bio *bio; |
1070 | struct timeval start; | 1071 | ktime_t start; |
1071 | struct timeval stop; | 1072 | ktime_t stop; |
1072 | unsigned nr_pages; | 1073 | unsigned nr_pages; |
1073 | size_t off; | 1074 | size_t off; |
1074 | unsigned i, thr, run_threads, nr_threads; | 1075 | unsigned i, thr, run_threads, nr_threads; |
@@ -1190,7 +1191,7 @@ static int load_image_lzo(struct swap_map_handle *handle, | |||
1190 | m = 1; | 1191 | m = 1; |
1191 | nr_pages = 0; | 1192 | nr_pages = 0; |
1192 | bio = NULL; | 1193 | bio = NULL; |
1193 | do_gettimeofday(&start); | 1194 | start = ktime_get(); |
1194 | 1195 | ||
1195 | ret = snapshot_write_next(snapshot); | 1196 | ret = snapshot_write_next(snapshot); |
1196 | if (ret <= 0) | 1197 | if (ret <= 0) |
@@ -1343,7 +1344,7 @@ out_finish: | |||
1343 | wait_event(crc->done, atomic_read(&crc->stop)); | 1344 | wait_event(crc->done, atomic_read(&crc->stop)); |
1344 | atomic_set(&crc->stop, 0); | 1345 | atomic_set(&crc->stop, 0); |
1345 | } | 1346 | } |
1346 | do_gettimeofday(&stop); | 1347 | stop = ktime_get(); |
1347 | if (!ret) { | 1348 | if (!ret) { |
1348 | printk(KERN_INFO "PM: Image loading done.\n"); | 1349 | printk(KERN_INFO "PM: Image loading done.\n"); |
1349 | snapshot_write_finalize(snapshot); | 1350 | snapshot_write_finalize(snapshot); |
@@ -1359,7 +1360,7 @@ out_finish: | |||
1359 | } | 1360 | } |
1360 | } | 1361 | } |
1361 | } | 1362 | } |
1362 | swsusp_show_speed(&start, &stop, nr_to_read, "Read"); | 1363 | swsusp_show_speed(start, stop, nr_to_read, "Read"); |
1363 | out_clean: | 1364 | out_clean: |
1364 | for (i = 0; i < ring_size; i++) | 1365 | for (i = 0; i < ring_size; i++) |
1365 | free_page((unsigned long)page[i]); | 1366 | free_page((unsigned long)page[i]); |
@@ -1374,7 +1375,7 @@ out_clean: | |||
1374 | kthread_stop(data[thr].thr); | 1375 | kthread_stop(data[thr].thr); |
1375 | vfree(data); | 1376 | vfree(data); |
1376 | } | 1377 | } |
1377 | if (page) vfree(page); | 1378 | vfree(page); |
1378 | 1379 | ||
1379 | return ret; | 1380 | return ret; |
1380 | } | 1381 | } |
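Besides repeating the ktime_t conversion for the save and load paths, the swap.c hunks drop the "if (page)" guard in front of vfree(): like kfree(), vfree() is specified to do nothing when handed NULL, so the test was redundant. A tiny illustration, assuming a kernel build context:

    #include <linux/vmalloc.h>

    static void cleanup_sketch(void *buf)
    {
        vfree(buf);     /* safe whether or not the allocation ever succeeded */
    }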
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index ced2b84b1cb7..f900dc9f6822 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c | |||
@@ -62,9 +62,6 @@ int console_printk[4] = { | |||
62 | CONSOLE_LOGLEVEL_DEFAULT, /* default_console_loglevel */ | 62 | CONSOLE_LOGLEVEL_DEFAULT, /* default_console_loglevel */ |
63 | }; | 63 | }; |
64 | 64 | ||
65 | /* Deferred messaged from sched code are marked by this special level */ | ||
66 | #define SCHED_MESSAGE_LOGLEVEL -2 | ||
67 | |||
68 | /* | 65 | /* |
69 | * Low level drivers may need that to know if they can schedule in | 66 | * Low level drivers may need that to know if they can schedule in |
70 | * their unblank() callback or not. So let's export it. | 67 | * their unblank() callback or not. So let's export it. |
@@ -480,7 +477,7 @@ static int syslog_action_restricted(int type) | |||
480 | type != SYSLOG_ACTION_SIZE_BUFFER; | 477 | type != SYSLOG_ACTION_SIZE_BUFFER; |
481 | } | 478 | } |
482 | 479 | ||
483 | static int check_syslog_permissions(int type, bool from_file) | 480 | int check_syslog_permissions(int type, bool from_file) |
484 | { | 481 | { |
485 | /* | 482 | /* |
486 | * If this is from /proc/kmsg and we've already opened it, then we've | 483 | * If this is from /proc/kmsg and we've already opened it, then we've |
@@ -1259,7 +1256,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) | |||
1259 | int do_syslog(int type, char __user *buf, int len, bool from_file) | 1256 | int do_syslog(int type, char __user *buf, int len, bool from_file) |
1260 | { | 1257 | { |
1261 | bool clear = false; | 1258 | bool clear = false; |
1262 | static int saved_console_loglevel = -1; | 1259 | static int saved_console_loglevel = LOGLEVEL_DEFAULT; |
1263 | int error; | 1260 | int error; |
1264 | 1261 | ||
1265 | error = check_syslog_permissions(type, from_file); | 1262 | error = check_syslog_permissions(type, from_file); |
@@ -1316,15 +1313,15 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) | |||
1316 | break; | 1313 | break; |
1317 | /* Disable logging to console */ | 1314 | /* Disable logging to console */ |
1318 | case SYSLOG_ACTION_CONSOLE_OFF: | 1315 | case SYSLOG_ACTION_CONSOLE_OFF: |
1319 | if (saved_console_loglevel == -1) | 1316 | if (saved_console_loglevel == LOGLEVEL_DEFAULT) |
1320 | saved_console_loglevel = console_loglevel; | 1317 | saved_console_loglevel = console_loglevel; |
1321 | console_loglevel = minimum_console_loglevel; | 1318 | console_loglevel = minimum_console_loglevel; |
1322 | break; | 1319 | break; |
1323 | /* Enable logging to console */ | 1320 | /* Enable logging to console */ |
1324 | case SYSLOG_ACTION_CONSOLE_ON: | 1321 | case SYSLOG_ACTION_CONSOLE_ON: |
1325 | if (saved_console_loglevel != -1) { | 1322 | if (saved_console_loglevel != LOGLEVEL_DEFAULT) { |
1326 | console_loglevel = saved_console_loglevel; | 1323 | console_loglevel = saved_console_loglevel; |
1327 | saved_console_loglevel = -1; | 1324 | saved_console_loglevel = LOGLEVEL_DEFAULT; |
1328 | } | 1325 | } |
1329 | break; | 1326 | break; |
1330 | /* Set level of messages printed to console */ | 1327 | /* Set level of messages printed to console */ |
@@ -1336,7 +1333,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) | |||
1336 | len = minimum_console_loglevel; | 1333 | len = minimum_console_loglevel; |
1337 | console_loglevel = len; | 1334 | console_loglevel = len; |
1338 | /* Implicitly re-enable logging to console */ | 1335 | /* Implicitly re-enable logging to console */ |
1339 | saved_console_loglevel = -1; | 1336 | saved_console_loglevel = LOGLEVEL_DEFAULT; |
1340 | error = 0; | 1337 | error = 0; |
1341 | break; | 1338 | break; |
1342 | /* Number of chars in the log buffer */ | 1339 | /* Number of chars in the log buffer */ |
@@ -1627,10 +1624,10 @@ asmlinkage int vprintk_emit(int facility, int level, | |||
1627 | int printed_len = 0; | 1624 | int printed_len = 0; |
1628 | bool in_sched = false; | 1625 | bool in_sched = false; |
1629 | /* cpu currently holding logbuf_lock in this function */ | 1626 | /* cpu currently holding logbuf_lock in this function */ |
1630 | static volatile unsigned int logbuf_cpu = UINT_MAX; | 1627 | static unsigned int logbuf_cpu = UINT_MAX; |
1631 | 1628 | ||
1632 | if (level == SCHED_MESSAGE_LOGLEVEL) { | 1629 | if (level == LOGLEVEL_SCHED) { |
1633 | level = -1; | 1630 | level = LOGLEVEL_DEFAULT; |
1634 | in_sched = true; | 1631 | in_sched = true; |
1635 | } | 1632 | } |
1636 | 1633 | ||
@@ -1695,8 +1692,9 @@ asmlinkage int vprintk_emit(int facility, int level, | |||
1695 | const char *end_of_header = printk_skip_level(text); | 1692 | const char *end_of_header = printk_skip_level(text); |
1696 | switch (kern_level) { | 1693 | switch (kern_level) { |
1697 | case '0' ... '7': | 1694 | case '0' ... '7': |
1698 | if (level == -1) | 1695 | if (level == LOGLEVEL_DEFAULT) |
1699 | level = kern_level - '0'; | 1696 | level = kern_level - '0'; |
1697 | /* fallthrough */ | ||
1700 | case 'd': /* KERN_DEFAULT */ | 1698 | case 'd': /* KERN_DEFAULT */ |
1701 | lflags |= LOG_PREFIX; | 1699 | lflags |= LOG_PREFIX; |
1702 | } | 1700 | } |
@@ -1710,7 +1708,7 @@ asmlinkage int vprintk_emit(int facility, int level, | |||
1710 | } | 1708 | } |
1711 | } | 1709 | } |
1712 | 1710 | ||
1713 | if (level == -1) | 1711 | if (level == LOGLEVEL_DEFAULT) |
1714 | level = default_message_loglevel; | 1712 | level = default_message_loglevel; |
1715 | 1713 | ||
1716 | if (dict) | 1714 | if (dict) |
@@ -1788,7 +1786,7 @@ EXPORT_SYMBOL(vprintk_emit); | |||
1788 | 1786 | ||
1789 | asmlinkage int vprintk(const char *fmt, va_list args) | 1787 | asmlinkage int vprintk(const char *fmt, va_list args) |
1790 | { | 1788 | { |
1791 | return vprintk_emit(0, -1, NULL, 0, fmt, args); | 1789 | return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); |
1792 | } | 1790 | } |
1793 | EXPORT_SYMBOL(vprintk); | 1791 | EXPORT_SYMBOL(vprintk); |
1794 | 1792 | ||
@@ -1807,6 +1805,30 @@ asmlinkage int printk_emit(int facility, int level, | |||
1807 | } | 1805 | } |
1808 | EXPORT_SYMBOL(printk_emit); | 1806 | EXPORT_SYMBOL(printk_emit); |
1809 | 1807 | ||
1808 | int vprintk_default(const char *fmt, va_list args) | ||
1809 | { | ||
1810 | int r; | ||
1811 | |||
1812 | #ifdef CONFIG_KGDB_KDB | ||
1813 | if (unlikely(kdb_trap_printk)) { | ||
1814 | r = vkdb_printf(fmt, args); | ||
1815 | return r; | ||
1816 | } | ||
1817 | #endif | ||
1818 | r = vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); | ||
1819 | |||
1820 | return r; | ||
1821 | } | ||
1822 | EXPORT_SYMBOL_GPL(vprintk_default); | ||
1823 | |||
1824 | /* | ||
1825 | * This allows printk to be diverted to another function per cpu. | ||
1826 | * This is useful for calling printk functions from within NMI | ||
1827 | * without worrying about race conditions that can lock up the | ||
1828 | * box. | ||
1829 | */ | ||
1830 | DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default; | ||
1831 | |||
1810 | /** | 1832 | /** |
1811 | * printk - print a kernel message | 1833 | * printk - print a kernel message |
1812 | * @fmt: format string | 1834 | * @fmt: format string |
@@ -1830,19 +1852,15 @@ EXPORT_SYMBOL(printk_emit); | |||
1830 | */ | 1852 | */ |
1831 | asmlinkage __visible int printk(const char *fmt, ...) | 1853 | asmlinkage __visible int printk(const char *fmt, ...) |
1832 | { | 1854 | { |
1855 | printk_func_t vprintk_func; | ||
1833 | va_list args; | 1856 | va_list args; |
1834 | int r; | 1857 | int r; |
1835 | 1858 | ||
1836 | #ifdef CONFIG_KGDB_KDB | ||
1837 | if (unlikely(kdb_trap_printk)) { | ||
1838 | va_start(args, fmt); | ||
1839 | r = vkdb_printf(fmt, args); | ||
1840 | va_end(args); | ||
1841 | return r; | ||
1842 | } | ||
1843 | #endif | ||
1844 | va_start(args, fmt); | 1859 | va_start(args, fmt); |
1845 | r = vprintk_emit(0, -1, NULL, 0, fmt, args); | 1860 | preempt_disable(); |
1861 | vprintk_func = this_cpu_read(printk_func); | ||
1862 | r = vprintk_func(fmt, args); | ||
1863 | preempt_enable(); | ||
1846 | va_end(args); | 1864 | va_end(args); |
1847 | 1865 | ||
1848 | return r; | 1866 | return r; |
@@ -1876,28 +1894,28 @@ static size_t msg_print_text(const struct printk_log *msg, enum log_flags prev, | |||
1876 | bool syslog, char *buf, size_t size) { return 0; } | 1894 | bool syslog, char *buf, size_t size) { return 0; } |
1877 | static size_t cont_print_text(char *text, size_t size) { return 0; } | 1895 | static size_t cont_print_text(char *text, size_t size) { return 0; } |
1878 | 1896 | ||
1897 | /* Still needs to be defined for users */ | ||
1898 | DEFINE_PER_CPU(printk_func_t, printk_func); | ||
1899 | |||
1879 | #endif /* CONFIG_PRINTK */ | 1900 | #endif /* CONFIG_PRINTK */ |
1880 | 1901 | ||
1881 | #ifdef CONFIG_EARLY_PRINTK | 1902 | #ifdef CONFIG_EARLY_PRINTK |
1882 | struct console *early_console; | 1903 | struct console *early_console; |
1883 | 1904 | ||
1884 | void early_vprintk(const char *fmt, va_list ap) | ||
1885 | { | ||
1886 | if (early_console) { | ||
1887 | char buf[512]; | ||
1888 | int n = vscnprintf(buf, sizeof(buf), fmt, ap); | ||
1889 | |||
1890 | early_console->write(early_console, buf, n); | ||
1891 | } | ||
1892 | } | ||
1893 | |||
1894 | asmlinkage __visible void early_printk(const char *fmt, ...) | 1905 | asmlinkage __visible void early_printk(const char *fmt, ...) |
1895 | { | 1906 | { |
1896 | va_list ap; | 1907 | va_list ap; |
1908 | char buf[512]; | ||
1909 | int n; | ||
1910 | |||
1911 | if (!early_console) | ||
1912 | return; | ||
1897 | 1913 | ||
1898 | va_start(ap, fmt); | 1914 | va_start(ap, fmt); |
1899 | early_vprintk(fmt, ap); | 1915 | n = vscnprintf(buf, sizeof(buf), fmt, ap); |
1900 | va_end(ap); | 1916 | va_end(ap); |
1917 | |||
1918 | early_console->write(early_console, buf, n); | ||
1901 | } | 1919 | } |
1902 | #endif | 1920 | #endif |
1903 | 1921 | ||
@@ -2634,7 +2652,7 @@ int printk_deferred(const char *fmt, ...) | |||
2634 | 2652 | ||
2635 | preempt_disable(); | 2653 | preempt_disable(); |
2636 | va_start(args, fmt); | 2654 | va_start(args, fmt); |
2637 | r = vprintk_emit(0, SCHED_MESSAGE_LOGLEVEL, NULL, 0, fmt, args); | 2655 | r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args); |
2638 | va_end(args); | 2656 | va_end(args); |
2639 | 2657 | ||
2640 | __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT); | 2658 | __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT); |
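The printk.c changes replace the -1/-2 magic loglevels with the named LOGLEVEL_DEFAULT and LOGLEVEL_SCHED constants, fold the kdb hook into a new vprintk_default(), and route printk() through a per-CPU printk_func pointer so the output path can be swapped out per CPU (the stated use case is printing from NMI context without risking a deadlock on logbuf_lock). A sketch of how a caller would divert and restore the pointer; it assumes the DECLARE_PER_CPU(printk_func_t, printk_func) declaration and vprintk_default() are visible to the caller, and nmi_vprintk() here is a hypothetical stand-in for a real NMI-safe handler:

    #include <linux/percpu.h>
    #include <linux/printk.h>

    /* Hypothetical handler: a real one would format into an NMI-safe
     * per-CPU buffer; this stub just discards the message. */
    static int nmi_vprintk(const char *fmt, va_list args)
    {
        return 0;
    }

    /* Divert this CPU's printk() away from the normal logbuf path. */
    static void nmi_printk_enter(void)
    {
        this_cpu_write(printk_func, nmi_vprintk);
    }

    /* Restore the default path once it is safe to take logbuf_lock again. */
    static void nmi_printk_exit(void)
    {
        this_cpu_write(printk_func, vprintk_default);
    }

printk() itself, as rewritten above, reads the pointer with preemption disabled and calls whatever function is currently installed for the local CPU.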
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 54e75226c2c4..1eb9d90c3af9 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -485,36 +485,19 @@ static int ptrace_detach(struct task_struct *child, unsigned int data) | |||
485 | 485 | ||
486 | /* | 486 | /* |
487 | * Detach all tasks we were using ptrace on. Called with tasklist held | 487 | * Detach all tasks we were using ptrace on. Called with tasklist held |
488 | * for writing, and returns with it held too. But note it can release | 488 | * for writing. |
489 | * and reacquire the lock. | ||
490 | */ | 489 | */ |
491 | void exit_ptrace(struct task_struct *tracer) | 490 | void exit_ptrace(struct task_struct *tracer, struct list_head *dead) |
492 | __releases(&tasklist_lock) | ||
493 | __acquires(&tasklist_lock) | ||
494 | { | 491 | { |
495 | struct task_struct *p, *n; | 492 | struct task_struct *p, *n; |
496 | LIST_HEAD(ptrace_dead); | ||
497 | |||
498 | if (likely(list_empty(&tracer->ptraced))) | ||
499 | return; | ||
500 | 493 | ||
501 | list_for_each_entry_safe(p, n, &tracer->ptraced, ptrace_entry) { | 494 | list_for_each_entry_safe(p, n, &tracer->ptraced, ptrace_entry) { |
502 | if (unlikely(p->ptrace & PT_EXITKILL)) | 495 | if (unlikely(p->ptrace & PT_EXITKILL)) |
503 | send_sig_info(SIGKILL, SEND_SIG_FORCED, p); | 496 | send_sig_info(SIGKILL, SEND_SIG_FORCED, p); |
504 | 497 | ||
505 | if (__ptrace_detach(tracer, p)) | 498 | if (__ptrace_detach(tracer, p)) |
506 | list_add(&p->ptrace_entry, &ptrace_dead); | 499 | list_add(&p->ptrace_entry, dead); |
507 | } | ||
508 | |||
509 | write_unlock_irq(&tasklist_lock); | ||
510 | BUG_ON(!list_empty(&tracer->ptraced)); | ||
511 | |||
512 | list_for_each_entry_safe(p, n, &ptrace_dead, ptrace_entry) { | ||
513 | list_del_init(&p->ptrace_entry); | ||
514 | release_task(p); | ||
515 | } | 500 | } |
516 | |||
517 | write_lock_irq(&tasklist_lock); | ||
518 | } | 501 | } |
519 | 502 | ||
520 | int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len) | 503 | int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len) |
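exit_ptrace() no longer drops and retakes tasklist_lock itself; it just moves detached tracees onto a list supplied by the caller, who releases them after unlocking. A simplified rendering of the caller-side pattern (the real caller is the exit path in kernel/exit.c; this sketch is not that code):

    #include <linux/list.h>
    #include <linux/ptrace.h>
    #include <linux/sched.h>

    static void reap_tracees(struct task_struct *tracer)
    {
        struct task_struct *p, *n;
        LIST_HEAD(dead);

        write_lock_irq(&tasklist_lock);
        if (!list_empty(&tracer->ptraced))
            exit_ptrace(tracer, &dead);     /* collect, do not release yet */
        write_unlock_irq(&tasklist_lock);

        /* release_task() takes tasklist_lock itself, so run it unlocked. */
        list_for_each_entry_safe(p, n, &dead, ptrace_entry) {
            list_del_init(&p->ptrace_entry);
            release_task(p);
        }
    }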
diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile index 807ccfbf69b3..e6fae503d1bc 100644 --- a/kernel/rcu/Makefile +++ b/kernel/rcu/Makefile | |||
@@ -1,6 +1,6 @@ | |||
1 | obj-y += update.o srcu.o | 1 | obj-y += update.o srcu.o |
2 | obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o | 2 | obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o |
3 | obj-$(CONFIG_TREE_RCU) += tree.o | 3 | obj-$(CONFIG_TREE_RCU) += tree.o |
4 | obj-$(CONFIG_TREE_PREEMPT_RCU) += tree.o | 4 | obj-$(CONFIG_PREEMPT_RCU) += tree.o |
5 | obj-$(CONFIG_TREE_RCU_TRACE) += tree_trace.o | 5 | obj-$(CONFIG_TREE_RCU_TRACE) += tree_trace.o |
6 | obj-$(CONFIG_TINY_RCU) += tiny.o | 6 | obj-$(CONFIG_TINY_RCU) += tiny.o |
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index ff1a6de62f17..07bb02eda844 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h | |||
@@ -135,4 +135,6 @@ int rcu_jiffies_till_stall_check(void); | |||
135 | */ | 135 | */ |
136 | #define TPS(x) tracepoint_string(x) | 136 | #define TPS(x) tracepoint_string(x) |
137 | 137 | ||
138 | void rcu_early_boot_tests(void); | ||
139 | |||
138 | #endif /* __LINUX_RCU_H */ | 140 | #endif /* __LINUX_RCU_H */ |
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 240fa9094f83..4d559baf06e0 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c | |||
@@ -812,6 +812,7 @@ rcu_torture_cbflood(void *arg) | |||
812 | cur_ops->cb_barrier(); | 812 | cur_ops->cb_barrier(); |
813 | stutter_wait("rcu_torture_cbflood"); | 813 | stutter_wait("rcu_torture_cbflood"); |
814 | } while (!torture_must_stop()); | 814 | } while (!torture_must_stop()); |
815 | vfree(rhp); | ||
815 | torture_kthread_stopping("rcu_torture_cbflood"); | 816 | torture_kthread_stopping("rcu_torture_cbflood"); |
816 | return 0; | 817 | return 0; |
817 | } | 818 | } |
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index c0623fc47125..0db5649f8817 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c | |||
@@ -247,7 +247,7 @@ void rcu_bh_qs(void) | |||
247 | * be called from hardirq context. It is normally called from the | 247 | * be called from hardirq context. It is normally called from the |
248 | * scheduling-clock interrupt. | 248 | * scheduling-clock interrupt. |
249 | */ | 249 | */ |
250 | void rcu_check_callbacks(int cpu, int user) | 250 | void rcu_check_callbacks(int user) |
251 | { | 251 | { |
252 | RCU_TRACE(check_cpu_stalls()); | 252 | RCU_TRACE(check_cpu_stalls()); |
253 | if (user || rcu_is_cpu_rrupt_from_idle()) | 253 | if (user || rcu_is_cpu_rrupt_from_idle()) |
@@ -380,7 +380,9 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | |||
380 | } | 380 | } |
381 | EXPORT_SYMBOL_GPL(call_rcu_bh); | 381 | EXPORT_SYMBOL_GPL(call_rcu_bh); |
382 | 382 | ||
383 | void rcu_init(void) | 383 | void __init rcu_init(void) |
384 | { | 384 | { |
385 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | 385 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); |
386 | |||
387 | rcu_early_boot_tests(); | ||
386 | } | 388 | } |
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 133e47223095..7680fc275036 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c | |||
@@ -105,7 +105,7 @@ struct rcu_state sname##_state = { \ | |||
105 | .name = RCU_STATE_NAME(sname), \ | 105 | .name = RCU_STATE_NAME(sname), \ |
106 | .abbr = sabbr, \ | 106 | .abbr = sabbr, \ |
107 | }; \ | 107 | }; \ |
108 | DEFINE_PER_CPU(struct rcu_data, sname##_data) | 108 | DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data) |
109 | 109 | ||
110 | RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); | 110 | RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); |
111 | RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh); | 111 | RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh); |
@@ -152,19 +152,6 @@ EXPORT_SYMBOL_GPL(rcu_scheduler_active); | |||
152 | */ | 152 | */ |
153 | static int rcu_scheduler_fully_active __read_mostly; | 153 | static int rcu_scheduler_fully_active __read_mostly; |
154 | 154 | ||
155 | #ifdef CONFIG_RCU_BOOST | ||
156 | |||
157 | /* | ||
158 | * Control variables for per-CPU and per-rcu_node kthreads. These | ||
159 | * handle all flavors of RCU. | ||
160 | */ | ||
161 | static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); | ||
162 | DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | ||
163 | DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | ||
164 | DEFINE_PER_CPU(char, rcu_cpu_has_work); | ||
165 | |||
166 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
167 | |||
168 | static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); | 155 | static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); |
169 | static void invoke_rcu_core(void); | 156 | static void invoke_rcu_core(void); |
170 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); | 157 | static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); |
@@ -286,11 +273,11 @@ static void rcu_momentary_dyntick_idle(void) | |||
286 | * and requires special handling for preemptible RCU. | 273 | * and requires special handling for preemptible RCU. |
287 | * The caller must have disabled preemption. | 274 | * The caller must have disabled preemption. |
288 | */ | 275 | */ |
289 | void rcu_note_context_switch(int cpu) | 276 | void rcu_note_context_switch(void) |
290 | { | 277 | { |
291 | trace_rcu_utilization(TPS("Start context switch")); | 278 | trace_rcu_utilization(TPS("Start context switch")); |
292 | rcu_sched_qs(); | 279 | rcu_sched_qs(); |
293 | rcu_preempt_note_context_switch(cpu); | 280 | rcu_preempt_note_context_switch(); |
294 | if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) | 281 | if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) |
295 | rcu_momentary_dyntick_idle(); | 282 | rcu_momentary_dyntick_idle(); |
296 | trace_rcu_utilization(TPS("End context switch")); | 283 | trace_rcu_utilization(TPS("End context switch")); |
@@ -325,7 +312,7 @@ static void force_qs_rnp(struct rcu_state *rsp, | |||
325 | unsigned long *maxj), | 312 | unsigned long *maxj), |
326 | bool *isidle, unsigned long *maxj); | 313 | bool *isidle, unsigned long *maxj); |
327 | static void force_quiescent_state(struct rcu_state *rsp); | 314 | static void force_quiescent_state(struct rcu_state *rsp); |
328 | static int rcu_pending(int cpu); | 315 | static int rcu_pending(void); |
329 | 316 | ||
330 | /* | 317 | /* |
331 | * Return the number of RCU-sched batches processed thus far for debug & stats. | 318 | * Return the number of RCU-sched batches processed thus far for debug & stats. |
@@ -510,11 +497,11 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) | |||
510 | * we really have entered idle, and must do the appropriate accounting. | 497 | * we really have entered idle, and must do the appropriate accounting. |
511 | * The caller must have disabled interrupts. | 498 | * The caller must have disabled interrupts. |
512 | */ | 499 | */ |
513 | static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval, | 500 | static void rcu_eqs_enter_common(long long oldval, bool user) |
514 | bool user) | ||
515 | { | 501 | { |
516 | struct rcu_state *rsp; | 502 | struct rcu_state *rsp; |
517 | struct rcu_data *rdp; | 503 | struct rcu_data *rdp; |
504 | struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); | ||
518 | 505 | ||
519 | trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting); | 506 | trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting); |
520 | if (!user && !is_idle_task(current)) { | 507 | if (!user && !is_idle_task(current)) { |
@@ -531,7 +518,7 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval, | |||
531 | rdp = this_cpu_ptr(rsp->rda); | 518 | rdp = this_cpu_ptr(rsp->rda); |
532 | do_nocb_deferred_wakeup(rdp); | 519 | do_nocb_deferred_wakeup(rdp); |
533 | } | 520 | } |
534 | rcu_prepare_for_idle(smp_processor_id()); | 521 | rcu_prepare_for_idle(); |
535 | /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ | 522 | /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ |
536 | smp_mb__before_atomic(); /* See above. */ | 523 | smp_mb__before_atomic(); /* See above. */ |
537 | atomic_inc(&rdtp->dynticks); | 524 | atomic_inc(&rdtp->dynticks); |
@@ -565,7 +552,7 @@ static void rcu_eqs_enter(bool user) | |||
565 | WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0); | 552 | WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0); |
566 | if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) { | 553 | if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) { |
567 | rdtp->dynticks_nesting = 0; | 554 | rdtp->dynticks_nesting = 0; |
568 | rcu_eqs_enter_common(rdtp, oldval, user); | 555 | rcu_eqs_enter_common(oldval, user); |
569 | } else { | 556 | } else { |
570 | rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE; | 557 | rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE; |
571 | } | 558 | } |
@@ -589,7 +576,7 @@ void rcu_idle_enter(void) | |||
589 | 576 | ||
590 | local_irq_save(flags); | 577 | local_irq_save(flags); |
591 | rcu_eqs_enter(false); | 578 | rcu_eqs_enter(false); |
592 | rcu_sysidle_enter(this_cpu_ptr(&rcu_dynticks), 0); | 579 | rcu_sysidle_enter(0); |
593 | local_irq_restore(flags); | 580 | local_irq_restore(flags); |
594 | } | 581 | } |
595 | EXPORT_SYMBOL_GPL(rcu_idle_enter); | 582 | EXPORT_SYMBOL_GPL(rcu_idle_enter); |
@@ -639,8 +626,8 @@ void rcu_irq_exit(void) | |||
639 | if (rdtp->dynticks_nesting) | 626 | if (rdtp->dynticks_nesting) |
640 | trace_rcu_dyntick(TPS("--="), oldval, rdtp->dynticks_nesting); | 627 | trace_rcu_dyntick(TPS("--="), oldval, rdtp->dynticks_nesting); |
641 | else | 628 | else |
642 | rcu_eqs_enter_common(rdtp, oldval, true); | 629 | rcu_eqs_enter_common(oldval, true); |
643 | rcu_sysidle_enter(rdtp, 1); | 630 | rcu_sysidle_enter(1); |
644 | local_irq_restore(flags); | 631 | local_irq_restore(flags); |
645 | } | 632 | } |
646 | 633 | ||
@@ -651,16 +638,17 @@ void rcu_irq_exit(void) | |||
651 | * we really have exited idle, and must do the appropriate accounting. | 638 | * we really have exited idle, and must do the appropriate accounting. |
652 | * The caller must have disabled interrupts. | 639 | * The caller must have disabled interrupts. |
653 | */ | 640 | */ |
654 | static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval, | 641 | static void rcu_eqs_exit_common(long long oldval, int user) |
655 | int user) | ||
656 | { | 642 | { |
643 | struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); | ||
644 | |||
657 | rcu_dynticks_task_exit(); | 645 | rcu_dynticks_task_exit(); |
658 | smp_mb__before_atomic(); /* Force ordering w/previous sojourn. */ | 646 | smp_mb__before_atomic(); /* Force ordering w/previous sojourn. */ |
659 | atomic_inc(&rdtp->dynticks); | 647 | atomic_inc(&rdtp->dynticks); |
660 | /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ | 648 | /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ |
661 | smp_mb__after_atomic(); /* See above. */ | 649 | smp_mb__after_atomic(); /* See above. */ |
662 | WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); | 650 | WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); |
663 | rcu_cleanup_after_idle(smp_processor_id()); | 651 | rcu_cleanup_after_idle(); |
664 | trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting); | 652 | trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting); |
665 | if (!user && !is_idle_task(current)) { | 653 | if (!user && !is_idle_task(current)) { |
666 | struct task_struct *idle __maybe_unused = | 654 | struct task_struct *idle __maybe_unused = |
@@ -691,7 +679,7 @@ static void rcu_eqs_exit(bool user) | |||
691 | rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE; | 679 | rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE; |
692 | } else { | 680 | } else { |
693 | rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; | 681 | rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; |
694 | rcu_eqs_exit_common(rdtp, oldval, user); | 682 | rcu_eqs_exit_common(oldval, user); |
695 | } | 683 | } |
696 | } | 684 | } |
697 | 685 | ||
@@ -712,7 +700,7 @@ void rcu_idle_exit(void) | |||
712 | 700 | ||
713 | local_irq_save(flags); | 701 | local_irq_save(flags); |
714 | rcu_eqs_exit(false); | 702 | rcu_eqs_exit(false); |
715 | rcu_sysidle_exit(this_cpu_ptr(&rcu_dynticks), 0); | 703 | rcu_sysidle_exit(0); |
716 | local_irq_restore(flags); | 704 | local_irq_restore(flags); |
717 | } | 705 | } |
718 | EXPORT_SYMBOL_GPL(rcu_idle_exit); | 706 | EXPORT_SYMBOL_GPL(rcu_idle_exit); |
@@ -763,8 +751,8 @@ void rcu_irq_enter(void) | |||
763 | if (oldval) | 751 | if (oldval) |
764 | trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting); | 752 | trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting); |
765 | else | 753 | else |
766 | rcu_eqs_exit_common(rdtp, oldval, true); | 754 | rcu_eqs_exit_common(oldval, true); |
767 | rcu_sysidle_exit(rdtp, 1); | 755 | rcu_sysidle_exit(1); |
768 | local_irq_restore(flags); | 756 | local_irq_restore(flags); |
769 | } | 757 | } |
770 | 758 | ||
@@ -2387,7 +2375,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
2387 | * invoked from the scheduling-clock interrupt. If rcu_pending returns | 2375 | * invoked from the scheduling-clock interrupt. If rcu_pending returns |
2388 | * false, there is no point in invoking rcu_check_callbacks(). | 2376 | * false, there is no point in invoking rcu_check_callbacks(). |
2389 | */ | 2377 | */ |
2390 | void rcu_check_callbacks(int cpu, int user) | 2378 | void rcu_check_callbacks(int user) |
2391 | { | 2379 | { |
2392 | trace_rcu_utilization(TPS("Start scheduler-tick")); | 2380 | trace_rcu_utilization(TPS("Start scheduler-tick")); |
2393 | increment_cpu_stall_ticks(); | 2381 | increment_cpu_stall_ticks(); |
@@ -2419,8 +2407,8 @@ void rcu_check_callbacks(int cpu, int user) | |||
2419 | 2407 | ||
2420 | rcu_bh_qs(); | 2408 | rcu_bh_qs(); |
2421 | } | 2409 | } |
2422 | rcu_preempt_check_callbacks(cpu); | 2410 | rcu_preempt_check_callbacks(); |
2423 | if (rcu_pending(cpu)) | 2411 | if (rcu_pending()) |
2424 | invoke_rcu_core(); | 2412 | invoke_rcu_core(); |
2425 | if (user) | 2413 | if (user) |
2426 | rcu_note_voluntary_context_switch(current); | 2414 | rcu_note_voluntary_context_switch(current); |
@@ -2963,6 +2951,9 @@ static int synchronize_sched_expedited_cpu_stop(void *data) | |||
2963 | */ | 2951 | */ |
2964 | void synchronize_sched_expedited(void) | 2952 | void synchronize_sched_expedited(void) |
2965 | { | 2953 | { |
2954 | cpumask_var_t cm; | ||
2955 | bool cma = false; | ||
2956 | int cpu; | ||
2966 | long firstsnap, s, snap; | 2957 | long firstsnap, s, snap; |
2967 | int trycount = 0; | 2958 | int trycount = 0; |
2968 | struct rcu_state *rsp = &rcu_sched_state; | 2959 | struct rcu_state *rsp = &rcu_sched_state; |
@@ -2997,11 +2988,26 @@ void synchronize_sched_expedited(void) | |||
2997 | } | 2988 | } |
2998 | WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id())); | 2989 | WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id())); |
2999 | 2990 | ||
2991 | /* Offline CPUs, idle CPUs, and any CPU we run on are quiescent. */ | ||
2992 | cma = zalloc_cpumask_var(&cm, GFP_KERNEL); | ||
2993 | if (cma) { | ||
2994 | cpumask_copy(cm, cpu_online_mask); | ||
2995 | cpumask_clear_cpu(raw_smp_processor_id(), cm); | ||
2996 | for_each_cpu(cpu, cm) { | ||
2997 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); | ||
2998 | |||
2999 | if (!(atomic_add_return(0, &rdtp->dynticks) & 0x1)) | ||
3000 | cpumask_clear_cpu(cpu, cm); | ||
3001 | } | ||
3002 | if (cpumask_weight(cm) == 0) | ||
3003 | goto all_cpus_idle; | ||
3004 | } | ||
3005 | |||
3000 | /* | 3006 | /* |
3001 | * Each pass through the following loop attempts to force a | 3007 | * Each pass through the following loop attempts to force a |
3002 | * context switch on each CPU. | 3008 | * context switch on each CPU. |
3003 | */ | 3009 | */ |
3004 | while (try_stop_cpus(cpu_online_mask, | 3010 | while (try_stop_cpus(cma ? cm : cpu_online_mask, |
3005 | synchronize_sched_expedited_cpu_stop, | 3011 | synchronize_sched_expedited_cpu_stop, |
3006 | NULL) == -EAGAIN) { | 3012 | NULL) == -EAGAIN) { |
3007 | put_online_cpus(); | 3013 | put_online_cpus(); |
@@ -3013,6 +3019,7 @@ void synchronize_sched_expedited(void) | |||
3013 | /* ensure test happens before caller kfree */ | 3019 | /* ensure test happens before caller kfree */ |
3014 | smp_mb__before_atomic(); /* ^^^ */ | 3020 | smp_mb__before_atomic(); /* ^^^ */ |
3015 | atomic_long_inc(&rsp->expedited_workdone1); | 3021 | atomic_long_inc(&rsp->expedited_workdone1); |
3022 | free_cpumask_var(cm); | ||
3016 | return; | 3023 | return; |
3017 | } | 3024 | } |
3018 | 3025 | ||
@@ -3022,6 +3029,7 @@ void synchronize_sched_expedited(void) | |||
3022 | } else { | 3029 | } else { |
3023 | wait_rcu_gp(call_rcu_sched); | 3030 | wait_rcu_gp(call_rcu_sched); |
3024 | atomic_long_inc(&rsp->expedited_normal); | 3031 | atomic_long_inc(&rsp->expedited_normal); |
3032 | free_cpumask_var(cm); | ||
3025 | return; | 3033 | return; |
3026 | } | 3034 | } |
3027 | 3035 | ||
@@ -3031,6 +3039,7 @@ void synchronize_sched_expedited(void) | |||
3031 | /* ensure test happens before caller kfree */ | 3039 | /* ensure test happens before caller kfree */ |
3032 | smp_mb__before_atomic(); /* ^^^ */ | 3040 | smp_mb__before_atomic(); /* ^^^ */ |
3033 | atomic_long_inc(&rsp->expedited_workdone2); | 3041 | atomic_long_inc(&rsp->expedited_workdone2); |
3042 | free_cpumask_var(cm); | ||
3034 | return; | 3043 | return; |
3035 | } | 3044 | } |
3036 | 3045 | ||
@@ -3045,6 +3054,7 @@ void synchronize_sched_expedited(void) | |||
3045 | /* CPU hotplug operation in flight, use normal GP. */ | 3054 | /* CPU hotplug operation in flight, use normal GP. */ |
3046 | wait_rcu_gp(call_rcu_sched); | 3055 | wait_rcu_gp(call_rcu_sched); |
3047 | atomic_long_inc(&rsp->expedited_normal); | 3056 | atomic_long_inc(&rsp->expedited_normal); |
3057 | free_cpumask_var(cm); | ||
3048 | return; | 3058 | return; |
3049 | } | 3059 | } |
3050 | snap = atomic_long_read(&rsp->expedited_start); | 3060 | snap = atomic_long_read(&rsp->expedited_start); |
@@ -3052,6 +3062,9 @@ void synchronize_sched_expedited(void) | |||
3052 | } | 3062 | } |
3053 | atomic_long_inc(&rsp->expedited_stoppedcpus); | 3063 | atomic_long_inc(&rsp->expedited_stoppedcpus); |
3054 | 3064 | ||
3065 | all_cpus_idle: | ||
3066 | free_cpumask_var(cm); | ||
3067 | |||
3055 | /* | 3068 | /* |
3056 | * Everyone up to our most recent fetch is covered by our grace | 3069 | * Everyone up to our most recent fetch is covered by our grace |
3057 | * period. Update the counter, but only if our work is still | 3070 | * period. Update the counter, but only if our work is still |
@@ -3143,12 +3156,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) | |||
3143 | * by the current CPU, returning 1 if so. This function is part of the | 3156 | * by the current CPU, returning 1 if so. This function is part of the |
3144 | * RCU implementation; it is -not- an exported member of the RCU API. | 3157 | * RCU implementation; it is -not- an exported member of the RCU API. |
3145 | */ | 3158 | */ |
3146 | static int rcu_pending(int cpu) | 3159 | static int rcu_pending(void) |
3147 | { | 3160 | { |
3148 | struct rcu_state *rsp; | 3161 | struct rcu_state *rsp; |
3149 | 3162 | ||
3150 | for_each_rcu_flavor(rsp) | 3163 | for_each_rcu_flavor(rsp) |
3151 | if (__rcu_pending(rsp, per_cpu_ptr(rsp->rda, cpu))) | 3164 | if (__rcu_pending(rsp, this_cpu_ptr(rsp->rda))) |
3152 | return 1; | 3165 | return 1; |
3153 | return 0; | 3166 | return 0; |
3154 | } | 3167 | } |
@@ -3158,7 +3171,7 @@ static int rcu_pending(int cpu) | |||
3158 | * non-NULL, store an indication of whether all callbacks are lazy. | 3171 | * non-NULL, store an indication of whether all callbacks are lazy. |
3159 | * (If there are no callbacks, all of them are deemed to be lazy.) | 3172 | * (If there are no callbacks, all of them are deemed to be lazy.) |
3160 | */ | 3173 | */ |
3161 | static int __maybe_unused rcu_cpu_has_callbacks(int cpu, bool *all_lazy) | 3174 | static int __maybe_unused rcu_cpu_has_callbacks(bool *all_lazy) |
3162 | { | 3175 | { |
3163 | bool al = true; | 3176 | bool al = true; |
3164 | bool hc = false; | 3177 | bool hc = false; |
@@ -3166,7 +3179,7 @@ static int __maybe_unused rcu_cpu_has_callbacks(int cpu, bool *all_lazy) | |||
3166 | struct rcu_state *rsp; | 3179 | struct rcu_state *rsp; |
3167 | 3180 | ||
3168 | for_each_rcu_flavor(rsp) { | 3181 | for_each_rcu_flavor(rsp) { |
3169 | rdp = per_cpu_ptr(rsp->rda, cpu); | 3182 | rdp = this_cpu_ptr(rsp->rda); |
3170 | if (!rdp->nxtlist) | 3183 | if (!rdp->nxtlist) |
3171 | continue; | 3184 | continue; |
3172 | hc = true; | 3185 | hc = true; |
@@ -3299,11 +3312,16 @@ static void _rcu_barrier(struct rcu_state *rsp) | |||
3299 | continue; | 3312 | continue; |
3300 | rdp = per_cpu_ptr(rsp->rda, cpu); | 3313 | rdp = per_cpu_ptr(rsp->rda, cpu); |
3301 | if (rcu_is_nocb_cpu(cpu)) { | 3314 | if (rcu_is_nocb_cpu(cpu)) { |
3302 | _rcu_barrier_trace(rsp, "OnlineNoCB", cpu, | 3315 | if (!rcu_nocb_cpu_needs_barrier(rsp, cpu)) { |
3303 | rsp->n_barrier_done); | 3316 | _rcu_barrier_trace(rsp, "OfflineNoCB", cpu, |
3304 | atomic_inc(&rsp->barrier_cpu_count); | 3317 | rsp->n_barrier_done); |
3305 | __call_rcu(&rdp->barrier_head, rcu_barrier_callback, | 3318 | } else { |
3306 | rsp, cpu, 0); | 3319 | _rcu_barrier_trace(rsp, "OnlineNoCB", cpu, |
3320 | rsp->n_barrier_done); | ||
3321 | atomic_inc(&rsp->barrier_cpu_count); | ||
3322 | __call_rcu(&rdp->barrier_head, | ||
3323 | rcu_barrier_callback, rsp, cpu, 0); | ||
3324 | } | ||
3307 | } else if (ACCESS_ONCE(rdp->qlen)) { | 3325 | } else if (ACCESS_ONCE(rdp->qlen)) { |
3308 | _rcu_barrier_trace(rsp, "OnlineQ", cpu, | 3326 | _rcu_barrier_trace(rsp, "OnlineQ", cpu, |
3309 | rsp->n_barrier_done); | 3327 | rsp->n_barrier_done); |
@@ -3480,8 +3498,10 @@ static int rcu_cpu_notify(struct notifier_block *self, | |||
3480 | case CPU_DEAD_FROZEN: | 3498 | case CPU_DEAD_FROZEN: |
3481 | case CPU_UP_CANCELED: | 3499 | case CPU_UP_CANCELED: |
3482 | case CPU_UP_CANCELED_FROZEN: | 3500 | case CPU_UP_CANCELED_FROZEN: |
3483 | for_each_rcu_flavor(rsp) | 3501 | for_each_rcu_flavor(rsp) { |
3484 | rcu_cleanup_dead_cpu(cpu, rsp); | 3502 | rcu_cleanup_dead_cpu(cpu, rsp); |
3503 | do_nocb_deferred_wakeup(per_cpu_ptr(rsp->rda, cpu)); | ||
3504 | } | ||
3485 | break; | 3505 | break; |
3486 | default: | 3506 | default: |
3487 | break; | 3507 | break; |
@@ -3761,6 +3781,8 @@ void __init rcu_init(void) | |||
3761 | pm_notifier(rcu_pm_notify, 0); | 3781 | pm_notifier(rcu_pm_notify, 0); |
3762 | for_each_online_cpu(cpu) | 3782 | for_each_online_cpu(cpu) |
3763 | rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); | 3783 | rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); |
3784 | |||
3785 | rcu_early_boot_tests(); | ||
3764 | } | 3786 | } |
3765 | 3787 | ||
3766 | #include "tree_plugin.h" | 3788 | #include "tree_plugin.h" |
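The largest tree.c change teaches synchronize_sched_expedited() to skip CPUs that are already quiescent: it builds a cpumask of online CPUs, removes itself, removes every CPU whose ->dynticks counter is even (i.e. idle), and only stops the CPUs that remain, bailing out entirely if none do. A sketch of that mask-building step; struct rcu_dynticks and the per-CPU rcu_dynticks variable are internal to tree.c, so treat this as an illustration of the idea rather than standalone code:

    /* Return true if any CPU left in *cm still needs a forced context switch. */
    static bool build_nonidle_mask(struct cpumask *cm)
    {
        int cpu;

        cpumask_copy(cm, cpu_online_mask);
        cpumask_clear_cpu(raw_smp_processor_id(), cm);  /* we are quiescent */

        for_each_cpu(cpu, cm) {
            struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);

            /* An even ->dynticks value means the CPU is idle, hence quiescent. */
            if (!(atomic_add_return(0, &rdtp->dynticks) & 0x1))
                cpumask_clear_cpu(cpu, cm);
        }
        return cpumask_weight(cm) != 0;
    }

The rest of the tree.c diff is mostly mechanical: the cpu arguments vanish in favour of this_cpu_ptr() now that these functions only ever run on the local CPU, and _rcu_barrier() learns to skip no-CBs CPUs that have no callbacks queued.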
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index d03764652d91..8e7b1843896e 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h | |||
@@ -139,7 +139,7 @@ struct rcu_node { | |||
139 | unsigned long expmask; /* Groups that have ->blkd_tasks */ | 139 | unsigned long expmask; /* Groups that have ->blkd_tasks */ |
140 | /* elements that need to drain to allow the */ | 140 | /* elements that need to drain to allow the */ |
141 | /* current expedited grace period to */ | 141 | /* current expedited grace period to */ |
142 | /* complete (only for TREE_PREEMPT_RCU). */ | 142 | /* complete (only for PREEMPT_RCU). */ |
143 | unsigned long qsmaskinit; | 143 | unsigned long qsmaskinit; |
144 | /* Per-GP initial value for qsmask & expmask. */ | 144 | /* Per-GP initial value for qsmask & expmask. */ |
145 | unsigned long grpmask; /* Mask to apply to parent qsmask. */ | 145 | unsigned long grpmask; /* Mask to apply to parent qsmask. */ |
@@ -530,10 +530,10 @@ DECLARE_PER_CPU(struct rcu_data, rcu_sched_data); | |||
530 | extern struct rcu_state rcu_bh_state; | 530 | extern struct rcu_state rcu_bh_state; |
531 | DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); | 531 | DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); |
532 | 532 | ||
533 | #ifdef CONFIG_TREE_PREEMPT_RCU | 533 | #ifdef CONFIG_PREEMPT_RCU |
534 | extern struct rcu_state rcu_preempt_state; | 534 | extern struct rcu_state rcu_preempt_state; |
535 | DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data); | 535 | DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data); |
536 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | 536 | #endif /* #ifdef CONFIG_PREEMPT_RCU */ |
537 | 537 | ||
538 | #ifdef CONFIG_RCU_BOOST | 538 | #ifdef CONFIG_RCU_BOOST |
539 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | 539 | DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); |
@@ -547,7 +547,7 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work); | |||
547 | /* Forward declarations for rcutree_plugin.h */ | 547 | /* Forward declarations for rcutree_plugin.h */ |
548 | static void rcu_bootup_announce(void); | 548 | static void rcu_bootup_announce(void); |
549 | long rcu_batches_completed(void); | 549 | long rcu_batches_completed(void); |
550 | static void rcu_preempt_note_context_switch(int cpu); | 550 | static void rcu_preempt_note_context_switch(void); |
551 | static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); | 551 | static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); |
552 | #ifdef CONFIG_HOTPLUG_CPU | 552 | #ifdef CONFIG_HOTPLUG_CPU |
553 | static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, | 553 | static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, |
@@ -561,12 +561,12 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | |||
561 | struct rcu_node *rnp, | 561 | struct rcu_node *rnp, |
562 | struct rcu_data *rdp); | 562 | struct rcu_data *rdp); |
563 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 563 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
564 | static void rcu_preempt_check_callbacks(int cpu); | 564 | static void rcu_preempt_check_callbacks(void); |
565 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); | 565 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); |
566 | #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) | 566 | #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PREEMPT_RCU) |
567 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, | 567 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, |
568 | bool wake); | 568 | bool wake); |
569 | #endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */ | 569 | #endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PREEMPT_RCU) */ |
570 | static void __init __rcu_init_preempt(void); | 570 | static void __init __rcu_init_preempt(void); |
571 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); | 571 | static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); |
572 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); | 572 | static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); |
@@ -579,14 +579,15 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |||
579 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 579 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
580 | static void __init rcu_spawn_boost_kthreads(void); | 580 | static void __init rcu_spawn_boost_kthreads(void); |
581 | static void rcu_prepare_kthreads(int cpu); | 581 | static void rcu_prepare_kthreads(int cpu); |
582 | static void rcu_cleanup_after_idle(int cpu); | 582 | static void rcu_cleanup_after_idle(void); |
583 | static void rcu_prepare_for_idle(int cpu); | 583 | static void rcu_prepare_for_idle(void); |
584 | static void rcu_idle_count_callbacks_posted(void); | 584 | static void rcu_idle_count_callbacks_posted(void); |
585 | static void print_cpu_stall_info_begin(void); | 585 | static void print_cpu_stall_info_begin(void); |
586 | static void print_cpu_stall_info(struct rcu_state *rsp, int cpu); | 586 | static void print_cpu_stall_info(struct rcu_state *rsp, int cpu); |
587 | static void print_cpu_stall_info_end(void); | 587 | static void print_cpu_stall_info_end(void); |
588 | static void zero_cpu_stall_ticks(struct rcu_data *rdp); | 588 | static void zero_cpu_stall_ticks(struct rcu_data *rdp); |
589 | static void increment_cpu_stall_ticks(void); | 589 | static void increment_cpu_stall_ticks(void); |
590 | static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu); | ||
590 | static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq); | 591 | static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq); |
591 | static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp); | 592 | static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp); |
592 | static void rcu_init_one_nocb(struct rcu_node *rnp); | 593 | static void rcu_init_one_nocb(struct rcu_node *rnp); |
@@ -605,8 +606,8 @@ static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp); | |||
605 | #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ | 606 | #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ |
606 | static void __maybe_unused rcu_kick_nohz_cpu(int cpu); | 607 | static void __maybe_unused rcu_kick_nohz_cpu(int cpu); |
607 | static bool init_nocb_callback_list(struct rcu_data *rdp); | 608 | static bool init_nocb_callback_list(struct rcu_data *rdp); |
608 | static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq); | 609 | static void rcu_sysidle_enter(int irq); |
609 | static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq); | 610 | static void rcu_sysidle_exit(int irq); |
610 | static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle, | 611 | static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle, |
611 | unsigned long *maxj); | 612 | unsigned long *maxj); |
612 | static bool is_sysidle_rcu_state(struct rcu_state *rsp); | 613 | static bool is_sysidle_rcu_state(struct rcu_state *rsp); |
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 387dd4599344..3ec85cb5d544 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h | |||
@@ -30,14 +30,24 @@ | |||
30 | #include <linux/smpboot.h> | 30 | #include <linux/smpboot.h> |
31 | #include "../time/tick-internal.h" | 31 | #include "../time/tick-internal.h" |
32 | 32 | ||
33 | #define RCU_KTHREAD_PRIO 1 | ||
34 | |||
35 | #ifdef CONFIG_RCU_BOOST | 33 | #ifdef CONFIG_RCU_BOOST |
34 | |||
36 | #include "../locking/rtmutex_common.h" | 35 | #include "../locking/rtmutex_common.h" |
37 | #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO | 36 | |
38 | #else | 37 | /* rcuc/rcub kthread realtime priority */ |
39 | #define RCU_BOOST_PRIO RCU_KTHREAD_PRIO | 38 | static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO; |
40 | #endif | 39 | module_param(kthread_prio, int, 0644); |
40 | |||
41 | /* | ||
42 | * Control variables for per-CPU and per-rcu_node kthreads. These | ||
43 | * handle all flavors of RCU. | ||
44 | */ | ||
45 | static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); | ||
46 | DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | ||
47 | DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); | ||
48 | DEFINE_PER_CPU(char, rcu_cpu_has_work); | ||
49 | |||
50 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
41 | 51 | ||
42 | #ifdef CONFIG_RCU_NOCB_CPU | 52 | #ifdef CONFIG_RCU_NOCB_CPU |
43 | static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ | 53 | static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ |
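This hunk replaces the compile-time RCU_KTHREAD_PRIO/RCU_BOOST_PRIO constants with a kthread_prio module parameter (reachable as rcutree.kthread_prio, since tree_plugin.h is compiled into tree.c, and adjustable later via /sys/module/rcutree/parameters/ thanks to the 0644 mode), and moves the boost-kthread per-CPU variables under CONFIG_RCU_BOOST where they are actually used. A hedged sketch of how such a tunable priority is typically applied when a kthread is spawned; spawn_prio_kthread() is an illustrative helper, not the tree_plugin.h code:

    #include <linux/err.h>
    #include <linux/kthread.h>
    #include <linux/sched.h>

    static int spawn_prio_kthread(int (*fn)(void *), void *arg, int prio)
    {
        struct sched_param sp = { .sched_priority = prio };
        struct task_struct *t = kthread_run(fn, arg, "prio_kthread");

        if (IS_ERR(t))
            return PTR_ERR(t);
        /* Promote the worker to SCHED_FIFO at the requested priority. */
        sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
        return 0;
    }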
@@ -72,9 +82,6 @@ static void __init rcu_bootup_announce_oddness(void) | |||
72 | #ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE | 82 | #ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE |
73 | pr_info("\tRCU torture testing starts during boot.\n"); | 83 | pr_info("\tRCU torture testing starts during boot.\n"); |
74 | #endif | 84 | #endif |
75 | #if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE) | ||
76 | pr_info("\tDump stacks of tasks blocking RCU-preempt GP.\n"); | ||
77 | #endif | ||
78 | #if defined(CONFIG_RCU_CPU_STALL_INFO) | 85 | #if defined(CONFIG_RCU_CPU_STALL_INFO) |
79 | pr_info("\tAdditional per-CPU info printed with stalls.\n"); | 86 | pr_info("\tAdditional per-CPU info printed with stalls.\n"); |
80 | #endif | 87 | #endif |
@@ -85,9 +92,12 @@ static void __init rcu_bootup_announce_oddness(void) | |||
85 | pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); | 92 | pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); |
86 | if (nr_cpu_ids != NR_CPUS) | 93 | if (nr_cpu_ids != NR_CPUS) |
87 | pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); | 94 | pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); |
95 | #ifdef CONFIG_RCU_BOOST | ||
96 | pr_info("\tRCU kthread priority: %d.\n", kthread_prio); | ||
97 | #endif | ||
88 | } | 98 | } |
89 | 99 | ||
90 | #ifdef CONFIG_TREE_PREEMPT_RCU | 100 | #ifdef CONFIG_PREEMPT_RCU |
91 | 101 | ||
92 | RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu); | 102 | RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu); |
93 | static struct rcu_state *rcu_state_p = &rcu_preempt_state; | 103 | static struct rcu_state *rcu_state_p = &rcu_preempt_state; |
@@ -156,7 +166,7 @@ static void rcu_preempt_qs(void) | |||
156 | * | 166 | * |
157 | * Caller must disable preemption. | 167 | * Caller must disable preemption. |
158 | */ | 168 | */ |
159 | static void rcu_preempt_note_context_switch(int cpu) | 169 | static void rcu_preempt_note_context_switch(void) |
160 | { | 170 | { |
161 | struct task_struct *t = current; | 171 | struct task_struct *t = current; |
162 | unsigned long flags; | 172 | unsigned long flags; |
@@ -167,7 +177,7 @@ static void rcu_preempt_note_context_switch(int cpu) | |||
167 | !t->rcu_read_unlock_special.b.blocked) { | 177 | !t->rcu_read_unlock_special.b.blocked) { |
168 | 178 | ||
169 | /* Possibly blocking in an RCU read-side critical section. */ | 179 | /* Possibly blocking in an RCU read-side critical section. */ |
170 | rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu); | 180 | rdp = this_cpu_ptr(rcu_preempt_state.rda); |
171 | rnp = rdp->mynode; | 181 | rnp = rdp->mynode; |
172 | raw_spin_lock_irqsave(&rnp->lock, flags); | 182 | raw_spin_lock_irqsave(&rnp->lock, flags); |
173 | smp_mb__after_unlock_lock(); | 183 | smp_mb__after_unlock_lock(); |
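Several hunks in this file drop the explicit cpu argument and switch from per_cpu_ptr(..., cpu) to this_cpu_ptr(...), which is only safe because the callers run with preemption disabled. The equivalence in isolation, on an illustrative per-CPU variable that is not part of the patch:

#include <linux/percpu.h>
#include <linux/smp.h>

static DEFINE_PER_CPU(int, demo_counter);

static void demo_bump_this_cpu(void)
{
        /* Caller must have preemption disabled, as rcu_preempt_note_context_switch() does. */
        int *p = this_cpu_ptr(&demo_counter);

        /* Equivalent to per_cpu_ptr(&demo_counter, smp_processor_id()) in this context. */
        (*p)++;
}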
@@ -415,8 +425,6 @@ void rcu_read_unlock_special(struct task_struct *t) | |||
415 | } | 425 | } |
416 | } | 426 | } |
417 | 427 | ||
418 | #ifdef CONFIG_RCU_CPU_STALL_VERBOSE | ||
419 | |||
420 | /* | 428 | /* |
421 | * Dump detailed information for all tasks blocking the current RCU | 429 | * Dump detailed information for all tasks blocking the current RCU |
422 | * grace period on the specified rcu_node structure. | 430 | * grace period on the specified rcu_node structure. |
@@ -451,14 +459,6 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp) | |||
451 | rcu_print_detail_task_stall_rnp(rnp); | 459 | rcu_print_detail_task_stall_rnp(rnp); |
452 | } | 460 | } |
453 | 461 | ||
454 | #else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ | ||
455 | |||
456 | static void rcu_print_detail_task_stall(struct rcu_state *rsp) | ||
457 | { | ||
458 | } | ||
459 | |||
460 | #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ | ||
461 | |||
462 | #ifdef CONFIG_RCU_CPU_STALL_INFO | 462 | #ifdef CONFIG_RCU_CPU_STALL_INFO |
463 | 463 | ||
464 | static void rcu_print_task_stall_begin(struct rcu_node *rnp) | 464 | static void rcu_print_task_stall_begin(struct rcu_node *rnp) |
@@ -621,7 +621,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | |||
621 | * | 621 | * |
622 | * Caller must disable hard irqs. | 622 | * Caller must disable hard irqs. |
623 | */ | 623 | */ |
624 | static void rcu_preempt_check_callbacks(int cpu) | 624 | static void rcu_preempt_check_callbacks(void) |
625 | { | 625 | { |
626 | struct task_struct *t = current; | 626 | struct task_struct *t = current; |
627 | 627 | ||
@@ -630,8 +630,8 @@ static void rcu_preempt_check_callbacks(int cpu) | |||
630 | return; | 630 | return; |
631 | } | 631 | } |
632 | if (t->rcu_read_lock_nesting > 0 && | 632 | if (t->rcu_read_lock_nesting > 0 && |
633 | per_cpu(rcu_preempt_data, cpu).qs_pending && | 633 | __this_cpu_read(rcu_preempt_data.qs_pending) && |
634 | !per_cpu(rcu_preempt_data, cpu).passed_quiesce) | 634 | !__this_cpu_read(rcu_preempt_data.passed_quiesce)) |
635 | t->rcu_read_unlock_special.b.need_qs = true; | 635 | t->rcu_read_unlock_special.b.need_qs = true; |
636 | } | 636 | } |
637 | 637 | ||
@@ -919,7 +919,7 @@ void exit_rcu(void) | |||
919 | __rcu_read_unlock(); | 919 | __rcu_read_unlock(); |
920 | } | 920 | } |
921 | 921 | ||
922 | #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | 922 | #else /* #ifdef CONFIG_PREEMPT_RCU */ |
923 | 923 | ||
924 | static struct rcu_state *rcu_state_p = &rcu_sched_state; | 924 | static struct rcu_state *rcu_state_p = &rcu_sched_state; |
925 | 925 | ||
@@ -945,7 +945,7 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed); | |||
945 | * Because preemptible RCU does not exist, we never have to check for | 945 | * Because preemptible RCU does not exist, we never have to check for |
946 | * CPUs being in quiescent states. | 946 | * CPUs being in quiescent states. |
947 | */ | 947 | */ |
948 | static void rcu_preempt_note_context_switch(int cpu) | 948 | static void rcu_preempt_note_context_switch(void) |
949 | { | 949 | { |
950 | } | 950 | } |
951 | 951 | ||
@@ -1017,7 +1017,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | |||
1017 | * Because preemptible RCU does not exist, it never has any callbacks | 1017 | * Because preemptible RCU does not exist, it never has any callbacks |
1018 | * to check. | 1018 | * to check. |
1019 | */ | 1019 | */ |
1020 | static void rcu_preempt_check_callbacks(int cpu) | 1020 | static void rcu_preempt_check_callbacks(void) |
1021 | { | 1021 | { |
1022 | } | 1022 | } |
1023 | 1023 | ||
@@ -1070,7 +1070,7 @@ void exit_rcu(void) | |||
1070 | { | 1070 | { |
1071 | } | 1071 | } |
1072 | 1072 | ||
1073 | #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ | 1073 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ |
1074 | 1074 | ||
1075 | #ifdef CONFIG_RCU_BOOST | 1075 | #ifdef CONFIG_RCU_BOOST |
1076 | 1076 | ||
@@ -1326,7 +1326,7 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |||
1326 | smp_mb__after_unlock_lock(); | 1326 | smp_mb__after_unlock_lock(); |
1327 | rnp->boost_kthread_task = t; | 1327 | rnp->boost_kthread_task = t; |
1328 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1328 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
1329 | sp.sched_priority = RCU_BOOST_PRIO; | 1329 | sp.sched_priority = kthread_prio; |
1330 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | 1330 | sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); |
1331 | wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ | 1331 | wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ |
1332 | return 0; | 1332 | return 0; |
@@ -1343,7 +1343,7 @@ static void rcu_cpu_kthread_setup(unsigned int cpu) | |||
1343 | { | 1343 | { |
1344 | struct sched_param sp; | 1344 | struct sched_param sp; |
1345 | 1345 | ||
1346 | sp.sched_priority = RCU_KTHREAD_PRIO; | 1346 | sp.sched_priority = kthread_prio; |
1347 | sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); | 1347 | sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); |
1348 | } | 1348 | } |
1349 | 1349 | ||
@@ -1512,10 +1512,10 @@ static void rcu_prepare_kthreads(int cpu) | |||
1512 | * any flavor of RCU. | 1512 | * any flavor of RCU. |
1513 | */ | 1513 | */ |
1514 | #ifndef CONFIG_RCU_NOCB_CPU_ALL | 1514 | #ifndef CONFIG_RCU_NOCB_CPU_ALL |
1515 | int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) | 1515 | int rcu_needs_cpu(unsigned long *delta_jiffies) |
1516 | { | 1516 | { |
1517 | *delta_jiffies = ULONG_MAX; | 1517 | *delta_jiffies = ULONG_MAX; |
1518 | return rcu_cpu_has_callbacks(cpu, NULL); | 1518 | return rcu_cpu_has_callbacks(NULL); |
1519 | } | 1519 | } |
1520 | #endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ | 1520 | #endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ |
1521 | 1521 | ||
@@ -1523,7 +1523,7 @@ int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) | |||
1523 | * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up | 1523 | * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up |
1524 | * after it. | 1524 | * after it. |
1525 | */ | 1525 | */ |
1526 | static void rcu_cleanup_after_idle(int cpu) | 1526 | static void rcu_cleanup_after_idle(void) |
1527 | { | 1527 | { |
1528 | } | 1528 | } |
1529 | 1529 | ||
@@ -1531,7 +1531,7 @@ static void rcu_cleanup_after_idle(int cpu) | |||
1531 | * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n, | 1531 | * Do the idle-entry grace-period work, which, because CONFIG_RCU_FAST_NO_HZ=n, |
1532 | * is nothing. | 1532 | * is nothing. |
1533 | */ | 1533 | */ |
1534 | static void rcu_prepare_for_idle(int cpu) | 1534 | static void rcu_prepare_for_idle(void) |
1535 | { | 1535 | { |
1536 | } | 1536 | } |
1537 | 1537 | ||
@@ -1624,15 +1624,15 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void) | |||
1624 | * The caller must have disabled interrupts. | 1624 | * The caller must have disabled interrupts. |
1625 | */ | 1625 | */ |
1626 | #ifndef CONFIG_RCU_NOCB_CPU_ALL | 1626 | #ifndef CONFIG_RCU_NOCB_CPU_ALL |
1627 | int rcu_needs_cpu(int cpu, unsigned long *dj) | 1627 | int rcu_needs_cpu(unsigned long *dj) |
1628 | { | 1628 | { |
1629 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); | 1629 | struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); |
1630 | 1630 | ||
1631 | /* Snapshot to detect later posting of non-lazy callback. */ | 1631 | /* Snapshot to detect later posting of non-lazy callback. */ |
1632 | rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; | 1632 | rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; |
1633 | 1633 | ||
1634 | /* If no callbacks, RCU doesn't need the CPU. */ | 1634 | /* If no callbacks, RCU doesn't need the CPU. */ |
1635 | if (!rcu_cpu_has_callbacks(cpu, &rdtp->all_lazy)) { | 1635 | if (!rcu_cpu_has_callbacks(&rdtp->all_lazy)) { |
1636 | *dj = ULONG_MAX; | 1636 | *dj = ULONG_MAX; |
1637 | return 0; | 1637 | return 0; |
1638 | } | 1638 | } |
@@ -1666,12 +1666,12 @@ int rcu_needs_cpu(int cpu, unsigned long *dj) | |||
1666 | * | 1666 | * |
1667 | * The caller must have disabled interrupts. | 1667 | * The caller must have disabled interrupts. |
1668 | */ | 1668 | */ |
1669 | static void rcu_prepare_for_idle(int cpu) | 1669 | static void rcu_prepare_for_idle(void) |
1670 | { | 1670 | { |
1671 | #ifndef CONFIG_RCU_NOCB_CPU_ALL | 1671 | #ifndef CONFIG_RCU_NOCB_CPU_ALL |
1672 | bool needwake; | 1672 | bool needwake; |
1673 | struct rcu_data *rdp; | 1673 | struct rcu_data *rdp; |
1674 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); | 1674 | struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); |
1675 | struct rcu_node *rnp; | 1675 | struct rcu_node *rnp; |
1676 | struct rcu_state *rsp; | 1676 | struct rcu_state *rsp; |
1677 | int tne; | 1677 | int tne; |
@@ -1679,7 +1679,7 @@ static void rcu_prepare_for_idle(int cpu) | |||
1679 | /* Handle nohz enablement switches conservatively. */ | 1679 | /* Handle nohz enablement switches conservatively. */ |
1680 | tne = ACCESS_ONCE(tick_nohz_active); | 1680 | tne = ACCESS_ONCE(tick_nohz_active); |
1681 | if (tne != rdtp->tick_nohz_enabled_snap) { | 1681 | if (tne != rdtp->tick_nohz_enabled_snap) { |
1682 | if (rcu_cpu_has_callbacks(cpu, NULL)) | 1682 | if (rcu_cpu_has_callbacks(NULL)) |
1683 | invoke_rcu_core(); /* force nohz to see update. */ | 1683 | invoke_rcu_core(); /* force nohz to see update. */ |
1684 | rdtp->tick_nohz_enabled_snap = tne; | 1684 | rdtp->tick_nohz_enabled_snap = tne; |
1685 | return; | 1685 | return; |
@@ -1688,7 +1688,7 @@ static void rcu_prepare_for_idle(int cpu) | |||
1688 | return; | 1688 | return; |
1689 | 1689 | ||
1690 | /* If this is a no-CBs CPU, no callbacks, just return. */ | 1690 | /* If this is a no-CBs CPU, no callbacks, just return. */ |
1691 | if (rcu_is_nocb_cpu(cpu)) | 1691 | if (rcu_is_nocb_cpu(smp_processor_id())) |
1692 | return; | 1692 | return; |
1693 | 1693 | ||
1694 | /* | 1694 | /* |
@@ -1712,7 +1712,7 @@ static void rcu_prepare_for_idle(int cpu) | |||
1712 | return; | 1712 | return; |
1713 | rdtp->last_accelerate = jiffies; | 1713 | rdtp->last_accelerate = jiffies; |
1714 | for_each_rcu_flavor(rsp) { | 1714 | for_each_rcu_flavor(rsp) { |
1715 | rdp = per_cpu_ptr(rsp->rda, cpu); | 1715 | rdp = this_cpu_ptr(rsp->rda); |
1716 | if (!*rdp->nxttail[RCU_DONE_TAIL]) | 1716 | if (!*rdp->nxttail[RCU_DONE_TAIL]) |
1717 | continue; | 1717 | continue; |
1718 | rnp = rdp->mynode; | 1718 | rnp = rdp->mynode; |
@@ -1731,10 +1731,10 @@ static void rcu_prepare_for_idle(int cpu) | |||
1731 | * any grace periods that elapsed while the CPU was idle, and if any | 1731 | * any grace periods that elapsed while the CPU was idle, and if any |
1732 | * callbacks are now ready to invoke, initiate invocation. | 1732 | * callbacks are now ready to invoke, initiate invocation. |
1733 | */ | 1733 | */ |
1734 | static void rcu_cleanup_after_idle(int cpu) | 1734 | static void rcu_cleanup_after_idle(void) |
1735 | { | 1735 | { |
1736 | #ifndef CONFIG_RCU_NOCB_CPU_ALL | 1736 | #ifndef CONFIG_RCU_NOCB_CPU_ALL |
1737 | if (rcu_is_nocb_cpu(cpu)) | 1737 | if (rcu_is_nocb_cpu(smp_processor_id())) |
1738 | return; | 1738 | return; |
1739 | if (rcu_try_advance_all_cbs()) | 1739 | if (rcu_try_advance_all_cbs()) |
1740 | invoke_rcu_core(); | 1740 | invoke_rcu_core(); |
@@ -2050,6 +2050,33 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force) | |||
2050 | } | 2050 | } |
2051 | 2051 | ||
2052 | /* | 2052 | /* |
2053 | * Does the specified CPU need an RCU callback for the specified flavor | ||
2054 | * of rcu_barrier()? | ||
2055 | */ | ||
2056 | static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu) | ||
2057 | { | ||
2058 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | ||
2059 | struct rcu_head *rhp; | ||
2060 | |||
2061 | /* No-CBs CPUs might have callbacks on any of three lists. */ | ||
2062 | rhp = ACCESS_ONCE(rdp->nocb_head); | ||
2063 | if (!rhp) | ||
2064 | rhp = ACCESS_ONCE(rdp->nocb_gp_head); | ||
2065 | if (!rhp) | ||
2066 | rhp = ACCESS_ONCE(rdp->nocb_follower_head); | ||
2067 | |||
2068 | /* Having no rcuo kthread but CBs after scheduler starts is bad! */ | ||
2069 | if (!ACCESS_ONCE(rdp->nocb_kthread) && rhp) { | ||
2070 | /* RCU callback enqueued before CPU first came online??? */ | ||
2071 | pr_err("RCU: Never-onlined no-CBs CPU %d has CB %p\n", | ||
2072 | cpu, rhp->func); | ||
2073 | WARN_ON_ONCE(1); | ||
2074 | } | ||
2075 | |||
2076 | return !!rhp; | ||
2077 | } | ||
2078 | |||
2079 | /* | ||
2053 | * Enqueue the specified string of rcu_head structures onto the specified | 2080 | * Enqueue the specified string of rcu_head structures onto the specified |
2054 | * CPU's no-CBs lists. The CPU is specified by rdp, the head of the | 2081 | * CPU's no-CBs lists. The CPU is specified by rdp, the head of the |
2055 | * string by rhp, and the tail of the string by rhtp. The non-lazy/lazy | 2082 | * string by rhp, and the tail of the string by rhtp. The non-lazy/lazy |
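The new rcu_nocb_cpu_needs_barrier() lets rcu_barrier() skip no-CBs CPUs that have nothing queued on any of the three lists. A hedged sketch of the kind of per-CPU scan that would consult it; the loop below is illustrative only, not the actual _rcu_barrier() caller elsewhere in the series:

static void demo_barrier_scan(struct rcu_state *rsp)
{
        int cpu;

        for_each_possible_cpu(cpu) {
                if (!rcu_is_nocb_cpu(cpu))
                        continue;       /* non-offloaded CPUs are handled via their own queues */
                if (!rcu_nocb_cpu_needs_barrier(rsp, cpu))
                        continue;       /* nothing queued anywhere: no barrier callback needed */
                /* ...post a barrier callback for this CPU's no-CBs kthread... */
        }
}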
@@ -2546,9 +2573,13 @@ static void rcu_spawn_one_nocb_kthread(struct rcu_state *rsp, int cpu) | |||
2546 | rdp->nocb_leader = rdp_spawn; | 2573 | rdp->nocb_leader = rdp_spawn; |
2547 | if (rdp_last && rdp != rdp_spawn) | 2574 | if (rdp_last && rdp != rdp_spawn) |
2548 | rdp_last->nocb_next_follower = rdp; | 2575 | rdp_last->nocb_next_follower = rdp; |
2549 | rdp_last = rdp; | 2576 | if (rdp == rdp_spawn) { |
2550 | rdp = rdp->nocb_next_follower; | 2577 | rdp = rdp->nocb_next_follower; |
2551 | rdp_last->nocb_next_follower = NULL; | 2578 | } else { |
2579 | rdp_last = rdp; | ||
2580 | rdp = rdp->nocb_next_follower; | ||
2581 | rdp_last->nocb_next_follower = NULL; | ||
2582 | } | ||
2552 | } while (rdp); | 2583 | } while (rdp); |
2553 | rdp_spawn->nocb_next_follower = rdp_old_leader; | 2584 | rdp_spawn->nocb_next_follower = rdp_old_leader; |
2554 | } | 2585 | } |
@@ -2642,6 +2673,12 @@ static bool init_nocb_callback_list(struct rcu_data *rdp) | |||
2642 | 2673 | ||
2643 | #else /* #ifdef CONFIG_RCU_NOCB_CPU */ | 2674 | #else /* #ifdef CONFIG_RCU_NOCB_CPU */ |
2644 | 2675 | ||
2676 | static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu) | ||
2677 | { | ||
2678 | WARN_ON_ONCE(1); /* Should be dead code. */ | ||
2679 | return false; | ||
2680 | } | ||
2681 | |||
2645 | static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) | 2682 | static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) |
2646 | { | 2683 | { |
2647 | } | 2684 | } |
@@ -2728,9 +2765,10 @@ static int full_sysidle_state; /* Current system-idle state. */ | |||
2728 | * to detect full-system idle states, not RCU quiescent states and grace | 2765 | * to detect full-system idle states, not RCU quiescent states and grace |
2729 | * periods. The caller must have disabled interrupts. | 2766 | * periods. The caller must have disabled interrupts. |
2730 | */ | 2767 | */ |
2731 | static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq) | 2768 | static void rcu_sysidle_enter(int irq) |
2732 | { | 2769 | { |
2733 | unsigned long j; | 2770 | unsigned long j; |
2771 | struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); | ||
2734 | 2772 | ||
2735 | /* If there are no nohz_full= CPUs, no need to track this. */ | 2773 | /* If there are no nohz_full= CPUs, no need to track this. */ |
2736 | if (!tick_nohz_full_enabled()) | 2774 | if (!tick_nohz_full_enabled()) |
@@ -2799,8 +2837,10 @@ void rcu_sysidle_force_exit(void) | |||
2799 | * usermode execution does -not- count as idle here! The caller must | 2837 | * usermode execution does -not- count as idle here! The caller must |
2800 | * have disabled interrupts. | 2838 | * have disabled interrupts. |
2801 | */ | 2839 | */ |
2802 | static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq) | 2840 | static void rcu_sysidle_exit(int irq) |
2803 | { | 2841 | { |
2842 | struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); | ||
2843 | |||
2804 | /* If there are no nohz_full= CPUs, no need to track this. */ | 2844 | /* If there are no nohz_full= CPUs, no need to track this. */ |
2805 | if (!tick_nohz_full_enabled()) | 2845 | if (!tick_nohz_full_enabled()) |
2806 | return; | 2846 | return; |
@@ -3094,11 +3134,11 @@ static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp) | |||
3094 | 3134 | ||
3095 | #else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ | 3135 | #else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ |
3096 | 3136 | ||
3097 | static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq) | 3137 | static void rcu_sysidle_enter(int irq) |
3098 | { | 3138 | { |
3099 | } | 3139 | } |
3100 | 3140 | ||
3101 | static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq) | 3141 | static void rcu_sysidle_exit(int irq) |
3102 | { | 3142 | { |
3103 | } | 3143 | } |
3104 | 3144 | ||
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 3ef8ba58694e..e0d31a345ee6 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c | |||
@@ -306,7 +306,7 @@ struct debug_obj_descr rcuhead_debug_descr = { | |||
306 | EXPORT_SYMBOL_GPL(rcuhead_debug_descr); | 306 | EXPORT_SYMBOL_GPL(rcuhead_debug_descr); |
307 | #endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ | 307 | #endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ |
308 | 308 | ||
309 | #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE) | 309 | #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) || defined(CONFIG_RCU_TRACE) |
310 | void do_trace_rcu_torture_read(const char *rcutorturename, struct rcu_head *rhp, | 310 | void do_trace_rcu_torture_read(const char *rcutorturename, struct rcu_head *rhp, |
311 | unsigned long secs, | 311 | unsigned long secs, |
312 | unsigned long c_old, unsigned long c) | 312 | unsigned long c_old, unsigned long c) |
@@ -531,7 +531,8 @@ static int __noreturn rcu_tasks_kthread(void *arg) | |||
531 | struct rcu_head *next; | 531 | struct rcu_head *next; |
532 | LIST_HEAD(rcu_tasks_holdouts); | 532 | LIST_HEAD(rcu_tasks_holdouts); |
533 | 533 | ||
534 | /* FIXME: Add housekeeping affinity. */ | 534 | /* Run on housekeeping CPUs by default. Sysadm can move if desired. */ |
535 | housekeeping_affine(current); | ||
535 | 536 | ||
536 | /* | 537 | /* |
537 | * Each pass through the following loop makes one check for | 538 | * Each pass through the following loop makes one check for |
@@ -690,3 +691,87 @@ static void rcu_spawn_tasks_kthread(void) | |||
690 | } | 691 | } |
691 | 692 | ||
692 | #endif /* #ifdef CONFIG_TASKS_RCU */ | 693 | #endif /* #ifdef CONFIG_TASKS_RCU */ |
694 | |||
695 | #ifdef CONFIG_PROVE_RCU | ||
696 | |||
697 | /* | ||
698 | * Early boot self test parameters, one for each flavor | ||
699 | */ | ||
700 | static bool rcu_self_test; | ||
701 | static bool rcu_self_test_bh; | ||
702 | static bool rcu_self_test_sched; | ||
703 | |||
704 | module_param(rcu_self_test, bool, 0444); | ||
705 | module_param(rcu_self_test_bh, bool, 0444); | ||
706 | module_param(rcu_self_test_sched, bool, 0444); | ||
707 | |||
708 | static int rcu_self_test_counter; | ||
709 | |||
710 | static void test_callback(struct rcu_head *r) | ||
711 | { | ||
712 | rcu_self_test_counter++; | ||
713 | pr_info("RCU test callback executed %d\n", rcu_self_test_counter); | ||
714 | } | ||
715 | |||
716 | static void early_boot_test_call_rcu(void) | ||
717 | { | ||
718 | static struct rcu_head head; | ||
719 | |||
720 | call_rcu(&head, test_callback); | ||
721 | } | ||
722 | |||
723 | static void early_boot_test_call_rcu_bh(void) | ||
724 | { | ||
725 | static struct rcu_head head; | ||
726 | |||
727 | call_rcu_bh(&head, test_callback); | ||
728 | } | ||
729 | |||
730 | static void early_boot_test_call_rcu_sched(void) | ||
731 | { | ||
732 | static struct rcu_head head; | ||
733 | |||
734 | call_rcu_sched(&head, test_callback); | ||
735 | } | ||
736 | |||
737 | void rcu_early_boot_tests(void) | ||
738 | { | ||
739 | pr_info("Running RCU self tests\n"); | ||
740 | |||
741 | if (rcu_self_test) | ||
742 | early_boot_test_call_rcu(); | ||
743 | if (rcu_self_test_bh) | ||
744 | early_boot_test_call_rcu_bh(); | ||
745 | if (rcu_self_test_sched) | ||
746 | early_boot_test_call_rcu_sched(); | ||
747 | } | ||
748 | |||
749 | static int rcu_verify_early_boot_tests(void) | ||
750 | { | ||
751 | int ret = 0; | ||
752 | int early_boot_test_counter = 0; | ||
753 | |||
754 | if (rcu_self_test) { | ||
755 | early_boot_test_counter++; | ||
756 | rcu_barrier(); | ||
757 | } | ||
758 | if (rcu_self_test_bh) { | ||
759 | early_boot_test_counter++; | ||
760 | rcu_barrier_bh(); | ||
761 | } | ||
762 | if (rcu_self_test_sched) { | ||
763 | early_boot_test_counter++; | ||
764 | rcu_barrier_sched(); | ||
765 | } | ||
766 | |||
767 | if (rcu_self_test_counter != early_boot_test_counter) { | ||
768 | WARN_ON(1); | ||
769 | ret = -1; | ||
770 | } | ||
771 | |||
772 | return ret; | ||
773 | } | ||
774 | late_initcall(rcu_verify_early_boot_tests); | ||
775 | #else | ||
776 | void rcu_early_boot_tests(void) {} | ||
777 | #endif /* CONFIG_PROVE_RCU */ | ||
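The CONFIG_PROVE_RCU block above is driven by three module parameters plus a late_initcall() that counts callbacks against rcu_barrier*(). Assuming the file's usual "rcupdate." parameter prefix, booting with rcu_self_test=1 (and the _bh/_sched variants) arms the corresponding flavor. The underlying "post early, verify late" pattern, reduced to a self-contained sketch with illustrative names:

#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/kernel.h>

static int demo_cb_ran;
static struct rcu_head demo_head;

static void demo_cb(struct rcu_head *unused)
{
        demo_cb_ran = 1;
}

static int __init demo_post_early(void)
{
        call_rcu(&demo_head, demo_cb);  /* may be queued before boot is fully complete */
        return 0;
}
early_initcall(demo_post_early);

static int __init demo_verify_late(void)
{
        rcu_barrier();                  /* wait for demo_cb() to have run */
        WARN_ON(!demo_cb_ran);
        return 0;
}
late_initcall(demo_verify_late);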
diff --git a/kernel/res_counter.c b/kernel/res_counter.c deleted file mode 100644 index e791130f85a7..000000000000 --- a/kernel/res_counter.c +++ /dev/null | |||
@@ -1,211 +0,0 @@ | |||
1 | /* | ||
2 | * resource cgroups | ||
3 | * | ||
4 | * Copyright 2007 OpenVZ SWsoft Inc | ||
5 | * | ||
6 | * Author: Pavel Emelianov <xemul@openvz.org> | ||
7 | * | ||
8 | */ | ||
9 | |||
10 | #include <linux/types.h> | ||
11 | #include <linux/parser.h> | ||
12 | #include <linux/fs.h> | ||
13 | #include <linux/res_counter.h> | ||
14 | #include <linux/uaccess.h> | ||
15 | #include <linux/mm.h> | ||
16 | |||
17 | void res_counter_init(struct res_counter *counter, struct res_counter *parent) | ||
18 | { | ||
19 | spin_lock_init(&counter->lock); | ||
20 | counter->limit = RES_COUNTER_MAX; | ||
21 | counter->soft_limit = RES_COUNTER_MAX; | ||
22 | counter->parent = parent; | ||
23 | } | ||
24 | |||
25 | static u64 res_counter_uncharge_locked(struct res_counter *counter, | ||
26 | unsigned long val) | ||
27 | { | ||
28 | if (WARN_ON(counter->usage < val)) | ||
29 | val = counter->usage; | ||
30 | |||
31 | counter->usage -= val; | ||
32 | return counter->usage; | ||
33 | } | ||
34 | |||
35 | static int res_counter_charge_locked(struct res_counter *counter, | ||
36 | unsigned long val, bool force) | ||
37 | { | ||
38 | int ret = 0; | ||
39 | |||
40 | if (counter->usage + val > counter->limit) { | ||
41 | counter->failcnt++; | ||
42 | ret = -ENOMEM; | ||
43 | if (!force) | ||
44 | return ret; | ||
45 | } | ||
46 | |||
47 | counter->usage += val; | ||
48 | if (counter->usage > counter->max_usage) | ||
49 | counter->max_usage = counter->usage; | ||
50 | return ret; | ||
51 | } | ||
52 | |||
53 | static int __res_counter_charge(struct res_counter *counter, unsigned long val, | ||
54 | struct res_counter **limit_fail_at, bool force) | ||
55 | { | ||
56 | int ret, r; | ||
57 | unsigned long flags; | ||
58 | struct res_counter *c, *u; | ||
59 | |||
60 | r = ret = 0; | ||
61 | *limit_fail_at = NULL; | ||
62 | local_irq_save(flags); | ||
63 | for (c = counter; c != NULL; c = c->parent) { | ||
64 | spin_lock(&c->lock); | ||
65 | r = res_counter_charge_locked(c, val, force); | ||
66 | spin_unlock(&c->lock); | ||
67 | if (r < 0 && !ret) { | ||
68 | ret = r; | ||
69 | *limit_fail_at = c; | ||
70 | if (!force) | ||
71 | break; | ||
72 | } | ||
73 | } | ||
74 | |||
75 | if (ret < 0 && !force) { | ||
76 | for (u = counter; u != c; u = u->parent) { | ||
77 | spin_lock(&u->lock); | ||
78 | res_counter_uncharge_locked(u, val); | ||
79 | spin_unlock(&u->lock); | ||
80 | } | ||
81 | } | ||
82 | local_irq_restore(flags); | ||
83 | |||
84 | return ret; | ||
85 | } | ||
86 | |||
87 | int res_counter_charge(struct res_counter *counter, unsigned long val, | ||
88 | struct res_counter **limit_fail_at) | ||
89 | { | ||
90 | return __res_counter_charge(counter, val, limit_fail_at, false); | ||
91 | } | ||
92 | |||
93 | int res_counter_charge_nofail(struct res_counter *counter, unsigned long val, | ||
94 | struct res_counter **limit_fail_at) | ||
95 | { | ||
96 | return __res_counter_charge(counter, val, limit_fail_at, true); | ||
97 | } | ||
98 | |||
99 | u64 res_counter_uncharge_until(struct res_counter *counter, | ||
100 | struct res_counter *top, | ||
101 | unsigned long val) | ||
102 | { | ||
103 | unsigned long flags; | ||
104 | struct res_counter *c; | ||
105 | u64 ret = 0; | ||
106 | |||
107 | local_irq_save(flags); | ||
108 | for (c = counter; c != top; c = c->parent) { | ||
109 | u64 r; | ||
110 | spin_lock(&c->lock); | ||
111 | r = res_counter_uncharge_locked(c, val); | ||
112 | if (c == counter) | ||
113 | ret = r; | ||
114 | spin_unlock(&c->lock); | ||
115 | } | ||
116 | local_irq_restore(flags); | ||
117 | return ret; | ||
118 | } | ||
119 | |||
120 | u64 res_counter_uncharge(struct res_counter *counter, unsigned long val) | ||
121 | { | ||
122 | return res_counter_uncharge_until(counter, NULL, val); | ||
123 | } | ||
124 | |||
125 | static inline unsigned long long * | ||
126 | res_counter_member(struct res_counter *counter, int member) | ||
127 | { | ||
128 | switch (member) { | ||
129 | case RES_USAGE: | ||
130 | return &counter->usage; | ||
131 | case RES_MAX_USAGE: | ||
132 | return &counter->max_usage; | ||
133 | case RES_LIMIT: | ||
134 | return &counter->limit; | ||
135 | case RES_FAILCNT: | ||
136 | return &counter->failcnt; | ||
137 | case RES_SOFT_LIMIT: | ||
138 | return &counter->soft_limit; | ||
139 | }; | ||
140 | |||
141 | BUG(); | ||
142 | return NULL; | ||
143 | } | ||
144 | |||
145 | ssize_t res_counter_read(struct res_counter *counter, int member, | ||
146 | const char __user *userbuf, size_t nbytes, loff_t *pos, | ||
147 | int (*read_strategy)(unsigned long long val, char *st_buf)) | ||
148 | { | ||
149 | unsigned long long *val; | ||
150 | char buf[64], *s; | ||
151 | |||
152 | s = buf; | ||
153 | val = res_counter_member(counter, member); | ||
154 | if (read_strategy) | ||
155 | s += read_strategy(*val, s); | ||
156 | else | ||
157 | s += sprintf(s, "%llu\n", *val); | ||
158 | return simple_read_from_buffer((void __user *)userbuf, nbytes, | ||
159 | pos, buf, s - buf); | ||
160 | } | ||
161 | |||
162 | #if BITS_PER_LONG == 32 | ||
163 | u64 res_counter_read_u64(struct res_counter *counter, int member) | ||
164 | { | ||
165 | unsigned long flags; | ||
166 | u64 ret; | ||
167 | |||
168 | spin_lock_irqsave(&counter->lock, flags); | ||
169 | ret = *res_counter_member(counter, member); | ||
170 | spin_unlock_irqrestore(&counter->lock, flags); | ||
171 | |||
172 | return ret; | ||
173 | } | ||
174 | #else | ||
175 | u64 res_counter_read_u64(struct res_counter *counter, int member) | ||
176 | { | ||
177 | return *res_counter_member(counter, member); | ||
178 | } | ||
179 | #endif | ||
180 | |||
181 | int res_counter_memparse_write_strategy(const char *buf, | ||
182 | unsigned long long *resp) | ||
183 | { | ||
184 | char *end; | ||
185 | unsigned long long res; | ||
186 | |||
187 | /* return RES_COUNTER_MAX(unlimited) if "-1" is specified */ | ||
188 | if (*buf == '-') { | ||
189 | int rc = kstrtoull(buf + 1, 10, &res); | ||
190 | |||
191 | if (rc) | ||
192 | return rc; | ||
193 | if (res != 1) | ||
194 | return -EINVAL; | ||
195 | *resp = RES_COUNTER_MAX; | ||
196 | return 0; | ||
197 | } | ||
198 | |||
199 | res = memparse(buf, &end); | ||
200 | if (*end != '\0') | ||
201 | return -EINVAL; | ||
202 | |||
203 | if (PAGE_ALIGN(res) >= res) | ||
204 | res = PAGE_ALIGN(res); | ||
205 | else | ||
206 | res = RES_COUNTER_MAX; | ||
207 | |||
208 | *resp = res; | ||
209 | |||
210 | return 0; | ||
211 | } | ||
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c index a63f4dc27909..607f852b4d04 100644 --- a/kernel/sched/completion.c +++ b/kernel/sched/completion.c | |||
@@ -148,7 +148,7 @@ EXPORT_SYMBOL(wait_for_completion_timeout); | |||
148 | * | 148 | * |
149 | * This waits to be signaled for completion of a specific task. It is NOT | 149 | * This waits to be signaled for completion of a specific task. It is NOT |
150 | * interruptible and there is no timeout. The caller is accounted as waiting | 150 | * interruptible and there is no timeout. The caller is accounted as waiting |
151 | * for IO. | 151 | * for IO (which traditionally means blkio only). |
152 | */ | 152 | */ |
153 | void __sched wait_for_completion_io(struct completion *x) | 153 | void __sched wait_for_completion_io(struct completion *x) |
154 | { | 154 | { |
@@ -163,7 +163,8 @@ EXPORT_SYMBOL(wait_for_completion_io); | |||
163 | * | 163 | * |
164 | * This waits for either a completion of a specific task to be signaled or for a | 164 | * This waits for either a completion of a specific task to be signaled or for a |
165 | * specified timeout to expire. The timeout is in jiffies. It is not | 165 | * specified timeout to expire. The timeout is in jiffies. It is not |
166 | * interruptible. The caller is accounted as waiting for IO. | 166 | * interruptible. The caller is accounted as waiting for IO (which traditionally |
167 | * means blkio only). | ||
167 | * | 168 | * |
168 | * Return: 0 if timed out, and positive (at least 1, or number of jiffies left | 169 | * Return: 0 if timed out, and positive (at least 1, or number of jiffies left |
169 | * till timeout) if completed. | 170 | * till timeout) if completed. |
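The comment tweaks above only sharpen what "accounted as waiting for IO" has always meant in practice: the _io variants exist for block-I/O style waits that should show up as iowait. A short usage sketch with an illustrative caller and the standard completion API:

#include <linux/completion.h>

static void demo_wait_for_blkio_done(struct completion *done)
{
        /* Blocked time here is charged as iowait, unlike plain wait_for_completion(). */
        wait_for_completion_io(done);
}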
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 44999505e1bf..b5797b78add6 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -1008,6 +1008,9 @@ inline int task_curr(const struct task_struct *p) | |||
1008 | return cpu_curr(task_cpu(p)) == p; | 1008 | return cpu_curr(task_cpu(p)) == p; |
1009 | } | 1009 | } |
1010 | 1010 | ||
1011 | /* | ||
1012 | * Can drop rq->lock because sched_class::switched_from() methods may drop it. | ||
1013 | */ | ||
1011 | static inline void check_class_changed(struct rq *rq, struct task_struct *p, | 1014 | static inline void check_class_changed(struct rq *rq, struct task_struct *p, |
1012 | const struct sched_class *prev_class, | 1015 | const struct sched_class *prev_class, |
1013 | int oldprio) | 1016 | int oldprio) |
@@ -1015,6 +1018,7 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p, | |||
1015 | if (prev_class != p->sched_class) { | 1018 | if (prev_class != p->sched_class) { |
1016 | if (prev_class->switched_from) | 1019 | if (prev_class->switched_from) |
1017 | prev_class->switched_from(rq, p); | 1020 | prev_class->switched_from(rq, p); |
1021 | /* Possible rq->lock 'hole'. */ | ||
1018 | p->sched_class->switched_to(rq, p); | 1022 | p->sched_class->switched_to(rq, p); |
1019 | } else if (oldprio != p->prio || dl_task(p)) | 1023 | } else if (oldprio != p->prio || dl_task(p)) |
1020 | p->sched_class->prio_changed(rq, p, oldprio); | 1024 | p->sched_class->prio_changed(rq, p, oldprio); |
@@ -1054,7 +1058,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
1054 | * ttwu() will sort out the placement. | 1058 | * ttwu() will sort out the placement. |
1055 | */ | 1059 | */ |
1056 | WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && | 1060 | WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && |
1057 | !(task_preempt_count(p) & PREEMPT_ACTIVE)); | 1061 | !p->on_rq); |
1058 | 1062 | ||
1059 | #ifdef CONFIG_LOCKDEP | 1063 | #ifdef CONFIG_LOCKDEP |
1060 | /* | 1064 | /* |
@@ -1407,7 +1411,8 @@ out: | |||
1407 | static inline | 1411 | static inline |
1408 | int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) | 1412 | int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags) |
1409 | { | 1413 | { |
1410 | cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags); | 1414 | if (p->nr_cpus_allowed > 1) |
1415 | cpu = p->sched_class->select_task_rq(p, cpu, sd_flags, wake_flags); | ||
1411 | 1416 | ||
1412 | /* | 1417 | /* |
1413 | * In order not to call set_task_cpu() on a blocking task we need | 1418 | * In order not to call set_task_cpu() on a blocking task we need |
@@ -1623,8 +1628,10 @@ void wake_up_if_idle(int cpu) | |||
1623 | struct rq *rq = cpu_rq(cpu); | 1628 | struct rq *rq = cpu_rq(cpu); |
1624 | unsigned long flags; | 1629 | unsigned long flags; |
1625 | 1630 | ||
1626 | if (!is_idle_task(rq->curr)) | 1631 | rcu_read_lock(); |
1627 | return; | 1632 | |
1633 | if (!is_idle_task(rcu_dereference(rq->curr))) | ||
1634 | goto out; | ||
1628 | 1635 | ||
1629 | if (set_nr_if_polling(rq->idle)) { | 1636 | if (set_nr_if_polling(rq->idle)) { |
1630 | trace_sched_wake_idle_without_ipi(cpu); | 1637 | trace_sched_wake_idle_without_ipi(cpu); |
@@ -1635,6 +1642,9 @@ void wake_up_if_idle(int cpu) | |||
1635 | /* Else cpu is not in idle, do nothing here */ | 1642 | /* Else cpu is not in idle, do nothing here */ |
1636 | raw_spin_unlock_irqrestore(&rq->lock, flags); | 1643 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
1637 | } | 1644 | } |
1645 | |||
1646 | out: | ||
1647 | rcu_read_unlock(); | ||
1638 | } | 1648 | } |
1639 | 1649 | ||
1640 | bool cpus_share_cache(int this_cpu, int that_cpu) | 1650 | bool cpus_share_cache(int this_cpu, int that_cpu) |
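wake_up_if_idle() now pins rq->curr with rcu_read_lock() before peeking at it, since the task it points to may be freed once it exits. The same guarded-peek idiom in isolation, as an illustrative helper mirroring the hunk above:

static bool demo_cpu_looks_idle(int cpu)
{
        struct rq *rq = cpu_rq(cpu);
        bool idle;

        rcu_read_lock();                /* keeps the task behind rq->curr alive */
        idle = is_idle_task(rcu_dereference(rq->curr));
        rcu_read_unlock();

        return idle;
}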
@@ -1853,12 +1863,10 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) | |||
1853 | p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0; | 1863 | p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0; |
1854 | p->numa_scan_period = sysctl_numa_balancing_scan_delay; | 1864 | p->numa_scan_period = sysctl_numa_balancing_scan_delay; |
1855 | p->numa_work.next = &p->numa_work; | 1865 | p->numa_work.next = &p->numa_work; |
1856 | p->numa_faults_memory = NULL; | 1866 | p->numa_faults = NULL; |
1857 | p->numa_faults_buffer_memory = NULL; | ||
1858 | p->last_task_numa_placement = 0; | 1867 | p->last_task_numa_placement = 0; |
1859 | p->last_sum_exec_runtime = 0; | 1868 | p->last_sum_exec_runtime = 0; |
1860 | 1869 | ||
1861 | INIT_LIST_HEAD(&p->numa_entry); | ||
1862 | p->numa_group = NULL; | 1870 | p->numa_group = NULL; |
1863 | #endif /* CONFIG_NUMA_BALANCING */ | 1871 | #endif /* CONFIG_NUMA_BALANCING */ |
1864 | } | 1872 | } |
@@ -2034,25 +2042,6 @@ static inline int dl_bw_cpus(int i) | |||
2034 | } | 2042 | } |
2035 | #endif | 2043 | #endif |
2036 | 2044 | ||
2037 | static inline | ||
2038 | void __dl_clear(struct dl_bw *dl_b, u64 tsk_bw) | ||
2039 | { | ||
2040 | dl_b->total_bw -= tsk_bw; | ||
2041 | } | ||
2042 | |||
2043 | static inline | ||
2044 | void __dl_add(struct dl_bw *dl_b, u64 tsk_bw) | ||
2045 | { | ||
2046 | dl_b->total_bw += tsk_bw; | ||
2047 | } | ||
2048 | |||
2049 | static inline | ||
2050 | bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw) | ||
2051 | { | ||
2052 | return dl_b->bw != -1 && | ||
2053 | dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw; | ||
2054 | } | ||
2055 | |||
2056 | /* | 2045 | /* |
2057 | * We must be sure that accepting a new task (or allowing changing the | 2046 | * We must be sure that accepting a new task (or allowing changing the |
2058 | * parameters of an existing one) is consistent with the bandwidth | 2047 | * parameters of an existing one) is consistent with the bandwidth |
@@ -2220,7 +2209,6 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev, | |||
2220 | 2209 | ||
2221 | /** | 2210 | /** |
2222 | * finish_task_switch - clean up after a task-switch | 2211 | * finish_task_switch - clean up after a task-switch |
2223 | * @rq: runqueue associated with task-switch | ||
2224 | * @prev: the thread we just switched away from. | 2212 | * @prev: the thread we just switched away from. |
2225 | * | 2213 | * |
2226 | * finish_task_switch must be called after the context switch, paired | 2214 | * finish_task_switch must be called after the context switch, paired |
@@ -2232,10 +2220,16 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev, | |||
2232 | * so, we finish that here outside of the runqueue lock. (Doing it | 2220 | * so, we finish that here outside of the runqueue lock. (Doing it |
2233 | * with the lock held can cause deadlocks; see schedule() for | 2221 | * with the lock held can cause deadlocks; see schedule() for |
2234 | * details.) | 2222 | * details.) |
2223 | * | ||
2224 | * The context switch has flipped the stack from under us and restored the | ||
2225 | * local variables which were saved when this task called schedule() in the | ||
2226 | * past. prev == current is still correct but we need to recalculate this_rq | ||
2227 | * because prev may have moved to another CPU. | ||
2235 | */ | 2228 | */ |
2236 | static void finish_task_switch(struct rq *rq, struct task_struct *prev) | 2229 | static struct rq *finish_task_switch(struct task_struct *prev) |
2237 | __releases(rq->lock) | 2230 | __releases(rq->lock) |
2238 | { | 2231 | { |
2232 | struct rq *rq = this_rq(); | ||
2239 | struct mm_struct *mm = rq->prev_mm; | 2233 | struct mm_struct *mm = rq->prev_mm; |
2240 | long prev_state; | 2234 | long prev_state; |
2241 | 2235 | ||
@@ -2275,6 +2269,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) | |||
2275 | } | 2269 | } |
2276 | 2270 | ||
2277 | tick_nohz_task_switch(current); | 2271 | tick_nohz_task_switch(current); |
2272 | return rq; | ||
2278 | } | 2273 | } |
2279 | 2274 | ||
2280 | #ifdef CONFIG_SMP | 2275 | #ifdef CONFIG_SMP |
@@ -2309,25 +2304,22 @@ static inline void post_schedule(struct rq *rq) | |||
2309 | asmlinkage __visible void schedule_tail(struct task_struct *prev) | 2304 | asmlinkage __visible void schedule_tail(struct task_struct *prev) |
2310 | __releases(rq->lock) | 2305 | __releases(rq->lock) |
2311 | { | 2306 | { |
2312 | struct rq *rq = this_rq(); | 2307 | struct rq *rq; |
2313 | |||
2314 | finish_task_switch(rq, prev); | ||
2315 | 2308 | ||
2316 | /* | 2309 | /* finish_task_switch() drops rq->lock and enables preemtion */ |
2317 | * FIXME: do we need to worry about rq being invalidated by the | 2310 | preempt_disable(); |
2318 | * task_switch? | 2311 | rq = finish_task_switch(prev); |
2319 | */ | ||
2320 | post_schedule(rq); | 2312 | post_schedule(rq); |
2313 | preempt_enable(); | ||
2321 | 2314 | ||
2322 | if (current->set_child_tid) | 2315 | if (current->set_child_tid) |
2323 | put_user(task_pid_vnr(current), current->set_child_tid); | 2316 | put_user(task_pid_vnr(current), current->set_child_tid); |
2324 | } | 2317 | } |
2325 | 2318 | ||
2326 | /* | 2319 | /* |
2327 | * context_switch - switch to the new MM and the new | 2320 | * context_switch - switch to the new MM and the new thread's register state. |
2328 | * thread's register state. | ||
2329 | */ | 2321 | */ |
2330 | static inline void | 2322 | static inline struct rq * |
2331 | context_switch(struct rq *rq, struct task_struct *prev, | 2323 | context_switch(struct rq *rq, struct task_struct *prev, |
2332 | struct task_struct *next) | 2324 | struct task_struct *next) |
2333 | { | 2325 | { |
@@ -2366,14 +2358,9 @@ context_switch(struct rq *rq, struct task_struct *prev, | |||
2366 | context_tracking_task_switch(prev, next); | 2358 | context_tracking_task_switch(prev, next); |
2367 | /* Here we just switch the register state and the stack. */ | 2359 | /* Here we just switch the register state and the stack. */ |
2368 | switch_to(prev, next, prev); | 2360 | switch_to(prev, next, prev); |
2369 | |||
2370 | barrier(); | 2361 | barrier(); |
2371 | /* | 2362 | |
2372 | * this_rq must be evaluated again because prev may have moved | 2363 | return finish_task_switch(prev); |
2373 | * CPUs since it called schedule(), thus the 'rq' on its stack | ||
2374 | * frame will be invalid. | ||
2375 | */ | ||
2376 | finish_task_switch(this_rq(), prev); | ||
2377 | } | 2364 | } |
2378 | 2365 | ||
2379 | /* | 2366 | /* |
@@ -2475,44 +2462,6 @@ EXPORT_PER_CPU_SYMBOL(kstat); | |||
2475 | EXPORT_PER_CPU_SYMBOL(kernel_cpustat); | 2462 | EXPORT_PER_CPU_SYMBOL(kernel_cpustat); |
2476 | 2463 | ||
2477 | /* | 2464 | /* |
2478 | * Return any ns on the sched_clock that have not yet been accounted in | ||
2479 | * @p in case that task is currently running. | ||
2480 | * | ||
2481 | * Called with task_rq_lock() held on @rq. | ||
2482 | */ | ||
2483 | static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) | ||
2484 | { | ||
2485 | u64 ns = 0; | ||
2486 | |||
2487 | /* | ||
2488 | * Must be ->curr _and_ ->on_rq. If dequeued, we would | ||
2489 | * project cycles that may never be accounted to this | ||
2490 | * thread, breaking clock_gettime(). | ||
2491 | */ | ||
2492 | if (task_current(rq, p) && task_on_rq_queued(p)) { | ||
2493 | update_rq_clock(rq); | ||
2494 | ns = rq_clock_task(rq) - p->se.exec_start; | ||
2495 | if ((s64)ns < 0) | ||
2496 | ns = 0; | ||
2497 | } | ||
2498 | |||
2499 | return ns; | ||
2500 | } | ||
2501 | |||
2502 | unsigned long long task_delta_exec(struct task_struct *p) | ||
2503 | { | ||
2504 | unsigned long flags; | ||
2505 | struct rq *rq; | ||
2506 | u64 ns = 0; | ||
2507 | |||
2508 | rq = task_rq_lock(p, &flags); | ||
2509 | ns = do_task_delta_exec(p, rq); | ||
2510 | task_rq_unlock(rq, p, &flags); | ||
2511 | |||
2512 | return ns; | ||
2513 | } | ||
2514 | |||
2515 | /* | ||
2516 | * Return accounted runtime for the task. | 2465 | * Return accounted runtime for the task. |
2517 | * In case the task is currently running, return the runtime plus current's | 2466 | * In case the task is currently running, return the runtime plus current's |
2518 | * pending runtime that have not been accounted yet. | 2467 | * pending runtime that have not been accounted yet. |
@@ -2521,7 +2470,7 @@ unsigned long long task_sched_runtime(struct task_struct *p) | |||
2521 | { | 2470 | { |
2522 | unsigned long flags; | 2471 | unsigned long flags; |
2523 | struct rq *rq; | 2472 | struct rq *rq; |
2524 | u64 ns = 0; | 2473 | u64 ns; |
2525 | 2474 | ||
2526 | #if defined(CONFIG_64BIT) && defined(CONFIG_SMP) | 2475 | #if defined(CONFIG_64BIT) && defined(CONFIG_SMP) |
2527 | /* | 2476 | /* |
@@ -2540,7 +2489,16 @@ unsigned long long task_sched_runtime(struct task_struct *p) | |||
2540 | #endif | 2489 | #endif |
2541 | 2490 | ||
2542 | rq = task_rq_lock(p, &flags); | 2491 | rq = task_rq_lock(p, &flags); |
2543 | ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq); | 2492 | /* |
2493 | * Must be ->curr _and_ ->on_rq. If dequeued, we would | ||
2494 | * project cycles that may never be accounted to this | ||
2495 | * thread, breaking clock_gettime(). | ||
2496 | */ | ||
2497 | if (task_current(rq, p) && task_on_rq_queued(p)) { | ||
2498 | update_rq_clock(rq); | ||
2499 | p->sched_class->update_curr(rq); | ||
2500 | } | ||
2501 | ns = p->se.sum_exec_runtime; | ||
2544 | task_rq_unlock(rq, p, &flags); | 2502 | task_rq_unlock(rq, p, &flags); |
2545 | 2503 | ||
2546 | return ns; | 2504 | return ns; |
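task_sched_runtime() now asks the owning scheduling class to fold in the running delta via the new ->update_curr() hook instead of open-coding the clock math, so p->se.sum_exec_runtime can simply be read afterwards. A hedged sketch of the shape such a hook takes, simplified from the fair-class pattern rather than copied from the patch:

static void demo_update_curr(struct rq *rq)
{
        struct task_struct *curr = rq->curr;
        u64 now = rq_clock_task(rq);
        s64 delta = now - curr->se.exec_start;

        if (delta <= 0)
                return;

        curr->se.sum_exec_runtime += delta;     /* what task_sched_runtime() reads */
        curr->se.exec_start = now;
}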
@@ -2802,7 +2760,7 @@ need_resched: | |||
2802 | preempt_disable(); | 2760 | preempt_disable(); |
2803 | cpu = smp_processor_id(); | 2761 | cpu = smp_processor_id(); |
2804 | rq = cpu_rq(cpu); | 2762 | rq = cpu_rq(cpu); |
2805 | rcu_note_context_switch(cpu); | 2763 | rcu_note_context_switch(); |
2806 | prev = rq->curr; | 2764 | prev = rq->curr; |
2807 | 2765 | ||
2808 | schedule_debug(prev); | 2766 | schedule_debug(prev); |
@@ -2855,15 +2813,8 @@ need_resched: | |||
2855 | rq->curr = next; | 2813 | rq->curr = next; |
2856 | ++*switch_count; | 2814 | ++*switch_count; |
2857 | 2815 | ||
2858 | context_switch(rq, prev, next); /* unlocks the rq */ | 2816 | rq = context_switch(rq, prev, next); /* unlocks the rq */ |
2859 | /* | 2817 | cpu = cpu_of(rq); |
2860 | * The context switch have flipped the stack from under us | ||
2861 | * and restored the local variables which were saved when | ||
2862 | * this task called schedule() in the past. prev == current | ||
2863 | * is still correct, but it can be moved to another cpu/rq. | ||
2864 | */ | ||
2865 | cpu = smp_processor_id(); | ||
2866 | rq = cpu_rq(cpu); | ||
2867 | } else | 2818 | } else |
2868 | raw_spin_unlock_irq(&rq->lock); | 2819 | raw_spin_unlock_irq(&rq->lock); |
2869 | 2820 | ||
@@ -2903,10 +2854,14 @@ asmlinkage __visible void __sched schedule_user(void) | |||
2903 | * or we have been woken up remotely but the IPI has not yet arrived, | 2854 | * or we have been woken up remotely but the IPI has not yet arrived, |
2904 | * we haven't yet exited the RCU idle mode. Do it here manually until | 2855 | * we haven't yet exited the RCU idle mode. Do it here manually until |
2905 | * we find a better solution. | 2856 | * we find a better solution. |
2857 | * | ||
2858 | * NB: There are buggy callers of this function. Ideally we | ||
2859 | * should warn if prev_state != IN_USER, but that will trigger | ||
2860 | * too frequently to make sense yet. | ||
2906 | */ | 2861 | */ |
2907 | user_exit(); | 2862 | enum ctx_state prev_state = exception_enter(); |
2908 | schedule(); | 2863 | schedule(); |
2909 | user_enter(); | 2864 | exception_exit(prev_state); |
2910 | } | 2865 | } |
2911 | #endif | 2866 | #endif |
2912 | 2867 | ||
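schedule_user() now brackets the schedule with exception_enter()/exception_exit() instead of unconditional user_exit()/user_enter(), so the previous context-tracking state is restored rather than assumed to be user. The save/restore shape on its own, as an illustrative wrapper:

#include <linux/context_tracking.h>

static void demo_run_in_kernel_ctx(void (*fn)(void))
{
        enum ctx_state prev_state = exception_enter();  /* leave "user" state if we were in it */

        fn();

        exception_exit(prev_state);     /* put back whatever state we entered with */
}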
@@ -2951,6 +2906,47 @@ asmlinkage __visible void __sched notrace preempt_schedule(void) | |||
2951 | } | 2906 | } |
2952 | NOKPROBE_SYMBOL(preempt_schedule); | 2907 | NOKPROBE_SYMBOL(preempt_schedule); |
2953 | EXPORT_SYMBOL(preempt_schedule); | 2908 | EXPORT_SYMBOL(preempt_schedule); |
2909 | |||
2910 | #ifdef CONFIG_CONTEXT_TRACKING | ||
2911 | /** | ||
2912 | * preempt_schedule_context - preempt_schedule called by tracing | ||
2913 | * | ||
2914 | * The tracing infrastructure uses preempt_enable_notrace to prevent | ||
2915 | * recursion and tracing preempt enabling caused by the tracing | ||
2916 | * infrastructure itself. But as tracing can happen in areas coming | ||
2917 | * from userspace or just about to enter userspace, a preempt enable | ||
2918 | * can occur before user_exit() is called. This will cause the scheduler | ||
2919 | * to be called when the system is still in usermode. | ||
2920 | * | ||
2921 | * To prevent this, the preempt_enable_notrace will use this function | ||
2922 | * instead of preempt_schedule() to exit user context if needed before | ||
2923 | * calling the scheduler. | ||
2924 | */ | ||
2925 | asmlinkage __visible void __sched notrace preempt_schedule_context(void) | ||
2926 | { | ||
2927 | enum ctx_state prev_ctx; | ||
2928 | |||
2929 | if (likely(!preemptible())) | ||
2930 | return; | ||
2931 | |||
2932 | do { | ||
2933 | __preempt_count_add(PREEMPT_ACTIVE); | ||
2934 | /* | ||
2935 | * Needs preempt disabled in case user_exit() is traced | ||
2936 | * and the tracer calls preempt_enable_notrace() causing | ||
2937 | * an infinite recursion. | ||
2938 | */ | ||
2939 | prev_ctx = exception_enter(); | ||
2940 | __schedule(); | ||
2941 | exception_exit(prev_ctx); | ||
2942 | |||
2943 | __preempt_count_sub(PREEMPT_ACTIVE); | ||
2944 | barrier(); | ||
2945 | } while (need_resched()); | ||
2946 | } | ||
2947 | EXPORT_SYMBOL_GPL(preempt_schedule_context); | ||
2948 | #endif /* CONFIG_CONTEXT_TRACKING */ | ||
2949 | |||
2954 | #endif /* CONFIG_PREEMPT */ | 2950 | #endif /* CONFIG_PREEMPT */ |
2955 | 2951 | ||
2956 | /* | 2952 | /* |
@@ -4531,8 +4527,10 @@ void sched_show_task(struct task_struct *p) | |||
4531 | #ifdef CONFIG_DEBUG_STACK_USAGE | 4527 | #ifdef CONFIG_DEBUG_STACK_USAGE |
4532 | free = stack_not_used(p); | 4528 | free = stack_not_used(p); |
4533 | #endif | 4529 | #endif |
4530 | ppid = 0; | ||
4534 | rcu_read_lock(); | 4531 | rcu_read_lock(); |
4535 | ppid = task_pid_nr(rcu_dereference(p->real_parent)); | 4532 | if (pid_alive(p)) |
4533 | ppid = task_pid_nr(rcu_dereference(p->real_parent)); | ||
4536 | rcu_read_unlock(); | 4534 | rcu_read_unlock(); |
4537 | printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, | 4535 | printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free, |
4538 | task_pid_nr(p), ppid, | 4536 | task_pid_nr(p), ppid, |
@@ -4637,6 +4635,81 @@ void init_idle(struct task_struct *idle, int cpu) | |||
4637 | #endif | 4635 | #endif |
4638 | } | 4636 | } |
4639 | 4637 | ||
4638 | int cpuset_cpumask_can_shrink(const struct cpumask *cur, | ||
4639 | const struct cpumask *trial) | ||
4640 | { | ||
4641 | int ret = 1, trial_cpus; | ||
4642 | struct dl_bw *cur_dl_b; | ||
4643 | unsigned long flags; | ||
4644 | |||
4645 | rcu_read_lock_sched(); | ||
4646 | cur_dl_b = dl_bw_of(cpumask_any(cur)); | ||
4647 | trial_cpus = cpumask_weight(trial); | ||
4648 | |||
4649 | raw_spin_lock_irqsave(&cur_dl_b->lock, flags); | ||
4650 | if (cur_dl_b->bw != -1 && | ||
4651 | cur_dl_b->bw * trial_cpus < cur_dl_b->total_bw) | ||
4652 | ret = 0; | ||
4653 | raw_spin_unlock_irqrestore(&cur_dl_b->lock, flags); | ||
4654 | rcu_read_unlock_sched(); | ||
4655 | |||
4656 | return ret; | ||
4657 | } | ||
4658 | |||
4659 | int task_can_attach(struct task_struct *p, | ||
4660 | const struct cpumask *cs_cpus_allowed) | ||
4661 | { | ||
4662 | int ret = 0; | ||
4663 | |||
4664 | /* | ||
4665 | * Kthreads which disallow setaffinity shouldn't be moved | ||
4666 | * to a new cpuset; we don't want to change their cpu | ||
4667 | * affinity and isolating such threads by their set of | ||
4668 | * allowed nodes is unnecessary. Thus, cpusets are not | ||
4669 | * applicable for such threads. This prevents checking for | ||
4670 | * success of set_cpus_allowed_ptr() on all attached tasks | ||
4671 | * before cpus_allowed may be changed. | ||
4672 | */ | ||
4673 | if (p->flags & PF_NO_SETAFFINITY) { | ||
4674 | ret = -EINVAL; | ||
4675 | goto out; | ||
4676 | } | ||
4677 | |||
4678 | #ifdef CONFIG_SMP | ||
4679 | if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span, | ||
4680 | cs_cpus_allowed)) { | ||
4681 | unsigned int dest_cpu = cpumask_any_and(cpu_active_mask, | ||
4682 | cs_cpus_allowed); | ||
4683 | struct dl_bw *dl_b; | ||
4684 | bool overflow; | ||
4685 | int cpus; | ||
4686 | unsigned long flags; | ||
4687 | |||
4688 | rcu_read_lock_sched(); | ||
4689 | dl_b = dl_bw_of(dest_cpu); | ||
4690 | raw_spin_lock_irqsave(&dl_b->lock, flags); | ||
4691 | cpus = dl_bw_cpus(dest_cpu); | ||
4692 | overflow = __dl_overflow(dl_b, cpus, 0, p->dl.dl_bw); | ||
4693 | if (overflow) | ||
4694 | ret = -EBUSY; | ||
4695 | else { | ||
4696 | /* | ||
4697 | * We reserve space for this task in the destination | ||
4698 | * root_domain, as we can't fail after this point. | ||
4699 | * We will free resources in the source root_domain | ||
4700 | * later on (see set_cpus_allowed_dl()). | ||
4701 | */ | ||
4702 | __dl_add(dl_b, p->dl.dl_bw); | ||
4703 | } | ||
4704 | raw_spin_unlock_irqrestore(&dl_b->lock, flags); | ||
4705 | rcu_read_unlock_sched(); | ||
4706 | |||
4707 | } | ||
4708 | #endif | ||
4709 | out: | ||
4710 | return ret; | ||
4711 | } | ||
4712 | |||
4640 | #ifdef CONFIG_SMP | 4713 | #ifdef CONFIG_SMP |
4641 | /* | 4714 | /* |
4642 | * move_queued_task - move a queued task to new rq. | 4715 | * move_queued_task - move a queued task to new rq. |
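task_can_attach() reuses the deadline admission test (__dl_overflow(), whose static inline copy is deleted from this file above and is now shared from a header) to reserve bandwidth in the destination root domain before the move. The inequality itself, restated as a stand-alone helper with illustrative types and names:

/* Illustrative restatement of the __dl_overflow() admission test. */
static bool demo_dl_overflow(s64 cap, int cpus, u64 total_bw, u64 old_bw, u64 new_bw)
{
        /* cap == -1 means "no per-CPU bandwidth limit is enforced". */
        return cap != -1 && (u64)cap * cpus < total_bw - old_bw + new_bw;
}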
@@ -6087,7 +6160,9 @@ static void claim_allocations(int cpu, struct sched_domain *sd) | |||
6087 | 6160 | ||
6088 | #ifdef CONFIG_NUMA | 6161 | #ifdef CONFIG_NUMA |
6089 | static int sched_domains_numa_levels; | 6162 | static int sched_domains_numa_levels; |
6163 | enum numa_topology_type sched_numa_topology_type; | ||
6090 | static int *sched_domains_numa_distance; | 6164 | static int *sched_domains_numa_distance; |
6165 | int sched_max_numa_distance; | ||
6091 | static struct cpumask ***sched_domains_numa_masks; | 6166 | static struct cpumask ***sched_domains_numa_masks; |
6092 | static int sched_domains_curr_level; | 6167 | static int sched_domains_curr_level; |
6093 | #endif | 6168 | #endif |
@@ -6259,7 +6334,7 @@ static void sched_numa_warn(const char *str) | |||
6259 | printk(KERN_WARNING "\n"); | 6334 | printk(KERN_WARNING "\n"); |
6260 | } | 6335 | } |
6261 | 6336 | ||
6262 | static bool find_numa_distance(int distance) | 6337 | bool find_numa_distance(int distance) |
6263 | { | 6338 | { |
6264 | int i; | 6339 | int i; |
6265 | 6340 | ||
@@ -6274,6 +6349,56 @@ static bool find_numa_distance(int distance) | |||
6274 | return false; | 6349 | return false; |
6275 | } | 6350 | } |
6276 | 6351 | ||
6352 | /* | ||
6353 | * A system can have three types of NUMA topology: | ||
6354 | * NUMA_DIRECT: all nodes are directly connected, or not a NUMA system | ||
6355 | * NUMA_GLUELESS_MESH: some nodes reachable through intermediary nodes | ||
6356 | * NUMA_BACKPLANE: nodes can reach other nodes through a backplane | ||
6357 | * | ||
6358 | * The difference between a glueless mesh topology and a backplane | ||
6359 | * topology lies in whether communication between not directly | ||
6360 | * connected nodes goes through intermediary nodes (where programs | ||
6361 | * could run), or through backplane controllers. This affects | ||
6362 | * placement of programs. | ||
6363 | * | ||
6364 | * The type of topology can be discerned with the following tests: | ||
6365 | * - If the maximum distance between any nodes is 1 hop, the system | ||
6366 | * is directly connected. | ||
6367 | * - If for two nodes A and B, located N > 1 hops away from each other, | ||
6368 | * there is an intermediary node C, which is < N hops away from both | ||
6369 | * nodes A and B, the system is a glueless mesh. | ||
6370 | */ | ||
6371 | static void init_numa_topology_type(void) | ||
6372 | { | ||
6373 | int a, b, c, n; | ||
6374 | |||
6375 | n = sched_max_numa_distance; | ||
6376 | |||
6377 | if (n <= 1) | ||
6378 | sched_numa_topology_type = NUMA_DIRECT; | ||
6379 | |||
6380 | for_each_online_node(a) { | ||
6381 | for_each_online_node(b) { | ||
6382 | /* Find two nodes furthest removed from each other. */ | ||
6383 | if (node_distance(a, b) < n) | ||
6384 | continue; | ||
6385 | |||
6386 | /* Is there an intermediary node between a and b? */ | ||
6387 | for_each_online_node(c) { | ||
6388 | if (node_distance(a, c) < n && | ||
6389 | node_distance(b, c) < n) { | ||
6390 | sched_numa_topology_type = | ||
6391 | NUMA_GLUELESS_MESH; | ||
6392 | return; | ||
6393 | } | ||
6394 | } | ||
6395 | |||
6396 | sched_numa_topology_type = NUMA_BACKPLANE; | ||
6397 | return; | ||
6398 | } | ||
6399 | } | ||
6400 | } | ||
6401 | |||
6277 | static void sched_init_numa(void) | 6402 | static void sched_init_numa(void) |
6278 | { | 6403 | { |
6279 | int next_distance, curr_distance = node_distance(0, 0); | 6404 | int next_distance, curr_distance = node_distance(0, 0); |
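The classification comment above boils down to two tests on the node-distance table: a maximum distance of one hop means NUMA_DIRECT, an intermediary node closer than the maximum to both endpoints means NUMA_GLUELESS_MESH, and everything else falls through to NUMA_BACKPLANE. A toy distance table that would classify as a glueless mesh (values illustrative, in node_distance() units where 10 is local):

static const int demo_dist[4][4] = {
        /*  0    1    2    3 */
        { 10,  20,  30,  20 },  /* node 0 */
        { 20,  10,  20,  30 },  /* node 1 */
        { 30,  20,  10,  20 },  /* node 2 */
        { 20,  30,  20,  10 },  /* node 3 */
};

/*
 * Maximum distance is 30 (nodes 0<->2 and 1<->3).  For nodes 0 and 2,
 * node 1 sits at distance 20 from both, i.e. closer than 30, so the
 * intermediary test fires and this table is NUMA_GLUELESS_MESH.  With
 * no such intermediary it would instead be NUMA_BACKPLANE.
 */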
@@ -6327,6 +6452,10 @@ static void sched_init_numa(void) | |||
6327 | if (!sched_debug()) | 6452 | if (!sched_debug()) |
6328 | break; | 6453 | break; |
6329 | } | 6454 | } |
6455 | |||
6456 | if (!level) | ||
6457 | return; | ||
6458 | |||
6330 | /* | 6459 | /* |
6331 | * 'level' contains the number of unique distances, excluding the | 6460 | * 'level' contains the number of unique distances, excluding the |
6332 | * identity distance node_distance(i,i). | 6461 | * identity distance node_distance(i,i). |
@@ -6406,6 +6535,9 @@ static void sched_init_numa(void) | |||
6406 | sched_domain_topology = tl; | 6535 | sched_domain_topology = tl; |
6407 | 6536 | ||
6408 | sched_domains_numa_levels = level; | 6537 | sched_domains_numa_levels = level; |
6538 | sched_max_numa_distance = sched_domains_numa_distance[level - 1]; | ||
6539 | |||
6540 | init_numa_topology_type(); | ||
6409 | } | 6541 | } |
6410 | 6542 | ||
6411 | static void sched_domains_numa_masks_set(int cpu) | 6543 | static void sched_domains_numa_masks_set(int cpu) |
@@ -7158,6 +7290,25 @@ static inline int preempt_count_equals(int preempt_offset) | |||
7158 | 7290 | ||
7159 | void __might_sleep(const char *file, int line, int preempt_offset) | 7291 | void __might_sleep(const char *file, int line, int preempt_offset) |
7160 | { | 7292 | { |
7293 | /* | ||
7294 | * Blocking primitives will set (and therefore destroy) current->state, | ||
7295 | * since we will exit with TASK_RUNNING make sure we enter with it, | ||
7296 | * otherwise we will destroy state. | ||
7297 | */ | ||
7298 | if (WARN_ONCE(current->state != TASK_RUNNING, | ||
7299 | "do not call blocking ops when !TASK_RUNNING; " | ||
7300 | "state=%lx set at [<%p>] %pS\n", | ||
7301 | current->state, | ||
7302 | (void *)current->task_state_change, | ||
7303 | (void *)current->task_state_change)) | ||
7304 | __set_current_state(TASK_RUNNING); | ||
7305 | |||
7306 | ___might_sleep(file, line, preempt_offset); | ||
7307 | } | ||
7308 | EXPORT_SYMBOL(__might_sleep); | ||
7309 | |||
7310 | void ___might_sleep(const char *file, int line, int preempt_offset) | ||
7311 | { | ||
7161 | static unsigned long prev_jiffy; /* ratelimiting */ | 7312 | static unsigned long prev_jiffy; /* ratelimiting */ |
7162 | 7313 | ||
7163 | rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */ | 7314 | rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */ |
@@ -7189,7 +7340,7 @@ void __might_sleep(const char *file, int line, int preempt_offset) | |||
7189 | #endif | 7340 | #endif |
7190 | dump_stack(); | 7341 | dump_stack(); |
7191 | } | 7342 | } |
7192 | EXPORT_SYMBOL(__might_sleep); | 7343 | EXPORT_SYMBOL(___might_sleep); |
7193 | #endif | 7344 | #endif |
7194 | 7345 | ||
7195 | #ifdef CONFIG_MAGIC_SYSRQ | 7346 | #ifdef CONFIG_MAGIC_SYSRQ |
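The new WARN_ONCE() targets the classic mistake of calling a blocking primitive between set_current_state() and schedule(): blocking resets current->state to TASK_RUNNING, so the later schedule() may not sleep at all. A kernel-style sketch of the bug and the usual fix, with made-up names (my_lock, my_cond) purely for illustration:

    /* Buggy: mutex_lock() may sleep, which clobbers TASK_INTERRUPTIBLE.
     * The WARN_ONCE() added to __might_sleep() fires at the mutex_lock(). */
    static void wait_for_cond_buggy(struct mutex *my_lock, bool *my_cond)
    {
            set_current_state(TASK_INTERRUPTIBLE);
            mutex_lock(my_lock);            /* blocking op while !TASK_RUNNING */
            if (!*my_cond)
                    schedule();             /* may return immediately now */
            __set_current_state(TASK_RUNNING);
            mutex_unlock(my_lock);
    }

    /* OK: take the sleeping lock first, publish the sleep state afterwards. */
    static void wait_for_cond(struct mutex *my_lock, bool *my_cond)
    {
            mutex_lock(my_lock);
            set_current_state(TASK_INTERRUPTIBLE);
            if (!*my_cond) {
                    mutex_unlock(my_lock);
                    schedule();             /* woken via wake_up_process() */
                    mutex_lock(my_lock);
            }
            __set_current_state(TASK_RUNNING);
            mutex_unlock(my_lock);
    }

The wait_event*() helpers already encode the safe ordering, so open-coded loops like the buggy one are usually best converted to them.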
@@ -7403,8 +7554,12 @@ void sched_move_task(struct task_struct *tsk) | |||
7403 | if (unlikely(running)) | 7554 | if (unlikely(running)) |
7404 | put_prev_task(rq, tsk); | 7555 | put_prev_task(rq, tsk); |
7405 | 7556 | ||
7406 | tg = container_of(task_css_check(tsk, cpu_cgrp_id, | 7557 | /* |
7407 | lockdep_is_held(&tsk->sighand->siglock)), | 7558 | * All callers are synchronized by task_rq_lock(); we do not use RCU |
7559 | * which is pointless here. Thus, we pass "true" to task_css_check() | ||
7560 | * to prevent lockdep warnings. | ||
7561 | */ | ||
7562 | tg = container_of(task_css_check(tsk, cpu_cgrp_id, true), | ||
7408 | struct task_group, css); | 7563 | struct task_group, css); |
7409 | tg = autogroup_task_group(tsk, tg); | 7564 | tg = autogroup_task_group(tsk, tg); |
7410 | tsk->sched_task_group = tg; | 7565 | tsk->sched_task_group = tg; |
@@ -7833,6 +7988,11 @@ static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css) | |||
7833 | sched_offline_group(tg); | 7988 | sched_offline_group(tg); |
7834 | } | 7989 | } |
7835 | 7990 | ||
7991 | static void cpu_cgroup_fork(struct task_struct *task) | ||
7992 | { | ||
7993 | sched_move_task(task); | ||
7994 | } | ||
7995 | |||
7836 | static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css, | 7996 | static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css, |
7837 | struct cgroup_taskset *tset) | 7997 | struct cgroup_taskset *tset) |
7838 | { | 7998 | { |
@@ -8205,6 +8365,7 @@ struct cgroup_subsys cpu_cgrp_subsys = { | |||
8205 | .css_free = cpu_cgroup_css_free, | 8365 | .css_free = cpu_cgroup_css_free, |
8206 | .css_online = cpu_cgroup_css_online, | 8366 | .css_online = cpu_cgroup_css_online, |
8207 | .css_offline = cpu_cgroup_css_offline, | 8367 | .css_offline = cpu_cgroup_css_offline, |
8368 | .fork = cpu_cgroup_fork, | ||
8208 | .can_attach = cpu_cgroup_can_attach, | 8369 | .can_attach = cpu_cgroup_can_attach, |
8209 | .attach = cpu_cgroup_attach, | 8370 | .attach = cpu_cgroup_attach, |
8210 | .exit = cpu_cgroup_exit, | 8371 | .exit = cpu_cgroup_exit, |
diff --git a/kernel/sched/cpudeadline.h b/kernel/sched/cpudeadline.h index 538c9796ad4a..020039bd1326 100644 --- a/kernel/sched/cpudeadline.h +++ b/kernel/sched/cpudeadline.h | |||
@@ -25,9 +25,6 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p, | |||
25 | void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid); | 25 | void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid); |
26 | int cpudl_init(struct cpudl *cp); | 26 | int cpudl_init(struct cpudl *cp); |
27 | void cpudl_cleanup(struct cpudl *cp); | 27 | void cpudl_cleanup(struct cpudl *cp); |
28 | #else | ||
29 | #define cpudl_set(cp, cpu, dl) do { } while (0) | ||
30 | #define cpudl_init() do { } while (0) | ||
31 | #endif /* CONFIG_SMP */ | 28 | #endif /* CONFIG_SMP */ |
32 | 29 | ||
33 | #endif /* _LINUX_CPUDL_H */ | 30 | #endif /* _LINUX_CPUDL_H */ |
diff --git a/kernel/sched/cpupri.h b/kernel/sched/cpupri.h index 6b033347fdfd..63cbb9ca0496 100644 --- a/kernel/sched/cpupri.h +++ b/kernel/sched/cpupri.h | |||
@@ -26,9 +26,6 @@ int cpupri_find(struct cpupri *cp, | |||
26 | void cpupri_set(struct cpupri *cp, int cpu, int pri); | 26 | void cpupri_set(struct cpupri *cp, int cpu, int pri); |
27 | int cpupri_init(struct cpupri *cp); | 27 | int cpupri_init(struct cpupri *cp); |
28 | void cpupri_cleanup(struct cpupri *cp); | 28 | void cpupri_cleanup(struct cpupri *cp); |
29 | #else | ||
30 | #define cpupri_set(cp, cpu, pri) do { } while (0) | ||
31 | #define cpupri_init() do { } while (0) | ||
32 | #endif | 29 | #endif |
33 | 30 | ||
34 | #endif /* _LINUX_CPUPRI_H */ | 31 | #endif /* _LINUX_CPUPRI_H */ |
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 256e577faf1b..e5db8c6feebd 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c | |||
@@ -518,12 +518,20 @@ again: | |||
518 | } | 518 | } |
519 | 519 | ||
520 | /* | 520 | /* |
521 | * We need to take care of a possible races here. In fact, the | 521 | * We need to take care of several possible races here: |
522 | * task might have changed its scheduling policy to something | 522 | * |
523 | * different from SCHED_DEADLINE or changed its reservation | 523 | * - the task might have changed its scheduling policy |
524 | * parameters (through sched_setattr()). | 524 | * to something different than SCHED_DEADLINE |
525 | * - the task might have changed its reservation parameters | ||
526 | * (through sched_setattr()) | ||
527 | * - the task might have been boosted by someone else and | ||
528 | * might be in the boosting/deboosting path | ||
529 | * | ||
530 | * In all these cases we bail out, as the task is already | ||
531 | * in the runqueue or is going to be enqueued back anyway. | ||
525 | */ | 532 | */ |
526 | if (!dl_task(p) || dl_se->dl_new) | 533 | if (!dl_task(p) || dl_se->dl_new || |
534 | dl_se->dl_boosted || !dl_se->dl_throttled) | ||
527 | goto unlock; | 535 | goto unlock; |
528 | 536 | ||
529 | sched_clock_tick(); | 537 | sched_clock_tick(); |
@@ -532,7 +540,7 @@ again: | |||
532 | dl_se->dl_yielded = 0; | 540 | dl_se->dl_yielded = 0; |
533 | if (task_on_rq_queued(p)) { | 541 | if (task_on_rq_queued(p)) { |
534 | enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); | 542 | enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); |
535 | if (task_has_dl_policy(rq->curr)) | 543 | if (dl_task(rq->curr)) |
536 | check_preempt_curr_dl(rq, p, 0); | 544 | check_preempt_curr_dl(rq, p, 0); |
537 | else | 545 | else |
538 | resched_curr(rq); | 546 | resched_curr(rq); |
@@ -555,11 +563,6 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se) | |||
555 | { | 563 | { |
556 | struct hrtimer *timer = &dl_se->dl_timer; | 564 | struct hrtimer *timer = &dl_se->dl_timer; |
557 | 565 | ||
558 | if (hrtimer_active(timer)) { | ||
559 | hrtimer_try_to_cancel(timer); | ||
560 | return; | ||
561 | } | ||
562 | |||
563 | hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 566 | hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
564 | timer->function = dl_task_timer; | 567 | timer->function = dl_task_timer; |
565 | } | 568 | } |
@@ -625,7 +628,7 @@ static void update_curr_dl(struct rq *rq) | |||
625 | 628 | ||
626 | sched_rt_avg_update(rq, delta_exec); | 629 | sched_rt_avg_update(rq, delta_exec); |
627 | 630 | ||
628 | dl_se->runtime -= delta_exec; | 631 | dl_se->runtime -= dl_se->dl_yielded ? 0 : delta_exec; |
629 | if (dl_runtime_exceeded(rq, dl_se)) { | 632 | if (dl_runtime_exceeded(rq, dl_se)) { |
630 | __dequeue_task_dl(rq, curr, 0); | 633 | __dequeue_task_dl(rq, curr, 0); |
631 | if (likely(start_dl_timer(dl_se, curr->dl.dl_boosted))) | 634 | if (likely(start_dl_timer(dl_se, curr->dl.dl_boosted))) |
@@ -847,8 +850,19 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags) | |||
847 | * smaller than our one... OTW we keep our runtime and | 850 | * smaller than our one... OTW we keep our runtime and |
848 | * deadline. | 851 | * deadline. |
849 | */ | 852 | */ |
850 | if (pi_task && p->dl.dl_boosted && dl_prio(pi_task->normal_prio)) | 853 | if (pi_task && p->dl.dl_boosted && dl_prio(pi_task->normal_prio)) { |
851 | pi_se = &pi_task->dl; | 854 | pi_se = &pi_task->dl; |
855 | } else if (!dl_prio(p->normal_prio)) { | ||
856 | /* | ||
857 | * Special case in which we have a !SCHED_DEADLINE task | ||
858 | * that is going to be deboosted, but exceeds its | ||
859 | * runtime while doing so. No point in replenishing | ||
860 | * it, as it's going to return back to its original | ||
861 | * scheduling class after this. | ||
862 | */ | ||
863 | BUG_ON(!p->dl.dl_boosted || flags != ENQUEUE_REPLENISH); | ||
864 | return; | ||
865 | } | ||
852 | 866 | ||
853 | /* | 867 | /* |
854 | * If p is throttled, we do nothing. In fact, if it exhausted | 868 | * If p is throttled, we do nothing. In fact, if it exhausted |
@@ -914,7 +928,7 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags) | |||
914 | struct task_struct *curr; | 928 | struct task_struct *curr; |
915 | struct rq *rq; | 929 | struct rq *rq; |
916 | 930 | ||
917 | if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK) | 931 | if (sd_flag != SD_BALANCE_WAKE) |
918 | goto out; | 932 | goto out; |
919 | 933 | ||
920 | rq = cpu_rq(cpu); | 934 | rq = cpu_rq(cpu); |
@@ -999,6 +1013,10 @@ static void start_hrtick_dl(struct rq *rq, struct task_struct *p) | |||
999 | { | 1013 | { |
1000 | hrtick_start(rq, p->dl.runtime); | 1014 | hrtick_start(rq, p->dl.runtime); |
1001 | } | 1015 | } |
1016 | #else /* !CONFIG_SCHED_HRTICK */ | ||
1017 | static void start_hrtick_dl(struct rq *rq, struct task_struct *p) | ||
1018 | { | ||
1019 | } | ||
1002 | #endif | 1020 | #endif |
1003 | 1021 | ||
1004 | static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq, | 1022 | static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq, |
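Adding the empty !CONFIG_SCHED_HRTICK stub lets the two call sites below drop their #ifdef blocks; the compiler simply discards the no-op call. A generic, standalone sketch of the idiom (CONFIG_SOME_FEATURE and do_fast_path are illustrative names, not from this patch):

    #include <stdio.h>

    struct foo { int hot; };

    #ifdef CONFIG_SOME_FEATURE
    static void do_fast_path(struct foo *f)
    {
            printf("fast path for %d\n", f->hot);
    }
    #else
    static inline void do_fast_path(struct foo *f)
    {
            /* compiled away when the feature is off */
    }
    #endif

    int main(void)
    {
            struct foo f = { .hot = 1 };

            do_fast_path(&f);               /* caller stays free of #ifdefs */
            return 0;
    }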
@@ -1052,10 +1070,8 @@ struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev) | |||
1052 | /* Running task will never be pushed. */ | 1070 | /* Running task will never be pushed. */ |
1053 | dequeue_pushable_dl_task(rq, p); | 1071 | dequeue_pushable_dl_task(rq, p); |
1054 | 1072 | ||
1055 | #ifdef CONFIG_SCHED_HRTICK | ||
1056 | if (hrtick_enabled(rq)) | 1073 | if (hrtick_enabled(rq)) |
1057 | start_hrtick_dl(rq, p); | 1074 | start_hrtick_dl(rq, p); |
1058 | #endif | ||
1059 | 1075 | ||
1060 | set_post_schedule(rq); | 1076 | set_post_schedule(rq); |
1061 | 1077 | ||
@@ -1074,10 +1090,8 @@ static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued) | |||
1074 | { | 1090 | { |
1075 | update_curr_dl(rq); | 1091 | update_curr_dl(rq); |
1076 | 1092 | ||
1077 | #ifdef CONFIG_SCHED_HRTICK | ||
1078 | if (hrtick_enabled(rq) && queued && p->dl.runtime > 0) | 1093 | if (hrtick_enabled(rq) && queued && p->dl.runtime > 0) |
1079 | start_hrtick_dl(rq, p); | 1094 | start_hrtick_dl(rq, p); |
1080 | #endif | ||
1081 | } | 1095 | } |
1082 | 1096 | ||
1083 | static void task_fork_dl(struct task_struct *p) | 1097 | static void task_fork_dl(struct task_struct *p) |
@@ -1314,6 +1328,7 @@ static int push_dl_task(struct rq *rq) | |||
1314 | { | 1328 | { |
1315 | struct task_struct *next_task; | 1329 | struct task_struct *next_task; |
1316 | struct rq *later_rq; | 1330 | struct rq *later_rq; |
1331 | int ret = 0; | ||
1317 | 1332 | ||
1318 | if (!rq->dl.overloaded) | 1333 | if (!rq->dl.overloaded) |
1319 | return 0; | 1334 | return 0; |
@@ -1359,7 +1374,6 @@ retry: | |||
1359 | * The task is still there. We don't try | 1374 | * The task is still there. We don't try |
1360 | * again, some other cpu will pull it when ready. | 1375 | * again, some other cpu will pull it when ready. |
1361 | */ | 1376 | */ |
1362 | dequeue_pushable_dl_task(rq, next_task); | ||
1363 | goto out; | 1377 | goto out; |
1364 | } | 1378 | } |
1365 | 1379 | ||
@@ -1375,6 +1389,7 @@ retry: | |||
1375 | deactivate_task(rq, next_task, 0); | 1389 | deactivate_task(rq, next_task, 0); |
1376 | set_task_cpu(next_task, later_rq->cpu); | 1390 | set_task_cpu(next_task, later_rq->cpu); |
1377 | activate_task(later_rq, next_task, 0); | 1391 | activate_task(later_rq, next_task, 0); |
1392 | ret = 1; | ||
1378 | 1393 | ||
1379 | resched_curr(later_rq); | 1394 | resched_curr(later_rq); |
1380 | 1395 | ||
@@ -1383,7 +1398,7 @@ retry: | |||
1383 | out: | 1398 | out: |
1384 | put_task_struct(next_task); | 1399 | put_task_struct(next_task); |
1385 | 1400 | ||
1386 | return 1; | 1401 | return ret; |
1387 | } | 1402 | } |
1388 | 1403 | ||
1389 | static void push_dl_tasks(struct rq *rq) | 1404 | static void push_dl_tasks(struct rq *rq) |
@@ -1489,7 +1504,7 @@ static void task_woken_dl(struct rq *rq, struct task_struct *p) | |||
1489 | p->nr_cpus_allowed > 1 && | 1504 | p->nr_cpus_allowed > 1 && |
1490 | dl_task(rq->curr) && | 1505 | dl_task(rq->curr) && |
1491 | (rq->curr->nr_cpus_allowed < 2 || | 1506 | (rq->curr->nr_cpus_allowed < 2 || |
1492 | dl_entity_preempt(&rq->curr->dl, &p->dl))) { | 1507 | !dl_entity_preempt(&p->dl, &rq->curr->dl))) { |
1493 | push_dl_tasks(rq); | 1508 | push_dl_tasks(rq); |
1494 | } | 1509 | } |
1495 | } | 1510 | } |
@@ -1498,10 +1513,33 @@ static void set_cpus_allowed_dl(struct task_struct *p, | |||
1498 | const struct cpumask *new_mask) | 1513 | const struct cpumask *new_mask) |
1499 | { | 1514 | { |
1500 | struct rq *rq; | 1515 | struct rq *rq; |
1516 | struct root_domain *src_rd; | ||
1501 | int weight; | 1517 | int weight; |
1502 | 1518 | ||
1503 | BUG_ON(!dl_task(p)); | 1519 | BUG_ON(!dl_task(p)); |
1504 | 1520 | ||
1521 | rq = task_rq(p); | ||
1522 | src_rd = rq->rd; | ||
1523 | /* | ||
1524 | * Migrating a SCHED_DEADLINE task between exclusive | ||
1525 | * cpusets (different root_domains) entails a bandwidth | ||
1526 | * update. We already made space for us in the destination | ||
1527 | * domain (see cpuset_can_attach()). | ||
1528 | */ | ||
1529 | if (!cpumask_intersects(src_rd->span, new_mask)) { | ||
1530 | struct dl_bw *src_dl_b; | ||
1531 | |||
1532 | src_dl_b = dl_bw_of(cpu_of(rq)); | ||
1533 | /* | ||
1534 | * We now free resources of the root_domain we are migrating | ||
1535 | * off. In the worst case, sched_setattr() may temporarily fail | ||
1536 | * until we complete the update. | ||
1537 | */ | ||
1538 | raw_spin_lock(&src_dl_b->lock); | ||
1539 | __dl_clear(src_dl_b, p->dl.dl_bw); | ||
1540 | raw_spin_unlock(&src_dl_b->lock); | ||
1541 | } | ||
1542 | |||
1505 | /* | 1543 | /* |
1506 | * Update only if the task is actually running (i.e., | 1544 | * Update only if the task is actually running (i.e., |
1507 | * it is on the rq AND it is not throttled). | 1545 | * it is on the rq AND it is not throttled). |
@@ -1518,8 +1556,6 @@ static void set_cpus_allowed_dl(struct task_struct *p, | |||
1518 | if ((p->nr_cpus_allowed > 1) == (weight > 1)) | 1556 | if ((p->nr_cpus_allowed > 1) == (weight > 1)) |
1519 | return; | 1557 | return; |
1520 | 1558 | ||
1521 | rq = task_rq(p); | ||
1522 | |||
1523 | /* | 1559 | /* |
1524 | * The process used to be able to migrate OR it can now migrate | 1560 | * The process used to be able to migrate OR it can now migrate |
1525 | */ | 1561 | */ |
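The __dl_clear() above is plain bandwidth bookkeeping: each root_domain sums the admitted deadline bandwidth, cpuset_can_attach() has already reserved the task's share in the destination domain, and the move must release it from the source. A simplified userspace model of that accounting (the struct and helpers are stand-ins, not the kernel's dl_bw code):

    #include <stdio.h>
    #include <stdint.h>
    #include <stdbool.h>

    struct dl_bw {
            uint64_t bw;            /* admission cap for the domain */
            uint64_t total_bw;      /* sum of admitted task bandwidths */
    };

    static bool dl_overflow(struct dl_bw *b, uint64_t tsk_bw)
    {
            return b->total_bw + tsk_bw > b->bw;
    }

    static void dl_add(struct dl_bw *b, uint64_t tsk_bw)   { b->total_bw += tsk_bw; }
    static void dl_clear(struct dl_bw *b, uint64_t tsk_bw) { b->total_bw -= tsk_bw; }

    int main(void)
    {
            /* Two exclusive cpusets, 4 CPUs each, 95% utilization cap. */
            struct dl_bw src = { .bw = 4 * ((95ULL << 20) / 100), .total_bw = 0 };
            struct dl_bw dst = { .bw = 4 * ((95ULL << 20) / 100), .total_bw = 0 };
            uint64_t tsk_bw = (10ULL << 20) / 100;  /* runtime/period = 10% */

            dl_add(&src, tsk_bw);                   /* task admitted in src */

            /* Cross-domain move: reserve in dst first, then release from src. */
            if (!dl_overflow(&dst, tsk_bw)) {
                    dl_add(&dst, tsk_bw);           /* cpuset_can_attach() side */
                    dl_clear(&src, tsk_bw);         /* set_cpus_allowed_dl() side */
            }

            printf("src=%llu dst=%llu\n",
                   (unsigned long long)src.total_bw,
                   (unsigned long long)dst.total_bw);
            return 0;
    }

Skipping the release would leave the source domain counting bandwidth it no longer runs, making later admissions there fail for no reason.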
@@ -1567,22 +1603,48 @@ void init_sched_dl_class(void) | |||
1567 | 1603 | ||
1568 | #endif /* CONFIG_SMP */ | 1604 | #endif /* CONFIG_SMP */ |
1569 | 1605 | ||
1606 | /* | ||
1607 | * Ensure p's dl_timer is cancelled. May drop rq->lock for a while. | ||
1608 | */ | ||
1609 | static void cancel_dl_timer(struct rq *rq, struct task_struct *p) | ||
1610 | { | ||
1611 | struct hrtimer *dl_timer = &p->dl.dl_timer; | ||
1612 | |||
1613 | /* Nobody will change task's class if pi_lock is held */ | ||
1614 | lockdep_assert_held(&p->pi_lock); | ||
1615 | |||
1616 | if (hrtimer_active(dl_timer)) { | ||
1617 | int ret = hrtimer_try_to_cancel(dl_timer); | ||
1618 | |||
1619 | if (unlikely(ret == -1)) { | ||
1620 | /* | ||
1621 | * Note that p may migrate OR new deadline tasks | ||
1622 | * may appear in the rq while we are unlocking it. | ||
1623 | * Our caller must be fine with that. | ||
1624 | */ | ||
1625 | raw_spin_unlock(&rq->lock); | ||
1626 | hrtimer_cancel(dl_timer); | ||
1627 | raw_spin_lock(&rq->lock); | ||
1628 | } | ||
1629 | } | ||
1630 | } | ||
1631 | |||
1570 | static void switched_from_dl(struct rq *rq, struct task_struct *p) | 1632 | static void switched_from_dl(struct rq *rq, struct task_struct *p) |
1571 | { | 1633 | { |
1572 | if (hrtimer_active(&p->dl.dl_timer) && !dl_policy(p->policy)) | 1634 | cancel_dl_timer(rq, p); |
1573 | hrtimer_try_to_cancel(&p->dl.dl_timer); | ||
1574 | 1635 | ||
1575 | __dl_clear_params(p); | 1636 | __dl_clear_params(p); |
1576 | 1637 | ||
1577 | #ifdef CONFIG_SMP | ||
1578 | /* | 1638 | /* |
1579 | * Since this might be the only -deadline task on the rq, | 1639 | * Since this might be the only -deadline task on the rq, |
1580 | * this is the right place to try to pull some other one | 1640 | * this is the right place to try to pull some other one |
1581 | * from an overloaded cpu, if any. | 1641 | * from an overloaded cpu, if any. |
1582 | */ | 1642 | */ |
1583 | if (!rq->dl.dl_nr_running) | 1643 | if (!task_on_rq_queued(p) || rq->dl.dl_nr_running) |
1584 | pull_dl_task(rq); | 1644 | return; |
1585 | #endif | 1645 | |
1646 | if (pull_dl_task(rq)) | ||
1647 | resched_curr(rq); | ||
1586 | } | 1648 | } |
1587 | 1649 | ||
1588 | /* | 1650 | /* |
@@ -1603,12 +1665,17 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p) | |||
1603 | 1665 | ||
1604 | if (task_on_rq_queued(p) && rq->curr != p) { | 1666 | if (task_on_rq_queued(p) && rq->curr != p) { |
1605 | #ifdef CONFIG_SMP | 1667 | #ifdef CONFIG_SMP |
1606 | if (rq->dl.overloaded && push_dl_task(rq) && rq != task_rq(p)) | 1668 | if (p->nr_cpus_allowed > 1 && rq->dl.overloaded && |
1669 | push_dl_task(rq) && rq != task_rq(p)) | ||
1607 | /* Only reschedule if pushing failed */ | 1670 | /* Only reschedule if pushing failed */ |
1608 | check_resched = 0; | 1671 | check_resched = 0; |
1609 | #endif /* CONFIG_SMP */ | 1672 | #endif /* CONFIG_SMP */ |
1610 | if (check_resched && task_has_dl_policy(rq->curr)) | 1673 | if (check_resched) { |
1611 | check_preempt_curr_dl(rq, p, 0); | 1674 | if (dl_task(rq->curr)) |
1675 | check_preempt_curr_dl(rq, p, 0); | ||
1676 | else | ||
1677 | resched_curr(rq); | ||
1678 | } | ||
1612 | } | 1679 | } |
1613 | } | 1680 | } |
1614 | 1681 | ||
@@ -1678,4 +1745,15 @@ const struct sched_class dl_sched_class = { | |||
1678 | .prio_changed = prio_changed_dl, | 1745 | .prio_changed = prio_changed_dl, |
1679 | .switched_from = switched_from_dl, | 1746 | .switched_from = switched_from_dl, |
1680 | .switched_to = switched_to_dl, | 1747 | .switched_to = switched_to_dl, |
1748 | |||
1749 | .update_curr = update_curr_dl, | ||
1681 | }; | 1750 | }; |
1751 | |||
1752 | #ifdef CONFIG_SCHED_DEBUG | ||
1753 | extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq); | ||
1754 | |||
1755 | void print_dl_stats(struct seq_file *m, int cpu) | ||
1756 | { | ||
1757 | print_dl_rq(m, cpu, &cpu_rq(cpu)->dl); | ||
1758 | } | ||
1759 | #endif /* CONFIG_SCHED_DEBUG */ | ||
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index ce33780d8f20..92cc52001e74 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c | |||
@@ -261,6 +261,12 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) | |||
261 | #undef P | 261 | #undef P |
262 | } | 262 | } |
263 | 263 | ||
264 | void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq) | ||
265 | { | ||
266 | SEQ_printf(m, "\ndl_rq[%d]:\n", cpu); | ||
267 | SEQ_printf(m, " .%-30s: %ld\n", "dl_nr_running", dl_rq->dl_nr_running); | ||
268 | } | ||
269 | |||
264 | extern __read_mostly int sched_clock_running; | 270 | extern __read_mostly int sched_clock_running; |
265 | 271 | ||
266 | static void print_cpu(struct seq_file *m, int cpu) | 272 | static void print_cpu(struct seq_file *m, int cpu) |
@@ -329,6 +335,7 @@ do { \ | |||
329 | spin_lock_irqsave(&sched_debug_lock, flags); | 335 | spin_lock_irqsave(&sched_debug_lock, flags); |
330 | print_cfs_stats(m, cpu); | 336 | print_cfs_stats(m, cpu); |
331 | print_rt_stats(m, cpu); | 337 | print_rt_stats(m, cpu); |
338 | print_dl_stats(m, cpu); | ||
332 | 339 | ||
333 | print_rq(m, rq, cpu); | 340 | print_rq(m, rq, cpu); |
334 | spin_unlock_irqrestore(&sched_debug_lock, flags); | 341 | spin_unlock_irqrestore(&sched_debug_lock, flags); |
@@ -528,8 +535,8 @@ static void sched_show_numa(struct task_struct *p, struct seq_file *m) | |||
528 | unsigned long nr_faults = -1; | 535 | unsigned long nr_faults = -1; |
529 | int cpu_current, home_node; | 536 | int cpu_current, home_node; |
530 | 537 | ||
531 | if (p->numa_faults_memory) | 538 | if (p->numa_faults) |
532 | nr_faults = p->numa_faults_memory[2*node + i]; | 539 | nr_faults = p->numa_faults[2*node + i]; |
533 | 540 | ||
534 | cpu_current = !i ? (task_node(p) == node) : | 541 | cpu_current = !i ? (task_node(p) == node) : |
535 | (pol && node_isset(node, pol->v.nodes)); | 542 | (pol && node_isset(node, pol->v.nodes)); |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 0b069bf3e708..df2cdf77f899 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
@@ -726,6 +726,11 @@ static void update_curr(struct cfs_rq *cfs_rq) | |||
726 | account_cfs_rq_runtime(cfs_rq, delta_exec); | 726 | account_cfs_rq_runtime(cfs_rq, delta_exec); |
727 | } | 727 | } |
728 | 728 | ||
729 | static void update_curr_fair(struct rq *rq) | ||
730 | { | ||
731 | update_curr(cfs_rq_of(&rq->curr->se)); | ||
732 | } | ||
733 | |||
729 | static inline void | 734 | static inline void |
730 | update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se) | 735 | update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se) |
731 | { | 736 | { |
@@ -828,11 +833,12 @@ static unsigned int task_nr_scan_windows(struct task_struct *p) | |||
828 | 833 | ||
829 | static unsigned int task_scan_min(struct task_struct *p) | 834 | static unsigned int task_scan_min(struct task_struct *p) |
830 | { | 835 | { |
836 | unsigned int scan_size = ACCESS_ONCE(sysctl_numa_balancing_scan_size); | ||
831 | unsigned int scan, floor; | 837 | unsigned int scan, floor; |
832 | unsigned int windows = 1; | 838 | unsigned int windows = 1; |
833 | 839 | ||
834 | if (sysctl_numa_balancing_scan_size < MAX_SCAN_WINDOW) | 840 | if (scan_size < MAX_SCAN_WINDOW) |
835 | windows = MAX_SCAN_WINDOW / sysctl_numa_balancing_scan_size; | 841 | windows = MAX_SCAN_WINDOW / scan_size; |
836 | floor = 1000 / windows; | 842 | floor = 1000 / windows; |
837 | 843 | ||
838 | scan = sysctl_numa_balancing_scan_period_min / task_nr_scan_windows(p); | 844 | scan = sysctl_numa_balancing_scan_period_min / task_nr_scan_windows(p); |
@@ -867,7 +873,6 @@ struct numa_group { | |||
867 | spinlock_t lock; /* nr_tasks, tasks */ | 873 | spinlock_t lock; /* nr_tasks, tasks */ |
868 | int nr_tasks; | 874 | int nr_tasks; |
869 | pid_t gid; | 875 | pid_t gid; |
870 | struct list_head task_list; | ||
871 | 876 | ||
872 | struct rcu_head rcu; | 877 | struct rcu_head rcu; |
873 | nodemask_t active_nodes; | 878 | nodemask_t active_nodes; |
@@ -895,18 +900,24 @@ pid_t task_numa_group_id(struct task_struct *p) | |||
895 | return p->numa_group ? p->numa_group->gid : 0; | 900 | return p->numa_group ? p->numa_group->gid : 0; |
896 | } | 901 | } |
897 | 902 | ||
898 | static inline int task_faults_idx(int nid, int priv) | 903 | /* |
904 | * The averaged statistics, shared & private, memory & cpu, | ||
905 | * occupy the first half of the array. The second half of the | ||
906 | * array is for current counters, which are averaged into the | ||
907 | * first set by task_numa_placement. | ||
908 | */ | ||
909 | static inline int task_faults_idx(enum numa_faults_stats s, int nid, int priv) | ||
899 | { | 910 | { |
900 | return NR_NUMA_HINT_FAULT_TYPES * nid + priv; | 911 | return NR_NUMA_HINT_FAULT_TYPES * (s * nr_node_ids + nid) + priv; |
901 | } | 912 | } |
902 | 913 | ||
903 | static inline unsigned long task_faults(struct task_struct *p, int nid) | 914 | static inline unsigned long task_faults(struct task_struct *p, int nid) |
904 | { | 915 | { |
905 | if (!p->numa_faults_memory) | 916 | if (!p->numa_faults) |
906 | return 0; | 917 | return 0; |
907 | 918 | ||
908 | return p->numa_faults_memory[task_faults_idx(nid, 0)] + | 919 | return p->numa_faults[task_faults_idx(NUMA_MEM, nid, 0)] + |
909 | p->numa_faults_memory[task_faults_idx(nid, 1)]; | 920 | p->numa_faults[task_faults_idx(NUMA_MEM, nid, 1)]; |
910 | } | 921 | } |
911 | 922 | ||
912 | static inline unsigned long group_faults(struct task_struct *p, int nid) | 923 | static inline unsigned long group_faults(struct task_struct *p, int nid) |
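With the separate numa_faults_* pointers gone, the position of every counter in the single numa_faults[] array is pure arithmetic: statistics region, then node, then private/shared slot. A small userspace sketch of the index formula for a two-node machine (the region labels printed are illustrative; the real ordering comes from the enum in the matching sched.h change):

    #include <stdio.h>

    #define NR_NUMA_HINT_FAULT_TYPES 2      /* private, shared */
    static const int nr_node_ids = 2;

    /* Same shape as the new task_faults_idx(). */
    static int faults_idx(int s, int nid, int priv)
    {
            return NR_NUMA_HINT_FAULT_TYPES * (s * nr_node_ids + nid) + priv;
    }

    int main(void)
    {
            const char *region[] = { "mem avg", "cpu avg", "mem buf", "cpu buf" };
            int s, nid, priv;

            for (s = 0; s < 4; s++)
                    for (nid = 0; nid < nr_node_ids; nid++)
                            for (priv = 0; priv < NR_NUMA_HINT_FAULT_TYPES; priv++)
                                    printf("%-7s nid=%d priv=%d -> numa_faults[%2d]\n",
                                           region[s], nid, priv,
                                           faults_idx(s, nid, priv));
            return 0;
    }

The output places the averaged statistics in indices 0-7 and the per-scan buffers in 8-15, which is exactly the first-half/second-half split described in the comment above.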
@@ -914,14 +925,79 @@ static inline unsigned long group_faults(struct task_struct *p, int nid) | |||
914 | if (!p->numa_group) | 925 | if (!p->numa_group) |
915 | return 0; | 926 | return 0; |
916 | 927 | ||
917 | return p->numa_group->faults[task_faults_idx(nid, 0)] + | 928 | return p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 0)] + |
918 | p->numa_group->faults[task_faults_idx(nid, 1)]; | 929 | p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 1)]; |
919 | } | 930 | } |
920 | 931 | ||
921 | static inline unsigned long group_faults_cpu(struct numa_group *group, int nid) | 932 | static inline unsigned long group_faults_cpu(struct numa_group *group, int nid) |
922 | { | 933 | { |
923 | return group->faults_cpu[task_faults_idx(nid, 0)] + | 934 | return group->faults_cpu[task_faults_idx(NUMA_MEM, nid, 0)] + |
924 | group->faults_cpu[task_faults_idx(nid, 1)]; | 935 | group->faults_cpu[task_faults_idx(NUMA_MEM, nid, 1)]; |
936 | } | ||
937 | |||
938 | /* Handle placement on systems where not all nodes are directly connected. */ | ||
939 | static unsigned long score_nearby_nodes(struct task_struct *p, int nid, | ||
940 | int maxdist, bool task) | ||
941 | { | ||
942 | unsigned long score = 0; | ||
943 | int node; | ||
944 | |||
945 | /* | ||
946 | * All nodes are directly connected, and the same distance | ||
947 | * from each other. No need for fancy placement algorithms. | ||
948 | */ | ||
949 | if (sched_numa_topology_type == NUMA_DIRECT) | ||
950 | return 0; | ||
951 | |||
952 | /* | ||
953 | * This code is called for each node, introducing N^2 complexity, | ||
954 | * which should be ok given the number of nodes rarely exceeds 8. | ||
955 | */ | ||
956 | for_each_online_node(node) { | ||
957 | unsigned long faults; | ||
958 | int dist = node_distance(nid, node); | ||
959 | |||
960 | /* | ||
961 | * The furthest away nodes in the system are not interesting | ||
962 | * for placement; nid was already counted. | ||
963 | */ | ||
964 | if (dist == sched_max_numa_distance || node == nid) | ||
965 | continue; | ||
966 | |||
967 | /* | ||
968 | * On systems with a backplane NUMA topology, compare groups | ||
969 | * of nodes, and move tasks towards the group with the most | ||
970 | * memory accesses. When comparing two nodes at distance | ||
971 | * "hoplimit", only nodes closer by than "hoplimit" are part | ||
972 | * of each group. Skip other nodes. | ||
973 | */ | ||
974 | if (sched_numa_topology_type == NUMA_BACKPLANE && | ||
975 | dist > maxdist) | ||
976 | continue; | ||
977 | |||
978 | /* Add up the faults from nearby nodes. */ | ||
979 | if (task) | ||
980 | faults = task_faults(p, node); | ||
981 | else | ||
982 | faults = group_faults(p, node); | ||
983 | |||
984 | /* | ||
985 | * On systems with a glueless mesh NUMA topology, there are | ||
986 | * no fixed "groups of nodes". Instead, nodes that are not | ||
987 | * directly connected bounce traffic through intermediate | ||
988 | * nodes; a numa_group can occupy any set of nodes. | ||
989 | * The further away a node is, the less the faults count. | ||
990 | * This seems to result in good task placement. | ||
991 | */ | ||
992 | if (sched_numa_topology_type == NUMA_GLUELESS_MESH) { | ||
993 | faults *= (sched_max_numa_distance - dist); | ||
994 | faults /= (sched_max_numa_distance - LOCAL_DISTANCE); | ||
995 | } | ||
996 | |||
997 | score += faults; | ||
998 | } | ||
999 | |||
1000 | return score; | ||
925 | } | 1001 | } |
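On a glueless mesh the scaling above is a straight linear discount with distance. A quick arithmetic sketch with made-up fault counts, LOCAL_DISTANCE of 10 and a maximum node distance of 40:

    #include <stdio.h>

    #define LOCAL_DISTANCE 10

    int main(void)
    {
            const int max_dist = 40;        /* stands in for sched_max_numa_distance */
            /* Hypothetical faults observed on three other nodes. */
            const struct { int dist; unsigned long faults; } nearby[] = {
                    { 20, 900 }, { 30, 600 }, { 40, 300 },
            };
            unsigned long score = 0;
            int i;

            for (i = 0; i < 3; i++) {
                    unsigned long faults = nearby[i].faults;

                    /* Furthest-away nodes are skipped, as in score_nearby_nodes(). */
                    if (nearby[i].dist == max_dist)
                            continue;

                    /* Same scaling as the NUMA_GLUELESS_MESH branch. */
                    faults *= (max_dist - nearby[i].dist);
                    faults /= (max_dist - LOCAL_DISTANCE);
                    score += faults;        /* 900*20/30 = 600, then 600*10/30 = 200 */
            }
            printf("nearby-node score: %lu\n", score);      /* prints 800 */
            return 0;
    }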
926 | 1002 | ||
927 | /* | 1003 | /* |
@@ -930,11 +1006,12 @@ static inline unsigned long group_faults_cpu(struct numa_group *group, int nid) | |||
930 | * larger multiplier, in order to group tasks together that are almost | 1006 | * larger multiplier, in order to group tasks together that are almost |
931 | * evenly spread out between numa nodes. | 1007 | * evenly spread out between numa nodes. |
932 | */ | 1008 | */ |
933 | static inline unsigned long task_weight(struct task_struct *p, int nid) | 1009 | static inline unsigned long task_weight(struct task_struct *p, int nid, |
1010 | int dist) | ||
934 | { | 1011 | { |
935 | unsigned long total_faults; | 1012 | unsigned long faults, total_faults; |
936 | 1013 | ||
937 | if (!p->numa_faults_memory) | 1014 | if (!p->numa_faults) |
938 | return 0; | 1015 | return 0; |
939 | 1016 | ||
940 | total_faults = p->total_numa_faults; | 1017 | total_faults = p->total_numa_faults; |
@@ -942,15 +1019,29 @@ static inline unsigned long task_weight(struct task_struct *p, int nid) | |||
942 | if (!total_faults) | 1019 | if (!total_faults) |
943 | return 0; | 1020 | return 0; |
944 | 1021 | ||
945 | return 1000 * task_faults(p, nid) / total_faults; | 1022 | faults = task_faults(p, nid); |
1023 | faults += score_nearby_nodes(p, nid, dist, true); | ||
1024 | |||
1025 | return 1000 * faults / total_faults; | ||
946 | } | 1026 | } |
947 | 1027 | ||
948 | static inline unsigned long group_weight(struct task_struct *p, int nid) | 1028 | static inline unsigned long group_weight(struct task_struct *p, int nid, |
1029 | int dist) | ||
949 | { | 1030 | { |
950 | if (!p->numa_group || !p->numa_group->total_faults) | 1031 | unsigned long faults, total_faults; |
1032 | |||
1033 | if (!p->numa_group) | ||
1034 | return 0; | ||
1035 | |||
1036 | total_faults = p->numa_group->total_faults; | ||
1037 | |||
1038 | if (!total_faults) | ||
951 | return 0; | 1039 | return 0; |
952 | 1040 | ||
953 | return 1000 * group_faults(p, nid) / p->numa_group->total_faults; | 1041 | faults = group_faults(p, nid); |
1042 | faults += score_nearby_nodes(p, nid, dist, false); | ||
1043 | |||
1044 | return 1000 * faults / total_faults; | ||
954 | } | 1045 | } |
955 | 1046 | ||
956 | bool should_numa_migrate_memory(struct task_struct *p, struct page * page, | 1047 | bool should_numa_migrate_memory(struct task_struct *p, struct page * page, |
@@ -1083,6 +1174,7 @@ struct task_numa_env { | |||
1083 | struct numa_stats src_stats, dst_stats; | 1174 | struct numa_stats src_stats, dst_stats; |
1084 | 1175 | ||
1085 | int imbalance_pct; | 1176 | int imbalance_pct; |
1177 | int dist; | ||
1086 | 1178 | ||
1087 | struct task_struct *best_task; | 1179 | struct task_struct *best_task; |
1088 | long best_imp; | 1180 | long best_imp; |
@@ -1162,11 +1254,29 @@ static void task_numa_compare(struct task_numa_env *env, | |||
1162 | long load; | 1254 | long load; |
1163 | long imp = env->p->numa_group ? groupimp : taskimp; | 1255 | long imp = env->p->numa_group ? groupimp : taskimp; |
1164 | long moveimp = imp; | 1256 | long moveimp = imp; |
1257 | int dist = env->dist; | ||
1165 | 1258 | ||
1166 | rcu_read_lock(); | 1259 | rcu_read_lock(); |
1167 | cur = ACCESS_ONCE(dst_rq->curr); | 1260 | |
1168 | if (cur->pid == 0) /* idle */ | 1261 | raw_spin_lock_irq(&dst_rq->lock); |
1262 | cur = dst_rq->curr; | ||
1263 | /* | ||
1264 | * No need to move the exiting task, and this ensures that ->curr | ||
1265 | * wasn't reaped and thus get_task_struct() in task_numa_assign() | ||
1266 | * is safe under RCU read lock. | ||
1267 | * Note that rcu_read_lock() itself can't protect from the final | ||
1268 | * put_task_struct() after the last schedule(). | ||
1269 | */ | ||
1270 | if ((cur->flags & PF_EXITING) || is_idle_task(cur)) | ||
1169 | cur = NULL; | 1271 | cur = NULL; |
1272 | raw_spin_unlock_irq(&dst_rq->lock); | ||
1273 | |||
1274 | /* | ||
1275 | * Because we have preemption enabled we can get migrated around and | ||
1276 | * end up trying to select ourselves (current == env->p) as a swap candidate. | ||
1277 | */ | ||
1278 | if (cur == env->p) | ||
1279 | goto unlock; | ||
1170 | 1280 | ||
1171 | /* | 1281 | /* |
1172 | * "imp" is the fault differential for the source task between the | 1282 | * "imp" is the fault differential for the source task between the |
@@ -1185,8 +1295,8 @@ static void task_numa_compare(struct task_numa_env *env, | |||
1185 | * in any group then look only at task weights. | 1295 | * in any group then look only at task weights. |
1186 | */ | 1296 | */ |
1187 | if (cur->numa_group == env->p->numa_group) { | 1297 | if (cur->numa_group == env->p->numa_group) { |
1188 | imp = taskimp + task_weight(cur, env->src_nid) - | 1298 | imp = taskimp + task_weight(cur, env->src_nid, dist) - |
1189 | task_weight(cur, env->dst_nid); | 1299 | task_weight(cur, env->dst_nid, dist); |
1190 | /* | 1300 | /* |
1191 | * Add some hysteresis to prevent swapping the | 1301 | * Add some hysteresis to prevent swapping the |
1192 | * tasks within a group over tiny differences. | 1302 | * tasks within a group over tiny differences. |
@@ -1200,11 +1310,11 @@ static void task_numa_compare(struct task_numa_env *env, | |||
1200 | * instead. | 1310 | * instead. |
1201 | */ | 1311 | */ |
1202 | if (cur->numa_group) | 1312 | if (cur->numa_group) |
1203 | imp += group_weight(cur, env->src_nid) - | 1313 | imp += group_weight(cur, env->src_nid, dist) - |
1204 | group_weight(cur, env->dst_nid); | 1314 | group_weight(cur, env->dst_nid, dist); |
1205 | else | 1315 | else |
1206 | imp += task_weight(cur, env->src_nid) - | 1316 | imp += task_weight(cur, env->src_nid, dist) - |
1207 | task_weight(cur, env->dst_nid); | 1317 | task_weight(cur, env->dst_nid, dist); |
1208 | } | 1318 | } |
1209 | } | 1319 | } |
1210 | 1320 | ||
@@ -1303,7 +1413,7 @@ static int task_numa_migrate(struct task_struct *p) | |||
1303 | }; | 1413 | }; |
1304 | struct sched_domain *sd; | 1414 | struct sched_domain *sd; |
1305 | unsigned long taskweight, groupweight; | 1415 | unsigned long taskweight, groupweight; |
1306 | int nid, ret; | 1416 | int nid, ret, dist; |
1307 | long taskimp, groupimp; | 1417 | long taskimp, groupimp; |
1308 | 1418 | ||
1309 | /* | 1419 | /* |
@@ -1331,29 +1441,45 @@ static int task_numa_migrate(struct task_struct *p) | |||
1331 | return -EINVAL; | 1441 | return -EINVAL; |
1332 | } | 1442 | } |
1333 | 1443 | ||
1334 | taskweight = task_weight(p, env.src_nid); | ||
1335 | groupweight = group_weight(p, env.src_nid); | ||
1336 | update_numa_stats(&env.src_stats, env.src_nid); | ||
1337 | env.dst_nid = p->numa_preferred_nid; | 1444 | env.dst_nid = p->numa_preferred_nid; |
1338 | taskimp = task_weight(p, env.dst_nid) - taskweight; | 1445 | dist = env.dist = node_distance(env.src_nid, env.dst_nid); |
1339 | groupimp = group_weight(p, env.dst_nid) - groupweight; | 1446 | taskweight = task_weight(p, env.src_nid, dist); |
1447 | groupweight = group_weight(p, env.src_nid, dist); | ||
1448 | update_numa_stats(&env.src_stats, env.src_nid); | ||
1449 | taskimp = task_weight(p, env.dst_nid, dist) - taskweight; | ||
1450 | groupimp = group_weight(p, env.dst_nid, dist) - groupweight; | ||
1340 | update_numa_stats(&env.dst_stats, env.dst_nid); | 1451 | update_numa_stats(&env.dst_stats, env.dst_nid); |
1341 | 1452 | ||
1342 | /* Try to find a spot on the preferred nid. */ | 1453 | /* Try to find a spot on the preferred nid. */ |
1343 | task_numa_find_cpu(&env, taskimp, groupimp); | 1454 | task_numa_find_cpu(&env, taskimp, groupimp); |
1344 | 1455 | ||
1345 | /* No space available on the preferred nid. Look elsewhere. */ | 1456 | /* |
1346 | if (env.best_cpu == -1) { | 1457 | * Look at other nodes in these cases: |
1458 | * - there is no space available on the preferred_nid | ||
1459 | * - the task is part of a numa_group that is interleaved across | ||
1460 | * multiple NUMA nodes; in order to better consolidate the group, | ||
1461 | * we need to check other locations. | ||
1462 | */ | ||
1463 | if (env.best_cpu == -1 || (p->numa_group && | ||
1464 | nodes_weight(p->numa_group->active_nodes) > 1)) { | ||
1347 | for_each_online_node(nid) { | 1465 | for_each_online_node(nid) { |
1348 | if (nid == env.src_nid || nid == p->numa_preferred_nid) | 1466 | if (nid == env.src_nid || nid == p->numa_preferred_nid) |
1349 | continue; | 1467 | continue; |
1350 | 1468 | ||
1469 | dist = node_distance(env.src_nid, env.dst_nid); | ||
1470 | if (sched_numa_topology_type == NUMA_BACKPLANE && | ||
1471 | dist != env.dist) { | ||
1472 | taskweight = task_weight(p, env.src_nid, dist); | ||
1473 | groupweight = group_weight(p, env.src_nid, dist); | ||
1474 | } | ||
1475 | |||
1351 | /* Only consider nodes where both task and groups benefit */ | 1476 | /* Only consider nodes where both task and groups benefit */ |
1352 | taskimp = task_weight(p, nid) - taskweight; | 1477 | taskimp = task_weight(p, nid, dist) - taskweight; |
1353 | groupimp = group_weight(p, nid) - groupweight; | 1478 | groupimp = group_weight(p, nid, dist) - groupweight; |
1354 | if (taskimp < 0 && groupimp < 0) | 1479 | if (taskimp < 0 && groupimp < 0) |
1355 | continue; | 1480 | continue; |
1356 | 1481 | ||
1482 | env.dist = dist; | ||
1357 | env.dst_nid = nid; | 1483 | env.dst_nid = nid; |
1358 | update_numa_stats(&env.dst_stats, env.dst_nid); | 1484 | update_numa_stats(&env.dst_stats, env.dst_nid); |
1359 | task_numa_find_cpu(&env, taskimp, groupimp); | 1485 | task_numa_find_cpu(&env, taskimp, groupimp); |
@@ -1408,7 +1534,7 @@ static void numa_migrate_preferred(struct task_struct *p) | |||
1408 | unsigned long interval = HZ; | 1534 | unsigned long interval = HZ; |
1409 | 1535 | ||
1410 | /* This task has no NUMA fault statistics yet */ | 1536 | /* This task has no NUMA fault statistics yet */ |
1411 | if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults_memory)) | 1537 | if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults)) |
1412 | return; | 1538 | return; |
1413 | 1539 | ||
1414 | /* Periodically retry migrating the task to the preferred node */ | 1540 | /* Periodically retry migrating the task to the preferred node */ |
@@ -1520,7 +1646,7 @@ static void update_task_scan_period(struct task_struct *p, | |||
1520 | * scanning faster if shared accesses dominate as it may | 1646 | * scanning faster if shared accesses dominate as it may |
1521 | * simply bounce migrations uselessly | 1647 | * simply bounce migrations uselessly |
1522 | */ | 1648 | */ |
1523 | ratio = DIV_ROUND_UP(private * NUMA_PERIOD_SLOTS, (private + shared)); | 1649 | ratio = DIV_ROUND_UP(private * NUMA_PERIOD_SLOTS, (private + shared + 1)); |
1524 | diff = (diff * ratio) / NUMA_PERIOD_SLOTS; | 1650 | diff = (diff * ratio) / NUMA_PERIOD_SLOTS; |
1525 | } | 1651 | } |
1526 | 1652 | ||
@@ -1557,6 +1683,92 @@ static u64 numa_get_avg_runtime(struct task_struct *p, u64 *period) | |||
1557 | return delta; | 1683 | return delta; |
1558 | } | 1684 | } |
1559 | 1685 | ||
1686 | /* | ||
1687 | * Determine the preferred nid for a task in a numa_group. This needs to | ||
1688 | * be done in a way that produces consistent results with group_weight, | ||
1689 | * otherwise workloads might not converge. | ||
1690 | */ | ||
1691 | static int preferred_group_nid(struct task_struct *p, int nid) | ||
1692 | { | ||
1693 | nodemask_t nodes; | ||
1694 | int dist; | ||
1695 | |||
1696 | /* Direct connections between all NUMA nodes. */ | ||
1697 | if (sched_numa_topology_type == NUMA_DIRECT) | ||
1698 | return nid; | ||
1699 | |||
1700 | /* | ||
1701 | * On a system with glueless mesh NUMA topology, group_weight | ||
1702 | * scores nodes according to the number of NUMA hinting faults on | ||
1703 | * both the node itself, and on nearby nodes. | ||
1704 | */ | ||
1705 | if (sched_numa_topology_type == NUMA_GLUELESS_MESH) { | ||
1706 | unsigned long score, max_score = 0; | ||
1707 | int node, max_node = nid; | ||
1708 | |||
1709 | dist = sched_max_numa_distance; | ||
1710 | |||
1711 | for_each_online_node(node) { | ||
1712 | score = group_weight(p, node, dist); | ||
1713 | if (score > max_score) { | ||
1714 | max_score = score; | ||
1715 | max_node = node; | ||
1716 | } | ||
1717 | } | ||
1718 | return max_node; | ||
1719 | } | ||
1720 | |||
1721 | /* | ||
1722 | * Finding the preferred nid in a system with NUMA backplane | ||
1723 | * interconnect topology is more involved. The goal is to locate | ||
1724 | * tasks from numa_groups near each other in the system, and | ||
1725 | * untangle workloads from different sides of the system. This requires | ||
1726 | * searching down the hierarchy of node groups, recursively searching | ||
1727 | * inside the highest scoring group of nodes. The nodemask tricks | ||
1728 | * keep the complexity of the search down. | ||
1729 | */ | ||
1730 | nodes = node_online_map; | ||
1731 | for (dist = sched_max_numa_distance; dist > LOCAL_DISTANCE; dist--) { | ||
1732 | unsigned long max_faults = 0; | ||
1733 | nodemask_t max_group; | ||
1734 | int a, b; | ||
1735 | |||
1736 | /* Are there nodes at this distance from each other? */ | ||
1737 | if (!find_numa_distance(dist)) | ||
1738 | continue; | ||
1739 | |||
1740 | for_each_node_mask(a, nodes) { | ||
1741 | unsigned long faults = 0; | ||
1742 | nodemask_t this_group; | ||
1743 | nodes_clear(this_group); | ||
1744 | |||
1745 | /* Sum group's NUMA faults; includes a==b case. */ | ||
1746 | for_each_node_mask(b, nodes) { | ||
1747 | if (node_distance(a, b) < dist) { | ||
1748 | faults += group_faults(p, b); | ||
1749 | node_set(b, this_group); | ||
1750 | node_clear(b, nodes); | ||
1751 | } | ||
1752 | } | ||
1753 | |||
1754 | /* Remember the top group. */ | ||
1755 | if (faults > max_faults) { | ||
1756 | max_faults = faults; | ||
1757 | max_group = this_group; | ||
1758 | /* | ||
1759 | * subtle: at the smallest distance there is | ||
1760 | * just one node left in each "group", the | ||
1761 | * winner is the preferred nid. | ||
1762 | */ | ||
1763 | nid = a; | ||
1764 | } | ||
1765 | } | ||
1766 | /* Next round, evaluate the nodes within max_group. */ | ||
1767 | nodes = max_group; | ||
1768 | } | ||
1769 | return nid; | ||
1770 | } | ||
1771 | |||
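The backplane search above repeatedly groups the candidate nodes by distance and recurses into the group with the most faults. A compact userspace walkthrough on a hypothetical two-island system ({0,1} and {2,3}, 20 apart internally, 30 across) with made-up per-node group faults:

    #include <stdio.h>
    #include <string.h>

    #define NR_NODES 4
    #define LOCAL_DISTANCE 10

    static const int dist[NR_NODES][NR_NODES] = {
            { 10, 20, 30, 30 },
            { 20, 10, 30, 30 },
            { 30, 30, 10, 20 },
            { 30, 30, 20, 10 },
    };
    static const unsigned long grp_faults[NR_NODES] = { 100, 50, 300, 200 };

    int main(void)
    {
            int nodes[NR_NODES] = { 1, 1, 1, 1 };   /* candidate set: all online */
            int nid = 0, d;

            /* Walk the distances top-down; the kernel iterates every value and
             * skips the ones that do not occur in the distance table. */
            for (d = 30; d > LOCAL_DISTANCE; d -= 10) {
                    unsigned long max_faults = 0;
                    int max_group[NR_NODES] = { 0 };
                    int a, b;

                    for (a = 0; a < NR_NODES; a++) {
                            unsigned long faults = 0;
                            int this_group[NR_NODES] = { 0 };

                            if (!nodes[a])
                                    continue;
                            /* Sum the group around a; includes the a == b case. */
                            for (b = 0; b < NR_NODES; b++) {
                                    if (nodes[b] && dist[a][b] < d) {
                                            faults += grp_faults[b];
                                            this_group[b] = 1;
                                            nodes[b] = 0;
                                    }
                            }
                            if (faults > max_faults) {
                                    max_faults = faults;
                                    memcpy(max_group, this_group, sizeof(max_group));
                                    nid = a;        /* one node left at the last level */
                            }
                    }
                    /* Next round, evaluate only the winning group. */
                    memcpy(nodes, max_group, sizeof(nodes));
            }
            printf("preferred nid: %d\n", nid);     /* prints 2 */
            return 0;
    }

At distance 30 the islands score 150 against 500, so {2,3} survives; at 20 only single nodes remain and node 2's 300 faults beat node 3's 200.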
1560 | static void task_numa_placement(struct task_struct *p) | 1772 | static void task_numa_placement(struct task_struct *p) |
1561 | { | 1773 | { |
1562 | int seq, nid, max_nid = -1, max_group_nid = -1; | 1774 | int seq, nid, max_nid = -1, max_group_nid = -1; |
@@ -1584,18 +1796,23 @@ static void task_numa_placement(struct task_struct *p) | |||
1584 | 1796 | ||
1585 | /* Find the node with the highest number of faults */ | 1797 | /* Find the node with the highest number of faults */ |
1586 | for_each_online_node(nid) { | 1798 | for_each_online_node(nid) { |
1799 | /* Keep track of the offsets in numa_faults array */ | ||
1800 | int mem_idx, membuf_idx, cpu_idx, cpubuf_idx; | ||
1587 | unsigned long faults = 0, group_faults = 0; | 1801 | unsigned long faults = 0, group_faults = 0; |
1588 | int priv, i; | 1802 | int priv; |
1589 | 1803 | ||
1590 | for (priv = 0; priv < NR_NUMA_HINT_FAULT_TYPES; priv++) { | 1804 | for (priv = 0; priv < NR_NUMA_HINT_FAULT_TYPES; priv++) { |
1591 | long diff, f_diff, f_weight; | 1805 | long diff, f_diff, f_weight; |
1592 | 1806 | ||
1593 | i = task_faults_idx(nid, priv); | 1807 | mem_idx = task_faults_idx(NUMA_MEM, nid, priv); |
1808 | membuf_idx = task_faults_idx(NUMA_MEMBUF, nid, priv); | ||
1809 | cpu_idx = task_faults_idx(NUMA_CPU, nid, priv); | ||
1810 | cpubuf_idx = task_faults_idx(NUMA_CPUBUF, nid, priv); | ||
1594 | 1811 | ||
1595 | /* Decay existing window, copy faults since last scan */ | 1812 | /* Decay existing window, copy faults since last scan */ |
1596 | diff = p->numa_faults_buffer_memory[i] - p->numa_faults_memory[i] / 2; | 1813 | diff = p->numa_faults[membuf_idx] - p->numa_faults[mem_idx] / 2; |
1597 | fault_types[priv] += p->numa_faults_buffer_memory[i]; | 1814 | fault_types[priv] += p->numa_faults[membuf_idx]; |
1598 | p->numa_faults_buffer_memory[i] = 0; | 1815 | p->numa_faults[membuf_idx] = 0; |
1599 | 1816 | ||
1600 | /* | 1817 | /* |
1601 | * Normalize the faults_from, so all tasks in a group | 1818 | * Normalize the faults_from, so all tasks in a group |
@@ -1605,21 +1822,27 @@ static void task_numa_placement(struct task_struct *p) | |||
1605 | * faults are less important. | 1822 | * faults are less important. |
1606 | */ | 1823 | */ |
1607 | f_weight = div64_u64(runtime << 16, period + 1); | 1824 | f_weight = div64_u64(runtime << 16, period + 1); |
1608 | f_weight = (f_weight * p->numa_faults_buffer_cpu[i]) / | 1825 | f_weight = (f_weight * p->numa_faults[cpubuf_idx]) / |
1609 | (total_faults + 1); | 1826 | (total_faults + 1); |
1610 | f_diff = f_weight - p->numa_faults_cpu[i] / 2; | 1827 | f_diff = f_weight - p->numa_faults[cpu_idx] / 2; |
1611 | p->numa_faults_buffer_cpu[i] = 0; | 1828 | p->numa_faults[cpubuf_idx] = 0; |
1612 | 1829 | ||
1613 | p->numa_faults_memory[i] += diff; | 1830 | p->numa_faults[mem_idx] += diff; |
1614 | p->numa_faults_cpu[i] += f_diff; | 1831 | p->numa_faults[cpu_idx] += f_diff; |
1615 | faults += p->numa_faults_memory[i]; | 1832 | faults += p->numa_faults[mem_idx]; |
1616 | p->total_numa_faults += diff; | 1833 | p->total_numa_faults += diff; |
1617 | if (p->numa_group) { | 1834 | if (p->numa_group) { |
1618 | /* safe because we can only change our own group */ | 1835 | /* |
1619 | p->numa_group->faults[i] += diff; | 1836 | * safe because we can only change our own group |
1620 | p->numa_group->faults_cpu[i] += f_diff; | 1837 | * |
1838 | * mem_idx represents the offset for a given | ||
1839 | * nid and priv in a specific region because it | ||
1840 | * is at the beginning of the numa_faults array. | ||
1841 | */ | ||
1842 | p->numa_group->faults[mem_idx] += diff; | ||
1843 | p->numa_group->faults_cpu[mem_idx] += f_diff; | ||
1621 | p->numa_group->total_faults += diff; | 1844 | p->numa_group->total_faults += diff; |
1622 | group_faults += p->numa_group->faults[i]; | 1845 | group_faults += p->numa_group->faults[mem_idx]; |
1623 | } | 1846 | } |
1624 | } | 1847 | } |
1625 | 1848 | ||
@@ -1639,7 +1862,7 @@ static void task_numa_placement(struct task_struct *p) | |||
1639 | if (p->numa_group) { | 1862 | if (p->numa_group) { |
1640 | update_numa_active_node_mask(p->numa_group); | 1863 | update_numa_active_node_mask(p->numa_group); |
1641 | spin_unlock_irq(group_lock); | 1864 | spin_unlock_irq(group_lock); |
1642 | max_nid = max_group_nid; | 1865 | max_nid = preferred_group_nid(p, max_group_nid); |
1643 | } | 1866 | } |
1644 | 1867 | ||
1645 | if (max_faults) { | 1868 | if (max_faults) { |
@@ -1682,7 +1905,6 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, | |||
1682 | 1905 | ||
1683 | atomic_set(&grp->refcount, 1); | 1906 | atomic_set(&grp->refcount, 1); |
1684 | spin_lock_init(&grp->lock); | 1907 | spin_lock_init(&grp->lock); |
1685 | INIT_LIST_HEAD(&grp->task_list); | ||
1686 | grp->gid = p->pid; | 1908 | grp->gid = p->pid; |
1687 | /* Second half of the array tracks nids where faults happen */ | 1909 | /* Second half of the array tracks nids where faults happen */ |
1688 | grp->faults_cpu = grp->faults + NR_NUMA_HINT_FAULT_TYPES * | 1910 | grp->faults_cpu = grp->faults + NR_NUMA_HINT_FAULT_TYPES * |
@@ -1691,11 +1913,10 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, | |||
1691 | node_set(task_node(current), grp->active_nodes); | 1913 | node_set(task_node(current), grp->active_nodes); |
1692 | 1914 | ||
1693 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) | 1915 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) |
1694 | grp->faults[i] = p->numa_faults_memory[i]; | 1916 | grp->faults[i] = p->numa_faults[i]; |
1695 | 1917 | ||
1696 | grp->total_faults = p->total_numa_faults; | 1918 | grp->total_faults = p->total_numa_faults; |
1697 | 1919 | ||
1698 | list_add(&p->numa_entry, &grp->task_list); | ||
1699 | grp->nr_tasks++; | 1920 | grp->nr_tasks++; |
1700 | rcu_assign_pointer(p->numa_group, grp); | 1921 | rcu_assign_pointer(p->numa_group, grp); |
1701 | } | 1922 | } |
@@ -1750,13 +1971,12 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags, | |||
1750 | double_lock_irq(&my_grp->lock, &grp->lock); | 1971 | double_lock_irq(&my_grp->lock, &grp->lock); |
1751 | 1972 | ||
1752 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) { | 1973 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) { |
1753 | my_grp->faults[i] -= p->numa_faults_memory[i]; | 1974 | my_grp->faults[i] -= p->numa_faults[i]; |
1754 | grp->faults[i] += p->numa_faults_memory[i]; | 1975 | grp->faults[i] += p->numa_faults[i]; |
1755 | } | 1976 | } |
1756 | my_grp->total_faults -= p->total_numa_faults; | 1977 | my_grp->total_faults -= p->total_numa_faults; |
1757 | grp->total_faults += p->total_numa_faults; | 1978 | grp->total_faults += p->total_numa_faults; |
1758 | 1979 | ||
1759 | list_move(&p->numa_entry, &grp->task_list); | ||
1760 | my_grp->nr_tasks--; | 1980 | my_grp->nr_tasks--; |
1761 | grp->nr_tasks++; | 1981 | grp->nr_tasks++; |
1762 | 1982 | ||
@@ -1776,27 +1996,23 @@ no_join: | |||
1776 | void task_numa_free(struct task_struct *p) | 1996 | void task_numa_free(struct task_struct *p) |
1777 | { | 1997 | { |
1778 | struct numa_group *grp = p->numa_group; | 1998 | struct numa_group *grp = p->numa_group; |
1779 | void *numa_faults = p->numa_faults_memory; | 1999 | void *numa_faults = p->numa_faults; |
1780 | unsigned long flags; | 2000 | unsigned long flags; |
1781 | int i; | 2001 | int i; |
1782 | 2002 | ||
1783 | if (grp) { | 2003 | if (grp) { |
1784 | spin_lock_irqsave(&grp->lock, flags); | 2004 | spin_lock_irqsave(&grp->lock, flags); |
1785 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) | 2005 | for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) |
1786 | grp->faults[i] -= p->numa_faults_memory[i]; | 2006 | grp->faults[i] -= p->numa_faults[i]; |
1787 | grp->total_faults -= p->total_numa_faults; | 2007 | grp->total_faults -= p->total_numa_faults; |
1788 | 2008 | ||
1789 | list_del(&p->numa_entry); | ||
1790 | grp->nr_tasks--; | 2009 | grp->nr_tasks--; |
1791 | spin_unlock_irqrestore(&grp->lock, flags); | 2010 | spin_unlock_irqrestore(&grp->lock, flags); |
1792 | RCU_INIT_POINTER(p->numa_group, NULL); | 2011 | RCU_INIT_POINTER(p->numa_group, NULL); |
1793 | put_numa_group(grp); | 2012 | put_numa_group(grp); |
1794 | } | 2013 | } |
1795 | 2014 | ||
1796 | p->numa_faults_memory = NULL; | 2015 | p->numa_faults = NULL; |
1797 | p->numa_faults_buffer_memory = NULL; | ||
1798 | p->numa_faults_cpu= NULL; | ||
1799 | p->numa_faults_buffer_cpu = NULL; | ||
1800 | kfree(numa_faults); | 2016 | kfree(numa_faults); |
1801 | } | 2017 | } |
1802 | 2018 | ||
@@ -1819,24 +2035,14 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags) | |||
1819 | return; | 2035 | return; |
1820 | 2036 | ||
1821 | /* Allocate buffer to track faults on a per-node basis */ | 2037 | /* Allocate buffer to track faults on a per-node basis */ |
1822 | if (unlikely(!p->numa_faults_memory)) { | 2038 | if (unlikely(!p->numa_faults)) { |
1823 | int size = sizeof(*p->numa_faults_memory) * | 2039 | int size = sizeof(*p->numa_faults) * |
1824 | NR_NUMA_HINT_FAULT_BUCKETS * nr_node_ids; | 2040 | NR_NUMA_HINT_FAULT_BUCKETS * nr_node_ids; |
1825 | 2041 | ||
1826 | p->numa_faults_memory = kzalloc(size, GFP_KERNEL|__GFP_NOWARN); | 2042 | p->numa_faults = kzalloc(size, GFP_KERNEL|__GFP_NOWARN); |
1827 | if (!p->numa_faults_memory) | 2043 | if (!p->numa_faults) |
1828 | return; | 2044 | return; |
1829 | 2045 | ||
1830 | BUG_ON(p->numa_faults_buffer_memory); | ||
1831 | /* | ||
1832 | * The averaged statistics, shared & private, memory & cpu, | ||
1833 | * occupy the first half of the array. The second half of the | ||
1834 | * array is for current counters, which are averaged into the | ||
1835 | * first set by task_numa_placement. | ||
1836 | */ | ||
1837 | p->numa_faults_cpu = p->numa_faults_memory + (2 * nr_node_ids); | ||
1838 | p->numa_faults_buffer_memory = p->numa_faults_memory + (4 * nr_node_ids); | ||
1839 | p->numa_faults_buffer_cpu = p->numa_faults_memory + (6 * nr_node_ids); | ||
1840 | p->total_numa_faults = 0; | 2046 | p->total_numa_faults = 0; |
1841 | memset(p->numa_faults_locality, 0, sizeof(p->numa_faults_locality)); | 2047 | memset(p->numa_faults_locality, 0, sizeof(p->numa_faults_locality)); |
1842 | } | 2048 | } |
@@ -1876,8 +2082,8 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags) | |||
1876 | if (migrated) | 2082 | if (migrated) |
1877 | p->numa_pages_migrated += pages; | 2083 | p->numa_pages_migrated += pages; |
1878 | 2084 | ||
1879 | p->numa_faults_buffer_memory[task_faults_idx(mem_node, priv)] += pages; | 2085 | p->numa_faults[task_faults_idx(NUMA_MEMBUF, mem_node, priv)] += pages; |
1880 | p->numa_faults_buffer_cpu[task_faults_idx(cpu_node, priv)] += pages; | 2086 | p->numa_faults[task_faults_idx(NUMA_CPUBUF, cpu_node, priv)] += pages; |
1881 | p->numa_faults_locality[local] += pages; | 2087 | p->numa_faults_locality[local] += pages; |
1882 | } | 2088 | } |
1883 | 2089 | ||
@@ -4446,7 +4652,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) | |||
4446 | latest_idle_timestamp = rq->idle_stamp; | 4652 | latest_idle_timestamp = rq->idle_stamp; |
4447 | shallowest_idle_cpu = i; | 4653 | shallowest_idle_cpu = i; |
4448 | } | 4654 | } |
4449 | } else { | 4655 | } else if (shallowest_idle_cpu == -1) { |
4450 | load = weighted_cpuload(i); | 4656 | load = weighted_cpuload(i); |
4451 | if (load < min_load || (load == min_load && i == this_cpu)) { | 4657 | if (load < min_load || (load == min_load && i == this_cpu)) { |
4452 | min_load = load; | 4658 | min_load = load; |
@@ -4524,9 +4730,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f | |||
4524 | int want_affine = 0; | 4730 | int want_affine = 0; |
4525 | int sync = wake_flags & WF_SYNC; | 4731 | int sync = wake_flags & WF_SYNC; |
4526 | 4732 | ||
4527 | if (p->nr_cpus_allowed == 1) | ||
4528 | return prev_cpu; | ||
4529 | |||
4530 | if (sd_flag & SD_BALANCE_WAKE) | 4733 | if (sd_flag & SD_BALANCE_WAKE) |
4531 | want_affine = cpumask_test_cpu(cpu, tsk_cpus_allowed(p)); | 4734 | want_affine = cpumask_test_cpu(cpu, tsk_cpus_allowed(p)); |
4532 | 4735 | ||
@@ -5166,7 +5369,7 @@ static bool migrate_improves_locality(struct task_struct *p, struct lb_env *env) | |||
5166 | struct numa_group *numa_group = rcu_dereference(p->numa_group); | 5369 | struct numa_group *numa_group = rcu_dereference(p->numa_group); |
5167 | int src_nid, dst_nid; | 5370 | int src_nid, dst_nid; |
5168 | 5371 | ||
5169 | if (!sched_feat(NUMA_FAVOUR_HIGHER) || !p->numa_faults_memory || | 5372 | if (!sched_feat(NUMA_FAVOUR_HIGHER) || !p->numa_faults || |
5170 | !(env->sd->flags & SD_NUMA)) { | 5373 | !(env->sd->flags & SD_NUMA)) { |
5171 | return false; | 5374 | return false; |
5172 | } | 5375 | } |
@@ -5205,7 +5408,7 @@ static bool migrate_degrades_locality(struct task_struct *p, struct lb_env *env) | |||
5205 | if (!sched_feat(NUMA) || !sched_feat(NUMA_RESIST_LOWER)) | 5408 | if (!sched_feat(NUMA) || !sched_feat(NUMA_RESIST_LOWER)) |
5206 | return false; | 5409 | return false; |
5207 | 5410 | ||
5208 | if (!p->numa_faults_memory || !(env->sd->flags & SD_NUMA)) | 5411 | if (!p->numa_faults || !(env->sd->flags & SD_NUMA)) |
5209 | return false; | 5412 | return false; |
5210 | 5413 | ||
5211 | src_nid = cpu_to_node(env->src_cpu); | 5414 | src_nid = cpu_to_node(env->src_cpu); |
@@ -6149,8 +6352,10 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd | |||
6149 | * with a large weight task outweighs the tasks on the system). | 6352 | * with a large weight task outweighs the tasks on the system). |
6150 | */ | 6353 | */ |
6151 | if (prefer_sibling && sds->local && | 6354 | if (prefer_sibling && sds->local && |
6152 | sds->local_stat.group_has_free_capacity) | 6355 | sds->local_stat.group_has_free_capacity) { |
6153 | sgs->group_capacity_factor = min(sgs->group_capacity_factor, 1U); | 6356 | sgs->group_capacity_factor = min(sgs->group_capacity_factor, 1U); |
6357 | sgs->group_type = group_classify(sg, sgs); | ||
6358 | } | ||
6154 | 6359 | ||
6155 | if (update_sd_pick_busiest(env, sds, sg, sgs)) { | 6360 | if (update_sd_pick_busiest(env, sds, sg, sgs)) { |
6156 | sds->busiest = sg; | 6361 | sds->busiest = sg; |
@@ -7938,6 +8143,8 @@ const struct sched_class fair_sched_class = { | |||
7938 | 8143 | ||
7939 | .get_rr_interval = get_rr_interval_fair, | 8144 | .get_rr_interval = get_rr_interval_fair, |
7940 | 8145 | ||
8146 | .update_curr = update_curr_fair, | ||
8147 | |||
7941 | #ifdef CONFIG_FAIR_GROUP_SCHED | 8148 | #ifdef CONFIG_FAIR_GROUP_SCHED |
7942 | .task_move_group = task_move_group_fair, | 8149 | .task_move_group = task_move_group_fair, |
7943 | #endif | 8150 | #endif |
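
The fair.c hunks above fold the old numa_faults_buffer_memory/numa_faults_buffer_cpu buffers into a single p->numa_faults array that is indexed by a statistics class (the NUMA_MEM/NUMA_CPU/NUMA_MEMBUF/NUMA_CPUBUF enum added to sched.h further down), a node id and a private/shared slot. The exact layout is not visible in these hunks, so the standalone C sketch below only shows one plausible way to flatten such a (class, node, priv) triple into one array; my_faults_idx() and the sizes are invented for illustration and are not the kernel's task_faults_idx().

#include <stdio.h>

enum numa_faults_stats { NUMA_MEM = 0, NUMA_CPU, NUMA_MEMBUF, NUMA_CPUBUF };

#define NR_STATS 4   /* the four classes above            */
#define NR_NODES 2   /* assumed two-node machine          */
#define NR_PRIV  2   /* shared (0) and private (1) faults */

/* One plausible row-major flattening of (class, node, priv). */
static int my_faults_idx(enum numa_faults_stats s, int nid, int priv)
{
	return NR_PRIV * (s * NR_NODES + nid) + priv;
}

int main(void)
{
	unsigned long numa_faults[NR_STATS * NR_NODES * NR_PRIV] = { 0 };

	/* Record 3 pages of private faults on node 1 in the NUMA_MEMBUF
	 * class, roughly what the task_numa_fault() hunk does. */
	numa_faults[my_faults_idx(NUMA_MEMBUF, 1, 1)] += 3;

	printf("slot %d holds %lu pages\n",
	       my_faults_idx(NUMA_MEMBUF, 1, 1),
	       numa_faults[my_faults_idx(NUMA_MEMBUF, 1, 1)]);
	return 0;
}
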
diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c index 67ad4e7f506a..c65dac8c97cd 100644 --- a/kernel/sched/idle_task.c +++ b/kernel/sched/idle_task.c | |||
@@ -75,6 +75,10 @@ static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task | |||
75 | return 0; | 75 | return 0; |
76 | } | 76 | } |
77 | 77 | ||
78 | static void update_curr_idle(struct rq *rq) | ||
79 | { | ||
80 | } | ||
81 | |||
78 | /* | 82 | /* |
79 | * Simple, special scheduling class for the per-CPU idle tasks: | 83 | * Simple, special scheduling class for the per-CPU idle tasks: |
80 | */ | 84 | */ |
@@ -101,4 +105,5 @@ const struct sched_class idle_sched_class = { | |||
101 | 105 | ||
102 | .prio_changed = prio_changed_idle, | 106 | .prio_changed = prio_changed_idle, |
103 | .switched_to = switched_to_idle, | 107 | .switched_to = switched_to_idle, |
108 | .update_curr = update_curr_idle, | ||
104 | }; | 109 | }; |
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index d024e6ce30ba..ee15f5a0d1c1 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c | |||
@@ -1301,9 +1301,6 @@ select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags) | |||
1301 | struct task_struct *curr; | 1301 | struct task_struct *curr; |
1302 | struct rq *rq; | 1302 | struct rq *rq; |
1303 | 1303 | ||
1304 | if (p->nr_cpus_allowed == 1) | ||
1305 | goto out; | ||
1306 | |||
1307 | /* For anything but wake ups, just return the task_cpu */ | 1304 | /* For anything but wake ups, just return the task_cpu */ |
1308 | if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK) | 1305 | if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK) |
1309 | goto out; | 1306 | goto out; |
@@ -1351,16 +1348,22 @@ out: | |||
1351 | 1348 | ||
1352 | static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) | 1349 | static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) |
1353 | { | 1350 | { |
1354 | if (rq->curr->nr_cpus_allowed == 1) | 1351 | /* |
1352 | * Current can't be migrated, useless to reschedule, | ||
1353 | * let's hope p can move out. | ||
1354 | */ | ||
1355 | if (rq->curr->nr_cpus_allowed == 1 || | ||
1356 | !cpupri_find(&rq->rd->cpupri, rq->curr, NULL)) | ||
1355 | return; | 1357 | return; |
1356 | 1358 | ||
1359 | /* | ||
1360 | * p is migratable, so let's not schedule it and | ||
1361 | * see if it is pushed or pulled somewhere else. | ||
1362 | */ | ||
1357 | if (p->nr_cpus_allowed != 1 | 1363 | if (p->nr_cpus_allowed != 1 |
1358 | && cpupri_find(&rq->rd->cpupri, p, NULL)) | 1364 | && cpupri_find(&rq->rd->cpupri, p, NULL)) |
1359 | return; | 1365 | return; |
1360 | 1366 | ||
1361 | if (!cpupri_find(&rq->rd->cpupri, rq->curr, NULL)) | ||
1362 | return; | ||
1363 | |||
1364 | /* | 1367 | /* |
1365 | * There appears to be other cpus that can accept | 1368 | * There appears to be other cpus that can accept |
1366 | * current and none to run 'p', so lets reschedule | 1369 | * current and none to run 'p', so lets reschedule |
@@ -2128,6 +2131,8 @@ const struct sched_class rt_sched_class = { | |||
2128 | 2131 | ||
2129 | .prio_changed = prio_changed_rt, | 2132 | .prio_changed = prio_changed_rt, |
2130 | .switched_to = switched_to_rt, | 2133 | .switched_to = switched_to_rt, |
2134 | |||
2135 | .update_curr = update_curr_rt, | ||
2131 | }; | 2136 | }; |
2132 | 2137 | ||
2133 | #ifdef CONFIG_SCHED_DEBUG | 2138 | #ifdef CONFIG_SCHED_DEBUG |
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 24156c8434d1..9a2a45c970e7 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
@@ -176,6 +176,25 @@ struct dl_bw { | |||
176 | u64 bw, total_bw; | 176 | u64 bw, total_bw; |
177 | }; | 177 | }; |
178 | 178 | ||
179 | static inline | ||
180 | void __dl_clear(struct dl_bw *dl_b, u64 tsk_bw) | ||
181 | { | ||
182 | dl_b->total_bw -= tsk_bw; | ||
183 | } | ||
184 | |||
185 | static inline | ||
186 | void __dl_add(struct dl_bw *dl_b, u64 tsk_bw) | ||
187 | { | ||
188 | dl_b->total_bw += tsk_bw; | ||
189 | } | ||
190 | |||
191 | static inline | ||
192 | bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw) | ||
193 | { | ||
194 | return dl_b->bw != -1 && | ||
195 | dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw; | ||
196 | } | ||
197 | |||
179 | extern struct mutex sched_domains_mutex; | 198 | extern struct mutex sched_domains_mutex; |
180 | 199 | ||
181 | #ifdef CONFIG_CGROUP_SCHED | 200 | #ifdef CONFIG_CGROUP_SCHED |
@@ -678,7 +697,25 @@ static inline u64 rq_clock_task(struct rq *rq) | |||
678 | return rq->clock_task; | 697 | return rq->clock_task; |
679 | } | 698 | } |
680 | 699 | ||
700 | #ifdef CONFIG_NUMA | ||
701 | enum numa_topology_type { | ||
702 | NUMA_DIRECT, | ||
703 | NUMA_GLUELESS_MESH, | ||
704 | NUMA_BACKPLANE, | ||
705 | }; | ||
706 | extern enum numa_topology_type sched_numa_topology_type; | ||
707 | extern int sched_max_numa_distance; | ||
708 | extern bool find_numa_distance(int distance); | ||
709 | #endif | ||
710 | |||
681 | #ifdef CONFIG_NUMA_BALANCING | 711 | #ifdef CONFIG_NUMA_BALANCING |
712 | /* The regions in numa_faults array from task_struct */ | ||
713 | enum numa_faults_stats { | ||
714 | NUMA_MEM = 0, | ||
715 | NUMA_CPU, | ||
716 | NUMA_MEMBUF, | ||
717 | NUMA_CPUBUF | ||
718 | }; | ||
682 | extern void sched_setnuma(struct task_struct *p, int node); | 719 | extern void sched_setnuma(struct task_struct *p, int node); |
683 | extern int migrate_task_to(struct task_struct *p, int cpu); | 720 | extern int migrate_task_to(struct task_struct *p, int cpu); |
684 | extern int migrate_swap(struct task_struct *, struct task_struct *); | 721 | extern int migrate_swap(struct task_struct *, struct task_struct *); |
@@ -1127,6 +1164,11 @@ struct sched_class { | |||
1127 | void (*task_fork) (struct task_struct *p); | 1164 | void (*task_fork) (struct task_struct *p); |
1128 | void (*task_dead) (struct task_struct *p); | 1165 | void (*task_dead) (struct task_struct *p); |
1129 | 1166 | ||
1167 | /* | ||
1168 | * The switched_from() call is allowed to drop rq->lock, therefore we | ||
1169 | * cannot assume the switched_from/switched_to pair is serialized by | ||
1170 | * rq->lock. They are however serialized by p->pi_lock. | ||
1171 | */ | ||
1130 | void (*switched_from) (struct rq *this_rq, struct task_struct *task); | 1172 | void (*switched_from) (struct rq *this_rq, struct task_struct *task); |
1131 | void (*switched_to) (struct rq *this_rq, struct task_struct *task); | 1173 | void (*switched_to) (struct rq *this_rq, struct task_struct *task); |
1132 | void (*prio_changed) (struct rq *this_rq, struct task_struct *task, | 1174 | void (*prio_changed) (struct rq *this_rq, struct task_struct *task, |
@@ -1135,6 +1177,8 @@ struct sched_class { | |||
1135 | unsigned int (*get_rr_interval) (struct rq *rq, | 1177 | unsigned int (*get_rr_interval) (struct rq *rq, |
1136 | struct task_struct *task); | 1178 | struct task_struct *task); |
1137 | 1179 | ||
1180 | void (*update_curr) (struct rq *rq); | ||
1181 | |||
1138 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1182 | #ifdef CONFIG_FAIR_GROUP_SCHED |
1139 | void (*task_move_group) (struct task_struct *p, int on_rq); | 1183 | void (*task_move_group) (struct task_struct *p, int on_rq); |
1140 | #endif | 1184 | #endif |
@@ -1502,6 +1546,7 @@ extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq); | |||
1502 | extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq); | 1546 | extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq); |
1503 | extern void print_cfs_stats(struct seq_file *m, int cpu); | 1547 | extern void print_cfs_stats(struct seq_file *m, int cpu); |
1504 | extern void print_rt_stats(struct seq_file *m, int cpu); | 1548 | extern void print_rt_stats(struct seq_file *m, int cpu); |
1549 | extern void print_dl_stats(struct seq_file *m, int cpu); | ||
1505 | 1550 | ||
1506 | extern void init_cfs_rq(struct cfs_rq *cfs_rq); | 1551 | extern void init_cfs_rq(struct cfs_rq *cfs_rq); |
1507 | extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq); | 1552 | extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq); |
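
The __dl_add()/__dl_clear()/__dl_overflow() helpers added above keep a running sum of admitted SCHED_DEADLINE bandwidth and reject a change whenever the new total would exceed bw * cpus (bw == -1 meaning "no limit"). A minimal userspace sketch of that admission inequality, with made-up numbers for the per-CPU cap and the already-admitted total:

#include <stdio.h>
#include <stdint.h>

struct dl_bw {
	uint64_t bw;        /* per-CPU bandwidth cap, (u64)-1 == unlimited */
	uint64_t total_bw;  /* sum of the admitted tasks' bandwidth        */
};

/* Same inequality as __dl_overflow(): would replacing old_bw by new_bw
 * push the total past bw * cpus? */
static int dl_overflow(struct dl_bw *dl_b, int cpus,
		       uint64_t old_bw, uint64_t new_bw)
{
	return dl_b->bw != (uint64_t)-1 &&
	       dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw;
}

int main(void)
{
	/* Assumed numbers: ~95% of a 1<<20 scale per CPU, 1.5M admitted. */
	struct dl_bw dl_b = { .bw = 996147, .total_bw = 1500000 };

	/* A new 400000 task fits on a 2-CPU domain (1.9M < 1992294)... */
	printf("overflow? %d\n", dl_overflow(&dl_b, 2, 0, 400000));
	/* ...but a 600000 one does not (2.1M > 1992294). */
	printf("overflow? %d\n", dl_overflow(&dl_b, 2, 0, 600000));
	return 0;
}
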
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c index 67426e529f59..79ffec45a6ac 100644 --- a/kernel/sched/stop_task.c +++ b/kernel/sched/stop_task.c | |||
@@ -102,6 +102,10 @@ get_rr_interval_stop(struct rq *rq, struct task_struct *task) | |||
102 | return 0; | 102 | return 0; |
103 | } | 103 | } |
104 | 104 | ||
105 | static void update_curr_stop(struct rq *rq) | ||
106 | { | ||
107 | } | ||
108 | |||
105 | /* | 109 | /* |
106 | * Simple, special scheduling class for the per-CPU stop tasks: | 110 | * Simple, special scheduling class for the per-CPU stop tasks: |
107 | */ | 111 | */ |
@@ -128,4 +132,5 @@ const struct sched_class stop_sched_class = { | |||
128 | 132 | ||
129 | .prio_changed = prio_changed_stop, | 133 | .prio_changed = prio_changed_stop, |
130 | .switched_to = switched_to_stop, | 134 | .switched_to = switched_to_stop, |
135 | .update_curr = update_curr_stop, | ||
131 | }; | 136 | }; |
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 5a62915f47a8..852143a79f36 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
10 | #include <linux/wait.h> | 10 | #include <linux/wait.h> |
11 | #include <linux/hash.h> | 11 | #include <linux/hash.h> |
12 | #include <linux/kthread.h> | ||
12 | 13 | ||
13 | void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *key) | 14 | void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *key) |
14 | { | 15 | { |
@@ -297,6 +298,71 @@ int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void * | |||
297 | } | 298 | } |
298 | EXPORT_SYMBOL(autoremove_wake_function); | 299 | EXPORT_SYMBOL(autoremove_wake_function); |
299 | 300 | ||
301 | static inline bool is_kthread_should_stop(void) | ||
302 | { | ||
303 | return (current->flags & PF_KTHREAD) && kthread_should_stop(); | ||
304 | } | ||
305 | |||
306 | /* | ||
307 | * DEFINE_WAIT_FUNC(wait, woken_wake_func); | ||
308 | * | ||
309 | * add_wait_queue(&wq, &wait); | ||
310 | * for (;;) { | ||
311 | * if (condition) | ||
312 | * break; | ||
313 | * | ||
314 | * p->state = mode; condition = true; | ||
315 | * smp_mb(); // A smp_wmb(); // C | ||
316 | * if (!(wait->flags & WQ_FLAG_WOKEN)) wait->flags |= WQ_FLAG_WOKEN; | ||
317 | * schedule() try_to_wake_up(); | ||
318 | * p->state = TASK_RUNNING; ~~~~~~~~~~~~~~~~~~ | ||
319 | * wait->flags &= ~WQ_FLAG_WOKEN; condition = true; | ||
320 | * smp_mb() // B smp_wmb(); // C | ||
321 | * wait->flags |= WQ_FLAG_WOKEN; | ||
322 | * } | ||
323 | * remove_wait_queue(&wq, &wait); | ||
324 | * | ||
325 | */ | ||
326 | long wait_woken(wait_queue_t *wait, unsigned mode, long timeout) | ||
327 | { | ||
328 | set_current_state(mode); /* A */ | ||
329 | /* | ||
330 | * The above implies an smp_mb(), which matches with the smp_wmb() from | ||
331 | * woken_wake_function() such that if we observe WQ_FLAG_WOKEN we must | ||
332 | * also observe all state before the wakeup. | ||
333 | */ | ||
334 | if (!(wait->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop()) | ||
335 | timeout = schedule_timeout(timeout); | ||
336 | __set_current_state(TASK_RUNNING); | ||
337 | |||
338 | /* | ||
339 | * The below implies an smp_mb(), it too pairs with the smp_wmb() from | ||
340 | * woken_wake_function() such that we must either observe the wait | ||
341 | * condition being true _OR_ WQ_FLAG_WOKEN such that we will not miss | ||
342 | * an event. | ||
343 | */ | ||
344 | set_mb(wait->flags, wait->flags & ~WQ_FLAG_WOKEN); /* B */ | ||
345 | |||
346 | return timeout; | ||
347 | } | ||
348 | EXPORT_SYMBOL(wait_woken); | ||
349 | |||
350 | int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key) | ||
351 | { | ||
352 | /* | ||
353 | * Although this function is called under waitqueue lock, LOCK | ||
354 | * doesn't imply a write barrier and users expect write | ||
355 | * barrier semantics on wakeup functions. The following | ||
356 | * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up() | ||
357 | * and is paired with set_mb() in wait_woken(). | ||
358 | */ | ||
359 | smp_wmb(); /* C */ | ||
360 | wait->flags |= WQ_FLAG_WOKEN; | ||
361 | |||
362 | return default_wake_function(wait, mode, sync, key); | ||
363 | } | ||
364 | EXPORT_SYMBOL(woken_wake_function); | ||
365 | |||
300 | int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg) | 366 | int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg) |
301 | { | 367 | { |
302 | struct wait_bit_key *key = arg; | 368 | struct wait_bit_key *key = arg; |
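
wait_woken() and woken_wake_function() above close the classic lost-wakeup window: the waker publishes the condition, issues smp_wmb() and sets WQ_FLAG_WOKEN, while the sleeper only schedules out if the flag is still clear and clears it again with a full barrier before re-checking the condition. A hypothetical receive loop using the new pair could look like the sketch below; this is kernel-context code that will not build outside a kernel tree, and my_dev/data_ready() are invented names, not an existing driver API.

#include <linux/wait.h>
#include <linux/sched.h>

struct my_dev {
	wait_queue_head_t wq;
};

/* Stand-in for "is there something to consume?" in a real driver. */
static bool data_ready(struct my_dev *dev)
{
	return false;
}

static long my_wait_for_data(struct my_dev *dev, long timeout)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);

	add_wait_queue(&dev->wq, &wait);
	while (!data_ready(dev) && timeout) {
		/* Sleeps only if WQ_FLAG_WOKEN is not already set, so a
		 * wake_up() racing with the data_ready() check above is
		 * not lost; returns the remaining timeout. */
		timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout);
	}
	remove_wait_queue(&dev->wq, &wait);
	return timeout;
}

The producer side only needs a plain wake_up(&dev->wq): woken_wake_function() sets WQ_FLAG_WOKEN (after the smp_wmb() pairing with wait_woken()'s barriers) and then falls through to default_wake_function().
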
diff --git a/kernel/signal.c b/kernel/signal.c index 8f0876f9f6dd..16a305295256 100644 --- a/kernel/signal.c +++ b/kernel/signal.c | |||
@@ -1275,7 +1275,17 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, | |||
1275 | local_irq_restore(*flags); | 1275 | local_irq_restore(*flags); |
1276 | break; | 1276 | break; |
1277 | } | 1277 | } |
1278 | 1278 | /* | |
1279 | * This sighand can be already freed and even reused, but | ||
1280 | * we rely on SLAB_DESTROY_BY_RCU and sighand_ctor() which | ||
1281 | * initializes ->siglock: this slab can't go away, it has | ||
1282 | * the same object type, ->siglock can't be reinitialized. | ||
1283 | * | ||
1284 | * We need to ensure that tsk->sighand is still the same | ||
1285 | * after we take the lock, we can race with de_thread() or | ||
1286 | * __exit_signal(). In the latter case the next iteration | ||
1287 | * must see ->sighand == NULL. | ||
1288 | */ | ||
1279 | spin_lock(&sighand->siglock); | 1289 | spin_lock(&sighand->siglock); |
1280 | if (likely(sighand == tsk->sighand)) { | 1290 | if (likely(sighand == tsk->sighand)) { |
1281 | rcu_read_unlock(); | 1291 | rcu_read_unlock(); |
@@ -1331,23 +1341,21 @@ int kill_pid_info(int sig, struct siginfo *info, struct pid *pid) | |||
1331 | int error = -ESRCH; | 1341 | int error = -ESRCH; |
1332 | struct task_struct *p; | 1342 | struct task_struct *p; |
1333 | 1343 | ||
1334 | rcu_read_lock(); | 1344 | for (;;) { |
1335 | retry: | 1345 | rcu_read_lock(); |
1336 | p = pid_task(pid, PIDTYPE_PID); | 1346 | p = pid_task(pid, PIDTYPE_PID); |
1337 | if (p) { | 1347 | if (p) |
1338 | error = group_send_sig_info(sig, info, p); | 1348 | error = group_send_sig_info(sig, info, p); |
1339 | if (unlikely(error == -ESRCH)) | 1349 | rcu_read_unlock(); |
1340 | /* | 1350 | if (likely(!p || error != -ESRCH)) |
1341 | * The task was unhashed in between, try again. | 1351 | return error; |
1342 | * If it is dead, pid_task() will return NULL, | ||
1343 | * if we race with de_thread() it will find the | ||
1344 | * new leader. | ||
1345 | */ | ||
1346 | goto retry; | ||
1347 | } | ||
1348 | rcu_read_unlock(); | ||
1349 | 1352 | ||
1350 | return error; | 1353 | /* |
1354 | * The task was unhashed in between, try again. If it | ||
1355 | * is dead, pid_task() will return NULL, if we race with | ||
1356 | * de_thread() it will find the new leader. | ||
1357 | */ | ||
1358 | } | ||
1351 | } | 1359 | } |
1352 | 1360 | ||
1353 | int kill_proc_info(int sig, struct siginfo *info, pid_t pid) | 1361 | int kill_proc_info(int sig, struct siginfo *info, pid_t pid) |
@@ -2748,6 +2756,10 @@ int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from) | |||
2748 | if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO) | 2756 | if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO) |
2749 | err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb); | 2757 | err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb); |
2750 | #endif | 2758 | #endif |
2759 | #ifdef SEGV_BNDERR | ||
2760 | err |= __put_user(from->si_lower, &to->si_lower); | ||
2761 | err |= __put_user(from->si_upper, &to->si_upper); | ||
2762 | #endif | ||
2751 | break; | 2763 | break; |
2752 | case __SI_CHLD: | 2764 | case __SI_CHLD: |
2753 | err |= __put_user(from->si_pid, &to->si_pid); | 2765 | err |= __put_user(from->si_pid, &to->si_pid); |
diff --git a/kernel/smpboot.c b/kernel/smpboot.c index eb89e1807408..f032fb5284e3 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c | |||
@@ -110,7 +110,7 @@ static int smpboot_thread_fn(void *data) | |||
110 | set_current_state(TASK_INTERRUPTIBLE); | 110 | set_current_state(TASK_INTERRUPTIBLE); |
111 | preempt_disable(); | 111 | preempt_disable(); |
112 | if (kthread_should_stop()) { | 112 | if (kthread_should_stop()) { |
113 | set_current_state(TASK_RUNNING); | 113 | __set_current_state(TASK_RUNNING); |
114 | preempt_enable(); | 114 | preempt_enable(); |
115 | if (ht->cleanup) | 115 | if (ht->cleanup) |
116 | ht->cleanup(td->cpu, cpu_online(td->cpu)); | 116 | ht->cleanup(td->cpu, cpu_online(td->cpu)); |
@@ -136,26 +136,27 @@ static int smpboot_thread_fn(void *data) | |||
136 | /* Check for state change setup */ | 136 | /* Check for state change setup */ |
137 | switch (td->status) { | 137 | switch (td->status) { |
138 | case HP_THREAD_NONE: | 138 | case HP_THREAD_NONE: |
139 | __set_current_state(TASK_RUNNING); | ||
139 | preempt_enable(); | 140 | preempt_enable(); |
140 | if (ht->setup) | 141 | if (ht->setup) |
141 | ht->setup(td->cpu); | 142 | ht->setup(td->cpu); |
142 | td->status = HP_THREAD_ACTIVE; | 143 | td->status = HP_THREAD_ACTIVE; |
143 | preempt_disable(); | 144 | continue; |
144 | break; | 145 | |
145 | case HP_THREAD_PARKED: | 146 | case HP_THREAD_PARKED: |
147 | __set_current_state(TASK_RUNNING); | ||
146 | preempt_enable(); | 148 | preempt_enable(); |
147 | if (ht->unpark) | 149 | if (ht->unpark) |
148 | ht->unpark(td->cpu); | 150 | ht->unpark(td->cpu); |
149 | td->status = HP_THREAD_ACTIVE; | 151 | td->status = HP_THREAD_ACTIVE; |
150 | preempt_disable(); | 152 | continue; |
151 | break; | ||
152 | } | 153 | } |
153 | 154 | ||
154 | if (!ht->thread_should_run(td->cpu)) { | 155 | if (!ht->thread_should_run(td->cpu)) { |
155 | preempt_enable(); | 156 | preempt_enable_no_resched(); |
156 | schedule(); | 157 | schedule(); |
157 | } else { | 158 | } else { |
158 | set_current_state(TASK_RUNNING); | 159 | __set_current_state(TASK_RUNNING); |
159 | preempt_enable(); | 160 | preempt_enable(); |
160 | ht->thread_fn(td->cpu); | 161 | ht->thread_fn(td->cpu); |
161 | } | 162 | } |
diff --git a/kernel/softirq.c b/kernel/softirq.c index 0699add19164..501baa9ac1be 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -656,7 +656,7 @@ static void run_ksoftirqd(unsigned int cpu) | |||
656 | * in the task stack here. | 656 | * in the task stack here. |
657 | */ | 657 | */ |
658 | __do_softirq(); | 658 | __do_softirq(); |
659 | rcu_note_context_switch(cpu); | 659 | rcu_note_context_switch(); |
660 | local_irq_enable(); | 660 | local_irq_enable(); |
661 | cond_resched(); | 661 | cond_resched(); |
662 | return; | 662 | return; |
diff --git a/kernel/sys.c b/kernel/sys.c index 1eaa2f0b0246..a8c9f5a7dda6 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -91,6 +91,12 @@ | |||
91 | #ifndef SET_TSC_CTL | 91 | #ifndef SET_TSC_CTL |
92 | # define SET_TSC_CTL(a) (-EINVAL) | 92 | # define SET_TSC_CTL(a) (-EINVAL) |
93 | #endif | 93 | #endif |
94 | #ifndef MPX_ENABLE_MANAGEMENT | ||
95 | # define MPX_ENABLE_MANAGEMENT(a) (-EINVAL) | ||
96 | #endif | ||
97 | #ifndef MPX_DISABLE_MANAGEMENT | ||
98 | # define MPX_DISABLE_MANAGEMENT(a) (-EINVAL) | ||
99 | #endif | ||
94 | 100 | ||
95 | /* | 101 | /* |
96 | * this is where the system-wide overflow UID and GID are defined, for | 102 | * this is where the system-wide overflow UID and GID are defined, for |
@@ -2203,6 +2209,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, | |||
2203 | me->mm->def_flags &= ~VM_NOHUGEPAGE; | 2209 | me->mm->def_flags &= ~VM_NOHUGEPAGE; |
2204 | up_write(&me->mm->mmap_sem); | 2210 | up_write(&me->mm->mmap_sem); |
2205 | break; | 2211 | break; |
2212 | case PR_MPX_ENABLE_MANAGEMENT: | ||
2213 | error = MPX_ENABLE_MANAGEMENT(me); | ||
2214 | break; | ||
2215 | case PR_MPX_DISABLE_MANAGEMENT: | ||
2216 | error = MPX_DISABLE_MANAGEMENT(me); | ||
2217 | break; | ||
2206 | default: | 2218 | default: |
2207 | error = -EINVAL; | 2219 | error = -EINVAL; |
2208 | break; | 2220 | break; |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4aada6d9fe74..7c54ff79afd7 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -387,7 +387,8 @@ static struct ctl_table kern_table[] = { | |||
387 | .data = &sysctl_numa_balancing_scan_size, | 387 | .data = &sysctl_numa_balancing_scan_size, |
388 | .maxlen = sizeof(unsigned int), | 388 | .maxlen = sizeof(unsigned int), |
389 | .mode = 0644, | 389 | .mode = 0644, |
390 | .proc_handler = proc_dointvec, | 390 | .proc_handler = proc_dointvec_minmax, |
391 | .extra1 = &one, | ||
391 | }, | 392 | }, |
392 | { | 393 | { |
393 | .procname = "numa_balancing", | 394 | .procname = "numa_balancing", |
@@ -1103,6 +1104,15 @@ static struct ctl_table kern_table[] = { | |||
1103 | .proc_handler = proc_dointvec, | 1104 | .proc_handler = proc_dointvec, |
1104 | }, | 1105 | }, |
1105 | #endif | 1106 | #endif |
1107 | { | ||
1108 | .procname = "panic_on_warn", | ||
1109 | .data = &panic_on_warn, | ||
1110 | .maxlen = sizeof(int), | ||
1111 | .mode = 0644, | ||
1112 | .proc_handler = proc_dointvec_minmax, | ||
1113 | .extra1 = &zero, | ||
1114 | .extra2 = &one, | ||
1115 | }, | ||
1106 | { } | 1116 | { } |
1107 | }; | 1117 | }; |
1108 | 1118 | ||
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 9a4f750a2963..7e7746a42a62 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c | |||
@@ -137,6 +137,7 @@ static const struct bin_table bin_kern_table[] = { | |||
137 | { CTL_INT, KERN_COMPAT_LOG, "compat-log" }, | 137 | { CTL_INT, KERN_COMPAT_LOG, "compat-log" }, |
138 | { CTL_INT, KERN_MAX_LOCK_DEPTH, "max_lock_depth" }, | 138 | { CTL_INT, KERN_MAX_LOCK_DEPTH, "max_lock_depth" }, |
139 | { CTL_INT, KERN_PANIC_ON_NMI, "panic_on_unrecovered_nmi" }, | 139 | { CTL_INT, KERN_PANIC_ON_NMI, "panic_on_unrecovered_nmi" }, |
140 | { CTL_INT, KERN_PANIC_ON_WARN, "panic_on_warn" }, | ||
140 | {} | 141 | {} |
141 | }; | 142 | }; |
142 | 143 | ||
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index b312fcc73024..670fff88a961 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c | |||
@@ -459,7 +459,7 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) | |||
459 | stats = nla_data(na); | 459 | stats = nla_data(na); |
460 | memset(stats, 0, sizeof(*stats)); | 460 | memset(stats, 0, sizeof(*stats)); |
461 | 461 | ||
462 | rc = cgroupstats_build(stats, f.file->f_dentry); | 462 | rc = cgroupstats_build(stats, f.file->f_path.dentry); |
463 | if (rc < 0) { | 463 | if (rc < 0) { |
464 | nlmsg_free(rep_skb); | 464 | nlmsg_free(rep_skb); |
465 | goto err; | 465 | goto err; |
diff --git a/kernel/time/Makefile b/kernel/time/Makefile index 7347426fa68d..f622cf28628a 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile | |||
@@ -13,7 +13,7 @@ obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o | |||
13 | obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o | 13 | obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o |
14 | obj-$(CONFIG_TIMER_STATS) += timer_stats.o | 14 | obj-$(CONFIG_TIMER_STATS) += timer_stats.o |
15 | obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o | 15 | obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o |
16 | obj-$(CONFIG_TEST_UDELAY) += udelay_test.o | 16 | obj-$(CONFIG_TEST_UDELAY) += test_udelay.o |
17 | 17 | ||
18 | $(obj)/time.o: $(obj)/timeconst.h | 18 | $(obj)/time.o: $(obj)/timeconst.h |
19 | 19 | ||
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 9c94c19f1305..55449909f114 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c | |||
@@ -72,7 +72,7 @@ static u64 cev_delta2ns(unsigned long latch, struct clock_event_device *evt, | |||
72 | * Also omit the add if it would overflow the u64 boundary. | 72 | * Also omit the add if it would overflow the u64 boundary. |
73 | */ | 73 | */ |
74 | if ((~0ULL - clc > rnd) && | 74 | if ((~0ULL - clc > rnd) && |
75 | (!ismax || evt->mult <= (1U << evt->shift))) | 75 | (!ismax || evt->mult <= (1ULL << evt->shift))) |
76 | clc += rnd; | 76 | clc += rnd; |
77 | 77 | ||
78 | do_div(clc, evt->mult); | 78 | do_div(clc, evt->mult); |
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 492b986195d5..a16b67859e2a 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c | |||
@@ -553,7 +553,7 @@ static int cpu_timer_sample_group(const clockid_t which_clock, | |||
553 | *sample = cputime_to_expires(cputime.utime); | 553 | *sample = cputime_to_expires(cputime.utime); |
554 | break; | 554 | break; |
555 | case CPUCLOCK_SCHED: | 555 | case CPUCLOCK_SCHED: |
556 | *sample = cputime.sum_exec_runtime + task_delta_exec(p); | 556 | *sample = cputime.sum_exec_runtime; |
557 | break; | 557 | break; |
558 | } | 558 | } |
559 | return 0; | 559 | return 0; |
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 42b463ad90f2..31ea01f42e1f 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c | |||
@@ -636,6 +636,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, | |||
636 | goto out; | 636 | goto out; |
637 | } | 637 | } |
638 | } else { | 638 | } else { |
639 | memset(&event.sigev_value, 0, sizeof(event.sigev_value)); | ||
639 | event.sigev_notify = SIGEV_SIGNAL; | 640 | event.sigev_notify = SIGEV_SIGNAL; |
640 | event.sigev_signo = SIGALRM; | 641 | event.sigev_signo = SIGALRM; |
641 | event.sigev_value.sival_int = new_timer->it_id; | 642 | event.sigev_value.sival_int = new_timer->it_id; |
diff --git a/kernel/time/udelay_test.c b/kernel/time/test_udelay.c index e622ba365a13..e622ba365a13 100644 --- a/kernel/time/udelay_test.c +++ b/kernel/time/test_udelay.c | |||
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 7b5741fc4110..1f4356037a7d 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -585,7 +585,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, | |||
585 | last_jiffies = jiffies; | 585 | last_jiffies = jiffies; |
586 | } while (read_seqretry(&jiffies_lock, seq)); | 586 | } while (read_seqretry(&jiffies_lock, seq)); |
587 | 587 | ||
588 | if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || | 588 | if (rcu_needs_cpu(&rcu_delta_jiffies) || |
589 | arch_needs_cpu() || irq_work_needs_cpu()) { | 589 | arch_needs_cpu() || irq_work_needs_cpu()) { |
590 | next_jiffies = last_jiffies + 1; | 590 | next_jiffies = last_jiffies + 1; |
591 | delta_jiffies = 1; | 591 | delta_jiffies = 1; |
diff --git a/kernel/time/time.c b/kernel/time/time.c index a9ae20fb0b11..65015ff2f07c 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c | |||
@@ -304,7 +304,9 @@ struct timespec timespec_trunc(struct timespec t, unsigned gran) | |||
304 | } | 304 | } |
305 | EXPORT_SYMBOL(timespec_trunc); | 305 | EXPORT_SYMBOL(timespec_trunc); |
306 | 306 | ||
307 | /* Converts Gregorian date to seconds since 1970-01-01 00:00:00. | 307 | /* |
308 | * mktime64 - Converts date to seconds. | ||
309 | * Converts Gregorian date to seconds since 1970-01-01 00:00:00. | ||
308 | * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 | 310 | * Assumes input in normal date format, i.e. 1980-12-31 23:59:59 |
309 | * => year=1980, mon=12, day=31, hour=23, min=59, sec=59. | 311 | * => year=1980, mon=12, day=31, hour=23, min=59, sec=59. |
310 | * | 312 | * |
@@ -314,15 +316,10 @@ EXPORT_SYMBOL(timespec_trunc); | |||
314 | * -year/100+year/400 terms, and add 10.] | 316 | * -year/100+year/400 terms, and add 10.] |
315 | * | 317 | * |
316 | * This algorithm was first published by Gauss (I think). | 318 | * This algorithm was first published by Gauss (I think). |
317 | * | ||
318 | * WARNING: this function will overflow on 2106-02-07 06:28:16 on | ||
319 | * machines where long is 32-bit! (However, as time_t is signed, we | ||
320 | * will already get problems at other places on 2038-01-19 03:14:08) | ||
321 | */ | 319 | */ |
322 | unsigned long | 320 | time64_t mktime64(const unsigned int year0, const unsigned int mon0, |
323 | mktime(const unsigned int year0, const unsigned int mon0, | 321 | const unsigned int day, const unsigned int hour, |
324 | const unsigned int day, const unsigned int hour, | 322 | const unsigned int min, const unsigned int sec) |
325 | const unsigned int min, const unsigned int sec) | ||
326 | { | 323 | { |
327 | unsigned int mon = mon0, year = year0; | 324 | unsigned int mon = mon0, year = year0; |
328 | 325 | ||
@@ -332,15 +329,14 @@ mktime(const unsigned int year0, const unsigned int mon0, | |||
332 | year -= 1; | 329 | year -= 1; |
333 | } | 330 | } |
334 | 331 | ||
335 | return ((((unsigned long) | 332 | return ((((time64_t) |
336 | (year/4 - year/100 + year/400 + 367*mon/12 + day) + | 333 | (year/4 - year/100 + year/400 + 367*mon/12 + day) + |
337 | year*365 - 719499 | 334 | year*365 - 719499 |
338 | )*24 + hour /* now have hours */ | 335 | )*24 + hour /* now have hours */ |
339 | )*60 + min /* now have minutes */ | 336 | )*60 + min /* now have minutes */ |
340 | )*60 + sec; /* finally seconds */ | 337 | )*60 + sec; /* finally seconds */ |
341 | } | 338 | } |
342 | 339 | EXPORT_SYMBOL(mktime64); | |
343 | EXPORT_SYMBOL(mktime); | ||
344 | 340 | ||
345 | /** | 341 | /** |
346 | * set_normalized_timespec - set timespec sec and nsec parts and normalize | 342 | * set_normalized_timespec - set timespec sec and nsec parts and normalize |
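
mktime64() above keeps the same Gauss-style day count as the old mktime() but returns time64_t, so the result no longer wraps in 2106 on machines with a 32-bit long. The standalone program below re-implements just that arithmetic (it is not the kernel function) and checks it against a known epoch value:

#include <stdio.h>
#include <stdint.h>

static int64_t my_mktime64(unsigned int year, unsigned int mon,
			   unsigned int day, unsigned int hour,
			   unsigned int min, unsigned int sec)
{
	/* Shift Jan/Feb to months 11/12 of the previous year so the leap
	 * day sits at the end of the "year", exactly as in the patch. */
	if (0 >= (int)(mon -= 2)) {
		mon += 12;
		year -= 1;
	}

	return ((((int64_t)
		  (year/4 - year/100 + year/400 + 367*mon/12 + day) +
		  year*365 - 719499
		 ) * 24 + hour	/* now have hours   */
		) * 60 + min	/* now have minutes */
	       ) * 60 + sec;	/* finally seconds  */
}

int main(void)
{
	/* 2000-01-01 00:00:00 UTC is 946684800 seconds after the epoch. */
	printf("%lld\n", (long long)my_mktime64(2000, 1, 1, 0, 0, 0));
	/* A date past 2106 still fits, which is the point of time64_t. */
	printf("%lld\n", (long long)my_mktime64(2200, 1, 1, 0, 0, 0));
	return 0;
}
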
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index ec1791fae965..6a931852082f 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -417,7 +417,8 @@ EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier); | |||
417 | */ | 417 | */ |
418 | static inline void tk_update_ktime_data(struct timekeeper *tk) | 418 | static inline void tk_update_ktime_data(struct timekeeper *tk) |
419 | { | 419 | { |
420 | s64 nsec; | 420 | u64 seconds; |
421 | u32 nsec; | ||
421 | 422 | ||
422 | /* | 423 | /* |
423 | * The xtime based monotonic readout is: | 424 | * The xtime based monotonic readout is: |
@@ -426,13 +427,22 @@ static inline void tk_update_ktime_data(struct timekeeper *tk) | |||
426 | * nsec = base_mono + now(); | 427 | * nsec = base_mono + now(); |
427 | * ==> base_mono = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec | 428 | * ==> base_mono = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec |
428 | */ | 429 | */ |
429 | nsec = (s64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec); | 430 | seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec); |
430 | nsec *= NSEC_PER_SEC; | 431 | nsec = (u32) tk->wall_to_monotonic.tv_nsec; |
431 | nsec += tk->wall_to_monotonic.tv_nsec; | 432 | tk->tkr.base_mono = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); |
432 | tk->tkr.base_mono = ns_to_ktime(nsec); | ||
433 | 433 | ||
434 | /* Update the monotonic raw base */ | 434 | /* Update the monotonic raw base */ |
435 | tk->base_raw = timespec64_to_ktime(tk->raw_time); | 435 | tk->base_raw = timespec64_to_ktime(tk->raw_time); |
436 | |||
437 | /* | ||
438 | * The sum of the nanoseconds portions of xtime and | ||
439 | * wall_to_monotonic can be greater/equal one second. Take | ||
440 | * this into account before updating tk->ktime_sec. | ||
441 | */ | ||
442 | nsec += (u32)(tk->tkr.xtime_nsec >> tk->tkr.shift); | ||
443 | if (nsec >= NSEC_PER_SEC) | ||
444 | seconds++; | ||
445 | tk->ktime_sec = seconds; | ||
436 | } | 446 | } |
437 | 447 | ||
438 | /* must hold timekeeper_lock */ | 448 | /* must hold timekeeper_lock */ |
@@ -519,9 +529,9 @@ EXPORT_SYMBOL(__getnstimeofday64); | |||
519 | 529 | ||
520 | /** | 530 | /** |
521 | * getnstimeofday64 - Returns the time of day in a timespec64. | 531 | * getnstimeofday64 - Returns the time of day in a timespec64. |
522 | * @ts: pointer to the timespec to be set | 532 | * @ts: pointer to the timespec64 to be set |
523 | * | 533 | * |
524 | * Returns the time of day in a timespec (WARN if suspended). | 534 | * Returns the time of day in a timespec64 (WARN if suspended). |
525 | */ | 535 | */ |
526 | void getnstimeofday64(struct timespec64 *ts) | 536 | void getnstimeofday64(struct timespec64 *ts) |
527 | { | 537 | { |
@@ -623,7 +633,7 @@ EXPORT_SYMBOL_GPL(ktime_get_raw); | |||
623 | * | 633 | * |
624 | * The function calculates the monotonic clock from the realtime | 634 | * The function calculates the monotonic clock from the realtime |
625 | * clock and the wall_to_monotonic offset and stores the result | 635 | * clock and the wall_to_monotonic offset and stores the result |
626 | * in normalized timespec format in the variable pointed to by @ts. | 636 | * in normalized timespec64 format in the variable pointed to by @ts. |
627 | */ | 637 | */ |
628 | void ktime_get_ts64(struct timespec64 *ts) | 638 | void ktime_get_ts64(struct timespec64 *ts) |
629 | { | 639 | { |
@@ -648,6 +658,54 @@ void ktime_get_ts64(struct timespec64 *ts) | |||
648 | } | 658 | } |
649 | EXPORT_SYMBOL_GPL(ktime_get_ts64); | 659 | EXPORT_SYMBOL_GPL(ktime_get_ts64); |
650 | 660 | ||
661 | /** | ||
662 | * ktime_get_seconds - Get the seconds portion of CLOCK_MONOTONIC | ||
663 | * | ||
664 | * Returns the seconds portion of CLOCK_MONOTONIC with a single non | ||
665 | * serialized read. tk->ktime_sec is of type 'unsigned long' so this | ||
666 | * works on both 32 and 64 bit systems. On 32 bit systems the readout | ||
667 | * covers ~136 years of uptime which should be enough to prevent | ||
668 | * premature wrap arounds. | ||
669 | */ | ||
670 | time64_t ktime_get_seconds(void) | ||
671 | { | ||
672 | struct timekeeper *tk = &tk_core.timekeeper; | ||
673 | |||
674 | WARN_ON(timekeeping_suspended); | ||
675 | return tk->ktime_sec; | ||
676 | } | ||
677 | EXPORT_SYMBOL_GPL(ktime_get_seconds); | ||
678 | |||
679 | /** | ||
680 | * ktime_get_real_seconds - Get the seconds portion of CLOCK_REALTIME | ||
681 | * | ||
682 | * Returns the wall clock seconds since 1970. This replaces the | ||
683 | * get_seconds() interface which is not y2038 safe on 32bit systems. | ||
684 | * | ||
685 | * For 64bit systems the fast access to tk->xtime_sec is preserved. On | ||
686 | * 32bit systems the access must be protected with the sequence | ||
687 | * counter to provide "atomic" access to the 64bit tk->xtime_sec | ||
688 | * value. | ||
689 | */ | ||
690 | time64_t ktime_get_real_seconds(void) | ||
691 | { | ||
692 | struct timekeeper *tk = &tk_core.timekeeper; | ||
693 | time64_t seconds; | ||
694 | unsigned int seq; | ||
695 | |||
696 | if (IS_ENABLED(CONFIG_64BIT)) | ||
697 | return tk->xtime_sec; | ||
698 | |||
699 | do { | ||
700 | seq = read_seqcount_begin(&tk_core.seq); | ||
701 | seconds = tk->xtime_sec; | ||
702 | |||
703 | } while (read_seqcount_retry(&tk_core.seq, seq)); | ||
704 | |||
705 | return seconds; | ||
706 | } | ||
707 | EXPORT_SYMBOL_GPL(ktime_get_real_seconds); | ||
708 | |||
651 | #ifdef CONFIG_NTP_PPS | 709 | #ifdef CONFIG_NTP_PPS |
652 | 710 | ||
653 | /** | 711 | /** |
@@ -703,18 +761,18 @@ void do_gettimeofday(struct timeval *tv) | |||
703 | EXPORT_SYMBOL(do_gettimeofday); | 761 | EXPORT_SYMBOL(do_gettimeofday); |
704 | 762 | ||
705 | /** | 763 | /** |
706 | * do_settimeofday - Sets the time of day | 764 | * do_settimeofday64 - Sets the time of day. |
707 | * @tv: pointer to the timespec variable containing the new time | 765 | * @ts: pointer to the timespec64 variable containing the new time |
708 | * | 766 | * |
709 | * Sets the time of day to the new time and update NTP and notify hrtimers | 767 | * Sets the time of day to the new time and update NTP and notify hrtimers |
710 | */ | 768 | */ |
711 | int do_settimeofday(const struct timespec *tv) | 769 | int do_settimeofday64(const struct timespec64 *ts) |
712 | { | 770 | { |
713 | struct timekeeper *tk = &tk_core.timekeeper; | 771 | struct timekeeper *tk = &tk_core.timekeeper; |
714 | struct timespec64 ts_delta, xt, tmp; | 772 | struct timespec64 ts_delta, xt; |
715 | unsigned long flags; | 773 | unsigned long flags; |
716 | 774 | ||
717 | if (!timespec_valid_strict(tv)) | 775 | if (!timespec64_valid_strict(ts)) |
718 | return -EINVAL; | 776 | return -EINVAL; |
719 | 777 | ||
720 | raw_spin_lock_irqsave(&timekeeper_lock, flags); | 778 | raw_spin_lock_irqsave(&timekeeper_lock, flags); |
@@ -723,13 +781,12 @@ int do_settimeofday(const struct timespec *tv) | |||
723 | timekeeping_forward_now(tk); | 781 | timekeeping_forward_now(tk); |
724 | 782 | ||
725 | xt = tk_xtime(tk); | 783 | xt = tk_xtime(tk); |
726 | ts_delta.tv_sec = tv->tv_sec - xt.tv_sec; | 784 | ts_delta.tv_sec = ts->tv_sec - xt.tv_sec; |
727 | ts_delta.tv_nsec = tv->tv_nsec - xt.tv_nsec; | 785 | ts_delta.tv_nsec = ts->tv_nsec - xt.tv_nsec; |
728 | 786 | ||
729 | tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, ts_delta)); | 787 | tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, ts_delta)); |
730 | 788 | ||
731 | tmp = timespec_to_timespec64(*tv); | 789 | tk_set_xtime(tk, ts); |
732 | tk_set_xtime(tk, &tmp); | ||
733 | 790 | ||
734 | timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); | 791 | timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); |
735 | 792 | ||
@@ -741,7 +798,7 @@ int do_settimeofday(const struct timespec *tv) | |||
741 | 798 | ||
742 | return 0; | 799 | return 0; |
743 | } | 800 | } |
744 | EXPORT_SYMBOL(do_settimeofday); | 801 | EXPORT_SYMBOL(do_settimeofday64); |
745 | 802 | ||
746 | /** | 803 | /** |
747 | * timekeeping_inject_offset - Adds or subtracts from the current time. | 804 | * timekeeping_inject_offset - Adds or subtracts from the current time. |
@@ -895,12 +952,12 @@ int timekeeping_notify(struct clocksource *clock) | |||
895 | } | 952 | } |
896 | 953 | ||
897 | /** | 954 | /** |
898 | * getrawmonotonic - Returns the raw monotonic time in a timespec | 955 | * getrawmonotonic64 - Returns the raw monotonic time in a timespec |
899 | * @ts: pointer to the timespec to be set | 956 | * @ts: pointer to the timespec64 to be set |
900 | * | 957 | * |
901 | * Returns the raw monotonic time (completely un-modified by ntp) | 958 | * Returns the raw monotonic time (completely un-modified by ntp) |
902 | */ | 959 | */ |
903 | void getrawmonotonic(struct timespec *ts) | 960 | void getrawmonotonic64(struct timespec64 *ts) |
904 | { | 961 | { |
905 | struct timekeeper *tk = &tk_core.timekeeper; | 962 | struct timekeeper *tk = &tk_core.timekeeper; |
906 | struct timespec64 ts64; | 963 | struct timespec64 ts64; |
@@ -915,9 +972,10 @@ void getrawmonotonic(struct timespec *ts) | |||
915 | } while (read_seqcount_retry(&tk_core.seq, seq)); | 972 | } while (read_seqcount_retry(&tk_core.seq, seq)); |
916 | 973 | ||
917 | timespec64_add_ns(&ts64, nsecs); | 974 | timespec64_add_ns(&ts64, nsecs); |
918 | *ts = timespec64_to_timespec(ts64); | 975 | *ts = ts64; |
919 | } | 976 | } |
920 | EXPORT_SYMBOL(getrawmonotonic); | 977 | EXPORT_SYMBOL(getrawmonotonic64); |
978 | |||
921 | 979 | ||
922 | /** | 980 | /** |
923 | * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres | 981 | * timekeeping_valid_for_hres - Check if timekeeping is suitable for hres |
@@ -1068,8 +1126,8 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, | |||
1068 | } | 1126 | } |
1069 | 1127 | ||
1070 | /** | 1128 | /** |
1071 | * timekeeping_inject_sleeptime - Adds suspend interval to timekeeping values | 1129 | * timekeeping_inject_sleeptime64 - Adds suspend interval to timekeeping values |
1072 | * @delta: pointer to a timespec delta value | 1130 | * @delta: pointer to a timespec64 delta value |
1073 | * | 1131 | * |
1074 | * This hook is for architectures that cannot support read_persistent_clock | 1132 | * This hook is for architectures that cannot support read_persistent_clock |
1075 | * because their RTC/persistent clock is only accessible when irqs are enabled. | 1133 | * because their RTC/persistent clock is only accessible when irqs are enabled. |
@@ -1077,10 +1135,9 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk, | |||
1077 | * This function should only be called by rtc_resume(), and allows | 1135 | * This function should only be called by rtc_resume(), and allows |
1078 | * a suspend offset to be injected into the timekeeping values. | 1136 | * a suspend offset to be injected into the timekeeping values. |
1079 | */ | 1137 | */ |
1080 | void timekeeping_inject_sleeptime(struct timespec *delta) | 1138 | void timekeeping_inject_sleeptime64(struct timespec64 *delta) |
1081 | { | 1139 | { |
1082 | struct timekeeper *tk = &tk_core.timekeeper; | 1140 | struct timekeeper *tk = &tk_core.timekeeper; |
1083 | struct timespec64 tmp; | ||
1084 | unsigned long flags; | 1141 | unsigned long flags; |
1085 | 1142 | ||
1086 | /* | 1143 | /* |
@@ -1095,8 +1152,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta) | |||
1095 | 1152 | ||
1096 | timekeeping_forward_now(tk); | 1153 | timekeeping_forward_now(tk); |
1097 | 1154 | ||
1098 | tmp = timespec_to_timespec64(*delta); | 1155 | __timekeeping_inject_sleeptime(tk, delta); |
1099 | __timekeeping_inject_sleeptime(tk, &tmp); | ||
1100 | 1156 | ||
1101 | timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); | 1157 | timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET); |
1102 | 1158 | ||
@@ -1332,6 +1388,12 @@ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk, | |||
1332 | * | 1388 | * |
1333 | * XXX - TODO: Doc ntp_error calculation. | 1389 | * XXX - TODO: Doc ntp_error calculation. |
1334 | */ | 1390 | */ |
1391 | if ((mult_adj > 0) && (tk->tkr.mult + mult_adj < mult_adj)) { | ||
1392 | /* NTP adjustment caused clocksource mult overflow */ | ||
1393 | WARN_ON_ONCE(1); | ||
1394 | return; | ||
1395 | } | ||
1396 | |||
1335 | tk->tkr.mult += mult_adj; | 1397 | tk->tkr.mult += mult_adj; |
1336 | tk->xtime_interval += interval; | 1398 | tk->xtime_interval += interval; |
1337 | tk->tkr.xtime_nsec -= offset; | 1399 | tk->tkr.xtime_nsec -= offset; |
@@ -1397,7 +1459,8 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset) | |||
1397 | } | 1459 | } |
1398 | 1460 | ||
1399 | if (unlikely(tk->tkr.clock->maxadj && | 1461 | if (unlikely(tk->tkr.clock->maxadj && |
1400 | (tk->tkr.mult > tk->tkr.clock->mult + tk->tkr.clock->maxadj))) { | 1462 | (abs(tk->tkr.mult - tk->tkr.clock->mult) |
1463 | > tk->tkr.clock->maxadj))) { | ||
1401 | printk_once(KERN_WARNING | 1464 | printk_once(KERN_WARNING |
1402 | "Adjusting %s more than 11%% (%ld vs %ld)\n", | 1465 | "Adjusting %s more than 11%% (%ld vs %ld)\n", |
1403 | tk->tkr.clock->name, (long)tk->tkr.mult, | 1466 | tk->tkr.clock->name, (long)tk->tkr.mult, |
@@ -1646,7 +1709,7 @@ struct timespec current_kernel_time(void) | |||
1646 | } | 1709 | } |
1647 | EXPORT_SYMBOL(current_kernel_time); | 1710 | EXPORT_SYMBOL(current_kernel_time); |
1648 | 1711 | ||
1649 | struct timespec get_monotonic_coarse(void) | 1712 | struct timespec64 get_monotonic_coarse64(void) |
1650 | { | 1713 | { |
1651 | struct timekeeper *tk = &tk_core.timekeeper; | 1714 | struct timekeeper *tk = &tk_core.timekeeper; |
1652 | struct timespec64 now, mono; | 1715 | struct timespec64 now, mono; |
@@ -1662,7 +1725,7 @@ struct timespec get_monotonic_coarse(void) | |||
1662 | set_normalized_timespec64(&now, now.tv_sec + mono.tv_sec, | 1725 | set_normalized_timespec64(&now, now.tv_sec + mono.tv_sec, |
1663 | now.tv_nsec + mono.tv_nsec); | 1726 | now.tv_nsec + mono.tv_nsec); |
1664 | 1727 | ||
1665 | return timespec64_to_timespec(now); | 1728 | return now; |
1666 | } | 1729 | } |
1667 | 1730 | ||
1668 | /* | 1731 | /* |
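
tk_update_ktime_data() above now also caches a coarse seconds value, tk->ktime_sec, for the new ktime_get_seconds() accessor, and it has to carry one second whenever the nanosecond parts of xtime and wall_to_monotonic add up to a full second. A toy calculation with made-up numbers shows why the carry matters:

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

int main(void)
{
	/* Assume xtime = 1000.7s and wall_to_monotonic = -100s + 0.6s. */
	int64_t xtime_sec = 1000, wtm_sec = -100;
	uint64_t xtime_nsec = 700000000, wtm_nsec = 600000000;

	uint64_t seconds = (uint64_t)(xtime_sec + wtm_sec);	/* 900   */
	uint64_t nsec = wtm_nsec + xtime_nsec;			/* 1.3e9 */

	/* Without the carry the cached value would read 900 even though
	 * CLOCK_MONOTONIC is already at 901.3s. */
	if (nsec >= NSEC_PER_SEC)
		seconds++;

	printf("ktime_sec = %llu\n", (unsigned long long)seconds);
	return 0;
}
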
diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 3260ffdb368f..2d3f5c504939 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c | |||
@@ -1377,12 +1377,11 @@ unsigned long get_next_timer_interrupt(unsigned long now) | |||
1377 | void update_process_times(int user_tick) | 1377 | void update_process_times(int user_tick) |
1378 | { | 1378 | { |
1379 | struct task_struct *p = current; | 1379 | struct task_struct *p = current; |
1380 | int cpu = smp_processor_id(); | ||
1381 | 1380 | ||
1382 | /* Note: this timer irq context must be accounted for as well. */ | 1381 | /* Note: this timer irq context must be accounted for as well. */ |
1383 | account_process_tick(p, user_tick); | 1382 | account_process_tick(p, user_tick); |
1384 | run_local_timers(); | 1383 | run_local_timers(); |
1385 | rcu_check_callbacks(cpu, user_tick); | 1384 | rcu_check_callbacks(user_tick); |
1386 | #ifdef CONFIG_IRQ_WORK | 1385 | #ifdef CONFIG_IRQ_WORK |
1387 | if (in_irq()) | 1386 | if (in_irq()) |
1388 | irq_work_tick(); | 1387 | irq_work_tick(); |
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index c1bd4ada2a04..11b9cb36092b 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
@@ -1142,9 +1142,9 @@ static void get_pdu_remap(const struct trace_entry *ent, | |||
1142 | r->sector_from = be64_to_cpu(sector_from); | 1142 | r->sector_from = be64_to_cpu(sector_from); |
1143 | } | 1143 | } |
1144 | 1144 | ||
1145 | typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act); | 1145 | typedef void (blk_log_action_t) (struct trace_iterator *iter, const char *act); |
1146 | 1146 | ||
1147 | static int blk_log_action_classic(struct trace_iterator *iter, const char *act) | 1147 | static void blk_log_action_classic(struct trace_iterator *iter, const char *act) |
1148 | { | 1148 | { |
1149 | char rwbs[RWBS_LEN]; | 1149 | char rwbs[RWBS_LEN]; |
1150 | unsigned long long ts = iter->ts; | 1150 | unsigned long long ts = iter->ts; |
@@ -1154,33 +1154,33 @@ static int blk_log_action_classic(struct trace_iterator *iter, const char *act) | |||
1154 | 1154 | ||
1155 | fill_rwbs(rwbs, t); | 1155 | fill_rwbs(rwbs, t); |
1156 | 1156 | ||
1157 | return trace_seq_printf(&iter->seq, | 1157 | trace_seq_printf(&iter->seq, |
1158 | "%3d,%-3d %2d %5d.%09lu %5u %2s %3s ", | 1158 | "%3d,%-3d %2d %5d.%09lu %5u %2s %3s ", |
1159 | MAJOR(t->device), MINOR(t->device), iter->cpu, | 1159 | MAJOR(t->device), MINOR(t->device), iter->cpu, |
1160 | secs, nsec_rem, iter->ent->pid, act, rwbs); | 1160 | secs, nsec_rem, iter->ent->pid, act, rwbs); |
1161 | } | 1161 | } |
1162 | 1162 | ||
1163 | static int blk_log_action(struct trace_iterator *iter, const char *act) | 1163 | static void blk_log_action(struct trace_iterator *iter, const char *act) |
1164 | { | 1164 | { |
1165 | char rwbs[RWBS_LEN]; | 1165 | char rwbs[RWBS_LEN]; |
1166 | const struct blk_io_trace *t = te_blk_io_trace(iter->ent); | 1166 | const struct blk_io_trace *t = te_blk_io_trace(iter->ent); |
1167 | 1167 | ||
1168 | fill_rwbs(rwbs, t); | 1168 | fill_rwbs(rwbs, t); |
1169 | return trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ", | 1169 | trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ", |
1170 | MAJOR(t->device), MINOR(t->device), act, rwbs); | 1170 | MAJOR(t->device), MINOR(t->device), act, rwbs); |
1171 | } | 1171 | } |
1172 | 1172 | ||
1173 | static int blk_log_dump_pdu(struct trace_seq *s, const struct trace_entry *ent) | 1173 | static void blk_log_dump_pdu(struct trace_seq *s, const struct trace_entry *ent) |
1174 | { | 1174 | { |
1175 | const unsigned char *pdu_buf; | 1175 | const unsigned char *pdu_buf; |
1176 | int pdu_len; | 1176 | int pdu_len; |
1177 | int i, end, ret; | 1177 | int i, end; |
1178 | 1178 | ||
1179 | pdu_buf = pdu_start(ent); | 1179 | pdu_buf = pdu_start(ent); |
1180 | pdu_len = te_blk_io_trace(ent)->pdu_len; | 1180 | pdu_len = te_blk_io_trace(ent)->pdu_len; |
1181 | 1181 | ||
1182 | if (!pdu_len) | 1182 | if (!pdu_len) |
1183 | return 1; | 1183 | return; |
1184 | 1184 | ||
1185 | /* find the last zero that needs to be printed */ | 1185 | /* find the last zero that needs to be printed */ |
1186 | for (end = pdu_len - 1; end >= 0; end--) | 1186 | for (end = pdu_len - 1; end >= 0; end--) |
@@ -1188,119 +1188,107 @@ static int blk_log_dump_pdu(struct trace_seq *s, const struct trace_entry *ent) | |||
1188 | break; | 1188 | break; |
1189 | end++; | 1189 | end++; |
1190 | 1190 | ||
1191 | if (!trace_seq_putc(s, '(')) | 1191 | trace_seq_putc(s, '('); |
1192 | return 0; | ||
1193 | 1192 | ||
1194 | for (i = 0; i < pdu_len; i++) { | 1193 | for (i = 0; i < pdu_len; i++) { |
1195 | 1194 | ||
1196 | ret = trace_seq_printf(s, "%s%02x", | 1195 | trace_seq_printf(s, "%s%02x", |
1197 | i == 0 ? "" : " ", pdu_buf[i]); | 1196 | i == 0 ? "" : " ", pdu_buf[i]); |
1198 | if (!ret) | ||
1199 | return ret; | ||
1200 | 1197 | ||
1201 | /* | 1198 | /* |
1202 | * stop when the rest is just zeroes and indicate so | 1199 | * stop when the rest is just zeroes and indicate so |
1203 | * with a ".." appended | 1200 | * with a ".." appended |
1204 | */ | 1201 | */ |
1205 | if (i == end && end != pdu_len - 1) | 1202 | if (i == end && end != pdu_len - 1) { |
1206 | return trace_seq_puts(s, " ..) "); | 1203 | trace_seq_puts(s, " ..) "); |
1204 | return; | ||
1205 | } | ||
1207 | } | 1206 | } |
1208 | 1207 | ||
1209 | return trace_seq_puts(s, ") "); | 1208 | trace_seq_puts(s, ") "); |
1210 | } | 1209 | } |
1211 | 1210 | ||
1212 | static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent) | 1211 | static void blk_log_generic(struct trace_seq *s, const struct trace_entry *ent) |
1213 | { | 1212 | { |
1214 | char cmd[TASK_COMM_LEN]; | 1213 | char cmd[TASK_COMM_LEN]; |
1215 | 1214 | ||
1216 | trace_find_cmdline(ent->pid, cmd); | 1215 | trace_find_cmdline(ent->pid, cmd); |
1217 | 1216 | ||
1218 | if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) { | 1217 | if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) { |
1219 | int ret; | 1218 | trace_seq_printf(s, "%u ", t_bytes(ent)); |
1220 | 1219 | blk_log_dump_pdu(s, ent); | |
1221 | ret = trace_seq_printf(s, "%u ", t_bytes(ent)); | 1220 | trace_seq_printf(s, "[%s]\n", cmd); |
1222 | if (!ret) | ||
1223 | return 0; | ||
1224 | ret = blk_log_dump_pdu(s, ent); | ||
1225 | if (!ret) | ||
1226 | return 0; | ||
1227 | return trace_seq_printf(s, "[%s]\n", cmd); | ||
1228 | } else { | 1221 | } else { |
1229 | if (t_sec(ent)) | 1222 | if (t_sec(ent)) |
1230 | return trace_seq_printf(s, "%llu + %u [%s]\n", | 1223 | trace_seq_printf(s, "%llu + %u [%s]\n", |
1231 | t_sector(ent), t_sec(ent), cmd); | 1224 | t_sector(ent), t_sec(ent), cmd); |
1232 | return trace_seq_printf(s, "[%s]\n", cmd); | 1225 | else |
1226 | trace_seq_printf(s, "[%s]\n", cmd); | ||
1233 | } | 1227 | } |
1234 | } | 1228 | } |
1235 | 1229 | ||
1236 | static int blk_log_with_error(struct trace_seq *s, | 1230 | static void blk_log_with_error(struct trace_seq *s, |
1237 | const struct trace_entry *ent) | 1231 | const struct trace_entry *ent) |
1238 | { | 1232 | { |
1239 | if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) { | 1233 | if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) { |
1240 | int ret; | 1234 | blk_log_dump_pdu(s, ent); |
1241 | 1235 | trace_seq_printf(s, "[%d]\n", t_error(ent)); | |
1242 | ret = blk_log_dump_pdu(s, ent); | ||
1243 | if (ret) | ||
1244 | return trace_seq_printf(s, "[%d]\n", t_error(ent)); | ||
1245 | return 0; | ||
1246 | } else { | 1236 | } else { |
1247 | if (t_sec(ent)) | 1237 | if (t_sec(ent)) |
1248 | return trace_seq_printf(s, "%llu + %u [%d]\n", | 1238 | trace_seq_printf(s, "%llu + %u [%d]\n", |
1249 | t_sector(ent), | 1239 | t_sector(ent), |
1250 | t_sec(ent), t_error(ent)); | 1240 | t_sec(ent), t_error(ent)); |
1251 | return trace_seq_printf(s, "%llu [%d]\n", | 1241 | else |
1252 | t_sector(ent), t_error(ent)); | 1242 | trace_seq_printf(s, "%llu [%d]\n", |
1243 | t_sector(ent), t_error(ent)); | ||
1253 | } | 1244 | } |
1254 | } | 1245 | } |
1255 | 1246 | ||
1256 | static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent) | 1247 | static void blk_log_remap(struct trace_seq *s, const struct trace_entry *ent) |
1257 | { | 1248 | { |
1258 | struct blk_io_trace_remap r = { .device_from = 0, }; | 1249 | struct blk_io_trace_remap r = { .device_from = 0, }; |
1259 | 1250 | ||
1260 | get_pdu_remap(ent, &r); | 1251 | get_pdu_remap(ent, &r); |
1261 | return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n", | 1252 | trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n", |
1262 | t_sector(ent), t_sec(ent), | 1253 | t_sector(ent), t_sec(ent), |
1263 | MAJOR(r.device_from), MINOR(r.device_from), | 1254 | MAJOR(r.device_from), MINOR(r.device_from), |
1264 | (unsigned long long)r.sector_from); | 1255 | (unsigned long long)r.sector_from); |
1265 | } | 1256 | } |
1266 | 1257 | ||
1267 | static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent) | 1258 | static void blk_log_plug(struct trace_seq *s, const struct trace_entry *ent) |
1268 | { | 1259 | { |
1269 | char cmd[TASK_COMM_LEN]; | 1260 | char cmd[TASK_COMM_LEN]; |
1270 | 1261 | ||
1271 | trace_find_cmdline(ent->pid, cmd); | 1262 | trace_find_cmdline(ent->pid, cmd); |
1272 | 1263 | ||
1273 | return trace_seq_printf(s, "[%s]\n", cmd); | 1264 | trace_seq_printf(s, "[%s]\n", cmd); |
1274 | } | 1265 | } |
1275 | 1266 | ||
1276 | static int blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent) | 1267 | static void blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent) |
1277 | { | 1268 | { |
1278 | char cmd[TASK_COMM_LEN]; | 1269 | char cmd[TASK_COMM_LEN]; |
1279 | 1270 | ||
1280 | trace_find_cmdline(ent->pid, cmd); | 1271 | trace_find_cmdline(ent->pid, cmd); |
1281 | 1272 | ||
1282 | return trace_seq_printf(s, "[%s] %llu\n", cmd, get_pdu_int(ent)); | 1273 | trace_seq_printf(s, "[%s] %llu\n", cmd, get_pdu_int(ent)); |
1283 | } | 1274 | } |
1284 | 1275 | ||
1285 | static int blk_log_split(struct trace_seq *s, const struct trace_entry *ent) | 1276 | static void blk_log_split(struct trace_seq *s, const struct trace_entry *ent) |
1286 | { | 1277 | { |
1287 | char cmd[TASK_COMM_LEN]; | 1278 | char cmd[TASK_COMM_LEN]; |
1288 | 1279 | ||
1289 | trace_find_cmdline(ent->pid, cmd); | 1280 | trace_find_cmdline(ent->pid, cmd); |
1290 | 1281 | ||
1291 | return trace_seq_printf(s, "%llu / %llu [%s]\n", t_sector(ent), | 1282 | trace_seq_printf(s, "%llu / %llu [%s]\n", t_sector(ent), |
1292 | get_pdu_int(ent), cmd); | 1283 | get_pdu_int(ent), cmd); |
1293 | } | 1284 | } |
1294 | 1285 | ||
1295 | static int blk_log_msg(struct trace_seq *s, const struct trace_entry *ent) | 1286 | static void blk_log_msg(struct trace_seq *s, const struct trace_entry *ent) |
1296 | { | 1287 | { |
1297 | int ret; | ||
1298 | const struct blk_io_trace *t = te_blk_io_trace(ent); | 1288 | const struct blk_io_trace *t = te_blk_io_trace(ent); |
1299 | 1289 | ||
1300 | ret = trace_seq_putmem(s, t + 1, t->pdu_len); | 1290 | trace_seq_putmem(s, t + 1, t->pdu_len); |
1301 | if (ret) | 1291 | trace_seq_putc(s, '\n'); |
1302 | return trace_seq_putc(s, '\n'); | ||
1303 | return ret; | ||
1304 | } | 1292 | } |
1305 | 1293 | ||
1306 | /* | 1294 | /* |
@@ -1339,7 +1327,7 @@ static void blk_tracer_reset(struct trace_array *tr) | |||
1339 | 1327 | ||
1340 | static const struct { | 1328 | static const struct { |
1341 | const char *act[2]; | 1329 | const char *act[2]; |
1342 | int (*print)(struct trace_seq *s, const struct trace_entry *ent); | 1330 | void (*print)(struct trace_seq *s, const struct trace_entry *ent); |
1343 | } what2act[] = { | 1331 | } what2act[] = { |
1344 | [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic }, | 1332 | [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic }, |
1345 | [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic }, | 1333 | [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic }, |
@@ -1364,7 +1352,6 @@ static enum print_line_t print_one_line(struct trace_iterator *iter, | |||
1364 | struct trace_seq *s = &iter->seq; | 1352 | struct trace_seq *s = &iter->seq; |
1365 | const struct blk_io_trace *t; | 1353 | const struct blk_io_trace *t; |
1366 | u16 what; | 1354 | u16 what; |
1367 | int ret; | ||
1368 | bool long_act; | 1355 | bool long_act; |
1369 | blk_log_action_t *log_action; | 1356 | blk_log_action_t *log_action; |
1370 | 1357 | ||
@@ -1374,21 +1361,18 @@ static enum print_line_t print_one_line(struct trace_iterator *iter, | |||
1374 | log_action = classic ? &blk_log_action_classic : &blk_log_action; | 1361 | log_action = classic ? &blk_log_action_classic : &blk_log_action; |
1375 | 1362 | ||
1376 | if (t->action == BLK_TN_MESSAGE) { | 1363 | if (t->action == BLK_TN_MESSAGE) { |
1377 | ret = log_action(iter, long_act ? "message" : "m"); | 1364 | log_action(iter, long_act ? "message" : "m"); |
1378 | if (ret) | 1365 | blk_log_msg(s, iter->ent); |
1379 | ret = blk_log_msg(s, iter->ent); | ||
1380 | goto out; | ||
1381 | } | 1366 | } |
1382 | 1367 | ||
1383 | if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act))) | 1368 | if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act))) |
1384 | ret = trace_seq_printf(s, "Unknown action %x\n", what); | 1369 | trace_seq_printf(s, "Unknown action %x\n", what); |
1385 | else { | 1370 | else { |
1386 | ret = log_action(iter, what2act[what].act[long_act]); | 1371 | log_action(iter, what2act[what].act[long_act]); |
1387 | if (ret) | 1372 | what2act[what].print(s, iter->ent); |
1388 | ret = what2act[what].print(s, iter->ent); | ||
1389 | } | 1373 | } |
1390 | out: | 1374 | |
1391 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; | 1375 | return trace_handle_return(s); |
1392 | } | 1376 | } |
1393 | 1377 | ||
1394 | static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, | 1378 | static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, |
@@ -1397,7 +1381,7 @@ static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, | |||
1397 | return print_one_line(iter, false); | 1381 | return print_one_line(iter, false); |
1398 | } | 1382 | } |
1399 | 1383 | ||
1400 | static int blk_trace_synthesize_old_trace(struct trace_iterator *iter) | 1384 | static void blk_trace_synthesize_old_trace(struct trace_iterator *iter) |
1401 | { | 1385 | { |
1402 | struct trace_seq *s = &iter->seq; | 1386 | struct trace_seq *s = &iter->seq; |
1403 | struct blk_io_trace *t = (struct blk_io_trace *)iter->ent; | 1387 | struct blk_io_trace *t = (struct blk_io_trace *)iter->ent; |
@@ -1407,18 +1391,18 @@ static int blk_trace_synthesize_old_trace(struct trace_iterator *iter) | |||
1407 | .time = iter->ts, | 1391 | .time = iter->ts, |
1408 | }; | 1392 | }; |
1409 | 1393 | ||
1410 | if (!trace_seq_putmem(s, &old, offset)) | 1394 | trace_seq_putmem(s, &old, offset); |
1411 | return 0; | 1395 | trace_seq_putmem(s, &t->sector, |
1412 | return trace_seq_putmem(s, &t->sector, | 1396 | sizeof(old) - offset + t->pdu_len); |
1413 | sizeof(old) - offset + t->pdu_len); | ||
1414 | } | 1397 | } |
1415 | 1398 | ||
1416 | static enum print_line_t | 1399 | static enum print_line_t |
1417 | blk_trace_event_print_binary(struct trace_iterator *iter, int flags, | 1400 | blk_trace_event_print_binary(struct trace_iterator *iter, int flags, |
1418 | struct trace_event *event) | 1401 | struct trace_event *event) |
1419 | { | 1402 | { |
1420 | return blk_trace_synthesize_old_trace(iter) ? | 1403 | blk_trace_synthesize_old_trace(iter); |
1421 | TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; | 1404 | |
1405 | return trace_handle_return(&iter->seq); | ||
1422 | } | 1406 | } |
1423 | 1407 | ||
1424 | static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) | 1408 | static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) |
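
The blktrace output helpers above illustrate the series-wide conversion: trace_seq_*() calls no longer return a value, and a single trace_handle_return() at the end reports overflow. A minimal sketch of the resulting shape, using only helpers that appear in this hunk (the function name is invented):

static enum print_line_t example_blk_output(struct trace_iterator *iter)
{
	struct trace_seq *s = &iter->seq;

	/* trace_seq_*() calls are now fire-and-forget */
	trace_seq_printf(s, "cpu=%d ", iter->cpu);
	trace_seq_puts(s, "example line\n");

	/* one check at the end maps overflow to TRACE_TYPE_PARTIAL_LINE */
	return trace_handle_return(s);
}
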
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index fb186b9ddf51..929a733d302e 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -387,6 +387,8 @@ static int remove_ftrace_list_ops(struct ftrace_ops **list, | |||
387 | return ret; | 387 | return ret; |
388 | } | 388 | } |
389 | 389 | ||
390 | static void ftrace_update_trampoline(struct ftrace_ops *ops); | ||
391 | |||
390 | static int __register_ftrace_function(struct ftrace_ops *ops) | 392 | static int __register_ftrace_function(struct ftrace_ops *ops) |
391 | { | 393 | { |
392 | if (ops->flags & FTRACE_OPS_FL_DELETED) | 394 | if (ops->flags & FTRACE_OPS_FL_DELETED) |
@@ -416,9 +418,13 @@ static int __register_ftrace_function(struct ftrace_ops *ops) | |||
416 | if (control_ops_alloc(ops)) | 418 | if (control_ops_alloc(ops)) |
417 | return -ENOMEM; | 419 | return -ENOMEM; |
418 | add_ftrace_list_ops(&ftrace_control_list, &control_ops, ops); | 420 | add_ftrace_list_ops(&ftrace_control_list, &control_ops, ops); |
421 | /* The control_ops needs the trampoline update */ | ||
422 | ops = &control_ops; | ||
419 | } else | 423 | } else |
420 | add_ftrace_ops(&ftrace_ops_list, ops); | 424 | add_ftrace_ops(&ftrace_ops_list, ops); |
421 | 425 | ||
426 | ftrace_update_trampoline(ops); | ||
427 | |||
422 | if (ftrace_enabled) | 428 | if (ftrace_enabled) |
423 | update_ftrace_function(); | 429 | update_ftrace_function(); |
424 | 430 | ||
@@ -565,13 +571,13 @@ static int function_stat_cmp(void *p1, void *p2) | |||
565 | static int function_stat_headers(struct seq_file *m) | 571 | static int function_stat_headers(struct seq_file *m) |
566 | { | 572 | { |
567 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 573 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
568 | seq_printf(m, " Function " | 574 | seq_puts(m, " Function " |
569 | "Hit Time Avg s^2\n" | 575 | "Hit Time Avg s^2\n" |
570 | " -------- " | 576 | " -------- " |
571 | "--- ---- --- ---\n"); | 577 | "--- ---- --- ---\n"); |
572 | #else | 578 | #else |
573 | seq_printf(m, " Function Hit\n" | 579 | seq_puts(m, " Function Hit\n" |
574 | " -------- ---\n"); | 580 | " -------- ---\n"); |
575 | #endif | 581 | #endif |
576 | return 0; | 582 | return 0; |
577 | } | 583 | } |
@@ -598,7 +604,7 @@ static int function_stat_show(struct seq_file *m, void *v) | |||
598 | seq_printf(m, " %-30.30s %10lu", str, rec->counter); | 604 | seq_printf(m, " %-30.30s %10lu", str, rec->counter); |
599 | 605 | ||
600 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 606 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
601 | seq_printf(m, " "); | 607 | seq_puts(m, " "); |
602 | avg = rec->time; | 608 | avg = rec->time; |
603 | do_div(avg, rec->counter); | 609 | do_div(avg, rec->counter); |
604 | 610 | ||
@@ -1111,6 +1117,43 @@ static struct ftrace_ops global_ops = { | |||
1111 | FTRACE_OPS_FL_INITIALIZED, | 1117 | FTRACE_OPS_FL_INITIALIZED, |
1112 | }; | 1118 | }; |
1113 | 1119 | ||
1120 | /* | ||
1121 | * This is used by __kernel_text_address() to return true if the | ||
1122 | * address is on a dynamically allocated trampoline that would | ||
1123 | * not return true for either core_kernel_text() or | ||
1124 | * is_module_text_address(). | ||
1125 | */ | ||
1126 | bool is_ftrace_trampoline(unsigned long addr) | ||
1127 | { | ||
1128 | struct ftrace_ops *op; | ||
1129 | bool ret = false; | ||
1130 | |||
1131 | /* | ||
1132 | * Some of the ops may be dynamically allocated; | ||
1133 | * they are freed after a synchronize_sched(). | ||
1134 | */ | ||
1135 | preempt_disable_notrace(); | ||
1136 | |||
1137 | do_for_each_ftrace_op(op, ftrace_ops_list) { | ||
1138 | /* | ||
1139 | * This is to check for dynamically allocated trampolines. | ||
1140 | * Trampolines that are in kernel text will have | ||
1141 | * core_kernel_text() return true. | ||
1142 | */ | ||
1143 | if (op->trampoline && op->trampoline_size) | ||
1144 | if (addr >= op->trampoline && | ||
1145 | addr < op->trampoline + op->trampoline_size) { | ||
1146 | ret = true; | ||
1147 | goto out; | ||
1148 | } | ||
1149 | } while_for_each_ftrace_op(op); | ||
1150 | |||
1151 | out: | ||
1152 | preempt_enable_notrace(); | ||
1153 | |||
1154 | return ret; | ||
1155 | } | ||
1156 | |||
1114 | struct ftrace_page { | 1157 | struct ftrace_page { |
1115 | struct ftrace_page *next; | 1158 | struct ftrace_page *next; |
1116 | struct dyn_ftrace *records; | 1159 | struct dyn_ftrace *records; |
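
A sketch of the intended caller for is_ftrace_trampoline() (the wrapper name and exact ordering are assumptions; core_kernel_text() and is_module_text_address() are the checks named in the comment above):

static bool example_text_address(unsigned long addr)
{
	if (core_kernel_text(addr))
		return true;
	if (is_module_text_address(addr))
		return true;
	/* dynamically allocated ftrace trampolines live outside both */
	return is_ftrace_trampoline(addr);
}
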
@@ -1315,6 +1358,9 @@ ftrace_hash_rec_disable_modify(struct ftrace_ops *ops, int filter_hash); | |||
1315 | static void | 1358 | static void |
1316 | ftrace_hash_rec_enable_modify(struct ftrace_ops *ops, int filter_hash); | 1359 | ftrace_hash_rec_enable_modify(struct ftrace_ops *ops, int filter_hash); |
1317 | 1360 | ||
1361 | static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops, | ||
1362 | struct ftrace_hash *new_hash); | ||
1363 | |||
1318 | static int | 1364 | static int |
1319 | ftrace_hash_move(struct ftrace_ops *ops, int enable, | 1365 | ftrace_hash_move(struct ftrace_ops *ops, int enable, |
1320 | struct ftrace_hash **dst, struct ftrace_hash *src) | 1366 | struct ftrace_hash **dst, struct ftrace_hash *src) |
@@ -1325,8 +1371,13 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable, | |||
1325 | struct ftrace_hash *new_hash; | 1371 | struct ftrace_hash *new_hash; |
1326 | int size = src->count; | 1372 | int size = src->count; |
1327 | int bits = 0; | 1373 | int bits = 0; |
1374 | int ret; | ||
1328 | int i; | 1375 | int i; |
1329 | 1376 | ||
1377 | /* Reject setting notrace hash on IPMODIFY ftrace_ops */ | ||
1378 | if (ops->flags & FTRACE_OPS_FL_IPMODIFY && !enable) | ||
1379 | return -EINVAL; | ||
1380 | |||
1330 | /* | 1381 | /* |
1331 | * If the new source is empty, just free dst and assign it | 1382 | * If the new source is empty, just free dst and assign it |
1332 | * the empty_hash. | 1383 | * the empty_hash. |
@@ -1360,6 +1411,16 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable, | |||
1360 | } | 1411 | } |
1361 | 1412 | ||
1362 | update: | 1413 | update: |
1414 | /* Make sure this can be applied if this is an IPMODIFY ftrace_ops */ | ||
1415 | if (enable) { | ||
1416 | /* IPMODIFY should be updated only when the filter_hash is updated */ | ||
1417 | ret = ftrace_hash_ipmodify_update(ops, new_hash); | ||
1418 | if (ret < 0) { | ||
1419 | free_ftrace_hash(new_hash); | ||
1420 | return ret; | ||
1421 | } | ||
1422 | } | ||
1423 | |||
1363 | /* | 1424 | /* |
1364 | * Remove the current set, update the hash and add | 1425 | * Remove the current set, update the hash and add |
1365 | * them back. | 1426 | * them back. |
@@ -1724,6 +1785,114 @@ static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops, | |||
1724 | ftrace_hash_rec_update_modify(ops, filter_hash, 1); | 1785 | ftrace_hash_rec_update_modify(ops, filter_hash, 1); |
1725 | } | 1786 | } |
1726 | 1787 | ||
1788 | /* | ||
1789 | * Try to update the IPMODIFY flag on each ftrace_rec. Return 0 if it is OK | ||
1790 | * or if no update is needed, -EBUSY if it detects a conflict of the flag | ||
1791 | * on a ftrace_rec, and -EINVAL if the new_hash tries to trace all recs. | ||
1792 | * Note that old_hash and new_hash have the following meanings: | ||
1793 | * - If the hash is NULL, it hits all recs (if IPMODIFY is set, this is rejected) | ||
1794 | * - If the hash is EMPTY_HASH, it hits nothing | ||
1795 | * - Anything else hits the recs which match the hash entries. | ||
1796 | */ | ||
1797 | static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops, | ||
1798 | struct ftrace_hash *old_hash, | ||
1799 | struct ftrace_hash *new_hash) | ||
1800 | { | ||
1801 | struct ftrace_page *pg; | ||
1802 | struct dyn_ftrace *rec, *end = NULL; | ||
1803 | int in_old, in_new; | ||
1804 | |||
1805 | /* Only update if the ops has been registered */ | ||
1806 | if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) | ||
1807 | return 0; | ||
1808 | |||
1809 | if (!(ops->flags & FTRACE_OPS_FL_IPMODIFY)) | ||
1810 | return 0; | ||
1811 | |||
1812 | /* | ||
1813 | * Since IPMODIFY is a very address-sensitive action, we do not | ||
1814 | * allow an ftrace_ops to set all functions to a new hash. | ||
1815 | */ | ||
1816 | if (!new_hash || !old_hash) | ||
1817 | return -EINVAL; | ||
1818 | |||
1819 | /* Update rec->flags */ | ||
1820 | do_for_each_ftrace_rec(pg, rec) { | ||
1821 | /* We need to update only differences of filter_hash */ | ||
1822 | in_old = !!ftrace_lookup_ip(old_hash, rec->ip); | ||
1823 | in_new = !!ftrace_lookup_ip(new_hash, rec->ip); | ||
1824 | if (in_old == in_new) | ||
1825 | continue; | ||
1826 | |||
1827 | if (in_new) { | ||
1828 | /* New entries must ensure no others are using it */ | ||
1829 | if (rec->flags & FTRACE_FL_IPMODIFY) | ||
1830 | goto rollback; | ||
1831 | rec->flags |= FTRACE_FL_IPMODIFY; | ||
1832 | } else /* Removed entry */ | ||
1833 | rec->flags &= ~FTRACE_FL_IPMODIFY; | ||
1834 | } while_for_each_ftrace_rec(); | ||
1835 | |||
1836 | return 0; | ||
1837 | |||
1838 | rollback: | ||
1839 | end = rec; | ||
1840 | |||
1841 | /* Roll back what we did above */ | ||
1842 | do_for_each_ftrace_rec(pg, rec) { | ||
1843 | if (rec == end) | ||
1844 | goto err_out; | ||
1845 | |||
1846 | in_old = !!ftrace_lookup_ip(old_hash, rec->ip); | ||
1847 | in_new = !!ftrace_lookup_ip(new_hash, rec->ip); | ||
1848 | if (in_old == in_new) | ||
1849 | continue; | ||
1850 | |||
1851 | if (in_new) | ||
1852 | rec->flags &= ~FTRACE_FL_IPMODIFY; | ||
1853 | else | ||
1854 | rec->flags |= FTRACE_FL_IPMODIFY; | ||
1855 | } while_for_each_ftrace_rec(); | ||
1856 | |||
1857 | err_out: | ||
1858 | return -EBUSY; | ||
1859 | } | ||
1860 | |||
1861 | static int ftrace_hash_ipmodify_enable(struct ftrace_ops *ops) | ||
1862 | { | ||
1863 | struct ftrace_hash *hash = ops->func_hash->filter_hash; | ||
1864 | |||
1865 | if (ftrace_hash_empty(hash)) | ||
1866 | hash = NULL; | ||
1867 | |||
1868 | return __ftrace_hash_update_ipmodify(ops, EMPTY_HASH, hash); | ||
1869 | } | ||
1870 | |||
1871 | /* Disabling always succeeds */ | ||
1872 | static void ftrace_hash_ipmodify_disable(struct ftrace_ops *ops) | ||
1873 | { | ||
1874 | struct ftrace_hash *hash = ops->func_hash->filter_hash; | ||
1875 | |||
1876 | if (ftrace_hash_empty(hash)) | ||
1877 | hash = NULL; | ||
1878 | |||
1879 | __ftrace_hash_update_ipmodify(ops, hash, EMPTY_HASH); | ||
1880 | } | ||
1881 | |||
1882 | static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops, | ||
1883 | struct ftrace_hash *new_hash) | ||
1884 | { | ||
1885 | struct ftrace_hash *old_hash = ops->func_hash->filter_hash; | ||
1886 | |||
1887 | if (ftrace_hash_empty(old_hash)) | ||
1888 | old_hash = NULL; | ||
1889 | |||
1890 | if (ftrace_hash_empty(new_hash)) | ||
1891 | new_hash = NULL; | ||
1892 | |||
1893 | return __ftrace_hash_update_ipmodify(ops, old_hash, new_hash); | ||
1894 | } | ||
1895 | |||
1727 | static void print_ip_ins(const char *fmt, unsigned char *p) | 1896 | static void print_ip_ins(const char *fmt, unsigned char *p) |
1728 | { | 1897 | { |
1729 | int i; | 1898 | int i; |
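
The per-record rule that __ftrace_hash_update_ipmodify() applies can be condensed as follows (a standalone sketch; the helper name is invented, the flag and error codes come from the hunk above):

static int example_ipmodify_update_one(struct dyn_ftrace *rec,
					bool in_old, bool in_new)
{
	if (in_old == in_new)
		return 0;		/* membership unchanged: nothing to do */

	if (in_new) {
		/* a record may be ip-modified by only one ops at a time */
		if (rec->flags & FTRACE_FL_IPMODIFY)
			return -EBUSY;	/* caller rolls back earlier records */
		rec->flags |= FTRACE_FL_IPMODIFY;
	} else {
		/* record dropped from the filter: clear the flag */
		rec->flags &= ~FTRACE_FL_IPMODIFY;
	}
	return 0;
}
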
@@ -1734,10 +1903,13 @@ static void print_ip_ins(const char *fmt, unsigned char *p) | |||
1734 | printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]); | 1903 | printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]); |
1735 | } | 1904 | } |
1736 | 1905 | ||
1906 | static struct ftrace_ops * | ||
1907 | ftrace_find_tramp_ops_any(struct dyn_ftrace *rec); | ||
1908 | |||
1737 | /** | 1909 | /** |
1738 | * ftrace_bug - report and shutdown function tracer | 1910 | * ftrace_bug - report and shutdown function tracer |
1739 | * @failed: The failed type (EFAULT, EINVAL, EPERM) | 1911 | * @failed: The failed type (EFAULT, EINVAL, EPERM) |
1740 | * @ip: The address that failed | 1912 | * @rec: The record that failed |
1741 | * | 1913 | * |
1742 | * The arch code that enables or disables the function tracing | 1914 | * The arch code that enables or disables the function tracing |
1743 | * can call ftrace_bug() when it has detected a problem in | 1915 | * can call ftrace_bug() when it has detected a problem in |
@@ -1746,8 +1918,10 @@ static void print_ip_ins(const char *fmt, unsigned char *p) | |||
1746 | * EINVAL - if what is read at @ip is not what was expected | 1918 | * EINVAL - if what is read at @ip is not what was expected |
1747 | * EPERM - if the problem happens on writing to the @ip address | 1919 | * EPERM - if the problem happens on writing to the @ip address |
1748 | */ | 1920 | */ |
1749 | void ftrace_bug(int failed, unsigned long ip) | 1921 | void ftrace_bug(int failed, struct dyn_ftrace *rec) |
1750 | { | 1922 | { |
1923 | unsigned long ip = rec ? rec->ip : 0; | ||
1924 | |||
1751 | switch (failed) { | 1925 | switch (failed) { |
1752 | case -EFAULT: | 1926 | case -EFAULT: |
1753 | FTRACE_WARN_ON_ONCE(1); | 1927 | FTRACE_WARN_ON_ONCE(1); |
@@ -1759,7 +1933,7 @@ void ftrace_bug(int failed, unsigned long ip) | |||
1759 | pr_info("ftrace failed to modify "); | 1933 | pr_info("ftrace failed to modify "); |
1760 | print_ip_sym(ip); | 1934 | print_ip_sym(ip); |
1761 | print_ip_ins(" actual: ", (unsigned char *)ip); | 1935 | print_ip_ins(" actual: ", (unsigned char *)ip); |
1762 | printk(KERN_CONT "\n"); | 1936 | pr_cont("\n"); |
1763 | break; | 1937 | break; |
1764 | case -EPERM: | 1938 | case -EPERM: |
1765 | FTRACE_WARN_ON_ONCE(1); | 1939 | FTRACE_WARN_ON_ONCE(1); |
@@ -1771,6 +1945,24 @@ void ftrace_bug(int failed, unsigned long ip) | |||
1771 | pr_info("ftrace faulted on unknown error "); | 1945 | pr_info("ftrace faulted on unknown error "); |
1772 | print_ip_sym(ip); | 1946 | print_ip_sym(ip); |
1773 | } | 1947 | } |
1948 | if (rec) { | ||
1949 | struct ftrace_ops *ops = NULL; | ||
1950 | |||
1951 | pr_info("ftrace record flags: %lx\n", rec->flags); | ||
1952 | pr_cont(" (%ld)%s", ftrace_rec_count(rec), | ||
1953 | rec->flags & FTRACE_FL_REGS ? " R" : " "); | ||
1954 | if (rec->flags & FTRACE_FL_TRAMP_EN) { | ||
1955 | ops = ftrace_find_tramp_ops_any(rec); | ||
1956 | if (ops) | ||
1957 | pr_cont("\ttramp: %pS", | ||
1958 | (void *)ops->trampoline); | ||
1959 | else | ||
1960 | pr_cont("\ttramp: ERROR!"); | ||
1961 | |||
1962 | } | ||
1963 | ip = ftrace_get_addr_curr(rec); | ||
1964 | pr_cont(" expected tramp: %lx\n", ip); | ||
1965 | } | ||
1774 | } | 1966 | } |
1775 | 1967 | ||
1776 | static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) | 1968 | static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) |
@@ -1925,8 +2117,16 @@ ftrace_find_tramp_ops_curr(struct dyn_ftrace *rec) | |||
1925 | * when we are adding another op to the rec or removing the | 2117 | * when we are adding another op to the rec or removing the |
1926 | * current one. Thus, if the op is being added, we can | 2118 | * current one. Thus, if the op is being added, we can |
1927 | * ignore it because it hasn't attached itself to the rec | 2119 | * ignore it because it hasn't attached itself to the rec |
1928 | * yet. That means we just need to find the op that has a | 2120 | * yet. |
1929 | * trampoline and is not beeing added. | 2121 | * |
2122 | * If an ops is being modified (hooking to different functions) | ||
2123 | * then we don't care about the new functions that are being | ||
2124 | * added, just the old ones (that are probably being removed). | ||
2125 | * | ||
2126 | * If we are adding an ops to a function that already uses a | ||
2127 | * trampoline, that trampoline needs to be removed (a trampoline | ||
2128 | * only serves a single connected ops), so an ops that is not being | ||
2129 | * modified also needs to be checked. | ||
1930 | */ | 2130 | */ |
1931 | do_for_each_ftrace_op(op, ftrace_ops_list) { | 2131 | do_for_each_ftrace_op(op, ftrace_ops_list) { |
1932 | 2132 | ||
@@ -1940,17 +2140,23 @@ ftrace_find_tramp_ops_curr(struct dyn_ftrace *rec) | |||
1940 | if (op->flags & FTRACE_OPS_FL_ADDING) | 2140 | if (op->flags & FTRACE_OPS_FL_ADDING) |
1941 | continue; | 2141 | continue; |
1942 | 2142 | ||
2143 | |||
1943 | /* | 2144 | /* |
1944 | * If the ops is not being added and has a trampoline, | 2145 | * If the ops is being modified and is in the old |
1945 | * then it must be the one that we want! | 2146 | * hash, then it is probably being removed from this |
2147 | * function. | ||
1946 | */ | 2148 | */ |
1947 | if (hash_contains_ip(ip, op->func_hash)) | ||
1948 | return op; | ||
1949 | |||
1950 | /* If the ops is being modified, it may be in the old hash. */ | ||
1951 | if ((op->flags & FTRACE_OPS_FL_MODIFYING) && | 2149 | if ((op->flags & FTRACE_OPS_FL_MODIFYING) && |
1952 | hash_contains_ip(ip, &op->old_hash)) | 2150 | hash_contains_ip(ip, &op->old_hash)) |
1953 | return op; | 2151 | return op; |
2152 | /* | ||
2153 | * If the ops is not being added or modified, and it's | ||
2154 | * in its normal filter hash, then this must be the one | ||
2155 | * we want! | ||
2156 | */ | ||
2157 | if (!(op->flags & FTRACE_OPS_FL_MODIFYING) && | ||
2158 | hash_contains_ip(ip, op->func_hash)) | ||
2159 | return op; | ||
1954 | 2160 | ||
1955 | } while_for_each_ftrace_op(op); | 2161 | } while_for_each_ftrace_op(op); |
1956 | 2162 | ||
@@ -2079,7 +2285,7 @@ void __weak ftrace_replace_code(int enable) | |||
2079 | do_for_each_ftrace_rec(pg, rec) { | 2285 | do_for_each_ftrace_rec(pg, rec) { |
2080 | failed = __ftrace_replace_code(rec, enable); | 2286 | failed = __ftrace_replace_code(rec, enable); |
2081 | if (failed) { | 2287 | if (failed) { |
2082 | ftrace_bug(failed, rec->ip); | 2288 | ftrace_bug(failed, rec); |
2083 | /* Stop processing */ | 2289 | /* Stop processing */ |
2084 | return; | 2290 | return; |
2085 | } | 2291 | } |
@@ -2161,17 +2367,14 @@ struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter) | |||
2161 | static int | 2367 | static int |
2162 | ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec) | 2368 | ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec) |
2163 | { | 2369 | { |
2164 | unsigned long ip; | ||
2165 | int ret; | 2370 | int ret; |
2166 | 2371 | ||
2167 | ip = rec->ip; | ||
2168 | |||
2169 | if (unlikely(ftrace_disabled)) | 2372 | if (unlikely(ftrace_disabled)) |
2170 | return 0; | 2373 | return 0; |
2171 | 2374 | ||
2172 | ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR); | 2375 | ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR); |
2173 | if (ret) { | 2376 | if (ret) { |
2174 | ftrace_bug(ret, ip); | 2377 | ftrace_bug(ret, rec); |
2175 | return 0; | 2378 | return 0; |
2176 | } | 2379 | } |
2177 | return 1; | 2380 | return 1; |
@@ -2293,16 +2496,23 @@ static void ftrace_run_update_code(int command) | |||
2293 | FTRACE_WARN_ON(ret); | 2496 | FTRACE_WARN_ON(ret); |
2294 | } | 2497 | } |
2295 | 2498 | ||
2296 | static void ftrace_run_modify_code(struct ftrace_ops *ops, int command) | 2499 | static void ftrace_run_modify_code(struct ftrace_ops *ops, int command, |
2500 | struct ftrace_hash *old_hash) | ||
2297 | { | 2501 | { |
2298 | ops->flags |= FTRACE_OPS_FL_MODIFYING; | 2502 | ops->flags |= FTRACE_OPS_FL_MODIFYING; |
2503 | ops->old_hash.filter_hash = old_hash; | ||
2299 | ftrace_run_update_code(command); | 2504 | ftrace_run_update_code(command); |
2505 | ops->old_hash.filter_hash = NULL; | ||
2300 | ops->flags &= ~FTRACE_OPS_FL_MODIFYING; | 2506 | ops->flags &= ~FTRACE_OPS_FL_MODIFYING; |
2301 | } | 2507 | } |
2302 | 2508 | ||
2303 | static ftrace_func_t saved_ftrace_func; | 2509 | static ftrace_func_t saved_ftrace_func; |
2304 | static int ftrace_start_up; | 2510 | static int ftrace_start_up; |
2305 | 2511 | ||
2512 | void __weak arch_ftrace_trampoline_free(struct ftrace_ops *ops) | ||
2513 | { | ||
2514 | } | ||
2515 | |||
2306 | static void control_ops_free(struct ftrace_ops *ops) | 2516 | static void control_ops_free(struct ftrace_ops *ops) |
2307 | { | 2517 | { |
2308 | free_percpu(ops->disabled); | 2518 | free_percpu(ops->disabled); |
@@ -2352,6 +2562,15 @@ static int ftrace_startup(struct ftrace_ops *ops, int command) | |||
2352 | */ | 2562 | */ |
2353 | ops->flags |= FTRACE_OPS_FL_ENABLED | FTRACE_OPS_FL_ADDING; | 2563 | ops->flags |= FTRACE_OPS_FL_ENABLED | FTRACE_OPS_FL_ADDING; |
2354 | 2564 | ||
2565 | ret = ftrace_hash_ipmodify_enable(ops); | ||
2566 | if (ret < 0) { | ||
2567 | /* Rollback registration process */ | ||
2568 | __unregister_ftrace_function(ops); | ||
2569 | ftrace_start_up--; | ||
2570 | ops->flags &= ~FTRACE_OPS_FL_ENABLED; | ||
2571 | return ret; | ||
2572 | } | ||
2573 | |||
2355 | ftrace_hash_rec_enable(ops, 1); | 2574 | ftrace_hash_rec_enable(ops, 1); |
2356 | 2575 | ||
2357 | ftrace_startup_enable(command); | 2576 | ftrace_startup_enable(command); |
@@ -2380,6 +2599,8 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) | |||
2380 | */ | 2599 | */ |
2381 | WARN_ON_ONCE(ftrace_start_up < 0); | 2600 | WARN_ON_ONCE(ftrace_start_up < 0); |
2382 | 2601 | ||
2602 | /* Disabling ipmodify never fails */ | ||
2603 | ftrace_hash_ipmodify_disable(ops); | ||
2383 | ftrace_hash_rec_disable(ops, 1); | 2604 | ftrace_hash_rec_disable(ops, 1); |
2384 | 2605 | ||
2385 | ops->flags &= ~FTRACE_OPS_FL_ENABLED; | 2606 | ops->flags &= ~FTRACE_OPS_FL_ENABLED; |
@@ -2454,6 +2675,8 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) | |||
2454 | if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_CONTROL)) { | 2675 | if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_CONTROL)) { |
2455 | schedule_on_each_cpu(ftrace_sync); | 2676 | schedule_on_each_cpu(ftrace_sync); |
2456 | 2677 | ||
2678 | arch_ftrace_trampoline_free(ops); | ||
2679 | |||
2457 | if (ops->flags & FTRACE_OPS_FL_CONTROL) | 2680 | if (ops->flags & FTRACE_OPS_FL_CONTROL) |
2458 | control_ops_free(ops); | 2681 | control_ops_free(ops); |
2459 | } | 2682 | } |
@@ -2606,7 +2829,7 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) | |||
2606 | if (ftrace_start_up && cnt) { | 2829 | if (ftrace_start_up && cnt) { |
2607 | int failed = __ftrace_replace_code(p, 1); | 2830 | int failed = __ftrace_replace_code(p, 1); |
2608 | if (failed) | 2831 | if (failed) |
2609 | ftrace_bug(failed, p->ip); | 2832 | ftrace_bug(failed, p); |
2610 | } | 2833 | } |
2611 | } | 2834 | } |
2612 | } | 2835 | } |
@@ -2931,6 +3154,22 @@ static void t_stop(struct seq_file *m, void *p) | |||
2931 | mutex_unlock(&ftrace_lock); | 3154 | mutex_unlock(&ftrace_lock); |
2932 | } | 3155 | } |
2933 | 3156 | ||
3157 | void * __weak | ||
3158 | arch_ftrace_trampoline_func(struct ftrace_ops *ops, struct dyn_ftrace *rec) | ||
3159 | { | ||
3160 | return NULL; | ||
3161 | } | ||
3162 | |||
3163 | static void add_trampoline_func(struct seq_file *m, struct ftrace_ops *ops, | ||
3164 | struct dyn_ftrace *rec) | ||
3165 | { | ||
3166 | void *ptr; | ||
3167 | |||
3168 | ptr = arch_ftrace_trampoline_func(ops, rec); | ||
3169 | if (ptr) | ||
3170 | seq_printf(m, " ->%pS", ptr); | ||
3171 | } | ||
3172 | |||
2934 | static int t_show(struct seq_file *m, void *v) | 3173 | static int t_show(struct seq_file *m, void *v) |
2935 | { | 3174 | { |
2936 | struct ftrace_iterator *iter = m->private; | 3175 | struct ftrace_iterator *iter = m->private; |
@@ -2941,9 +3180,9 @@ static int t_show(struct seq_file *m, void *v) | |||
2941 | 3180 | ||
2942 | if (iter->flags & FTRACE_ITER_PRINTALL) { | 3181 | if (iter->flags & FTRACE_ITER_PRINTALL) { |
2943 | if (iter->flags & FTRACE_ITER_NOTRACE) | 3182 | if (iter->flags & FTRACE_ITER_NOTRACE) |
2944 | seq_printf(m, "#### no functions disabled ####\n"); | 3183 | seq_puts(m, "#### no functions disabled ####\n"); |
2945 | else | 3184 | else |
2946 | seq_printf(m, "#### all functions enabled ####\n"); | 3185 | seq_puts(m, "#### all functions enabled ####\n"); |
2947 | return 0; | 3186 | return 0; |
2948 | } | 3187 | } |
2949 | 3188 | ||
@@ -2954,22 +3193,25 @@ static int t_show(struct seq_file *m, void *v) | |||
2954 | 3193 | ||
2955 | seq_printf(m, "%ps", (void *)rec->ip); | 3194 | seq_printf(m, "%ps", (void *)rec->ip); |
2956 | if (iter->flags & FTRACE_ITER_ENABLED) { | 3195 | if (iter->flags & FTRACE_ITER_ENABLED) { |
2957 | seq_printf(m, " (%ld)%s", | 3196 | struct ftrace_ops *ops = NULL; |
3197 | |||
3198 | seq_printf(m, " (%ld)%s%s", | ||
2958 | ftrace_rec_count(rec), | 3199 | ftrace_rec_count(rec), |
2959 | rec->flags & FTRACE_FL_REGS ? " R" : " "); | 3200 | rec->flags & FTRACE_FL_REGS ? " R" : " ", |
3201 | rec->flags & FTRACE_FL_IPMODIFY ? " I" : " "); | ||
2960 | if (rec->flags & FTRACE_FL_TRAMP_EN) { | 3202 | if (rec->flags & FTRACE_FL_TRAMP_EN) { |
2961 | struct ftrace_ops *ops; | ||
2962 | |||
2963 | ops = ftrace_find_tramp_ops_any(rec); | 3203 | ops = ftrace_find_tramp_ops_any(rec); |
2964 | if (ops) | 3204 | if (ops) |
2965 | seq_printf(m, "\ttramp: %pS", | 3205 | seq_printf(m, "\ttramp: %pS", |
2966 | (void *)ops->trampoline); | 3206 | (void *)ops->trampoline); |
2967 | else | 3207 | else |
2968 | seq_printf(m, "\ttramp: ERROR!"); | 3208 | seq_puts(m, "\ttramp: ERROR!"); |
3209 | |||
2969 | } | 3210 | } |
3211 | add_trampoline_func(m, ops, rec); | ||
2970 | } | 3212 | } |
2971 | 3213 | ||
2972 | seq_printf(m, "\n"); | 3214 | seq_putc(m, '\n'); |
2973 | 3215 | ||
2974 | return 0; | 3216 | return 0; |
2975 | } | 3217 | } |
@@ -3003,9 +3245,6 @@ ftrace_enabled_open(struct inode *inode, struct file *file) | |||
3003 | { | 3245 | { |
3004 | struct ftrace_iterator *iter; | 3246 | struct ftrace_iterator *iter; |
3005 | 3247 | ||
3006 | if (unlikely(ftrace_disabled)) | ||
3007 | return -ENODEV; | ||
3008 | |||
3009 | iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter)); | 3248 | iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter)); |
3010 | if (iter) { | 3249 | if (iter) { |
3011 | iter->pg = ftrace_pages_start; | 3250 | iter->pg = ftrace_pages_start; |
@@ -3340,7 +3579,7 @@ static struct ftrace_ops trace_probe_ops __read_mostly = | |||
3340 | 3579 | ||
3341 | static int ftrace_probe_registered; | 3580 | static int ftrace_probe_registered; |
3342 | 3581 | ||
3343 | static void __enable_ftrace_function_probe(void) | 3582 | static void __enable_ftrace_function_probe(struct ftrace_hash *old_hash) |
3344 | { | 3583 | { |
3345 | int ret; | 3584 | int ret; |
3346 | int i; | 3585 | int i; |
@@ -3348,7 +3587,8 @@ static void __enable_ftrace_function_probe(void) | |||
3348 | if (ftrace_probe_registered) { | 3587 | if (ftrace_probe_registered) { |
3349 | /* still need to update the function call sites */ | 3588 | /* still need to update the function call sites */ |
3350 | if (ftrace_enabled) | 3589 | if (ftrace_enabled) |
3351 | ftrace_run_modify_code(&trace_probe_ops, FTRACE_UPDATE_CALLS); | 3590 | ftrace_run_modify_code(&trace_probe_ops, FTRACE_UPDATE_CALLS, |
3591 | old_hash); | ||
3352 | return; | 3592 | return; |
3353 | } | 3593 | } |
3354 | 3594 | ||
@@ -3477,13 +3717,14 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, | |||
3477 | } while_for_each_ftrace_rec(); | 3717 | } while_for_each_ftrace_rec(); |
3478 | 3718 | ||
3479 | ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash); | 3719 | ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash); |
3720 | |||
3721 | __enable_ftrace_function_probe(old_hash); | ||
3722 | |||
3480 | if (!ret) | 3723 | if (!ret) |
3481 | free_ftrace_hash_rcu(old_hash); | 3724 | free_ftrace_hash_rcu(old_hash); |
3482 | else | 3725 | else |
3483 | count = ret; | 3726 | count = ret; |
3484 | 3727 | ||
3485 | __enable_ftrace_function_probe(); | ||
3486 | |||
3487 | out_unlock: | 3728 | out_unlock: |
3488 | mutex_unlock(&ftrace_lock); | 3729 | mutex_unlock(&ftrace_lock); |
3489 | out: | 3730 | out: |
@@ -3764,10 +4005,11 @@ ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) | |||
3764 | return add_hash_entry(hash, ip); | 4005 | return add_hash_entry(hash, ip); |
3765 | } | 4006 | } |
3766 | 4007 | ||
3767 | static void ftrace_ops_update_code(struct ftrace_ops *ops) | 4008 | static void ftrace_ops_update_code(struct ftrace_ops *ops, |
4009 | struct ftrace_hash *old_hash) | ||
3768 | { | 4010 | { |
3769 | if (ops->flags & FTRACE_OPS_FL_ENABLED && ftrace_enabled) | 4011 | if (ops->flags & FTRACE_OPS_FL_ENABLED && ftrace_enabled) |
3770 | ftrace_run_modify_code(ops, FTRACE_UPDATE_CALLS); | 4012 | ftrace_run_modify_code(ops, FTRACE_UPDATE_CALLS, old_hash); |
3771 | } | 4013 | } |
3772 | 4014 | ||
3773 | static int | 4015 | static int |
@@ -3813,7 +4055,7 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, | |||
3813 | old_hash = *orig_hash; | 4055 | old_hash = *orig_hash; |
3814 | ret = ftrace_hash_move(ops, enable, orig_hash, hash); | 4056 | ret = ftrace_hash_move(ops, enable, orig_hash, hash); |
3815 | if (!ret) { | 4057 | if (!ret) { |
3816 | ftrace_ops_update_code(ops); | 4058 | ftrace_ops_update_code(ops, old_hash); |
3817 | free_ftrace_hash_rcu(old_hash); | 4059 | free_ftrace_hash_rcu(old_hash); |
3818 | } | 4060 | } |
3819 | mutex_unlock(&ftrace_lock); | 4061 | mutex_unlock(&ftrace_lock); |
@@ -3955,6 +4197,9 @@ static char ftrace_graph_buf[FTRACE_FILTER_SIZE] __initdata; | |||
3955 | static char ftrace_graph_notrace_buf[FTRACE_FILTER_SIZE] __initdata; | 4197 | static char ftrace_graph_notrace_buf[FTRACE_FILTER_SIZE] __initdata; |
3956 | static int ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer); | 4198 | static int ftrace_set_func(unsigned long *array, int *idx, int size, char *buffer); |
3957 | 4199 | ||
4200 | static unsigned long save_global_trampoline; | ||
4201 | static unsigned long save_global_flags; | ||
4202 | |||
3958 | static int __init set_graph_function(char *str) | 4203 | static int __init set_graph_function(char *str) |
3959 | { | 4204 | { |
3960 | strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE); | 4205 | strlcpy(ftrace_graph_buf, str, FTRACE_FILTER_SIZE); |
@@ -4058,7 +4303,7 @@ int ftrace_regex_release(struct inode *inode, struct file *file) | |||
4058 | ret = ftrace_hash_move(iter->ops, filter_hash, | 4303 | ret = ftrace_hash_move(iter->ops, filter_hash, |
4059 | orig_hash, iter->hash); | 4304 | orig_hash, iter->hash); |
4060 | if (!ret) { | 4305 | if (!ret) { |
4061 | ftrace_ops_update_code(iter->ops); | 4306 | ftrace_ops_update_code(iter->ops, old_hash); |
4062 | free_ftrace_hash_rcu(old_hash); | 4307 | free_ftrace_hash_rcu(old_hash); |
4063 | } | 4308 | } |
4064 | mutex_unlock(&ftrace_lock); | 4309 | mutex_unlock(&ftrace_lock); |
@@ -4163,9 +4408,9 @@ static int g_show(struct seq_file *m, void *v) | |||
4163 | struct ftrace_graph_data *fgd = m->private; | 4408 | struct ftrace_graph_data *fgd = m->private; |
4164 | 4409 | ||
4165 | if (fgd->table == ftrace_graph_funcs) | 4410 | if (fgd->table == ftrace_graph_funcs) |
4166 | seq_printf(m, "#### all functions enabled ####\n"); | 4411 | seq_puts(m, "#### all functions enabled ####\n"); |
4167 | else | 4412 | else |
4168 | seq_printf(m, "#### no functions disabled ####\n"); | 4413 | seq_puts(m, "#### no functions disabled ####\n"); |
4169 | return 0; | 4414 | return 0; |
4170 | } | 4415 | } |
4171 | 4416 | ||
@@ -4676,6 +4921,32 @@ void __init ftrace_init(void) | |||
4676 | ftrace_disabled = 1; | 4921 | ftrace_disabled = 1; |
4677 | } | 4922 | } |
4678 | 4923 | ||
4924 | /* Do nothing if arch does not support this */ | ||
4925 | void __weak arch_ftrace_update_trampoline(struct ftrace_ops *ops) | ||
4926 | { | ||
4927 | } | ||
4928 | |||
4929 | static void ftrace_update_trampoline(struct ftrace_ops *ops) | ||
4930 | { | ||
4931 | |||
4932 | /* | ||
4933 | * Currently there's no safe way to free a trampoline when the kernel | ||
4934 | * is configured with PREEMPT. That is because a task could be preempted | ||
4935 | * after it has jumped to the trampoline; it may stay preempted for a long time | ||
4936 | * depending on the system load, and currently there's no way to know | ||
4937 | * when it will be off the trampoline. If the trampoline is freed | ||
4938 | * too early, when the task runs again, it will be executing on freed | ||
4939 | * memory and crash. | ||
4940 | */ | ||
4941 | #ifdef CONFIG_PREEMPT | ||
4942 | /* Currently, only non dynamic ops can have a trampoline */ | ||
4943 | if (ops->flags & FTRACE_OPS_FL_DYNAMIC) | ||
4944 | return; | ||
4945 | #endif | ||
4946 | |||
4947 | arch_ftrace_update_trampoline(ops); | ||
4948 | } | ||
4949 | |||
4679 | #else | 4950 | #else |
4680 | 4951 | ||
4681 | static struct ftrace_ops global_ops = { | 4952 | static struct ftrace_ops global_ops = { |
@@ -4718,6 +4989,10 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) | |||
4718 | return 1; | 4989 | return 1; |
4719 | } | 4990 | } |
4720 | 4991 | ||
4992 | static void ftrace_update_trampoline(struct ftrace_ops *ops) | ||
4993 | { | ||
4994 | } | ||
4995 | |||
4721 | #endif /* CONFIG_DYNAMIC_FTRACE */ | 4996 | #endif /* CONFIG_DYNAMIC_FTRACE */ |
4722 | 4997 | ||
4723 | __init void ftrace_init_global_array_ops(struct trace_array *tr) | 4998 | __init void ftrace_init_global_array_ops(struct trace_array *tr) |
@@ -5055,12 +5330,12 @@ static int fpid_show(struct seq_file *m, void *v) | |||
5055 | const struct ftrace_pid *fpid = list_entry(v, struct ftrace_pid, list); | 5330 | const struct ftrace_pid *fpid = list_entry(v, struct ftrace_pid, list); |
5056 | 5331 | ||
5057 | if (v == (void *)1) { | 5332 | if (v == (void *)1) { |
5058 | seq_printf(m, "no pid\n"); | 5333 | seq_puts(m, "no pid\n"); |
5059 | return 0; | 5334 | return 0; |
5060 | } | 5335 | } |
5061 | 5336 | ||
5062 | if (fpid->pid == ftrace_swapper_pid) | 5337 | if (fpid->pid == ftrace_swapper_pid) |
5063 | seq_printf(m, "swapper tasks\n"); | 5338 | seq_puts(m, "swapper tasks\n"); |
5064 | else | 5339 | else |
5065 | seq_printf(m, "%u\n", pid_vnr(fpid->pid)); | 5340 | seq_printf(m, "%u\n", pid_vnr(fpid->pid)); |
5066 | 5341 | ||
@@ -5273,6 +5548,7 @@ static struct ftrace_ops graph_ops = { | |||
5273 | FTRACE_OPS_FL_STUB, | 5548 | FTRACE_OPS_FL_STUB, |
5274 | #ifdef FTRACE_GRAPH_TRAMP_ADDR | 5549 | #ifdef FTRACE_GRAPH_TRAMP_ADDR |
5275 | .trampoline = FTRACE_GRAPH_TRAMP_ADDR, | 5550 | .trampoline = FTRACE_GRAPH_TRAMP_ADDR, |
5551 | /* trampoline_size is only needed for dynamically allocated tramps */ | ||
5276 | #endif | 5552 | #endif |
5277 | ASSIGN_OPS_HASH(graph_ops, &global_ops.local_hash) | 5553 | ASSIGN_OPS_HASH(graph_ops, &global_ops.local_hash) |
5278 | }; | 5554 | }; |
@@ -5502,7 +5778,6 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, | |||
5502 | update_function_graph_func(); | 5778 | update_function_graph_func(); |
5503 | 5779 | ||
5504 | ret = ftrace_startup(&graph_ops, FTRACE_START_FUNC_RET); | 5780 | ret = ftrace_startup(&graph_ops, FTRACE_START_FUNC_RET); |
5505 | |||
5506 | out: | 5781 | out: |
5507 | mutex_unlock(&ftrace_lock); | 5782 | mutex_unlock(&ftrace_lock); |
5508 | return ret; | 5783 | return ret; |
@@ -5523,6 +5798,17 @@ void unregister_ftrace_graph(void) | |||
5523 | unregister_pm_notifier(&ftrace_suspend_notifier); | 5798 | unregister_pm_notifier(&ftrace_suspend_notifier); |
5524 | unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); | 5799 | unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); |
5525 | 5800 | ||
5801 | #ifdef CONFIG_DYNAMIC_FTRACE | ||
5802 | /* | ||
5803 | * Function graph does not allocate the trampoline, but other | ||
5804 | * users of global_ops do. We need to reset the ALLOC_TRAMP flag | ||
5805 | * if one was used. | ||
5806 | */ | ||
5807 | global_ops.trampoline = save_global_trampoline; | ||
5808 | if (save_global_flags & FTRACE_OPS_FL_ALLOC_TRAMP) | ||
5809 | global_ops.flags |= FTRACE_OPS_FL_ALLOC_TRAMP; | ||
5810 | #endif | ||
5811 | |||
5526 | out: | 5812 | out: |
5527 | mutex_unlock(&ftrace_lock); | 5813 | mutex_unlock(&ftrace_lock); |
5528 | } | 5814 | } |
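
Taken together, the IPMODIFY changes in this file target users like the sketch below (the ops name, callback body and target address are invented; the flags, callback signature and registration calls are the existing ftrace API for this kernel):

#include <linux/ftrace.h>

/* address of the function to redirect; assumed to be resolved elsewhere */
static unsigned long example_target_ip;

static void example_ipmodify_func(unsigned long ip, unsigned long parent_ip,
				  struct ftrace_ops *op, struct pt_regs *regs)
{
	/* an IPMODIFY callback may rewrite the saved instruction pointer
	 * (arch-specific) to redirect the traced call */
}

static struct ftrace_ops example_ipmodify_ops = {
	.func	= example_ipmodify_func,
	.flags	= FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_IPMODIFY,
};

static int __init example_ipmodify_init(void)
{
	int ret;

	/* restrict the ops to a single traced location */
	ret = ftrace_set_filter_ip(&example_ipmodify_ops, example_target_ip, 0, 0);
	if (ret)
		return ret;

	/* fails with -EBUSY if another IPMODIFY ops already claims that ip */
	return register_ftrace_function(&example_ipmodify_ops);
}
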
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 2d75c94ae87d..7a4104cb95cb 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -34,21 +34,19 @@ static void update_pages_handler(struct work_struct *work); | |||
34 | */ | 34 | */ |
35 | int ring_buffer_print_entry_header(struct trace_seq *s) | 35 | int ring_buffer_print_entry_header(struct trace_seq *s) |
36 | { | 36 | { |
37 | int ret; | 37 | trace_seq_puts(s, "# compressed entry header\n"); |
38 | 38 | trace_seq_puts(s, "\ttype_len : 5 bits\n"); | |
39 | ret = trace_seq_puts(s, "# compressed entry header\n"); | 39 | trace_seq_puts(s, "\ttime_delta : 27 bits\n"); |
40 | ret = trace_seq_puts(s, "\ttype_len : 5 bits\n"); | 40 | trace_seq_puts(s, "\tarray : 32 bits\n"); |
41 | ret = trace_seq_puts(s, "\ttime_delta : 27 bits\n"); | 41 | trace_seq_putc(s, '\n'); |
42 | ret = trace_seq_puts(s, "\tarray : 32 bits\n"); | 42 | trace_seq_printf(s, "\tpadding : type == %d\n", |
43 | ret = trace_seq_putc(s, '\n'); | 43 | RINGBUF_TYPE_PADDING); |
44 | ret = trace_seq_printf(s, "\tpadding : type == %d\n", | 44 | trace_seq_printf(s, "\ttime_extend : type == %d\n", |
45 | RINGBUF_TYPE_PADDING); | 45 | RINGBUF_TYPE_TIME_EXTEND); |
46 | ret = trace_seq_printf(s, "\ttime_extend : type == %d\n", | 46 | trace_seq_printf(s, "\tdata max type_len == %d\n", |
47 | RINGBUF_TYPE_TIME_EXTEND); | 47 | RINGBUF_TYPE_DATA_TYPE_LEN_MAX); |
48 | ret = trace_seq_printf(s, "\tdata max type_len == %d\n", | ||
49 | RINGBUF_TYPE_DATA_TYPE_LEN_MAX); | ||
50 | 48 | ||
51 | return ret; | 49 | return !trace_seq_has_overflowed(s); |
52 | } | 50 | } |
53 | 51 | ||
54 | /* | 52 | /* |
@@ -419,32 +417,31 @@ static inline int test_time_stamp(u64 delta) | |||
419 | int ring_buffer_print_page_header(struct trace_seq *s) | 417 | int ring_buffer_print_page_header(struct trace_seq *s) |
420 | { | 418 | { |
421 | struct buffer_data_page field; | 419 | struct buffer_data_page field; |
422 | int ret; | ||
423 | 420 | ||
424 | ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t" | 421 | trace_seq_printf(s, "\tfield: u64 timestamp;\t" |
425 | "offset:0;\tsize:%u;\tsigned:%u;\n", | 422 | "offset:0;\tsize:%u;\tsigned:%u;\n", |
426 | (unsigned int)sizeof(field.time_stamp), | 423 | (unsigned int)sizeof(field.time_stamp), |
427 | (unsigned int)is_signed_type(u64)); | 424 | (unsigned int)is_signed_type(u64)); |
428 | |||
429 | ret = trace_seq_printf(s, "\tfield: local_t commit;\t" | ||
430 | "offset:%u;\tsize:%u;\tsigned:%u;\n", | ||
431 | (unsigned int)offsetof(typeof(field), commit), | ||
432 | (unsigned int)sizeof(field.commit), | ||
433 | (unsigned int)is_signed_type(long)); | ||
434 | |||
435 | ret = trace_seq_printf(s, "\tfield: int overwrite;\t" | ||
436 | "offset:%u;\tsize:%u;\tsigned:%u;\n", | ||
437 | (unsigned int)offsetof(typeof(field), commit), | ||
438 | 1, | ||
439 | (unsigned int)is_signed_type(long)); | ||
440 | |||
441 | ret = trace_seq_printf(s, "\tfield: char data;\t" | ||
442 | "offset:%u;\tsize:%u;\tsigned:%u;\n", | ||
443 | (unsigned int)offsetof(typeof(field), data), | ||
444 | (unsigned int)BUF_PAGE_SIZE, | ||
445 | (unsigned int)is_signed_type(char)); | ||
446 | 425 | ||
447 | return ret; | 426 | trace_seq_printf(s, "\tfield: local_t commit;\t" |
427 | "offset:%u;\tsize:%u;\tsigned:%u;\n", | ||
428 | (unsigned int)offsetof(typeof(field), commit), | ||
429 | (unsigned int)sizeof(field.commit), | ||
430 | (unsigned int)is_signed_type(long)); | ||
431 | |||
432 | trace_seq_printf(s, "\tfield: int overwrite;\t" | ||
433 | "offset:%u;\tsize:%u;\tsigned:%u;\n", | ||
434 | (unsigned int)offsetof(typeof(field), commit), | ||
435 | 1, | ||
436 | (unsigned int)is_signed_type(long)); | ||
437 | |||
438 | trace_seq_printf(s, "\tfield: char data;\t" | ||
439 | "offset:%u;\tsize:%u;\tsigned:%u;\n", | ||
440 | (unsigned int)offsetof(typeof(field), data), | ||
441 | (unsigned int)BUF_PAGE_SIZE, | ||
442 | (unsigned int)is_signed_type(char)); | ||
443 | |||
444 | return !trace_seq_has_overflowed(s); | ||
448 | } | 445 | } |
449 | 446 | ||
450 | struct rb_irq_work { | 447 | struct rb_irq_work { |
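
Helpers that keep an int return follow the companion pattern shown by the two functions above: write everything unconditionally, then report success as "did not overflow". A minimal sketch (function name invented):

static int example_print_header(struct trace_seq *s)
{
	trace_seq_puts(s, "# example header\n");
	trace_seq_printf(s, "\tversion : %d\n", 1);

	/* non-zero means every byte fit into the sequence buffer */
	return !trace_seq_has_overflowed(s);
}
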
@@ -538,16 +535,18 @@ static void rb_wake_up_waiters(struct irq_work *work) | |||
538 | * ring_buffer_wait - wait for input to the ring buffer | 535 | * ring_buffer_wait - wait for input to the ring buffer |
539 | * @buffer: buffer to wait on | 536 | * @buffer: buffer to wait on |
540 | * @cpu: the cpu buffer to wait on | 537 | * @cpu: the cpu buffer to wait on |
538 | * @full: wait until a full page is available, if @cpu != RING_BUFFER_ALL_CPUS | ||
541 | * | 539 | * |
542 | * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon | 540 | * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon |
543 | * as data is added to any of the @buffer's cpu buffers. Otherwise | 541 | * as data is added to any of the @buffer's cpu buffers. Otherwise |
544 | * it will wait for data to be added to a specific cpu buffer. | 542 | * it will wait for data to be added to a specific cpu buffer. |
545 | */ | 543 | */ |
546 | int ring_buffer_wait(struct ring_buffer *buffer, int cpu) | 544 | int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full) |
547 | { | 545 | { |
548 | struct ring_buffer_per_cpu *cpu_buffer; | 546 | struct ring_buffer_per_cpu *uninitialized_var(cpu_buffer); |
549 | DEFINE_WAIT(wait); | 547 | DEFINE_WAIT(wait); |
550 | struct rb_irq_work *work; | 548 | struct rb_irq_work *work; |
549 | int ret = 0; | ||
551 | 550 | ||
552 | /* | 551 | /* |
553 | * Depending on what the caller is waiting for, either any | 552 | * Depending on what the caller is waiting for, either any |
@@ -564,36 +563,61 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu) | |||
564 | } | 563 | } |
565 | 564 | ||
566 | 565 | ||
567 | prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); | 566 | while (true) { |
567 | prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); | ||
568 | 568 | ||
569 | /* | 569 | /* |
570 | * The events can happen in critical sections where | 570 | * The events can happen in critical sections where |
571 | * checking a work queue can cause deadlocks. | 571 | * checking a work queue can cause deadlocks. |
572 | * After adding a task to the queue, this flag is set | 572 | * After adding a task to the queue, this flag is set |
573 | * only to notify events to try to wake up the queue | 573 | * only to notify events to try to wake up the queue |
574 | * using irq_work. | 574 | * using irq_work. |
575 | * | 575 | * |
576 | * We don't clear it even if the buffer is no longer | 576 | * We don't clear it even if the buffer is no longer |
577 | * empty. The flag only causes the next event to run | 577 | * empty. The flag only causes the next event to run |
578 | * irq_work to do the work queue wake up. The worst | 578 | * irq_work to do the work queue wake up. The worst |
579 | * that can happen if we race with !trace_empty() is that | 579 | * that can happen if we race with !trace_empty() is that |
580 | * an event will cause an irq_work to try to wake up | 580 | * an event will cause an irq_work to try to wake up |
581 | * an empty queue. | 581 | * an empty queue. |
582 | * | 582 | * |
583 | * There's no reason to protect this flag either, as | 583 | * There's no reason to protect this flag either, as |
584 | * the work queue and irq_work logic will do the necessary | 584 | * the work queue and irq_work logic will do the necessary |
585 | * synchronization for the wake ups. The only thing | 585 | * synchronization for the wake ups. The only thing |
586 | * that is necessary is that the wake up happens after | 586 | * that is necessary is that the wake up happens after |
587 | * a task has been queued. It's OK for spurious wake ups. | 587 | * a task has been queued. It's OK for spurious wake ups. |
588 | */ | 588 | */ |
589 | work->waiters_pending = true; | 589 | work->waiters_pending = true; |
590 | |||
591 | if (signal_pending(current)) { | ||
592 | ret = -EINTR; | ||
593 | break; | ||
594 | } | ||
595 | |||
596 | if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) | ||
597 | break; | ||
598 | |||
599 | if (cpu != RING_BUFFER_ALL_CPUS && | ||
600 | !ring_buffer_empty_cpu(buffer, cpu)) { | ||
601 | unsigned long flags; | ||
602 | bool pagebusy; | ||
603 | |||
604 | if (!full) | ||
605 | break; | ||
606 | |||
607 | raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | ||
608 | pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; | ||
609 | raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | ||
610 | |||
611 | if (!pagebusy) | ||
612 | break; | ||
613 | } | ||
590 | 614 | ||
591 | if ((cpu == RING_BUFFER_ALL_CPUS && ring_buffer_empty(buffer)) || | ||
592 | (cpu != RING_BUFFER_ALL_CPUS && ring_buffer_empty_cpu(buffer, cpu))) | ||
593 | schedule(); | 615 | schedule(); |
616 | } | ||
594 | 617 | ||
595 | finish_wait(&work->waiters, &wait); | 618 | finish_wait(&work->waiters, &wait); |
596 | return 0; | 619 | |
620 | return ret; | ||
597 | } | 621 | } |
598 | 622 | ||
599 | /** | 623 | /** |
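
A hedged caller sketch for the reworked wait: with @full set and a specific @cpu, ring_buffer_wait() now returns only once the reader page is no longer the commit page, and -EINTR if a signal arrives (the wrapper name below is invented):

static int example_wait_for_data(struct ring_buffer *buffer, int cpu, bool full)
{
	int ret;

	/* @full is only honoured for a specific cpu, not RING_BUFFER_ALL_CPUS */
	ret = ring_buffer_wait(buffer, cpu, full);
	if (ret == -EINTR)
		return ret;	/* a pending signal interrupted the wait */

	/* data (or a full reader page, if requested) is now available */
	return 0;
}
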
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8a528392b1f4..1af4f8f2ab5d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -155,10 +155,11 @@ __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); | |||
155 | 155 | ||
156 | static int __init stop_trace_on_warning(char *str) | 156 | static int __init stop_trace_on_warning(char *str) |
157 | { | 157 | { |
158 | __disable_trace_on_warning = 1; | 158 | if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0)) |
159 | __disable_trace_on_warning = 1; | ||
159 | return 1; | 160 | return 1; |
160 | } | 161 | } |
161 | __setup("traceoff_on_warning=", stop_trace_on_warning); | 162 | __setup("traceoff_on_warning", stop_trace_on_warning); |
162 | 163 | ||
163 | static int __init boot_alloc_snapshot(char *str) | 164 | static int __init boot_alloc_snapshot(char *str) |
164 | { | 165 | { |
@@ -938,19 +939,20 @@ out: | |||
938 | return ret; | 939 | return ret; |
939 | } | 940 | } |
940 | 941 | ||
942 | /* TODO add a seq_buf_to_buffer() */ | ||
941 | static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) | 943 | static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) |
942 | { | 944 | { |
943 | int len; | 945 | int len; |
944 | 946 | ||
945 | if (s->len <= s->readpos) | 947 | if (trace_seq_used(s) <= s->seq.readpos) |
946 | return -EBUSY; | 948 | return -EBUSY; |
947 | 949 | ||
948 | len = s->len - s->readpos; | 950 | len = trace_seq_used(s) - s->seq.readpos; |
949 | if (cnt > len) | 951 | if (cnt > len) |
950 | cnt = len; | 952 | cnt = len; |
951 | memcpy(buf, s->buffer + s->readpos, cnt); | 953 | memcpy(buf, s->buffer + s->seq.readpos, cnt); |
952 | 954 | ||
953 | s->readpos += cnt; | 955 | s->seq.readpos += cnt; |
954 | return cnt; | 956 | return cnt; |
955 | } | 957 | } |
956 | 958 | ||
@@ -1076,13 +1078,14 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) | |||
1076 | } | 1078 | } |
1077 | #endif /* CONFIG_TRACER_MAX_TRACE */ | 1079 | #endif /* CONFIG_TRACER_MAX_TRACE */ |
1078 | 1080 | ||
1079 | static int wait_on_pipe(struct trace_iterator *iter) | 1081 | static int wait_on_pipe(struct trace_iterator *iter, bool full) |
1080 | { | 1082 | { |
1081 | /* Iterators are static, they should be filled or empty */ | 1083 | /* Iterators are static, they should be filled or empty */ |
1082 | if (trace_buffer_iter(iter, iter->cpu_file)) | 1084 | if (trace_buffer_iter(iter, iter->cpu_file)) |
1083 | return 0; | 1085 | return 0; |
1084 | 1086 | ||
1085 | return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file); | 1087 | return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file, |
1088 | full); | ||
1086 | } | 1089 | } |
1087 | 1090 | ||
1088 | #ifdef CONFIG_FTRACE_STARTUP_TEST | 1091 | #ifdef CONFIG_FTRACE_STARTUP_TEST |
@@ -2157,9 +2160,7 @@ __trace_array_vprintk(struct ring_buffer *buffer, | |||
2157 | goto out; | 2160 | goto out; |
2158 | } | 2161 | } |
2159 | 2162 | ||
2160 | len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); | 2163 | len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); |
2161 | if (len > TRACE_BUF_SIZE) | ||
2162 | goto out; | ||
2163 | 2164 | ||
2164 | local_save_flags(flags); | 2165 | local_save_flags(flags); |
2165 | size = sizeof(*entry) + len + 1; | 2166 | size = sizeof(*entry) + len + 1; |
@@ -2170,8 +2171,7 @@ __trace_array_vprintk(struct ring_buffer *buffer, | |||
2170 | entry = ring_buffer_event_data(event); | 2171 | entry = ring_buffer_event_data(event); |
2171 | entry->ip = ip; | 2172 | entry->ip = ip; |
2172 | 2173 | ||
2173 | memcpy(&entry->buf, tbuffer, len); | 2174 | memcpy(&entry->buf, tbuffer, len + 1); |
2174 | entry->buf[len] = '\0'; | ||
2175 | if (!call_filter_check_discard(call, entry, buffer, event)) { | 2175 | if (!call_filter_check_discard(call, entry, buffer, event)) { |
2176 | __buffer_unlock_commit(buffer, event); | 2176 | __buffer_unlock_commit(buffer, event); |
2177 | ftrace_trace_stack(buffer, flags, 6, pc); | 2177 | ftrace_trace_stack(buffer, flags, 6, pc); |
@@ -2508,14 +2508,14 @@ get_total_entries(struct trace_buffer *buf, | |||
2508 | 2508 | ||
2509 | static void print_lat_help_header(struct seq_file *m) | 2509 | static void print_lat_help_header(struct seq_file *m) |
2510 | { | 2510 | { |
2511 | seq_puts(m, "# _------=> CPU# \n"); | 2511 | seq_puts(m, "# _------=> CPU# \n" |
2512 | seq_puts(m, "# / _-----=> irqs-off \n"); | 2512 | "# / _-----=> irqs-off \n" |
2513 | seq_puts(m, "# | / _----=> need-resched \n"); | 2513 | "# | / _----=> need-resched \n" |
2514 | seq_puts(m, "# || / _---=> hardirq/softirq \n"); | 2514 | "# || / _---=> hardirq/softirq \n" |
2515 | seq_puts(m, "# ||| / _--=> preempt-depth \n"); | 2515 | "# ||| / _--=> preempt-depth \n" |
2516 | seq_puts(m, "# |||| / delay \n"); | 2516 | "# |||| / delay \n" |
2517 | seq_puts(m, "# cmd pid ||||| time | caller \n"); | 2517 | "# cmd pid ||||| time | caller \n" |
2518 | seq_puts(m, "# \\ / ||||| \\ | / \n"); | 2518 | "# \\ / ||||| \\ | / \n"); |
2519 | } | 2519 | } |
2520 | 2520 | ||
2521 | static void print_event_info(struct trace_buffer *buf, struct seq_file *m) | 2521 | static void print_event_info(struct trace_buffer *buf, struct seq_file *m) |
@@ -2532,20 +2532,20 @@ static void print_event_info(struct trace_buffer *buf, struct seq_file *m) | |||
2532 | static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m) | 2532 | static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m) |
2533 | { | 2533 | { |
2534 | print_event_info(buf, m); | 2534 | print_event_info(buf, m); |
2535 | seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"); | 2535 | seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n" |
2536 | seq_puts(m, "# | | | | |\n"); | 2536 | "# | | | | |\n"); |
2537 | } | 2537 | } |
2538 | 2538 | ||
2539 | static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m) | 2539 | static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m) |
2540 | { | 2540 | { |
2541 | print_event_info(buf, m); | 2541 | print_event_info(buf, m); |
2542 | seq_puts(m, "# _-----=> irqs-off\n"); | 2542 | seq_puts(m, "# _-----=> irqs-off\n" |
2543 | seq_puts(m, "# / _----=> need-resched\n"); | 2543 | "# / _----=> need-resched\n" |
2544 | seq_puts(m, "# | / _---=> hardirq/softirq\n"); | 2544 | "# | / _---=> hardirq/softirq\n" |
2545 | seq_puts(m, "# || / _--=> preempt-depth\n"); | 2545 | "# || / _--=> preempt-depth\n" |
2546 | seq_puts(m, "# ||| / delay\n"); | 2546 | "# ||| / delay\n" |
2547 | seq_puts(m, "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n"); | 2547 | "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n" |
2548 | seq_puts(m, "# | | | |||| | |\n"); | 2548 | "# | | | |||| | |\n"); |
2549 | } | 2549 | } |
2550 | 2550 | ||
2551 | void | 2551 | void |
@@ -2648,24 +2648,21 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) | |||
2648 | event = ftrace_find_event(entry->type); | 2648 | event = ftrace_find_event(entry->type); |
2649 | 2649 | ||
2650 | if (trace_flags & TRACE_ITER_CONTEXT_INFO) { | 2650 | if (trace_flags & TRACE_ITER_CONTEXT_INFO) { |
2651 | if (iter->iter_flags & TRACE_FILE_LAT_FMT) { | 2651 | if (iter->iter_flags & TRACE_FILE_LAT_FMT) |
2652 | if (!trace_print_lat_context(iter)) | 2652 | trace_print_lat_context(iter); |
2653 | goto partial; | 2653 | else |
2654 | } else { | 2654 | trace_print_context(iter); |
2655 | if (!trace_print_context(iter)) | ||
2656 | goto partial; | ||
2657 | } | ||
2658 | } | 2655 | } |
2659 | 2656 | ||
2657 | if (trace_seq_has_overflowed(s)) | ||
2658 | return TRACE_TYPE_PARTIAL_LINE; | ||
2659 | |||
2660 | if (event) | 2660 | if (event) |
2661 | return event->funcs->trace(iter, sym_flags, event); | 2661 | return event->funcs->trace(iter, sym_flags, event); |
2662 | 2662 | ||
2663 | if (!trace_seq_printf(s, "Unknown type %d\n", entry->type)) | 2663 | trace_seq_printf(s, "Unknown type %d\n", entry->type); |
2664 | goto partial; | ||
2665 | 2664 | ||
2666 | return TRACE_TYPE_HANDLED; | 2665 | return trace_handle_return(s); |
2667 | partial: | ||
2668 | return TRACE_TYPE_PARTIAL_LINE; | ||
2669 | } | 2666 | } |
2670 | 2667 | ||
2671 | static enum print_line_t print_raw_fmt(struct trace_iterator *iter) | 2668 | static enum print_line_t print_raw_fmt(struct trace_iterator *iter) |
@@ -2676,22 +2673,20 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) | |||
2676 | 2673 | ||
2677 | entry = iter->ent; | 2674 | entry = iter->ent; |
2678 | 2675 | ||
2679 | if (trace_flags & TRACE_ITER_CONTEXT_INFO) { | 2676 | if (trace_flags & TRACE_ITER_CONTEXT_INFO) |
2680 | if (!trace_seq_printf(s, "%d %d %llu ", | 2677 | trace_seq_printf(s, "%d %d %llu ", |
2681 | entry->pid, iter->cpu, iter->ts)) | 2678 | entry->pid, iter->cpu, iter->ts); |
2682 | goto partial; | 2679 | |
2683 | } | 2680 | if (trace_seq_has_overflowed(s)) |
2681 | return TRACE_TYPE_PARTIAL_LINE; | ||
2684 | 2682 | ||
2685 | event = ftrace_find_event(entry->type); | 2683 | event = ftrace_find_event(entry->type); |
2686 | if (event) | 2684 | if (event) |
2687 | return event->funcs->raw(iter, 0, event); | 2685 | return event->funcs->raw(iter, 0, event); |
2688 | 2686 | ||
2689 | if (!trace_seq_printf(s, "%d ?\n", entry->type)) | 2687 | trace_seq_printf(s, "%d ?\n", entry->type); |
2690 | goto partial; | ||
2691 | 2688 | ||
2692 | return TRACE_TYPE_HANDLED; | 2689 | return trace_handle_return(s); |
2693 | partial: | ||
2694 | return TRACE_TYPE_PARTIAL_LINE; | ||
2695 | } | 2690 | } |
2696 | 2691 | ||
2697 | static enum print_line_t print_hex_fmt(struct trace_iterator *iter) | 2692 | static enum print_line_t print_hex_fmt(struct trace_iterator *iter) |
@@ -2704,9 +2699,11 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) | |||
2704 | entry = iter->ent; | 2699 | entry = iter->ent; |
2705 | 2700 | ||
2706 | if (trace_flags & TRACE_ITER_CONTEXT_INFO) { | 2701 | if (trace_flags & TRACE_ITER_CONTEXT_INFO) { |
2707 | SEQ_PUT_HEX_FIELD_RET(s, entry->pid); | 2702 | SEQ_PUT_HEX_FIELD(s, entry->pid); |
2708 | SEQ_PUT_HEX_FIELD_RET(s, iter->cpu); | 2703 | SEQ_PUT_HEX_FIELD(s, iter->cpu); |
2709 | SEQ_PUT_HEX_FIELD_RET(s, iter->ts); | 2704 | SEQ_PUT_HEX_FIELD(s, iter->ts); |
2705 | if (trace_seq_has_overflowed(s)) | ||
2706 | return TRACE_TYPE_PARTIAL_LINE; | ||
2710 | } | 2707 | } |
2711 | 2708 | ||
2712 | event = ftrace_find_event(entry->type); | 2709 | event = ftrace_find_event(entry->type); |
@@ -2716,9 +2713,9 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) | |||
2716 | return ret; | 2713 | return ret; |
2717 | } | 2714 | } |
2718 | 2715 | ||
2719 | SEQ_PUT_FIELD_RET(s, newline); | 2716 | SEQ_PUT_FIELD(s, newline); |
2720 | 2717 | ||
2721 | return TRACE_TYPE_HANDLED; | 2718 | return trace_handle_return(s); |
2722 | } | 2719 | } |
2723 | 2720 | ||
2724 | static enum print_line_t print_bin_fmt(struct trace_iterator *iter) | 2721 | static enum print_line_t print_bin_fmt(struct trace_iterator *iter) |
@@ -2730,9 +2727,11 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) | |||
2730 | entry = iter->ent; | 2727 | entry = iter->ent; |
2731 | 2728 | ||
2732 | if (trace_flags & TRACE_ITER_CONTEXT_INFO) { | 2729 | if (trace_flags & TRACE_ITER_CONTEXT_INFO) { |
2733 | SEQ_PUT_FIELD_RET(s, entry->pid); | 2730 | SEQ_PUT_FIELD(s, entry->pid); |
2734 | SEQ_PUT_FIELD_RET(s, iter->cpu); | 2731 | SEQ_PUT_FIELD(s, iter->cpu); |
2735 | SEQ_PUT_FIELD_RET(s, iter->ts); | 2732 | SEQ_PUT_FIELD(s, iter->ts); |
2733 | if (trace_seq_has_overflowed(s)) | ||
2734 | return TRACE_TYPE_PARTIAL_LINE; | ||
2736 | } | 2735 | } |
2737 | 2736 | ||
2738 | event = ftrace_find_event(entry->type); | 2737 | event = ftrace_find_event(entry->type); |
@@ -2778,10 +2777,12 @@ enum print_line_t print_trace_line(struct trace_iterator *iter) | |||
2778 | { | 2777 | { |
2779 | enum print_line_t ret; | 2778 | enum print_line_t ret; |
2780 | 2779 | ||
2781 | if (iter->lost_events && | 2780 | if (iter->lost_events) { |
2782 | !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n", | 2781 | trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n", |
2783 | iter->cpu, iter->lost_events)) | 2782 | iter->cpu, iter->lost_events); |
2784 | return TRACE_TYPE_PARTIAL_LINE; | 2783 | if (trace_seq_has_overflowed(&iter->seq)) |
2784 | return TRACE_TYPE_PARTIAL_LINE; | ||
2785 | } | ||
2785 | 2786 | ||
2786 | if (iter->trace && iter->trace->print_line) { | 2787 | if (iter->trace && iter->trace->print_line) { |
2787 | ret = iter->trace->print_line(iter); | 2788 | ret = iter->trace->print_line(iter); |
@@ -2859,44 +2860,44 @@ static void test_ftrace_alive(struct seq_file *m) | |||
2859 | { | 2860 | { |
2860 | if (!ftrace_is_dead()) | 2861 | if (!ftrace_is_dead()) |
2861 | return; | 2862 | return; |
2862 | seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"); | 2863 | seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n" |
2863 | seq_printf(m, "# MAY BE MISSING FUNCTION EVENTS\n"); | 2864 | "# MAY BE MISSING FUNCTION EVENTS\n"); |
2864 | } | 2865 | } |
2865 | 2866 | ||
2866 | #ifdef CONFIG_TRACER_MAX_TRACE | 2867 | #ifdef CONFIG_TRACER_MAX_TRACE |
2867 | static void show_snapshot_main_help(struct seq_file *m) | 2868 | static void show_snapshot_main_help(struct seq_file *m) |
2868 | { | 2869 | { |
2869 | seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"); | 2870 | seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n" |
2870 | seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"); | 2871 | "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n" |
2871 | seq_printf(m, "# Takes a snapshot of the main buffer.\n"); | 2872 | "# Takes a snapshot of the main buffer.\n" |
2872 | seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"); | 2873 | "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n" |
2873 | seq_printf(m, "# (Doesn't have to be '2' works with any number that\n"); | 2874 | "# (Doesn't have to be '2' works with any number that\n" |
2874 | seq_printf(m, "# is not a '0' or '1')\n"); | 2875 | "# is not a '0' or '1')\n"); |
2875 | } | 2876 | } |
2876 | 2877 | ||
2877 | static void show_snapshot_percpu_help(struct seq_file *m) | 2878 | static void show_snapshot_percpu_help(struct seq_file *m) |
2878 | { | 2879 | { |
2879 | seq_printf(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n"); | 2880 | seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n"); |
2880 | #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP | 2881 | #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP |
2881 | seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"); | 2882 | seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n" |
2882 | seq_printf(m, "# Takes a snapshot of the main buffer for this cpu.\n"); | 2883 | "# Takes a snapshot of the main buffer for this cpu.\n"); |
2883 | #else | 2884 | #else |
2884 | seq_printf(m, "# echo 1 > snapshot : Not supported with this kernel.\n"); | 2885 | seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n" |
2885 | seq_printf(m, "# Must use main snapshot file to allocate.\n"); | 2886 | "# Must use main snapshot file to allocate.\n"); |
2886 | #endif | 2887 | #endif |
2887 | seq_printf(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"); | 2888 | seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n" |
2888 | seq_printf(m, "# (Doesn't have to be '2' works with any number that\n"); | 2889 | "# (Doesn't have to be '2' works with any number that\n" |
2889 | seq_printf(m, "# is not a '0' or '1')\n"); | 2890 | "# is not a '0' or '1')\n"); |
2890 | } | 2891 | } |
2891 | 2892 | ||
2892 | static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) | 2893 | static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) |
2893 | { | 2894 | { |
2894 | if (iter->tr->allocated_snapshot) | 2895 | if (iter->tr->allocated_snapshot) |
2895 | seq_printf(m, "#\n# * Snapshot is allocated *\n#\n"); | 2896 | seq_puts(m, "#\n# * Snapshot is allocated *\n#\n"); |
2896 | else | 2897 | else |
2897 | seq_printf(m, "#\n# * Snapshot is freed *\n#\n"); | 2898 | seq_puts(m, "#\n# * Snapshot is freed *\n#\n"); |
2898 | 2899 | ||
2899 | seq_printf(m, "# Snapshot commands:\n"); | 2900 | seq_puts(m, "# Snapshot commands:\n"); |
2900 | if (iter->cpu_file == RING_BUFFER_ALL_CPUS) | 2901 | if (iter->cpu_file == RING_BUFFER_ALL_CPUS) |
2901 | show_snapshot_main_help(m); | 2902 | show_snapshot_main_help(m); |
2902 | else | 2903 | else |
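The seq_printf()-to-seq_puts() rewrites above also merge runs of calls into one: adjacent C string literals are concatenated by the compiler, so a multi-line help text becomes a single seq_puts() call with no printf-style format parsing. A small illustration with a hypothetical header (real seq_file API):

#include <linux/seq_file.h>

static void print_two_line_header(struct seq_file *m)
{
	/* "# line one\n" "# line two\n" is one literal after concatenation. */
	seq_puts(m, "# line one\n"
		    "# line two\n");
}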
@@ -3250,7 +3251,7 @@ static int t_show(struct seq_file *m, void *v) | |||
3250 | if (!t) | 3251 | if (!t) |
3251 | return 0; | 3252 | return 0; |
3252 | 3253 | ||
3253 | seq_printf(m, "%s", t->name); | 3254 | seq_puts(m, t->name); |
3254 | if (t->next) | 3255 | if (t->next) |
3255 | seq_putc(m, ' '); | 3256 | seq_putc(m, ' '); |
3256 | else | 3257 | else |
@@ -4313,6 +4314,8 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) | |||
4313 | goto out; | 4314 | goto out; |
4314 | } | 4315 | } |
4315 | 4316 | ||
4317 | trace_seq_init(&iter->seq); | ||
4318 | |||
4316 | /* | 4319 | /* |
4317 | * We make a copy of the current tracer to avoid concurrent | 4320 | * We make a copy of the current tracer to avoid concurrent |
4318 | * changes on it while we are reading. | 4321 | * changes on it while we are reading. |
@@ -4434,15 +4437,12 @@ static int tracing_wait_pipe(struct file *filp) | |||
4434 | 4437 | ||
4435 | mutex_unlock(&iter->mutex); | 4438 | mutex_unlock(&iter->mutex); |
4436 | 4439 | ||
4437 | ret = wait_on_pipe(iter); | 4440 | ret = wait_on_pipe(iter, false); |
4438 | 4441 | ||
4439 | mutex_lock(&iter->mutex); | 4442 | mutex_lock(&iter->mutex); |
4440 | 4443 | ||
4441 | if (ret) | 4444 | if (ret) |
4442 | return ret; | 4445 | return ret; |
4443 | |||
4444 | if (signal_pending(current)) | ||
4445 | return -EINTR; | ||
4446 | } | 4446 | } |
4447 | 4447 | ||
4448 | return 1; | 4448 | return 1; |
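The hunks below switch from iter->seq.len to iter->seq.seq.len and trace_seq_used(), because trace_seq now keeps its bookkeeping in an embedded seq_buf. The sketch here is only a stand-in to show the shape those accesses assume, not the real definitions:

struct seq_buf_sketch {			/* stand-in for struct seq_buf */
	char		*buffer;
	size_t		size;
	size_t		len;		/* what used to be trace_seq::len */
	loff_t		readpos;	/* read cursor used by trace_seq_to_user() */
};

struct trace_seq_sketch {		/* stand-in for struct trace_seq */
	char			buffer[PAGE_SIZE];
	struct seq_buf_sketch	seq;	/* so iter->seq.len becomes iter->seq.seq.len */
	int			full;	/* latched when the buffer overflows */
};

trace_seq_used() returns the valid length clamped to the buffer size, which is why the read paths below prefer it over dereferencing seq.len directly.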
@@ -4509,18 +4509,18 @@ waitagain: | |||
4509 | trace_access_lock(iter->cpu_file); | 4509 | trace_access_lock(iter->cpu_file); |
4510 | while (trace_find_next_entry_inc(iter) != NULL) { | 4510 | while (trace_find_next_entry_inc(iter) != NULL) { |
4511 | enum print_line_t ret; | 4511 | enum print_line_t ret; |
4512 | int len = iter->seq.len; | 4512 | int save_len = iter->seq.seq.len; |
4513 | 4513 | ||
4514 | ret = print_trace_line(iter); | 4514 | ret = print_trace_line(iter); |
4515 | if (ret == TRACE_TYPE_PARTIAL_LINE) { | 4515 | if (ret == TRACE_TYPE_PARTIAL_LINE) { |
4516 | /* don't print partial lines */ | 4516 | /* don't print partial lines */ |
4517 | iter->seq.len = len; | 4517 | iter->seq.seq.len = save_len; |
4518 | break; | 4518 | break; |
4519 | } | 4519 | } |
4520 | if (ret != TRACE_TYPE_NO_CONSUME) | 4520 | if (ret != TRACE_TYPE_NO_CONSUME) |
4521 | trace_consume(iter); | 4521 | trace_consume(iter); |
4522 | 4522 | ||
4523 | if (iter->seq.len >= cnt) | 4523 | if (trace_seq_used(&iter->seq) >= cnt) |
4524 | break; | 4524 | break; |
4525 | 4525 | ||
4526 | /* | 4526 | /* |
@@ -4536,7 +4536,7 @@ waitagain: | |||
4536 | 4536 | ||
4537 | /* Now copy what we have to the user */ | 4537 | /* Now copy what we have to the user */ |
4538 | sret = trace_seq_to_user(&iter->seq, ubuf, cnt); | 4538 | sret = trace_seq_to_user(&iter->seq, ubuf, cnt); |
4539 | if (iter->seq.readpos >= iter->seq.len) | 4539 | if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq)) |
4540 | trace_seq_init(&iter->seq); | 4540 | trace_seq_init(&iter->seq); |
4541 | 4541 | ||
4542 | /* | 4542 | /* |
@@ -4570,20 +4570,33 @@ static size_t | |||
4570 | tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter) | 4570 | tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter) |
4571 | { | 4571 | { |
4572 | size_t count; | 4572 | size_t count; |
4573 | int save_len; | ||
4573 | int ret; | 4574 | int ret; |
4574 | 4575 | ||
4575 | /* Seq buffer is page-sized, exactly what we need. */ | 4576 | /* Seq buffer is page-sized, exactly what we need. */ |
4576 | for (;;) { | 4577 | for (;;) { |
4577 | count = iter->seq.len; | 4578 | save_len = iter->seq.seq.len; |
4578 | ret = print_trace_line(iter); | 4579 | ret = print_trace_line(iter); |
4579 | count = iter->seq.len - count; | 4580 | |
4580 | if (rem < count) { | 4581 | if (trace_seq_has_overflowed(&iter->seq)) { |
4581 | rem = 0; | 4582 | iter->seq.seq.len = save_len; |
4582 | iter->seq.len -= count; | ||
4583 | break; | 4583 | break; |
4584 | } | 4584 | } |
4585 | |||
4586 | /* | ||
4587 | * This should not be hit, because it should only | ||
4588 | * be set if the iter->seq overflowed. But check it | ||
4589 | * anyway to be safe. | ||
4590 | */ | ||
4585 | if (ret == TRACE_TYPE_PARTIAL_LINE) { | 4591 | if (ret == TRACE_TYPE_PARTIAL_LINE) { |
4586 | iter->seq.len -= count; | 4592 | iter->seq.seq.len = save_len; |
4593 | break; | ||
4594 | } | ||
4595 | |||
4596 | count = trace_seq_used(&iter->seq) - save_len; | ||
4597 | if (rem < count) { | ||
4598 | rem = 0; | ||
4599 | iter->seq.seq.len = save_len; | ||
4587 | break; | 4600 | break; |
4588 | } | 4601 | } |
4589 | 4602 | ||
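Both tracing_read_pipe() above and tracing_fill_pipe_page() here now follow the same recipe: record the seq length before rendering an event, and roll back to that mark if the line could not be rendered completely, so a half-written line never reaches user space. A condensed sketch of that loop (illustrative; it reuses the iterator helpers visible in the hunks and omits the byte budget and TRACE_TYPE_NO_CONSUME handling the real code keeps):

static void fill_seq_from_buffer(struct trace_iterator *iter)
{
	for (;;) {
		int save_len = iter->seq.seq.len;	/* high-water mark before this line */

		if (print_trace_line(iter) == TRACE_TYPE_PARTIAL_LINE ||
		    trace_seq_has_overflowed(&iter->seq)) {
			iter->seq.seq.len = save_len;	/* drop the half-written line */
			break;
		}

		trace_consume(iter);			/* the line fit: consume the event */

		if (!trace_find_next_entry_inc(iter))
			break;				/* ring buffer drained */
	}
}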
@@ -4664,13 +4677,13 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, | |||
4664 | /* Copy the data into the page, so we can start over. */ | 4677 | /* Copy the data into the page, so we can start over. */ |
4665 | ret = trace_seq_to_buffer(&iter->seq, | 4678 | ret = trace_seq_to_buffer(&iter->seq, |
4666 | page_address(spd.pages[i]), | 4679 | page_address(spd.pages[i]), |
4667 | iter->seq.len); | 4680 | trace_seq_used(&iter->seq)); |
4668 | if (ret < 0) { | 4681 | if (ret < 0) { |
4669 | __free_page(spd.pages[i]); | 4682 | __free_page(spd.pages[i]); |
4670 | break; | 4683 | break; |
4671 | } | 4684 | } |
4672 | spd.partial[i].offset = 0; | 4685 | spd.partial[i].offset = 0; |
4673 | spd.partial[i].len = iter->seq.len; | 4686 | spd.partial[i].len = trace_seq_used(&iter->seq); |
4674 | 4687 | ||
4675 | trace_seq_init(&iter->seq); | 4688 | trace_seq_init(&iter->seq); |
4676 | } | 4689 | } |
@@ -5372,16 +5385,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, | |||
5372 | goto out_unlock; | 5385 | goto out_unlock; |
5373 | } | 5386 | } |
5374 | mutex_unlock(&trace_types_lock); | 5387 | mutex_unlock(&trace_types_lock); |
5375 | ret = wait_on_pipe(iter); | 5388 | ret = wait_on_pipe(iter, false); |
5376 | mutex_lock(&trace_types_lock); | 5389 | mutex_lock(&trace_types_lock); |
5377 | if (ret) { | 5390 | if (ret) { |
5378 | size = ret; | 5391 | size = ret; |
5379 | goto out_unlock; | 5392 | goto out_unlock; |
5380 | } | 5393 | } |
5381 | if (signal_pending(current)) { | ||
5382 | size = -EINTR; | ||
5383 | goto out_unlock; | ||
5384 | } | ||
5385 | goto again; | 5394 | goto again; |
5386 | } | 5395 | } |
5387 | size = 0; | 5396 | size = 0; |
@@ -5500,7 +5509,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
5500 | }; | 5509 | }; |
5501 | struct buffer_ref *ref; | 5510 | struct buffer_ref *ref; |
5502 | int entries, size, i; | 5511 | int entries, size, i; |
5503 | ssize_t ret; | 5512 | ssize_t ret = 0; |
5504 | 5513 | ||
5505 | mutex_lock(&trace_types_lock); | 5514 | mutex_lock(&trace_types_lock); |
5506 | 5515 | ||
@@ -5538,13 +5547,16 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
5538 | int r; | 5547 | int r; |
5539 | 5548 | ||
5540 | ref = kzalloc(sizeof(*ref), GFP_KERNEL); | 5549 | ref = kzalloc(sizeof(*ref), GFP_KERNEL); |
5541 | if (!ref) | 5550 | if (!ref) { |
5551 | ret = -ENOMEM; | ||
5542 | break; | 5552 | break; |
5553 | } | ||
5543 | 5554 | ||
5544 | ref->ref = 1; | 5555 | ref->ref = 1; |
5545 | ref->buffer = iter->trace_buffer->buffer; | 5556 | ref->buffer = iter->trace_buffer->buffer; |
5546 | ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file); | 5557 | ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file); |
5547 | if (!ref->page) { | 5558 | if (!ref->page) { |
5559 | ret = -ENOMEM; | ||
5548 | kfree(ref); | 5560 | kfree(ref); |
5549 | break; | 5561 | break; |
5550 | } | 5562 | } |
@@ -5582,19 +5594,19 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
5582 | 5594 | ||
5583 | /* did we read anything? */ | 5595 | /* did we read anything? */ |
5584 | if (!spd.nr_pages) { | 5596 | if (!spd.nr_pages) { |
5597 | if (ret) | ||
5598 | goto out; | ||
5599 | |||
5585 | if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) { | 5600 | if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) { |
5586 | ret = -EAGAIN; | 5601 | ret = -EAGAIN; |
5587 | goto out; | 5602 | goto out; |
5588 | } | 5603 | } |
5589 | mutex_unlock(&trace_types_lock); | 5604 | mutex_unlock(&trace_types_lock); |
5590 | ret = wait_on_pipe(iter); | 5605 | ret = wait_on_pipe(iter, true); |
5591 | mutex_lock(&trace_types_lock); | 5606 | mutex_lock(&trace_types_lock); |
5592 | if (ret) | 5607 | if (ret) |
5593 | goto out; | 5608 | goto out; |
5594 | if (signal_pending(current)) { | 5609 | |
5595 | ret = -EINTR; | ||
5596 | goto out; | ||
5597 | } | ||
5598 | goto again; | 5610 | goto again; |
5599 | } | 5611 | } |
5600 | 5612 | ||
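Both splice hunks above change the error handling to the same shape: remember the first failure in ret, keep whatever pages were already collected, and only surface the error when nothing at all was produced. The same pattern in isolation (illustrative, hypothetical helper; assumes <linux/slab.h>):

static ssize_t collect_pages(void **slots, int want)
{
	ssize_t ret = 0;
	int n = 0;

	while (n < want) {
		void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);

		if (!page) {
			ret = -ENOMEM;	/* remember the failure ... */
			break;		/* ... but keep what we already have */
		}
		slots[n++] = page;
	}

	if (!n)
		return ret;		/* nothing collected: report the error */

	return n;			/* partial success outranks the late failure */
}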
@@ -5671,7 +5683,8 @@ tracing_stats_read(struct file *filp, char __user *ubuf, | |||
5671 | cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu); | 5683 | cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu); |
5672 | trace_seq_printf(s, "read events: %ld\n", cnt); | 5684 | trace_seq_printf(s, "read events: %ld\n", cnt); |
5673 | 5685 | ||
5674 | count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); | 5686 | count = simple_read_from_buffer(ubuf, count, ppos, |
5687 | s->buffer, trace_seq_used(s)); | ||
5675 | 5688 | ||
5676 | kfree(s); | 5689 | kfree(s); |
5677 | 5690 | ||
@@ -5752,10 +5765,10 @@ ftrace_snapshot_print(struct seq_file *m, unsigned long ip, | |||
5752 | 5765 | ||
5753 | seq_printf(m, "%ps:", (void *)ip); | 5766 | seq_printf(m, "%ps:", (void *)ip); |
5754 | 5767 | ||
5755 | seq_printf(m, "snapshot"); | 5768 | seq_puts(m, "snapshot"); |
5756 | 5769 | ||
5757 | if (count == -1) | 5770 | if (count == -1) |
5758 | seq_printf(m, ":unlimited\n"); | 5771 | seq_puts(m, ":unlimited\n"); |
5759 | else | 5772 | else |
5760 | seq_printf(m, ":count=%ld\n", count); | 5773 | seq_printf(m, ":count=%ld\n", count); |
5761 | 5774 | ||
@@ -6420,7 +6433,7 @@ static int instance_mkdir (struct inode *inode, struct dentry *dentry, umode_t m | |||
6420 | int ret; | 6433 | int ret; |
6421 | 6434 | ||
6422 | /* Paranoid: Make sure the parent is the "instances" directory */ | 6435 | /* Paranoid: Make sure the parent is the "instances" directory */ |
6423 | parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias); | 6436 | parent = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); |
6424 | if (WARN_ON_ONCE(parent != trace_instance_dir)) | 6437 | if (WARN_ON_ONCE(parent != trace_instance_dir)) |
6425 | return -ENOENT; | 6438 | return -ENOENT; |
6426 | 6439 | ||
@@ -6447,7 +6460,7 @@ static int instance_rmdir(struct inode *inode, struct dentry *dentry) | |||
6447 | int ret; | 6460 | int ret; |
6448 | 6461 | ||
6449 | /* Paranoid: Make sure the parent is the "instances" directory */ | 6462 | /* Paranoid: Make sure the parent is the "instances" directory */ |
6450 | parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias); | 6463 | parent = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); |
6451 | if (WARN_ON_ONCE(parent != trace_instance_dir)) | 6464 | if (WARN_ON_ONCE(parent != trace_instance_dir)) |
6452 | return -ENOENT; | 6465 | return -ENOENT; |
6453 | 6466 | ||
@@ -6634,11 +6647,19 @@ void | |||
6634 | trace_printk_seq(struct trace_seq *s) | 6647 | trace_printk_seq(struct trace_seq *s) |
6635 | { | 6648 | { |
6636 | /* Probably should print a warning here. */ | 6649 | /* Probably should print a warning here. */ |
6637 | if (s->len >= TRACE_MAX_PRINT) | 6650 | if (s->seq.len >= TRACE_MAX_PRINT) |
6638 | s->len = TRACE_MAX_PRINT; | 6651 | s->seq.len = TRACE_MAX_PRINT; |
6652 | |||
6653 | /* | ||
6654 | * More paranoid code. Although the buffer size is set to | ||
6655 | * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just | ||
6656 | * an extra layer of protection. | ||
6657 | */ | ||
6658 | if (WARN_ON_ONCE(s->seq.len >= s->seq.size)) | ||
6659 | s->seq.len = s->seq.size - 1; | ||
6639 | 6660 | ||
6640 | /* should be zero ended, but we are paranoid. */ | 6661 | /* should be zero ended, but we are paranoid. */ |
6641 | s->buffer[s->len] = 0; | 6662 | s->buffer[s->seq.len] = 0; |
6642 | 6663 | ||
6643 | printk(KERN_TRACE "%s", s->buffer); | 6664 | printk(KERN_TRACE "%s", s->buffer); |
6644 | 6665 | ||
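trace_printk_seq() now clamps twice before printing: once against TRACE_MAX_PRINT, and once, defensively, against the real buffer size, so the terminating NUL can never be written out of bounds even if the length field is corrupted. The guard in isolation (hypothetical helper name; WARN_ON_ONCE and TRACE_MAX_PRINT are the kernel's):

static void clamp_and_terminate(char *buf, unsigned int *len, unsigned int size)
{
	if (*len >= TRACE_MAX_PRINT)		/* cap what we are willing to print */
		*len = TRACE_MAX_PRINT;

	if (WARN_ON_ONCE(*len >= size))		/* never index past the buffer */
		*len = size - 1;

	buf[*len] = '\0';			/* printk() needs a terminated string */
}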
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 385391fb1d3b..3255dfb054a0 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/trace_seq.h> | 14 | #include <linux/trace_seq.h> |
15 | #include <linux/ftrace_event.h> | 15 | #include <linux/ftrace_event.h> |
16 | #include <linux/compiler.h> | 16 | #include <linux/compiler.h> |
17 | #include <linux/trace_seq.h> | ||
17 | 18 | ||
18 | #ifdef CONFIG_FTRACE_SYSCALLS | 19 | #ifdef CONFIG_FTRACE_SYSCALLS |
19 | #include <asm/unistd.h> /* For NR_SYSCALLS */ | 20 | #include <asm/unistd.h> /* For NR_SYSCALLS */ |
@@ -569,15 +570,6 @@ void trace_init_global_iter(struct trace_iterator *iter); | |||
569 | 570 | ||
570 | void tracing_iter_reset(struct trace_iterator *iter, int cpu); | 571 | void tracing_iter_reset(struct trace_iterator *iter, int cpu); |
571 | 572 | ||
572 | void tracing_sched_switch_trace(struct trace_array *tr, | ||
573 | struct task_struct *prev, | ||
574 | struct task_struct *next, | ||
575 | unsigned long flags, int pc); | ||
576 | |||
577 | void tracing_sched_wakeup_trace(struct trace_array *tr, | ||
578 | struct task_struct *wakee, | ||
579 | struct task_struct *cur, | ||
580 | unsigned long flags, int pc); | ||
581 | void trace_function(struct trace_array *tr, | 573 | void trace_function(struct trace_array *tr, |
582 | unsigned long ip, | 574 | unsigned long ip, |
583 | unsigned long parent_ip, | 575 | unsigned long parent_ip, |
@@ -597,9 +589,6 @@ void set_graph_array(struct trace_array *tr); | |||
597 | 589 | ||
598 | void tracing_start_cmdline_record(void); | 590 | void tracing_start_cmdline_record(void); |
599 | void tracing_stop_cmdline_record(void); | 591 | void tracing_stop_cmdline_record(void); |
600 | void tracing_sched_switch_assign_trace(struct trace_array *tr); | ||
601 | void tracing_stop_sched_switch_record(void); | ||
602 | void tracing_start_sched_switch_record(void); | ||
603 | int register_tracer(struct tracer *type); | 592 | int register_tracer(struct tracer *type); |
604 | int is_tracing_stopped(void); | 593 | int is_tracing_stopped(void); |
605 | 594 | ||
@@ -719,6 +708,8 @@ enum print_line_t print_trace_line(struct trace_iterator *iter); | |||
719 | 708 | ||
720 | extern unsigned long trace_flags; | 709 | extern unsigned long trace_flags; |
721 | 710 | ||
711 | extern char trace_find_mark(unsigned long long duration); | ||
712 | |||
722 | /* Standard output formatting function used for function return traces */ | 713 | /* Standard output formatting function used for function return traces */ |
723 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | 714 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
724 | 715 | ||
@@ -737,7 +728,7 @@ extern unsigned long trace_flags; | |||
737 | extern enum print_line_t | 728 | extern enum print_line_t |
738 | print_graph_function_flags(struct trace_iterator *iter, u32 flags); | 729 | print_graph_function_flags(struct trace_iterator *iter, u32 flags); |
739 | extern void print_graph_headers_flags(struct seq_file *s, u32 flags); | 730 | extern void print_graph_headers_flags(struct seq_file *s, u32 flags); |
740 | extern enum print_line_t | 731 | extern void |
741 | trace_print_graph_duration(unsigned long long duration, struct trace_seq *s); | 732 | trace_print_graph_duration(unsigned long long duration, struct trace_seq *s); |
742 | extern void graph_trace_open(struct trace_iterator *iter); | 733 | extern void graph_trace_open(struct trace_iterator *iter); |
743 | extern void graph_trace_close(struct trace_iterator *iter); | 734 | extern void graph_trace_close(struct trace_iterator *iter); |
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 697fb9bac8f0..7d6e2afde669 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c | |||
@@ -151,22 +151,21 @@ static enum print_line_t trace_branch_print(struct trace_iterator *iter, | |||
151 | 151 | ||
152 | trace_assign_type(field, iter->ent); | 152 | trace_assign_type(field, iter->ent); |
153 | 153 | ||
154 | if (trace_seq_printf(&iter->seq, "[%s] %s:%s:%d\n", | 154 | trace_seq_printf(&iter->seq, "[%s] %s:%s:%d\n", |
155 | field->correct ? " ok " : " MISS ", | 155 | field->correct ? " ok " : " MISS ", |
156 | field->func, | 156 | field->func, |
157 | field->file, | 157 | field->file, |
158 | field->line)) | 158 | field->line); |
159 | return TRACE_TYPE_PARTIAL_LINE; | 159 | |
160 | 160 | return trace_handle_return(&iter->seq); | |
161 | return TRACE_TYPE_HANDLED; | ||
162 | } | 161 | } |
163 | 162 | ||
164 | static void branch_print_header(struct seq_file *s) | 163 | static void branch_print_header(struct seq_file *s) |
165 | { | 164 | { |
166 | seq_puts(s, "# TASK-PID CPU# TIMESTAMP CORRECT" | 165 | seq_puts(s, "# TASK-PID CPU# TIMESTAMP CORRECT" |
167 | " FUNC:FILE:LINE\n"); | 166 | " FUNC:FILE:LINE\n" |
168 | seq_puts(s, "# | | | | | " | 167 | "# | | | | | " |
169 | " |\n"); | 168 | " |\n"); |
170 | } | 169 | } |
171 | 170 | ||
172 | static struct trace_event_functions trace_branch_funcs = { | 171 | static struct trace_event_functions trace_branch_funcs = { |
@@ -233,12 +232,12 @@ extern unsigned long __stop_annotated_branch_profile[]; | |||
233 | 232 | ||
234 | static int annotated_branch_stat_headers(struct seq_file *m) | 233 | static int annotated_branch_stat_headers(struct seq_file *m) |
235 | { | 234 | { |
236 | seq_printf(m, " correct incorrect %% "); | 235 | seq_puts(m, " correct incorrect % " |
237 | seq_printf(m, " Function " | 236 | " Function " |
238 | " File Line\n" | 237 | " File Line\n" |
239 | " ------- --------- - " | 238 | " ------- --------- - " |
240 | " -------- " | 239 | " -------- " |
241 | " ---- ----\n"); | 240 | " ---- ----\n"); |
242 | return 0; | 241 | return 0; |
243 | } | 242 | } |
244 | 243 | ||
@@ -274,7 +273,7 @@ static int branch_stat_show(struct seq_file *m, void *v) | |||
274 | 273 | ||
275 | seq_printf(m, "%8lu %8lu ", p->correct, p->incorrect); | 274 | seq_printf(m, "%8lu %8lu ", p->correct, p->incorrect); |
276 | if (percent < 0) | 275 | if (percent < 0) |
277 | seq_printf(m, " X "); | 276 | seq_puts(m, " X "); |
278 | else | 277 | else |
279 | seq_printf(m, "%3ld ", percent); | 278 | seq_printf(m, "%3ld ", percent); |
280 | seq_printf(m, "%-30.30s %-20.20s %d\n", p->func, f, p->line); | 279 | seq_printf(m, "%-30.30s %-20.20s %d\n", p->func, f, p->line); |
@@ -362,12 +361,12 @@ extern unsigned long __stop_branch_profile[]; | |||
362 | 361 | ||
363 | static int all_branch_stat_headers(struct seq_file *m) | 362 | static int all_branch_stat_headers(struct seq_file *m) |
364 | { | 363 | { |
365 | seq_printf(m, " miss hit %% "); | 364 | seq_puts(m, " miss hit % " |
366 | seq_printf(m, " Function " | 365 | " Function " |
367 | " File Line\n" | 366 | " File Line\n" |
368 | " ------- --------- - " | 367 | " ------- --------- - " |
369 | " -------- " | 368 | " -------- " |
370 | " ---- ----\n"); | 369 | " ---- ----\n"); |
371 | return 0; | 370 | return 0; |
372 | } | 371 | } |
373 | 372 | ||
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 0cc51edde3a8..d0e4f92b5eb6 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
@@ -461,7 +461,7 @@ static void remove_event_file_dir(struct ftrace_event_file *file) | |||
461 | 461 | ||
462 | if (dir) { | 462 | if (dir) { |
463 | spin_lock(&dir->d_lock); /* probably unneeded */ | 463 | spin_lock(&dir->d_lock); /* probably unneeded */ |
464 | list_for_each_entry(child, &dir->d_subdirs, d_u.d_child) { | 464 | list_for_each_entry(child, &dir->d_subdirs, d_child) { |
465 | if (child->d_inode) /* probably unneeded */ | 465 | if (child->d_inode) /* probably unneeded */ |
466 | child->d_inode->i_private = NULL; | 466 | child->d_inode->i_private = NULL; |
467 | } | 467 | } |
@@ -918,7 +918,7 @@ static int f_show(struct seq_file *m, void *v) | |||
918 | case FORMAT_HEADER: | 918 | case FORMAT_HEADER: |
919 | seq_printf(m, "name: %s\n", ftrace_event_name(call)); | 919 | seq_printf(m, "name: %s\n", ftrace_event_name(call)); |
920 | seq_printf(m, "ID: %d\n", call->event.type); | 920 | seq_printf(m, "ID: %d\n", call->event.type); |
921 | seq_printf(m, "format:\n"); | 921 | seq_puts(m, "format:\n"); |
922 | return 0; | 922 | return 0; |
923 | 923 | ||
924 | case FORMAT_FIELD_SEPERATOR: | 924 | case FORMAT_FIELD_SEPERATOR: |
@@ -1044,7 +1044,8 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, | |||
1044 | mutex_unlock(&event_mutex); | 1044 | mutex_unlock(&event_mutex); |
1045 | 1045 | ||
1046 | if (file) | 1046 | if (file) |
1047 | r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); | 1047 | r = simple_read_from_buffer(ubuf, cnt, ppos, |
1048 | s->buffer, trace_seq_used(s)); | ||
1048 | 1049 | ||
1049 | kfree(s); | 1050 | kfree(s); |
1050 | 1051 | ||
@@ -1210,7 +1211,8 @@ subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, | |||
1210 | trace_seq_init(s); | 1211 | trace_seq_init(s); |
1211 | 1212 | ||
1212 | print_subsystem_event_filter(system, s); | 1213 | print_subsystem_event_filter(system, s); |
1213 | r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); | 1214 | r = simple_read_from_buffer(ubuf, cnt, ppos, |
1215 | s->buffer, trace_seq_used(s)); | ||
1214 | 1216 | ||
1215 | kfree(s); | 1217 | kfree(s); |
1216 | 1218 | ||
@@ -1265,7 +1267,8 @@ show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) | |||
1265 | trace_seq_init(s); | 1267 | trace_seq_init(s); |
1266 | 1268 | ||
1267 | func(s); | 1269 | func(s); |
1268 | r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); | 1270 | r = simple_read_from_buffer(ubuf, cnt, ppos, |
1271 | s->buffer, trace_seq_used(s)); | ||
1269 | 1272 | ||
1270 | kfree(s); | 1273 | kfree(s); |
1271 | 1274 | ||
@@ -1988,7 +1991,7 @@ event_enable_print(struct seq_file *m, unsigned long ip, | |||
1988 | ftrace_event_name(data->file->event_call)); | 1991 | ftrace_event_name(data->file->event_call)); |
1989 | 1992 | ||
1990 | if (data->count == -1) | 1993 | if (data->count == -1) |
1991 | seq_printf(m, ":unlimited\n"); | 1994 | seq_puts(m, ":unlimited\n"); |
1992 | else | 1995 | else |
1993 | seq_printf(m, ":count=%ld\n", data->count); | 1996 | seq_printf(m, ":count=%ld\n", data->count); |
1994 | 1997 | ||
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 7a8c1528e141..ced69da0ff55 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c | |||
@@ -45,6 +45,7 @@ enum filter_op_ids | |||
45 | OP_GT, | 45 | OP_GT, |
46 | OP_GE, | 46 | OP_GE, |
47 | OP_BAND, | 47 | OP_BAND, |
48 | OP_NOT, | ||
48 | OP_NONE, | 49 | OP_NONE, |
49 | OP_OPEN_PAREN, | 50 | OP_OPEN_PAREN, |
50 | }; | 51 | }; |
@@ -67,6 +68,7 @@ static struct filter_op filter_ops[] = { | |||
67 | { OP_GT, ">", 5 }, | 68 | { OP_GT, ">", 5 }, |
68 | { OP_GE, ">=", 5 }, | 69 | { OP_GE, ">=", 5 }, |
69 | { OP_BAND, "&", 6 }, | 70 | { OP_BAND, "&", 6 }, |
71 | { OP_NOT, "!", 6 }, | ||
70 | { OP_NONE, "OP_NONE", 0 }, | 72 | { OP_NONE, "OP_NONE", 0 }, |
71 | { OP_OPEN_PAREN, "(", 0 }, | 73 | { OP_OPEN_PAREN, "(", 0 }, |
72 | }; | 74 | }; |
@@ -85,6 +87,7 @@ enum { | |||
85 | FILT_ERR_MISSING_FIELD, | 87 | FILT_ERR_MISSING_FIELD, |
86 | FILT_ERR_INVALID_FILTER, | 88 | FILT_ERR_INVALID_FILTER, |
87 | FILT_ERR_IP_FIELD_ONLY, | 89 | FILT_ERR_IP_FIELD_ONLY, |
90 | FILT_ERR_ILLEGAL_NOT_OP, | ||
88 | }; | 91 | }; |
89 | 92 | ||
90 | static char *err_text[] = { | 93 | static char *err_text[] = { |
@@ -101,6 +104,7 @@ static char *err_text[] = { | |||
101 | "Missing field name and/or value", | 104 | "Missing field name and/or value", |
102 | "Meaningless filter expression", | 105 | "Meaningless filter expression", |
103 | "Only 'ip' field is supported for function trace", | 106 | "Only 'ip' field is supported for function trace", |
107 | "Illegal use of '!'", | ||
104 | }; | 108 | }; |
105 | 109 | ||
106 | struct opstack_op { | 110 | struct opstack_op { |
@@ -139,6 +143,7 @@ struct pred_stack { | |||
139 | int index; | 143 | int index; |
140 | }; | 144 | }; |
141 | 145 | ||
146 | /* If not of not match is equal to not of not, then it is a match */ | ||
142 | #define DEFINE_COMPARISON_PRED(type) \ | 147 | #define DEFINE_COMPARISON_PRED(type) \ |
143 | static int filter_pred_##type(struct filter_pred *pred, void *event) \ | 148 | static int filter_pred_##type(struct filter_pred *pred, void *event) \ |
144 | { \ | 149 | { \ |
@@ -166,7 +171,7 @@ static int filter_pred_##type(struct filter_pred *pred, void *event) \ | |||
166 | break; \ | 171 | break; \ |
167 | } \ | 172 | } \ |
168 | \ | 173 | \ |
169 | return match; \ | 174 | return !!match == !pred->not; \ |
170 | } | 175 | } |
171 | 176 | ||
172 | #define DEFINE_EQUALITY_PRED(size) \ | 177 | #define DEFINE_EQUALITY_PRED(size) \ |
@@ -484,9 +489,10 @@ static int process_ops(struct filter_pred *preds, | |||
484 | if (!WARN_ON_ONCE(!pred->fn)) | 489 | if (!WARN_ON_ONCE(!pred->fn)) |
485 | match = pred->fn(pred, rec); | 490 | match = pred->fn(pred, rec); |
486 | if (!!match == type) | 491 | if (!!match == type) |
487 | return match; | 492 | break; |
488 | } | 493 | } |
489 | return match; | 494 | /* If not of not match is equal to not of not, then it is a match */ |
495 | return !!match == !op->not; | ||
490 | } | 496 | } |
491 | 497 | ||
492 | struct filter_match_preds_data { | 498 | struct filter_match_preds_data { |
@@ -735,10 +741,10 @@ static int filter_set_pred(struct event_filter *filter, | |||
735 | * then this op can be folded. | 741 | * then this op can be folded. |
736 | */ | 742 | */ |
737 | if (left->index & FILTER_PRED_FOLD && | 743 | if (left->index & FILTER_PRED_FOLD && |
738 | (left->op == dest->op || | 744 | ((left->op == dest->op && !left->not) || |
739 | left->left == FILTER_PRED_INVALID) && | 745 | left->left == FILTER_PRED_INVALID) && |
740 | right->index & FILTER_PRED_FOLD && | 746 | right->index & FILTER_PRED_FOLD && |
741 | (right->op == dest->op || | 747 | ((right->op == dest->op && !right->not) || |
742 | right->left == FILTER_PRED_INVALID)) | 748 | right->left == FILTER_PRED_INVALID)) |
743 | dest->index |= FILTER_PRED_FOLD; | 749 | dest->index |= FILTER_PRED_FOLD; |
744 | 750 | ||
@@ -1028,7 +1034,7 @@ static int init_pred(struct filter_parse_state *ps, | |||
1028 | } | 1034 | } |
1029 | 1035 | ||
1030 | if (pred->op == OP_NE) | 1036 | if (pred->op == OP_NE) |
1031 | pred->not = 1; | 1037 | pred->not ^= 1; |
1032 | 1038 | ||
1033 | pred->fn = fn; | 1039 | pred->fn = fn; |
1034 | return 0; | 1040 | return 0; |
@@ -1590,6 +1596,17 @@ static int replace_preds(struct ftrace_event_call *call, | |||
1590 | continue; | 1596 | continue; |
1591 | } | 1597 | } |
1592 | 1598 | ||
1599 | if (elt->op == OP_NOT) { | ||
1600 | if (!n_preds || operand1 || operand2) { | ||
1601 | parse_error(ps, FILT_ERR_ILLEGAL_NOT_OP, 0); | ||
1602 | err = -EINVAL; | ||
1603 | goto fail; | ||
1604 | } | ||
1605 | if (!dry_run) | ||
1606 | filter->preds[n_preds - 1].not ^= 1; | ||
1607 | continue; | ||
1608 | } | ||
1609 | |||
1593 | if (WARN_ON(n_preds++ == MAX_FILTER_PRED)) { | 1610 | if (WARN_ON(n_preds++ == MAX_FILTER_PRED)) { |
1594 | parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); | 1611 | parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); |
1595 | err = -ENOSPC; | 1612 | err = -ENOSPC; |
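Taken together, the filter hunks above add a prefix '!' to the predicate language: OP_NOT parses with the same priority as '&', replace_preds() flips the not bit on the predicate it follows in postfix order, and the comparison predicates now report !!match == !pred->not instead of the raw match, so an expression such as !(prev_pid > 100) (illustrative) negates the whole comparison. A small stand-in for the negation test, which is exactly what the DEFINE_COMPARISON_PRED hunk encodes:

/* 'match' is the raw comparison result, 'not' is pred->not
 * (toggled once per leading '!'). */
static int filter_result(int match, int not)
{
	/*  match  not  ->  result
	 *    0     0        0
	 *   !0     0        1
	 *    0     1        1
	 *   !0     1        0
	 */
	return !!match == !not;
}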
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 4747b476a030..8712df9decb4 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c | |||
@@ -373,7 +373,7 @@ event_trigger_print(const char *name, struct seq_file *m, | |||
373 | { | 373 | { |
374 | long count = (long)data; | 374 | long count = (long)data; |
375 | 375 | ||
376 | seq_printf(m, "%s", name); | 376 | seq_puts(m, name); |
377 | 377 | ||
378 | if (count == -1) | 378 | if (count == -1) |
379 | seq_puts(m, ":unlimited"); | 379 | seq_puts(m, ":unlimited"); |
@@ -383,7 +383,7 @@ event_trigger_print(const char *name, struct seq_file *m, | |||
383 | if (filter_str) | 383 | if (filter_str) |
384 | seq_printf(m, " if %s\n", filter_str); | 384 | seq_printf(m, " if %s\n", filter_str); |
385 | else | 385 | else |
386 | seq_puts(m, "\n"); | 386 | seq_putc(m, '\n'); |
387 | 387 | ||
388 | return 0; | 388 | return 0; |
389 | } | 389 | } |
@@ -1105,7 +1105,7 @@ event_enable_trigger_print(struct seq_file *m, struct event_trigger_ops *ops, | |||
1105 | if (data->filter_str) | 1105 | if (data->filter_str) |
1106 | seq_printf(m, " if %s\n", data->filter_str); | 1106 | seq_printf(m, " if %s\n", data->filter_str); |
1107 | else | 1107 | else |
1108 | seq_puts(m, "\n"); | 1108 | seq_putc(m, '\n'); |
1109 | 1109 | ||
1110 | return 0; | 1110 | return 0; |
1111 | } | 1111 | } |
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 57f0ec962d2c..fcd41a166405 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c | |||
@@ -261,37 +261,74 @@ static struct tracer function_trace __tracer_data = | |||
261 | }; | 261 | }; |
262 | 262 | ||
263 | #ifdef CONFIG_DYNAMIC_FTRACE | 263 | #ifdef CONFIG_DYNAMIC_FTRACE |
264 | static int update_count(void **data) | 264 | static void update_traceon_count(void **data, bool on) |
265 | { | 265 | { |
266 | unsigned long *count = (long *)data; | 266 | long *count = (long *)data; |
267 | long old_count = *count; | ||
267 | 268 | ||
268 | if (!*count) | 269 | /* |
269 | return 0; | 270 | * Tracing gets disabled (or enabled) once per count. |
271 | * This function can be called at the same time on multiple CPUs. | ||
272 | * It is fine if both disable (or enable) tracing, as disabling | ||
273 | * (or enabling) the second time doesn't do anything as the | ||
274 | * state of the tracer is already disabled (or enabled). | ||
275 | * What needs to be synchronized in this case is that the count | ||
276 | * only gets decremented once, even if the tracer is disabled | ||
277 | * (or enabled) twice, as the second one is really a nop. | ||
278 | * | ||
279 | * The memory barriers guarantee that we only decrement the | ||
280 | * counter once. First the count is read to a local variable | ||
281 | * and a read barrier is used to make sure that it is loaded | ||
282 | * before checking if the tracer is in the state we want. | ||
283 | * If the tracer is not in the state we want, then the count | ||
284 | * is guaranteed to be the old count. | ||
285 | * | ||
286 | * Next the tracer is set to the state we want (disabled or enabled) | ||
287 | * then a write memory barrier is used to make sure that | ||
288 | * the new state is visible before changing the counter by | ||
289 | * one minus the old counter. This guarantees that another CPU | ||
290 | * executing this code will see the new state before seeing | ||
291 | * the new counter value, and would not do anything if the new | ||
292 | * counter is seen. | ||
293 | * | ||
294 | * Note, there is no synchronization between this and a user | ||
295 | * setting the tracing_on file. But we currently don't care | ||
296 | * about that. | ||
297 | */ | ||
298 | if (!old_count) | ||
299 | return; | ||
270 | 300 | ||
271 | if (*count != -1) | 301 | /* Make sure we see count before checking tracing state */ |
272 | (*count)--; | 302 | smp_rmb(); |
273 | 303 | ||
274 | return 1; | 304 | if (on == !!tracing_is_on()) |
305 | return; | ||
306 | |||
307 | if (on) | ||
308 | tracing_on(); | ||
309 | else | ||
310 | tracing_off(); | ||
311 | |||
312 | /* unlimited? */ | ||
313 | if (old_count == -1) | ||
314 | return; | ||
315 | |||
316 | /* Make sure tracing state is visible before updating count */ | ||
317 | smp_wmb(); | ||
318 | |||
319 | *count = old_count - 1; | ||
275 | } | 320 | } |
276 | 321 | ||
277 | static void | 322 | static void |
278 | ftrace_traceon_count(unsigned long ip, unsigned long parent_ip, void **data) | 323 | ftrace_traceon_count(unsigned long ip, unsigned long parent_ip, void **data) |
279 | { | 324 | { |
280 | if (tracing_is_on()) | 325 | update_traceon_count(data, 1); |
281 | return; | ||
282 | |||
283 | if (update_count(data)) | ||
284 | tracing_on(); | ||
285 | } | 326 | } |
286 | 327 | ||
287 | static void | 328 | static void |
288 | ftrace_traceoff_count(unsigned long ip, unsigned long parent_ip, void **data) | 329 | ftrace_traceoff_count(unsigned long ip, unsigned long parent_ip, void **data) |
289 | { | 330 | { |
290 | if (!tracing_is_on()) | 331 | update_traceon_count(data, 0); |
291 | return; | ||
292 | |||
293 | if (update_count(data)) | ||
294 | tracing_off(); | ||
295 | } | 332 | } |
296 | 333 | ||
297 | static void | 334 | static void |
@@ -330,11 +367,49 @@ ftrace_stacktrace(unsigned long ip, unsigned long parent_ip, void **data) | |||
330 | static void | 367 | static void |
331 | ftrace_stacktrace_count(unsigned long ip, unsigned long parent_ip, void **data) | 368 | ftrace_stacktrace_count(unsigned long ip, unsigned long parent_ip, void **data) |
332 | { | 369 | { |
333 | if (!tracing_is_on()) | 370 | long *count = (long *)data; |
334 | return; | 371 | long old_count; |
372 | long new_count; | ||
335 | 373 | ||
336 | if (update_count(data)) | 374 | /* |
337 | trace_dump_stack(STACK_SKIP); | 375 | * Stack traces should only execute the number of times the |
376 | * user specified in the counter. | ||
377 | */ | ||
378 | do { | ||
379 | |||
380 | if (!tracing_is_on()) | ||
381 | return; | ||
382 | |||
383 | old_count = *count; | ||
384 | |||
385 | if (!old_count) | ||
386 | return; | ||
387 | |||
388 | /* unlimited? */ | ||
389 | if (old_count == -1) { | ||
390 | trace_dump_stack(STACK_SKIP); | ||
391 | return; | ||
392 | } | ||
393 | |||
394 | new_count = old_count - 1; | ||
395 | new_count = cmpxchg(count, old_count, new_count); | ||
396 | if (new_count == old_count) | ||
397 | trace_dump_stack(STACK_SKIP); | ||
398 | |||
399 | } while (new_count != old_count); | ||
400 | } | ||
401 | |||
402 | static int update_count(void **data) | ||
403 | { | ||
404 | unsigned long *count = (long *)data; | ||
405 | |||
406 | if (!*count) | ||
407 | return 0; | ||
408 | |||
409 | if (*count != -1) | ||
410 | (*count)--; | ||
411 | |||
412 | return 1; | ||
338 | } | 413 | } |
339 | 414 | ||
340 | static void | 415 | static void |
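The stack-trace probe above drops the shared update_count() helper in favour of a cmpxchg() retry loop, so that when several CPUs hit the traced function at once exactly one of them wins each decrement and the stack is dumped no more than the requested number of times, with -1 still meaning unlimited. A user-space flavoured sketch of the same decrement-once pattern (illustrative; it uses the GCC/Clang __sync builtin where the kernel code uses cmpxchg()):

#include <stdbool.h>

static bool consume_one(long *count)
{
	long old, new;

	do {
		old = *count;
		if (old == 0)
			return false;	/* budget exhausted: do nothing */
		if (old == -1)
			return true;	/* -1 means unlimited: always act */
		new = old - 1;
	} while (!__sync_bool_compare_and_swap(count, old, new));

	return true;			/* we won this decrement: act exactly once */
}

If the compare-and-swap loses the race, the loop rereads the counter and tries again, which is the same retry the while (new_count != old_count) condition above expresses.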
@@ -361,7 +436,7 @@ ftrace_probe_print(const char *name, struct seq_file *m, | |||
361 | seq_printf(m, "%ps:%s", (void *)ip, name); | 436 | seq_printf(m, "%ps:%s", (void *)ip, name); |
362 | 437 | ||
363 | if (count == -1) | 438 | if (count == -1) |
364 | seq_printf(m, ":unlimited\n"); | 439 | seq_puts(m, ":unlimited\n"); |
365 | else | 440 | else |
366 | seq_printf(m, ":count=%ld\n", count); | 441 | seq_printf(m, ":count=%ld\n", count); |
367 | 442 | ||
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index f0a0c982cde3..ba476009e5de 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c | |||
@@ -107,7 +107,7 @@ enum { | |||
107 | FLAGS_FILL_END = 3 << TRACE_GRAPH_PRINT_FILL_SHIFT, | 107 | FLAGS_FILL_END = 3 << TRACE_GRAPH_PRINT_FILL_SHIFT, |
108 | }; | 108 | }; |
109 | 109 | ||
110 | static enum print_line_t | 110 | static void |
111 | print_graph_duration(unsigned long long duration, struct trace_seq *s, | 111 | print_graph_duration(unsigned long long duration, struct trace_seq *s, |
112 | u32 flags); | 112 | u32 flags); |
113 | 113 | ||
@@ -483,33 +483,24 @@ static int graph_trace_update_thresh(struct trace_array *tr) | |||
483 | 483 | ||
484 | static int max_bytes_for_cpu; | 484 | static int max_bytes_for_cpu; |
485 | 485 | ||
486 | static enum print_line_t | 486 | static void print_graph_cpu(struct trace_seq *s, int cpu) |
487 | print_graph_cpu(struct trace_seq *s, int cpu) | ||
488 | { | 487 | { |
489 | int ret; | ||
490 | |||
491 | /* | 488 | /* |
492 | * Start with a space character - to make it stand out | 489 | * Start with a space character - to make it stand out |
493 | * to the right a bit when trace output is pasted into | 490 | * to the right a bit when trace output is pasted into |
494 | * email: | 491 | * email: |
495 | */ | 492 | */ |
496 | ret = trace_seq_printf(s, " %*d) ", max_bytes_for_cpu, cpu); | 493 | trace_seq_printf(s, " %*d) ", max_bytes_for_cpu, cpu); |
497 | if (!ret) | ||
498 | return TRACE_TYPE_PARTIAL_LINE; | ||
499 | |||
500 | return TRACE_TYPE_HANDLED; | ||
501 | } | 494 | } |
502 | 495 | ||
503 | #define TRACE_GRAPH_PROCINFO_LENGTH 14 | 496 | #define TRACE_GRAPH_PROCINFO_LENGTH 14 |
504 | 497 | ||
505 | static enum print_line_t | 498 | static void print_graph_proc(struct trace_seq *s, pid_t pid) |
506 | print_graph_proc(struct trace_seq *s, pid_t pid) | ||
507 | { | 499 | { |
508 | char comm[TASK_COMM_LEN]; | 500 | char comm[TASK_COMM_LEN]; |
509 | /* sign + log10(MAX_INT) + '\0' */ | 501 | /* sign + log10(MAX_INT) + '\0' */ |
510 | char pid_str[11]; | 502 | char pid_str[11]; |
511 | int spaces = 0; | 503 | int spaces = 0; |
512 | int ret; | ||
513 | int len; | 504 | int len; |
514 | int i; | 505 | int i; |
515 | 506 | ||
@@ -524,56 +515,43 @@ print_graph_proc(struct trace_seq *s, pid_t pid) | |||
524 | spaces = TRACE_GRAPH_PROCINFO_LENGTH - len; | 515 | spaces = TRACE_GRAPH_PROCINFO_LENGTH - len; |
525 | 516 | ||
526 | /* First spaces to align center */ | 517 | /* First spaces to align center */ |
527 | for (i = 0; i < spaces / 2; i++) { | 518 | for (i = 0; i < spaces / 2; i++) |
528 | ret = trace_seq_putc(s, ' '); | 519 | trace_seq_putc(s, ' '); |
529 | if (!ret) | ||
530 | return TRACE_TYPE_PARTIAL_LINE; | ||
531 | } | ||
532 | 520 | ||
533 | ret = trace_seq_printf(s, "%s-%s", comm, pid_str); | 521 | trace_seq_printf(s, "%s-%s", comm, pid_str); |
534 | if (!ret) | ||
535 | return TRACE_TYPE_PARTIAL_LINE; | ||
536 | 522 | ||
537 | /* Last spaces to align center */ | 523 | /* Last spaces to align center */ |
538 | for (i = 0; i < spaces - (spaces / 2); i++) { | 524 | for (i = 0; i < spaces - (spaces / 2); i++) |
539 | ret = trace_seq_putc(s, ' '); | 525 | trace_seq_putc(s, ' '); |
540 | if (!ret) | ||
541 | return TRACE_TYPE_PARTIAL_LINE; | ||
542 | } | ||
543 | return TRACE_TYPE_HANDLED; | ||
544 | } | 526 | } |
545 | 527 | ||
546 | 528 | ||
547 | static enum print_line_t | 529 | static void print_graph_lat_fmt(struct trace_seq *s, struct trace_entry *entry) |
548 | print_graph_lat_fmt(struct trace_seq *s, struct trace_entry *entry) | ||
549 | { | 530 | { |
550 | if (!trace_seq_putc(s, ' ')) | 531 | trace_seq_putc(s, ' '); |
551 | return 0; | 532 | trace_print_lat_fmt(s, entry); |
552 | |||
553 | return trace_print_lat_fmt(s, entry); | ||
554 | } | 533 | } |
555 | 534 | ||
556 | /* If the pid changed since the last trace, output this event */ | 535 | /* If the pid changed since the last trace, output this event */ |
557 | static enum print_line_t | 536 | static void |
558 | verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data) | 537 | verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data) |
559 | { | 538 | { |
560 | pid_t prev_pid; | 539 | pid_t prev_pid; |
561 | pid_t *last_pid; | 540 | pid_t *last_pid; |
562 | int ret; | ||
563 | 541 | ||
564 | if (!data) | 542 | if (!data) |
565 | return TRACE_TYPE_HANDLED; | 543 | return; |
566 | 544 | ||
567 | last_pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid); | 545 | last_pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid); |
568 | 546 | ||
569 | if (*last_pid == pid) | 547 | if (*last_pid == pid) |
570 | return TRACE_TYPE_HANDLED; | 548 | return; |
571 | 549 | ||
572 | prev_pid = *last_pid; | 550 | prev_pid = *last_pid; |
573 | *last_pid = pid; | 551 | *last_pid = pid; |
574 | 552 | ||
575 | if (prev_pid == -1) | 553 | if (prev_pid == -1) |
576 | return TRACE_TYPE_HANDLED; | 554 | return; |
577 | /* | 555 | /* |
578 | * Context-switch trace line: | 556 | * Context-switch trace line: |
579 | 557 | ||
@@ -582,33 +560,12 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data) | |||
582 | ------------------------------------------ | 560 | ------------------------------------------ |
583 | 561 | ||
584 | */ | 562 | */ |
585 | ret = trace_seq_puts(s, | 563 | trace_seq_puts(s, " ------------------------------------------\n"); |
586 | " ------------------------------------------\n"); | 564 | print_graph_cpu(s, cpu); |
587 | if (!ret) | 565 | print_graph_proc(s, prev_pid); |
588 | return TRACE_TYPE_PARTIAL_LINE; | 566 | trace_seq_puts(s, " => "); |
589 | 567 | print_graph_proc(s, pid); | |
590 | ret = print_graph_cpu(s, cpu); | 568 | trace_seq_puts(s, "\n ------------------------------------------\n\n"); |
591 | if (ret == TRACE_TYPE_PARTIAL_LINE) | ||
592 | return TRACE_TYPE_PARTIAL_LINE; | ||
593 | |||
594 | ret = print_graph_proc(s, prev_pid); | ||
595 | if (ret == TRACE_TYPE_PARTIAL_LINE) | ||
596 | return TRACE_TYPE_PARTIAL_LINE; | ||
597 | |||
598 | ret = trace_seq_puts(s, " => "); | ||
599 | if (!ret) | ||
600 | return TRACE_TYPE_PARTIAL_LINE; | ||
601 | |||
602 | ret = print_graph_proc(s, pid); | ||
603 | if (ret == TRACE_TYPE_PARTIAL_LINE) | ||
604 | return TRACE_TYPE_PARTIAL_LINE; | ||
605 | |||
606 | ret = trace_seq_puts(s, | ||
607 | "\n ------------------------------------------\n\n"); | ||
608 | if (!ret) | ||
609 | return TRACE_TYPE_PARTIAL_LINE; | ||
610 | |||
611 | return TRACE_TYPE_HANDLED; | ||
612 | } | 569 | } |
613 | 570 | ||
614 | static struct ftrace_graph_ret_entry * | 571 | static struct ftrace_graph_ret_entry * |
@@ -682,175 +639,122 @@ get_return_for_leaf(struct trace_iterator *iter, | |||
682 | return next; | 639 | return next; |
683 | } | 640 | } |
684 | 641 | ||
685 | static int print_graph_abs_time(u64 t, struct trace_seq *s) | 642 | static void print_graph_abs_time(u64 t, struct trace_seq *s) |
686 | { | 643 | { |
687 | unsigned long usecs_rem; | 644 | unsigned long usecs_rem; |
688 | 645 | ||
689 | usecs_rem = do_div(t, NSEC_PER_SEC); | 646 | usecs_rem = do_div(t, NSEC_PER_SEC); |
690 | usecs_rem /= 1000; | 647 | usecs_rem /= 1000; |
691 | 648 | ||
692 | return trace_seq_printf(s, "%5lu.%06lu | ", | 649 | trace_seq_printf(s, "%5lu.%06lu | ", |
693 | (unsigned long)t, usecs_rem); | 650 | (unsigned long)t, usecs_rem); |
694 | } | 651 | } |
695 | 652 | ||
696 | static enum print_line_t | 653 | static void |
697 | print_graph_irq(struct trace_iterator *iter, unsigned long addr, | 654 | print_graph_irq(struct trace_iterator *iter, unsigned long addr, |
698 | enum trace_type type, int cpu, pid_t pid, u32 flags) | 655 | enum trace_type type, int cpu, pid_t pid, u32 flags) |
699 | { | 656 | { |
700 | int ret; | ||
701 | struct trace_seq *s = &iter->seq; | 657 | struct trace_seq *s = &iter->seq; |
658 | struct trace_entry *ent = iter->ent; | ||
702 | 659 | ||
703 | if (addr < (unsigned long)__irqentry_text_start || | 660 | if (addr < (unsigned long)__irqentry_text_start || |
704 | addr >= (unsigned long)__irqentry_text_end) | 661 | addr >= (unsigned long)__irqentry_text_end) |
705 | return TRACE_TYPE_UNHANDLED; | 662 | return; |
706 | 663 | ||
707 | if (trace_flags & TRACE_ITER_CONTEXT_INFO) { | 664 | if (trace_flags & TRACE_ITER_CONTEXT_INFO) { |
708 | /* Absolute time */ | 665 | /* Absolute time */ |
709 | if (flags & TRACE_GRAPH_PRINT_ABS_TIME) { | 666 | if (flags & TRACE_GRAPH_PRINT_ABS_TIME) |
710 | ret = print_graph_abs_time(iter->ts, s); | 667 | print_graph_abs_time(iter->ts, s); |
711 | if (!ret) | ||
712 | return TRACE_TYPE_PARTIAL_LINE; | ||
713 | } | ||
714 | 668 | ||
715 | /* Cpu */ | 669 | /* Cpu */ |
716 | if (flags & TRACE_GRAPH_PRINT_CPU) { | 670 | if (flags & TRACE_GRAPH_PRINT_CPU) |
717 | ret = print_graph_cpu(s, cpu); | 671 | print_graph_cpu(s, cpu); |
718 | if (ret == TRACE_TYPE_PARTIAL_LINE) | ||
719 | return TRACE_TYPE_PARTIAL_LINE; | ||
720 | } | ||
721 | 672 | ||
722 | /* Proc */ | 673 | /* Proc */ |
723 | if (flags & TRACE_GRAPH_PRINT_PROC) { | 674 | if (flags & TRACE_GRAPH_PRINT_PROC) { |
724 | ret = print_graph_proc(s, pid); | 675 | print_graph_proc(s, pid); |
725 | if (ret == TRACE_TYPE_PARTIAL_LINE) | 676 | trace_seq_puts(s, " | "); |
726 | return TRACE_TYPE_PARTIAL_LINE; | ||
727 | ret = trace_seq_puts(s, " | "); | ||
728 | if (!ret) | ||
729 | return TRACE_TYPE_PARTIAL_LINE; | ||
730 | } | 677 | } |
678 | |||
679 | /* Latency format */ | ||
680 | if (trace_flags & TRACE_ITER_LATENCY_FMT) | ||
681 | print_graph_lat_fmt(s, ent); | ||
731 | } | 682 | } |
732 | 683 | ||
733 | /* No overhead */ | 684 | /* No overhead */ |
734 | ret = print_graph_duration(0, s, flags | FLAGS_FILL_START); | 685 | print_graph_duration(0, s, flags | FLAGS_FILL_START); |
735 | if (ret != TRACE_TYPE_HANDLED) | ||
736 | return ret; | ||
737 | 686 | ||
738 | if (type == TRACE_GRAPH_ENT) | 687 | if (type == TRACE_GRAPH_ENT) |
739 | ret = trace_seq_puts(s, "==========>"); | 688 | trace_seq_puts(s, "==========>"); |
740 | else | 689 | else |
741 | ret = trace_seq_puts(s, "<=========="); | 690 | trace_seq_puts(s, "<=========="); |
742 | |||
743 | if (!ret) | ||
744 | return TRACE_TYPE_PARTIAL_LINE; | ||
745 | |||
746 | ret = print_graph_duration(0, s, flags | FLAGS_FILL_END); | ||
747 | if (ret != TRACE_TYPE_HANDLED) | ||
748 | return ret; | ||
749 | |||
750 | ret = trace_seq_putc(s, '\n'); | ||
751 | 691 | ||
752 | if (!ret) | 692 | print_graph_duration(0, s, flags | FLAGS_FILL_END); |
753 | return TRACE_TYPE_PARTIAL_LINE; | 693 | trace_seq_putc(s, '\n'); |
754 | return TRACE_TYPE_HANDLED; | ||
755 | } | 694 | } |
756 | 695 | ||
757 | enum print_line_t | 696 | void |
758 | trace_print_graph_duration(unsigned long long duration, struct trace_seq *s) | 697 | trace_print_graph_duration(unsigned long long duration, struct trace_seq *s) |
759 | { | 698 | { |
760 | unsigned long nsecs_rem = do_div(duration, 1000); | 699 | unsigned long nsecs_rem = do_div(duration, 1000); |
761 | /* log10(ULONG_MAX) + '\0' */ | 700 | /* log10(ULONG_MAX) + '\0' */ |
762 | char msecs_str[21]; | 701 | char usecs_str[21]; |
763 | char nsecs_str[5]; | 702 | char nsecs_str[5]; |
764 | int ret, len; | 703 | int len; |
765 | int i; | 704 | int i; |
766 | 705 | ||
767 | sprintf(msecs_str, "%lu", (unsigned long) duration); | 706 | sprintf(usecs_str, "%lu", (unsigned long) duration); |
768 | 707 | ||
769 | /* Print usecs */ | 708 |
770 | ret = trace_seq_printf(s, "%s", msecs_str); | 709 | trace_seq_printf(s, "%s", usecs_str); |
771 | if (!ret) | ||
772 | return TRACE_TYPE_PARTIAL_LINE; | ||
773 | 710 | ||
774 | len = strlen(msecs_str); | 711 | len = strlen(usecs_str); |
775 | 712 | ||
776 | /* Print nsecs (we don't want to exceed 7 numbers) */ | 713 | /* Print nsecs (we don't want to exceed 7 numbers) */ |
777 | if (len < 7) { | 714 | if (len < 7) { |
778 | size_t slen = min_t(size_t, sizeof(nsecs_str), 8UL - len); | 715 | size_t slen = min_t(size_t, sizeof(nsecs_str), 8UL - len); |
779 | 716 | ||
780 | snprintf(nsecs_str, slen, "%03lu", nsecs_rem); | 717 | snprintf(nsecs_str, slen, "%03lu", nsecs_rem); |
781 | ret = trace_seq_printf(s, ".%s", nsecs_str); | 718 | trace_seq_printf(s, ".%s", nsecs_str); |
782 | if (!ret) | ||
783 | return TRACE_TYPE_PARTIAL_LINE; | ||
784 | len += strlen(nsecs_str); | 719 | len += strlen(nsecs_str); |
785 | } | 720 | } |
786 | 721 | ||
787 | ret = trace_seq_puts(s, " us "); | 722 | trace_seq_puts(s, " us "); |
788 | if (!ret) | ||
789 | return TRACE_TYPE_PARTIAL_LINE; | ||
790 | 723 | ||
791 | /* Print remaining spaces to fit the row's width */ | 724 | /* Print remaining spaces to fit the row's width */ |
792 | for (i = len; i < 7; i++) { | 725 | for (i = len; i < 7; i++) |
793 | ret = trace_seq_putc(s, ' '); | 726 | trace_seq_putc(s, ' '); |
794 | if (!ret) | ||
795 | return TRACE_TYPE_PARTIAL_LINE; | ||
796 | } | ||
797 | return TRACE_TYPE_HANDLED; | ||
798 | } | 727 | } |
799 | 728 | ||
800 | static enum print_line_t | 729 | static void |
801 | print_graph_duration(unsigned long long duration, struct trace_seq *s, | 730 | print_graph_duration(unsigned long long duration, struct trace_seq *s, |
802 | u32 flags) | 731 | u32 flags) |
803 | { | 732 | { |
804 | int ret = -1; | ||
805 | |||
806 | if (!(flags & TRACE_GRAPH_PRINT_DURATION) || | 733 | if (!(flags & TRACE_GRAPH_PRINT_DURATION) || |
807 | !(trace_flags & TRACE_ITER_CONTEXT_INFO)) | 734 | !(trace_flags & TRACE_ITER_CONTEXT_INFO)) |
808 | return TRACE_TYPE_HANDLED; | 735 | return; |
809 | 736 | ||
810 | /* No real data, just filling the column with spaces */ | 737 |
811 | switch (flags & TRACE_GRAPH_PRINT_FILL_MASK) { | 738 | switch (flags & TRACE_GRAPH_PRINT_FILL_MASK) { |
812 | case FLAGS_FILL_FULL: | 739 | case FLAGS_FILL_FULL: |
813 | ret = trace_seq_puts(s, " | "); | 740 | trace_seq_puts(s, " | "); |
814 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; | 741 | return; |
815 | case FLAGS_FILL_START: | 742 | case FLAGS_FILL_START: |
816 | ret = trace_seq_puts(s, " "); | 743 | trace_seq_puts(s, " "); |
817 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; | 744 | return; |
818 | case FLAGS_FILL_END: | 745 | case FLAGS_FILL_END: |
819 | ret = trace_seq_puts(s, " |"); | 746 | trace_seq_puts(s, " |"); |
820 | return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; | 747 | return; |
821 | } | 748 | } |
822 | 749 | ||
823 | /* Signal an overhead of time execution to the output */ | 750 | /* Signal an overhead of time execution to the output */ |
824 | if (flags & TRACE_GRAPH_PRINT_OVERHEAD) { | 751 | if (flags & TRACE_GRAPH_PRINT_OVERHEAD) |
825 | /* Duration exceeded 100 msecs */ | 752 | trace_seq_printf(s, "%c ", trace_find_mark(duration)); |
826 | if (duration > 100000ULL) | 753 | else |
827 | ret = trace_seq_puts(s, "! "); | 754 | trace_seq_puts(s, " "); |
828 | /* Duration exceeded 10 msecs */ | ||
829 | else if (duration > 10000ULL) | ||
830 | ret = trace_seq_puts(s, "+ "); | ||
831 | } | ||
832 | |||
833 | /* | ||
834 | * The -1 means we either did not exceed the duration tresholds | ||
835 | * or we dont want to print out the overhead. Either way we need | ||
836 | * to fill out the space. | ||
837 | */ | ||
838 | if (ret == -1) | ||
839 | ret = trace_seq_puts(s, " "); | ||
840 | |||
841 | /* Catching here any failure happenned above */ | ||
842 | if (!ret) | ||
843 | return TRACE_TYPE_PARTIAL_LINE; | ||
844 | |||
845 | ret = trace_print_graph_duration(duration, s); | ||
846 | if (ret != TRACE_TYPE_HANDLED) | ||
847 | return ret; | ||
848 | |||
849 | ret = trace_seq_puts(s, "| "); | ||
850 | if (!ret) | ||
851 | return TRACE_TYPE_PARTIAL_LINE; | ||
852 | 755 | ||
853 | return TRACE_TYPE_HANDLED; | 756 | trace_print_graph_duration(duration, s); |
757 | trace_seq_puts(s, "| "); | ||
854 | } | 758 | } |
855 | 759 | ||
856 | /* Case of a leaf function on its call entry */ | 760 | /* Case of a leaf function on its call entry */ |
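The conversions above work because trace_seq now tracks overflow itself: a printer performs all of its writes unconditionally and reports the outcome once through trace_handle_return(), instead of checking every trace_seq_*() return value. The helper's definition is not part of this hunk; based on the trace_seq_has_overflowed() checks used elsewhere in the patch, it presumably amounts to:

	/* Presumed definition (not shown in this diff): collapse the
	 * accumulated trace_seq overflow state into a print_line_t. */
	static inline enum print_line_t trace_handle_return(struct trace_seq *s)
	{
		return trace_seq_has_overflowed(s) ?
			TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
	}

With that in place, print_graph_duration() can also delegate the old '!'/'+' threshold logic to trace_find_mark() (added further down, in trace_output.c) and simply fall through.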
@@ -864,7 +768,6 @@ print_graph_entry_leaf(struct trace_iterator *iter, | |||
864 | struct ftrace_graph_ret *graph_ret; | 768 | struct ftrace_graph_ret *graph_ret; |
865 | struct ftrace_graph_ent *call; | 769 | struct ftrace_graph_ent *call; |
866 | unsigned long long duration; | 770 | unsigned long long duration; |
867 | int ret; | ||
868 | int i; | 771 | int i; |
869 | 772 | ||
870 | graph_ret = &ret_entry->ret; | 773 | graph_ret = &ret_entry->ret; |
@@ -890,22 +793,15 @@ print_graph_entry_leaf(struct trace_iterator *iter, | |||
890 | } | 793 | } |
891 | 794 | ||
892 | /* Overhead and duration */ | 795 | /* Overhead and duration */ |
893 | ret = print_graph_duration(duration, s, flags); | 796 | print_graph_duration(duration, s, flags); |
894 | if (ret == TRACE_TYPE_PARTIAL_LINE) | ||
895 | return TRACE_TYPE_PARTIAL_LINE; | ||
896 | 797 | ||
897 | /* Function */ | 798 | /* Function */ |
898 | for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { | 799 | for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) |
899 | ret = trace_seq_putc(s, ' '); | 800 | trace_seq_putc(s, ' '); |
900 | if (!ret) | ||
901 | return TRACE_TYPE_PARTIAL_LINE; | ||
902 | } | ||
903 | 801 | ||
904 | ret = trace_seq_printf(s, "%ps();\n", (void *)call->func); | 802 | trace_seq_printf(s, "%ps();\n", (void *)call->func); |
905 | if (!ret) | ||
906 | return TRACE_TYPE_PARTIAL_LINE; | ||
907 | 803 | ||
908 | return TRACE_TYPE_HANDLED; | 804 | return trace_handle_return(s); |
909 | } | 805 | } |
910 | 806 | ||
911 | static enum print_line_t | 807 | static enum print_line_t |
@@ -915,7 +811,6 @@ print_graph_entry_nested(struct trace_iterator *iter, | |||
915 | { | 811 | { |
916 | struct ftrace_graph_ent *call = &entry->graph_ent; | 812 | struct ftrace_graph_ent *call = &entry->graph_ent; |
917 | struct fgraph_data *data = iter->private; | 813 | struct fgraph_data *data = iter->private; |
918 | int ret; | ||
919 | int i; | 814 | int i; |
920 | 815 | ||
921 | if (data) { | 816 | if (data) { |
@@ -931,19 +826,15 @@ print_graph_entry_nested(struct trace_iterator *iter, | |||
931 | } | 826 | } |
932 | 827 | ||
933 | /* No time */ | 828 | /* No time */ |
934 | ret = print_graph_duration(0, s, flags | FLAGS_FILL_FULL); | 829 | print_graph_duration(0, s, flags | FLAGS_FILL_FULL); |
935 | if (ret != TRACE_TYPE_HANDLED) | ||
936 | return ret; | ||
937 | 830 | ||
938 | /* Function */ | 831 | /* Function */ |
939 | for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { | 832 | for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) |
940 | ret = trace_seq_putc(s, ' '); | 833 | trace_seq_putc(s, ' '); |
941 | if (!ret) | 834 | |
942 | return TRACE_TYPE_PARTIAL_LINE; | 835 | trace_seq_printf(s, "%ps() {\n", (void *)call->func); |
943 | } | ||
944 | 836 | ||
945 | ret = trace_seq_printf(s, "%ps() {\n", (void *)call->func); | 837 | if (trace_seq_has_overflowed(s)) |
946 | if (!ret) | ||
947 | return TRACE_TYPE_PARTIAL_LINE; | 838 | return TRACE_TYPE_PARTIAL_LINE; |
948 | 839 | ||
949 | /* | 840 | /* |
@@ -953,62 +844,43 @@ print_graph_entry_nested(struct trace_iterator *iter, | |||
953 | return TRACE_TYPE_NO_CONSUME; | 844 | return TRACE_TYPE_NO_CONSUME; |
954 | } | 845 | } |
955 | 846 | ||
956 | static enum print_line_t | 847 | static void |
957 | print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, | 848 | print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, |
958 | int type, unsigned long addr, u32 flags) | 849 | int type, unsigned long addr, u32 flags) |
959 | { | 850 | { |
960 | struct fgraph_data *data = iter->private; | 851 | struct fgraph_data *data = iter->private; |
961 | struct trace_entry *ent = iter->ent; | 852 | struct trace_entry *ent = iter->ent; |
962 | int cpu = iter->cpu; | 853 | int cpu = iter->cpu; |
963 | int ret; | ||
964 | 854 | ||
965 | /* Pid */ | 855 | /* Pid */ |
966 | if (verif_pid(s, ent->pid, cpu, data) == TRACE_TYPE_PARTIAL_LINE) | 856 | verif_pid(s, ent->pid, cpu, data); |
967 | return TRACE_TYPE_PARTIAL_LINE; | ||
968 | 857 | ||
969 | if (type) { | 858 | if (type) |
970 | /* Interrupt */ | 859 | /* Interrupt */ |
971 | ret = print_graph_irq(iter, addr, type, cpu, ent->pid, flags); | 860 | print_graph_irq(iter, addr, type, cpu, ent->pid, flags); |
972 | if (ret == TRACE_TYPE_PARTIAL_LINE) | ||
973 | return TRACE_TYPE_PARTIAL_LINE; | ||
974 | } | ||
975 | 861 | ||
976 | if (!(trace_flags & TRACE_ITER_CONTEXT_INFO)) | 862 | if (!(trace_flags & TRACE_ITER_CONTEXT_INFO)) |
977 | return 0; | 863 | return; |
978 | 864 | ||
979 | /* Absolute time */ | 865 | /* Absolute time */ |
980 | if (flags & TRACE_GRAPH_PRINT_ABS_TIME) { | 866 | if (flags & TRACE_GRAPH_PRINT_ABS_TIME) |
981 | ret = print_graph_abs_time(iter->ts, s); | 867 | print_graph_abs_time(iter->ts, s); |
982 | if (!ret) | ||
983 | return TRACE_TYPE_PARTIAL_LINE; | ||
984 | } | ||
985 | 868 | ||
986 | /* Cpu */ | 869 | /* Cpu */ |
987 | if (flags & TRACE_GRAPH_PRINT_CPU) { | 870 | if (flags & TRACE_GRAPH_PRINT_CPU) |
988 | ret = print_graph_cpu(s, cpu); | 871 | print_graph_cpu(s, cpu); |
989 | if (ret == TRACE_TYPE_PARTIAL_LINE) | ||
990 | return TRACE_TYPE_PARTIAL_LINE; | ||
991 | } | ||
992 | 872 | ||
993 | /* Proc */ | 873 | /* Proc */ |
994 | if (flags & TRACE_GRAPH_PRINT_PROC) { | 874 | if (flags & TRACE_GRAPH_PRINT_PROC) { |
995 | ret = print_graph_proc(s, ent->pid); | 875 | print_graph_proc(s, ent->pid); |
996 | if (ret == TRACE_TYPE_PARTIAL_LINE) | 876 | trace_seq_puts(s, " | "); |
997 | return TRACE_TYPE_PARTIAL_LINE; | ||
998 | |||
999 | ret = trace_seq_puts(s, " | "); | ||
1000 | if (!ret) | ||
1001 | return TRACE_TYPE_PARTIAL_LINE; | ||
1002 | } | 877 | } |
1003 | 878 | ||
1004 | /* Latency format */ | 879 | /* Latency format */ |
1005 | if (trace_flags & TRACE_ITER_LATENCY_FMT) { | 880 | if (trace_flags & TRACE_ITER_LATENCY_FMT) |
1006 | ret = print_graph_lat_fmt(s, ent); | 881 | print_graph_lat_fmt(s, ent); |
1007 | if (ret == TRACE_TYPE_PARTIAL_LINE) | ||
1008 | return TRACE_TYPE_PARTIAL_LINE; | ||
1009 | } | ||
1010 | 882 | ||
1011 | return 0; | 883 | return; |
1012 | } | 884 | } |
1013 | 885 | ||
1014 | /* | 886 | /* |
@@ -1126,8 +998,7 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, | |||
1126 | if (check_irq_entry(iter, flags, call->func, call->depth)) | 998 | if (check_irq_entry(iter, flags, call->func, call->depth)) |
1127 | return TRACE_TYPE_HANDLED; | 999 | return TRACE_TYPE_HANDLED; |
1128 | 1000 | ||
1129 | if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags)) | 1001 | print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags); |
1130 | return TRACE_TYPE_PARTIAL_LINE; | ||
1131 | 1002 | ||
1132 | leaf_ret = get_return_for_leaf(iter, field); | 1003 | leaf_ret = get_return_for_leaf(iter, field); |
1133 | if (leaf_ret) | 1004 | if (leaf_ret) |
@@ -1160,7 +1031,6 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, | |||
1160 | pid_t pid = ent->pid; | 1031 | pid_t pid = ent->pid; |
1161 | int cpu = iter->cpu; | 1032 | int cpu = iter->cpu; |
1162 | int func_match = 1; | 1033 | int func_match = 1; |
1163 | int ret; | ||
1164 | int i; | 1034 | int i; |
1165 | 1035 | ||
1166 | if (check_irq_return(iter, flags, trace->depth)) | 1036 | if (check_irq_return(iter, flags, trace->depth)) |
@@ -1186,20 +1056,14 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, | |||
1186 | } | 1056 | } |
1187 | } | 1057 | } |
1188 | 1058 | ||
1189 | if (print_graph_prologue(iter, s, 0, 0, flags)) | 1059 | print_graph_prologue(iter, s, 0, 0, flags); |
1190 | return TRACE_TYPE_PARTIAL_LINE; | ||
1191 | 1060 | ||
1192 | /* Overhead and duration */ | 1061 | /* Overhead and duration */ |
1193 | ret = print_graph_duration(duration, s, flags); | 1062 | print_graph_duration(duration, s, flags); |
1194 | if (ret == TRACE_TYPE_PARTIAL_LINE) | ||
1195 | return TRACE_TYPE_PARTIAL_LINE; | ||
1196 | 1063 | ||
1197 | /* Closing brace */ | 1064 | /* Closing brace */ |
1198 | for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) { | 1065 | for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) |
1199 | ret = trace_seq_putc(s, ' '); | 1066 | trace_seq_putc(s, ' '); |
1200 | if (!ret) | ||
1201 | return TRACE_TYPE_PARTIAL_LINE; | ||
1202 | } | ||
1203 | 1067 | ||
1204 | /* | 1068 | /* |
1205 | * If the return function does not have a matching entry, | 1069 | * If the return function does not have a matching entry, |
@@ -1208,30 +1072,20 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, | |||
1208 | * belongs to, write out the function name. Always do | 1072 | * belongs to, write out the function name. Always do |
1209 | * that if the funcgraph-tail option is enabled. | 1073 | * that if the funcgraph-tail option is enabled. |
1210 | */ | 1074 | */ |
1211 | if (func_match && !(flags & TRACE_GRAPH_PRINT_TAIL)) { | 1075 | if (func_match && !(flags & TRACE_GRAPH_PRINT_TAIL)) |
1212 | ret = trace_seq_puts(s, "}\n"); | 1076 | trace_seq_puts(s, "}\n"); |
1213 | if (!ret) | 1077 | else |
1214 | return TRACE_TYPE_PARTIAL_LINE; | 1078 | trace_seq_printf(s, "} /* %ps */\n", (void *)trace->func); |
1215 | } else { | ||
1216 | ret = trace_seq_printf(s, "} /* %ps */\n", (void *)trace->func); | ||
1217 | if (!ret) | ||
1218 | return TRACE_TYPE_PARTIAL_LINE; | ||
1219 | } | ||
1220 | 1079 | ||
1221 | /* Overrun */ | 1080 | /* Overrun */ |
1222 | if (flags & TRACE_GRAPH_PRINT_OVERRUN) { | 1081 | if (flags & TRACE_GRAPH_PRINT_OVERRUN) |
1223 | ret = trace_seq_printf(s, " (Overruns: %lu)\n", | 1082 | trace_seq_printf(s, " (Overruns: %lu)\n", |
1224 | trace->overrun); | 1083 | trace->overrun); |
1225 | if (!ret) | ||
1226 | return TRACE_TYPE_PARTIAL_LINE; | ||
1227 | } | ||
1228 | 1084 | ||
1229 | ret = print_graph_irq(iter, trace->func, TRACE_GRAPH_RET, | 1085 | print_graph_irq(iter, trace->func, TRACE_GRAPH_RET, |
1230 | cpu, pid, flags); | 1086 | cpu, pid, flags); |
1231 | if (ret == TRACE_TYPE_PARTIAL_LINE) | ||
1232 | return TRACE_TYPE_PARTIAL_LINE; | ||
1233 | 1087 | ||
1234 | return TRACE_TYPE_HANDLED; | 1088 | return trace_handle_return(s); |
1235 | } | 1089 | } |
1236 | 1090 | ||
1237 | static enum print_line_t | 1091 | static enum print_line_t |
@@ -1248,26 +1102,18 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, | |||
1248 | if (data) | 1102 | if (data) |
1249 | depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth; | 1103 | depth = per_cpu_ptr(data->cpu_data, iter->cpu)->depth; |
1250 | 1104 | ||
1251 | if (print_graph_prologue(iter, s, 0, 0, flags)) | 1105 | print_graph_prologue(iter, s, 0, 0, flags); |
1252 | return TRACE_TYPE_PARTIAL_LINE; | ||
1253 | 1106 | ||
1254 | /* No time */ | 1107 | /* No time */ |
1255 | ret = print_graph_duration(0, s, flags | FLAGS_FILL_FULL); | 1108 | print_graph_duration(0, s, flags | FLAGS_FILL_FULL); |
1256 | if (ret != TRACE_TYPE_HANDLED) | ||
1257 | return ret; | ||
1258 | 1109 | ||
1259 | /* Indentation */ | 1110 | /* Indentation */ |
1260 | if (depth > 0) | 1111 | if (depth > 0) |
1261 | for (i = 0; i < (depth + 1) * TRACE_GRAPH_INDENT; i++) { | 1112 | for (i = 0; i < (depth + 1) * TRACE_GRAPH_INDENT; i++) |
1262 | ret = trace_seq_putc(s, ' '); | 1113 | trace_seq_putc(s, ' '); |
1263 | if (!ret) | ||
1264 | return TRACE_TYPE_PARTIAL_LINE; | ||
1265 | } | ||
1266 | 1114 | ||
1267 | /* The comment */ | 1115 | /* The comment */ |
1268 | ret = trace_seq_puts(s, "/* "); | 1116 | trace_seq_puts(s, "/* "); |
1269 | if (!ret) | ||
1270 | return TRACE_TYPE_PARTIAL_LINE; | ||
1271 | 1117 | ||
1272 | switch (iter->ent->type) { | 1118 | switch (iter->ent->type) { |
1273 | case TRACE_BPRINT: | 1119 | case TRACE_BPRINT: |
@@ -1290,17 +1136,18 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent, | |||
1290 | return ret; | 1136 | return ret; |
1291 | } | 1137 | } |
1292 | 1138 | ||
1139 | if (trace_seq_has_overflowed(s)) | ||
1140 | goto out; | ||
1141 | |||
1293 | /* Strip ending newline */ | 1142 | /* Strip ending newline */ |
1294 | if (s->buffer[s->len - 1] == '\n') { | 1143 | if (s->buffer[s->seq.len - 1] == '\n') { |
1295 | s->buffer[s->len - 1] = '\0'; | 1144 | s->buffer[s->seq.len - 1] = '\0'; |
1296 | s->len--; | 1145 | s->seq.len--; |
1297 | } | 1146 | } |
1298 | 1147 | ||
1299 | ret = trace_seq_puts(s, " */\n"); | 1148 | trace_seq_puts(s, " */\n"); |
1300 | if (!ret) | 1149 | out: |
1301 | return TRACE_TYPE_PARTIAL_LINE; | 1150 | return trace_handle_return(s); |
1302 | |||
1303 | return TRACE_TYPE_HANDLED; | ||
1304 | } | 1151 | } |
1305 | 1152 | ||
1306 | 1153 | ||
@@ -1407,32 +1254,32 @@ static void __print_graph_headers_flags(struct seq_file *s, u32 flags) | |||
1407 | print_lat_header(s, flags); | 1254 | print_lat_header(s, flags); |
1408 | 1255 | ||
1409 | /* 1st line */ | 1256 | /* 1st line */ |
1410 | seq_printf(s, "#"); | 1257 | seq_putc(s, '#'); |
1411 | if (flags & TRACE_GRAPH_PRINT_ABS_TIME) | 1258 | if (flags & TRACE_GRAPH_PRINT_ABS_TIME) |
1412 | seq_printf(s, " TIME "); | 1259 | seq_puts(s, " TIME "); |
1413 | if (flags & TRACE_GRAPH_PRINT_CPU) | 1260 | if (flags & TRACE_GRAPH_PRINT_CPU) |
1414 | seq_printf(s, " CPU"); | 1261 | seq_puts(s, " CPU"); |
1415 | if (flags & TRACE_GRAPH_PRINT_PROC) | 1262 | if (flags & TRACE_GRAPH_PRINT_PROC) |
1416 | seq_printf(s, " TASK/PID "); | 1263 | seq_puts(s, " TASK/PID "); |
1417 | if (lat) | 1264 | if (lat) |
1418 | seq_printf(s, "||||"); | 1265 | seq_puts(s, "||||"); |
1419 | if (flags & TRACE_GRAPH_PRINT_DURATION) | 1266 | if (flags & TRACE_GRAPH_PRINT_DURATION) |
1420 | seq_printf(s, " DURATION "); | 1267 | seq_puts(s, " DURATION "); |
1421 | seq_printf(s, " FUNCTION CALLS\n"); | 1268 | seq_puts(s, " FUNCTION CALLS\n"); |
1422 | 1269 | ||
1423 | /* 2nd line */ | 1270 | /* 2nd line */ |
1424 | seq_printf(s, "#"); | 1271 | seq_putc(s, '#'); |
1425 | if (flags & TRACE_GRAPH_PRINT_ABS_TIME) | 1272 | if (flags & TRACE_GRAPH_PRINT_ABS_TIME) |
1426 | seq_printf(s, " | "); | 1273 | seq_puts(s, " | "); |
1427 | if (flags & TRACE_GRAPH_PRINT_CPU) | 1274 | if (flags & TRACE_GRAPH_PRINT_CPU) |
1428 | seq_printf(s, " | "); | 1275 | seq_puts(s, " | "); |
1429 | if (flags & TRACE_GRAPH_PRINT_PROC) | 1276 | if (flags & TRACE_GRAPH_PRINT_PROC) |
1430 | seq_printf(s, " | | "); | 1277 | seq_puts(s, " | | "); |
1431 | if (lat) | 1278 | if (lat) |
1432 | seq_printf(s, "||||"); | 1279 | seq_puts(s, "||||"); |
1433 | if (flags & TRACE_GRAPH_PRINT_DURATION) | 1280 | if (flags & TRACE_GRAPH_PRINT_DURATION) |
1434 | seq_printf(s, " | | "); | 1281 | seq_puts(s, " | | "); |
1435 | seq_printf(s, " | | | |\n"); | 1282 | seq_puts(s, " | | | |\n"); |
1436 | } | 1283 | } |
1437 | 1284 | ||
1438 | static void print_graph_headers(struct seq_file *s) | 1285 | static void print_graph_headers(struct seq_file *s) |
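The header hunk also converts seq_printf() calls whose format string is a plain literal to seq_puts()/seq_putc(); the output is unchanged, the literal just skips format parsing and can never be misread as a conversion specification. The pattern, taken from the first header line above:

	seq_printf(s, "#");		/* before: literal passed as a format string */
	seq_putc(s, '#');		/* after:  one character, no parsing */

	seq_printf(s, " DURATION ");	/* before */
	seq_puts(s, " DURATION ");	/* after */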
diff --git a/kernel/trace/trace_kdb.c b/kernel/trace/trace_kdb.c index bd90e1b06088..b0b1c44e923a 100644 --- a/kernel/trace/trace_kdb.c +++ b/kernel/trace/trace_kdb.c | |||
@@ -20,10 +20,12 @@ static void ftrace_dump_buf(int skip_lines, long cpu_file) | |||
20 | { | 20 | { |
21 | /* use static because iter can be a bit big for the stack */ | 21 | /* use static because iter can be a bit big for the stack */ |
22 | static struct trace_iterator iter; | 22 | static struct trace_iterator iter; |
23 | static struct ring_buffer_iter *buffer_iter[CONFIG_NR_CPUS]; | ||
23 | unsigned int old_userobj; | 24 | unsigned int old_userobj; |
24 | int cnt = 0, cpu; | 25 | int cnt = 0, cpu; |
25 | 26 | ||
26 | trace_init_global_iter(&iter); | 27 | trace_init_global_iter(&iter); |
28 | iter.buffer_iter = buffer_iter; | ||
27 | 29 | ||
28 | for_each_tracing_cpu(cpu) { | 30 | for_each_tracing_cpu(cpu) { |
29 | atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); | 31 | atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); |
@@ -57,19 +59,19 @@ static void ftrace_dump_buf(int skip_lines, long cpu_file) | |||
57 | ring_buffer_read_start(iter.buffer_iter[cpu_file]); | 59 | ring_buffer_read_start(iter.buffer_iter[cpu_file]); |
58 | tracing_iter_reset(&iter, cpu_file); | 60 | tracing_iter_reset(&iter, cpu_file); |
59 | } | 61 | } |
60 | if (!trace_empty(&iter)) | 62 | |
61 | trace_find_next_entry_inc(&iter); | 63 | while (trace_find_next_entry_inc(&iter)) { |
62 | while (!trace_empty(&iter)) { | ||
63 | if (!cnt) | 64 | if (!cnt) |
64 | kdb_printf("---------------------------------\n"); | 65 | kdb_printf("---------------------------------\n"); |
65 | cnt++; | 66 | cnt++; |
66 | 67 | ||
67 | if (trace_find_next_entry_inc(&iter) != NULL && !skip_lines) | 68 | if (!skip_lines) { |
68 | print_trace_line(&iter); | 69 | print_trace_line(&iter); |
69 | if (!skip_lines) | ||
70 | trace_printk_seq(&iter.seq); | 70 | trace_printk_seq(&iter.seq); |
71 | else | 71 | } else { |
72 | skip_lines--; | 72 | skip_lines--; |
73 | } | ||
74 | |||
73 | if (KDB_FLAG(CMD_INTERRUPT)) | 75 | if (KDB_FLAG(CMD_INTERRUPT)) |
74 | goto out; | 76 | goto out; |
75 | } | 77 | } |
@@ -86,9 +88,12 @@ out: | |||
86 | atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); | 88 | atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); |
87 | } | 89 | } |
88 | 90 | ||
89 | for_each_tracing_cpu(cpu) | 91 | for_each_tracing_cpu(cpu) { |
90 | if (iter.buffer_iter[cpu]) | 92 | if (iter.buffer_iter[cpu]) { |
91 | ring_buffer_read_finish(iter.buffer_iter[cpu]); | 93 | ring_buffer_read_finish(iter.buffer_iter[cpu]); |
94 | iter.buffer_iter[cpu] = NULL; | ||
95 | } | ||
96 | } | ||
92 | } | 97 | } |
93 | 98 | ||
94 | /* | 99 | /* |
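Two fixes land in ftrace_dump_buf(): the iterator is given a static per-CPU buffer_iter array (previously iter.buffer_iter was left unset here), and the dump loop now advances the iterator exactly once per entry instead of calling trace_find_next_entry_inc() both before and inside the loop. A simplified sketch of the new loop shape; the real code jumps to the 'out' label on interrupt rather than breaking:

	while (trace_find_next_entry_inc(&iter)) {
		if (!cnt)
			kdb_printf("---------------------------------\n");
		cnt++;

		if (!skip_lines) {
			print_trace_line(&iter);	/* format the entry...        */
			trace_printk_seq(&iter.seq);	/* ...and flush it out to kdb */
		} else {
			skip_lines--;
		}

		if (KDB_FLAG(CMD_INTERRUPT))
			break;
	}

Clearing iter.buffer_iter[cpu] after ring_buffer_read_finish() keeps the static array from handing stale iterators to the next invocation.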
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 282f6e4e5539..5edb518be345 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
@@ -826,7 +826,7 @@ static int probes_seq_show(struct seq_file *m, void *v) | |||
826 | struct trace_kprobe *tk = v; | 826 | struct trace_kprobe *tk = v; |
827 | int i; | 827 | int i; |
828 | 828 | ||
829 | seq_printf(m, "%c", trace_kprobe_is_return(tk) ? 'r' : 'p'); | 829 | seq_putc(m, trace_kprobe_is_return(tk) ? 'r' : 'p'); |
830 | seq_printf(m, ":%s/%s", tk->tp.call.class->system, | 830 | seq_printf(m, ":%s/%s", tk->tp.call.class->system, |
831 | ftrace_event_name(&tk->tp.call)); | 831 | ftrace_event_name(&tk->tp.call)); |
832 | 832 | ||
@@ -840,7 +840,7 @@ static int probes_seq_show(struct seq_file *m, void *v) | |||
840 | 840 | ||
841 | for (i = 0; i < tk->tp.nr_args; i++) | 841 | for (i = 0; i < tk->tp.nr_args; i++) |
842 | seq_printf(m, " %s=%s", tk->tp.args[i].name, tk->tp.args[i].comm); | 842 | seq_printf(m, " %s=%s", tk->tp.args[i].name, tk->tp.args[i].comm); |
843 | seq_printf(m, "\n"); | 843 | seq_putc(m, '\n'); |
844 | 844 | ||
845 | return 0; | 845 | return 0; |
846 | } | 846 | } |
@@ -1024,27 +1024,22 @@ print_kprobe_event(struct trace_iterator *iter, int flags, | |||
1024 | field = (struct kprobe_trace_entry_head *)iter->ent; | 1024 | field = (struct kprobe_trace_entry_head *)iter->ent; |
1025 | tp = container_of(event, struct trace_probe, call.event); | 1025 | tp = container_of(event, struct trace_probe, call.event); |
1026 | 1026 | ||
1027 | if (!trace_seq_printf(s, "%s: (", ftrace_event_name(&tp->call))) | 1027 | trace_seq_printf(s, "%s: (", ftrace_event_name(&tp->call)); |
1028 | goto partial; | ||
1029 | 1028 | ||
1030 | if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET)) | 1029 | if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET)) |
1031 | goto partial; | 1030 | goto out; |
1032 | 1031 | ||
1033 | if (!trace_seq_puts(s, ")")) | 1032 | trace_seq_putc(s, ')'); |
1034 | goto partial; | ||
1035 | 1033 | ||
1036 | data = (u8 *)&field[1]; | 1034 | data = (u8 *)&field[1]; |
1037 | for (i = 0; i < tp->nr_args; i++) | 1035 | for (i = 0; i < tp->nr_args; i++) |
1038 | if (!tp->args[i].type->print(s, tp->args[i].name, | 1036 | if (!tp->args[i].type->print(s, tp->args[i].name, |
1039 | data + tp->args[i].offset, field)) | 1037 | data + tp->args[i].offset, field)) |
1040 | goto partial; | 1038 | goto out; |
1041 | |||
1042 | if (!trace_seq_puts(s, "\n")) | ||
1043 | goto partial; | ||
1044 | 1039 | ||
1045 | return TRACE_TYPE_HANDLED; | 1040 | trace_seq_putc(s, '\n'); |
1046 | partial: | 1041 | out: |
1047 | return TRACE_TYPE_PARTIAL_LINE; | 1042 | return trace_handle_return(s); |
1048 | } | 1043 | } |
1049 | 1044 | ||
1050 | static enum print_line_t | 1045 | static enum print_line_t |
@@ -1060,33 +1055,28 @@ print_kretprobe_event(struct trace_iterator *iter, int flags, | |||
1060 | field = (struct kretprobe_trace_entry_head *)iter->ent; | 1055 | field = (struct kretprobe_trace_entry_head *)iter->ent; |
1061 | tp = container_of(event, struct trace_probe, call.event); | 1056 | tp = container_of(event, struct trace_probe, call.event); |
1062 | 1057 | ||
1063 | if (!trace_seq_printf(s, "%s: (", ftrace_event_name(&tp->call))) | 1058 | trace_seq_printf(s, "%s: (", ftrace_event_name(&tp->call)); |
1064 | goto partial; | ||
1065 | 1059 | ||
1066 | if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET)) | 1060 | if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET)) |
1067 | goto partial; | 1061 | goto out; |
1068 | 1062 | ||
1069 | if (!trace_seq_puts(s, " <- ")) | 1063 | trace_seq_puts(s, " <- "); |
1070 | goto partial; | ||
1071 | 1064 | ||
1072 | if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET)) | 1065 | if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET)) |
1073 | goto partial; | 1066 | goto out; |
1074 | 1067 | ||
1075 | if (!trace_seq_puts(s, ")")) | 1068 | trace_seq_putc(s, ')'); |
1076 | goto partial; | ||
1077 | 1069 | ||
1078 | data = (u8 *)&field[1]; | 1070 | data = (u8 *)&field[1]; |
1079 | for (i = 0; i < tp->nr_args; i++) | 1071 | for (i = 0; i < tp->nr_args; i++) |
1080 | if (!tp->args[i].type->print(s, tp->args[i].name, | 1072 | if (!tp->args[i].type->print(s, tp->args[i].name, |
1081 | data + tp->args[i].offset, field)) | 1073 | data + tp->args[i].offset, field)) |
1082 | goto partial; | 1074 | goto out; |
1083 | 1075 | ||
1084 | if (!trace_seq_puts(s, "\n")) | 1076 | trace_seq_putc(s, '\n'); |
1085 | goto partial; | ||
1086 | 1077 | ||
1087 | return TRACE_TYPE_HANDLED; | 1078 | out: |
1088 | partial: | 1079 | return trace_handle_return(s); |
1089 | return TRACE_TYPE_PARTIAL_LINE; | ||
1090 | } | 1080 | } |
1091 | 1081 | ||
1092 | 1082 | ||
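The kprobe and kretprobe printers get the same treatment: the old 'partial' label becomes 'out', the unconditional trace_seq writes lose their checks, and only the helpers that can fail for reasons of their own (seq_print_ip_sym() and the per-argument type->print() callbacks) still short-circuit. Condensed, with a placeholder standing in for ftrace_event_name(&tp->call):

	trace_seq_printf(s, "%s: (", "my_probe");	/* "my_probe" is illustrative */

	if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
		goto out;			/* helper signals its own failure */

	trace_seq_putc(s, ')');
	trace_seq_putc(s, '\n');
out:
	return trace_handle_return(s);		/* overflow alone decides the result */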
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 0abd9b863474..7a9ba62e9fef 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c | |||
@@ -59,17 +59,15 @@ static void mmio_trace_start(struct trace_array *tr) | |||
59 | mmio_reset_data(tr); | 59 | mmio_reset_data(tr); |
60 | } | 60 | } |
61 | 61 | ||
62 | static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev) | 62 | static void mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev) |
63 | { | 63 | { |
64 | int ret = 0; | ||
65 | int i; | 64 | int i; |
66 | resource_size_t start, end; | 65 | resource_size_t start, end; |
67 | const struct pci_driver *drv = pci_dev_driver(dev); | 66 | const struct pci_driver *drv = pci_dev_driver(dev); |
68 | 67 | ||
69 | /* XXX: incomplete checks for trace_seq_printf() return value */ | 68 | trace_seq_printf(s, "PCIDEV %02x%02x %04x%04x %x", |
70 | ret += trace_seq_printf(s, "PCIDEV %02x%02x %04x%04x %x", | 69 | dev->bus->number, dev->devfn, |
71 | dev->bus->number, dev->devfn, | 70 | dev->vendor, dev->device, dev->irq); |
72 | dev->vendor, dev->device, dev->irq); | ||
73 | /* | 71 | /* |
74 | * XXX: is pci_resource_to_user() appropriate, since we are | 72 | * XXX: is pci_resource_to_user() appropriate, since we are |
75 | * supposed to interpret the __ioremap() phys_addr argument based on | 73 | * supposed to interpret the __ioremap() phys_addr argument based on |
@@ -77,21 +75,20 @@ static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev) | |||
77 | */ | 75 | */ |
78 | for (i = 0; i < 7; i++) { | 76 | for (i = 0; i < 7; i++) { |
79 | pci_resource_to_user(dev, i, &dev->resource[i], &start, &end); | 77 | pci_resource_to_user(dev, i, &dev->resource[i], &start, &end); |
80 | ret += trace_seq_printf(s, " %llx", | 78 | trace_seq_printf(s, " %llx", |
81 | (unsigned long long)(start | | 79 | (unsigned long long)(start | |
82 | (dev->resource[i].flags & PCI_REGION_FLAG_MASK))); | 80 | (dev->resource[i].flags & PCI_REGION_FLAG_MASK))); |
83 | } | 81 | } |
84 | for (i = 0; i < 7; i++) { | 82 | for (i = 0; i < 7; i++) { |
85 | pci_resource_to_user(dev, i, &dev->resource[i], &start, &end); | 83 | pci_resource_to_user(dev, i, &dev->resource[i], &start, &end); |
86 | ret += trace_seq_printf(s, " %llx", | 84 | trace_seq_printf(s, " %llx", |
87 | dev->resource[i].start < dev->resource[i].end ? | 85 | dev->resource[i].start < dev->resource[i].end ? |
88 | (unsigned long long)(end - start) + 1 : 0); | 86 | (unsigned long long)(end - start) + 1 : 0); |
89 | } | 87 | } |
90 | if (drv) | 88 | if (drv) |
91 | ret += trace_seq_printf(s, " %s\n", drv->name); | 89 | trace_seq_printf(s, " %s\n", drv->name); |
92 | else | 90 | else |
93 | ret += trace_seq_puts(s, " \n"); | 91 | trace_seq_puts(s, " \n"); |
94 | return ret; | ||
95 | } | 92 | } |
96 | 93 | ||
97 | static void destroy_header_iter(struct header_iter *hiter) | 94 | static void destroy_header_iter(struct header_iter *hiter) |
@@ -179,28 +176,27 @@ static enum print_line_t mmio_print_rw(struct trace_iterator *iter) | |||
179 | unsigned long long t = ns2usecs(iter->ts); | 176 | unsigned long long t = ns2usecs(iter->ts); |
180 | unsigned long usec_rem = do_div(t, USEC_PER_SEC); | 177 | unsigned long usec_rem = do_div(t, USEC_PER_SEC); |
181 | unsigned secs = (unsigned long)t; | 178 | unsigned secs = (unsigned long)t; |
182 | int ret = 1; | ||
183 | 179 | ||
184 | trace_assign_type(field, entry); | 180 | trace_assign_type(field, entry); |
185 | rw = &field->rw; | 181 | rw = &field->rw; |
186 | 182 | ||
187 | switch (rw->opcode) { | 183 | switch (rw->opcode) { |
188 | case MMIO_READ: | 184 | case MMIO_READ: |
189 | ret = trace_seq_printf(s, | 185 | trace_seq_printf(s, |
190 | "R %d %u.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", | 186 | "R %d %u.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", |
191 | rw->width, secs, usec_rem, rw->map_id, | 187 | rw->width, secs, usec_rem, rw->map_id, |
192 | (unsigned long long)rw->phys, | 188 | (unsigned long long)rw->phys, |
193 | rw->value, rw->pc, 0); | 189 | rw->value, rw->pc, 0); |
194 | break; | 190 | break; |
195 | case MMIO_WRITE: | 191 | case MMIO_WRITE: |
196 | ret = trace_seq_printf(s, | 192 | trace_seq_printf(s, |
197 | "W %d %u.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", | 193 | "W %d %u.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", |
198 | rw->width, secs, usec_rem, rw->map_id, | 194 | rw->width, secs, usec_rem, rw->map_id, |
199 | (unsigned long long)rw->phys, | 195 | (unsigned long long)rw->phys, |
200 | rw->value, rw->pc, 0); | 196 | rw->value, rw->pc, 0); |
201 | break; | 197 | break; |
202 | case MMIO_UNKNOWN_OP: | 198 | case MMIO_UNKNOWN_OP: |
203 | ret = trace_seq_printf(s, | 199 | trace_seq_printf(s, |
204 | "UNKNOWN %u.%06lu %d 0x%llx %02lx,%02lx," | 200 | "UNKNOWN %u.%06lu %d 0x%llx %02lx,%02lx," |
205 | "%02lx 0x%lx %d\n", | 201 | "%02lx 0x%lx %d\n", |
206 | secs, usec_rem, rw->map_id, | 202 | secs, usec_rem, rw->map_id, |
@@ -209,12 +205,11 @@ static enum print_line_t mmio_print_rw(struct trace_iterator *iter) | |||
209 | (rw->value >> 0) & 0xff, rw->pc, 0); | 205 | (rw->value >> 0) & 0xff, rw->pc, 0); |
210 | break; | 206 | break; |
211 | default: | 207 | default: |
212 | ret = trace_seq_puts(s, "rw what?\n"); | 208 | trace_seq_puts(s, "rw what?\n"); |
213 | break; | 209 | break; |
214 | } | 210 | } |
215 | if (ret) | 211 | |
216 | return TRACE_TYPE_HANDLED; | 212 | return trace_handle_return(s); |
217 | return TRACE_TYPE_PARTIAL_LINE; | ||
218 | } | 213 | } |
219 | 214 | ||
220 | static enum print_line_t mmio_print_map(struct trace_iterator *iter) | 215 | static enum print_line_t mmio_print_map(struct trace_iterator *iter) |
@@ -226,31 +221,29 @@ static enum print_line_t mmio_print_map(struct trace_iterator *iter) | |||
226 | unsigned long long t = ns2usecs(iter->ts); | 221 | unsigned long long t = ns2usecs(iter->ts); |
227 | unsigned long usec_rem = do_div(t, USEC_PER_SEC); | 222 | unsigned long usec_rem = do_div(t, USEC_PER_SEC); |
228 | unsigned secs = (unsigned long)t; | 223 | unsigned secs = (unsigned long)t; |
229 | int ret; | ||
230 | 224 | ||
231 | trace_assign_type(field, entry); | 225 | trace_assign_type(field, entry); |
232 | m = &field->map; | 226 | m = &field->map; |
233 | 227 | ||
234 | switch (m->opcode) { | 228 | switch (m->opcode) { |
235 | case MMIO_PROBE: | 229 | case MMIO_PROBE: |
236 | ret = trace_seq_printf(s, | 230 | trace_seq_printf(s, |
237 | "MAP %u.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n", | 231 | "MAP %u.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n", |
238 | secs, usec_rem, m->map_id, | 232 | secs, usec_rem, m->map_id, |
239 | (unsigned long long)m->phys, m->virt, m->len, | 233 | (unsigned long long)m->phys, m->virt, m->len, |
240 | 0UL, 0); | 234 | 0UL, 0); |
241 | break; | 235 | break; |
242 | case MMIO_UNPROBE: | 236 | case MMIO_UNPROBE: |
243 | ret = trace_seq_printf(s, | 237 | trace_seq_printf(s, |
244 | "UNMAP %u.%06lu %d 0x%lx %d\n", | 238 | "UNMAP %u.%06lu %d 0x%lx %d\n", |
245 | secs, usec_rem, m->map_id, 0UL, 0); | 239 | secs, usec_rem, m->map_id, 0UL, 0); |
246 | break; | 240 | break; |
247 | default: | 241 | default: |
248 | ret = trace_seq_puts(s, "map what?\n"); | 242 | trace_seq_puts(s, "map what?\n"); |
249 | break; | 243 | break; |
250 | } | 244 | } |
251 | if (ret) | 245 | |
252 | return TRACE_TYPE_HANDLED; | 246 | return trace_handle_return(s); |
253 | return TRACE_TYPE_PARTIAL_LINE; | ||
254 | } | 247 | } |
255 | 248 | ||
256 | static enum print_line_t mmio_print_mark(struct trace_iterator *iter) | 249 | static enum print_line_t mmio_print_mark(struct trace_iterator *iter) |
@@ -262,14 +255,11 @@ static enum print_line_t mmio_print_mark(struct trace_iterator *iter) | |||
262 | unsigned long long t = ns2usecs(iter->ts); | 255 | unsigned long long t = ns2usecs(iter->ts); |
263 | unsigned long usec_rem = do_div(t, USEC_PER_SEC); | 256 | unsigned long usec_rem = do_div(t, USEC_PER_SEC); |
264 | unsigned secs = (unsigned long)t; | 257 | unsigned secs = (unsigned long)t; |
265 | int ret; | ||
266 | 258 | ||
267 | /* The trailing newline must be in the message. */ | 259 | /* The trailing newline must be in the message. */ |
268 | ret = trace_seq_printf(s, "MARK %u.%06lu %s", secs, usec_rem, msg); | 260 | trace_seq_printf(s, "MARK %u.%06lu %s", secs, usec_rem, msg); |
269 | if (!ret) | ||
270 | return TRACE_TYPE_PARTIAL_LINE; | ||
271 | 261 | ||
272 | return TRACE_TYPE_HANDLED; | 262 | return trace_handle_return(s); |
273 | } | 263 | } |
274 | 264 | ||
275 | static enum print_line_t mmio_print_line(struct trace_iterator *iter) | 265 | static enum print_line_t mmio_print_line(struct trace_iterator *iter) |
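The mmiotrace printers show why the conversion is a cleanup as much as a fix: mmio_print_pcidev() used to sum trace_seq_printf() return values under an admitted "incomplete checks" XXX, and that bookkeeping disappears entirely once the sequence tracks its own overflow. A caller that still wants a yes/no answer asks the sequence afterwards; for illustration (this particular caller is hypothetical):

	/* Hypothetical caller: emit the PCIDEV line, then ask the
	 * trace_seq whether everything actually fit. */
	mmio_print_pcidev(s, dev);

	if (trace_seq_has_overflowed(s))
		return TRACE_TYPE_PARTIAL_LINE;	/* ran out of buffer mid-line */

	return TRACE_TYPE_HANDLED;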
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index c6977d5a9b12..b77b9a697619 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c | |||
@@ -25,15 +25,12 @@ enum print_line_t trace_print_bputs_msg_only(struct trace_iterator *iter) | |||
25 | struct trace_seq *s = &iter->seq; | 25 | struct trace_seq *s = &iter->seq; |
26 | struct trace_entry *entry = iter->ent; | 26 | struct trace_entry *entry = iter->ent; |
27 | struct bputs_entry *field; | 27 | struct bputs_entry *field; |
28 | int ret; | ||
29 | 28 | ||
30 | trace_assign_type(field, entry); | 29 | trace_assign_type(field, entry); |
31 | 30 | ||
32 | ret = trace_seq_puts(s, field->str); | 31 | trace_seq_puts(s, field->str); |
33 | if (!ret) | ||
34 | return TRACE_TYPE_PARTIAL_LINE; | ||
35 | 32 | ||
36 | return TRACE_TYPE_HANDLED; | 33 | return trace_handle_return(s); |
37 | } | 34 | } |
38 | 35 | ||
39 | enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) | 36 | enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) |
@@ -41,15 +38,12 @@ enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) | |||
41 | struct trace_seq *s = &iter->seq; | 38 | struct trace_seq *s = &iter->seq; |
42 | struct trace_entry *entry = iter->ent; | 39 | struct trace_entry *entry = iter->ent; |
43 | struct bprint_entry *field; | 40 | struct bprint_entry *field; |
44 | int ret; | ||
45 | 41 | ||
46 | trace_assign_type(field, entry); | 42 | trace_assign_type(field, entry); |
47 | 43 | ||
48 | ret = trace_seq_bprintf(s, field->fmt, field->buf); | 44 | trace_seq_bprintf(s, field->fmt, field->buf); |
49 | if (!ret) | ||
50 | return TRACE_TYPE_PARTIAL_LINE; | ||
51 | 45 | ||
52 | return TRACE_TYPE_HANDLED; | 46 | return trace_handle_return(s); |
53 | } | 47 | } |
54 | 48 | ||
55 | enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter) | 49 | enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter) |
@@ -57,15 +51,12 @@ enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter) | |||
57 | struct trace_seq *s = &iter->seq; | 51 | struct trace_seq *s = &iter->seq; |
58 | struct trace_entry *entry = iter->ent; | 52 | struct trace_entry *entry = iter->ent; |
59 | struct print_entry *field; | 53 | struct print_entry *field; |
60 | int ret; | ||
61 | 54 | ||
62 | trace_assign_type(field, entry); | 55 | trace_assign_type(field, entry); |
63 | 56 | ||
64 | ret = trace_seq_puts(s, field->buf); | 57 | trace_seq_puts(s, field->buf); |
65 | if (!ret) | ||
66 | return TRACE_TYPE_PARTIAL_LINE; | ||
67 | 58 | ||
68 | return TRACE_TYPE_HANDLED; | 59 | return trace_handle_return(s); |
69 | } | 60 | } |
70 | 61 | ||
71 | const char * | 62 | const char * |
@@ -124,7 +115,7 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, | |||
124 | 115 | ||
125 | if (ret == (const char *)(trace_seq_buffer_ptr(p))) | 116 | if (ret == (const char *)(trace_seq_buffer_ptr(p))) |
126 | trace_seq_printf(p, "0x%lx", val); | 117 | trace_seq_printf(p, "0x%lx", val); |
127 | 118 | ||
128 | trace_seq_putc(p, 0); | 119 | trace_seq_putc(p, 0); |
129 | 120 | ||
130 | return ret; | 121 | return ret; |
@@ -193,7 +184,6 @@ int ftrace_raw_output_prep(struct trace_iterator *iter, | |||
193 | struct trace_seq *s = &iter->seq; | 184 | struct trace_seq *s = &iter->seq; |
194 | struct trace_seq *p = &iter->tmp_seq; | 185 | struct trace_seq *p = &iter->tmp_seq; |
195 | struct trace_entry *entry; | 186 | struct trace_entry *entry; |
196 | int ret; | ||
197 | 187 | ||
198 | event = container_of(trace_event, struct ftrace_event_call, event); | 188 | event = container_of(trace_event, struct ftrace_event_call, event); |
199 | entry = iter->ent; | 189 | entry = iter->ent; |
@@ -204,11 +194,9 @@ int ftrace_raw_output_prep(struct trace_iterator *iter, | |||
204 | } | 194 | } |
205 | 195 | ||
206 | trace_seq_init(p); | 196 | trace_seq_init(p); |
207 | ret = trace_seq_printf(s, "%s: ", ftrace_event_name(event)); | 197 | trace_seq_printf(s, "%s: ", ftrace_event_name(event)); |
208 | if (!ret) | ||
209 | return TRACE_TYPE_PARTIAL_LINE; | ||
210 | 198 | ||
211 | return 0; | 199 | return trace_handle_return(s); |
212 | } | 200 | } |
213 | EXPORT_SYMBOL(ftrace_raw_output_prep); | 201 | EXPORT_SYMBOL(ftrace_raw_output_prep); |
214 | 202 | ||
@@ -216,18 +204,11 @@ static int ftrace_output_raw(struct trace_iterator *iter, char *name, | |||
216 | char *fmt, va_list ap) | 204 | char *fmt, va_list ap) |
217 | { | 205 | { |
218 | struct trace_seq *s = &iter->seq; | 206 | struct trace_seq *s = &iter->seq; |
219 | int ret; | ||
220 | |||
221 | ret = trace_seq_printf(s, "%s: ", name); | ||
222 | if (!ret) | ||
223 | return TRACE_TYPE_PARTIAL_LINE; | ||
224 | |||
225 | ret = trace_seq_vprintf(s, fmt, ap); | ||
226 | 207 | ||
227 | if (!ret) | 208 | trace_seq_printf(s, "%s: ", name); |
228 | return TRACE_TYPE_PARTIAL_LINE; | 209 | trace_seq_vprintf(s, fmt, ap); |
229 | 210 | ||
230 | return TRACE_TYPE_HANDLED; | 211 | return trace_handle_return(s); |
231 | } | 212 | } |
232 | 213 | ||
233 | int ftrace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...) | 214 | int ftrace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...) |
@@ -260,7 +241,7 @@ static inline const char *kretprobed(const char *name) | |||
260 | } | 241 | } |
261 | #endif /* CONFIG_KRETPROBES */ | 242 | #endif /* CONFIG_KRETPROBES */ |
262 | 243 | ||
263 | static int | 244 | static void |
264 | seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address) | 245 | seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address) |
265 | { | 246 | { |
266 | #ifdef CONFIG_KALLSYMS | 247 | #ifdef CONFIG_KALLSYMS |
@@ -271,12 +252,11 @@ seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address) | |||
271 | 252 | ||
272 | name = kretprobed(str); | 253 | name = kretprobed(str); |
273 | 254 | ||
274 | return trace_seq_printf(s, fmt, name); | 255 | trace_seq_printf(s, fmt, name); |
275 | #endif | 256 | #endif |
276 | return 1; | ||
277 | } | 257 | } |
278 | 258 | ||
279 | static int | 259 | static void |
280 | seq_print_sym_offset(struct trace_seq *s, const char *fmt, | 260 | seq_print_sym_offset(struct trace_seq *s, const char *fmt, |
281 | unsigned long address) | 261 | unsigned long address) |
282 | { | 262 | { |
@@ -287,9 +267,8 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt, | |||
287 | sprint_symbol(str, address); | 267 | sprint_symbol(str, address); |
288 | name = kretprobed(str); | 268 | name = kretprobed(str); |
289 | 269 | ||
290 | return trace_seq_printf(s, fmt, name); | 270 | trace_seq_printf(s, fmt, name); |
291 | #endif | 271 | #endif |
292 | return 1; | ||
293 | } | 272 | } |
294 | 273 | ||
295 | #ifndef CONFIG_64BIT | 274 | #ifndef CONFIG_64BIT |
@@ -320,14 +299,14 @@ int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, | |||
320 | if (file) { | 299 | if (file) { |
321 | ret = trace_seq_path(s, &file->f_path); | 300 | ret = trace_seq_path(s, &file->f_path); |
322 | if (ret) | 301 | if (ret) |
323 | ret = trace_seq_printf(s, "[+0x%lx]", | 302 | trace_seq_printf(s, "[+0x%lx]", |
324 | ip - vmstart); | 303 | ip - vmstart); |
325 | } | 304 | } |
326 | up_read(&mm->mmap_sem); | 305 | up_read(&mm->mmap_sem); |
327 | } | 306 | } |
328 | if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file)) | 307 | if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file)) |
329 | ret = trace_seq_printf(s, " <" IP_FMT ">", ip); | 308 | trace_seq_printf(s, " <" IP_FMT ">", ip); |
330 | return ret; | 309 | return !trace_seq_has_overflowed(s); |
331 | } | 310 | } |
332 | 311 | ||
333 | int | 312 | int |
@@ -335,7 +314,6 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, | |||
335 | unsigned long sym_flags) | 314 | unsigned long sym_flags) |
336 | { | 315 | { |
337 | struct mm_struct *mm = NULL; | 316 | struct mm_struct *mm = NULL; |
338 | int ret = 1; | ||
339 | unsigned int i; | 317 | unsigned int i; |
340 | 318 | ||
341 | if (trace_flags & TRACE_ITER_SYM_USEROBJ) { | 319 | if (trace_flags & TRACE_ITER_SYM_USEROBJ) { |
@@ -354,48 +332,45 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, | |||
354 | for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { | 332 | for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { |
355 | unsigned long ip = entry->caller[i]; | 333 | unsigned long ip = entry->caller[i]; |
356 | 334 | ||
357 | if (ip == ULONG_MAX || !ret) | 335 | if (ip == ULONG_MAX || trace_seq_has_overflowed(s)) |
358 | break; | 336 | break; |
359 | if (ret) | 337 | |
360 | ret = trace_seq_puts(s, " => "); | 338 | trace_seq_puts(s, " => "); |
339 | |||
361 | if (!ip) { | 340 | if (!ip) { |
362 | if (ret) | 341 | trace_seq_puts(s, "??"); |
363 | ret = trace_seq_puts(s, "??"); | 342 | trace_seq_putc(s, '\n'); |
364 | if (ret) | ||
365 | ret = trace_seq_putc(s, '\n'); | ||
366 | continue; | 343 | continue; |
367 | } | 344 | } |
368 | if (!ret) | 345 | |
369 | break; | 346 | seq_print_user_ip(s, mm, ip, sym_flags); |
370 | if (ret) | 347 | trace_seq_putc(s, '\n'); |
371 | ret = seq_print_user_ip(s, mm, ip, sym_flags); | ||
372 | ret = trace_seq_putc(s, '\n'); | ||
373 | } | 348 | } |
374 | 349 | ||
375 | if (mm) | 350 | if (mm) |
376 | mmput(mm); | 351 | mmput(mm); |
377 | return ret; | 352 | |
353 | return !trace_seq_has_overflowed(s); | ||
378 | } | 354 | } |
379 | 355 | ||
380 | int | 356 | int |
381 | seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) | 357 | seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) |
382 | { | 358 | { |
383 | int ret; | 359 | if (!ip) { |
384 | 360 | trace_seq_putc(s, '0'); | |
385 | if (!ip) | 361 | goto out; |
386 | return trace_seq_putc(s, '0'); | 362 | } |
387 | 363 | ||
388 | if (sym_flags & TRACE_ITER_SYM_OFFSET) | 364 | if (sym_flags & TRACE_ITER_SYM_OFFSET) |
389 | ret = seq_print_sym_offset(s, "%s", ip); | 365 | seq_print_sym_offset(s, "%s", ip); |
390 | else | 366 | else |
391 | ret = seq_print_sym_short(s, "%s", ip); | 367 | seq_print_sym_short(s, "%s", ip); |
392 | |||
393 | if (!ret) | ||
394 | return 0; | ||
395 | 368 | ||
396 | if (sym_flags & TRACE_ITER_SYM_ADDR) | 369 | if (sym_flags & TRACE_ITER_SYM_ADDR) |
397 | ret = trace_seq_printf(s, " <" IP_FMT ">", ip); | 370 | trace_seq_printf(s, " <" IP_FMT ">", ip); |
398 | return ret; | 371 | |
372 | out: | ||
373 | return !trace_seq_has_overflowed(s); | ||
399 | } | 374 | } |
400 | 375 | ||
401 | /** | 376 | /** |
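Helpers such as seq_print_user_ip(), seq_print_userip_objs() and seq_print_ip_sym() keep an int return, but its meaning changes: it is now simply !trace_seq_has_overflowed(s), i.e. "the line still fits", rather than the status of the last individual write. Existing boolean callers therefore keep working unchanged, for example in the probe printers above:

	/* Zero now just means the trace_seq overflowed somewhere along the way. */
	if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
		goto out;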
@@ -413,7 +388,6 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) | |||
413 | char irqs_off; | 388 | char irqs_off; |
414 | int hardirq; | 389 | int hardirq; |
415 | int softirq; | 390 | int softirq; |
416 | int ret; | ||
417 | 391 | ||
418 | hardirq = entry->flags & TRACE_FLAG_HARDIRQ; | 392 | hardirq = entry->flags & TRACE_FLAG_HARDIRQ; |
419 | softirq = entry->flags & TRACE_FLAG_SOFTIRQ; | 393 | softirq = entry->flags & TRACE_FLAG_SOFTIRQ; |
@@ -445,16 +419,15 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry) | |||
445 | softirq ? 's' : | 419 | softirq ? 's' : |
446 | '.'; | 420 | '.'; |
447 | 421 | ||
448 | if (!trace_seq_printf(s, "%c%c%c", | 422 | trace_seq_printf(s, "%c%c%c", |
449 | irqs_off, need_resched, hardsoft_irq)) | 423 | irqs_off, need_resched, hardsoft_irq); |
450 | return 0; | ||
451 | 424 | ||
452 | if (entry->preempt_count) | 425 | if (entry->preempt_count) |
453 | ret = trace_seq_printf(s, "%x", entry->preempt_count); | 426 | trace_seq_printf(s, "%x", entry->preempt_count); |
454 | else | 427 | else |
455 | ret = trace_seq_putc(s, '.'); | 428 | trace_seq_putc(s, '.'); |
456 | 429 | ||
457 | return ret; | 430 | return !trace_seq_has_overflowed(s); |
458 | } | 431 | } |
459 | 432 | ||
460 | static int | 433 | static int |
@@ -464,14 +437,38 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) | |||
464 | 437 | ||
465 | trace_find_cmdline(entry->pid, comm); | 438 | trace_find_cmdline(entry->pid, comm); |
466 | 439 | ||
467 | if (!trace_seq_printf(s, "%8.8s-%-5d %3d", | 440 | trace_seq_printf(s, "%8.8s-%-5d %3d", |
468 | comm, entry->pid, cpu)) | 441 | comm, entry->pid, cpu); |
469 | return 0; | ||
470 | 442 | ||
471 | return trace_print_lat_fmt(s, entry); | 443 | return trace_print_lat_fmt(s, entry); |
472 | } | 444 | } |
473 | 445 | ||
474 | static unsigned long preempt_mark_thresh_us = 100; | 446 | #undef MARK |
447 | #define MARK(v, s) {.val = v, .sym = s} | ||
448 | /* trace overhead mark */ | ||
449 | static const struct trace_mark { | ||
450 | unsigned long long val; /* unit: nsec */ | ||
451 | char sym; | ||
452 | } mark[] = { | ||
453 | MARK(1000000000ULL , '$'), /* 1 sec */ | ||
454 | MARK(1000000ULL , '#'), /* 1000 usecs */ | ||
455 | MARK(100000ULL , '!'), /* 100 usecs */ | ||
456 | MARK(10000ULL , '+'), /* 10 usecs */ | ||
457 | }; | ||
458 | #undef MARK | ||
459 | |||
460 | char trace_find_mark(unsigned long long d) | ||
461 | { | ||
462 | int i; | ||
463 | int size = ARRAY_SIZE(mark); | ||
464 | |||
465 | for (i = 0; i < size; i++) { | ||
466 | if (d >= mark[i].val) | ||
467 | break; | ||
468 | } | ||
469 | |||
470 | return (i == size) ? ' ' : mark[i].sym; | ||
471 | } | ||
475 | 472 | ||
476 | static int | 473 | static int |
477 | lat_print_timestamp(struct trace_iterator *iter, u64 next_ts) | 474 | lat_print_timestamp(struct trace_iterator *iter, u64 next_ts) |
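trace_find_mark() centralizes the overhead annotation that print_graph_duration() and lat_print_timestamp() each used to hard-code, and extends it with '#' (1 ms) and '$' (1 s) marks. The table is keyed in nanoseconds and scanned from the largest value down; for example:

	trace_find_mark(2000000000ULL);	/* >= 1 sec         -> '$' */
	trace_find_mark(3000000ULL);	/* >= 1000 usecs    -> '#' */
	trace_find_mark(150000ULL);	/* >= 100 usecs     -> '!' */
	trace_find_mark(20000ULL);	/* >= 10 usecs      -> '+' */
	trace_find_mark(500ULL);	/* below every mark -> ' ' */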
@@ -493,24 +490,28 @@ lat_print_timestamp(struct trace_iterator *iter, u64 next_ts) | |||
493 | unsigned long rel_usec = do_div(rel_ts, USEC_PER_MSEC); | 490 | unsigned long rel_usec = do_div(rel_ts, USEC_PER_MSEC); |
494 | unsigned long rel_msec = (unsigned long)rel_ts; | 491 | unsigned long rel_msec = (unsigned long)rel_ts; |
495 | 492 | ||
496 | return trace_seq_printf( | 493 | trace_seq_printf( |
497 | s, "[%08llx] %ld.%03ldms (+%ld.%03ldms): ", | 494 | s, "[%08llx] %ld.%03ldms (+%ld.%03ldms): ", |
498 | ns2usecs(iter->ts), | 495 | ns2usecs(iter->ts), |
499 | abs_msec, abs_usec, | 496 | abs_msec, abs_usec, |
500 | rel_msec, rel_usec); | 497 | rel_msec, rel_usec); |
498 | |||
501 | } else if (verbose && !in_ns) { | 499 | } else if (verbose && !in_ns) { |
502 | return trace_seq_printf( | 500 | trace_seq_printf( |
503 | s, "[%016llx] %lld (+%lld): ", | 501 | s, "[%016llx] %lld (+%lld): ", |
504 | iter->ts, abs_ts, rel_ts); | 502 | iter->ts, abs_ts, rel_ts); |
503 | |||
505 | } else if (!verbose && in_ns) { | 504 | } else if (!verbose && in_ns) { |
506 | return trace_seq_printf( | 505 | trace_seq_printf( |
507 | s, " %4lldus%c: ", | 506 | s, " %4lldus%c: ", |
508 | abs_ts, | 507 | abs_ts, |
509 | rel_ts > preempt_mark_thresh_us ? '!' : | 508 | trace_find_mark(rel_ts * NSEC_PER_USEC)); |
510 | rel_ts > 1 ? '+' : ' '); | 509 | |
511 | } else { /* !verbose && !in_ns */ | 510 | } else { /* !verbose && !in_ns */ |
512 | return trace_seq_printf(s, " %4lld: ", abs_ts); | 511 | trace_seq_printf(s, " %4lld: ", abs_ts); |
513 | } | 512 | } |
513 | |||
514 | return !trace_seq_has_overflowed(s); | ||
514 | } | 515 | } |
515 | 516 | ||
516 | int trace_print_context(struct trace_iterator *iter) | 517 | int trace_print_context(struct trace_iterator *iter) |
@@ -520,34 +521,29 @@ int trace_print_context(struct trace_iterator *iter) | |||
520 | unsigned long long t; | 521 | unsigned long long t; |
521 | unsigned long secs, usec_rem; | 522 | unsigned long secs, usec_rem; |
522 | char comm[TASK_COMM_LEN]; | 523 | char comm[TASK_COMM_LEN]; |
523 | int ret; | ||
524 | 524 | ||
525 | trace_find_cmdline(entry->pid, comm); | 525 | trace_find_cmdline(entry->pid, comm); |
526 | 526 | ||
527 | ret = trace_seq_printf(s, "%16s-%-5d [%03d] ", | 527 | trace_seq_printf(s, "%16s-%-5d [%03d] ", |
528 | comm, entry->pid, iter->cpu); | 528 | comm, entry->pid, iter->cpu); |
529 | if (!ret) | ||
530 | return 0; | ||
531 | 529 | ||
532 | if (trace_flags & TRACE_ITER_IRQ_INFO) { | 530 | if (trace_flags & TRACE_ITER_IRQ_INFO) |
533 | ret = trace_print_lat_fmt(s, entry); | 531 | trace_print_lat_fmt(s, entry); |
534 | if (!ret) | ||
535 | return 0; | ||
536 | } | ||
537 | 532 | ||
538 | if (iter->iter_flags & TRACE_FILE_TIME_IN_NS) { | 533 | if (iter->iter_flags & TRACE_FILE_TIME_IN_NS) { |
539 | t = ns2usecs(iter->ts); | 534 | t = ns2usecs(iter->ts); |
540 | usec_rem = do_div(t, USEC_PER_SEC); | 535 | usec_rem = do_div(t, USEC_PER_SEC); |
541 | secs = (unsigned long)t; | 536 | secs = (unsigned long)t; |
542 | return trace_seq_printf(s, " %5lu.%06lu: ", secs, usec_rem); | 537 | trace_seq_printf(s, " %5lu.%06lu: ", secs, usec_rem); |
543 | } else | 538 | } else |
544 | return trace_seq_printf(s, " %12llu: ", iter->ts); | 539 | trace_seq_printf(s, " %12llu: ", iter->ts); |
540 | |||
541 | return !trace_seq_has_overflowed(s); | ||
545 | } | 542 | } |
546 | 543 | ||
547 | int trace_print_lat_context(struct trace_iterator *iter) | 544 | int trace_print_lat_context(struct trace_iterator *iter) |
548 | { | 545 | { |
549 | u64 next_ts; | 546 | u64 next_ts; |
550 | int ret; | ||
551 | /* trace_find_next_entry will reset ent_size */ | 547 | /* trace_find_next_entry will reset ent_size */ |
552 | int ent_size = iter->ent_size; | 548 | int ent_size = iter->ent_size; |
553 | struct trace_seq *s = &iter->seq; | 549 | struct trace_seq *s = &iter->seq; |
@@ -567,18 +563,17 @@ int trace_print_lat_context(struct trace_iterator *iter) | |||
567 | 563 | ||
568 | trace_find_cmdline(entry->pid, comm); | 564 | trace_find_cmdline(entry->pid, comm); |
569 | 565 | ||
570 | ret = trace_seq_printf( | 566 | trace_seq_printf( |
571 | s, "%16s %5d %3d %d %08x %08lx ", | 567 | s, "%16s %5d %3d %d %08x %08lx ", |
572 | comm, entry->pid, iter->cpu, entry->flags, | 568 | comm, entry->pid, iter->cpu, entry->flags, |
573 | entry->preempt_count, iter->idx); | 569 | entry->preempt_count, iter->idx); |
574 | } else { | 570 | } else { |
575 | ret = lat_print_generic(s, entry, iter->cpu); | 571 | lat_print_generic(s, entry, iter->cpu); |
576 | } | 572 | } |
577 | 573 | ||
578 | if (ret) | 574 | lat_print_timestamp(iter, next_ts); |
579 | ret = lat_print_timestamp(iter, next_ts); | ||
580 | 575 | ||
581 | return ret; | 576 | return !trace_seq_has_overflowed(s); |
582 | } | 577 | } |
583 | 578 | ||
584 | static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; | 579 | static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; |
@@ -692,7 +687,7 @@ int register_ftrace_event(struct trace_event *event) | |||
692 | goto out; | 687 | goto out; |
693 | 688 | ||
694 | } else { | 689 | } else { |
695 | 690 | ||
696 | event->type = next_event_type++; | 691 | event->type = next_event_type++; |
697 | list = &ftrace_event_list; | 692 | list = &ftrace_event_list; |
698 | } | 693 | } |
@@ -764,10 +759,9 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_event); | |||
764 | enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags, | 759 | enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags, |
765 | struct trace_event *event) | 760 | struct trace_event *event) |
766 | { | 761 | { |
767 | if (!trace_seq_printf(&iter->seq, "type: %d\n", iter->ent->type)) | 762 | trace_seq_printf(&iter->seq, "type: %d\n", iter->ent->type); |
768 | return TRACE_TYPE_PARTIAL_LINE; | ||
769 | 763 | ||
770 | return TRACE_TYPE_HANDLED; | 764 | return trace_handle_return(&iter->seq); |
771 | } | 765 | } |
772 | 766 | ||
773 | /* TRACE_FN */ | 767 | /* TRACE_FN */ |
@@ -779,24 +773,16 @@ static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags, | |||
779 | 773 | ||
780 | trace_assign_type(field, iter->ent); | 774 | trace_assign_type(field, iter->ent); |
781 | 775 | ||
782 | if (!seq_print_ip_sym(s, field->ip, flags)) | 776 | seq_print_ip_sym(s, field->ip, flags); |
783 | goto partial; | ||
784 | 777 | ||
785 | if ((flags & TRACE_ITER_PRINT_PARENT) && field->parent_ip) { | 778 | if ((flags & TRACE_ITER_PRINT_PARENT) && field->parent_ip) { |
786 | if (!trace_seq_puts(s, " <-")) | 779 | trace_seq_puts(s, " <-"); |
787 | goto partial; | 780 | seq_print_ip_sym(s, field->parent_ip, flags); |
788 | if (!seq_print_ip_sym(s, | ||
789 | field->parent_ip, | ||
790 | flags)) | ||
791 | goto partial; | ||
792 | } | 781 | } |
793 | if (!trace_seq_putc(s, '\n')) | ||
794 | goto partial; | ||
795 | 782 | ||
796 | return TRACE_TYPE_HANDLED; | 783 | trace_seq_putc(s, '\n'); |
797 | 784 | ||
798 | partial: | 785 | return trace_handle_return(s); |
799 | return TRACE_TYPE_PARTIAL_LINE; | ||
800 | } | 786 | } |
801 | 787 | ||
802 | static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags, | 788 | static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags, |
@@ -806,12 +792,11 @@ static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags, | |||
806 | 792 | ||
807 | trace_assign_type(field, iter->ent); | 793 | trace_assign_type(field, iter->ent); |
808 | 794 | ||
809 | if (!trace_seq_printf(&iter->seq, "%lx %lx\n", | 795 | trace_seq_printf(&iter->seq, "%lx %lx\n", |
810 | field->ip, | 796 | field->ip, |
811 | field->parent_ip)) | 797 | field->parent_ip); |
812 | return TRACE_TYPE_PARTIAL_LINE; | ||
813 | 798 | ||
814 | return TRACE_TYPE_HANDLED; | 799 | return trace_handle_return(&iter->seq); |
815 | } | 800 | } |
816 | 801 | ||
817 | static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags, | 802 | static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags, |
@@ -822,10 +807,10 @@ static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags, | |||
822 | 807 | ||
823 | trace_assign_type(field, iter->ent); | 808 | trace_assign_type(field, iter->ent); |
824 | 809 | ||
825 | SEQ_PUT_HEX_FIELD_RET(s, field->ip); | 810 | SEQ_PUT_HEX_FIELD(s, field->ip); |
826 | SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip); | 811 | SEQ_PUT_HEX_FIELD(s, field->parent_ip); |
827 | 812 | ||
828 | return TRACE_TYPE_HANDLED; | 813 | return trace_handle_return(s); |
829 | } | 814 | } |
830 | 815 | ||
831 | static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags, | 816 | static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags, |
@@ -836,10 +821,10 @@ static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags, | |||
836 | 821 | ||
837 | trace_assign_type(field, iter->ent); | 822 | trace_assign_type(field, iter->ent); |
838 | 823 | ||
839 | SEQ_PUT_FIELD_RET(s, field->ip); | 824 | SEQ_PUT_FIELD(s, field->ip); |
840 | SEQ_PUT_FIELD_RET(s, field->parent_ip); | 825 | SEQ_PUT_FIELD(s, field->parent_ip); |
841 | 826 | ||
842 | return TRACE_TYPE_HANDLED; | 827 | return trace_handle_return(s); |
843 | } | 828 | } |
844 | 829 | ||
845 | static struct trace_event_functions trace_fn_funcs = { | 830 | static struct trace_event_functions trace_fn_funcs = { |
@@ -868,18 +853,17 @@ static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter, | |||
868 | T = task_state_char(field->next_state); | 853 | T = task_state_char(field->next_state); |
869 | S = task_state_char(field->prev_state); | 854 | S = task_state_char(field->prev_state); |
870 | trace_find_cmdline(field->next_pid, comm); | 855 | trace_find_cmdline(field->next_pid, comm); |
871 | if (!trace_seq_printf(&iter->seq, | 856 | trace_seq_printf(&iter->seq, |
872 | " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n", | 857 | " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n", |
873 | field->prev_pid, | 858 | field->prev_pid, |
874 | field->prev_prio, | 859 | field->prev_prio, |
875 | S, delim, | 860 | S, delim, |
876 | field->next_cpu, | 861 | field->next_cpu, |
877 | field->next_pid, | 862 | field->next_pid, |
878 | field->next_prio, | 863 | field->next_prio, |
879 | T, comm)) | 864 | T, comm); |
880 | return TRACE_TYPE_PARTIAL_LINE; | 865 | |
881 | 866 | return trace_handle_return(&iter->seq); | |
882 | return TRACE_TYPE_HANDLED; | ||
883 | } | 867 | } |
884 | 868 | ||
885 | static enum print_line_t trace_ctx_print(struct trace_iterator *iter, int flags, | 869 | static enum print_line_t trace_ctx_print(struct trace_iterator *iter, int flags, |
@@ -904,17 +888,16 @@ static int trace_ctxwake_raw(struct trace_iterator *iter, char S) | |||
904 | if (!S) | 888 | if (!S) |
905 | S = task_state_char(field->prev_state); | 889 | S = task_state_char(field->prev_state); |
906 | T = task_state_char(field->next_state); | 890 | T = task_state_char(field->next_state); |
907 | if (!trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n", | 891 | trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n", |
908 | field->prev_pid, | 892 | field->prev_pid, |
909 | field->prev_prio, | 893 | field->prev_prio, |
910 | S, | 894 | S, |
911 | field->next_cpu, | 895 | field->next_cpu, |
912 | field->next_pid, | 896 | field->next_pid, |
913 | field->next_prio, | 897 | field->next_prio, |
914 | T)) | 898 | T); |
915 | return TRACE_TYPE_PARTIAL_LINE; | 899 | |
916 | 900 | return trace_handle_return(&iter->seq); | |
917 | return TRACE_TYPE_HANDLED; | ||
918 | } | 901 | } |
919 | 902 | ||
920 | static enum print_line_t trace_ctx_raw(struct trace_iterator *iter, int flags, | 903 | static enum print_line_t trace_ctx_raw(struct trace_iterator *iter, int flags, |
@@ -942,15 +925,15 @@ static int trace_ctxwake_hex(struct trace_iterator *iter, char S) | |||
942 | S = task_state_char(field->prev_state); | 925 | S = task_state_char(field->prev_state); |
943 | T = task_state_char(field->next_state); | 926 | T = task_state_char(field->next_state); |
944 | 927 | ||
945 | SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid); | 928 | SEQ_PUT_HEX_FIELD(s, field->prev_pid); |
946 | SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio); | 929 | SEQ_PUT_HEX_FIELD(s, field->prev_prio); |
947 | SEQ_PUT_HEX_FIELD_RET(s, S); | 930 | SEQ_PUT_HEX_FIELD(s, S); |
948 | SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu); | 931 | SEQ_PUT_HEX_FIELD(s, field->next_cpu); |
949 | SEQ_PUT_HEX_FIELD_RET(s, field->next_pid); | 932 | SEQ_PUT_HEX_FIELD(s, field->next_pid); |
950 | SEQ_PUT_HEX_FIELD_RET(s, field->next_prio); | 933 | SEQ_PUT_HEX_FIELD(s, field->next_prio); |
951 | SEQ_PUT_HEX_FIELD_RET(s, T); | 934 | SEQ_PUT_HEX_FIELD(s, T); |
952 | 935 | ||
953 | return TRACE_TYPE_HANDLED; | 936 | return trace_handle_return(s); |
954 | } | 937 | } |
955 | 938 | ||
956 | static enum print_line_t trace_ctx_hex(struct trace_iterator *iter, int flags, | 939 | static enum print_line_t trace_ctx_hex(struct trace_iterator *iter, int flags, |
@@ -973,14 +956,15 @@ static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter, | |||
973 | 956 | ||
974 | trace_assign_type(field, iter->ent); | 957 | trace_assign_type(field, iter->ent); |
975 | 958 | ||
976 | SEQ_PUT_FIELD_RET(s, field->prev_pid); | 959 | SEQ_PUT_FIELD(s, field->prev_pid); |
977 | SEQ_PUT_FIELD_RET(s, field->prev_prio); | 960 | SEQ_PUT_FIELD(s, field->prev_prio); |
978 | SEQ_PUT_FIELD_RET(s, field->prev_state); | 961 | SEQ_PUT_FIELD(s, field->prev_state); |
979 | SEQ_PUT_FIELD_RET(s, field->next_pid); | 962 | SEQ_PUT_FIELD(s, field->next_cpu); |
980 | SEQ_PUT_FIELD_RET(s, field->next_prio); | 963 | SEQ_PUT_FIELD(s, field->next_pid); |
981 | SEQ_PUT_FIELD_RET(s, field->next_state); | 964 | SEQ_PUT_FIELD(s, field->next_prio); |
965 | SEQ_PUT_FIELD(s, field->next_state); | ||
982 | 966 | ||
983 | return TRACE_TYPE_HANDLED; | 967 | return trace_handle_return(s); |
984 | } | 968 | } |
985 | 969 | ||
986 | static struct trace_event_functions trace_ctx_funcs = { | 970 | static struct trace_event_functions trace_ctx_funcs = { |
@@ -1020,23 +1004,19 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter, | |||
1020 | trace_assign_type(field, iter->ent); | 1004 | trace_assign_type(field, iter->ent); |
1021 | end = (unsigned long *)((long)iter->ent + iter->ent_size); | 1005 | end = (unsigned long *)((long)iter->ent + iter->ent_size); |
1022 | 1006 | ||
1023 | if (!trace_seq_puts(s, "<stack trace>\n")) | 1007 | trace_seq_puts(s, "<stack trace>\n"); |
1024 | goto partial; | ||
1025 | 1008 | ||
1026 | for (p = field->caller; p && *p != ULONG_MAX && p < end; p++) { | 1009 | for (p = field->caller; p && *p != ULONG_MAX && p < end; p++) { |
1027 | if (!trace_seq_puts(s, " => ")) | ||
1028 | goto partial; | ||
1029 | 1010 | ||
1030 | if (!seq_print_ip_sym(s, *p, flags)) | 1011 | if (trace_seq_has_overflowed(s)) |
1031 | goto partial; | 1012 | break; |
1032 | if (!trace_seq_putc(s, '\n')) | ||
1033 | goto partial; | ||
1034 | } | ||
1035 | 1013 | ||
1036 | return TRACE_TYPE_HANDLED; | 1014 | trace_seq_puts(s, " => "); |
1015 | seq_print_ip_sym(s, *p, flags); | ||
1016 | trace_seq_putc(s, '\n'); | ||
1017 | } | ||
1037 | 1018 | ||
1038 | partial: | 1019 | return trace_handle_return(s); |
1039 | return TRACE_TYPE_PARTIAL_LINE; | ||
1040 | } | 1020 | } |
1041 | 1021 | ||
1042 | static struct trace_event_functions trace_stack_funcs = { | 1022 | static struct trace_event_functions trace_stack_funcs = { |
@@ -1057,16 +1037,10 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter, | |||
1057 | 1037 | ||
1058 | trace_assign_type(field, iter->ent); | 1038 | trace_assign_type(field, iter->ent); |
1059 | 1039 | ||
1060 | if (!trace_seq_puts(s, "<user stack trace>\n")) | 1040 | trace_seq_puts(s, "<user stack trace>\n"); |
1061 | goto partial; | 1041 | seq_print_userip_objs(field, s, flags); |
1062 | |||
1063 | if (!seq_print_userip_objs(field, s, flags)) | ||
1064 | goto partial; | ||
1065 | |||
1066 | return TRACE_TYPE_HANDLED; | ||
1067 | 1042 | ||
1068 | partial: | 1043 | return trace_handle_return(s); |
1069 | return TRACE_TYPE_PARTIAL_LINE; | ||
1070 | } | 1044 | } |
1071 | 1045 | ||
1072 | static struct trace_event_functions trace_user_stack_funcs = { | 1046 | static struct trace_event_functions trace_user_stack_funcs = { |
@@ -1089,19 +1063,11 @@ trace_bputs_print(struct trace_iterator *iter, int flags, | |||
1089 | 1063 | ||
1090 | trace_assign_type(field, entry); | 1064 | trace_assign_type(field, entry); |
1091 | 1065 | ||
1092 | if (!seq_print_ip_sym(s, field->ip, flags)) | 1066 | seq_print_ip_sym(s, field->ip, flags); |
1093 | goto partial; | 1067 | trace_seq_puts(s, ": "); |
1068 | trace_seq_puts(s, field->str); | ||
1094 | 1069 | ||
1095 | if (!trace_seq_puts(s, ": ")) | 1070 | return trace_handle_return(s); |
1096 | goto partial; | ||
1097 | |||
1098 | if (!trace_seq_puts(s, field->str)) | ||
1099 | goto partial; | ||
1100 | |||
1101 | return TRACE_TYPE_HANDLED; | ||
1102 | |||
1103 | partial: | ||
1104 | return TRACE_TYPE_PARTIAL_LINE; | ||
1105 | } | 1071 | } |
1106 | 1072 | ||
1107 | 1073 | ||
@@ -1114,16 +1080,10 @@ trace_bputs_raw(struct trace_iterator *iter, int flags, | |||
1114 | 1080 | ||
1115 | trace_assign_type(field, iter->ent); | 1081 | trace_assign_type(field, iter->ent); |
1116 | 1082 | ||
1117 | if (!trace_seq_printf(s, ": %lx : ", field->ip)) | 1083 | trace_seq_printf(s, ": %lx : ", field->ip); |
1118 | goto partial; | 1084 | trace_seq_puts(s, field->str); |
1119 | |||
1120 | if (!trace_seq_puts(s, field->str)) | ||
1121 | goto partial; | ||
1122 | 1085 | ||
1123 | return TRACE_TYPE_HANDLED; | 1086 | return trace_handle_return(s); |
1124 | |||
1125 | partial: | ||
1126 | return TRACE_TYPE_PARTIAL_LINE; | ||
1127 | } | 1087 | } |
1128 | 1088 | ||
1129 | static struct trace_event_functions trace_bputs_funcs = { | 1089 | static struct trace_event_functions trace_bputs_funcs = { |
@@ -1147,19 +1107,11 @@ trace_bprint_print(struct trace_iterator *iter, int flags, | |||
1147 | 1107 | ||
1148 | trace_assign_type(field, entry); | 1108 | trace_assign_type(field, entry); |
1149 | 1109 | ||
1150 | if (!seq_print_ip_sym(s, field->ip, flags)) | 1110 | seq_print_ip_sym(s, field->ip, flags); |
1151 | goto partial; | 1111 | trace_seq_puts(s, ": "); |
1152 | 1112 | trace_seq_bprintf(s, field->fmt, field->buf); | |
1153 | if (!trace_seq_puts(s, ": ")) | ||
1154 | goto partial; | ||
1155 | |||
1156 | if (!trace_seq_bprintf(s, field->fmt, field->buf)) | ||
1157 | goto partial; | ||
1158 | 1113 | ||
1159 | return TRACE_TYPE_HANDLED; | 1114 | return trace_handle_return(s); |
1160 | |||
1161 | partial: | ||
1162 | return TRACE_TYPE_PARTIAL_LINE; | ||
1163 | } | 1115 | } |
1164 | 1116 | ||
1165 | 1117 | ||
@@ -1172,16 +1124,10 @@ trace_bprint_raw(struct trace_iterator *iter, int flags, | |||
1172 | 1124 | ||
1173 | trace_assign_type(field, iter->ent); | 1125 | trace_assign_type(field, iter->ent); |
1174 | 1126 | ||
1175 | if (!trace_seq_printf(s, ": %lx : ", field->ip)) | 1127 | trace_seq_printf(s, ": %lx : ", field->ip); |
1176 | goto partial; | 1128 | trace_seq_bprintf(s, field->fmt, field->buf); |
1177 | |||
1178 | if (!trace_seq_bprintf(s, field->fmt, field->buf)) | ||
1179 | goto partial; | ||
1180 | 1129 | ||
1181 | return TRACE_TYPE_HANDLED; | 1130 | return trace_handle_return(s); |
1182 | |||
1183 | partial: | ||
1184 | return TRACE_TYPE_PARTIAL_LINE; | ||
1185 | } | 1131 | } |
1186 | 1132 | ||
1187 | static struct trace_event_functions trace_bprint_funcs = { | 1133 | static struct trace_event_functions trace_bprint_funcs = { |
@@ -1203,16 +1149,10 @@ static enum print_line_t trace_print_print(struct trace_iterator *iter, | |||
1203 | 1149 | ||
1204 | trace_assign_type(field, iter->ent); | 1150 | trace_assign_type(field, iter->ent); |
1205 | 1151 | ||
1206 | if (!seq_print_ip_sym(s, field->ip, flags)) | 1152 | seq_print_ip_sym(s, field->ip, flags); |
1207 | goto partial; | 1153 | trace_seq_printf(s, ": %s", field->buf); |
1208 | |||
1209 | if (!trace_seq_printf(s, ": %s", field->buf)) | ||
1210 | goto partial; | ||
1211 | 1154 | ||
1212 | return TRACE_TYPE_HANDLED; | 1155 | return trace_handle_return(s); |
1213 | |||
1214 | partial: | ||
1215 | return TRACE_TYPE_PARTIAL_LINE; | ||
1216 | } | 1156 | } |
1217 | 1157 | ||
1218 | static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags, | 1158 | static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags, |
@@ -1222,13 +1162,9 @@ static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags, | |||
1222 | 1162 | ||
1223 | trace_assign_type(field, iter->ent); | 1163 | trace_assign_type(field, iter->ent); |
1224 | 1164 | ||
1225 | if (!trace_seq_printf(&iter->seq, "# %lx %s", field->ip, field->buf)) | 1165 | trace_seq_printf(&iter->seq, "# %lx %s", field->ip, field->buf); |
1226 | goto partial; | ||
1227 | |||
1228 | return TRACE_TYPE_HANDLED; | ||
1229 | 1166 | ||
1230 | partial: | 1167 | return trace_handle_return(&iter->seq); |
1231 | return TRACE_TYPE_PARTIAL_LINE; | ||
1232 | } | 1168 | } |
1233 | 1169 | ||
1234 | static struct trace_event_functions trace_print_funcs = { | 1170 | static struct trace_event_functions trace_print_funcs = { |
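The trace_output.c hunks above all follow the same conversion: the per-call "if (!trace_seq_...()) return TRACE_TYPE_PARTIAL_LINE" checks become unconditional writes, and each handler ends with a single trace_handle_return(&iter->seq). The helper's body is not shown in this diff; a minimal sketch that matches how it is used here, assuming the trace_seq_has_overflowed() test the series relies on, would be:

static inline enum print_line_t trace_handle_return(struct trace_seq *s)
{
	/* One overflow check replaces the old per-write PARTIAL_LINE returns. */
	return trace_seq_has_overflowed(s) ?
		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
}

With that shape, a handler can issue all of its writes back to back; if any of them overflowed the sequence buffer, the final check still reports a partial line.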
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h index 80b25b585a70..8ef2c40efb3c 100644 --- a/kernel/trace/trace_output.h +++ b/kernel/trace/trace_output.h | |||
@@ -35,17 +35,11 @@ trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry); | |||
35 | extern int __unregister_ftrace_event(struct trace_event *event); | 35 | extern int __unregister_ftrace_event(struct trace_event *event); |
36 | extern struct rw_semaphore trace_event_sem; | 36 | extern struct rw_semaphore trace_event_sem; |
37 | 37 | ||
38 | #define SEQ_PUT_FIELD_RET(s, x) \ | 38 | #define SEQ_PUT_FIELD(s, x) \ |
39 | do { \ | 39 | trace_seq_putmem(s, &(x), sizeof(x)) |
40 | if (!trace_seq_putmem(s, &(x), sizeof(x))) \ | 40 | |
41 | return TRACE_TYPE_PARTIAL_LINE; \ | 41 | #define SEQ_PUT_HEX_FIELD(s, x) \ |
42 | } while (0) | 42 | trace_seq_putmem_hex(s, &(x), sizeof(x)) |
43 | |||
44 | #define SEQ_PUT_HEX_FIELD_RET(s, x) \ | ||
45 | do { \ | ||
46 | if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \ | ||
47 | return TRACE_TYPE_PARTIAL_LINE; \ | ||
48 | } while (0) | ||
49 | 43 | ||
50 | #endif | 44 | #endif |
51 | 45 | ||
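With the _RET variants gone, SEQ_PUT_FIELD() and SEQ_PUT_HEX_FIELD() are thin wrappers around trace_seq_putmem() and trace_seq_putmem_hex() and no longer return from the caller on overflow. A hypothetical binary handler using them (the local variables stand in for real trace-entry fields and are not from this diff) reduces to straight-line writes:

static enum print_line_t example_bin_output(struct trace_iterator *iter,
					    int flags, struct trace_event *event)
{
	struct trace_seq *s = &iter->seq;
	unsigned long ip = 0, parent_ip = 0;	/* placeholder field values */

	SEQ_PUT_FIELD(s, ip);
	SEQ_PUT_FIELD(s, parent_ip);

	return trace_handle_return(s);
}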
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index 2900817ba65c..c4e70b6bd7fa 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c | |||
@@ -305,7 +305,7 @@ static int t_show(struct seq_file *m, void *v) | |||
305 | seq_puts(m, "\\t"); | 305 | seq_puts(m, "\\t"); |
306 | break; | 306 | break; |
307 | case '\\': | 307 | case '\\': |
308 | seq_puts(m, "\\"); | 308 | seq_putc(m, '\\'); |
309 | break; | 309 | break; |
310 | case '"': | 310 | case '"': |
311 | seq_puts(m, "\\\""); | 311 | seq_puts(m, "\\\""); |
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index d4b9fc22cd27..b983b2fd2ca1 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c | |||
@@ -40,7 +40,8 @@ const char *reserved_field_names[] = { | |||
40 | int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name, \ | 40 | int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name, \ |
41 | void *data, void *ent) \ | 41 | void *data, void *ent) \ |
42 | { \ | 42 | { \ |
43 | return trace_seq_printf(s, " %s=" fmt, name, *(type *)data); \ | 43 | trace_seq_printf(s, " %s=" fmt, name, *(type *)data); \ |
44 | return !trace_seq_has_overflowed(s); \ | ||
44 | } \ | 45 | } \ |
45 | const char PRINT_TYPE_FMT_NAME(type)[] = fmt; \ | 46 | const char PRINT_TYPE_FMT_NAME(type)[] = fmt; \ |
46 | NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(type)); | 47 | NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(type)); |
@@ -61,10 +62,11 @@ int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, const char *name, | |||
61 | int len = *(u32 *)data >> 16; | 62 | int len = *(u32 *)data >> 16; |
62 | 63 | ||
63 | if (!len) | 64 | if (!len) |
64 | return trace_seq_printf(s, " %s=(fault)", name); | 65 | trace_seq_printf(s, " %s=(fault)", name); |
65 | else | 66 | else |
66 | return trace_seq_printf(s, " %s=\"%s\"", name, | 67 | trace_seq_printf(s, " %s=\"%s\"", name, |
67 | (const char *)get_loc_data(data, ent)); | 68 | (const char *)get_loc_data(data, ent)); |
69 | return !trace_seq_has_overflowed(s); | ||
68 | } | 70 | } |
69 | NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(string)); | 71 | NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(string)); |
70 | 72 | ||
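The probe print-type helpers keep an int return because callers such as print_uprobe_event() below still test it, but the value now only says whether the sequence buffer overflowed. As a hypothetical expansion of the updated macro for one integer type (the function name and "%u" format are assumptions; the real ones come from the macro users in trace_probe.c):

int print_type_u32(struct trace_seq *s, const char *name,
		   void *data, void *ent)
{
	trace_seq_printf(s, " %s=%u", name, *(u32 *)data);
	/* Non-zero means the whole field made it into the buffer. */
	return !trace_seq_has_overflowed(s);
}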
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 3f34dc9b40f3..2e293beb186e 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c | |||
@@ -14,122 +14,26 @@ | |||
14 | 14 | ||
15 | #include "trace.h" | 15 | #include "trace.h" |
16 | 16 | ||
17 | static struct trace_array *ctx_trace; | ||
18 | static int __read_mostly tracer_enabled; | ||
19 | static int sched_ref; | 17 | static int sched_ref; |
20 | static DEFINE_MUTEX(sched_register_mutex); | 18 | static DEFINE_MUTEX(sched_register_mutex); |
21 | static int sched_stopped; | ||
22 | |||
23 | |||
24 | void | ||
25 | tracing_sched_switch_trace(struct trace_array *tr, | ||
26 | struct task_struct *prev, | ||
27 | struct task_struct *next, | ||
28 | unsigned long flags, int pc) | ||
29 | { | ||
30 | struct ftrace_event_call *call = &event_context_switch; | ||
31 | struct ring_buffer *buffer = tr->trace_buffer.buffer; | ||
32 | struct ring_buffer_event *event; | ||
33 | struct ctx_switch_entry *entry; | ||
34 | |||
35 | event = trace_buffer_lock_reserve(buffer, TRACE_CTX, | ||
36 | sizeof(*entry), flags, pc); | ||
37 | if (!event) | ||
38 | return; | ||
39 | entry = ring_buffer_event_data(event); | ||
40 | entry->prev_pid = prev->pid; | ||
41 | entry->prev_prio = prev->prio; | ||
42 | entry->prev_state = prev->state; | ||
43 | entry->next_pid = next->pid; | ||
44 | entry->next_prio = next->prio; | ||
45 | entry->next_state = next->state; | ||
46 | entry->next_cpu = task_cpu(next); | ||
47 | |||
48 | if (!call_filter_check_discard(call, entry, buffer, event)) | ||
49 | trace_buffer_unlock_commit(buffer, event, flags, pc); | ||
50 | } | ||
51 | 19 | ||
52 | static void | 20 | static void |
53 | probe_sched_switch(void *ignore, struct task_struct *prev, struct task_struct *next) | 21 | probe_sched_switch(void *ignore, struct task_struct *prev, struct task_struct *next) |
54 | { | 22 | { |
55 | struct trace_array_cpu *data; | ||
56 | unsigned long flags; | ||
57 | int cpu; | ||
58 | int pc; | ||
59 | |||
60 | if (unlikely(!sched_ref)) | 23 | if (unlikely(!sched_ref)) |
61 | return; | 24 | return; |
62 | 25 | ||
63 | tracing_record_cmdline(prev); | 26 | tracing_record_cmdline(prev); |
64 | tracing_record_cmdline(next); | 27 | tracing_record_cmdline(next); |
65 | |||
66 | if (!tracer_enabled || sched_stopped) | ||
67 | return; | ||
68 | |||
69 | pc = preempt_count(); | ||
70 | local_irq_save(flags); | ||
71 | cpu = raw_smp_processor_id(); | ||
72 | data = per_cpu_ptr(ctx_trace->trace_buffer.data, cpu); | ||
73 | |||
74 | if (likely(!atomic_read(&data->disabled))) | ||
75 | tracing_sched_switch_trace(ctx_trace, prev, next, flags, pc); | ||
76 | |||
77 | local_irq_restore(flags); | ||
78 | } | ||
79 | |||
80 | void | ||
81 | tracing_sched_wakeup_trace(struct trace_array *tr, | ||
82 | struct task_struct *wakee, | ||
83 | struct task_struct *curr, | ||
84 | unsigned long flags, int pc) | ||
85 | { | ||
86 | struct ftrace_event_call *call = &event_wakeup; | ||
87 | struct ring_buffer_event *event; | ||
88 | struct ctx_switch_entry *entry; | ||
89 | struct ring_buffer *buffer = tr->trace_buffer.buffer; | ||
90 | |||
91 | event = trace_buffer_lock_reserve(buffer, TRACE_WAKE, | ||
92 | sizeof(*entry), flags, pc); | ||
93 | if (!event) | ||
94 | return; | ||
95 | entry = ring_buffer_event_data(event); | ||
96 | entry->prev_pid = curr->pid; | ||
97 | entry->prev_prio = curr->prio; | ||
98 | entry->prev_state = curr->state; | ||
99 | entry->next_pid = wakee->pid; | ||
100 | entry->next_prio = wakee->prio; | ||
101 | entry->next_state = wakee->state; | ||
102 | entry->next_cpu = task_cpu(wakee); | ||
103 | |||
104 | if (!call_filter_check_discard(call, entry, buffer, event)) | ||
105 | trace_buffer_unlock_commit(buffer, event, flags, pc); | ||
106 | } | 28 | } |
107 | 29 | ||
108 | static void | 30 | static void |
109 | probe_sched_wakeup(void *ignore, struct task_struct *wakee, int success) | 31 | probe_sched_wakeup(void *ignore, struct task_struct *wakee, int success) |
110 | { | 32 | { |
111 | struct trace_array_cpu *data; | ||
112 | unsigned long flags; | ||
113 | int cpu, pc; | ||
114 | |||
115 | if (unlikely(!sched_ref)) | 33 | if (unlikely(!sched_ref)) |
116 | return; | 34 | return; |
117 | 35 | ||
118 | tracing_record_cmdline(current); | 36 | tracing_record_cmdline(current); |
119 | |||
120 | if (!tracer_enabled || sched_stopped) | ||
121 | return; | ||
122 | |||
123 | pc = preempt_count(); | ||
124 | local_irq_save(flags); | ||
125 | cpu = raw_smp_processor_id(); | ||
126 | data = per_cpu_ptr(ctx_trace->trace_buffer.data, cpu); | ||
127 | |||
128 | if (likely(!atomic_read(&data->disabled))) | ||
129 | tracing_sched_wakeup_trace(ctx_trace, wakee, current, | ||
130 | flags, pc); | ||
131 | |||
132 | local_irq_restore(flags); | ||
133 | } | 37 | } |
134 | 38 | ||
135 | static int tracing_sched_register(void) | 39 | static int tracing_sched_register(void) |
@@ -197,51 +101,3 @@ void tracing_stop_cmdline_record(void) | |||
197 | { | 101 | { |
198 | tracing_stop_sched_switch(); | 102 | tracing_stop_sched_switch(); |
199 | } | 103 | } |
200 | |||
201 | /** | ||
202 | * tracing_start_sched_switch_record - start tracing context switches | ||
203 | * | ||
204 | * Turns on context switch tracing for a tracer. | ||
205 | */ | ||
206 | void tracing_start_sched_switch_record(void) | ||
207 | { | ||
208 | if (unlikely(!ctx_trace)) { | ||
209 | WARN_ON(1); | ||
210 | return; | ||
211 | } | ||
212 | |||
213 | tracing_start_sched_switch(); | ||
214 | |||
215 | mutex_lock(&sched_register_mutex); | ||
216 | tracer_enabled++; | ||
217 | mutex_unlock(&sched_register_mutex); | ||
218 | } | ||
219 | |||
220 | /** | ||
221 | * tracing_stop_sched_switch_record - start tracing context switches | ||
222 | * | ||
223 | * Turns off context switch tracing for a tracer. | ||
224 | */ | ||
225 | void tracing_stop_sched_switch_record(void) | ||
226 | { | ||
227 | mutex_lock(&sched_register_mutex); | ||
228 | tracer_enabled--; | ||
229 | WARN_ON(tracer_enabled < 0); | ||
230 | mutex_unlock(&sched_register_mutex); | ||
231 | |||
232 | tracing_stop_sched_switch(); | ||
233 | } | ||
234 | |||
235 | /** | ||
236 | * tracing_sched_switch_assign_trace - assign a trace array for ctx switch | ||
237 | * @tr: trace array pointer to assign | ||
238 | * | ||
239 | * Some tracers might want to record the context switches in their | ||
240 | * trace. This function lets those tracers assign the trace array | ||
241 | * to use. | ||
242 | */ | ||
243 | void tracing_sched_switch_assign_trace(struct trace_array *tr) | ||
244 | { | ||
245 | ctx_trace = tr; | ||
246 | } | ||
247 | |||
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 19bd8928ce94..8fb84b362816 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c | |||
@@ -365,6 +365,62 @@ probe_wakeup_migrate_task(void *ignore, struct task_struct *task, int cpu) | |||
365 | wakeup_current_cpu = cpu; | 365 | wakeup_current_cpu = cpu; |
366 | } | 366 | } |
367 | 367 | ||
368 | static void | ||
369 | tracing_sched_switch_trace(struct trace_array *tr, | ||
370 | struct task_struct *prev, | ||
371 | struct task_struct *next, | ||
372 | unsigned long flags, int pc) | ||
373 | { | ||
374 | struct ftrace_event_call *call = &event_context_switch; | ||
375 | struct ring_buffer *buffer = tr->trace_buffer.buffer; | ||
376 | struct ring_buffer_event *event; | ||
377 | struct ctx_switch_entry *entry; | ||
378 | |||
379 | event = trace_buffer_lock_reserve(buffer, TRACE_CTX, | ||
380 | sizeof(*entry), flags, pc); | ||
381 | if (!event) | ||
382 | return; | ||
383 | entry = ring_buffer_event_data(event); | ||
384 | entry->prev_pid = prev->pid; | ||
385 | entry->prev_prio = prev->prio; | ||
386 | entry->prev_state = prev->state; | ||
387 | entry->next_pid = next->pid; | ||
388 | entry->next_prio = next->prio; | ||
389 | entry->next_state = next->state; | ||
390 | entry->next_cpu = task_cpu(next); | ||
391 | |||
392 | if (!call_filter_check_discard(call, entry, buffer, event)) | ||
393 | trace_buffer_unlock_commit(buffer, event, flags, pc); | ||
394 | } | ||
395 | |||
396 | static void | ||
397 | tracing_sched_wakeup_trace(struct trace_array *tr, | ||
398 | struct task_struct *wakee, | ||
399 | struct task_struct *curr, | ||
400 | unsigned long flags, int pc) | ||
401 | { | ||
402 | struct ftrace_event_call *call = &event_wakeup; | ||
403 | struct ring_buffer_event *event; | ||
404 | struct ctx_switch_entry *entry; | ||
405 | struct ring_buffer *buffer = tr->trace_buffer.buffer; | ||
406 | |||
407 | event = trace_buffer_lock_reserve(buffer, TRACE_WAKE, | ||
408 | sizeof(*entry), flags, pc); | ||
409 | if (!event) | ||
410 | return; | ||
411 | entry = ring_buffer_event_data(event); | ||
412 | entry->prev_pid = curr->pid; | ||
413 | entry->prev_prio = curr->prio; | ||
414 | entry->prev_state = curr->state; | ||
415 | entry->next_pid = wakee->pid; | ||
416 | entry->next_prio = wakee->prio; | ||
417 | entry->next_state = wakee->state; | ||
418 | entry->next_cpu = task_cpu(wakee); | ||
419 | |||
420 | if (!call_filter_check_discard(call, entry, buffer, event)) | ||
421 | trace_buffer_unlock_commit(buffer, event, flags, pc); | ||
422 | } | ||
423 | |||
368 | static void notrace | 424 | static void notrace |
369 | probe_wakeup_sched_switch(void *ignore, | 425 | probe_wakeup_sched_switch(void *ignore, |
370 | struct task_struct *prev, struct task_struct *next) | 426 | struct task_struct *prev, struct task_struct *next) |
diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c index 1f24ed99dca2..f8b45d8792f9 100644 --- a/kernel/trace/trace_seq.c +++ b/kernel/trace/trace_seq.c | |||
@@ -27,10 +27,19 @@ | |||
27 | #include <linux/trace_seq.h> | 27 | #include <linux/trace_seq.h> |
28 | 28 | ||
29 | /* How much buffer is left on the trace_seq? */ | 29 | /* How much buffer is left on the trace_seq? */ |
30 | #define TRACE_SEQ_BUF_LEFT(s) ((PAGE_SIZE - 1) - (s)->len) | 30 | #define TRACE_SEQ_BUF_LEFT(s) seq_buf_buffer_left(&(s)->seq) |
31 | 31 | ||
32 | /* How much buffer is written? */ | 32 | /* How much buffer is written? */ |
33 | #define TRACE_SEQ_BUF_USED(s) min((s)->len, (unsigned int)(PAGE_SIZE - 1)) | 33 | #define TRACE_SEQ_BUF_USED(s) seq_buf_used(&(s)->seq) |
34 | |||
35 | /* | ||
36 | * trace_seq should work with being initialized with 0s. | ||
37 | */ | ||
38 | static inline void __trace_seq_init(struct trace_seq *s) | ||
39 | { | ||
40 | if (unlikely(!s->seq.size)) | ||
41 | trace_seq_init(s); | ||
42 | } | ||
34 | 43 | ||
35 | /** | 44 | /** |
36 | * trace_print_seq - move the contents of trace_seq into a seq_file | 45 | * trace_print_seq - move the contents of trace_seq into a seq_file |
@@ -43,10 +52,11 @@ | |||
43 | */ | 52 | */ |
44 | int trace_print_seq(struct seq_file *m, struct trace_seq *s) | 53 | int trace_print_seq(struct seq_file *m, struct trace_seq *s) |
45 | { | 54 | { |
46 | unsigned int len = TRACE_SEQ_BUF_USED(s); | ||
47 | int ret; | 55 | int ret; |
48 | 56 | ||
49 | ret = seq_write(m, s->buffer, len); | 57 | __trace_seq_init(s); |
58 | |||
59 | ret = seq_buf_print_seq(m, &s->seq); | ||
50 | 60 | ||
51 | /* | 61 | /* |
52 | * Only reset this buffer if we successfully wrote to the | 62 | * Only reset this buffer if we successfully wrote to the |
@@ -69,34 +79,26 @@ int trace_print_seq(struct seq_file *m, struct trace_seq *s) | |||
69 | * trace_seq_printf() is used to store strings into a special | 79 | * trace_seq_printf() is used to store strings into a special |
70 | * buffer (@s). Then the output may be either used by | 80 | * buffer (@s). Then the output may be either used by |
71 | * the sequencer or pulled into another buffer. | 81 | * the sequencer or pulled into another buffer. |
72 | * | ||
73 | * Returns 1 if we successfully written all the contents to | ||
74 | * the buffer. | ||
75 | * Returns 0 if we the length to write is bigger than the | ||
76 | * reserved buffer space. In this case, nothing gets written. | ||
77 | */ | 82 | */ |
78 | int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) | 83 | void trace_seq_printf(struct trace_seq *s, const char *fmt, ...) |
79 | { | 84 | { |
80 | unsigned int len = TRACE_SEQ_BUF_LEFT(s); | 85 | unsigned int save_len = s->seq.len; |
81 | va_list ap; | 86 | va_list ap; |
82 | int ret; | ||
83 | 87 | ||
84 | if (s->full || !len) | 88 | if (s->full) |
85 | return 0; | 89 | return; |
90 | |||
91 | __trace_seq_init(s); | ||
86 | 92 | ||
87 | va_start(ap, fmt); | 93 | va_start(ap, fmt); |
88 | ret = vsnprintf(s->buffer + s->len, len, fmt, ap); | 94 | seq_buf_vprintf(&s->seq, fmt, ap); |
89 | va_end(ap); | 95 | va_end(ap); |
90 | 96 | ||
91 | /* If we can't write it all, don't bother writing anything */ | 97 | /* If we can't write it all, don't bother writing anything */ |
92 | if (ret >= len) { | 98 | if (unlikely(seq_buf_has_overflowed(&s->seq))) { |
99 | s->seq.len = save_len; | ||
93 | s->full = 1; | 100 | s->full = 1; |
94 | return 0; | ||
95 | } | 101 | } |
96 | |||
97 | s->len += ret; | ||
98 | |||
99 | return 1; | ||
100 | } | 102 | } |
101 | EXPORT_SYMBOL_GPL(trace_seq_printf); | 103 | EXPORT_SYMBOL_GPL(trace_seq_printf); |
102 | 104 | ||
@@ -107,25 +109,23 @@ EXPORT_SYMBOL_GPL(trace_seq_printf); | |||
107 | * @nmaskbits: The number of bits that are valid in @maskp | 109 | * @nmaskbits: The number of bits that are valid in @maskp |
108 | * | 110 | * |
109 | * Writes an ASCII representation of a bitmask string into @s. | 111 | * Writes an ASCII representation of a bitmask string into @s. |
110 | * | ||
111 | * Returns 1 if we successfully written all the contents to | ||
112 | * the buffer. | ||
113 | * Returns 0 if we the length to write is bigger than the | ||
114 | * reserved buffer space. In this case, nothing gets written. | ||
115 | */ | 112 | */ |
116 | int trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, | 113 | void trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, |
117 | int nmaskbits) | 114 | int nmaskbits) |
118 | { | 115 | { |
119 | unsigned int len = TRACE_SEQ_BUF_LEFT(s); | 116 | unsigned int save_len = s->seq.len; |
120 | int ret; | ||
121 | 117 | ||
122 | if (s->full || !len) | 118 | if (s->full) |
123 | return 0; | 119 | return; |
124 | 120 | ||
125 | ret = bitmap_scnprintf(s->buffer, len, maskp, nmaskbits); | 121 | __trace_seq_init(s); |
126 | s->len += ret; | ||
127 | 122 | ||
128 | return 1; | 123 | seq_buf_bitmask(&s->seq, maskp, nmaskbits); |
124 | |||
125 | if (unlikely(seq_buf_has_overflowed(&s->seq))) { | ||
126 | s->seq.len = save_len; | ||
127 | s->full = 1; | ||
128 | } | ||
129 | } | 129 | } |
130 | EXPORT_SYMBOL_GPL(trace_seq_bitmask); | 130 | EXPORT_SYMBOL_GPL(trace_seq_bitmask); |
131 | 131 | ||
@@ -139,28 +139,23 @@ EXPORT_SYMBOL_GPL(trace_seq_bitmask); | |||
139 | * trace_seq_printf is used to store strings into a special | 139 | * trace_seq_printf is used to store strings into a special |
140 | * buffer (@s). Then the output may be either used by | 140 | * buffer (@s). Then the output may be either used by |
141 | * the sequencer or pulled into another buffer. | 141 | * the sequencer or pulled into another buffer. |
142 | * | ||
143 | * Returns how much it wrote to the buffer. | ||
144 | */ | 142 | */ |
145 | int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) | 143 | void trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) |
146 | { | 144 | { |
147 | unsigned int len = TRACE_SEQ_BUF_LEFT(s); | 145 | unsigned int save_len = s->seq.len; |
148 | int ret; | ||
149 | 146 | ||
150 | if (s->full || !len) | 147 | if (s->full) |
151 | return 0; | 148 | return; |
152 | 149 | ||
153 | ret = vsnprintf(s->buffer + s->len, len, fmt, args); | 150 | __trace_seq_init(s); |
151 | |||
152 | seq_buf_vprintf(&s->seq, fmt, args); | ||
154 | 153 | ||
155 | /* If we can't write it all, don't bother writing anything */ | 154 | /* If we can't write it all, don't bother writing anything */ |
156 | if (ret >= len) { | 155 | if (unlikely(seq_buf_has_overflowed(&s->seq))) { |
156 | s->seq.len = save_len; | ||
157 | s->full = 1; | 157 | s->full = 1; |
158 | return 0; | ||
159 | } | 158 | } |
160 | |||
161 | s->len += ret; | ||
162 | |||
163 | return len; | ||
164 | } | 159 | } |
165 | EXPORT_SYMBOL_GPL(trace_seq_vprintf); | 160 | EXPORT_SYMBOL_GPL(trace_seq_vprintf); |
166 | 161 | ||
@@ -178,28 +173,24 @@ EXPORT_SYMBOL_GPL(trace_seq_vprintf); | |||
178 | * | 173 | * |
179 | * This function will take the format and the binary array and finish | 174 | * This function will take the format and the binary array and finish |
180 | * the conversion into the ASCII string within the buffer. | 175 | * the conversion into the ASCII string within the buffer. |
181 | * | ||
182 | * Returns how much it wrote to the buffer. | ||
183 | */ | 176 | */ |
184 | int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) | 177 | void trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) |
185 | { | 178 | { |
186 | unsigned int len = TRACE_SEQ_BUF_LEFT(s); | 179 | unsigned int save_len = s->seq.len; |
187 | int ret; | ||
188 | 180 | ||
189 | if (s->full || !len) | 181 | if (s->full) |
190 | return 0; | 182 | return; |
183 | |||
184 | __trace_seq_init(s); | ||
191 | 185 | ||
192 | ret = bstr_printf(s->buffer + s->len, len, fmt, binary); | 186 | seq_buf_bprintf(&s->seq, fmt, binary); |
193 | 187 | ||
194 | /* If we can't write it all, don't bother writing anything */ | 188 | /* If we can't write it all, don't bother writing anything */ |
195 | if (ret >= len) { | 189 | if (unlikely(seq_buf_has_overflowed(&s->seq))) { |
190 | s->seq.len = save_len; | ||
196 | s->full = 1; | 191 | s->full = 1; |
197 | return 0; | 192 | return; |
198 | } | 193 | } |
199 | |||
200 | s->len += ret; | ||
201 | |||
202 | return len; | ||
203 | } | 194 | } |
204 | EXPORT_SYMBOL_GPL(trace_seq_bprintf); | 195 | EXPORT_SYMBOL_GPL(trace_seq_bprintf); |
205 | 196 | ||
@@ -212,25 +203,22 @@ EXPORT_SYMBOL_GPL(trace_seq_bprintf); | |||
212 | * copy to user routines. This function records a simple string | 203 | * copy to user routines. This function records a simple string |
213 | * into a special buffer (@s) for later retrieval by a sequencer | 204 | * into a special buffer (@s) for later retrieval by a sequencer |
214 | * or other mechanism. | 205 | * or other mechanism. |
215 | * | ||
216 | * Returns how much it wrote to the buffer. | ||
217 | */ | 206 | */ |
218 | int trace_seq_puts(struct trace_seq *s, const char *str) | 207 | void trace_seq_puts(struct trace_seq *s, const char *str) |
219 | { | 208 | { |
220 | unsigned int len = strlen(str); | 209 | unsigned int len = strlen(str); |
221 | 210 | ||
222 | if (s->full) | 211 | if (s->full) |
223 | return 0; | 212 | return; |
213 | |||
214 | __trace_seq_init(s); | ||
224 | 215 | ||
225 | if (len > TRACE_SEQ_BUF_LEFT(s)) { | 216 | if (len > TRACE_SEQ_BUF_LEFT(s)) { |
226 | s->full = 1; | 217 | s->full = 1; |
227 | return 0; | 218 | return; |
228 | } | 219 | } |
229 | 220 | ||
230 | memcpy(s->buffer + s->len, str, len); | 221 | seq_buf_putmem(&s->seq, str, len); |
231 | s->len += len; | ||
232 | |||
233 | return len; | ||
234 | } | 222 | } |
235 | EXPORT_SYMBOL_GPL(trace_seq_puts); | 223 | EXPORT_SYMBOL_GPL(trace_seq_puts); |
236 | 224 | ||
@@ -243,22 +231,20 @@ EXPORT_SYMBOL_GPL(trace_seq_puts); | |||
243 | * copy to user routines. This function records a simple character | 231 | * copy to user routines. This function records a simple character |
244 | * into a special buffer (@s) for later retrieval by a sequencer | 232 | * into a special buffer (@s) for later retrieval by a sequencer |
245 | * or other mechanism. | 233 | * or other mechanism. |
246 | * | ||
247 | * Returns how much it wrote to the buffer. | ||
248 | */ | 234 | */ |
249 | int trace_seq_putc(struct trace_seq *s, unsigned char c) | 235 | void trace_seq_putc(struct trace_seq *s, unsigned char c) |
250 | { | 236 | { |
251 | if (s->full) | 237 | if (s->full) |
252 | return 0; | 238 | return; |
239 | |||
240 | __trace_seq_init(s); | ||
253 | 241 | ||
254 | if (TRACE_SEQ_BUF_LEFT(s) < 1) { | 242 | if (TRACE_SEQ_BUF_LEFT(s) < 1) { |
255 | s->full = 1; | 243 | s->full = 1; |
256 | return 0; | 244 | return; |
257 | } | 245 | } |
258 | 246 | ||
259 | s->buffer[s->len++] = c; | 247 | seq_buf_putc(&s->seq, c); |
260 | |||
261 | return 1; | ||
262 | } | 248 | } |
263 | EXPORT_SYMBOL_GPL(trace_seq_putc); | 249 | EXPORT_SYMBOL_GPL(trace_seq_putc); |
264 | 250 | ||
@@ -271,29 +257,23 @@ EXPORT_SYMBOL_GPL(trace_seq_putc); | |||
271 | * There may be cases where raw memory needs to be written into the | 257 | * There may be cases where raw memory needs to be written into the |
272 | * buffer and a strcpy() would not work. Using this function allows | 258 | * buffer and a strcpy() would not work. Using this function allows |
273 | * for such cases. | 259 | * for such cases. |
274 | * | ||
275 | * Returns how much it wrote to the buffer. | ||
276 | */ | 260 | */ |
277 | int trace_seq_putmem(struct trace_seq *s, const void *mem, unsigned int len) | 261 | void trace_seq_putmem(struct trace_seq *s, const void *mem, unsigned int len) |
278 | { | 262 | { |
279 | if (s->full) | 263 | if (s->full) |
280 | return 0; | 264 | return; |
265 | |||
266 | __trace_seq_init(s); | ||
281 | 267 | ||
282 | if (len > TRACE_SEQ_BUF_LEFT(s)) { | 268 | if (len > TRACE_SEQ_BUF_LEFT(s)) { |
283 | s->full = 1; | 269 | s->full = 1; |
284 | return 0; | 270 | return; |
285 | } | 271 | } |
286 | 272 | ||
287 | memcpy(s->buffer + s->len, mem, len); | 273 | seq_buf_putmem(&s->seq, mem, len); |
288 | s->len += len; | ||
289 | |||
290 | return len; | ||
291 | } | 274 | } |
292 | EXPORT_SYMBOL_GPL(trace_seq_putmem); | 275 | EXPORT_SYMBOL_GPL(trace_seq_putmem); |
293 | 276 | ||
294 | #define MAX_MEMHEX_BYTES 8U | ||
295 | #define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) | ||
296 | |||
297 | /** | 277 | /** |
298 | * trace_seq_putmem_hex - write raw memory into the buffer in ASCII hex | 278 | * trace_seq_putmem_hex - write raw memory into the buffer in ASCII hex |
299 | * @s: trace sequence descriptor | 279 | * @s: trace sequence descriptor |
@@ -303,41 +283,31 @@ EXPORT_SYMBOL_GPL(trace_seq_putmem); | |||
303 | * This is similar to trace_seq_putmem() except instead of just copying the | 283 | * This is similar to trace_seq_putmem() except instead of just copying the |
304 | * raw memory into the buffer it writes its ASCII representation of it | 284 | * raw memory into the buffer it writes its ASCII representation of it |
305 | * in hex characters. | 285 | * in hex characters. |
306 | * | ||
307 | * Returns how much it wrote to the buffer. | ||
308 | */ | 286 | */ |
309 | int trace_seq_putmem_hex(struct trace_seq *s, const void *mem, | 287 | void trace_seq_putmem_hex(struct trace_seq *s, const void *mem, |
310 | unsigned int len) | 288 | unsigned int len) |
311 | { | 289 | { |
312 | unsigned char hex[HEX_CHARS]; | 290 | unsigned int save_len = s->seq.len; |
313 | const unsigned char *data = mem; | ||
314 | unsigned int start_len; | ||
315 | int i, j; | ||
316 | int cnt = 0; | ||
317 | 291 | ||
318 | if (s->full) | 292 | if (s->full) |
319 | return 0; | 293 | return; |
320 | 294 | ||
321 | while (len) { | 295 | __trace_seq_init(s); |
322 | start_len = min(len, HEX_CHARS - 1); | 296 | |
323 | #ifdef __BIG_ENDIAN | 297 | /* Each byte is represented by two chars */ |
324 | for (i = 0, j = 0; i < start_len; i++) { | 298 | if (len * 2 > TRACE_SEQ_BUF_LEFT(s)) { |
325 | #else | 299 | s->full = 1; |
326 | for (i = start_len-1, j = 0; i >= 0; i--) { | 300 | return; |
327 | #endif | 301 | } |
328 | hex[j++] = hex_asc_hi(data[i]); | 302 | |
329 | hex[j++] = hex_asc_lo(data[i]); | 303 | /* The added spaces can still cause an overflow */ |
330 | } | 304 | seq_buf_putmem_hex(&s->seq, mem, len); |
331 | if (WARN_ON_ONCE(j == 0 || j/2 > len)) | 305 | |
332 | break; | 306 | if (unlikely(seq_buf_has_overflowed(&s->seq))) { |
333 | 307 | s->seq.len = save_len; | |
334 | /* j increments twice per loop */ | 308 | s->full = 1; |
335 | len -= j / 2; | 309 | return; |
336 | hex[j++] = ' '; | ||
337 | |||
338 | cnt += trace_seq_putmem(s, hex, j); | ||
339 | } | 310 | } |
340 | return cnt; | ||
341 | } | 311 | } |
342 | EXPORT_SYMBOL_GPL(trace_seq_putmem_hex); | 312 | EXPORT_SYMBOL_GPL(trace_seq_putmem_hex); |
343 | 313 | ||
@@ -355,30 +325,27 @@ EXPORT_SYMBOL_GPL(trace_seq_putmem_hex); | |||
355 | */ | 325 | */ |
356 | int trace_seq_path(struct trace_seq *s, const struct path *path) | 326 | int trace_seq_path(struct trace_seq *s, const struct path *path) |
357 | { | 327 | { |
358 | unsigned char *p; | 328 | unsigned int save_len = s->seq.len; |
359 | 329 | ||
360 | if (s->full) | 330 | if (s->full) |
361 | return 0; | 331 | return 0; |
362 | 332 | ||
333 | __trace_seq_init(s); | ||
334 | |||
363 | if (TRACE_SEQ_BUF_LEFT(s) < 1) { | 335 | if (TRACE_SEQ_BUF_LEFT(s) < 1) { |
364 | s->full = 1; | 336 | s->full = 1; |
365 | return 0; | 337 | return 0; |
366 | } | 338 | } |
367 | 339 | ||
368 | p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); | 340 | seq_buf_path(&s->seq, path, "\n"); |
369 | if (!IS_ERR(p)) { | 341 | |
370 | p = mangle_path(s->buffer + s->len, p, "\n"); | 342 | if (unlikely(seq_buf_has_overflowed(&s->seq))) { |
371 | if (p) { | 343 | s->seq.len = save_len; |
372 | s->len = p - s->buffer; | 344 | s->full = 1; |
373 | return 1; | 345 | return 0; |
374 | } | ||
375 | } else { | ||
376 | s->buffer[s->len++] = '?'; | ||
377 | return 1; | ||
378 | } | 346 | } |
379 | 347 | ||
380 | s->full = 1; | 348 | return 1; |
381 | return 0; | ||
382 | } | 349 | } |
383 | EXPORT_SYMBOL_GPL(trace_seq_path); | 350 | EXPORT_SYMBOL_GPL(trace_seq_path); |
384 | 351 | ||
@@ -404,25 +371,7 @@ EXPORT_SYMBOL_GPL(trace_seq_path); | |||
404 | */ | 371 | */ |
405 | int trace_seq_to_user(struct trace_seq *s, char __user *ubuf, int cnt) | 372 | int trace_seq_to_user(struct trace_seq *s, char __user *ubuf, int cnt) |
406 | { | 373 | { |
407 | int len; | 374 | __trace_seq_init(s); |
408 | int ret; | 375 | return seq_buf_to_user(&s->seq, ubuf, cnt); |
409 | |||
410 | if (!cnt) | ||
411 | return 0; | ||
412 | |||
413 | if (s->len <= s->readpos) | ||
414 | return -EBUSY; | ||
415 | |||
416 | len = s->len - s->readpos; | ||
417 | if (cnt > len) | ||
418 | cnt = len; | ||
419 | ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt); | ||
420 | if (ret == cnt) | ||
421 | return -EFAULT; | ||
422 | |||
423 | cnt -= ret; | ||
424 | |||
425 | s->readpos += cnt; | ||
426 | return cnt; | ||
427 | } | 376 | } |
428 | EXPORT_SYMBOL_GPL(trace_seq_to_user); | 377 | EXPORT_SYMBOL_GPL(trace_seq_to_user); |
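The converted trace_seq helpers above share one all-or-nothing shape: lazily initialize the embedded seq_buf, attempt the write, and roll the length back if it overflowed so no partial output is left behind. Condensed into a single hypothetical helper (the names follow the hunks above, but this is a sketch rather than kernel code):

static void trace_seq_put_all_or_nothing(struct trace_seq *s,
					 const void *mem, unsigned int len)
{
	unsigned int save_len = s->seq.len;

	if (s->full)
		return;

	__trace_seq_init(s);	/* cope with a zero-initialized trace_seq */

	seq_buf_putmem(&s->seq, mem, len);

	if (unlikely(seq_buf_has_overflowed(&s->seq))) {
		s->seq.len = save_len;	/* discard the partial write */
		s->full = 1;
	}
}

trace_seq_to_user() is the one exception: it forwards directly to seq_buf_to_user(), which owns the partial-copy semantics.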
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 4dc8b79c5f75..dfe00a4f3f3e 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
@@ -114,7 +114,7 @@ print_syscall_enter(struct trace_iterator *iter, int flags, | |||
114 | struct trace_entry *ent = iter->ent; | 114 | struct trace_entry *ent = iter->ent; |
115 | struct syscall_trace_enter *trace; | 115 | struct syscall_trace_enter *trace; |
116 | struct syscall_metadata *entry; | 116 | struct syscall_metadata *entry; |
117 | int i, ret, syscall; | 117 | int i, syscall; |
118 | 118 | ||
119 | trace = (typeof(trace))ent; | 119 | trace = (typeof(trace))ent; |
120 | syscall = trace->nr; | 120 | syscall = trace->nr; |
@@ -128,35 +128,28 @@ print_syscall_enter(struct trace_iterator *iter, int flags, | |||
128 | goto end; | 128 | goto end; |
129 | } | 129 | } |
130 | 130 | ||
131 | ret = trace_seq_printf(s, "%s(", entry->name); | 131 | trace_seq_printf(s, "%s(", entry->name); |
132 | if (!ret) | ||
133 | return TRACE_TYPE_PARTIAL_LINE; | ||
134 | 132 | ||
135 | for (i = 0; i < entry->nb_args; i++) { | 133 | for (i = 0; i < entry->nb_args; i++) { |
134 | |||
135 | if (trace_seq_has_overflowed(s)) | ||
136 | goto end; | ||
137 | |||
136 | /* parameter types */ | 138 | /* parameter types */ |
137 | if (trace_flags & TRACE_ITER_VERBOSE) { | 139 | if (trace_flags & TRACE_ITER_VERBOSE) |
138 | ret = trace_seq_printf(s, "%s ", entry->types[i]); | 140 | trace_seq_printf(s, "%s ", entry->types[i]); |
139 | if (!ret) | 141 | |
140 | return TRACE_TYPE_PARTIAL_LINE; | ||
141 | } | ||
142 | /* parameter values */ | 142 | /* parameter values */ |
143 | ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i], | 143 | trace_seq_printf(s, "%s: %lx%s", entry->args[i], |
144 | trace->args[i], | 144 | trace->args[i], |
145 | i == entry->nb_args - 1 ? "" : ", "); | 145 | i == entry->nb_args - 1 ? "" : ", "); |
146 | if (!ret) | ||
147 | return TRACE_TYPE_PARTIAL_LINE; | ||
148 | } | 146 | } |
149 | 147 | ||
150 | ret = trace_seq_putc(s, ')'); | 148 | trace_seq_putc(s, ')'); |
151 | if (!ret) | ||
152 | return TRACE_TYPE_PARTIAL_LINE; | ||
153 | |||
154 | end: | 149 | end: |
155 | ret = trace_seq_putc(s, '\n'); | 150 | trace_seq_putc(s, '\n'); |
156 | if (!ret) | ||
157 | return TRACE_TYPE_PARTIAL_LINE; | ||
158 | 151 | ||
159 | return TRACE_TYPE_HANDLED; | 152 | return trace_handle_return(s); |
160 | } | 153 | } |
161 | 154 | ||
162 | static enum print_line_t | 155 | static enum print_line_t |
@@ -168,7 +161,6 @@ print_syscall_exit(struct trace_iterator *iter, int flags, | |||
168 | struct syscall_trace_exit *trace; | 161 | struct syscall_trace_exit *trace; |
169 | int syscall; | 162 | int syscall; |
170 | struct syscall_metadata *entry; | 163 | struct syscall_metadata *entry; |
171 | int ret; | ||
172 | 164 | ||
173 | trace = (typeof(trace))ent; | 165 | trace = (typeof(trace))ent; |
174 | syscall = trace->nr; | 166 | syscall = trace->nr; |
@@ -176,7 +168,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags, | |||
176 | 168 | ||
177 | if (!entry) { | 169 | if (!entry) { |
178 | trace_seq_putc(s, '\n'); | 170 | trace_seq_putc(s, '\n'); |
179 | return TRACE_TYPE_HANDLED; | 171 | goto out; |
180 | } | 172 | } |
181 | 173 | ||
182 | if (entry->exit_event->event.type != ent->type) { | 174 | if (entry->exit_event->event.type != ent->type) { |
@@ -184,12 +176,11 @@ print_syscall_exit(struct trace_iterator *iter, int flags, | |||
184 | return TRACE_TYPE_UNHANDLED; | 176 | return TRACE_TYPE_UNHANDLED; |
185 | } | 177 | } |
186 | 178 | ||
187 | ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name, | 179 | trace_seq_printf(s, "%s -> 0x%lx\n", entry->name, |
188 | trace->ret); | 180 | trace->ret); |
189 | if (!ret) | ||
190 | return TRACE_TYPE_PARTIAL_LINE; | ||
191 | 181 | ||
192 | return TRACE_TYPE_HANDLED; | 182 | out: |
183 | return trace_handle_return(s); | ||
193 | } | 184 | } |
194 | 185 | ||
195 | extern char *__bad_type_size(void); | 186 | extern char *__bad_type_size(void); |
@@ -313,7 +304,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) | |||
313 | int size; | 304 | int size; |
314 | 305 | ||
315 | syscall_nr = trace_get_syscall_nr(current, regs); | 306 | syscall_nr = trace_get_syscall_nr(current, regs); |
316 | if (syscall_nr < 0) | 307 | if (syscall_nr < 0 || syscall_nr >= NR_syscalls) |
317 | return; | 308 | return; |
318 | 309 | ||
319 | /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */ | 310 | /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */ |
@@ -360,7 +351,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) | |||
360 | int syscall_nr; | 351 | int syscall_nr; |
361 | 352 | ||
362 | syscall_nr = trace_get_syscall_nr(current, regs); | 353 | syscall_nr = trace_get_syscall_nr(current, regs); |
363 | if (syscall_nr < 0) | 354 | if (syscall_nr < 0 || syscall_nr >= NR_syscalls) |
364 | return; | 355 | return; |
365 | 356 | ||
366 | /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */ | 357 | /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */ |
@@ -567,7 +558,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) | |||
567 | int size; | 558 | int size; |
568 | 559 | ||
569 | syscall_nr = trace_get_syscall_nr(current, regs); | 560 | syscall_nr = trace_get_syscall_nr(current, regs); |
570 | if (syscall_nr < 0) | 561 | if (syscall_nr < 0 || syscall_nr >= NR_syscalls) |
571 | return; | 562 | return; |
572 | if (!test_bit(syscall_nr, enabled_perf_enter_syscalls)) | 563 | if (!test_bit(syscall_nr, enabled_perf_enter_syscalls)) |
573 | return; | 564 | return; |
@@ -641,7 +632,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) | |||
641 | int size; | 632 | int size; |
642 | 633 | ||
643 | syscall_nr = trace_get_syscall_nr(current, regs); | 634 | syscall_nr = trace_get_syscall_nr(current, regs); |
644 | if (syscall_nr < 0) | 635 | if (syscall_nr < 0 || syscall_nr >= NR_syscalls) |
645 | return; | 636 | return; |
646 | if (!test_bit(syscall_nr, enabled_perf_exit_syscalls)) | 637 | if (!test_bit(syscall_nr, enabled_perf_exit_syscalls)) |
647 | return; | 638 | return; |
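The syscall hunks also tighten the syscall-number validation: a negative number was already rejected, but the value is later used to index NR_syscalls-sized enable bitmaps and the metadata table, so an out-of-range positive value has to be rejected as well. The repeated test is equivalent to a small guard like this (the helper name is illustrative, not from the kernel):

static inline bool syscall_nr_in_range(int syscall_nr)
{
	/* Only values usable as an index into the per-syscall tables pass. */
	return syscall_nr >= 0 && syscall_nr < NR_syscalls;
}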
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 33ff6a24b802..8520acc34b18 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c | |||
@@ -552,8 +552,7 @@ error: | |||
552 | return ret; | 552 | return ret; |
553 | 553 | ||
554 | fail_address_parse: | 554 | fail_address_parse: |
555 | if (inode) | 555 | iput(inode); |
556 | iput(inode); | ||
557 | 556 | ||
558 | pr_info("Failed to parse address or file.\n"); | 557 | pr_info("Failed to parse address or file.\n"); |
559 | 558 | ||
@@ -606,7 +605,7 @@ static int probes_seq_show(struct seq_file *m, void *v) | |||
606 | for (i = 0; i < tu->tp.nr_args; i++) | 605 | for (i = 0; i < tu->tp.nr_args; i++) |
607 | seq_printf(m, " %s=%s", tu->tp.args[i].name, tu->tp.args[i].comm); | 606 | seq_printf(m, " %s=%s", tu->tp.args[i].name, tu->tp.args[i].comm); |
608 | 607 | ||
609 | seq_printf(m, "\n"); | 608 | seq_putc(m, '\n'); |
610 | return 0; | 609 | return 0; |
611 | } | 610 | } |
612 | 611 | ||
@@ -852,16 +851,14 @@ print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *e | |||
852 | tu = container_of(event, struct trace_uprobe, tp.call.event); | 851 | tu = container_of(event, struct trace_uprobe, tp.call.event); |
853 | 852 | ||
854 | if (is_ret_probe(tu)) { | 853 | if (is_ret_probe(tu)) { |
855 | if (!trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)", | 854 | trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)", |
856 | ftrace_event_name(&tu->tp.call), | 855 | ftrace_event_name(&tu->tp.call), |
857 | entry->vaddr[1], entry->vaddr[0])) | 856 | entry->vaddr[1], entry->vaddr[0]); |
858 | goto partial; | ||
859 | data = DATAOF_TRACE_ENTRY(entry, true); | 857 | data = DATAOF_TRACE_ENTRY(entry, true); |
860 | } else { | 858 | } else { |
861 | if (!trace_seq_printf(s, "%s: (0x%lx)", | 859 | trace_seq_printf(s, "%s: (0x%lx)", |
862 | ftrace_event_name(&tu->tp.call), | 860 | ftrace_event_name(&tu->tp.call), |
863 | entry->vaddr[0])) | 861 | entry->vaddr[0]); |
864 | goto partial; | ||
865 | data = DATAOF_TRACE_ENTRY(entry, false); | 862 | data = DATAOF_TRACE_ENTRY(entry, false); |
866 | } | 863 | } |
867 | 864 | ||
@@ -869,14 +866,13 @@ print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *e | |||
869 | struct probe_arg *parg = &tu->tp.args[i]; | 866 | struct probe_arg *parg = &tu->tp.args[i]; |
870 | 867 | ||
871 | if (!parg->type->print(s, parg->name, data + parg->offset, entry)) | 868 | if (!parg->type->print(s, parg->name, data + parg->offset, entry)) |
872 | goto partial; | 869 | goto out; |
873 | } | 870 | } |
874 | 871 | ||
875 | if (trace_seq_puts(s, "\n")) | 872 | trace_seq_putc(s, '\n'); |
876 | return TRACE_TYPE_HANDLED; | ||
877 | 873 | ||
878 | partial: | 874 | out: |
879 | return TRACE_TYPE_PARTIAL_LINE; | 875 | return trace_handle_return(s); |
880 | } | 876 | } |
881 | 877 | ||
882 | typedef bool (*filter_func_t)(struct uprobe_consumer *self, | 878 | typedef bool (*filter_func_t)(struct uprobe_consumer *self, |