summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2018-07-17 03:31:30 -0400
committerIngo Molnar <mingo@kernel.org>2018-07-17 03:31:30 -0400
commit37c45b2354cb2270f246679bedd8bf798cca351c (patch)
tree113dbc1a94cc8e106b98c5d00105c91bd6623419 /kernel
parent5e0fb5df2ee871b841f96f9cb6a7f2784e96aa4e (diff)
parent9d3cce1e8b8561fed5f383d22a4d6949db4eadbe (diff)
Merge tag 'v4.18-rc5' into x86/mm, to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/cgroup.c54
-rw-r--r--kernel/bpf/core.c30
-rw-r--r--kernel/bpf/sockmap.c254
-rw-r--r--kernel/bpf/syscall.c99
-rw-r--r--kernel/kthread.c30
-rw-r--r--kernel/rseq.c41
-rw-r--r--kernel/sched/core.c67
-rw-r--r--kernel/sched/cpufreq_schedutil.c2
-rw-r--r--kernel/sched/fair.c45
-rw-r--r--kernel/sched/rt.c16
-rw-r--r--kernel/sched/sched.h11
-rw-r--r--kernel/time/tick-common.c3
-rw-r--r--kernel/trace/ftrace.c13
-rw-r--r--kernel/trace/trace.c13
-rw-r--r--kernel/trace/trace.h4
-rw-r--r--kernel/trace/trace_events_filter.c5
-rw-r--r--kernel/trace/trace_events_hist.c2
-rw-r--r--kernel/trace/trace_functions_graph.c5
-rw-r--r--kernel/trace/trace_kprobe.c6
-rw-r--r--kernel/trace/trace_output.c5
20 files changed, 413 insertions, 292 deletions
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index f7c00bd6f8e4..3d83ee7df381 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -428,6 +428,60 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
428 return ret; 428 return ret;
429} 429}
430 430
431int cgroup_bpf_prog_attach(const union bpf_attr *attr,
432 enum bpf_prog_type ptype, struct bpf_prog *prog)
433{
434 struct cgroup *cgrp;
435 int ret;
436
437 cgrp = cgroup_get_from_fd(attr->target_fd);
438 if (IS_ERR(cgrp))
439 return PTR_ERR(cgrp);
440
441 ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
442 attr->attach_flags);
443 cgroup_put(cgrp);
444 return ret;
445}
446
447int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
448{
449 struct bpf_prog *prog;
450 struct cgroup *cgrp;
451 int ret;
452
453 cgrp = cgroup_get_from_fd(attr->target_fd);
454 if (IS_ERR(cgrp))
455 return PTR_ERR(cgrp);
456
457 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
458 if (IS_ERR(prog))
459 prog = NULL;
460
461 ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
462 if (prog)
463 bpf_prog_put(prog);
464
465 cgroup_put(cgrp);
466 return ret;
467}
468
469int cgroup_bpf_prog_query(const union bpf_attr *attr,
470 union bpf_attr __user *uattr)
471{
472 struct cgroup *cgrp;
473 int ret;
474
475 cgrp = cgroup_get_from_fd(attr->query.target_fd);
476 if (IS_ERR(cgrp))
477 return PTR_ERR(cgrp);
478
479 ret = cgroup_bpf_query(cgrp, attr, uattr);
480
481 cgroup_put(cgrp);
482 return ret;
483}
484
431/** 485/**
432 * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering 486 * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
433 * @sk: The socket sending or receiving traffic 487 * @sk: The socket sending or receiving traffic
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index a9e6c04d0f4a..1e5625d46414 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -598,8 +598,6 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
598 bpf_fill_ill_insns(hdr, size); 598 bpf_fill_ill_insns(hdr, size);
599 599
600 hdr->pages = size / PAGE_SIZE; 600 hdr->pages = size / PAGE_SIZE;
601 hdr->locked = 0;
602
603 hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)), 601 hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
604 PAGE_SIZE - sizeof(*hdr)); 602 PAGE_SIZE - sizeof(*hdr));
605 start = (get_random_int() % hole) & ~(alignment - 1); 603 start = (get_random_int() % hole) & ~(alignment - 1);
@@ -1450,22 +1448,6 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
1450 return 0; 1448 return 0;
1451} 1449}
1452 1450
1453static int bpf_prog_check_pages_ro_locked(const struct bpf_prog *fp)
1454{
1455#ifdef CONFIG_ARCH_HAS_SET_MEMORY
1456 int i, err;
1457
1458 for (i = 0; i < fp->aux->func_cnt; i++) {
1459 err = bpf_prog_check_pages_ro_single(fp->aux->func[i]);
1460 if (err)
1461 return err;
1462 }
1463
1464 return bpf_prog_check_pages_ro_single(fp);
1465#endif
1466 return 0;
1467}
1468
1469static void bpf_prog_select_func(struct bpf_prog *fp) 1451static void bpf_prog_select_func(struct bpf_prog *fp)
1470{ 1452{
1471#ifndef CONFIG_BPF_JIT_ALWAYS_ON 1453#ifndef CONFIG_BPF_JIT_ALWAYS_ON
@@ -1524,17 +1506,7 @@ finalize:
1524 * all eBPF JITs might immediately support all features. 1506 * all eBPF JITs might immediately support all features.
1525 */ 1507 */
1526 *err = bpf_check_tail_call(fp); 1508 *err = bpf_check_tail_call(fp);
1527 if (*err) 1509
1528 return fp;
1529
1530 /* Checkpoint: at this point onwards any cBPF -> eBPF or
1531 * native eBPF program is read-only. If we failed to change
1532 * the page attributes (e.g. allocation failure from
1533 * splitting large pages), then reject the whole program
1534 * in order to guarantee not ending up with any W+X pages
1535 * from BPF side in kernel.
1536 */
1537 *err = bpf_prog_check_pages_ro_locked(fp);
1538 return fp; 1510 return fp;
1539} 1511}
1540EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); 1512EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 52a91d816c0e..cf7b6a6dbd1f 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -72,6 +72,7 @@ struct bpf_htab {
72 u32 n_buckets; 72 u32 n_buckets;
73 u32 elem_size; 73 u32 elem_size;
74 struct bpf_sock_progs progs; 74 struct bpf_sock_progs progs;
75 struct rcu_head rcu;
75}; 76};
76 77
77struct htab_elem { 78struct htab_elem {
@@ -89,8 +90,8 @@ enum smap_psock_state {
89struct smap_psock_map_entry { 90struct smap_psock_map_entry {
90 struct list_head list; 91 struct list_head list;
91 struct sock **entry; 92 struct sock **entry;
92 struct htab_elem *hash_link; 93 struct htab_elem __rcu *hash_link;
93 struct bpf_htab *htab; 94 struct bpf_htab __rcu *htab;
94}; 95};
95 96
96struct smap_psock { 97struct smap_psock {
@@ -120,6 +121,7 @@ struct smap_psock {
120 struct bpf_prog *bpf_parse; 121 struct bpf_prog *bpf_parse;
121 struct bpf_prog *bpf_verdict; 122 struct bpf_prog *bpf_verdict;
122 struct list_head maps; 123 struct list_head maps;
124 spinlock_t maps_lock;
123 125
124 /* Back reference used when sock callback trigger sockmap operations */ 126 /* Back reference used when sock callback trigger sockmap operations */
125 struct sock *sock; 127 struct sock *sock;
@@ -140,6 +142,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
140static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); 142static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
141static int bpf_tcp_sendpage(struct sock *sk, struct page *page, 143static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
142 int offset, size_t size, int flags); 144 int offset, size_t size, int flags);
145static void bpf_tcp_close(struct sock *sk, long timeout);
143 146
144static inline struct smap_psock *smap_psock_sk(const struct sock *sk) 147static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
145{ 148{
@@ -161,7 +164,42 @@ out:
161 return !empty; 164 return !empty;
162} 165}
163 166
164static struct proto tcp_bpf_proto; 167enum {
168 SOCKMAP_IPV4,
169 SOCKMAP_IPV6,
170 SOCKMAP_NUM_PROTS,
171};
172
173enum {
174 SOCKMAP_BASE,
175 SOCKMAP_TX,
176 SOCKMAP_NUM_CONFIGS,
177};
178
179static struct proto *saved_tcpv6_prot __read_mostly;
180static DEFINE_SPINLOCK(tcpv6_prot_lock);
181static struct proto bpf_tcp_prots[SOCKMAP_NUM_PROTS][SOCKMAP_NUM_CONFIGS];
182static void build_protos(struct proto prot[SOCKMAP_NUM_CONFIGS],
183 struct proto *base)
184{
185 prot[SOCKMAP_BASE] = *base;
186 prot[SOCKMAP_BASE].close = bpf_tcp_close;
187 prot[SOCKMAP_BASE].recvmsg = bpf_tcp_recvmsg;
188 prot[SOCKMAP_BASE].stream_memory_read = bpf_tcp_stream_read;
189
190 prot[SOCKMAP_TX] = prot[SOCKMAP_BASE];
191 prot[SOCKMAP_TX].sendmsg = bpf_tcp_sendmsg;
192 prot[SOCKMAP_TX].sendpage = bpf_tcp_sendpage;
193}
194
195static void update_sk_prot(struct sock *sk, struct smap_psock *psock)
196{
197 int family = sk->sk_family == AF_INET6 ? SOCKMAP_IPV6 : SOCKMAP_IPV4;
198 int conf = psock->bpf_tx_msg ? SOCKMAP_TX : SOCKMAP_BASE;
199
200 sk->sk_prot = &bpf_tcp_prots[family][conf];
201}
202
165static int bpf_tcp_init(struct sock *sk) 203static int bpf_tcp_init(struct sock *sk)
166{ 204{
167 struct smap_psock *psock; 205 struct smap_psock *psock;
@@ -181,14 +219,17 @@ static int bpf_tcp_init(struct sock *sk)
181 psock->save_close = sk->sk_prot->close; 219 psock->save_close = sk->sk_prot->close;
182 psock->sk_proto = sk->sk_prot; 220 psock->sk_proto = sk->sk_prot;
183 221
184 if (psock->bpf_tx_msg) { 222 /* Build IPv6 sockmap whenever the address of tcpv6_prot changes */
185 tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg; 223 if (sk->sk_family == AF_INET6 &&
186 tcp_bpf_proto.sendpage = bpf_tcp_sendpage; 224 unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) {
187 tcp_bpf_proto.recvmsg = bpf_tcp_recvmsg; 225 spin_lock_bh(&tcpv6_prot_lock);
188 tcp_bpf_proto.stream_memory_read = bpf_tcp_stream_read; 226 if (likely(sk->sk_prot != saved_tcpv6_prot)) {
227 build_protos(bpf_tcp_prots[SOCKMAP_IPV6], sk->sk_prot);
228 smp_store_release(&saved_tcpv6_prot, sk->sk_prot);
229 }
230 spin_unlock_bh(&tcpv6_prot_lock);
189 } 231 }
190 232 update_sk_prot(sk, psock);
191 sk->sk_prot = &tcp_bpf_proto;
192 rcu_read_unlock(); 233 rcu_read_unlock();
193 return 0; 234 return 0;
194} 235}
@@ -219,16 +260,54 @@ out:
219 rcu_read_unlock(); 260 rcu_read_unlock();
220} 261}
221 262
263static struct htab_elem *lookup_elem_raw(struct hlist_head *head,
264 u32 hash, void *key, u32 key_size)
265{
266 struct htab_elem *l;
267
268 hlist_for_each_entry_rcu(l, head, hash_node) {
269 if (l->hash == hash && !memcmp(&l->key, key, key_size))
270 return l;
271 }
272
273 return NULL;
274}
275
276static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
277{
278 return &htab->buckets[hash & (htab->n_buckets - 1)];
279}
280
281static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
282{
283 return &__select_bucket(htab, hash)->head;
284}
285
222static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) 286static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
223{ 287{
224 atomic_dec(&htab->count); 288 atomic_dec(&htab->count);
225 kfree_rcu(l, rcu); 289 kfree_rcu(l, rcu);
226} 290}
227 291
292static struct smap_psock_map_entry *psock_map_pop(struct sock *sk,
293 struct smap_psock *psock)
294{
295 struct smap_psock_map_entry *e;
296
297 spin_lock_bh(&psock->maps_lock);
298 e = list_first_entry_or_null(&psock->maps,
299 struct smap_psock_map_entry,
300 list);
301 if (e)
302 list_del(&e->list);
303 spin_unlock_bh(&psock->maps_lock);
304 return e;
305}
306
228static void bpf_tcp_close(struct sock *sk, long timeout) 307static void bpf_tcp_close(struct sock *sk, long timeout)
229{ 308{
230 void (*close_fun)(struct sock *sk, long timeout); 309 void (*close_fun)(struct sock *sk, long timeout);
231 struct smap_psock_map_entry *e, *tmp; 310 struct smap_psock_map_entry *e;
232 struct sk_msg_buff *md, *mtmp; 311 struct sk_msg_buff *md, *mtmp;
233 struct smap_psock *psock; 312 struct smap_psock *psock;
234 struct sock *osk; 313 struct sock *osk;
@@ -247,7 +326,6 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
247 */ 326 */
248 close_fun = psock->save_close; 327 close_fun = psock->save_close;
249 328
250 write_lock_bh(&sk->sk_callback_lock);
251 if (psock->cork) { 329 if (psock->cork) {
252 free_start_sg(psock->sock, psock->cork); 330 free_start_sg(psock->sock, psock->cork);
253 kfree(psock->cork); 331 kfree(psock->cork);
@@ -260,20 +338,38 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
260 kfree(md); 338 kfree(md);
261 } 339 }
262 340
263 list_for_each_entry_safe(e, tmp, &psock->maps, list) { 341 e = psock_map_pop(sk, psock);
342 while (e) {
264 if (e->entry) { 343 if (e->entry) {
265 osk = cmpxchg(e->entry, sk, NULL); 344 osk = cmpxchg(e->entry, sk, NULL);
266 if (osk == sk) { 345 if (osk == sk) {
267 list_del(&e->list);
268 smap_release_sock(psock, sk); 346 smap_release_sock(psock, sk);
269 } 347 }
270 } else { 348 } else {
271 hlist_del_rcu(&e->hash_link->hash_node); 349 struct htab_elem *link = rcu_dereference(e->hash_link);
272 smap_release_sock(psock, e->hash_link->sk); 350 struct bpf_htab *htab = rcu_dereference(e->htab);
273 free_htab_elem(e->htab, e->hash_link); 351 struct hlist_head *head;
352 struct htab_elem *l;
353 struct bucket *b;
354
355 b = __select_bucket(htab, link->hash);
356 head = &b->head;
357 raw_spin_lock_bh(&b->lock);
358 l = lookup_elem_raw(head,
359 link->hash, link->key,
360 htab->map.key_size);
361 /* If another thread deleted this object skip deletion.
362 * The refcnt on psock may or may not be zero.
363 */
364 if (l) {
365 hlist_del_rcu(&link->hash_node);
366 smap_release_sock(psock, link->sk);
367 free_htab_elem(htab, link);
368 }
369 raw_spin_unlock_bh(&b->lock);
274 } 370 }
371 e = psock_map_pop(sk, psock);
275 } 372 }
276 write_unlock_bh(&sk->sk_callback_lock);
277 rcu_read_unlock(); 373 rcu_read_unlock();
278 close_fun(sk, timeout); 374 close_fun(sk, timeout);
279} 375}
@@ -1111,8 +1207,7 @@ static void bpf_tcp_msg_add(struct smap_psock *psock,
1111 1207
1112static int bpf_tcp_ulp_register(void) 1208static int bpf_tcp_ulp_register(void)
1113{ 1209{
1114 tcp_bpf_proto = tcp_prot; 1210 build_protos(bpf_tcp_prots[SOCKMAP_IPV4], &tcp_prot);
1115 tcp_bpf_proto.close = bpf_tcp_close;
1116 /* Once BPF TX ULP is registered it is never unregistered. It 1211 /* Once BPF TX ULP is registered it is never unregistered. It
1117 * will be in the ULP list for the lifetime of the system. Doing 1212 * will be in the ULP list for the lifetime of the system. Doing
1118 * duplicate registers is not a problem. 1213 * duplicate registers is not a problem.
@@ -1357,7 +1452,9 @@ static void smap_release_sock(struct smap_psock *psock, struct sock *sock)
1357{ 1452{
1358 if (refcount_dec_and_test(&psock->refcnt)) { 1453 if (refcount_dec_and_test(&psock->refcnt)) {
1359 tcp_cleanup_ulp(sock); 1454 tcp_cleanup_ulp(sock);
1455 write_lock_bh(&sock->sk_callback_lock);
1360 smap_stop_sock(psock, sock); 1456 smap_stop_sock(psock, sock);
1457 write_unlock_bh(&sock->sk_callback_lock);
1361 clear_bit(SMAP_TX_RUNNING, &psock->state); 1458 clear_bit(SMAP_TX_RUNNING, &psock->state);
1362 rcu_assign_sk_user_data(sock, NULL); 1459 rcu_assign_sk_user_data(sock, NULL);
1363 call_rcu_sched(&psock->rcu, smap_destroy_psock); 1460 call_rcu_sched(&psock->rcu, smap_destroy_psock);
@@ -1508,6 +1605,7 @@ static struct smap_psock *smap_init_psock(struct sock *sock, int node)
1508 INIT_LIST_HEAD(&psock->maps); 1605 INIT_LIST_HEAD(&psock->maps);
1509 INIT_LIST_HEAD(&psock->ingress); 1606 INIT_LIST_HEAD(&psock->ingress);
1510 refcount_set(&psock->refcnt, 1); 1607 refcount_set(&psock->refcnt, 1);
1608 spin_lock_init(&psock->maps_lock);
1511 1609
1512 rcu_assign_sk_user_data(sock, psock); 1610 rcu_assign_sk_user_data(sock, psock);
1513 sock_hold(sock); 1611 sock_hold(sock);
@@ -1564,18 +1662,32 @@ free_stab:
1564 return ERR_PTR(err); 1662 return ERR_PTR(err);
1565} 1663}
1566 1664
1567static void smap_list_remove(struct smap_psock *psock, 1665static void smap_list_map_remove(struct smap_psock *psock,
1568 struct sock **entry, 1666 struct sock **entry)
1569 struct htab_elem *hash_link)
1570{ 1667{
1571 struct smap_psock_map_entry *e, *tmp; 1668 struct smap_psock_map_entry *e, *tmp;
1572 1669
1670 spin_lock_bh(&psock->maps_lock);
1573 list_for_each_entry_safe(e, tmp, &psock->maps, list) { 1671 list_for_each_entry_safe(e, tmp, &psock->maps, list) {
1574 if (e->entry == entry || e->hash_link == hash_link) { 1672 if (e->entry == entry)
1575 list_del(&e->list); 1673 list_del(&e->list);
1576 break;
1577 }
1578 } 1674 }
1675 spin_unlock_bh(&psock->maps_lock);
1676}
1677
1678static void smap_list_hash_remove(struct smap_psock *psock,
1679 struct htab_elem *hash_link)
1680{
1681 struct smap_psock_map_entry *e, *tmp;
1682
1683 spin_lock_bh(&psock->maps_lock);
1684 list_for_each_entry_safe(e, tmp, &psock->maps, list) {
1685 struct htab_elem *c = rcu_dereference(e->hash_link);
1686
1687 if (c == hash_link)
1688 list_del(&e->list);
1689 }
1690 spin_unlock_bh(&psock->maps_lock);
1579} 1691}
1580 1692
1581static void sock_map_free(struct bpf_map *map) 1693static void sock_map_free(struct bpf_map *map)
@@ -1601,7 +1713,6 @@ static void sock_map_free(struct bpf_map *map)
1601 if (!sock) 1713 if (!sock)
1602 continue; 1714 continue;
1603 1715
1604 write_lock_bh(&sock->sk_callback_lock);
1605 psock = smap_psock_sk(sock); 1716 psock = smap_psock_sk(sock);
1606 /* This check handles a racing sock event that can get the 1717 /* This check handles a racing sock event that can get the
1607 * sk_callback_lock before this case but after xchg happens 1718 * sk_callback_lock before this case but after xchg happens
@@ -1609,10 +1720,9 @@ static void sock_map_free(struct bpf_map *map)
1609 * to be null and queued for garbage collection. 1720 * to be null and queued for garbage collection.
1610 */ 1721 */
1611 if (likely(psock)) { 1722 if (likely(psock)) {
1612 smap_list_remove(psock, &stab->sock_map[i], NULL); 1723 smap_list_map_remove(psock, &stab->sock_map[i]);
1613 smap_release_sock(psock, sock); 1724 smap_release_sock(psock, sock);
1614 } 1725 }
1615 write_unlock_bh(&sock->sk_callback_lock);
1616 } 1726 }
1617 rcu_read_unlock(); 1727 rcu_read_unlock();
1618 1728
@@ -1661,17 +1771,15 @@ static int sock_map_delete_elem(struct bpf_map *map, void *key)
1661 if (!sock) 1771 if (!sock)
1662 return -EINVAL; 1772 return -EINVAL;
1663 1773
1664 write_lock_bh(&sock->sk_callback_lock);
1665 psock = smap_psock_sk(sock); 1774 psock = smap_psock_sk(sock);
1666 if (!psock) 1775 if (!psock)
1667 goto out; 1776 goto out;
1668 1777
1669 if (psock->bpf_parse) 1778 if (psock->bpf_parse)
1670 smap_stop_sock(psock, sock); 1779 smap_stop_sock(psock, sock);
1671 smap_list_remove(psock, &stab->sock_map[k], NULL); 1780 smap_list_map_remove(psock, &stab->sock_map[k]);
1672 smap_release_sock(psock, sock); 1781 smap_release_sock(psock, sock);
1673out: 1782out:
1674 write_unlock_bh(&sock->sk_callback_lock);
1675 return 0; 1783 return 0;
1676} 1784}
1677 1785
@@ -1752,7 +1860,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
1752 } 1860 }
1753 } 1861 }
1754 1862
1755 write_lock_bh(&sock->sk_callback_lock);
1756 psock = smap_psock_sk(sock); 1863 psock = smap_psock_sk(sock);
1757 1864
1758 /* 2. Do not allow inheriting programs if psock exists and has 1865 /* 2. Do not allow inheriting programs if psock exists and has
@@ -1809,7 +1916,9 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
1809 if (err) 1916 if (err)
1810 goto out_free; 1917 goto out_free;
1811 smap_init_progs(psock, verdict, parse); 1918 smap_init_progs(psock, verdict, parse);
1919 write_lock_bh(&sock->sk_callback_lock);
1812 smap_start_sock(psock, sock); 1920 smap_start_sock(psock, sock);
1921 write_unlock_bh(&sock->sk_callback_lock);
1813 } 1922 }
1814 1923
1815 /* 4. Place psock in sockmap for use and stop any programs on 1924 /* 4. Place psock in sockmap for use and stop any programs on
@@ -1819,9 +1928,10 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
1819 */ 1928 */
1820 if (map_link) { 1929 if (map_link) {
1821 e->entry = map_link; 1930 e->entry = map_link;
1931 spin_lock_bh(&psock->maps_lock);
1822 list_add_tail(&e->list, &psock->maps); 1932 list_add_tail(&e->list, &psock->maps);
1933 spin_unlock_bh(&psock->maps_lock);
1823 } 1934 }
1824 write_unlock_bh(&sock->sk_callback_lock);
1825 return err; 1935 return err;
1826out_free: 1936out_free:
1827 smap_release_sock(psock, sock); 1937 smap_release_sock(psock, sock);
@@ -1832,7 +1942,6 @@ out_progs:
1832 } 1942 }
1833 if (tx_msg) 1943 if (tx_msg)
1834 bpf_prog_put(tx_msg); 1944 bpf_prog_put(tx_msg);
1835 write_unlock_bh(&sock->sk_callback_lock);
1836 kfree(e); 1945 kfree(e);
1837 return err; 1946 return err;
1838} 1947}
@@ -1869,10 +1978,8 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
1869 if (osock) { 1978 if (osock) {
1870 struct smap_psock *opsock = smap_psock_sk(osock); 1979 struct smap_psock *opsock = smap_psock_sk(osock);
1871 1980
1872 write_lock_bh(&osock->sk_callback_lock); 1981 smap_list_map_remove(opsock, &stab->sock_map[i]);
1873 smap_list_remove(opsock, &stab->sock_map[i], NULL);
1874 smap_release_sock(opsock, osock); 1982 smap_release_sock(opsock, osock);
1875 write_unlock_bh(&osock->sk_callback_lock);
1876 } 1983 }
1877out: 1984out:
1878 return err; 1985 return err;
@@ -1915,6 +2022,24 @@ int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type)
1915 return 0; 2022 return 0;
1916} 2023}
1917 2024
2025int sockmap_get_from_fd(const union bpf_attr *attr, int type,
2026 struct bpf_prog *prog)
2027{
2028 int ufd = attr->target_fd;
2029 struct bpf_map *map;
2030 struct fd f;
2031 int err;
2032
2033 f = fdget(ufd);
2034 map = __bpf_map_get(f);
2035 if (IS_ERR(map))
2036 return PTR_ERR(map);
2037
2038 err = sock_map_prog(map, prog, attr->attach_type);
2039 fdput(f);
2040 return err;
2041}
2042
1918static void *sock_map_lookup(struct bpf_map *map, void *key) 2043static void *sock_map_lookup(struct bpf_map *map, void *key)
1919{ 2044{
1920 return NULL; 2045 return NULL;
@@ -2043,14 +2168,13 @@ free_htab:
2043 return ERR_PTR(err); 2168 return ERR_PTR(err);
2044} 2169}
2045 2170
2046static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash) 2171static void __bpf_htab_free(struct rcu_head *rcu)
2047{ 2172{
2048 return &htab->buckets[hash & (htab->n_buckets - 1)]; 2173 struct bpf_htab *htab;
2049}
2050 2174
2051static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash) 2175 htab = container_of(rcu, struct bpf_htab, rcu);
2052{ 2176 bpf_map_area_free(htab->buckets);
2053 return &__select_bucket(htab, hash)->head; 2177 kfree(htab);
2054} 2178}
2055 2179
2056static void sock_hash_free(struct bpf_map *map) 2180static void sock_hash_free(struct bpf_map *map)
@@ -2069,16 +2193,18 @@ static void sock_hash_free(struct bpf_map *map)
2069 */ 2193 */
2070 rcu_read_lock(); 2194 rcu_read_lock();
2071 for (i = 0; i < htab->n_buckets; i++) { 2195 for (i = 0; i < htab->n_buckets; i++) {
2072 struct hlist_head *head = select_bucket(htab, i); 2196 struct bucket *b = __select_bucket(htab, i);
2197 struct hlist_head *head;
2073 struct hlist_node *n; 2198 struct hlist_node *n;
2074 struct htab_elem *l; 2199 struct htab_elem *l;
2075 2200
2201 raw_spin_lock_bh(&b->lock);
2202 head = &b->head;
2076 hlist_for_each_entry_safe(l, n, head, hash_node) { 2203 hlist_for_each_entry_safe(l, n, head, hash_node) {
2077 struct sock *sock = l->sk; 2204 struct sock *sock = l->sk;
2078 struct smap_psock *psock; 2205 struct smap_psock *psock;
2079 2206
2080 hlist_del_rcu(&l->hash_node); 2207 hlist_del_rcu(&l->hash_node);
2081 write_lock_bh(&sock->sk_callback_lock);
2082 psock = smap_psock_sk(sock); 2208 psock = smap_psock_sk(sock);
2083 /* This check handles a racing sock event that can get 2209 /* This check handles a racing sock event that can get
2084 * the sk_callback_lock before this case but after xchg 2210 * the sk_callback_lock before this case but after xchg
@@ -2086,16 +2212,15 @@ static void sock_hash_free(struct bpf_map *map)
2086 * (psock) to be null and queued for garbage collection. 2212 * (psock) to be null and queued for garbage collection.
2087 */ 2213 */
2088 if (likely(psock)) { 2214 if (likely(psock)) {
2089 smap_list_remove(psock, NULL, l); 2215 smap_list_hash_remove(psock, l);
2090 smap_release_sock(psock, sock); 2216 smap_release_sock(psock, sock);
2091 } 2217 }
2092 write_unlock_bh(&sock->sk_callback_lock); 2218 free_htab_elem(htab, l);
2093 kfree(l);
2094 } 2219 }
2220 raw_spin_unlock_bh(&b->lock);
2095 } 2221 }
2096 rcu_read_unlock(); 2222 rcu_read_unlock();
2097 bpf_map_area_free(htab->buckets); 2223 call_rcu(&htab->rcu, __bpf_htab_free);
2098 kfree(htab);
2099} 2224}
2100 2225
2101static struct htab_elem *alloc_sock_hash_elem(struct bpf_htab *htab, 2226static struct htab_elem *alloc_sock_hash_elem(struct bpf_htab *htab,
@@ -2122,19 +2247,6 @@ static struct htab_elem *alloc_sock_hash_elem(struct bpf_htab *htab,
2122 return l_new; 2247 return l_new;
2123} 2248}
2124 2249
2125static struct htab_elem *lookup_elem_raw(struct hlist_head *head,
2126 u32 hash, void *key, u32 key_size)
2127{
2128 struct htab_elem *l;
2129
2130 hlist_for_each_entry_rcu(l, head, hash_node) {
2131 if (l->hash == hash && !memcmp(&l->key, key, key_size))
2132 return l;
2133 }
2134
2135 return NULL;
2136}
2137
2138static inline u32 htab_map_hash(const void *key, u32 key_len) 2250static inline u32 htab_map_hash(const void *key, u32 key_len)
2139{ 2251{
2140 return jhash(key, key_len, 0); 2252 return jhash(key, key_len, 0);
@@ -2254,9 +2366,12 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
2254 goto bucket_err; 2366 goto bucket_err;
2255 } 2367 }
2256 2368
2257 e->hash_link = l_new; 2369 rcu_assign_pointer(e->hash_link, l_new);
2258 e->htab = container_of(map, struct bpf_htab, map); 2370 rcu_assign_pointer(e->htab,
2371 container_of(map, struct bpf_htab, map));
2372 spin_lock_bh(&psock->maps_lock);
2259 list_add_tail(&e->list, &psock->maps); 2373 list_add_tail(&e->list, &psock->maps);
2374 spin_unlock_bh(&psock->maps_lock);
2260 2375
2261 /* add new element to the head of the list, so that 2376 /* add new element to the head of the list, so that
2262 * concurrent search will find it before old elem 2377 * concurrent search will find it before old elem
@@ -2266,7 +2381,7 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
2266 psock = smap_psock_sk(l_old->sk); 2381 psock = smap_psock_sk(l_old->sk);
2267 2382
2268 hlist_del_rcu(&l_old->hash_node); 2383 hlist_del_rcu(&l_old->hash_node);
2269 smap_list_remove(psock, NULL, l_old); 2384 smap_list_hash_remove(psock, l_old);
2270 smap_release_sock(psock, l_old->sk); 2385 smap_release_sock(psock, l_old->sk);
2271 free_htab_elem(htab, l_old); 2386 free_htab_elem(htab, l_old);
2272 } 2387 }
@@ -2326,7 +2441,6 @@ static int sock_hash_delete_elem(struct bpf_map *map, void *key)
2326 struct smap_psock *psock; 2441 struct smap_psock *psock;
2327 2442
2328 hlist_del_rcu(&l->hash_node); 2443 hlist_del_rcu(&l->hash_node);
2329 write_lock_bh(&sock->sk_callback_lock);
2330 psock = smap_psock_sk(sock); 2444 psock = smap_psock_sk(sock);
2331 /* This check handles a racing sock event that can get the 2445 /* This check handles a racing sock event that can get the
2332 * sk_callback_lock before this case but after xchg happens 2446 * sk_callback_lock before this case but after xchg happens
@@ -2334,10 +2448,9 @@ static int sock_hash_delete_elem(struct bpf_map *map, void *key)
2334 * to be null and queued for garbage collection. 2448 * to be null and queued for garbage collection.
2335 */ 2449 */
2336 if (likely(psock)) { 2450 if (likely(psock)) {
2337 smap_list_remove(psock, NULL, l); 2451 smap_list_hash_remove(psock, l);
2338 smap_release_sock(psock, sock); 2452 smap_release_sock(psock, sock);
2339 } 2453 }
2340 write_unlock_bh(&sock->sk_callback_lock);
2341 free_htab_elem(htab, l); 2454 free_htab_elem(htab, l);
2342 ret = 0; 2455 ret = 0;
2343 } 2456 }
@@ -2383,6 +2496,7 @@ const struct bpf_map_ops sock_hash_ops = {
2383 .map_get_next_key = sock_hash_get_next_key, 2496 .map_get_next_key = sock_hash_get_next_key,
2384 .map_update_elem = sock_hash_update_elem, 2497 .map_update_elem = sock_hash_update_elem,
2385 .map_delete_elem = sock_hash_delete_elem, 2498 .map_delete_elem = sock_hash_delete_elem,
2499 .map_release_uref = sock_map_release,
2386}; 2500};
2387 2501
2388BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock, 2502BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 35dc466641f2..d10ecd78105f 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1483,8 +1483,6 @@ out_free_tp:
1483 return err; 1483 return err;
1484} 1484}
1485 1485
1486#ifdef CONFIG_CGROUP_BPF
1487
1488static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, 1486static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
1489 enum bpf_attach_type attach_type) 1487 enum bpf_attach_type attach_type)
1490{ 1488{
@@ -1499,40 +1497,6 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
1499 1497
1500#define BPF_PROG_ATTACH_LAST_FIELD attach_flags 1498#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
1501 1499
1502static int sockmap_get_from_fd(const union bpf_attr *attr,
1503 int type, bool attach)
1504{
1505 struct bpf_prog *prog = NULL;
1506 int ufd = attr->target_fd;
1507 struct bpf_map *map;
1508 struct fd f;
1509 int err;
1510
1511 f = fdget(ufd);
1512 map = __bpf_map_get(f);
1513 if (IS_ERR(map))
1514 return PTR_ERR(map);
1515
1516 if (attach) {
1517 prog = bpf_prog_get_type(attr->attach_bpf_fd, type);
1518 if (IS_ERR(prog)) {
1519 fdput(f);
1520 return PTR_ERR(prog);
1521 }
1522 }
1523
1524 err = sock_map_prog(map, prog, attr->attach_type);
1525 if (err) {
1526 fdput(f);
1527 if (prog)
1528 bpf_prog_put(prog);
1529 return err;
1530 }
1531
1532 fdput(f);
1533 return 0;
1534}
1535
1536#define BPF_F_ATTACH_MASK \ 1500#define BPF_F_ATTACH_MASK \
1537 (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI) 1501 (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)
1538 1502
@@ -1540,7 +1504,6 @@ static int bpf_prog_attach(const union bpf_attr *attr)
1540{ 1504{
1541 enum bpf_prog_type ptype; 1505 enum bpf_prog_type ptype;
1542 struct bpf_prog *prog; 1506 struct bpf_prog *prog;
1543 struct cgroup *cgrp;
1544 int ret; 1507 int ret;
1545 1508
1546 if (!capable(CAP_NET_ADMIN)) 1509 if (!capable(CAP_NET_ADMIN))
@@ -1577,12 +1540,15 @@ static int bpf_prog_attach(const union bpf_attr *attr)
1577 ptype = BPF_PROG_TYPE_CGROUP_DEVICE; 1540 ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
1578 break; 1541 break;
1579 case BPF_SK_MSG_VERDICT: 1542 case BPF_SK_MSG_VERDICT:
1580 return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, true); 1543 ptype = BPF_PROG_TYPE_SK_MSG;
1544 break;
1581 case BPF_SK_SKB_STREAM_PARSER: 1545 case BPF_SK_SKB_STREAM_PARSER:
1582 case BPF_SK_SKB_STREAM_VERDICT: 1546 case BPF_SK_SKB_STREAM_VERDICT:
1583 return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, true); 1547 ptype = BPF_PROG_TYPE_SK_SKB;
1548 break;
1584 case BPF_LIRC_MODE2: 1549 case BPF_LIRC_MODE2:
1585 return lirc_prog_attach(attr); 1550 ptype = BPF_PROG_TYPE_LIRC_MODE2;
1551 break;
1586 default: 1552 default:
1587 return -EINVAL; 1553 return -EINVAL;
1588 } 1554 }
@@ -1596,18 +1562,20 @@ static int bpf_prog_attach(const union bpf_attr *attr)
1596 return -EINVAL; 1562 return -EINVAL;
1597 } 1563 }
1598 1564
1599 cgrp = cgroup_get_from_fd(attr->target_fd); 1565 switch (ptype) {
1600 if (IS_ERR(cgrp)) { 1566 case BPF_PROG_TYPE_SK_SKB:
1601 bpf_prog_put(prog); 1567 case BPF_PROG_TYPE_SK_MSG:
1602 return PTR_ERR(cgrp); 1568 ret = sockmap_get_from_fd(attr, ptype, prog);
1569 break;
1570 case BPF_PROG_TYPE_LIRC_MODE2:
1571 ret = lirc_prog_attach(attr, prog);
1572 break;
1573 default:
1574 ret = cgroup_bpf_prog_attach(attr, ptype, prog);
1603 } 1575 }
1604 1576
1605 ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
1606 attr->attach_flags);
1607 if (ret) 1577 if (ret)
1608 bpf_prog_put(prog); 1578 bpf_prog_put(prog);
1609 cgroup_put(cgrp);
1610
1611 return ret; 1579 return ret;
1612} 1580}
1613 1581
@@ -1616,9 +1584,6 @@ static int bpf_prog_attach(const union bpf_attr *attr)
1616static int bpf_prog_detach(const union bpf_attr *attr) 1584static int bpf_prog_detach(const union bpf_attr *attr)
1617{ 1585{
1618 enum bpf_prog_type ptype; 1586 enum bpf_prog_type ptype;
1619 struct bpf_prog *prog;
1620 struct cgroup *cgrp;
1621 int ret;
1622 1587
1623 if (!capable(CAP_NET_ADMIN)) 1588 if (!capable(CAP_NET_ADMIN))
1624 return -EPERM; 1589 return -EPERM;
@@ -1651,29 +1616,17 @@ static int bpf_prog_detach(const union bpf_attr *attr)
1651 ptype = BPF_PROG_TYPE_CGROUP_DEVICE; 1616 ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
1652 break; 1617 break;
1653 case BPF_SK_MSG_VERDICT: 1618 case BPF_SK_MSG_VERDICT:
1654 return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, false); 1619 return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, NULL);
1655 case BPF_SK_SKB_STREAM_PARSER: 1620 case BPF_SK_SKB_STREAM_PARSER:
1656 case BPF_SK_SKB_STREAM_VERDICT: 1621 case BPF_SK_SKB_STREAM_VERDICT:
1657 return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, false); 1622 return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, NULL);
1658 case BPF_LIRC_MODE2: 1623 case BPF_LIRC_MODE2:
1659 return lirc_prog_detach(attr); 1624 return lirc_prog_detach(attr);
1660 default: 1625 default:
1661 return -EINVAL; 1626 return -EINVAL;
1662 } 1627 }
1663 1628
1664 cgrp = cgroup_get_from_fd(attr->target_fd); 1629 return cgroup_bpf_prog_detach(attr, ptype);
1665 if (IS_ERR(cgrp))
1666 return PTR_ERR(cgrp);
1667
1668 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
1669 if (IS_ERR(prog))
1670 prog = NULL;
1671
1672 ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
1673 if (prog)
1674 bpf_prog_put(prog);
1675 cgroup_put(cgrp);
1676 return ret;
1677} 1630}
1678 1631
1679#define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt 1632#define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt
@@ -1681,9 +1634,6 @@ static int bpf_prog_detach(const union bpf_attr *attr)
1681static int bpf_prog_query(const union bpf_attr *attr, 1634static int bpf_prog_query(const union bpf_attr *attr,
1682 union bpf_attr __user *uattr) 1635 union bpf_attr __user *uattr)
1683{ 1636{
1684 struct cgroup *cgrp;
1685 int ret;
1686
1687 if (!capable(CAP_NET_ADMIN)) 1637 if (!capable(CAP_NET_ADMIN))
1688 return -EPERM; 1638 return -EPERM;
1689 if (CHECK_ATTR(BPF_PROG_QUERY)) 1639 if (CHECK_ATTR(BPF_PROG_QUERY))
@@ -1711,14 +1661,9 @@ static int bpf_prog_query(const union bpf_attr *attr,
1711 default: 1661 default:
1712 return -EINVAL; 1662 return -EINVAL;
1713 } 1663 }
1714 cgrp = cgroup_get_from_fd(attr->query.target_fd); 1664
1715 if (IS_ERR(cgrp)) 1665 return cgroup_bpf_prog_query(attr, uattr);
1716 return PTR_ERR(cgrp);
1717 ret = cgroup_bpf_query(cgrp, attr, uattr);
1718 cgroup_put(cgrp);
1719 return ret;
1720} 1666}
1721#endif /* CONFIG_CGROUP_BPF */
1722 1667
1723#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration 1668#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
1724 1669
@@ -2365,7 +2310,6 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
2365 case BPF_OBJ_GET: 2310 case BPF_OBJ_GET:
2366 err = bpf_obj_get(&attr); 2311 err = bpf_obj_get(&attr);
2367 break; 2312 break;
2368#ifdef CONFIG_CGROUP_BPF
2369 case BPF_PROG_ATTACH: 2313 case BPF_PROG_ATTACH:
2370 err = bpf_prog_attach(&attr); 2314 err = bpf_prog_attach(&attr);
2371 break; 2315 break;
@@ -2375,7 +2319,6 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
2375 case BPF_PROG_QUERY: 2319 case BPF_PROG_QUERY:
2376 err = bpf_prog_query(&attr, uattr); 2320 err = bpf_prog_query(&attr, uattr);
2377 break; 2321 break;
2378#endif
2379 case BPF_PROG_TEST_RUN: 2322 case BPF_PROG_TEST_RUN:
2380 err = bpf_prog_test_run(&attr, uattr); 2323 err = bpf_prog_test_run(&attr, uattr);
2381 break; 2324 break;
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 481951bf091d..750cb8082694 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -177,9 +177,20 @@ void *kthread_probe_data(struct task_struct *task)
177static void __kthread_parkme(struct kthread *self) 177static void __kthread_parkme(struct kthread *self)
178{ 178{
179 for (;;) { 179 for (;;) {
180 set_current_state(TASK_PARKED); 180 /*
181 * TASK_PARKED is a special state; we must serialize against
182 * possible pending wakeups to avoid store-store collisions on
183 * task->state.
184 *
185 * Such a collision might possibly result in the task state
186 * changin from TASK_PARKED and us failing the
187 * wait_task_inactive() in kthread_park().
188 */
189 set_special_state(TASK_PARKED);
181 if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags)) 190 if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
182 break; 191 break;
192
193 complete_all(&self->parked);
183 schedule(); 194 schedule();
184 } 195 }
185 __set_current_state(TASK_RUNNING); 196 __set_current_state(TASK_RUNNING);
@@ -191,11 +202,6 @@ void kthread_parkme(void)
191} 202}
192EXPORT_SYMBOL_GPL(kthread_parkme); 203EXPORT_SYMBOL_GPL(kthread_parkme);
193 204
194void kthread_park_complete(struct task_struct *k)
195{
196 complete_all(&to_kthread(k)->parked);
197}
198
199static int kthread(void *_create) 205static int kthread(void *_create)
200{ 206{
201 /* Copy data: it's on kthread's stack */ 207 /* Copy data: it's on kthread's stack */
@@ -461,6 +467,9 @@ void kthread_unpark(struct task_struct *k)
461 467
462 reinit_completion(&kthread->parked); 468 reinit_completion(&kthread->parked);
463 clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); 469 clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
470 /*
471 * __kthread_parkme() will either see !SHOULD_PARK or get the wakeup.
472 */
464 wake_up_state(k, TASK_PARKED); 473 wake_up_state(k, TASK_PARKED);
465} 474}
466EXPORT_SYMBOL_GPL(kthread_unpark); 475EXPORT_SYMBOL_GPL(kthread_unpark);
@@ -487,7 +496,16 @@ int kthread_park(struct task_struct *k)
487 set_bit(KTHREAD_SHOULD_PARK, &kthread->flags); 496 set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
488 if (k != current) { 497 if (k != current) {
489 wake_up_process(k); 498 wake_up_process(k);
499 /*
500 * Wait for __kthread_parkme() to complete(), this means we
501 * _will_ have TASK_PARKED and are about to call schedule().
502 */
490 wait_for_completion(&kthread->parked); 503 wait_for_completion(&kthread->parked);
504 /*
505 * Now wait for that schedule() to complete and the task to
506 * get scheduled out.
507 */
508 WARN_ON_ONCE(!wait_task_inactive(k, TASK_PARKED));
491 } 509 }
492 510
493 return 0; 511 return 0;
diff --git a/kernel/rseq.c b/kernel/rseq.c
index 22b6acf1ad63..c6242d8594dc 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -85,9 +85,9 @@ static int rseq_update_cpu_id(struct task_struct *t)
85{ 85{
86 u32 cpu_id = raw_smp_processor_id(); 86 u32 cpu_id = raw_smp_processor_id();
87 87
88 if (__put_user(cpu_id, &t->rseq->cpu_id_start)) 88 if (put_user(cpu_id, &t->rseq->cpu_id_start))
89 return -EFAULT; 89 return -EFAULT;
90 if (__put_user(cpu_id, &t->rseq->cpu_id)) 90 if (put_user(cpu_id, &t->rseq->cpu_id))
91 return -EFAULT; 91 return -EFAULT;
92 trace_rseq_update(t); 92 trace_rseq_update(t);
93 return 0; 93 return 0;
@@ -100,14 +100,14 @@ static int rseq_reset_rseq_cpu_id(struct task_struct *t)
100 /* 100 /*
101 * Reset cpu_id_start to its initial state (0). 101 * Reset cpu_id_start to its initial state (0).
102 */ 102 */
103 if (__put_user(cpu_id_start, &t->rseq->cpu_id_start)) 103 if (put_user(cpu_id_start, &t->rseq->cpu_id_start))
104 return -EFAULT; 104 return -EFAULT;
105 /* 105 /*
106 * Reset cpu_id to RSEQ_CPU_ID_UNINITIALIZED, so any user coming 106 * Reset cpu_id to RSEQ_CPU_ID_UNINITIALIZED, so any user coming
107 * in after unregistration can figure out that rseq needs to be 107 * in after unregistration can figure out that rseq needs to be
108 * registered again. 108 * registered again.
109 */ 109 */
110 if (__put_user(cpu_id, &t->rseq->cpu_id)) 110 if (put_user(cpu_id, &t->rseq->cpu_id))
111 return -EFAULT; 111 return -EFAULT;
112 return 0; 112 return 0;
113} 113}
@@ -115,29 +115,36 @@ static int rseq_reset_rseq_cpu_id(struct task_struct *t)
115static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs) 115static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
116{ 116{
117 struct rseq_cs __user *urseq_cs; 117 struct rseq_cs __user *urseq_cs;
118 unsigned long ptr; 118 u64 ptr;
119 u32 __user *usig; 119 u32 __user *usig;
120 u32 sig; 120 u32 sig;
121 int ret; 121 int ret;
122 122
123 ret = __get_user(ptr, &t->rseq->rseq_cs); 123 if (copy_from_user(&ptr, &t->rseq->rseq_cs.ptr64, sizeof(ptr)))
124 if (ret) 124 return -EFAULT;
125 return ret;
126 if (!ptr) { 125 if (!ptr) {
127 memset(rseq_cs, 0, sizeof(*rseq_cs)); 126 memset(rseq_cs, 0, sizeof(*rseq_cs));
128 return 0; 127 return 0;
129 } 128 }
130 urseq_cs = (struct rseq_cs __user *)ptr; 129 if (ptr >= TASK_SIZE)
130 return -EINVAL;
131 urseq_cs = (struct rseq_cs __user *)(unsigned long)ptr;
131 if (copy_from_user(rseq_cs, urseq_cs, sizeof(*rseq_cs))) 132 if (copy_from_user(rseq_cs, urseq_cs, sizeof(*rseq_cs)))
132 return -EFAULT; 133 return -EFAULT;
133 if (rseq_cs->version > 0)
134 return -EINVAL;
135 134
135 if (rseq_cs->start_ip >= TASK_SIZE ||
136 rseq_cs->start_ip + rseq_cs->post_commit_offset >= TASK_SIZE ||
137 rseq_cs->abort_ip >= TASK_SIZE ||
138 rseq_cs->version > 0)
139 return -EINVAL;
140 /* Check for overflow. */
141 if (rseq_cs->start_ip + rseq_cs->post_commit_offset < rseq_cs->start_ip)
142 return -EINVAL;
136 /* Ensure that abort_ip is not in the critical section. */ 143 /* Ensure that abort_ip is not in the critical section. */
137 if (rseq_cs->abort_ip - rseq_cs->start_ip < rseq_cs->post_commit_offset) 144 if (rseq_cs->abort_ip - rseq_cs->start_ip < rseq_cs->post_commit_offset)
138 return -EINVAL; 145 return -EINVAL;
139 146
140 usig = (u32 __user *)(rseq_cs->abort_ip - sizeof(u32)); 147 usig = (u32 __user *)(unsigned long)(rseq_cs->abort_ip - sizeof(u32));
141 ret = get_user(sig, usig); 148 ret = get_user(sig, usig);
142 if (ret) 149 if (ret)
143 return ret; 150 return ret;
@@ -146,7 +153,7 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
146 printk_ratelimited(KERN_WARNING 153 printk_ratelimited(KERN_WARNING
147 "Possible attack attempt. Unexpected rseq signature 0x%x, expecting 0x%x (pid=%d, addr=%p).\n", 154 "Possible attack attempt. Unexpected rseq signature 0x%x, expecting 0x%x (pid=%d, addr=%p).\n",
148 sig, current->rseq_sig, current->pid, usig); 155 sig, current->rseq_sig, current->pid, usig);
149 return -EPERM; 156 return -EINVAL;
150 } 157 }
151 return 0; 158 return 0;
152} 159}
@@ -157,7 +164,7 @@ static int rseq_need_restart(struct task_struct *t, u32 cs_flags)
157 int ret; 164 int ret;
158 165
159 /* Get thread flags. */ 166 /* Get thread flags. */
160 ret = __get_user(flags, &t->rseq->flags); 167 ret = get_user(flags, &t->rseq->flags);
161 if (ret) 168 if (ret)
162 return ret; 169 return ret;
163 170
@@ -195,9 +202,11 @@ static int clear_rseq_cs(struct task_struct *t)
195 * of code outside of the rseq assembly block. This performs 202 * of code outside of the rseq assembly block. This performs
196 * a lazy clear of the rseq_cs field. 203 * a lazy clear of the rseq_cs field.
197 * 204 *
198 * Set rseq_cs to NULL with single-copy atomicity. 205 * Set rseq_cs to NULL.
199 */ 206 */
200 return __put_user(0UL, &t->rseq->rseq_cs); 207 if (clear_user(&t->rseq->rseq_cs.ptr64, sizeof(t->rseq->rseq_cs.ptr64)))
208 return -EFAULT;
209 return 0;
201} 210}
202 211
203/* 212/*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 78d8facba456..fe365c9a08e9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7,7 +7,6 @@
7 */ 7 */
8#include "sched.h" 8#include "sched.h"
9 9
10#include <linux/kthread.h>
11#include <linux/nospec.h> 10#include <linux/nospec.h>
12 11
13#include <linux/kcov.h> 12#include <linux/kcov.h>
@@ -2724,28 +2723,20 @@ static struct rq *finish_task_switch(struct task_struct *prev)
2724 membarrier_mm_sync_core_before_usermode(mm); 2723 membarrier_mm_sync_core_before_usermode(mm);
2725 mmdrop(mm); 2724 mmdrop(mm);
2726 } 2725 }
2727 if (unlikely(prev_state & (TASK_DEAD|TASK_PARKED))) { 2726 if (unlikely(prev_state == TASK_DEAD)) {
2728 switch (prev_state) { 2727 if (prev->sched_class->task_dead)
2729 case TASK_DEAD: 2728 prev->sched_class->task_dead(prev);
2730 if (prev->sched_class->task_dead)
2731 prev->sched_class->task_dead(prev);
2732 2729
2733 /* 2730 /*
2734 * Remove function-return probe instances associated with this 2731 * Remove function-return probe instances associated with this
2735 * task and put them back on the free list. 2732 * task and put them back on the free list.
2736 */ 2733 */
2737 kprobe_flush_task(prev); 2734 kprobe_flush_task(prev);
2738
2739 /* Task is done with its stack. */
2740 put_task_stack(prev);
2741 2735
2742 put_task_struct(prev); 2736 /* Task is done with its stack. */
2743 break; 2737 put_task_stack(prev);
2744 2738
2745 case TASK_PARKED: 2739 put_task_struct(prev);
2746 kthread_park_complete(prev);
2747 break;
2748 }
2749 } 2740 }
2750 2741
2751 tick_nohz_task_switch(); 2742 tick_nohz_task_switch();
@@ -3113,7 +3104,9 @@ static void sched_tick_remote(struct work_struct *work)
3113 struct tick_work *twork = container_of(dwork, struct tick_work, work); 3104 struct tick_work *twork = container_of(dwork, struct tick_work, work);
3114 int cpu = twork->cpu; 3105 int cpu = twork->cpu;
3115 struct rq *rq = cpu_rq(cpu); 3106 struct rq *rq = cpu_rq(cpu);
3107 struct task_struct *curr;
3116 struct rq_flags rf; 3108 struct rq_flags rf;
3109 u64 delta;
3117 3110
3118 /* 3111 /*
3119 * Handle the tick only if it appears the remote CPU is running in full 3112 * Handle the tick only if it appears the remote CPU is running in full
@@ -3122,24 +3115,28 @@ static void sched_tick_remote(struct work_struct *work)
3122 * statistics and checks timeslices in a time-independent way, regardless 3115 * statistics and checks timeslices in a time-independent way, regardless
3123 * of when exactly it is running. 3116 * of when exactly it is running.
3124 */ 3117 */
3125 if (!idle_cpu(cpu) && tick_nohz_tick_stopped_cpu(cpu)) { 3118 if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu))
3126 struct task_struct *curr; 3119 goto out_requeue;
3127 u64 delta;
3128 3120
3129 rq_lock_irq(rq, &rf); 3121 rq_lock_irq(rq, &rf);
3130 update_rq_clock(rq); 3122 curr = rq->curr;
3131 curr = rq->curr; 3123 if (is_idle_task(curr))
3132 delta = rq_clock_task(rq) - curr->se.exec_start; 3124 goto out_unlock;
3133 3125
3134 /* 3126 update_rq_clock(rq);
3135 * Make sure the next tick runs within a reasonable 3127 delta = rq_clock_task(rq) - curr->se.exec_start;
3136 * amount of time. 3128
3137 */ 3129 /*
3138 WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); 3130 * Make sure the next tick runs within a reasonable
3139 curr->sched_class->task_tick(rq, curr, 0); 3131 * amount of time.
3140 rq_unlock_irq(rq, &rf); 3132 */
3141 } 3133 WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
3134 curr->sched_class->task_tick(rq, curr, 0);
3135
3136out_unlock:
3137 rq_unlock_irq(rq, &rf);
3142 3138
3139out_requeue:
3143 /* 3140 /*
3144 * Run the remote tick once per second (1Hz). This arbitrary 3141 * Run the remote tick once per second (1Hz). This arbitrary
3145 * frequency is large enough to avoid overload but short enough 3142 * frequency is large enough to avoid overload but short enough
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 3cde46483f0a..c907fde01eaa 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -192,7 +192,7 @@ static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
192{ 192{
193 struct rq *rq = cpu_rq(sg_cpu->cpu); 193 struct rq *rq = cpu_rq(sg_cpu->cpu);
194 194
195 if (rq->rt.rt_nr_running) 195 if (rt_rq_is_runnable(&rq->rt))
196 return sg_cpu->max; 196 return sg_cpu->max;
197 197
198 /* 198 /*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 1866e64792a7..2f0a0be4d344 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3982,18 +3982,10 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep)
3982 if (!sched_feat(UTIL_EST)) 3982 if (!sched_feat(UTIL_EST))
3983 return; 3983 return;
3984 3984
3985 /* 3985 /* Update root cfs_rq's estimated utilization */
3986 * Update root cfs_rq's estimated utilization 3986 ue.enqueued = cfs_rq->avg.util_est.enqueued;
3987 * 3987 ue.enqueued -= min_t(unsigned int, ue.enqueued,
3988 * If *p is the last task then the root cfs_rq's estimated utilization 3988 (_task_util_est(p) | UTIL_AVG_UNCHANGED));
3989 * of a CPU is 0 by definition.
3990 */
3991 ue.enqueued = 0;
3992 if (cfs_rq->nr_running) {
3993 ue.enqueued = cfs_rq->avg.util_est.enqueued;
3994 ue.enqueued -= min_t(unsigned int, ue.enqueued,
3995 (_task_util_est(p) | UTIL_AVG_UNCHANGED));
3996 }
3997 WRITE_ONCE(cfs_rq->avg.util_est.enqueued, ue.enqueued); 3989 WRITE_ONCE(cfs_rq->avg.util_est.enqueued, ue.enqueued);
3998 3990
3999 /* 3991 /*
@@ -4590,6 +4582,7 @@ void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b)
4590 now = sched_clock_cpu(smp_processor_id()); 4582 now = sched_clock_cpu(smp_processor_id());
4591 cfs_b->runtime = cfs_b->quota; 4583 cfs_b->runtime = cfs_b->quota;
4592 cfs_b->runtime_expires = now + ktime_to_ns(cfs_b->period); 4584 cfs_b->runtime_expires = now + ktime_to_ns(cfs_b->period);
4585 cfs_b->expires_seq++;
4593} 4586}
4594 4587
4595static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) 4588static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
@@ -4612,6 +4605,7 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
4612 struct task_group *tg = cfs_rq->tg; 4605 struct task_group *tg = cfs_rq->tg;
4613 struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); 4606 struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
4614 u64 amount = 0, min_amount, expires; 4607 u64 amount = 0, min_amount, expires;
4608 int expires_seq;
4615 4609
4616 /* note: this is a positive sum as runtime_remaining <= 0 */ 4610 /* note: this is a positive sum as runtime_remaining <= 0 */
4617 min_amount = sched_cfs_bandwidth_slice() - cfs_rq->runtime_remaining; 4611 min_amount = sched_cfs_bandwidth_slice() - cfs_rq->runtime_remaining;
@@ -4628,6 +4622,7 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
4628 cfs_b->idle = 0; 4622 cfs_b->idle = 0;
4629 } 4623 }
4630 } 4624 }
4625 expires_seq = cfs_b->expires_seq;
4631 expires = cfs_b->runtime_expires; 4626 expires = cfs_b->runtime_expires;
4632 raw_spin_unlock(&cfs_b->lock); 4627 raw_spin_unlock(&cfs_b->lock);
4633 4628
@@ -4637,8 +4632,10 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
4637 * spread between our sched_clock and the one on which runtime was 4632 * spread between our sched_clock and the one on which runtime was
4638 * issued. 4633 * issued.
4639 */ 4634 */
4640 if ((s64)(expires - cfs_rq->runtime_expires) > 0) 4635 if (cfs_rq->expires_seq != expires_seq) {
4636 cfs_rq->expires_seq = expires_seq;
4641 cfs_rq->runtime_expires = expires; 4637 cfs_rq->runtime_expires = expires;
4638 }
4642 4639
4643 return cfs_rq->runtime_remaining > 0; 4640 return cfs_rq->runtime_remaining > 0;
4644} 4641}
@@ -4664,12 +4661,9 @@ static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq)
4664 * has not truly expired. 4661 * has not truly expired.
4665 * 4662 *
4666 * Fortunately we can check determine whether this the case by checking 4663 * Fortunately we can check determine whether this the case by checking
4667 * whether the global deadline has advanced. It is valid to compare 4664 * whether the global deadline(cfs_b->expires_seq) has advanced.
4668 * cfs_b->runtime_expires without any locks since we only care about
4669 * exact equality, so a partial write will still work.
4670 */ 4665 */
4671 4666 if (cfs_rq->expires_seq == cfs_b->expires_seq) {
4672 if (cfs_rq->runtime_expires != cfs_b->runtime_expires) {
4673 /* extend local deadline, drift is bounded above by 2 ticks */ 4667 /* extend local deadline, drift is bounded above by 2 ticks */
4674 cfs_rq->runtime_expires += TICK_NSEC; 4668 cfs_rq->runtime_expires += TICK_NSEC;
4675 } else { 4669 } else {
@@ -5202,13 +5196,18 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
5202 5196
5203void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b) 5197void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
5204{ 5198{
5199 u64 overrun;
5200
5205 lockdep_assert_held(&cfs_b->lock); 5201 lockdep_assert_held(&cfs_b->lock);
5206 5202
5207 if (!cfs_b->period_active) { 5203 if (cfs_b->period_active)
5208 cfs_b->period_active = 1; 5204 return;
5209 hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period); 5205
5210 hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED); 5206 cfs_b->period_active = 1;
5211 } 5207 overrun = hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period);
5208 cfs_b->runtime_expires += (overrun + 1) * ktime_to_ns(cfs_b->period);
5209 cfs_b->expires_seq++;
5210 hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED);
5212} 5211}
5213 5212
5214static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) 5213static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 47556b0c9a95..572567078b60 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -508,8 +508,11 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
508 508
509 rt_se = rt_rq->tg->rt_se[cpu]; 509 rt_se = rt_rq->tg->rt_se[cpu];
510 510
511 if (!rt_se) 511 if (!rt_se) {
512 dequeue_top_rt_rq(rt_rq); 512 dequeue_top_rt_rq(rt_rq);
513 /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
514 cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
515 }
513 else if (on_rt_rq(rt_se)) 516 else if (on_rt_rq(rt_se))
514 dequeue_rt_entity(rt_se, 0); 517 dequeue_rt_entity(rt_se, 0);
515} 518}
@@ -1001,8 +1004,6 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq)
1001 sub_nr_running(rq, rt_rq->rt_nr_running); 1004 sub_nr_running(rq, rt_rq->rt_nr_running);
1002 rt_rq->rt_queued = 0; 1005 rt_rq->rt_queued = 0;
1003 1006
1004 /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
1005 cpufreq_update_util(rq, 0);
1006} 1007}
1007 1008
1008static void 1009static void
@@ -1014,11 +1015,14 @@ enqueue_top_rt_rq(struct rt_rq *rt_rq)
1014 1015
1015 if (rt_rq->rt_queued) 1016 if (rt_rq->rt_queued)
1016 return; 1017 return;
1017 if (rt_rq_throttled(rt_rq) || !rt_rq->rt_nr_running) 1018
1019 if (rt_rq_throttled(rt_rq))
1018 return; 1020 return;
1019 1021
1020 add_nr_running(rq, rt_rq->rt_nr_running); 1022 if (rt_rq->rt_nr_running) {
1021 rt_rq->rt_queued = 1; 1023 add_nr_running(rq, rt_rq->rt_nr_running);
1024 rt_rq->rt_queued = 1;
1025 }
1022 1026
1023 /* Kick cpufreq (see the comment in kernel/sched/sched.h). */ 1027 /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
1024 cpufreq_update_util(rq, 0); 1028 cpufreq_update_util(rq, 0);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 6601baf2361c..c7742dcc136c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -334,9 +334,10 @@ struct cfs_bandwidth {
334 u64 runtime; 334 u64 runtime;
335 s64 hierarchical_quota; 335 s64 hierarchical_quota;
336 u64 runtime_expires; 336 u64 runtime_expires;
337 int expires_seq;
337 338
338 int idle; 339 short idle;
339 int period_active; 340 short period_active;
340 struct hrtimer period_timer; 341 struct hrtimer period_timer;
341 struct hrtimer slack_timer; 342 struct hrtimer slack_timer;
342 struct list_head throttled_cfs_rq; 343 struct list_head throttled_cfs_rq;
@@ -551,6 +552,7 @@ struct cfs_rq {
551 552
552#ifdef CONFIG_CFS_BANDWIDTH 553#ifdef CONFIG_CFS_BANDWIDTH
553 int runtime_enabled; 554 int runtime_enabled;
555 int expires_seq;
554 u64 runtime_expires; 556 u64 runtime_expires;
555 s64 runtime_remaining; 557 s64 runtime_remaining;
556 558
@@ -609,6 +611,11 @@ struct rt_rq {
609#endif 611#endif
610}; 612};
611 613
614static inline bool rt_rq_is_runnable(struct rt_rq *rt_rq)
615{
616 return rt_rq->rt_queued && rt_rq->rt_nr_running;
617}
618
612/* Deadline class' related fields in a runqueue */ 619/* Deadline class' related fields in a runqueue */
613struct dl_rq { 620struct dl_rq {
614 /* runqueue is an rbtree, ordered by deadline */ 621 /* runqueue is an rbtree, ordered by deadline */
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index b7005dd21ec1..14de3727b18e 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -277,8 +277,7 @@ static bool tick_check_preferred(struct clock_event_device *curdev,
277 */ 277 */
278 return !curdev || 278 return !curdev ||
279 newdev->rating > curdev->rating || 279 newdev->rating > curdev->rating ||
280 (!cpumask_equal(curdev->cpumask, newdev->cpumask) && 280 !cpumask_equal(curdev->cpumask, newdev->cpumask);
281 !tick_check_percpu(curdev, newdev, smp_processor_id()));
282} 281}
283 282
284/* 283/*
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index efed9c1cfb7e..caf9cbf35816 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -192,17 +192,6 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
192 op->saved_func(ip, parent_ip, op, regs); 192 op->saved_func(ip, parent_ip, op, regs);
193} 193}
194 194
195/**
196 * clear_ftrace_function - reset the ftrace function
197 *
198 * This NULLs the ftrace function and in essence stops
199 * tracing. There may be lag
200 */
201void clear_ftrace_function(void)
202{
203 ftrace_trace_function = ftrace_stub;
204}
205
206static void ftrace_sync(struct work_struct *work) 195static void ftrace_sync(struct work_struct *work)
207{ 196{
208 /* 197 /*
@@ -6689,7 +6678,7 @@ void ftrace_kill(void)
6689{ 6678{
6690 ftrace_disabled = 1; 6679 ftrace_disabled = 1;
6691 ftrace_enabled = 0; 6680 ftrace_enabled = 0;
6692 clear_ftrace_function(); 6681 ftrace_trace_function = ftrace_stub;
6693} 6682}
6694 6683
6695/** 6684/**
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a0079b4c7a49..87cf25171fb8 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2953,6 +2953,7 @@ out_nobuffer:
2953} 2953}
2954EXPORT_SYMBOL_GPL(trace_vbprintk); 2954EXPORT_SYMBOL_GPL(trace_vbprintk);
2955 2955
2956__printf(3, 0)
2956static int 2957static int
2957__trace_array_vprintk(struct ring_buffer *buffer, 2958__trace_array_vprintk(struct ring_buffer *buffer,
2958 unsigned long ip, const char *fmt, va_list args) 2959 unsigned long ip, const char *fmt, va_list args)
@@ -3007,12 +3008,14 @@ out_nobuffer:
3007 return len; 3008 return len;
3008} 3009}
3009 3010
3011__printf(3, 0)
3010int trace_array_vprintk(struct trace_array *tr, 3012int trace_array_vprintk(struct trace_array *tr,
3011 unsigned long ip, const char *fmt, va_list args) 3013 unsigned long ip, const char *fmt, va_list args)
3012{ 3014{
3013 return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args); 3015 return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3014} 3016}
3015 3017
3018__printf(3, 0)
3016int trace_array_printk(struct trace_array *tr, 3019int trace_array_printk(struct trace_array *tr,
3017 unsigned long ip, const char *fmt, ...) 3020 unsigned long ip, const char *fmt, ...)
3018{ 3021{
@@ -3028,6 +3031,7 @@ int trace_array_printk(struct trace_array *tr,
3028 return ret; 3031 return ret;
3029} 3032}
3030 3033
3034__printf(3, 4)
3031int trace_array_printk_buf(struct ring_buffer *buffer, 3035int trace_array_printk_buf(struct ring_buffer *buffer,
3032 unsigned long ip, const char *fmt, ...) 3036 unsigned long ip, const char *fmt, ...)
3033{ 3037{
@@ -3043,6 +3047,7 @@ int trace_array_printk_buf(struct ring_buffer *buffer,
3043 return ret; 3047 return ret;
3044} 3048}
3045 3049
3050__printf(2, 0)
3046int trace_vprintk(unsigned long ip, const char *fmt, va_list args) 3051int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3047{ 3052{
3048 return trace_array_vprintk(&global_trace, ip, fmt, args); 3053 return trace_array_vprintk(&global_trace, ip, fmt, args);
@@ -3360,8 +3365,8 @@ static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3360 3365
3361 print_event_info(buf, m); 3366 print_event_info(buf, m);
3362 3367
3363 seq_printf(m, "# TASK-PID CPU# %s TIMESTAMP FUNCTION\n", tgid ? "TGID " : ""); 3368 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? "TGID " : "");
3364 seq_printf(m, "# | | | %s | |\n", tgid ? " | " : ""); 3369 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
3365} 3370}
3366 3371
3367static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m, 3372static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
@@ -3381,9 +3386,9 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file
3381 tgid ? tgid_space : space); 3386 tgid ? tgid_space : space);
3382 seq_printf(m, "# %s||| / delay\n", 3387 seq_printf(m, "# %s||| / delay\n",
3383 tgid ? tgid_space : space); 3388 tgid ? tgid_space : space);
3384 seq_printf(m, "# TASK-PID CPU#%s|||| TIMESTAMP FUNCTION\n", 3389 seq_printf(m, "# TASK-PID %sCPU# |||| TIMESTAMP FUNCTION\n",
3385 tgid ? " TGID " : space); 3390 tgid ? " TGID " : space);
3386 seq_printf(m, "# | | | %s|||| | |\n", 3391 seq_printf(m, "# | | %s | |||| | |\n",
3387 tgid ? " | " : space); 3392 tgid ? " | " : space);
3388} 3393}
3389 3394
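The kernel/trace/trace.c hunk adds __printf() annotations (the kernel wrapper around the compiler's format attribute) so -Wformat can type-check the format strings of these printk-style helpers, with 0 as the second index for the va_list variants, and it also reorders the TGID column in the trace headers. A small user-space sketch of the annotation pattern follows; my_printf(), log_printf() and log_vprintf() are invented names for illustration, not kernel APIs.

/*
 * Illustrative user-space analogue of the __printf() annotations added
 * above. The format attribute lets -Wformat verify that the format
 * string matches the variadic arguments; the second index is 0 for
 * va_list-style functions, where there is no argument list to check.
 */
#include <stdarg.h>
#include <stdio.h>

#define my_printf(a, b)	__attribute__((format(printf, a, b)))

my_printf(2, 0)
static void log_vprintf(int level, const char *fmt, va_list args)
{
	fprintf(stderr, "<%d> ", level);
	vfprintf(stderr, fmt, args);
}

my_printf(2, 3)
static void log_printf(int level, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	log_vprintf(level, fmt, args);
	va_end(args);
}

int main(void)
{
	log_printf(1, "pid=%d comm=%s\n", 42, "bash");
	/* log_printf(1, "pid=%s\n", 42);  -- -Wformat would flag this */
	return 0;
}

With the commented-out call enabled, gcc -Wformat reports that the %s conversion does not match the int argument, which is the class of bug these annotations catch.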
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 630c5a24b2b2..f8f86231ad90 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -583,9 +583,7 @@ static __always_inline void trace_clear_recursion(int bit)
583static inline struct ring_buffer_iter * 583static inline struct ring_buffer_iter *
584trace_buffer_iter(struct trace_iterator *iter, int cpu) 584trace_buffer_iter(struct trace_iterator *iter, int cpu)
585{ 585{
586 if (iter->buffer_iter && iter->buffer_iter[cpu]) 586 return iter->buffer_iter ? iter->buffer_iter[cpu] : NULL;
587 return iter->buffer_iter[cpu];
588 return NULL;
589} 587}
590 588
591int tracer_init(struct tracer *t, struct trace_array *tr); 589int tracer_init(struct tracer *t, struct trace_array *tr);
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 0dceb77d1d42..893a206bcba4 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1701,6 +1701,7 @@ static void create_filter_finish(struct filter_parse_error *pe)
1701 * @filter_str: filter string 1701 * @filter_str: filter string
1702 * @set_str: remember @filter_str and enable detailed error in filter 1702 * @set_str: remember @filter_str and enable detailed error in filter
1703 * @filterp: out param for created filter (always updated on return) 1703 * @filterp: out param for created filter (always updated on return)
1704 * Must be a pointer that references a NULL pointer.
1704 * 1705 *
1705 * Creates a filter for @call with @filter_str. If @set_str is %true, 1706 * Creates a filter for @call with @filter_str. If @set_str is %true,
1706 * @filter_str is copied and recorded in the new filter. 1707 * @filter_str is copied and recorded in the new filter.
@@ -1718,6 +1719,10 @@ static int create_filter(struct trace_event_call *call,
1718 struct filter_parse_error *pe = NULL; 1719 struct filter_parse_error *pe = NULL;
1719 int err; 1720 int err;
1720 1721
1722 /* filterp must point to NULL */
1723 if (WARN_ON(*filterp))
1724 *filterp = NULL;
1725
1721 err = create_filter_start(filter_string, set_str, &pe, filterp); 1726 err = create_filter_start(filter_string, set_str, &pe, filterp);
1722 if (err) 1727 if (err)
1723 return err; 1728 return err;
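The kernel/trace/trace_events_filter.c hunk documents and enforces that the @filterp out-parameter of create_filter() must reference a NULL pointer, warning and resetting it otherwise. A user-space model of that calling convention is sketched below; toy_create_filter() and struct toy_filter are invented stand-ins, and only the precondition pattern comes from the hunk.

/*
 * Illustrative model of the precondition added above: an out-parameter
 * that must point to NULL, checked up front and reset if the caller
 * got it wrong, so a stale object cannot be silently overwritten.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct toy_filter {
	char *str;
};

static int toy_create_filter(const char *filter_str, struct toy_filter **filterp)
{
	struct toy_filter *f;

	/* filterp must point to NULL, as the hunk enforces with WARN_ON() */
	if (*filterp) {
		fprintf(stderr, "WARN: *filterp was not NULL, resetting\n");
		*filterp = NULL;
	}

	f = malloc(sizeof(*f));
	if (!f)
		return -1;
	f->str = strdup(filter_str);
	if (!f->str) {
		free(f);
		return -1;
	}
	*filterp = f;
	return 0;
}

int main(void)
{
	struct toy_filter *filter = NULL;	/* required initial state */

	if (toy_create_filter("pid > 100", &filter))
		return 1;
	printf("filter: %s\n", filter->str);
	free(filter->str);
	free(filter);
	return 0;
}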
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 046c716a6536..aae18af94c94 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -393,7 +393,7 @@ static void hist_err_event(char *str, char *system, char *event, char *var)
393 else if (system) 393 else if (system)
394 snprintf(err, MAX_FILTER_STR_VAL, "%s.%s", system, event); 394 snprintf(err, MAX_FILTER_STR_VAL, "%s.%s", system, event);
395 else 395 else
396 strncpy(err, var, MAX_FILTER_STR_VAL); 396 strscpy(err, var, MAX_FILTER_STR_VAL);
397 397
398 hist_err(str, err); 398 hist_err(str, err);
399} 399}
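The kernel/trace/trace_events_hist.c change swaps strncpy() for strscpy() when copying a variable name into the fixed-size error buffer: strncpy() does not NUL-terminate the destination when the source is too long, while the kernel's strscpy() always terminates and reports truncation (returning -E2BIG). The sketch below demonstrates the difference with a user-space approximation of strscpy(); it is not the kernel implementation.

/*
 * Why the swap matters, shown with a user-space stand-in for the
 * kernel's strscpy(): unlike strncpy(), it always NUL-terminates the
 * destination and signals truncation. Recent GCC versions flag the
 * strncpy() hazard below via -Wstringop-truncation.
 */
#include <stdio.h>
#include <string.h>

static long toy_strscpy(char *dst, const char *src, size_t size)
{
	size_t len;

	if (!size)
		return -1;		/* kernel strscpy() returns -E2BIG */

	len = strlen(src);
	if (len >= size) {
		len = size - 1;		/* truncate, keep room for the NUL */
		memcpy(dst, src, len);
		dst[len] = '\0';
		return -1;
	}
	memcpy(dst, src, len + 1);
	return (long)len;
}

int main(void)
{
	char small[8];

	strncpy(small, "a_rather_long_var", sizeof(small));
	/* small is now NOT NUL-terminated: printing it would overrun */

	toy_strscpy(small, "a_rather_long_var", sizeof(small));
	printf("%s\n", small);		/* safely prints "a_rathe" */
	return 0;
}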
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 23c0b0cb5fb9..169b3c44ee97 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -831,6 +831,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
831 struct ftrace_graph_ret *graph_ret; 831 struct ftrace_graph_ret *graph_ret;
832 struct ftrace_graph_ent *call; 832 struct ftrace_graph_ent *call;
833 unsigned long long duration; 833 unsigned long long duration;
834 int cpu = iter->cpu;
834 int i; 835 int i;
835 836
836 graph_ret = &ret_entry->ret; 837 graph_ret = &ret_entry->ret;
@@ -839,7 +840,6 @@ print_graph_entry_leaf(struct trace_iterator *iter,
839 840
840 if (data) { 841 if (data) {
841 struct fgraph_cpu_data *cpu_data; 842 struct fgraph_cpu_data *cpu_data;
842 int cpu = iter->cpu;
843 843
844 cpu_data = per_cpu_ptr(data->cpu_data, cpu); 844 cpu_data = per_cpu_ptr(data->cpu_data, cpu);
845 845
@@ -869,6 +869,9 @@ print_graph_entry_leaf(struct trace_iterator *iter,
869 869
870 trace_seq_printf(s, "%ps();\n", (void *)call->func); 870 trace_seq_printf(s, "%ps();\n", (void *)call->func);
871 871
872 print_graph_irq(iter, graph_ret->func, TRACE_GRAPH_RET,
873 cpu, iter->ent->pid, flags);
874
872 return trace_handle_return(s); 875 return trace_handle_return(s);
873} 876}
874 877
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index daa81571b22a..21f718472942 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1480,8 +1480,10 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
1480 } 1480 }
1481 1481
1482 ret = __register_trace_kprobe(tk); 1482 ret = __register_trace_kprobe(tk);
1483 if (ret < 0) 1483 if (ret < 0) {
1484 kfree(tk->tp.call.print_fmt);
1484 goto error; 1485 goto error;
1486 }
1485 1487
1486 return &tk->tp.call; 1488 return &tk->tp.call;
1487error: 1489error:
@@ -1501,6 +1503,8 @@ void destroy_local_trace_kprobe(struct trace_event_call *event_call)
1501 } 1503 }
1502 1504
1503 __unregister_trace_kprobe(tk); 1505 __unregister_trace_kprobe(tk);
1506
1507 kfree(tk->tp.call.print_fmt);
1504 free_trace_kprobe(tk); 1508 free_trace_kprobe(tk);
1505} 1509}
1506#endif /* CONFIG_PERF_EVENTS */ 1510#endif /* CONFIG_PERF_EVENTS */
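The kernel/trace/trace_kprobe.c hunk plugs a leak of tk->tp.call.print_fmt by freeing it both when __register_trace_kprobe() fails and when the local trace kprobe is destroyed. The generic shape of that fix, sketched with invented user-space names, looks roughly like this:

/*
 * Illustrative only, not the kprobe code: every resource allocated
 * before a failed registration step is released on the error path,
 * and released again on teardown once registration has succeeded.
 */
#include <stdio.h>
#include <stdlib.h>

struct toy_event {
	char *print_fmt;
};

/* Stand-in for the registration step that can fail. */
static int toy_backend_register(struct toy_event *ev)
{
	(void)ev;
	return 0;	/* pretend it succeeded */
}

static void toy_backend_unregister(struct toy_event *ev)
{
	(void)ev;
}

static int toy_create(struct toy_event *ev)
{
	ev->print_fmt = malloc(32);
	if (!ev->print_fmt)
		return -1;

	if (toy_backend_register(ev) < 0) {
		free(ev->print_fmt);	/* mirrors the new kfree() on the error path */
		ev->print_fmt = NULL;
		return -1;
	}
	return 0;
}

static void toy_destroy(struct toy_event *ev)
{
	toy_backend_unregister(ev);
	free(ev->print_fmt);		/* mirrors the new kfree() in destroy */
}

int main(void)
{
	struct toy_event ev = { 0 };

	if (toy_create(&ev))
		return 1;
	puts("registered");
	toy_destroy(&ev);
	return 0;
}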
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 90db994ac900..1c8e30fda46a 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -594,8 +594,7 @@ int trace_print_context(struct trace_iterator *iter)
594 594
595 trace_find_cmdline(entry->pid, comm); 595 trace_find_cmdline(entry->pid, comm);
596 596
597 trace_seq_printf(s, "%16s-%-5d [%03d] ", 597 trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
598 comm, entry->pid, iter->cpu);
599 598
600 if (tr->trace_flags & TRACE_ITER_RECORD_TGID) { 599 if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
601 unsigned int tgid = trace_find_tgid(entry->pid); 600 unsigned int tgid = trace_find_tgid(entry->pid);
@@ -606,6 +605,8 @@ int trace_print_context(struct trace_iterator *iter)
606 trace_seq_printf(s, "(%5d) ", tgid); 605 trace_seq_printf(s, "(%5d) ", tgid);
607 } 606 }
608 607
608 trace_seq_printf(s, "[%03d] ", iter->cpu);
609
609 if (tr->trace_flags & TRACE_ITER_IRQ_INFO) 610 if (tr->trace_flags & TRACE_ITER_IRQ_INFO)
610 trace_print_lat_fmt(s, entry); 611 trace_print_lat_fmt(s, entry);
611 612
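Together with the header changes in kernel/trace/trace.c above, the kernel/trace/trace_output.c hunk moves the CPU field after the optional TGID field, so the per-event context lines up with the new "# TASK-PID TGID CPU# TIMESTAMP FUNCTION" header. The field order and widths in the sketch below come from the format strings in the hunk; the print_context() wrapper and sample values are invented.

/*
 * Illustrative only: the new field order produced by
 * trace_print_context() after this hunk (comm-pid, optional TGID,
 * then CPU), approximated with printf.
 */
#include <stdbool.h>
#include <stdio.h>

static void print_context(const char *comm, int pid, int tgid, int cpu,
			  bool record_tgid)
{
	printf("%16s-%-5d ", comm, pid);	/* "%16s-%-5d " from the hunk */
	if (record_tgid)
		printf("(%5d) ", tgid);		/* "(%5d) " from the hunk */
	printf("[%03d] ", cpu);			/* "[%03d] " from the hunk */
}

int main(void)
{
	print_context("bash", 1234, 1234, 3, true);
	printf("...\n");
	return 0;
}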