Diffstat (limited to 'kernel')
 kernel/audit.c               |  17
 kernel/audit.h               |   4
 kernel/auditsc.c             |   8
 kernel/bpf/verifier.c        |  41
 kernel/cgroup.c              | 148
 kernel/events/core.c         |   8
 kernel/fork.c                |  50
 kernel/futex.c               |  14
 kernel/jump_label.c          |  36
 kernel/kcov.c                |   7
 kernel/locking/mutex-debug.c |  12
 kernel/locking/mutex-debug.h |   4
 kernel/locking/mutex.c       |  15
 kernel/locking/mutex.h       |   2
 kernel/locking/qspinlock.c   |  60
 kernel/power/process.c       |  12
 kernel/sched/core.c          |  42
 kernel/sched/debug.c         |  15
 kernel/sched/fair.c          |  72
 kernel/sched/sched.h         |   2
 kernel/sched/stats.h         |   3
 kernel/trace/bpf_trace.c     |  10
 kernel/trace/trace_printk.c  |   7
 23 files changed, 386 insertions, 203 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index 22bb4f24f071..8d528f9930da 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1883,6 +1883,23 @@ out_null:
 	audit_log_format(ab, " exe=(null)");
 }
 
+struct tty_struct *audit_get_tty(struct task_struct *tsk)
+{
+	struct tty_struct *tty = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tsk->sighand->siglock, flags);
+	if (tsk->signal)
+		tty = tty_kref_get(tsk->signal->tty);
+	spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
+	return tty;
+}
+
+void audit_put_tty(struct tty_struct *tty)
+{
+	tty_kref_put(tty);
+}
+
 void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk)
 {
 	const struct cred *cred;
diff --git a/kernel/audit.h b/kernel/audit.h
index cbbe6bb6496e..a492f4c4e710 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -23,6 +23,7 @@
 #include <linux/audit.h>
 #include <linux/skbuff.h>
 #include <uapi/linux/mqueue.h>
+#include <linux/tty.h>
 
 /* AUDIT_NAMES is the number of slots we reserve in the audit_context
  * for saving names from getname(). If we get more names we will allocate
@@ -262,6 +263,9 @@ extern struct audit_entry *audit_dupe_rule(struct audit_krule *old);
 extern void audit_log_d_path_exe(struct audit_buffer *ab,
 				 struct mm_struct *mm);
 
+extern struct tty_struct *audit_get_tty(struct task_struct *tsk);
+extern void audit_put_tty(struct tty_struct *tty);
+
 /* audit watch functions */
#ifdef CONFIG_AUDIT_WATCH
 extern void audit_put_watch(struct audit_watch *watch);
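A minimal caller sketch, not part of this commit, of how the helper pair declared above is meant to be used; the wrapper function and its name are hypothetical, while the tty_name()/"(none)" pattern follows the existing audit callers:

	/* Hypothetical caller: pair every audit_get_tty() with audit_put_tty(). */
	static void audit_log_tty_example(struct audit_buffer *ab)
	{
		struct tty_struct *tty = audit_get_tty(current);	/* takes a tty kref */

		audit_log_format(ab, " tty=%s", tty ? tty_name(tty) : "(none)");
		audit_put_tty(tty);	/* tty_kref_put() tolerates NULL */
	}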
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 62ab53d7619c..2672d105cffc 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -63,7 +63,6 @@
 #include <asm/unistd.h>
 #include <linux/security.h>
 #include <linux/list.h>
-#include <linux/tty.h>
 #include <linux/binfmts.h>
 #include <linux/highmem.h>
 #include <linux/syscalls.h>
@@ -1985,14 +1984,15 @@ static void audit_log_set_loginuid(kuid_t koldloginuid, kuid_t kloginuid,
 	if (!audit_enabled)
 		return;
 
+	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
+	if (!ab)
+		return;
+
 	uid = from_kuid(&init_user_ns, task_uid(current));
 	oldloginuid = from_kuid(&init_user_ns, koldloginuid);
 	loginuid = from_kuid(&init_user_ns, kloginuid),
 	tty = audit_get_tty(current);
 
-	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN);
-	if (!ab)
-		return;
 	audit_log_format(ab, "pid=%d uid=%u", task_pid_nr(current), uid);
 	audit_log_task_context(ab);
 	audit_log_format(ab, " old-auid=%u auid=%u tty=%s old-ses=%u ses=%u res=%d",
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 668e07903c8f..eec9f90ba030 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -126,31 +126,6 @@
  * are set to NOT_INIT to indicate that they are no longer readable.
  */
 
-/* types of values stored in eBPF registers */
-enum bpf_reg_type {
-	NOT_INIT = 0,		 /* nothing was written into register */
-	UNKNOWN_VALUE,		 /* reg doesn't contain a valid pointer */
-	PTR_TO_CTX,		 /* reg points to bpf_context */
-	CONST_PTR_TO_MAP,	 /* reg points to struct bpf_map */
-	PTR_TO_MAP_VALUE,	 /* reg points to map element value */
-	PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */
-	FRAME_PTR,		 /* reg == frame_pointer */
-	PTR_TO_STACK,		 /* reg == frame_pointer + imm */
-	CONST_IMM,		 /* constant integer value */
-
-	/* PTR_TO_PACKET represents:
-	 * skb->data
-	 * skb->data + imm
-	 * skb->data + (u16) var
-	 * skb->data + (u16) var + imm
-	 * if (range > 0) then [ptr, ptr + range - off) is safe to access
-	 * if (id > 0) means that some 'var' was added
-	 * if (off > 0) menas that 'imm' was added
-	 */
-	PTR_TO_PACKET,
-	PTR_TO_PACKET_END,	 /* skb->data + headlen */
-};
-
 struct reg_state {
 	enum bpf_reg_type type;
 	union {
@@ -695,10 +670,10 @@ static int check_packet_access(struct verifier_env *env, u32 regno, int off,
 
 /* check access to 'struct bpf_context' fields */
 static int check_ctx_access(struct verifier_env *env, int off, int size,
-			    enum bpf_access_type t)
+			    enum bpf_access_type t, enum bpf_reg_type *reg_type)
 {
 	if (env->prog->aux->ops->is_valid_access &&
-	    env->prog->aux->ops->is_valid_access(off, size, t)) {
+	    env->prog->aux->ops->is_valid_access(off, size, t, reg_type)) {
 		/* remember the offset of last byte accessed in ctx */
 		if (env->prog->aux->max_ctx_offset < off + size)
 			env->prog->aux->max_ctx_offset = off + size;
@@ -798,21 +773,19 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
 			mark_reg_unknown_value(state->regs, value_regno);
 
 	} else if (reg->type == PTR_TO_CTX) {
+		enum bpf_reg_type reg_type = UNKNOWN_VALUE;
+
 		if (t == BPF_WRITE && value_regno >= 0 &&
 		    is_pointer_value(env, value_regno)) {
 			verbose("R%d leaks addr into ctx\n", value_regno);
 			return -EACCES;
 		}
-		err = check_ctx_access(env, off, size, t);
+		err = check_ctx_access(env, off, size, t, &reg_type);
 		if (!err && t == BPF_READ && value_regno >= 0) {
 			mark_reg_unknown_value(state->regs, value_regno);
-			if (off == offsetof(struct __sk_buff, data) &&
-			    env->allow_ptr_leaks)
+			if (env->allow_ptr_leaks)
 				/* note that reg.[id|off|range] == 0 */
-				state->regs[value_regno].type = PTR_TO_PACKET;
-			else if (off == offsetof(struct __sk_buff, data_end) &&
-				 env->allow_ptr_leaks)
-				state->regs[value_regno].type = PTR_TO_PACKET_END;
+				state->regs[value_regno].type = reg_type;
 		}
 
 	} else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) {
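For illustration only, not part of this commit: with the reg_type out-parameter added above, a program type's ->is_valid_access() callback can report what kind of pointer a ctx load yields. The callback below is a hypothetical sketch; the offset-to-type mapping mirrors the __sk_buff handling that this patch removes from check_mem_access():

	/* Hypothetical ->is_valid_access() sketch for a socket-filter style program. */
	static bool example_is_valid_access(int off, int size, enum bpf_access_type type,
					    enum bpf_reg_type *reg_type)
	{
		if (off < 0 || off >= sizeof(struct __sk_buff) || size != sizeof(__u32))
			return false;

		switch (off) {
		case offsetof(struct __sk_buff, data):
			*reg_type = PTR_TO_PACKET;	/* register becomes a packet pointer */
			break;
		case offsetof(struct __sk_buff, data_end):
			*reg_type = PTR_TO_PACKET_END;	/* register becomes the packet end */
			break;
		}
		return true;
	}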
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 86cb5c6e8932..75c0ff00aca6 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -837,6 +837,8 @@ static void put_css_set_locked(struct css_set *cset)
 
 static void put_css_set(struct css_set *cset)
 {
+	unsigned long flags;
+
 	/*
 	 * Ensure that the refcount doesn't hit zero while any readers
 	 * can see it. Similar to atomic_dec_and_lock(), but for an
@@ -845,9 +847,9 @@ static void put_css_set(struct css_set *cset)
 	if (atomic_add_unless(&cset->refcount, -1, 1))
 		return;
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irqsave(&css_set_lock, flags);
 	put_css_set_locked(cset);
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irqrestore(&css_set_lock, flags);
 }
 
 /*
@@ -1070,11 +1072,11 @@ static struct css_set *find_css_set(struct css_set *old_cset,
 
 	/* First see if we already have a cgroup group that matches
 	 * the desired set */
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	cset = find_existing_css_set(old_cset, cgrp, template);
 	if (cset)
 		get_css_set(cset);
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	if (cset)
 		return cset;
@@ -1102,7 +1104,7 @@ static struct css_set *find_css_set(struct css_set *old_cset,
 	 * find_existing_css_set() */
 	memcpy(cset->subsys, template, sizeof(cset->subsys));
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	/* Add reference counts and links from the new css_set. */
 	list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) {
 		struct cgroup *c = link->cgrp;
@@ -1128,7 +1130,7 @@ static struct css_set *find_css_set(struct css_set *old_cset,
 		css_get(css);
 	}
 
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	return cset;
 }
@@ -1192,7 +1194,7 @@ static void cgroup_destroy_root(struct cgroup_root *root)
 	 * Release all the links from cset_links to this hierarchy's
 	 * root cgroup
 	 */
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 
 	list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) {
 		list_del(&link->cset_link);
@@ -1200,7 +1202,7 @@ static void cgroup_destroy_root(struct cgroup_root *root)
 		kfree(link);
 	}
 
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	if (!list_empty(&root->root_list)) {
 		list_del(&root->root_list);
@@ -1600,11 +1602,11 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
 		ss->root = dst_root;
 		css->cgroup = dcgrp;
 
-		spin_lock_bh(&css_set_lock);
+		spin_lock_irq(&css_set_lock);
 		hash_for_each(css_set_table, i, cset, hlist)
 			list_move_tail(&cset->e_cset_node[ss->id],
 				       &dcgrp->e_csets[ss->id]);
-		spin_unlock_bh(&css_set_lock);
+		spin_unlock_irq(&css_set_lock);
 
 		/* default hierarchy doesn't enable controllers by default */
 		dst_root->subsys_mask |= 1 << ssid;
@@ -1640,10 +1642,10 @@ static int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
 	if (!buf)
 		return -ENOMEM;
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	ns_cgroup = current_cgns_cgroup_from_root(kf_cgroot);
 	len = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, PATH_MAX);
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	if (len >= PATH_MAX)
 		len = -ERANGE;
@@ -1897,7 +1899,7 @@ static void cgroup_enable_task_cg_lists(void)
 {
 	struct task_struct *p, *g;
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 
 	if (use_task_css_set_links)
 		goto out_unlock;
@@ -1922,8 +1924,12 @@ static void cgroup_enable_task_cg_lists(void)
 		 * entry won't be deleted though the process has exited.
 		 * Do it while holding siglock so that we don't end up
 		 * racing against cgroup_exit().
+		 *
+		 * Interrupts were already disabled while acquiring
+		 * the css_set_lock, so we do not need to disable it
+		 * again when acquiring the sighand->siglock here.
 		 */
-		spin_lock_irq(&p->sighand->siglock);
+		spin_lock(&p->sighand->siglock);
 		if (!(p->flags & PF_EXITING)) {
 			struct css_set *cset = task_css_set(p);
 
@@ -1932,11 +1938,11 @@ static void cgroup_enable_task_cg_lists(void)
 			list_add_tail(&p->cg_list, &cset->tasks);
 			get_css_set(cset);
 		}
-		spin_unlock_irq(&p->sighand->siglock);
+		spin_unlock(&p->sighand->siglock);
 	} while_each_thread(g, p);
 	read_unlock(&tasklist_lock);
out_unlock:
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 }
 
 static void init_cgroup_housekeeping(struct cgroup *cgrp)
@@ -2043,13 +2049,13 @@ static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
 	 * Link the root cgroup in this hierarchy into all the css_set
 	 * objects.
 	 */
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	hash_for_each(css_set_table, i, cset, hlist) {
 		link_css_set(&tmp_links, cset, root_cgrp);
 		if (css_set_populated(cset))
 			cgroup_update_populated(root_cgrp, true);
 	}
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	BUG_ON(!list_empty(&root_cgrp->self.children));
 	BUG_ON(atomic_read(&root->nr_cgrps) != 1);
@@ -2256,11 +2262,11 @@ out_mount:
 		struct cgroup *cgrp;
 
 		mutex_lock(&cgroup_mutex);
-		spin_lock_bh(&css_set_lock);
+		spin_lock_irq(&css_set_lock);
 
 		cgrp = cset_cgroup_from_root(ns->root_cset, root);
 
-		spin_unlock_bh(&css_set_lock);
+		spin_unlock_irq(&css_set_lock);
 		mutex_unlock(&cgroup_mutex);
 
 		nsdentry = kernfs_node_dentry(cgrp->kn, dentry->d_sb);
@@ -2337,11 +2343,11 @@ char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
 	char *ret;
 
 	mutex_lock(&cgroup_mutex);
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 
 	ret = cgroup_path_ns_locked(cgrp, buf, buflen, ns);
 
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 	mutex_unlock(&cgroup_mutex);
 
 	return ret;
@@ -2369,7 +2375,7 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
 	char *path = NULL;
 
 	mutex_lock(&cgroup_mutex);
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 
 	root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);
 
@@ -2382,7 +2388,7 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
 		path = buf;
 	}
 
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 	mutex_unlock(&cgroup_mutex);
 	return path;
 }
@@ -2557,7 +2563,7 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
 	 * the new cgroup. There are no failure cases after here, so this
 	 * is the commit point.
 	 */
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	list_for_each_entry(cset, &tset->src_csets, mg_node) {
 		list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list) {
 			struct css_set *from_cset = task_css_set(task);
@@ -2568,7 +2574,7 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
 			put_css_set_locked(from_cset);
 		}
 	}
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	/*
 	 * Migration is committed, all target tasks are now on dst_csets.
@@ -2597,13 +2603,13 @@ out_cancel_attach:
 		}
 	} while_each_subsys_mask();
out_release_tset:
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	list_splice_init(&tset->dst_csets, &tset->src_csets);
 	list_for_each_entry_safe(cset, tmp_cset, &tset->src_csets, mg_node) {
 		list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
 		list_del_init(&cset->mg_node);
 	}
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 	return ret;
 }
 
@@ -2634,7 +2640,7 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets)
 
 	lockdep_assert_held(&cgroup_mutex);
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	list_for_each_entry_safe(cset, tmp_cset, preloaded_csets, mg_preload_node) {
 		cset->mg_src_cgrp = NULL;
 		cset->mg_dst_cgrp = NULL;
@@ -2642,7 +2648,7 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets)
 		list_del_init(&cset->mg_preload_node);
 		put_css_set_locked(cset);
 	}
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 }
 
 /**
@@ -2783,7 +2789,7 @@ static int cgroup_migrate(struct task_struct *leader, bool threadgroup,
 	 * already PF_EXITING could be freed from underneath us unless we
 	 * take an rcu_read_lock.
 	 */
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	rcu_read_lock();
 	task = leader;
 	do {
@@ -2792,7 +2798,7 @@ static int cgroup_migrate(struct task_struct *leader, bool threadgroup,
 			break;
 	} while_each_thread(leader, task);
 	rcu_read_unlock();
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	return cgroup_taskset_migrate(&tset, root);
 }
@@ -2816,7 +2822,7 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp,
 		return -EBUSY;
 
 	/* look up all src csets */
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	rcu_read_lock();
 	task = leader;
 	do {
@@ -2826,7 +2832,7 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp,
 			break;
 	} while_each_thread(leader, task);
 	rcu_read_unlock();
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	/* prepare dst csets and commit */
 	ret = cgroup_migrate_prepare_dst(&preloaded_csets);
@@ -2859,9 +2865,9 @@ static int cgroup_procs_write_permission(struct task_struct *task,
 		struct cgroup *cgrp;
 		struct inode *inode;
 
-		spin_lock_bh(&css_set_lock);
+		spin_lock_irq(&css_set_lock);
 		cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
-		spin_unlock_bh(&css_set_lock);
+		spin_unlock_irq(&css_set_lock);
 
 		while (!cgroup_is_descendant(dst_cgrp, cgrp))
 			cgrp = cgroup_parent(cgrp);
@@ -2962,9 +2968,9 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
 		if (root == &cgrp_dfl_root)
 			continue;
 
-		spin_lock_bh(&css_set_lock);
+		spin_lock_irq(&css_set_lock);
 		from_cgrp = task_cgroup_from_root(from, root);
-		spin_unlock_bh(&css_set_lock);
+		spin_unlock_irq(&css_set_lock);
 
 		retval = cgroup_attach_task(from_cgrp, tsk, false);
 		if (retval)
@@ -3080,7 +3086,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
 	percpu_down_write(&cgroup_threadgroup_rwsem);
 
 	/* look up all csses currently attached to @cgrp's subtree */
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
 		struct cgrp_cset_link *link;
 
@@ -3088,14 +3094,14 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
 			cgroup_migrate_add_src(link->cset, dsct,
 					       &preloaded_csets);
 	}
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	/* NULL dst indicates self on default hierarchy */
 	ret = cgroup_migrate_prepare_dst(&preloaded_csets);
 	if (ret)
 		goto out_finish;
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	list_for_each_entry(src_cset, &preloaded_csets, mg_preload_node) {
 		struct task_struct *task, *ntask;
 
@@ -3107,7 +3113,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
 		list_for_each_entry_safe(task, ntask, &src_cset->tasks, cg_list)
 			cgroup_taskset_add(task, &tset);
 	}
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	ret = cgroup_taskset_migrate(&tset, cgrp->root);
out_finish:
@@ -3908,10 +3914,10 @@ static int cgroup_task_count(const struct cgroup *cgrp)
 	int count = 0;
 	struct cgrp_cset_link *link;
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	list_for_each_entry(link, &cgrp->cset_links, cset_link)
 		count += atomic_read(&link->cset->refcount);
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 	return count;
 }
 
@@ -4249,7 +4255,7 @@ void css_task_iter_start(struct cgroup_subsys_state *css,
 
 	memset(it, 0, sizeof(*it));
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 
 	it->ss = css->ss;
 
@@ -4262,7 +4268,7 @@ void css_task_iter_start(struct cgroup_subsys_state *css,
 
 	css_task_iter_advance_css_set(it);
 
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 }
 
 /**
@@ -4280,7 +4286,7 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it)
 		it->cur_task = NULL;
 	}
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 
 	if (it->task_pos) {
 		it->cur_task = list_entry(it->task_pos, struct task_struct,
@@ -4289,7 +4295,7 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it)
 		css_task_iter_advance(it);
 	}
 
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	return it->cur_task;
 }
@@ -4303,10 +4309,10 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it)
 void css_task_iter_end(struct css_task_iter *it)
 {
 	if (it->cur_cset) {
-		spin_lock_bh(&css_set_lock);
+		spin_lock_irq(&css_set_lock);
 		list_del(&it->iters_node);
 		put_css_set_locked(it->cur_cset);
-		spin_unlock_bh(&css_set_lock);
+		spin_unlock_irq(&css_set_lock);
 	}
 
 	if (it->cur_task)
@@ -4338,10 +4344,10 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
 	mutex_lock(&cgroup_mutex);
 
 	/* all tasks in @from are being moved, all csets are source */
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	list_for_each_entry(link, &from->cset_links, cset_link)
 		cgroup_migrate_add_src(link->cset, to, &preloaded_csets);
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	ret = cgroup_migrate_prepare_dst(&preloaded_csets);
 	if (ret)
@@ -5063,6 +5069,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
 	memset(css, 0, sizeof(*css));
 	css->cgroup = cgrp;
 	css->ss = ss;
+	css->id = -1;
 	INIT_LIST_HEAD(&css->sibling);
 	INIT_LIST_HEAD(&css->children);
 	css->serial_nr = css_serial_nr_next++;
@@ -5150,7 +5157,7 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
 
 	err = cgroup_idr_alloc(&ss->css_idr, NULL, 2, 0, GFP_KERNEL);
 	if (err < 0)
-		goto err_free_percpu_ref;
+		goto err_free_css;
 	css->id = err;
 
 	/* @css is ready to be brought online now, make it visible */
@@ -5174,9 +5181,6 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
 
err_list_del:
 	list_del_rcu(&css->sibling);
-	cgroup_idr_remove(&ss->css_idr, css->id);
-err_free_percpu_ref:
-	percpu_ref_exit(&css->refcnt);
err_free_css:
 	call_rcu(&css->rcu_head, css_free_rcu_fn);
 	return ERR_PTR(err);
@@ -5451,10 +5455,10 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	 */
 	cgrp->self.flags &= ~CSS_ONLINE;
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	list_for_each_entry(link, &cgrp->cset_links, cset_link)
 		link->cset->dead = true;
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 
 	/* initiate massacre of all css's */
 	for_each_css(css, ssid, cgrp)
@@ -5725,7 +5729,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
 		goto out;
 
 	mutex_lock(&cgroup_mutex);
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 
 	for_each_root(root) {
 		struct cgroup_subsys *ss;
@@ -5778,7 +5782,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
 
 	retval = 0;
out_unlock:
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 	mutex_unlock(&cgroup_mutex);
 	kfree(buf);
out:
@@ -5923,13 +5927,13 @@ void cgroup_post_fork(struct task_struct *child)
 	if (use_task_css_set_links) {
 		struct css_set *cset;
 
-		spin_lock_bh(&css_set_lock);
+		spin_lock_irq(&css_set_lock);
 		cset = task_css_set(current);
 		if (list_empty(&child->cg_list)) {
 			get_css_set(cset);
 			css_set_move_task(child, NULL, cset, false);
 		}
-		spin_unlock_bh(&css_set_lock);
+		spin_unlock_irq(&css_set_lock);
 	}
 
 	/*
@@ -5974,9 +5978,9 @@ void cgroup_exit(struct task_struct *tsk)
 	cset = task_css_set(tsk);
 
 	if (!list_empty(&tsk->cg_list)) {
-		spin_lock_bh(&css_set_lock);
+		spin_lock_irq(&css_set_lock);
 		css_set_move_task(tsk, cset, NULL, false);
-		spin_unlock_bh(&css_set_lock);
+		spin_unlock_irq(&css_set_lock);
 	} else {
 		get_css_set(cset);
 	}
@@ -6044,9 +6048,9 @@ static void cgroup_release_agent(struct work_struct *work)
 	if (!pathbuf || !agentbuf)
 		goto out;
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	path = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 	if (!path)
 		goto out;
 
@@ -6306,12 +6310,12 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
 		return ERR_PTR(-EPERM);
 
 	mutex_lock(&cgroup_mutex);
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 
 	cset = task_css_set(current);
 	get_css_set(cset);
 
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 	mutex_unlock(&cgroup_mutex);
 
 	new_ns = alloc_cgroup_ns();
@@ -6435,7 +6439,7 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
 	if (!name_buf)
 		return -ENOMEM;
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	rcu_read_lock();
 	cset = rcu_dereference(current->cgroups);
 	list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
@@ -6446,7 +6450,7 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
 			   c->root->hierarchy_id, name_buf);
 	}
 	rcu_read_unlock();
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 	kfree(name_buf);
 	return 0;
 }
@@ -6457,7 +6461,7 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v)
 	struct cgroup_subsys_state *css = seq_css(seq);
 	struct cgrp_cset_link *link;
 
-	spin_lock_bh(&css_set_lock);
+	spin_lock_irq(&css_set_lock);
 	list_for_each_entry(link, &css->cgroup->cset_links, cset_link) {
 		struct css_set *cset = link->cset;
 		struct task_struct *task;
@@ -6480,7 +6484,7 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v)
 	overflow:
 		seq_puts(seq, " ...\n");
 	}
-	spin_unlock_bh(&css_set_lock);
+	spin_unlock_irq(&css_set_lock);
 	return 0;
 }
 
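A short sketch, not part of this commit, of the locking convention the cgroup changes above move to: css_set_lock becomes an IRQ-disabling lock, so plain process-context paths take it with the _irq variants, while a path whose caller's interrupt state is unknown (as put_css_set() is written to tolerate) uses irqsave/irqrestore:

	/* Illustrative only; the function name is hypothetical. */
	static void css_set_lock_usage_sketch(void)
	{
		unsigned long flags;

		spin_lock_irq(&css_set_lock);		/* known process context */
		/* ... walk or update css_set state ... */
		spin_unlock_irq(&css_set_lock);

		spin_lock_irqsave(&css_set_lock, flags);	/* caller's IRQ state unknown */
		/* ... walk or update css_set state ... */
		spin_unlock_irqrestore(&css_set_lock, flags);
	}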
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 274450efea90..85cd41878a74 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3862,10 +3862,8 @@ static void _free_event(struct perf_event *event)
 	if (event->ctx)
 		put_ctx(event->ctx);
 
-	if (event->pmu) {
-		exclusive_event_destroy(event);
-		module_put(event->pmu->module);
-	}
+	exclusive_event_destroy(event);
+	module_put(event->pmu->module);
 
 	call_rcu(&event->rcu_head, free_event_rcu);
 }
@@ -7531,7 +7529,7 @@ static void perf_event_free_bpf_prog(struct perf_event *event)
 	prog = event->tp_event->prog;
 	if (prog) {
 		event->tp_event->prog = NULL;
-		bpf_prog_put(prog);
+		bpf_prog_put_rcu(prog);
 	}
 }
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 5c2c355aa97f..4a7ec0c6c88c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -148,18 +148,18 @@ static inline void free_task_struct(struct task_struct *tsk)
 }
 #endif
 
-void __weak arch_release_thread_info(struct thread_info *ti)
+void __weak arch_release_thread_stack(unsigned long *stack)
 {
 }
 
-#ifndef CONFIG_ARCH_THREAD_INFO_ALLOCATOR
+#ifndef CONFIG_ARCH_THREAD_STACK_ALLOCATOR
 
 /*
  * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
  * kmemcache based allocator.
  */
 # if THREAD_SIZE >= PAGE_SIZE
-static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
+static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
 						  int node)
 {
 	struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP,
@@ -172,33 +172,33 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
 	return page ? page_address(page) : NULL;
 }
 
-static inline void free_thread_info(struct thread_info *ti)
+static inline void free_thread_stack(unsigned long *stack)
 {
-	struct page *page = virt_to_page(ti);
+	struct page *page = virt_to_page(stack);
 
 	memcg_kmem_update_page_stat(page, MEMCG_KERNEL_STACK,
 				    -(1 << THREAD_SIZE_ORDER));
 	__free_kmem_pages(page, THREAD_SIZE_ORDER);
 }
 # else
-static struct kmem_cache *thread_info_cache;
+static struct kmem_cache *thread_stack_cache;
 
-static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
+static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
 						  int node)
 {
-	return kmem_cache_alloc_node(thread_info_cache, THREADINFO_GFP, node);
+	return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
 }
 
-static void free_thread_info(struct thread_info *ti)
+static void free_thread_stack(unsigned long *stack)
 {
-	kmem_cache_free(thread_info_cache, ti);
+	kmem_cache_free(thread_stack_cache, stack);
 }
 
-void thread_info_cache_init(void)
+void thread_stack_cache_init(void)
 {
-	thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE,
+	thread_stack_cache = kmem_cache_create("thread_stack", THREAD_SIZE,
 					      THREAD_SIZE, 0, NULL);
-	BUG_ON(thread_info_cache == NULL);
+	BUG_ON(thread_stack_cache == NULL);
 }
 # endif
 #endif
@@ -221,9 +221,9 @@ struct kmem_cache *vm_area_cachep;
 /* SLAB cache for mm_struct structures (tsk->mm) */
 static struct kmem_cache *mm_cachep;
 
-static void account_kernel_stack(struct thread_info *ti, int account)
+static void account_kernel_stack(unsigned long *stack, int account)
 {
-	struct zone *zone = page_zone(virt_to_page(ti));
+	struct zone *zone = page_zone(virt_to_page(stack));
 
 	mod_zone_page_state(zone, NR_KERNEL_STACK, account);
 }
@@ -231,8 +231,8 @@ static void account_kernel_stack(struct thread_info *ti, int account)
 void free_task(struct task_struct *tsk)
 {
 	account_kernel_stack(tsk->stack, -1);
-	arch_release_thread_info(tsk->stack);
-	free_thread_info(tsk->stack);
+	arch_release_thread_stack(tsk->stack);
+	free_thread_stack(tsk->stack);
 	rt_mutex_debug_task_free(tsk);
 	ftrace_graph_exit_task(tsk);
 	put_seccomp_filter(tsk);
@@ -343,7 +343,7 @@ void set_task_stack_end_magic(struct task_struct *tsk)
 static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 {
 	struct task_struct *tsk;
-	struct thread_info *ti;
+	unsigned long *stack;
 	int err;
 
 	if (node == NUMA_NO_NODE)
@@ -352,15 +352,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	if (!tsk)
 		return NULL;
 
-	ti = alloc_thread_info_node(tsk, node);
-	if (!ti)
+	stack = alloc_thread_stack_node(tsk, node);
+	if (!stack)
 		goto free_tsk;
 
 	err = arch_dup_task_struct(tsk, orig);
 	if (err)
-		goto free_ti;
+		goto free_stack;
 
-	tsk->stack = ti;
+	tsk->stack = stack;
 #ifdef CONFIG_SECCOMP
 	/*
 	 * We must handle setting up seccomp filters once we're under
@@ -392,14 +392,14 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	tsk->task_frag.page = NULL;
 	tsk->wake_q.next = NULL;
 
-	account_kernel_stack(ti, 1);
+	account_kernel_stack(stack, 1);
 
 	kcov_task_init(tsk);
 
 	return tsk;
 
-free_ti:
-	free_thread_info(ti);
+free_stack:
+	free_thread_stack(stack);
free_tsk:
 	free_task_struct(tsk);
 	return NULL;
diff --git a/kernel/futex.c b/kernel/futex.c
index ee25f5ba4aca..33664f70e2d2 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -469,7 +469,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
 {
 	unsigned long address = (unsigned long)uaddr;
 	struct mm_struct *mm = current->mm;
-	struct page *page;
+	struct page *page, *tail;
 	struct address_space *mapping;
 	int err, ro = 0;
 
@@ -530,7 +530,15 @@ again:
 	 * considered here and page lock forces unnecessarily serialization
 	 * From this point on, mapping will be re-verified if necessary and
 	 * page lock will be acquired only if it is unavoidable
-	 */
+	 *
+	 * Mapping checks require the head page for any compound page so the
+	 * head page and mapping is looked up now. For anonymous pages, it
+	 * does not matter if the page splits in the future as the key is
+	 * based on the address. For filesystem-backed pages, the tail is
+	 * required as the index of the page determines the key. For
+	 * base pages, there is no tail page and tail == page.
+	 */
+	tail = page;
 	page = compound_head(page);
 	mapping = READ_ONCE(page->mapping);
 
@@ -654,7 +662,7 @@ again:
 
 		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
 		key->shared.inode = inode;
-		key->shared.pgoff = basepage_index(page);
+		key->shared.pgoff = basepage_index(tail);
 		rcu_read_unlock();
 	}
 
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 05254eeb4b4e..4b353e0be121 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -58,13 +58,36 @@ static void jump_label_update(struct static_key *key);
 
 void static_key_slow_inc(struct static_key *key)
 {
+	int v, v1;
+
 	STATIC_KEY_CHECK_USE();
-	if (atomic_inc_not_zero(&key->enabled))
-		return;
+
+	/*
+	 * Careful if we get concurrent static_key_slow_inc() calls;
+	 * later calls must wait for the first one to _finish_ the
+	 * jump_label_update() process. At the same time, however,
+	 * the jump_label_update() call below wants to see
+	 * static_key_enabled(&key) for jumps to be updated properly.
+	 *
+	 * So give a special meaning to negative key->enabled: it sends
+	 * static_key_slow_inc() down the slow path, and it is non-zero
+	 * so it counts as "enabled" in jump_label_update(). Note that
+	 * atomic_inc_unless_negative() checks >= 0, so roll our own.
+	 */
+	for (v = atomic_read(&key->enabled); v > 0; v = v1) {
+		v1 = atomic_cmpxchg(&key->enabled, v, v + 1);
+		if (likely(v1 == v))
+			return;
+	}
 
 	jump_label_lock();
-	if (atomic_inc_return(&key->enabled) == 1)
+	if (atomic_read(&key->enabled) == 0) {
+		atomic_set(&key->enabled, -1);
 		jump_label_update(key);
+		atomic_set(&key->enabled, 1);
+	} else {
+		atomic_inc(&key->enabled);
+	}
 	jump_label_unlock();
 }
 EXPORT_SYMBOL_GPL(static_key_slow_inc);
@@ -72,6 +95,13 @@ EXPORT_SYMBOL_GPL(static_key_slow_inc);
 static void __static_key_slow_dec(struct static_key *key,
 		unsigned long rate_limit, struct delayed_work *work)
 {
+	/*
+	 * The negative count check is valid even when a negative
+	 * key->enabled is in use by static_key_slow_inc(); a
+	 * __static_key_slow_dec() before the first static_key_slow_inc()
+	 * returns is unbalanced, because all other static_key_slow_inc()
+	 * instances block while the update is in progress.
+	 */
 	if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) {
 		WARN(atomic_read(&key->enabled) < 0,
 		     "jump label: negative count!\n");
diff --git a/kernel/kcov.c b/kernel/kcov.c
index a02f2dddd1d7..8d44b3fea9d0 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -264,7 +264,12 @@ static const struct file_operations kcov_fops = {
 
 static int __init kcov_init(void)
 {
-	if (!debugfs_create_file("kcov", 0600, NULL, NULL, &kcov_fops)) {
+	/*
+	 * The kcov debugfs file won't ever get removed and thus,
+	 * there is no need to protect it against removal races. The
+	 * use of debugfs_create_file_unsafe() is actually safe here.
+	 */
+	if (!debugfs_create_file_unsafe("kcov", 0600, NULL, NULL, &kcov_fops)) {
 		pr_err("failed to create kcov in debugfs\n");
 		return -ENOMEM;
 	}
diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c
index 3ef3736002d8..9c951fade415 100644
--- a/kernel/locking/mutex-debug.c
+++ b/kernel/locking/mutex-debug.c
@@ -49,21 +49,21 @@ void debug_mutex_free_waiter(struct mutex_waiter *waiter)
 }
 
 void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
-			    struct thread_info *ti)
+			    struct task_struct *task)
 {
 	SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
 
 	/* Mark the current thread as blocked on the lock: */
-	ti->task->blocked_on = waiter;
+	task->blocked_on = waiter;
 }
 
 void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
-			 struct thread_info *ti)
+			 struct task_struct *task)
 {
 	DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
-	DEBUG_LOCKS_WARN_ON(waiter->task != ti->task);
-	DEBUG_LOCKS_WARN_ON(ti->task->blocked_on != waiter);
-	ti->task->blocked_on = NULL;
+	DEBUG_LOCKS_WARN_ON(waiter->task != task);
+	DEBUG_LOCKS_WARN_ON(task->blocked_on != waiter);
+	task->blocked_on = NULL;
 
 	list_del_init(&waiter->list);
 	waiter->task = NULL;
diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h index 0799fd3e4cfa..d06ae3bb46c5 100644 --- a/kernel/locking/mutex-debug.h +++ b/kernel/locking/mutex-debug.h | |||
@@ -20,9 +20,9 @@ extern void debug_mutex_wake_waiter(struct mutex *lock, | |||
20 | extern void debug_mutex_free_waiter(struct mutex_waiter *waiter); | 20 | extern void debug_mutex_free_waiter(struct mutex_waiter *waiter); |
21 | extern void debug_mutex_add_waiter(struct mutex *lock, | 21 | extern void debug_mutex_add_waiter(struct mutex *lock, |
22 | struct mutex_waiter *waiter, | 22 | struct mutex_waiter *waiter, |
23 | struct thread_info *ti); | 23 | struct task_struct *task); |
24 | extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, | 24 | extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, |
25 | struct thread_info *ti); | 25 | struct task_struct *task); |
26 | extern void debug_mutex_unlock(struct mutex *lock); | 26 | extern void debug_mutex_unlock(struct mutex *lock); |
27 | extern void debug_mutex_init(struct mutex *lock, const char *name, | 27 | extern void debug_mutex_init(struct mutex *lock, const char *name, |
28 | struct lock_class_key *key); | 28 | struct lock_class_key *key); |
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index e364b424b019..a70b90db3909 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c | |||
@@ -486,9 +486,6 @@ __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx) | |||
486 | if (!hold_ctx) | 486 | if (!hold_ctx) |
487 | return 0; | 487 | return 0; |
488 | 488 | ||
489 | if (unlikely(ctx == hold_ctx)) | ||
490 | return -EALREADY; | ||
491 | |||
492 | if (ctx->stamp - hold_ctx->stamp <= LONG_MAX && | 489 | if (ctx->stamp - hold_ctx->stamp <= LONG_MAX && |
493 | (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) { | 490 | (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) { |
494 | #ifdef CONFIG_DEBUG_MUTEXES | 491 | #ifdef CONFIG_DEBUG_MUTEXES |
@@ -514,6 +511,12 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | |||
514 | unsigned long flags; | 511 | unsigned long flags; |
515 | int ret; | 512 | int ret; |
516 | 513 | ||
514 | if (use_ww_ctx) { | ||
515 | struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); | ||
516 | if (unlikely(ww_ctx == READ_ONCE(ww->ctx))) | ||
517 | return -EALREADY; | ||
518 | } | ||
519 | |||
517 | preempt_disable(); | 520 | preempt_disable(); |
518 | mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); | 521 | mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); |
519 | 522 | ||
@@ -534,7 +537,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | |||
534 | goto skip_wait; | 537 | goto skip_wait; |
535 | 538 | ||
536 | debug_mutex_lock_common(lock, &waiter); | 539 | debug_mutex_lock_common(lock, &waiter); |
537 | debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); | 540 | debug_mutex_add_waiter(lock, &waiter, task); |
538 | 541 | ||
539 | /* add waiting tasks to the end of the waitqueue (FIFO): */ | 542 | /* add waiting tasks to the end of the waitqueue (FIFO): */ |
540 | list_add_tail(&waiter.list, &lock->wait_list); | 543 | list_add_tail(&waiter.list, &lock->wait_list); |
@@ -581,7 +584,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | |||
581 | } | 584 | } |
582 | __set_task_state(task, TASK_RUNNING); | 585 | __set_task_state(task, TASK_RUNNING); |
583 | 586 | ||
584 | mutex_remove_waiter(lock, &waiter, current_thread_info()); | 587 | mutex_remove_waiter(lock, &waiter, task); |
585 | /* set it to 0 if there are no waiters left: */ | 588 | /* set it to 0 if there are no waiters left: */ |
586 | if (likely(list_empty(&lock->wait_list))) | 589 | if (likely(list_empty(&lock->wait_list))) |
587 | atomic_set(&lock->count, 0); | 590 | atomic_set(&lock->count, 0); |
@@ -602,7 +605,7 @@ skip_wait: | |||
602 | return 0; | 605 | return 0; |
603 | 606 | ||
604 | err: | 607 | err: |
605 | mutex_remove_waiter(lock, &waiter, task_thread_info(task)); | 608 | mutex_remove_waiter(lock, &waiter, task); |
606 | spin_unlock_mutex(&lock->wait_lock, flags); | 609 | spin_unlock_mutex(&lock->wait_lock, flags); |
607 | debug_mutex_free_waiter(&waiter); | 610 | debug_mutex_free_waiter(&waiter); |
608 | mutex_release(&lock->dep_map, 1, ip); | 611 | mutex_release(&lock->dep_map, 1, ip); |
diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h index 5cda397607f2..a68bae5e852a 100644 --- a/kernel/locking/mutex.h +++ b/kernel/locking/mutex.h | |||
@@ -13,7 +13,7 @@ | |||
13 | do { spin_lock(lock); (void)(flags); } while (0) | 13 | do { spin_lock(lock); (void)(flags); } while (0) |
14 | #define spin_unlock_mutex(lock, flags) \ | 14 | #define spin_unlock_mutex(lock, flags) \ |
15 | do { spin_unlock(lock); (void)(flags); } while (0) | 15 | do { spin_unlock(lock); (void)(flags); } while (0) |
16 | #define mutex_remove_waiter(lock, waiter, ti) \ | 16 | #define mutex_remove_waiter(lock, waiter, task) \ |
17 | __list_del((waiter)->list.prev, (waiter)->list.next) | 17 | __list_del((waiter)->list.prev, (waiter)->list.next) |
18 | 18 | ||
19 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER | 19 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER |
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index ce2f75e32ae1..5fc8c311b8fe 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c | |||
@@ -267,6 +267,66 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock, | |||
267 | #define queued_spin_lock_slowpath native_queued_spin_lock_slowpath | 267 | #define queued_spin_lock_slowpath native_queued_spin_lock_slowpath |
268 | #endif | 268 | #endif |
269 | 269 | ||
270 | /* | ||
271 | * queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before | ||
272 | * issuing an _unordered_ store to set _Q_LOCKED_VAL. | ||
273 | * | ||
274 | * This means that the store can be delayed, but no later than the | ||
275 | * store-release from the unlock. This means that simply observing | ||
276 | * _Q_LOCKED_VAL is not sufficient to determine if the lock is acquired. | ||
277 | * | ||
278 | * There are two paths that can issue the unordered store: | ||
279 | * | ||
280 | * (1) clear_pending_set_locked(): *,1,0 -> *,0,1 | ||
281 | * | ||
282 | * (2) set_locked(): t,0,0 -> t,0,1 ; t != 0 | ||
283 | * atomic_cmpxchg_relaxed(): t,0,0 -> 0,0,1 | ||
284 | * | ||
285 | * However, in both cases we have other !0 state we've set before to queue | ||
286 | * ourselves: | ||
287 | * | ||
288 | * For (1) we have the atomic_cmpxchg_acquire() that set _Q_PENDING_VAL, our | ||
289 | * load is constrained by that ACQUIRE to not pass before that, and thus must | ||
290 | * observe the store. | ||
291 | * | ||
292 | * For (2) we have a more interesting scenario. We enqueue ourselves using | ||
293 | * xchg_tail(), which ends up being a RELEASE. This in itself is not | ||
294 | * sufficient, however that is followed by an smp_cond_acquire() on the same | ||
295 | * word, giving a RELEASE->ACQUIRE ordering. This again constrains our load and | ||
296 | * guarantees we must observe that store. | ||
297 | * | ||
298 | * Therefore both cases have other !0 state that is observable before the | ||
299 | * unordered locked byte store comes through. This means we can use that to | ||
300 | * wait for the lock store, and then wait for an unlock. | ||
301 | */ | ||
302 | #ifndef queued_spin_unlock_wait | ||
303 | void queued_spin_unlock_wait(struct qspinlock *lock) | ||
304 | { | ||
305 | u32 val; | ||
306 | |||
307 | for (;;) { | ||
308 | val = atomic_read(&lock->val); | ||
309 | |||
310 | if (!val) /* not locked, we're done */ | ||
311 | goto done; | ||
312 | |||
313 | if (val & _Q_LOCKED_MASK) /* locked, go wait for unlock */ | ||
314 | break; | ||
315 | |||
316 | /* not locked, but pending, wait until we observe the lock */ | ||
317 | cpu_relax(); | ||
318 | } | ||
319 | |||
320 | /* any unlock is good */ | ||
321 | while (atomic_read(&lock->val) & _Q_LOCKED_MASK) | ||
322 | cpu_relax(); | ||
323 | |||
324 | done: | ||
325 | smp_rmb(); /* CTRL + RMB -> ACQUIRE */ | ||
326 | } | ||
327 | EXPORT_SYMBOL(queued_spin_unlock_wait); | ||
328 | #endif | ||
329 | |||
270 | #endif /* _GEN_PV_LOCK_SLOWPATH */ | 330 | #endif /* _GEN_PV_LOCK_SLOWPATH */ |
271 | 331 | ||
272 | /** | 332 | /** |
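The long comment above justifies why queued_spin_unlock_wait() may first spin until the locked byte is observed and then spin until it clears. For context, here is a hedged caller-side sketch of the usual spin_unlock_wait() pattern that this implementation backs on qspinlock configurations; the structure and function names are invented for illustration and are not part of this diff.

    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct session {
            spinlock_t lock;
            void *payload;
    };

    void session_teardown(struct session *s)
    {
            /*
             * 's' has already been unpublished, so no new locker can find
             * it; wait for any still-running critical section to drain
             * before freeing, without ever taking the lock ourselves.
             */
            spin_unlock_wait(&s->lock);
            kfree(s->payload);
            kfree(s);
    }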
diff --git a/kernel/power/process.c b/kernel/power/process.c index df058bed53ce..0c2ee9761d57 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c | |||
@@ -146,6 +146,18 @@ int freeze_processes(void) | |||
146 | if (!error && !oom_killer_disable()) | 146 | if (!error && !oom_killer_disable()) |
147 | error = -EBUSY; | 147 | error = -EBUSY; |
148 | 148 | ||
149 | /* | ||
150 | * There is a hard-to-fix race between the oom_reaper kernel thread | ||
151 | * and oom_killer_disable(): oom_reaper calls exit_oom_victim() | ||
152 | * before the victim reaches exit_mm, so try to freeze all the tasks | ||
153 | * again and catch such a leftover task. | ||
154 | */ | ||
155 | if (!error) { | ||
156 | pr_info("Double checking all user space processes after OOM killer disable... "); | ||
157 | error = try_to_freeze_tasks(true); | ||
158 | pr_cont("\n"); | ||
159 | } | ||
160 | |||
149 | if (error) | 161 | if (error) |
150 | thaw_processes(); | 162 | thaw_processes(); |
151 | return error; | 163 | return error; |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7f2cae4620c7..51d7105f529a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
@@ -1536,7 +1536,9 @@ static int select_fallback_rq(int cpu, struct task_struct *p) | |||
1536 | for (;;) { | 1536 | for (;;) { |
1537 | /* Any allowed, online CPU? */ | 1537 | /* Any allowed, online CPU? */ |
1538 | for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) { | 1538 | for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) { |
1539 | if (!cpu_active(dest_cpu)) | 1539 | if (!(p->flags & PF_KTHREAD) && !cpu_active(dest_cpu)) |
1540 | continue; | ||
1541 | if (!cpu_online(dest_cpu)) | ||
1540 | continue; | 1542 | continue; |
1541 | goto out; | 1543 | goto out; |
1542 | } | 1544 | } |
@@ -2253,9 +2255,11 @@ int sysctl_numa_balancing(struct ctl_table *table, int write, | |||
2253 | #endif | 2255 | #endif |
2254 | #endif | 2256 | #endif |
2255 | 2257 | ||
2258 | #ifdef CONFIG_SCHEDSTATS | ||
2259 | |||
2256 | DEFINE_STATIC_KEY_FALSE(sched_schedstats); | 2260 | DEFINE_STATIC_KEY_FALSE(sched_schedstats); |
2261 | static bool __initdata __sched_schedstats = false; | ||
2257 | 2262 | ||
2258 | #ifdef CONFIG_SCHEDSTATS | ||
2259 | static void set_schedstats(bool enabled) | 2263 | static void set_schedstats(bool enabled) |
2260 | { | 2264 | { |
2261 | if (enabled) | 2265 | if (enabled) |
@@ -2278,11 +2282,16 @@ static int __init setup_schedstats(char *str) | |||
2278 | if (!str) | 2282 | if (!str) |
2279 | goto out; | 2283 | goto out; |
2280 | 2284 | ||
2285 | /* | ||
2286 | * This code is called before jump labels have been set up, so we can't | ||
2287 | * change the static branch directly just yet. Instead set a temporary | ||
2288 | * variable so init_schedstats() can do it later. | ||
2289 | */ | ||
2281 | if (!strcmp(str, "enable")) { | 2290 | if (!strcmp(str, "enable")) { |
2282 | set_schedstats(true); | 2291 | __sched_schedstats = true; |
2283 | ret = 1; | 2292 | ret = 1; |
2284 | } else if (!strcmp(str, "disable")) { | 2293 | } else if (!strcmp(str, "disable")) { |
2285 | set_schedstats(false); | 2294 | __sched_schedstats = false; |
2286 | ret = 1; | 2295 | ret = 1; |
2287 | } | 2296 | } |
2288 | out: | 2297 | out: |
@@ -2293,6 +2302,11 @@ out: | |||
2293 | } | 2302 | } |
2294 | __setup("schedstats=", setup_schedstats); | 2303 | __setup("schedstats=", setup_schedstats); |
2295 | 2304 | ||
2305 | static void __init init_schedstats(void) | ||
2306 | { | ||
2307 | set_schedstats(__sched_schedstats); | ||
2308 | } | ||
2309 | |||
2296 | #ifdef CONFIG_PROC_SYSCTL | 2310 | #ifdef CONFIG_PROC_SYSCTL |
2297 | int sysctl_schedstats(struct ctl_table *table, int write, | 2311 | int sysctl_schedstats(struct ctl_table *table, int write, |
2298 | void __user *buffer, size_t *lenp, loff_t *ppos) | 2312 | void __user *buffer, size_t *lenp, loff_t *ppos) |
@@ -2313,8 +2327,10 @@ int sysctl_schedstats(struct ctl_table *table, int write, | |||
2313 | set_schedstats(state); | 2327 | set_schedstats(state); |
2314 | return err; | 2328 | return err; |
2315 | } | 2329 | } |
2316 | #endif | 2330 | #endif /* CONFIG_PROC_SYSCTL */ |
2317 | #endif | 2331 | #else /* !CONFIG_SCHEDSTATS */ |
2332 | static inline void init_schedstats(void) {} | ||
2333 | #endif /* CONFIG_SCHEDSTATS */ | ||
2318 | 2334 | ||
2319 | /* | 2335 | /* |
2320 | * fork()/clone()-time setup: | 2336 | * fork()/clone()-time setup: |
@@ -2521,10 +2537,9 @@ void wake_up_new_task(struct task_struct *p) | |||
2521 | */ | 2537 | */ |
2522 | set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0)); | 2538 | set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0)); |
2523 | #endif | 2539 | #endif |
2524 | /* Post initialize new task's util average when its cfs_rq is set */ | 2540 | rq = __task_rq_lock(p, &rf); |
2525 | post_init_entity_util_avg(&p->se); | 2541 | post_init_entity_util_avg(&p->se); |
2526 | 2542 | ||
2527 | rq = __task_rq_lock(p, &rf); | ||
2528 | activate_task(rq, p, 0); | 2543 | activate_task(rq, p, 0); |
2529 | p->on_rq = TASK_ON_RQ_QUEUED; | 2544 | p->on_rq = TASK_ON_RQ_QUEUED; |
2530 | trace_sched_wakeup_new(p); | 2545 | trace_sched_wakeup_new(p); |
@@ -3156,7 +3171,8 @@ static noinline void __schedule_bug(struct task_struct *prev) | |||
3156 | static inline void schedule_debug(struct task_struct *prev) | 3171 | static inline void schedule_debug(struct task_struct *prev) |
3157 | { | 3172 | { |
3158 | #ifdef CONFIG_SCHED_STACK_END_CHECK | 3173 | #ifdef CONFIG_SCHED_STACK_END_CHECK |
3159 | BUG_ON(task_stack_end_corrupted(prev)); | 3174 | if (task_stack_end_corrupted(prev)) |
3175 | panic("corrupted stack end detected inside scheduler\n"); | ||
3160 | #endif | 3176 | #endif |
3161 | 3177 | ||
3162 | if (unlikely(in_atomic_preempt_off())) { | 3178 | if (unlikely(in_atomic_preempt_off())) { |
@@ -5133,14 +5149,16 @@ void show_state_filter(unsigned long state_filter) | |||
5133 | /* | 5149 | /* |
5134 | * reset the NMI-timeout, listing all files on a slow | 5150 | * reset the NMI-timeout, listing all files on a slow |
5135 | * console might take a lot of time: | 5151 | * console might take a lot of time: |
5152 | * Also, reset softlockup watchdogs on all CPUs, because | ||
5153 | * another CPU might be blocked waiting for us to process | ||
5154 | * an IPI. | ||
5136 | */ | 5155 | */ |
5137 | touch_nmi_watchdog(); | 5156 | touch_nmi_watchdog(); |
5157 | touch_all_softlockup_watchdogs(); | ||
5138 | if (!state_filter || (p->state & state_filter)) | 5158 | if (!state_filter || (p->state & state_filter)) |
5139 | sched_show_task(p); | 5159 | sched_show_task(p); |
5140 | } | 5160 | } |
5141 | 5161 | ||
5142 | touch_all_softlockup_watchdogs(); | ||
5143 | |||
5144 | #ifdef CONFIG_SCHED_DEBUG | 5162 | #ifdef CONFIG_SCHED_DEBUG |
5145 | if (!state_filter) | 5163 | if (!state_filter) |
5146 | sysrq_sched_debug_show(); | 5164 | sysrq_sched_debug_show(); |
@@ -7487,6 +7505,8 @@ void __init sched_init(void) | |||
7487 | #endif | 7505 | #endif |
7488 | init_sched_fair_class(); | 7506 | init_sched_fair_class(); |
7489 | 7507 | ||
7508 | init_schedstats(); | ||
7509 | |||
7490 | scheduler_running = 1; | 7510 | scheduler_running = 1; |
7491 | } | 7511 | } |
7492 | 7512 | ||
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index cf905f655ba1..0368c393a336 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c | |||
@@ -427,19 +427,12 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) | |||
427 | SPLIT_NS(p->se.vruntime), | 427 | SPLIT_NS(p->se.vruntime), |
428 | (long long)(p->nvcsw + p->nivcsw), | 428 | (long long)(p->nvcsw + p->nivcsw), |
429 | p->prio); | 429 | p->prio); |
430 | #ifdef CONFIG_SCHEDSTATS | 430 | |
431 | if (schedstat_enabled()) { | ||
432 | SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld", | ||
433 | SPLIT_NS(p->se.statistics.wait_sum), | ||
434 | SPLIT_NS(p->se.sum_exec_runtime), | ||
435 | SPLIT_NS(p->se.statistics.sum_sleep_runtime)); | ||
436 | } | ||
437 | #else | ||
438 | SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld", | 431 | SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld", |
439 | 0LL, 0L, | 432 | SPLIT_NS(schedstat_val(p, se.statistics.wait_sum)), |
440 | SPLIT_NS(p->se.sum_exec_runtime), | 433 | SPLIT_NS(p->se.sum_exec_runtime), |
441 | 0LL, 0L); | 434 | SPLIT_NS(schedstat_val(p, se.statistics.sum_sleep_runtime))); |
442 | #endif | 435 | |
443 | #ifdef CONFIG_NUMA_BALANCING | 436 | #ifdef CONFIG_NUMA_BALANCING |
444 | SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p)); | 437 | SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p)); |
445 | #endif | 438 | #endif |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 218f8e83db73..bdcbeea90c95 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
@@ -2904,6 +2904,23 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq) | |||
2904 | } | 2904 | } |
2905 | } | 2905 | } |
2906 | 2906 | ||
2907 | /* | ||
2908 | * Unsigned subtract and clamp on underflow. | ||
2909 | * | ||
2910 | * Explicitly do a load-store to ensure the intermediate value never hits | ||
2911 | * memory. This allows lockless observations without ever seeing the negative | ||
2912 | * values. | ||
2913 | */ | ||
2914 | #define sub_positive(_ptr, _val) do { \ | ||
2915 | typeof(_ptr) ptr = (_ptr); \ | ||
2916 | typeof(*ptr) val = (_val); \ | ||
2917 | typeof(*ptr) res, var = READ_ONCE(*ptr); \ | ||
2918 | res = var - val; \ | ||
2919 | if (res > var) \ | ||
2920 | res = 0; \ | ||
2921 | WRITE_ONCE(*ptr, res); \ | ||
2922 | } while (0) | ||
2923 | |||
2907 | /* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */ | 2924 | /* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */ |
2908 | static inline int | 2925 | static inline int |
2909 | update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq) | 2926 | update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq) |
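The sub_positive() macro added above is small but subtle: it clamps an unsigned subtraction at zero and uses READ_ONCE()/WRITE_ONCE() so a lockless reader never observes a wrapped-around intermediate value. A standalone userspace model of just the clamping behaviour, illustrative only and without the ONCE accessors:

    #include <stdio.h>

    #define sub_positive(_ptr, _val) do {                   \
            unsigned long *ptr = (_ptr);                    \
            unsigned long val = (_val);                     \
            unsigned long var = *ptr;                       \
            unsigned long res = var - val;                  \
            if (res > var)          /* unsigned underflow */\
                    res = 0;                                \
            *ptr = res;                                     \
    } while (0)

    int main(void)
    {
            unsigned long load_avg = 100;

            sub_positive(&load_avg, 40);    /* 60 */
            sub_positive(&load_avg, 400);   /* clamped to 0, not ~2^64 */
            printf("%lu\n", load_avg);
            return 0;
    }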
@@ -2913,15 +2930,15 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq) | |||
2913 | 2930 | ||
2914 | if (atomic_long_read(&cfs_rq->removed_load_avg)) { | 2931 | if (atomic_long_read(&cfs_rq->removed_load_avg)) { |
2915 | s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0); | 2932 | s64 r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0); |
2916 | sa->load_avg = max_t(long, sa->load_avg - r, 0); | 2933 | sub_positive(&sa->load_avg, r); |
2917 | sa->load_sum = max_t(s64, sa->load_sum - r * LOAD_AVG_MAX, 0); | 2934 | sub_positive(&sa->load_sum, r * LOAD_AVG_MAX); |
2918 | removed_load = 1; | 2935 | removed_load = 1; |
2919 | } | 2936 | } |
2920 | 2937 | ||
2921 | if (atomic_long_read(&cfs_rq->removed_util_avg)) { | 2938 | if (atomic_long_read(&cfs_rq->removed_util_avg)) { |
2922 | long r = atomic_long_xchg(&cfs_rq->removed_util_avg, 0); | 2939 | long r = atomic_long_xchg(&cfs_rq->removed_util_avg, 0); |
2923 | sa->util_avg = max_t(long, sa->util_avg - r, 0); | 2940 | sub_positive(&sa->util_avg, r); |
2924 | sa->util_sum = max_t(s32, sa->util_sum - r * LOAD_AVG_MAX, 0); | 2941 | sub_positive(&sa->util_sum, r * LOAD_AVG_MAX); |
2925 | removed_util = 1; | 2942 | removed_util = 1; |
2926 | } | 2943 | } |
2927 | 2944 | ||
@@ -2994,10 +3011,10 @@ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s | |||
2994 | &se->avg, se->on_rq * scale_load_down(se->load.weight), | 3011 | &se->avg, se->on_rq * scale_load_down(se->load.weight), |
2995 | cfs_rq->curr == se, NULL); | 3012 | cfs_rq->curr == se, NULL); |
2996 | 3013 | ||
2997 | cfs_rq->avg.load_avg = max_t(long, cfs_rq->avg.load_avg - se->avg.load_avg, 0); | 3014 | sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg); |
2998 | cfs_rq->avg.load_sum = max_t(s64, cfs_rq->avg.load_sum - se->avg.load_sum, 0); | 3015 | sub_positive(&cfs_rq->avg.load_sum, se->avg.load_sum); |
2999 | cfs_rq->avg.util_avg = max_t(long, cfs_rq->avg.util_avg - se->avg.util_avg, 0); | 3016 | sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg); |
3000 | cfs_rq->avg.util_sum = max_t(s32, cfs_rq->avg.util_sum - se->avg.util_sum, 0); | 3017 | sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum); |
3001 | 3018 | ||
3002 | cfs_rq_util_change(cfs_rq); | 3019 | cfs_rq_util_change(cfs_rq); |
3003 | } | 3020 | } |
@@ -3246,7 +3263,7 @@ static inline void check_schedstat_required(void) | |||
3246 | trace_sched_stat_iowait_enabled() || | 3263 | trace_sched_stat_iowait_enabled() || |
3247 | trace_sched_stat_blocked_enabled() || | 3264 | trace_sched_stat_blocked_enabled() || |
3248 | trace_sched_stat_runtime_enabled()) { | 3265 | trace_sched_stat_runtime_enabled()) { |
3249 | pr_warn_once("Scheduler tracepoints stat_sleep, stat_iowait, " | 3266 | printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, " |
3250 | "stat_blocked and stat_runtime require the " | 3267 | "stat_blocked and stat_runtime require the " |
3251 | "kernel parameter schedstats=enabled or " | 3268 | "kernel parameter schedstats=enabled or " |
3252 | "kernel.sched_schedstats=1\n"); | 3269 | "kernel.sched_schedstats=1\n"); |
@@ -4185,6 +4202,26 @@ static void check_enqueue_throttle(struct cfs_rq *cfs_rq) | |||
4185 | if (!cfs_bandwidth_used()) | 4202 | if (!cfs_bandwidth_used()) |
4186 | return; | 4203 | return; |
4187 | 4204 | ||
4205 | /* Synchronize hierarchical throttle counter: */ | ||
4206 | if (unlikely(!cfs_rq->throttle_uptodate)) { | ||
4207 | struct rq *rq = rq_of(cfs_rq); | ||
4208 | struct cfs_rq *pcfs_rq; | ||
4209 | struct task_group *tg; | ||
4210 | |||
4211 | cfs_rq->throttle_uptodate = 1; | ||
4212 | |||
4213 | /* Get closest up-to-date node, because leaves go first: */ | ||
4214 | for (tg = cfs_rq->tg->parent; tg; tg = tg->parent) { | ||
4215 | pcfs_rq = tg->cfs_rq[cpu_of(rq)]; | ||
4216 | if (pcfs_rq->throttle_uptodate) | ||
4217 | break; | ||
4218 | } | ||
4219 | if (tg) { | ||
4220 | cfs_rq->throttle_count = pcfs_rq->throttle_count; | ||
4221 | cfs_rq->throttled_clock_task = rq_clock_task(rq); | ||
4222 | } | ||
4223 | } | ||
4224 | |||
4188 | /* an active group must be handled by the update_curr()->put() path */ | 4225 | /* an active group must be handled by the update_curr()->put() path */ |
4189 | if (!cfs_rq->runtime_enabled || cfs_rq->curr) | 4226 | if (!cfs_rq->runtime_enabled || cfs_rq->curr) |
4190 | return; | 4227 | return; |
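The hunk above lazily synchronizes a cfs_rq's throttle counter by walking up the task-group hierarchy to the closest cfs_rq that is already marked up to date and inheriting its count. Stripped of the scheduler types, the shape of that walk looks roughly like the sketch below; the names are invented for illustration.

    struct node {
            struct node *parent;
            int uptodate;
            int count;
    };

    static void lazy_sync(struct node *n)
    {
            struct node *p;

            if (n->uptodate)
                    return;
            n->uptodate = 1;
            /* Closest up-to-date ancestor wins; leaves are marked first. */
            for (p = n->parent; p; p = p->parent)
                    if (p->uptodate)
                            break;
            if (p)
                    n->count = p->count;
    }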
@@ -4500,15 +4537,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) | |||
4500 | 4537 | ||
4501 | /* Don't dequeue parent if it has other entities besides us */ | 4538 | /* Don't dequeue parent if it has other entities besides us */ |
4502 | if (cfs_rq->load.weight) { | 4539 | if (cfs_rq->load.weight) { |
4540 | /* Avoid re-evaluating load for this entity: */ | ||
4541 | se = parent_entity(se); | ||
4503 | /* | 4542 | /* |
4504 | * Bias pick_next to pick a task from this cfs_rq, as | 4543 | * Bias pick_next to pick a task from this cfs_rq, as |
4505 | * p is sleeping when it is within its sched_slice. | 4544 | * p is sleeping when it is within its sched_slice. |
4506 | */ | 4545 | */ |
4507 | if (task_sleep && parent_entity(se)) | 4546 | if (task_sleep && se && !throttled_hierarchy(cfs_rq)) |
4508 | set_next_buddy(parent_entity(se)); | 4547 | set_next_buddy(se); |
4509 | |||
4510 | /* avoid re-evaluating load for this entity */ | ||
4511 | se = parent_entity(se); | ||
4512 | break; | 4548 | break; |
4513 | } | 4549 | } |
4514 | flags |= DEQUEUE_SLEEP; | 4550 | flags |= DEQUEUE_SLEEP; |
@@ -8496,8 +8532,9 @@ void free_fair_sched_group(struct task_group *tg) | |||
8496 | 8532 | ||
8497 | int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | 8533 | int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) |
8498 | { | 8534 | { |
8499 | struct cfs_rq *cfs_rq; | ||
8500 | struct sched_entity *se; | 8535 | struct sched_entity *se; |
8536 | struct cfs_rq *cfs_rq; | ||
8537 | struct rq *rq; | ||
8501 | int i; | 8538 | int i; |
8502 | 8539 | ||
8503 | tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL); | 8540 | tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL); |
@@ -8512,6 +8549,8 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | |||
8512 | init_cfs_bandwidth(tg_cfs_bandwidth(tg)); | 8549 | init_cfs_bandwidth(tg_cfs_bandwidth(tg)); |
8513 | 8550 | ||
8514 | for_each_possible_cpu(i) { | 8551 | for_each_possible_cpu(i) { |
8552 | rq = cpu_rq(i); | ||
8553 | |||
8515 | cfs_rq = kzalloc_node(sizeof(struct cfs_rq), | 8554 | cfs_rq = kzalloc_node(sizeof(struct cfs_rq), |
8516 | GFP_KERNEL, cpu_to_node(i)); | 8555 | GFP_KERNEL, cpu_to_node(i)); |
8517 | if (!cfs_rq) | 8556 | if (!cfs_rq) |
@@ -8525,7 +8564,10 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | |||
8525 | init_cfs_rq(cfs_rq); | 8564 | init_cfs_rq(cfs_rq); |
8526 | init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]); | 8565 | init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]); |
8527 | init_entity_runnable_average(se); | 8566 | init_entity_runnable_average(se); |
8567 | |||
8568 | raw_spin_lock_irq(&rq->lock); | ||
8528 | post_init_entity_util_avg(se); | 8569 | post_init_entity_util_avg(se); |
8570 | raw_spin_unlock_irq(&rq->lock); | ||
8529 | } | 8571 | } |
8530 | 8572 | ||
8531 | return 1; | 8573 | return 1; |
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 72f1f3087b04..7cbeb92a1cb9 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
@@ -437,7 +437,7 @@ struct cfs_rq { | |||
437 | 437 | ||
438 | u64 throttled_clock, throttled_clock_task; | 438 | u64 throttled_clock, throttled_clock_task; |
439 | u64 throttled_clock_task_time; | 439 | u64 throttled_clock_task_time; |
440 | int throttled, throttle_count; | 440 | int throttled, throttle_count, throttle_uptodate; |
441 | struct list_head throttled_list; | 441 | struct list_head throttled_list; |
442 | #endif /* CONFIG_CFS_BANDWIDTH */ | 442 | #endif /* CONFIG_CFS_BANDWIDTH */ |
443 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | 443 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index 70b3b6a20fb0..78955cbea31c 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h | |||
@@ -33,6 +33,8 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta) | |||
33 | # define schedstat_inc(rq, field) do { if (schedstat_enabled()) { (rq)->field++; } } while (0) | 33 | # define schedstat_inc(rq, field) do { if (schedstat_enabled()) { (rq)->field++; } } while (0) |
34 | # define schedstat_add(rq, field, amt) do { if (schedstat_enabled()) { (rq)->field += (amt); } } while (0) | 34 | # define schedstat_add(rq, field, amt) do { if (schedstat_enabled()) { (rq)->field += (amt); } } while (0) |
35 | # define schedstat_set(var, val) do { if (schedstat_enabled()) { var = (val); } } while (0) | 35 | # define schedstat_set(var, val) do { if (schedstat_enabled()) { var = (val); } } while (0) |
36 | # define schedstat_val(rq, field) ((schedstat_enabled()) ? (rq)->field : 0) | ||
37 | |||
36 | #else /* !CONFIG_SCHEDSTATS */ | 38 | #else /* !CONFIG_SCHEDSTATS */ |
37 | static inline void | 39 | static inline void |
38 | rq_sched_info_arrive(struct rq *rq, unsigned long long delta) | 40 | rq_sched_info_arrive(struct rq *rq, unsigned long long delta) |
@@ -47,6 +49,7 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta) | |||
47 | # define schedstat_inc(rq, field) do { } while (0) | 49 | # define schedstat_inc(rq, field) do { } while (0) |
48 | # define schedstat_add(rq, field, amt) do { } while (0) | 50 | # define schedstat_add(rq, field, amt) do { } while (0) |
49 | # define schedstat_set(var, val) do { } while (0) | 51 | # define schedstat_set(var, val) do { } while (0) |
52 | # define schedstat_val(rq, field) 0 | ||
50 | #endif | 53 | #endif |
51 | 54 | ||
52 | #ifdef CONFIG_SCHED_INFO | 55 | #ifdef CONFIG_SCHED_INFO |
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 037ea6ea3cb2..3de25fbed785 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c | |||
@@ -208,6 +208,10 @@ static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5) | |||
208 | event->pmu->count) | 208 | event->pmu->count) |
209 | return -EINVAL; | 209 | return -EINVAL; |
210 | 210 | ||
211 | if (unlikely(event->attr.type != PERF_TYPE_HARDWARE && | ||
212 | event->attr.type != PERF_TYPE_RAW)) | ||
213 | return -EINVAL; | ||
214 | |||
211 | /* | 215 | /* |
212 | * we don't know if the function is run successfully by the | 216 | * we don't know if the function is run successfully by the |
213 | * return value. It can be judged in other places, such as | 217 | * return value. It can be judged in other places, such as |
@@ -347,7 +351,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func | |||
347 | } | 351 | } |
348 | 352 | ||
349 | /* bpf+kprobe programs can access fields of 'struct pt_regs' */ | 353 | /* bpf+kprobe programs can access fields of 'struct pt_regs' */ |
350 | static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type) | 354 | static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type, |
355 | enum bpf_reg_type *reg_type) | ||
351 | { | 356 | { |
352 | /* check bounds */ | 357 | /* check bounds */ |
353 | if (off < 0 || off >= sizeof(struct pt_regs)) | 358 | if (off < 0 || off >= sizeof(struct pt_regs)) |
@@ -425,7 +430,8 @@ static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id) | |||
425 | } | 430 | } |
426 | } | 431 | } |
427 | 432 | ||
428 | static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type) | 433 | static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type, |
434 | enum bpf_reg_type *reg_type) | ||
429 | { | 435 | { |
430 | if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE) | 436 | if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE) |
431 | return false; | 437 | return false; |
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index f96f0383f6c6..ad1d6164e946 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c | |||
@@ -36,6 +36,10 @@ struct trace_bprintk_fmt { | |||
36 | static inline struct trace_bprintk_fmt *lookup_format(const char *fmt) | 36 | static inline struct trace_bprintk_fmt *lookup_format(const char *fmt) |
37 | { | 37 | { |
38 | struct trace_bprintk_fmt *pos; | 38 | struct trace_bprintk_fmt *pos; |
39 | |||
40 | if (!fmt) | ||
41 | return ERR_PTR(-EINVAL); | ||
42 | |||
39 | list_for_each_entry(pos, &trace_bprintk_fmt_list, list) { | 43 | list_for_each_entry(pos, &trace_bprintk_fmt_list, list) { |
40 | if (!strcmp(pos->fmt, fmt)) | 44 | if (!strcmp(pos->fmt, fmt)) |
41 | return pos; | 45 | return pos; |
@@ -57,7 +61,8 @@ void hold_module_trace_bprintk_format(const char **start, const char **end) | |||
57 | for (iter = start; iter < end; iter++) { | 61 | for (iter = start; iter < end; iter++) { |
58 | struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter); | 62 | struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter); |
59 | if (tb_fmt) { | 63 | if (tb_fmt) { |
60 | *iter = tb_fmt->fmt; | 64 | if (!IS_ERR(tb_fmt)) |
65 | *iter = tb_fmt->fmt; | ||
61 | continue; | 66 | continue; |
62 | } | 67 | } |
63 | 68 | ||