aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2018-01-02 08:46:35 -0500
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2018-01-02 08:46:35 -0500
commit b6a09416e83ffe4eccfb4ef1b91b3b66483fa810 (patch)
tree b30f266e85047244dcdb47d5afc134e76aec530d /kernel
parent db809859c8cee415293b830e67178f526d1eb2be (diff)
parent 30a7acd573899fd8b8ac39236eff6468b195ac7d (diff)
Merge 4.15-rc6 into char-misc-next
We want the fixes in here as well.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/bpf/hashtab.c | 2
-rw-r--r-- kernel/bpf/verifier.c | 283
-rw-r--r-- kernel/cgroup/debug.c | 4
-rw-r--r-- kernel/cgroup/stat.c | 8
-rw-r--r-- kernel/cpu.c | 12
-rw-r--r-- kernel/exit.c | 8
-rw-r--r-- kernel/fork.c | 3
-rw-r--r-- kernel/groups.c | 5
-rw-r--r-- kernel/irq/debug.h | 5
-rw-r--r-- kernel/irq/debugfs.c | 1
-rw-r--r-- kernel/irq/generic-chip.c | 11
-rw-r--r-- kernel/irq/internals.h | 2
-rw-r--r-- kernel/irq/irqdomain.c | 13
-rw-r--r-- kernel/irq/msi.c | 64
-rw-r--r-- kernel/kcov.c | 4
-rw-r--r-- kernel/locking/lockdep.c | 652
-rw-r--r-- kernel/locking/spinlock.c | 13
-rw-r--r-- kernel/sched/core.c | 22
-rw-r--r-- kernel/sched/cpufreq_schedutil.c | 2
-rw-r--r-- kernel/sched/rt.c | 8
-rw-r--r-- kernel/time/Kconfig | 1
-rw-r--r-- kernel/time/posix-timers.c | 29
-rw-r--r-- kernel/time/tick-sched.c | 32
-rw-r--r-- kernel/time/timer.c | 35
-rw-r--r-- kernel/trace/Kconfig | 1
-rw-r--r-- kernel/trace/bpf_trace.c | 19
-rw-r--r-- kernel/trace/ring_buffer.c | 18
-rw-r--r-- kernel/trace/trace.c | 54
-rw-r--r-- kernel/trace/trace_stack.c | 4
-rw-r--r-- kernel/uid16.c | 1
-rw-r--r-- kernel/workqueue.c | 33
31 files changed, 462 insertions, 887 deletions
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index e469e05c8e83..3905d4bc5b80 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -114,6 +114,7 @@ static void htab_free_elems(struct bpf_htab *htab)
114 pptr = htab_elem_get_ptr(get_htab_elem(htab, i), 114 pptr = htab_elem_get_ptr(get_htab_elem(htab, i),
115 htab->map.key_size); 115 htab->map.key_size);
116 free_percpu(pptr); 116 free_percpu(pptr);
117 cond_resched();
117 } 118 }
118free_elems: 119free_elems:
119 bpf_map_area_free(htab->elems); 120 bpf_map_area_free(htab->elems);
@@ -159,6 +160,7 @@ static int prealloc_init(struct bpf_htab *htab)
159 goto free_elems; 160 goto free_elems;
160 htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size, 161 htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size,
161 pptr); 162 pptr);
163 cond_resched();
162 } 164 }
163 165
164skip_percpu_elems: 166skip_percpu_elems:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d4593571c404..04b24876cd23 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1059,6 +1059,11 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
1059 break; 1059 break;
1060 case PTR_TO_STACK: 1060 case PTR_TO_STACK:
1061 pointer_desc = "stack "; 1061 pointer_desc = "stack ";
1062 /* The stack spill tracking logic in check_stack_write()
1063 * and check_stack_read() relies on stack accesses being
1064 * aligned.
1065 */
1066 strict = true;
1062 break; 1067 break;
1063 default: 1068 default:
1064 break; 1069 break;
@@ -1067,6 +1072,29 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
1067 strict); 1072 strict);
1068} 1073}
1069 1074
1075/* truncate register to smaller size (in bytes)
1076 * must be called with size < BPF_REG_SIZE
1077 */
1078static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
1079{
1080 u64 mask;
1081
1082 /* clear high bits in bit representation */
1083 reg->var_off = tnum_cast(reg->var_off, size);
1084
1085 /* fix arithmetic bounds */
1086 mask = ((u64)1 << (size * 8)) - 1;
1087 if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
1088 reg->umin_value &= mask;
1089 reg->umax_value &= mask;
1090 } else {
1091 reg->umin_value = 0;
1092 reg->umax_value = mask;
1093 }
1094 reg->smin_value = reg->umin_value;
1095 reg->smax_value = reg->umax_value;
1096}
1097
1070/* check whether memory at (regno + off) is accessible for t = (read | write) 1098/* check whether memory at (regno + off) is accessible for t = (read | write)
1071 * if t==write, value_regno is a register which value is stored into memory 1099 * if t==write, value_regno is a register which value is stored into memory
1072 * if t==read, value_regno is a register which will receive the value from memory 1100 * if t==read, value_regno is a register which will receive the value from memory
@@ -1200,9 +1228,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
1200 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ && 1228 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
1201 regs[value_regno].type == SCALAR_VALUE) { 1229 regs[value_regno].type == SCALAR_VALUE) {
1202 /* b/h/w load zero-extends, mark upper bits as known 0 */ 1230 /* b/h/w load zero-extends, mark upper bits as known 0 */
1203 regs[value_regno].var_off = 1231 coerce_reg_to_size(&regs[value_regno], size);
1204 tnum_cast(regs[value_regno].var_off, size);
1205 __update_reg_bounds(&regs[value_regno]);
1206 } 1232 }
1207 return err; 1233 return err;
1208} 1234}
@@ -1282,6 +1308,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
1282 tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off); 1308 tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off);
1283 verbose(env, "invalid variable stack read R%d var_off=%s\n", 1309 verbose(env, "invalid variable stack read R%d var_off=%s\n",
1284 regno, tn_buf); 1310 regno, tn_buf);
1311 return -EACCES;
1285 } 1312 }
1286 off = regs[regno].off + regs[regno].var_off.value; 1313 off = regs[regno].off + regs[regno].var_off.value;
1287 if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || 1314 if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
@@ -1674,7 +1701,13 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
1674 return -EINVAL; 1701 return -EINVAL;
1675 } 1702 }
1676 1703
1704 /* With LD_ABS/IND some JITs save/restore skb from r1. */
1677 changes_data = bpf_helper_changes_pkt_data(fn->func); 1705 changes_data = bpf_helper_changes_pkt_data(fn->func);
1706 if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
1707 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
1708 func_id_name(func_id), func_id);
1709 return -EINVAL;
1710 }
1678 1711
1679 memset(&meta, 0, sizeof(meta)); 1712 memset(&meta, 0, sizeof(meta));
1680 meta.pkt_access = fn->pkt_access; 1713 meta.pkt_access = fn->pkt_access;
@@ -1766,14 +1799,6 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
1766 return 0; 1799 return 0;
1767} 1800}
1768 1801
1769static void coerce_reg_to_32(struct bpf_reg_state *reg)
1770{
1771 /* clear high 32 bits */
1772 reg->var_off = tnum_cast(reg->var_off, 4);
1773 /* Update bounds */
1774 __update_reg_bounds(reg);
1775}
1776
1777static bool signed_add_overflows(s64 a, s64 b) 1802static bool signed_add_overflows(s64 a, s64 b)
1778{ 1803{
1779 /* Do the add in u64, where overflow is well-defined */ 1804 /* Do the add in u64, where overflow is well-defined */
@@ -1794,6 +1819,41 @@ static bool signed_sub_overflows(s64 a, s64 b)
1794 return res > a; 1819 return res > a;
1795} 1820}
1796 1821
1822static bool check_reg_sane_offset(struct bpf_verifier_env *env,
1823 const struct bpf_reg_state *reg,
1824 enum bpf_reg_type type)
1825{
1826 bool known = tnum_is_const(reg->var_off);
1827 s64 val = reg->var_off.value;
1828 s64 smin = reg->smin_value;
1829
1830 if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
1831 verbose(env, "math between %s pointer and %lld is not allowed\n",
1832 reg_type_str[type], val);
1833 return false;
1834 }
1835
1836 if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
1837 verbose(env, "%s pointer offset %d is not allowed\n",
1838 reg_type_str[type], reg->off);
1839 return false;
1840 }
1841
1842 if (smin == S64_MIN) {
1843 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
1844 reg_type_str[type]);
1845 return false;
1846 }
1847
1848 if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
1849 verbose(env, "value %lld makes %s pointer be out of bounds\n",
1850 smin, reg_type_str[type]);
1851 return false;
1852 }
1853
1854 return true;
1855}
1856
1797/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. 1857/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
1798 * Caller should also handle BPF_MOV case separately. 1858 * Caller should also handle BPF_MOV case separately.
1799 * If we return -EACCES, caller may want to try again treating pointer as a 1859 * If we return -EACCES, caller may want to try again treating pointer as a
@@ -1830,29 +1890,25 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
1830 1890
1831 if (BPF_CLASS(insn->code) != BPF_ALU64) { 1891 if (BPF_CLASS(insn->code) != BPF_ALU64) {
1832 /* 32-bit ALU ops on pointers produce (meaningless) scalars */ 1892 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
1833 if (!env->allow_ptr_leaks) 1893 verbose(env,
1834 verbose(env, 1894 "R%d 32-bit pointer arithmetic prohibited\n",
1835 "R%d 32-bit pointer arithmetic prohibited\n", 1895 dst);
1836 dst);
1837 return -EACCES; 1896 return -EACCES;
1838 } 1897 }
1839 1898
1840 if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) { 1899 if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
1841 if (!env->allow_ptr_leaks) 1900 verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
1842 verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n", 1901 dst);
1843 dst);
1844 return -EACCES; 1902 return -EACCES;
1845 } 1903 }
1846 if (ptr_reg->type == CONST_PTR_TO_MAP) { 1904 if (ptr_reg->type == CONST_PTR_TO_MAP) {
1847 if (!env->allow_ptr_leaks) 1905 verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
1848 verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n", 1906 dst);
1849 dst);
1850 return -EACCES; 1907 return -EACCES;
1851 } 1908 }
1852 if (ptr_reg->type == PTR_TO_PACKET_END) { 1909 if (ptr_reg->type == PTR_TO_PACKET_END) {
1853 if (!env->allow_ptr_leaks) 1910 verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
1854 verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n", 1911 dst);
1855 dst);
1856 return -EACCES; 1912 return -EACCES;
1857 } 1913 }
1858 1914
@@ -1862,6 +1918,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
1862 dst_reg->type = ptr_reg->type; 1918 dst_reg->type = ptr_reg->type;
1863 dst_reg->id = ptr_reg->id; 1919 dst_reg->id = ptr_reg->id;
1864 1920
1921 if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
1922 !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
1923 return -EINVAL;
1924
1865 switch (opcode) { 1925 switch (opcode) {
1866 case BPF_ADD: 1926 case BPF_ADD:
1867 /* We can take a fixed offset as long as it doesn't overflow 1927 /* We can take a fixed offset as long as it doesn't overflow
@@ -1915,9 +1975,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
1915 case BPF_SUB: 1975 case BPF_SUB:
1916 if (dst_reg == off_reg) { 1976 if (dst_reg == off_reg) {
1917 /* scalar -= pointer. Creates an unknown scalar */ 1977 /* scalar -= pointer. Creates an unknown scalar */
1918 if (!env->allow_ptr_leaks) 1978 verbose(env, "R%d tried to subtract pointer from scalar\n",
1919 verbose(env, "R%d tried to subtract pointer from scalar\n", 1979 dst);
1920 dst);
1921 return -EACCES; 1980 return -EACCES;
1922 } 1981 }
1923 /* We don't allow subtraction from FP, because (according to 1982 /* We don't allow subtraction from FP, because (according to
@@ -1925,9 +1984,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
1925 * be able to deal with it. 1984 * be able to deal with it.
1926 */ 1985 */
1927 if (ptr_reg->type == PTR_TO_STACK) { 1986 if (ptr_reg->type == PTR_TO_STACK) {
1928 if (!env->allow_ptr_leaks) 1987 verbose(env, "R%d subtraction from stack pointer prohibited\n",
1929 verbose(env, "R%d subtraction from stack pointer prohibited\n", 1988 dst);
1930 dst);
1931 return -EACCES; 1989 return -EACCES;
1932 } 1990 }
1933 if (known && (ptr_reg->off - smin_val == 1991 if (known && (ptr_reg->off - smin_val ==
@@ -1976,28 +2034,30 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
1976 case BPF_AND: 2034 case BPF_AND:
1977 case BPF_OR: 2035 case BPF_OR:
1978 case BPF_XOR: 2036 case BPF_XOR:
1979 /* bitwise ops on pointers are troublesome, prohibit for now. 2037 /* bitwise ops on pointers are troublesome, prohibit. */
1980 * (However, in principle we could allow some cases, e.g. 2038 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
1981 * ptr &= ~3 which would reduce min_value by 3.) 2039 dst, bpf_alu_string[opcode >> 4]);
1982 */
1983 if (!env->allow_ptr_leaks)
1984 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
1985 dst, bpf_alu_string[opcode >> 4]);
1986 return -EACCES; 2040 return -EACCES;
1987 default: 2041 default:
1988 /* other operators (e.g. MUL,LSH) produce non-pointer results */ 2042 /* other operators (e.g. MUL,LSH) produce non-pointer results */
1989 if (!env->allow_ptr_leaks) 2043 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
1990 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n", 2044 dst, bpf_alu_string[opcode >> 4]);
1991 dst, bpf_alu_string[opcode >> 4]);
1992 return -EACCES; 2045 return -EACCES;
1993 } 2046 }
1994 2047
2048 if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
2049 return -EINVAL;
2050
1995 __update_reg_bounds(dst_reg); 2051 __update_reg_bounds(dst_reg);
1996 __reg_deduce_bounds(dst_reg); 2052 __reg_deduce_bounds(dst_reg);
1997 __reg_bound_offset(dst_reg); 2053 __reg_bound_offset(dst_reg);
1998 return 0; 2054 return 0;
1999} 2055}
2000 2056
2057/* WARNING: This function does calculations on 64-bit values, but the actual
2058 * execution may occur on 32-bit values. Therefore, things like bitshifts
2059 * need extra checks in the 32-bit case.
2060 */
2001static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, 2061static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2002 struct bpf_insn *insn, 2062 struct bpf_insn *insn,
2003 struct bpf_reg_state *dst_reg, 2063 struct bpf_reg_state *dst_reg,
@@ -2008,12 +2068,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2008 bool src_known, dst_known; 2068 bool src_known, dst_known;
2009 s64 smin_val, smax_val; 2069 s64 smin_val, smax_val;
2010 u64 umin_val, umax_val; 2070 u64 umin_val, umax_val;
2071 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
2011 2072
2012 if (BPF_CLASS(insn->code) != BPF_ALU64) {
2013 /* 32-bit ALU ops are (32,32)->64 */
2014 coerce_reg_to_32(dst_reg);
2015 coerce_reg_to_32(&src_reg);
2016 }
2017 smin_val = src_reg.smin_value; 2073 smin_val = src_reg.smin_value;
2018 smax_val = src_reg.smax_value; 2074 smax_val = src_reg.smax_value;
2019 umin_val = src_reg.umin_value; 2075 umin_val = src_reg.umin_value;
@@ -2021,6 +2077,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2021 src_known = tnum_is_const(src_reg.var_off); 2077 src_known = tnum_is_const(src_reg.var_off);
2022 dst_known = tnum_is_const(dst_reg->var_off); 2078 dst_known = tnum_is_const(dst_reg->var_off);
2023 2079
2080 if (!src_known &&
2081 opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
2082 __mark_reg_unknown(dst_reg);
2083 return 0;
2084 }
2085
2024 switch (opcode) { 2086 switch (opcode) {
2025 case BPF_ADD: 2087 case BPF_ADD:
2026 if (signed_add_overflows(dst_reg->smin_value, smin_val) || 2088 if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
@@ -2149,9 +2211,9 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2149 __update_reg_bounds(dst_reg); 2211 __update_reg_bounds(dst_reg);
2150 break; 2212 break;
2151 case BPF_LSH: 2213 case BPF_LSH:
2152 if (umax_val > 63) { 2214 if (umax_val >= insn_bitness) {
2153 /* Shifts greater than 63 are undefined. This includes 2215 /* Shifts greater than 31 or 63 are undefined.
2154 * shifts by a negative number. 2216 * This includes shifts by a negative number.
2155 */ 2217 */
2156 mark_reg_unknown(env, regs, insn->dst_reg); 2218 mark_reg_unknown(env, regs, insn->dst_reg);
2157 break; 2219 break;
@@ -2177,27 +2239,29 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2177 __update_reg_bounds(dst_reg); 2239 __update_reg_bounds(dst_reg);
2178 break; 2240 break;
2179 case BPF_RSH: 2241 case BPF_RSH:
2180 if (umax_val > 63) { 2242 if (umax_val >= insn_bitness) {
2181 /* Shifts greater than 63 are undefined. This includes 2243 /* Shifts greater than 31 or 63 are undefined.
2182 * shifts by a negative number. 2244 * This includes shifts by a negative number.
2183 */ 2245 */
2184 mark_reg_unknown(env, regs, insn->dst_reg); 2246 mark_reg_unknown(env, regs, insn->dst_reg);
2185 break; 2247 break;
2186 } 2248 }
2187 /* BPF_RSH is an unsigned shift, so make the appropriate casts */ 2249 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
2188 if (dst_reg->smin_value < 0) { 2250 * be negative, then either:
2189 if (umin_val) { 2251 * 1) src_reg might be zero, so the sign bit of the result is
2190 /* Sign bit will be cleared */ 2252 * unknown, so we lose our signed bounds
2191 dst_reg->smin_value = 0; 2253 * 2) it's known negative, thus the unsigned bounds capture the
2192 } else { 2254 * signed bounds
2193 /* Lost sign bit information */ 2255 * 3) the signed bounds cross zero, so they tell us nothing
2194 dst_reg->smin_value = S64_MIN; 2256 * about the result
2195 dst_reg->smax_value = S64_MAX; 2257 * If the value in dst_reg is known nonnegative, then again the
2196 } 2258 * unsigned bounts capture the signed bounds.
2197 } else { 2259 * unsigned bounds capture the signed bounds.
2198 dst_reg->smin_value = 2260 * and rely on inferring new ones from the unsigned bounds and
2199 (u64)(dst_reg->smin_value) >> umax_val; 2261 * var_off of the result.
2200 } 2262 */
2263 dst_reg->smin_value = S64_MIN;
2264 dst_reg->smax_value = S64_MAX;
2201 if (src_known) 2265 if (src_known)
2202 dst_reg->var_off = tnum_rshift(dst_reg->var_off, 2266 dst_reg->var_off = tnum_rshift(dst_reg->var_off,
2203 umin_val); 2267 umin_val);
@@ -2213,6 +2277,12 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
2213 break; 2277 break;
2214 } 2278 }
2215 2279
2280 if (BPF_CLASS(insn->code) != BPF_ALU64) {
2281 /* 32-bit ALU ops are (32,32)->32 */
2282 coerce_reg_to_size(dst_reg, 4);
2283 coerce_reg_to_size(&src_reg, 4);
2284 }
2285
2216 __reg_deduce_bounds(dst_reg); 2286 __reg_deduce_bounds(dst_reg);
2217 __reg_bound_offset(dst_reg); 2287 __reg_bound_offset(dst_reg);
2218 return 0; 2288 return 0;
@@ -2227,7 +2297,6 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
2227 struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg; 2297 struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg;
2228 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; 2298 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
2229 u8 opcode = BPF_OP(insn->code); 2299 u8 opcode = BPF_OP(insn->code);
2230 int rc;
2231 2300
2232 dst_reg = &regs[insn->dst_reg]; 2301 dst_reg = &regs[insn->dst_reg];
2233 src_reg = NULL; 2302 src_reg = NULL;
@@ -2238,43 +2307,29 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
2238 if (src_reg->type != SCALAR_VALUE) { 2307 if (src_reg->type != SCALAR_VALUE) {
2239 if (dst_reg->type != SCALAR_VALUE) { 2308 if (dst_reg->type != SCALAR_VALUE) {
2240 /* Combining two pointers by any ALU op yields 2309 /* Combining two pointers by any ALU op yields
2241 * an arbitrary scalar. 2310 * an arbitrary scalar. Disallow all math except
2311 * pointer subtraction
2242 */ 2312 */
2243 if (!env->allow_ptr_leaks) { 2313 if (opcode == BPF_SUB){
2244 verbose(env, "R%d pointer %s pointer prohibited\n", 2314 mark_reg_unknown(env, regs, insn->dst_reg);
2245 insn->dst_reg, 2315 return 0;
2246 bpf_alu_string[opcode >> 4]);
2247 return -EACCES;
2248 } 2316 }
2249 mark_reg_unknown(env, regs, insn->dst_reg); 2317 verbose(env, "R%d pointer %s pointer prohibited\n",
2250 return 0; 2318 insn->dst_reg,
2319 bpf_alu_string[opcode >> 4]);
2320 return -EACCES;
2251 } else { 2321 } else {
2252 /* scalar += pointer 2322 /* scalar += pointer
2253 * This is legal, but we have to reverse our 2323 * This is legal, but we have to reverse our
2254 * src/dest handling in computing the range 2324 * src/dest handling in computing the range
2255 */ 2325 */
2256 rc = adjust_ptr_min_max_vals(env, insn, 2326 return adjust_ptr_min_max_vals(env, insn,
2257 src_reg, dst_reg); 2327 src_reg, dst_reg);
2258 if (rc == -EACCES && env->allow_ptr_leaks) {
2259 /* scalar += unknown scalar */
2260 __mark_reg_unknown(&off_reg);
2261 return adjust_scalar_min_max_vals(
2262 env, insn,
2263 dst_reg, off_reg);
2264 }
2265 return rc;
2266 } 2328 }
2267 } else if (ptr_reg) { 2329 } else if (ptr_reg) {
2268 /* pointer += scalar */ 2330 /* pointer += scalar */
2269 rc = adjust_ptr_min_max_vals(env, insn, 2331 return adjust_ptr_min_max_vals(env, insn,
2270 dst_reg, src_reg); 2332 dst_reg, src_reg);
2271 if (rc == -EACCES && env->allow_ptr_leaks) {
2272 /* unknown scalar += scalar */
2273 __mark_reg_unknown(dst_reg);
2274 return adjust_scalar_min_max_vals(
2275 env, insn, dst_reg, *src_reg);
2276 }
2277 return rc;
2278 } 2333 }
2279 } else { 2334 } else {
2280 /* Pretend the src is a reg with a known value, since we only 2335 /* Pretend the src is a reg with a known value, since we only
@@ -2283,17 +2338,9 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
2283 off_reg.type = SCALAR_VALUE; 2338 off_reg.type = SCALAR_VALUE;
2284 __mark_reg_known(&off_reg, insn->imm); 2339 __mark_reg_known(&off_reg, insn->imm);
2285 src_reg = &off_reg; 2340 src_reg = &off_reg;
2286 if (ptr_reg) { /* pointer += K */ 2341 if (ptr_reg) /* pointer += K */
2287 rc = adjust_ptr_min_max_vals(env, insn, 2342 return adjust_ptr_min_max_vals(env, insn,
2288 ptr_reg, src_reg); 2343 ptr_reg, src_reg);
2289 if (rc == -EACCES && env->allow_ptr_leaks) {
2290 /* unknown scalar += K */
2291 __mark_reg_unknown(dst_reg);
2292 return adjust_scalar_min_max_vals(
2293 env, insn, dst_reg, off_reg);
2294 }
2295 return rc;
2296 }
2297 } 2344 }
2298 2345
2299 /* Got here implies adding two SCALAR_VALUEs */ 2346 /* Got here implies adding two SCALAR_VALUEs */
@@ -2390,17 +2437,20 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
2390 return -EACCES; 2437 return -EACCES;
2391 } 2438 }
2392 mark_reg_unknown(env, regs, insn->dst_reg); 2439 mark_reg_unknown(env, regs, insn->dst_reg);
2393 /* high 32 bits are known zero. */ 2440 coerce_reg_to_size(&regs[insn->dst_reg], 4);
2394 regs[insn->dst_reg].var_off = tnum_cast(
2395 regs[insn->dst_reg].var_off, 4);
2396 __update_reg_bounds(&regs[insn->dst_reg]);
2397 } 2441 }
2398 } else { 2442 } else {
2399 /* case: R = imm 2443 /* case: R = imm
2400 * remember the value we stored into this reg 2444 * remember the value we stored into this reg
2401 */ 2445 */
2402 regs[insn->dst_reg].type = SCALAR_VALUE; 2446 regs[insn->dst_reg].type = SCALAR_VALUE;
2403 __mark_reg_known(regs + insn->dst_reg, insn->imm); 2447 if (BPF_CLASS(insn->code) == BPF_ALU64) {
2448 __mark_reg_known(regs + insn->dst_reg,
2449 insn->imm);
2450 } else {
2451 __mark_reg_known(regs + insn->dst_reg,
2452 (u32)insn->imm);
2453 }
2404 } 2454 }
2405 2455
2406 } else if (opcode > BPF_END) { 2456 } else if (opcode > BPF_END) {
@@ -3431,15 +3481,14 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
3431 return range_within(rold, rcur) && 3481 return range_within(rold, rcur) &&
3432 tnum_in(rold->var_off, rcur->var_off); 3482 tnum_in(rold->var_off, rcur->var_off);
3433 } else { 3483 } else {
3434 /* if we knew anything about the old value, we're not 3484 /* We're trying to use a pointer in place of a scalar.
3435 * equal, because we can't know anything about the 3485 * Even if the scalar was unbounded, this could lead to
3436 * scalar value of the pointer in the new value. 3486 * pointer leaks because scalars are allowed to leak
3487 * while pointers are not. We could make this safe in
3488 * special cases if root is calling us, but it's
3489 * probably not worth the hassle.
3437 */ 3490 */
3438 return rold->umin_value == 0 && 3491 return false;
3439 rold->umax_value == U64_MAX &&
3440 rold->smin_value == S64_MIN &&
3441 rold->smax_value == S64_MAX &&
3442 tnum_is_unknown(rold->var_off);
3443 } 3492 }
3444 case PTR_TO_MAP_VALUE: 3493 case PTR_TO_MAP_VALUE:
3445 /* If the new min/max/var_off satisfy the old ones and 3494 /* If the new min/max/var_off satisfy the old ones and
diff --git a/kernel/cgroup/debug.c b/kernel/cgroup/debug.c
index 5f780d8f6a9d..9caeda610249 100644
--- a/kernel/cgroup/debug.c
+++ b/kernel/cgroup/debug.c
@@ -50,7 +50,7 @@ static int current_css_set_read(struct seq_file *seq, void *v)
50 50
51 spin_lock_irq(&css_set_lock); 51 spin_lock_irq(&css_set_lock);
52 rcu_read_lock(); 52 rcu_read_lock();
53 cset = rcu_dereference(current->cgroups); 53 cset = task_css_set(current);
54 refcnt = refcount_read(&cset->refcount); 54 refcnt = refcount_read(&cset->refcount);
55 seq_printf(seq, "css_set %pK %d", cset, refcnt); 55 seq_printf(seq, "css_set %pK %d", cset, refcnt);
56 if (refcnt > cset->nr_tasks) 56 if (refcnt > cset->nr_tasks)
@@ -96,7 +96,7 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
96 96
97 spin_lock_irq(&css_set_lock); 97 spin_lock_irq(&css_set_lock);
98 rcu_read_lock(); 98 rcu_read_lock();
99 cset = rcu_dereference(current->cgroups); 99 cset = task_css_set(current);
100 list_for_each_entry(link, &cset->cgrp_links, cgrp_link) { 100 list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
101 struct cgroup *c = link->cgrp; 101 struct cgroup *c = link->cgrp;
102 102
diff --git a/kernel/cgroup/stat.c b/kernel/cgroup/stat.c
index 133b465691d6..1e111dd455c4 100644
--- a/kernel/cgroup/stat.c
+++ b/kernel/cgroup/stat.c
@@ -296,8 +296,12 @@ int cgroup_stat_init(struct cgroup *cgrp)
296 } 296 }
297 297
298 /* ->updated_children list is self terminated */ 298 /* ->updated_children list is self terminated */
299 for_each_possible_cpu(cpu) 299 for_each_possible_cpu(cpu) {
300 cgroup_cpu_stat(cgrp, cpu)->updated_children = cgrp; 300 struct cgroup_cpu_stat *cstat = cgroup_cpu_stat(cgrp, cpu);
301
302 cstat->updated_children = cgrp;
303 u64_stats_init(&cstat->sync);
304 }
301 305
302 prev_cputime_init(&cgrp->stat.prev_cputime); 306 prev_cputime_init(&cgrp->stat.prev_cputime);
303 307
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 41376c3ac93b..53f7dc65f9a3 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -80,19 +80,19 @@ static struct lockdep_map cpuhp_state_down_map =
80 STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map); 80 STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
81 81
82 82
83static void inline cpuhp_lock_acquire(bool bringup) 83static inline void cpuhp_lock_acquire(bool bringup)
84{ 84{
85 lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map); 85 lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
86} 86}
87 87
88static void inline cpuhp_lock_release(bool bringup) 88static inline void cpuhp_lock_release(bool bringup)
89{ 89{
90 lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map); 90 lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
91} 91}
92#else 92#else
93 93
94static void inline cpuhp_lock_acquire(bool bringup) { } 94static inline void cpuhp_lock_acquire(bool bringup) { }
95static void inline cpuhp_lock_release(bool bringup) { } 95static inline void cpuhp_lock_release(bool bringup) { }
96 96
97#endif 97#endif
98 98
@@ -1277,9 +1277,9 @@ static struct cpuhp_step cpuhp_bp_states[] = {
1277 * before blk_mq_queue_reinit_notify() from notify_dead(), 1277 * before blk_mq_queue_reinit_notify() from notify_dead(),
1278 * otherwise a RCU stall occurs. 1278 * otherwise a RCU stall occurs.
1279 */ 1279 */
1280 [CPUHP_TIMERS_DEAD] = { 1280 [CPUHP_TIMERS_PREPARE] = {
1281 .name = "timers:dead", 1281 .name = "timers:dead",
1282 .startup.single = NULL, 1282 .startup.single = timers_prepare_cpu,
1283 .teardown.single = timers_dead_cpu, 1283 .teardown.single = timers_dead_cpu,
1284 }, 1284 },
1285 /* Kicks the plugged cpu into life */ 1285 /* Kicks the plugged cpu into life */
diff --git a/kernel/exit.c b/kernel/exit.c
index 6b4298a41167..df0c91d5606c 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1755,3 +1755,11 @@ Efault:
1755 return -EFAULT; 1755 return -EFAULT;
1756} 1756}
1757#endif 1757#endif
1758
1759__weak void abort(void)
1760{
1761 BUG();
1762
1763 /* if that doesn't kill us, halt */
1764 panic("Oops failed to kill thread");
1765}
diff --git a/kernel/fork.c b/kernel/fork.c
index 432eadf6b58c..2295fc69717f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -721,8 +721,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
721 goto out; 721 goto out;
722 } 722 }
723 /* a new mm has just been created */ 723 /* a new mm has just been created */
724 arch_dup_mmap(oldmm, mm); 724 retval = arch_dup_mmap(oldmm, mm);
725 retval = 0;
726out: 725out:
727 up_write(&mm->mmap_sem); 726 up_write(&mm->mmap_sem);
728 flush_tlb_mm(oldmm); 727 flush_tlb_mm(oldmm);
diff --git a/kernel/groups.c b/kernel/groups.c
index e357bc800111..daae2f2dc6d4 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -86,11 +86,12 @@ static int gid_cmp(const void *_a, const void *_b)
86 return gid_gt(a, b) - gid_lt(a, b); 86 return gid_gt(a, b) - gid_lt(a, b);
87} 87}
88 88
89static void groups_sort(struct group_info *group_info) 89void groups_sort(struct group_info *group_info)
90{ 90{
91 sort(group_info->gid, group_info->ngroups, sizeof(*group_info->gid), 91 sort(group_info->gid, group_info->ngroups, sizeof(*group_info->gid),
92 gid_cmp, NULL); 92 gid_cmp, NULL);
93} 93}
94EXPORT_SYMBOL(groups_sort);
94 95
95/* a simple bsearch */ 96/* a simple bsearch */
96int groups_search(const struct group_info *group_info, kgid_t grp) 97int groups_search(const struct group_info *group_info, kgid_t grp)
@@ -122,7 +123,6 @@ int groups_search(const struct group_info *group_info, kgid_t grp)
122void set_groups(struct cred *new, struct group_info *group_info) 123void set_groups(struct cred *new, struct group_info *group_info)
123{ 124{
124 put_group_info(new->group_info); 125 put_group_info(new->group_info);
125 groups_sort(group_info);
126 get_group_info(group_info); 126 get_group_info(group_info);
127 new->group_info = group_info; 127 new->group_info = group_info;
128} 128}
@@ -206,6 +206,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
206 return retval; 206 return retval;
207 } 207 }
208 208
209 groups_sort(group_info);
209 retval = set_current_groups(group_info); 210 retval = set_current_groups(group_info);
210 put_group_info(group_info); 211 put_group_info(group_info);
211 212
diff --git a/kernel/irq/debug.h b/kernel/irq/debug.h
index 17f05ef8f575..e4d3819a91cc 100644
--- a/kernel/irq/debug.h
+++ b/kernel/irq/debug.h
@@ -12,6 +12,11 @@
12 12
13static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc) 13static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc)
14{ 14{
15 static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 5);
16
17 if (!__ratelimit(&ratelimit))
18 return;
19
15 printk("irq %d, desc: %p, depth: %d, count: %d, unhandled: %d\n", 20 printk("irq %d, desc: %p, depth: %d, count: %d, unhandled: %d\n",
16 irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled); 21 irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled);
17 printk("->handle_irq(): %p, ", desc->handle_irq); 22 printk("->handle_irq(): %p, ", desc->handle_irq);
diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
index 7f608ac39653..acfaaef8672a 100644
--- a/kernel/irq/debugfs.c
+++ b/kernel/irq/debugfs.c
@@ -113,6 +113,7 @@ static const struct irq_bit_descr irqdata_states[] = {
113 BIT_MASK_DESCR(IRQD_SETAFFINITY_PENDING), 113 BIT_MASK_DESCR(IRQD_SETAFFINITY_PENDING),
114 BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED), 114 BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED),
115 BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN), 115 BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN),
116 BIT_MASK_DESCR(IRQD_CAN_RESERVE),
116 117
117 BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU), 118 BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU),
118 119
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index c26c5bb6b491..508c03dfef25 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -364,10 +364,11 @@ irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq)
364EXPORT_SYMBOL_GPL(irq_get_domain_generic_chip); 364EXPORT_SYMBOL_GPL(irq_get_domain_generic_chip);
365 365
366/* 366/*
367 * Separate lockdep class for interrupt chip which can nest irq_desc 367 * Separate lockdep classes for interrupt chip which can nest irq_desc
368 * lock. 368 * lock and request mutex.
369 */ 369 */
370static struct lock_class_key irq_nested_lock_class; 370static struct lock_class_key irq_nested_lock_class;
371static struct lock_class_key irq_nested_request_class;
371 372
372/* 373/*
373 * irq_map_generic_chip - Map a generic chip for an irq domain 374 * irq_map_generic_chip - Map a generic chip for an irq domain
@@ -409,7 +410,8 @@ int irq_map_generic_chip(struct irq_domain *d, unsigned int virq,
409 set_bit(idx, &gc->installed); 410 set_bit(idx, &gc->installed);
410 411
411 if (dgc->gc_flags & IRQ_GC_INIT_NESTED_LOCK) 412 if (dgc->gc_flags & IRQ_GC_INIT_NESTED_LOCK)
412 irq_set_lockdep_class(virq, &irq_nested_lock_class); 413 irq_set_lockdep_class(virq, &irq_nested_lock_class,
414 &irq_nested_request_class);
413 415
414 if (chip->irq_calc_mask) 416 if (chip->irq_calc_mask)
415 chip->irq_calc_mask(data); 417 chip->irq_calc_mask(data);
@@ -479,7 +481,8 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk,
479 continue; 481 continue;
480 482
481 if (flags & IRQ_GC_INIT_NESTED_LOCK) 483 if (flags & IRQ_GC_INIT_NESTED_LOCK)
482 irq_set_lockdep_class(i, &irq_nested_lock_class); 484 irq_set_lockdep_class(i, &irq_nested_lock_class,
485 &irq_nested_request_class);
483 486
484 if (!(flags & IRQ_GC_NO_MASK)) { 487 if (!(flags & IRQ_GC_NO_MASK)) {
485 struct irq_data *d = irq_get_irq_data(i); 488 struct irq_data *d = irq_get_irq_data(i);
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 07d08ca701ec..ab19371eab9b 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -440,7 +440,7 @@ static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear)
440#endif /* !CONFIG_GENERIC_PENDING_IRQ */ 440#endif /* !CONFIG_GENERIC_PENDING_IRQ */
441 441
442#if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY) 442#if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY)
443static inline int irq_domain_activate_irq(struct irq_data *data, bool early) 443static inline int irq_domain_activate_irq(struct irq_data *data, bool reserve)
444{ 444{
445 irqd_set_activated(data); 445 irqd_set_activated(data);
446 return 0; 446 return 0;
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 4f4f60015e8a..62068ad46930 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -1693,7 +1693,7 @@ static void __irq_domain_deactivate_irq(struct irq_data *irq_data)
1693 } 1693 }
1694} 1694}
1695 1695
1696static int __irq_domain_activate_irq(struct irq_data *irqd, bool early) 1696static int __irq_domain_activate_irq(struct irq_data *irqd, bool reserve)
1697{ 1697{
1698 int ret = 0; 1698 int ret = 0;
1699 1699
@@ -1702,9 +1702,9 @@ static int __irq_domain_activate_irq(struct irq_data *irqd, bool early)
1702 1702
1703 if (irqd->parent_data) 1703 if (irqd->parent_data)
1704 ret = __irq_domain_activate_irq(irqd->parent_data, 1704 ret = __irq_domain_activate_irq(irqd->parent_data,
1705 early); 1705 reserve);
1706 if (!ret && domain->ops->activate) { 1706 if (!ret && domain->ops->activate) {
1707 ret = domain->ops->activate(domain, irqd, early); 1707 ret = domain->ops->activate(domain, irqd, reserve);
1708 /* Rollback in case of error */ 1708 /* Rollback in case of error */
1709 if (ret && irqd->parent_data) 1709 if (ret && irqd->parent_data)
1710 __irq_domain_deactivate_irq(irqd->parent_data); 1710 __irq_domain_deactivate_irq(irqd->parent_data);
@@ -1716,17 +1716,18 @@ static int __irq_domain_activate_irq(struct irq_data *irqd, bool early)
1716/** 1716/**
1717 * irq_domain_activate_irq - Call domain_ops->activate recursively to activate 1717 * irq_domain_activate_irq - Call domain_ops->activate recursively to activate
1718 * interrupt 1718 * interrupt
1719 * @irq_data: outermost irq_data associated with interrupt 1719 * @irq_data: Outermost irq_data associated with interrupt
1720 * @reserve: If set only reserve an interrupt vector instead of assigning one
1720 * 1721 *
1721 * This is the second step to call domain_ops->activate to program interrupt 1722 * This is the second step to call domain_ops->activate to program interrupt
1722 * controllers, so the interrupt could actually get delivered. 1723 * controllers, so the interrupt could actually get delivered.
1723 */ 1724 */
1724int irq_domain_activate_irq(struct irq_data *irq_data, bool early) 1725int irq_domain_activate_irq(struct irq_data *irq_data, bool reserve)
1725{ 1726{
1726 int ret = 0; 1727 int ret = 0;
1727 1728
1728 if (!irqd_is_activated(irq_data)) 1729 if (!irqd_is_activated(irq_data))
1729 ret = __irq_domain_activate_irq(irq_data, early); 1730 ret = __irq_domain_activate_irq(irq_data, reserve);
1730 if (!ret) 1731 if (!ret)
1731 irqd_set_activated(irq_data); 1732 irqd_set_activated(irq_data);
1732 return ret; 1733 return ret;
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index edb987b2c58d..2f3c4f5382cc 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -339,6 +339,40 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
339 return ret; 339 return ret;
340} 340}
341 341
342/*
343 * Carefully check whether the device can use reservation mode. If
344 * reservation mode is enabled then the early activation will assign a
345 * dummy vector to the device. If the PCI/MSI device does not support
346 * masking of the entry then this can result in spurious interrupts when
347 * the device driver is not absolutely careful. But even then a malfunction
348 * of the hardware could result in a spurious interrupt on the dummy vector
349 * and render the device unusable. If the entry can be masked then the core
350 * logic will prevent the spurious interrupt and reservation mode can be
351 * used. For now reservation mode is restricted to PCI/MSI.
352 */
353static bool msi_check_reservation_mode(struct irq_domain *domain,
354 struct msi_domain_info *info,
355 struct device *dev)
356{
357 struct msi_desc *desc;
358
359 if (domain->bus_token != DOMAIN_BUS_PCI_MSI)
360 return false;
361
362 if (!(info->flags & MSI_FLAG_MUST_REACTIVATE))
363 return false;
364
365 if (IS_ENABLED(CONFIG_PCI_MSI) && pci_msi_ignore_mask)
366 return false;
367
368 /*
369 * Checking the first MSI descriptor is sufficient. MSIX supports
370 * masking and MSI does so when the maskbit is set.
371 */
372 desc = first_msi_entry(dev);
373 return desc->msi_attrib.is_msix || desc->msi_attrib.maskbit;
374}
375
342/** 376/**
343 * msi_domain_alloc_irqs - Allocate interrupts from a MSI interrupt domain 377 * msi_domain_alloc_irqs - Allocate interrupts from a MSI interrupt domain
344 * @domain: The domain to allocate from 378 * @domain: The domain to allocate from
@@ -353,9 +387,11 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
353{ 387{
354 struct msi_domain_info *info = domain->host_data; 388 struct msi_domain_info *info = domain->host_data;
355 struct msi_domain_ops *ops = info->ops; 389 struct msi_domain_ops *ops = info->ops;
356 msi_alloc_info_t arg; 390 struct irq_data *irq_data;
357 struct msi_desc *desc; 391 struct msi_desc *desc;
392 msi_alloc_info_t arg;
358 int i, ret, virq; 393 int i, ret, virq;
394 bool can_reserve;
359 395
360 ret = msi_domain_prepare_irqs(domain, dev, nvec, &arg); 396 ret = msi_domain_prepare_irqs(domain, dev, nvec, &arg);
361 if (ret) 397 if (ret)
@@ -385,6 +421,8 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
385 if (ops->msi_finish) 421 if (ops->msi_finish)
386 ops->msi_finish(&arg, 0); 422 ops->msi_finish(&arg, 0);
387 423
424 can_reserve = msi_check_reservation_mode(domain, info, dev);
425
388 for_each_msi_entry(desc, dev) { 426 for_each_msi_entry(desc, dev) {
389 virq = desc->irq; 427 virq = desc->irq;
390 if (desc->nvec_used == 1) 428 if (desc->nvec_used == 1)
@@ -397,15 +435,25 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
397 * the MSI entries before the PCI layer enables MSI in the 435 * the MSI entries before the PCI layer enables MSI in the
398 * card. Otherwise the card latches a random msi message. 436 * card. Otherwise the card latches a random msi message.
399 */ 437 */
400 if (info->flags & MSI_FLAG_ACTIVATE_EARLY) { 438 if (!(info->flags & MSI_FLAG_ACTIVATE_EARLY))
401 struct irq_data *irq_data; 439 continue;
402 440
441 irq_data = irq_domain_get_irq_data(domain, desc->irq);
442 if (!can_reserve)
443 irqd_clr_can_reserve(irq_data);
444 ret = irq_domain_activate_irq(irq_data, can_reserve);
445 if (ret)
446 goto cleanup;
447 }
448
449 /*
450 * If these interrupts use reservation mode, clear the activated bit
451 * so request_irq() will assign the final vector.
452 */
453 if (can_reserve) {
454 for_each_msi_entry(desc, dev) {
403 irq_data = irq_domain_get_irq_data(domain, desc->irq); 455 irq_data = irq_domain_get_irq_data(domain, desc->irq);
404 ret = irq_domain_activate_irq(irq_data, true); 456 irqd_clr_activated(irq_data);
405 if (ret)
406 goto cleanup;
407 if (info->flags & MSI_FLAG_MUST_REACTIVATE)
408 irqd_clr_activated(irq_data);
409 } 457 }
410 } 458 }
411 return 0; 459 return 0;
diff --git a/kernel/kcov.c b/kernel/kcov.c
index 15f33faf4013..7594c033d98a 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -157,7 +157,7 @@ void notrace __sanitizer_cov_trace_cmp2(u16 arg1, u16 arg2)
157} 157}
158EXPORT_SYMBOL(__sanitizer_cov_trace_cmp2); 158EXPORT_SYMBOL(__sanitizer_cov_trace_cmp2);
159 159
160void notrace __sanitizer_cov_trace_cmp4(u16 arg1, u16 arg2) 160void notrace __sanitizer_cov_trace_cmp4(u32 arg1, u32 arg2)
161{ 161{
162 write_comp_data(KCOV_CMP_SIZE(2), arg1, arg2, _RET_IP_); 162 write_comp_data(KCOV_CMP_SIZE(2), arg1, arg2, _RET_IP_);
163} 163}
@@ -183,7 +183,7 @@ void notrace __sanitizer_cov_trace_const_cmp2(u16 arg1, u16 arg2)
183} 183}
184EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp2); 184EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp2);
185 185
186void notrace __sanitizer_cov_trace_const_cmp4(u16 arg1, u16 arg2) 186void notrace __sanitizer_cov_trace_const_cmp4(u32 arg1, u32 arg2)
187{ 187{
188 write_comp_data(KCOV_CMP_SIZE(2) | KCOV_CMP_CONST, arg1, arg2, 188 write_comp_data(KCOV_CMP_SIZE(2) | KCOV_CMP_CONST, arg1, arg2,
189 _RET_IP_); 189 _RET_IP_);
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 670d8d7d8087..5fa1324a4f29 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -57,10 +57,6 @@
57#define CREATE_TRACE_POINTS 57#define CREATE_TRACE_POINTS
58#include <trace/events/lock.h> 58#include <trace/events/lock.h>
59 59
60#ifdef CONFIG_LOCKDEP_CROSSRELEASE
61#include <linux/slab.h>
62#endif
63
64#ifdef CONFIG_PROVE_LOCKING 60#ifdef CONFIG_PROVE_LOCKING
65int prove_locking = 1; 61int prove_locking = 1;
66module_param(prove_locking, int, 0644); 62module_param(prove_locking, int, 0644);
@@ -75,19 +71,6 @@ module_param(lock_stat, int, 0644);
75#define lock_stat 0 71#define lock_stat 0
76#endif 72#endif
77 73
78#ifdef CONFIG_BOOTPARAM_LOCKDEP_CROSSRELEASE_FULLSTACK
79static int crossrelease_fullstack = 1;
80#else
81static int crossrelease_fullstack;
82#endif
83static int __init allow_crossrelease_fullstack(char *str)
84{
85 crossrelease_fullstack = 1;
86 return 0;
87}
88
89early_param("crossrelease_fullstack", allow_crossrelease_fullstack);
90
91/* 74/*
92 * lockdep_lock: protects the lockdep graph, the hashes and the 75 * lockdep_lock: protects the lockdep graph, the hashes and the
93 * class/list/hash allocators. 76 * class/list/hash allocators.
@@ -740,18 +723,6 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
740 return is_static || static_obj(lock->key) ? NULL : ERR_PTR(-EINVAL); 723 return is_static || static_obj(lock->key) ? NULL : ERR_PTR(-EINVAL);
741} 724}
742 725
743#ifdef CONFIG_LOCKDEP_CROSSRELEASE
744static void cross_init(struct lockdep_map *lock, int cross);
745static int cross_lock(struct lockdep_map *lock);
746static int lock_acquire_crosslock(struct held_lock *hlock);
747static int lock_release_crosslock(struct lockdep_map *lock);
748#else
749static inline void cross_init(struct lockdep_map *lock, int cross) {}
750static inline int cross_lock(struct lockdep_map *lock) { return 0; }
751static inline int lock_acquire_crosslock(struct held_lock *hlock) { return 2; }
752static inline int lock_release_crosslock(struct lockdep_map *lock) { return 2; }
753#endif
754
755/* 726/*
756 * Register a lock's class in the hash-table, if the class is not present 727 * Register a lock's class in the hash-table, if the class is not present
757 * yet. Otherwise we look it up. We cache the result in the lock object 728 * yet. Otherwise we look it up. We cache the result in the lock object
@@ -1151,41 +1122,22 @@ print_circular_lock_scenario(struct held_lock *src,
1151 printk(KERN_CONT "\n\n"); 1122 printk(KERN_CONT "\n\n");
1152 } 1123 }
1153 1124
1154 if (cross_lock(tgt->instance)) { 1125 printk(" Possible unsafe locking scenario:\n\n");
1155 printk(" Possible unsafe locking scenario by crosslock:\n\n"); 1126 printk(" CPU0 CPU1\n");
1156 printk(" CPU0 CPU1\n"); 1127 printk(" ---- ----\n");
1157 printk(" ---- ----\n"); 1128 printk(" lock(");
1158 printk(" lock("); 1129 __print_lock_name(target);
1159 __print_lock_name(parent); 1130 printk(KERN_CONT ");\n");
1160 printk(KERN_CONT ");\n"); 1131 printk(" lock(");
1161 printk(" lock("); 1132 __print_lock_name(parent);
1162 __print_lock_name(target); 1133 printk(KERN_CONT ");\n");
1163 printk(KERN_CONT ");\n"); 1134 printk(" lock(");
1164 printk(" lock("); 1135 __print_lock_name(target);
1165 __print_lock_name(source); 1136 printk(KERN_CONT ");\n");
1166 printk(KERN_CONT ");\n"); 1137 printk(" lock(");
1167 printk(" unlock("); 1138 __print_lock_name(source);
1168 __print_lock_name(target); 1139 printk(KERN_CONT ");\n");
1169 printk(KERN_CONT ");\n"); 1140 printk("\n *** DEADLOCK ***\n\n");
1170 printk("\n *** DEADLOCK ***\n\n");
1171 } else {
1172 printk(" Possible unsafe locking scenario:\n\n");
1173 printk(" CPU0 CPU1\n");
1174 printk(" ---- ----\n");
1175 printk(" lock(");
1176 __print_lock_name(target);
1177 printk(KERN_CONT ");\n");
1178 printk(" lock(");
1179 __print_lock_name(parent);
1180 printk(KERN_CONT ");\n");
1181 printk(" lock(");
1182 __print_lock_name(target);
1183 printk(KERN_CONT ");\n");
1184 printk(" lock(");
1185 __print_lock_name(source);
1186 printk(KERN_CONT ");\n");
1187 printk("\n *** DEADLOCK ***\n\n");
1188 }
1189} 1141}
1190 1142
1191/* 1143/*
@@ -1211,10 +1163,7 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth,
1211 curr->comm, task_pid_nr(curr)); 1163 curr->comm, task_pid_nr(curr));
1212 print_lock(check_src); 1164 print_lock(check_src);
1213 1165
1214 if (cross_lock(check_tgt->instance)) 1166 pr_warn("\nbut task is already holding lock:\n");
1215 pr_warn("\nbut now in release context of a crosslock acquired at the following:\n");
1216 else
1217 pr_warn("\nbut task is already holding lock:\n");
1218 1167
1219 print_lock(check_tgt); 1168 print_lock(check_tgt);
1220 pr_warn("\nwhich lock already depends on the new lock.\n\n"); 1169 pr_warn("\nwhich lock already depends on the new lock.\n\n");
@@ -1244,9 +1193,7 @@ static noinline int print_circular_bug(struct lock_list *this,
1244 if (!debug_locks_off_graph_unlock() || debug_locks_silent) 1193 if (!debug_locks_off_graph_unlock() || debug_locks_silent)
1245 return 0; 1194 return 0;
1246 1195
1247 if (cross_lock(check_tgt->instance)) 1196 if (!save_trace(&this->trace))
1248 this->trace = *trace;
1249 else if (!save_trace(&this->trace))
1250 return 0; 1197 return 0;
1251 1198
1252 depth = get_lock_depth(target); 1199 depth = get_lock_depth(target);
@@ -1850,9 +1797,6 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
1850 if (nest) 1797 if (nest)
1851 return 2; 1798 return 2;
1852 1799
1853 if (cross_lock(prev->instance))
1854 continue;
1855
1856 return print_deadlock_bug(curr, prev, next); 1800 return print_deadlock_bug(curr, prev, next);
1857 } 1801 }
1858 return 1; 1802 return 1;
@@ -2018,31 +1962,26 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
2018 for (;;) { 1962 for (;;) {
2019 int distance = curr->lockdep_depth - depth + 1; 1963 int distance = curr->lockdep_depth - depth + 1;
2020 hlock = curr->held_locks + depth - 1; 1964 hlock = curr->held_locks + depth - 1;
1965
2021 /* 1966 /*
2022 * Only non-crosslock entries get new dependencies added. 1967 * Only non-recursive-read entries get new dependencies
2023 * Crosslock entries will be added by commit later: 1968 * added:
2024 */ 1969 */
2025 if (!cross_lock(hlock->instance)) { 1970 if (hlock->read != 2 && hlock->check) {
1971 int ret = check_prev_add(curr, hlock, next, distance, &trace, save_trace);
1972 if (!ret)
1973 return 0;
1974
2026 /* 1975 /*
2027 * Only non-recursive-read entries get new dependencies 1976 * Stop after the first non-trylock entry,
2028 * added: 1977 * as non-trylock entries have added their
1978 * own direct dependencies already, so this
1979 * lock is connected to them indirectly:
2029 */ 1980 */
2030 if (hlock->read != 2 && hlock->check) { 1981 if (!hlock->trylock)
2031 int ret = check_prev_add(curr, hlock, next, 1982 break;
2032 distance, &trace, save_trace);
2033 if (!ret)
2034 return 0;
2035
2036 /*
2037 * Stop after the first non-trylock entry,
2038 * as non-trylock entries have added their
2039 * own direct dependencies already, so this
2040 * lock is connected to them indirectly:
2041 */
2042 if (!hlock->trylock)
2043 break;
2044 }
2045 } 1983 }
1984
2046 depth--; 1985 depth--;
2047 /* 1986 /*
2048 * End of lock-stack? 1987 * End of lock-stack?
@@ -3292,21 +3231,10 @@ static void __lockdep_init_map(struct lockdep_map *lock, const char *name,
3292void lockdep_init_map(struct lockdep_map *lock, const char *name, 3231void lockdep_init_map(struct lockdep_map *lock, const char *name,
3293 struct lock_class_key *key, int subclass) 3232 struct lock_class_key *key, int subclass)
3294{ 3233{
3295 cross_init(lock, 0);
3296 __lockdep_init_map(lock, name, key, subclass); 3234 __lockdep_init_map(lock, name, key, subclass);
3297} 3235}
3298EXPORT_SYMBOL_GPL(lockdep_init_map); 3236EXPORT_SYMBOL_GPL(lockdep_init_map);
3299 3237
3300#ifdef CONFIG_LOCKDEP_CROSSRELEASE
3301void lockdep_init_map_crosslock(struct lockdep_map *lock, const char *name,
3302 struct lock_class_key *key, int subclass)
3303{
3304 cross_init(lock, 1);
3305 __lockdep_init_map(lock, name, key, subclass);
3306}
3307EXPORT_SYMBOL_GPL(lockdep_init_map_crosslock);
3308#endif
3309
3310struct lock_class_key __lockdep_no_validate__; 3238struct lock_class_key __lockdep_no_validate__;
3311EXPORT_SYMBOL_GPL(__lockdep_no_validate__); 3239EXPORT_SYMBOL_GPL(__lockdep_no_validate__);
3312 3240
@@ -3362,7 +3290,6 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
3362 int chain_head = 0; 3290 int chain_head = 0;
3363 int class_idx; 3291 int class_idx;
3364 u64 chain_key; 3292 u64 chain_key;
3365 int ret;
3366 3293
3367 if (unlikely(!debug_locks)) 3294 if (unlikely(!debug_locks))
3368 return 0; 3295 return 0;
@@ -3411,8 +3338,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
3411 3338
3412 class_idx = class - lock_classes + 1; 3339 class_idx = class - lock_classes + 1;
3413 3340
3414 /* TODO: nest_lock is not implemented for crosslock yet. */ 3341 if (depth) {
3415 if (depth && !cross_lock(lock)) {
3416 hlock = curr->held_locks + depth - 1; 3342 hlock = curr->held_locks + depth - 1;
3417 if (hlock->class_idx == class_idx && nest_lock) { 3343 if (hlock->class_idx == class_idx && nest_lock) {
3418 if (hlock->references) { 3344 if (hlock->references) {
@@ -3500,14 +3426,6 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
3500 if (!validate_chain(curr, lock, hlock, chain_head, chain_key)) 3426 if (!validate_chain(curr, lock, hlock, chain_head, chain_key))
3501 return 0; 3427 return 0;
3502 3428
3503 ret = lock_acquire_crosslock(hlock);
3504 /*
3505 * 2 means normal acquire operations are needed. Otherwise, it's
3506 * ok just to return with '0:fail, 1:success'.
3507 */
3508 if (ret != 2)
3509 return ret;
3510
3511 curr->curr_chain_key = chain_key; 3429 curr->curr_chain_key = chain_key;
3512 curr->lockdep_depth++; 3430 curr->lockdep_depth++;
3513 check_chain_key(curr); 3431 check_chain_key(curr);
@@ -3745,19 +3663,11 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
3745 struct task_struct *curr = current; 3663 struct task_struct *curr = current;
3746 struct held_lock *hlock; 3664 struct held_lock *hlock;
3747 unsigned int depth; 3665 unsigned int depth;
3748 int ret, i; 3666 int i;
3749 3667
3750 if (unlikely(!debug_locks)) 3668 if (unlikely(!debug_locks))
3751 return 0; 3669 return 0;
3752 3670
3753 ret = lock_release_crosslock(lock);
3754 /*
3755 * 2 means normal release operations are needed. Otherwise, it's
3756 * ok just to return with '0:fail, 1:success'.
3757 */
3758 if (ret != 2)
3759 return ret;
3760
3761 depth = curr->lockdep_depth; 3671 depth = curr->lockdep_depth;
3762 /* 3672 /*
3763 * So we're all set to release this lock.. wait what lock? We don't 3673 * So we're all set to release this lock.. wait what lock? We don't
@@ -4675,495 +4585,3 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
4675 dump_stack(); 4585 dump_stack();
4676} 4586}
4677EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious); 4587EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious);
4678
4679#ifdef CONFIG_LOCKDEP_CROSSRELEASE
4680
4681/*
4682 * Crossrelease works by recording a lock history for each thread and
4683 * connecting those historic locks that were taken after the
4684 * wait_for_completion() in the complete() context.
4685 *
4686 * Task-A Task-B
4687 *
4688 * mutex_lock(&A);
4689 * mutex_unlock(&A);
4690 *
4691 * wait_for_completion(&C);
4692 * lock_acquire_crosslock();
4693 * atomic_inc_return(&cross_gen_id);
4694 * |
4695 * | mutex_lock(&B);
4696 * | mutex_unlock(&B);
4697 * |
4698 * | complete(&C);
4699 * `-- lock_commit_crosslock();
4700 *
4701 * Which will then add a dependency between B and C.
4702 */
4703
4704#define xhlock(i) (current->xhlocks[(i) % MAX_XHLOCKS_NR])
4705
4706/*
4707 * Whenever a crosslock is held, cross_gen_id will be increased.
4708 */
4709static atomic_t cross_gen_id; /* Can be wrapped */
4710
4711/*
4712 * Make an entry of the ring buffer invalid.
4713 */
4714static inline void invalidate_xhlock(struct hist_lock *xhlock)
4715{
4716 /*
4717 * Normally, xhlock->hlock.instance must be !NULL.
4718 */
4719 xhlock->hlock.instance = NULL;
4720}
4721
4722/*
4723 * Lock history stacks; we have 2 nested lock history stacks:
4724 *
4725 * HARD(IRQ)
4726 * SOFT(IRQ)
4727 *
4728 * The thing is that once we complete a HARD/SOFT IRQ the future task locks
4729 * should not depend on any of the locks observed while running the IRQ. So
4730 * what we do is rewind the history buffer and erase all our knowledge of that
4731 * temporal event.
4732 */
4733
4734void crossrelease_hist_start(enum xhlock_context_t c)
4735{
4736 struct task_struct *cur = current;
4737
4738 if (!cur->xhlocks)
4739 return;
4740
4741 cur->xhlock_idx_hist[c] = cur->xhlock_idx;
4742 cur->hist_id_save[c] = cur->hist_id;
4743}
4744
4745void crossrelease_hist_end(enum xhlock_context_t c)
4746{
4747 struct task_struct *cur = current;
4748
4749 if (cur->xhlocks) {
4750 unsigned int idx = cur->xhlock_idx_hist[c];
4751 struct hist_lock *h = &xhlock(idx);
4752
4753 cur->xhlock_idx = idx;
4754
4755 /* Check if the ring was overwritten. */
4756 if (h->hist_id != cur->hist_id_save[c])
4757 invalidate_xhlock(h);
4758 }
4759}
4760
4761/*
4762 * lockdep_invariant_state() is used to annotate independence inside a task, to
4763 * make one task look like multiple independent 'tasks'.
4764 *
4765 * Take for instance workqueues; each work is independent of the last. The
4766 * completion of a future work does not depend on the completion of a past work
4767 * (in general). Therefore we must not carry that (lock) dependency across
4768 * works.
4769 *
4770 * This is true for many things; pretty much all kthreads fall into this
4771 * pattern, where they have an invariant state and future completions do not
4772 * depend on past completions. Its just that since they all have the 'same'
4773 * form -- the kthread does the same over and over -- it doesn't typically
4774 * matter.
4775 *
4776 * The same is true for system-calls, once a system call is completed (we've
4777 * returned to userspace) the next system call does not depend on the lock
4778 * history of the previous system call.
4779 *
4780 * They key property for independence, this invariant state, is that it must be
4781 * a point where we hold no locks and have no history. Because if we were to
4782 * hold locks, the restore at _end() would not necessarily recover it's history
4783 * entry. Similarly, independence per-definition means it does not depend on
4784 * prior state.
4785 */
4786void lockdep_invariant_state(bool force)
4787{
4788 /*
4789 * We call this at an invariant point, no current state, no history.
4790 * Verify the former, enforce the latter.
4791 */
4792 WARN_ON_ONCE(!force && current->lockdep_depth);
4793 if (current->xhlocks)
4794 invalidate_xhlock(&xhlock(current->xhlock_idx));
4795}
4796
4797static int cross_lock(struct lockdep_map *lock)
4798{
4799 return lock ? lock->cross : 0;
4800}
4801
4802/*
4803 * This is needed to decide the relationship between wrapable variables.
4804 */
4805static inline int before(unsigned int a, unsigned int b)
4806{
4807 return (int)(a - b) < 0;
4808}
4809
4810static inline struct lock_class *xhlock_class(struct hist_lock *xhlock)
4811{
4812 return hlock_class(&xhlock->hlock);
4813}
4814
4815static inline struct lock_class *xlock_class(struct cross_lock *xlock)
4816{
4817 return hlock_class(&xlock->hlock);
4818}
4819
4820/*
4821 * Should we check a dependency with previous one?
4822 */
4823static inline int depend_before(struct held_lock *hlock)
4824{
4825 return hlock->read != 2 && hlock->check && !hlock->trylock;
4826}
4827
4828/*
4829 * Should we check a dependency with next one?
4830 */
4831static inline int depend_after(struct held_lock *hlock)
4832{
4833 return hlock->read != 2 && hlock->check;
4834}
4835
4836/*
4837 * Check if the xhlock is valid, which would be false if,
4838 *
4839 * 1. Has not used after initializaion yet.
4840 * 2. Got invalidated.
4841 *
4842 * Remind hist_lock is implemented as a ring buffer.
4843 */
4844static inline int xhlock_valid(struct hist_lock *xhlock)
4845{
4846 /*
4847 * xhlock->hlock.instance must be !NULL.
4848 */
4849 return !!xhlock->hlock.instance;
4850}
4851
4852/*
4853 * Record a hist_lock entry.
4854 *
4855 * Irq disable is only required.
4856 */
4857static void add_xhlock(struct held_lock *hlock)
4858{
4859 unsigned int idx = ++current->xhlock_idx;
4860 struct hist_lock *xhlock = &xhlock(idx);
4861
4862#ifdef CONFIG_DEBUG_LOCKDEP
4863 /*
4864 * This can be done locklessly because they are all task-local
4865 * state, we must however ensure IRQs are disabled.
4866 */
4867 WARN_ON_ONCE(!irqs_disabled());
4868#endif
4869
4870 /* Initialize hist_lock's members */
4871 xhlock->hlock = *hlock;
4872 xhlock->hist_id = ++current->hist_id;
4873
4874 xhlock->trace.nr_entries = 0;
4875 xhlock->trace.max_entries = MAX_XHLOCK_TRACE_ENTRIES;
4876 xhlock->trace.entries = xhlock->trace_entries;
4877
4878 if (crossrelease_fullstack) {
4879 xhlock->trace.skip = 3;
4880 save_stack_trace(&xhlock->trace);
4881 } else {
4882 xhlock->trace.nr_entries = 1;
4883 xhlock->trace.entries[0] = hlock->acquire_ip;
4884 }
4885}
4886
4887static inline int same_context_xhlock(struct hist_lock *xhlock)
4888{
4889 return xhlock->hlock.irq_context == task_irq_context(current);
4890}
4891
4892/*
4893 * This should be lockless as far as possible because this would be
4894 * called very frequently.
4895 */
4896static void check_add_xhlock(struct held_lock *hlock)
4897{
4898 /*
4899 * Record a hist_lock, only in case that acquisitions ahead
4900 * could depend on the held_lock. For example, if the held_lock
4901 * is trylock then acquisitions ahead never depends on that.
4902 * In that case, we don't need to record it. Just return.
4903 */
4904 if (!current->xhlocks || !depend_before(hlock))
4905 return;
4906
4907 add_xhlock(hlock);
4908}
4909
4910/*
4911 * For crosslock.
4912 */
4913static int add_xlock(struct held_lock *hlock)
4914{
4915 struct cross_lock *xlock;
4916 unsigned int gen_id;
4917
4918 if (!graph_lock())
4919 return 0;
4920
4921 xlock = &((struct lockdep_map_cross *)hlock->instance)->xlock;
4922
4923 /*
4924 * When acquisitions for a crosslock are overlapped, we use
4925 * nr_acquire to perform commit for them, based on cross_gen_id
4926 * of the first acquisition, which allows to add additional
4927 * dependencies.
4928 *
4929 * Moreover, when no acquisition of a crosslock is in progress,
4930 * we should not perform commit because the lock might not exist
4931 * any more, which might cause incorrect memory access. So we
4932 * have to track the number of acquisitions of a crosslock.
4933 *
4934 * depend_after() is necessary to initialize only the first
4935 * valid xlock so that the xlock can be used on its commit.
4936 */
4937 if (xlock->nr_acquire++ && depend_after(&xlock->hlock))
4938 goto unlock;
4939
4940 gen_id = (unsigned int)atomic_inc_return(&cross_gen_id);
4941 xlock->hlock = *hlock;
4942 xlock->hlock.gen_id = gen_id;
4943unlock:
4944 graph_unlock();
4945 return 1;
4946}
4947
4948/*
4949 * Called for both normal and crosslock acquires. Normal locks will be
4950 * pushed on the hist_lock queue. Cross locks will record state and
4951 * stop regular lock_acquire() to avoid being placed on the held_lock
4952 * stack.
4953 *
4954 * Return: 0 - failure;
4955 * 1 - crosslock, done;
4956 * 2 - normal lock, continue to held_lock[] ops.
4957 */
4958static int lock_acquire_crosslock(struct held_lock *hlock)
4959{
4960 /*
4961 * CONTEXT 1 CONTEXT 2
4962 * --------- ---------
4963 * lock A (cross)
4964 * X = atomic_inc_return(&cross_gen_id)
4965 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4966 * Y = atomic_read_acquire(&cross_gen_id)
4967 * lock B
4968 *
4969 * atomic_read_acquire() is for ordering between A and B,
4970 * IOW, A happens before B, when CONTEXT 2 see Y >= X.
4971 *
4972 * Pairs with atomic_inc_return() in add_xlock().
4973 */
4974 hlock->gen_id = (unsigned int)atomic_read_acquire(&cross_gen_id);
4975
4976 if (cross_lock(hlock->instance))
4977 return add_xlock(hlock);
4978
4979 check_add_xhlock(hlock);
4980 return 2;
4981}
4982
4983static int copy_trace(struct stack_trace *trace)
4984{
4985 unsigned long *buf = stack_trace + nr_stack_trace_entries;
4986 unsigned int max_nr = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries;
4987 unsigned int nr = min(max_nr, trace->nr_entries);
4988
4989 trace->nr_entries = nr;
4990 memcpy(buf, trace->entries, nr * sizeof(trace->entries[0]));
4991 trace->entries = buf;
4992 nr_stack_trace_entries += nr;
4993
4994 if (nr_stack_trace_entries >= MAX_STACK_TRACE_ENTRIES-1) {
4995 if (!debug_locks_off_graph_unlock())
4996 return 0;
4997
4998 print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!");
4999 dump_stack();
5000
5001 return 0;
5002 }
5003
5004 return 1;
5005}
5006
5007static int commit_xhlock(struct cross_lock *xlock, struct hist_lock *xhlock)
5008{
5009 unsigned int xid, pid;
5010 u64 chain_key;
5011
5012 xid = xlock_class(xlock) - lock_classes;
5013 chain_key = iterate_chain_key((u64)0, xid);
5014 pid = xhlock_class(xhlock) - lock_classes;
5015 chain_key = iterate_chain_key(chain_key, pid);
5016
5017 if (lookup_chain_cache(chain_key))
5018 return 1;
5019
5020 if (!add_chain_cache_classes(xid, pid, xhlock->hlock.irq_context,
5021 chain_key))
5022 return 0;
5023
5024 if (!check_prev_add(current, &xlock->hlock, &xhlock->hlock, 1,
5025 &xhlock->trace, copy_trace))
5026 return 0;
5027
5028 return 1;
5029}
5030
5031static void commit_xhlocks(struct cross_lock *xlock)
5032{
5033 unsigned int cur = current->xhlock_idx;
5034 unsigned int prev_hist_id = xhlock(cur).hist_id;
5035 unsigned int i;
5036
5037 if (!graph_lock())
5038 return;
5039
5040 if (xlock->nr_acquire) {
5041 for (i = 0; i < MAX_XHLOCKS_NR; i++) {
5042 struct hist_lock *xhlock = &xhlock(cur - i);
5043
5044 if (!xhlock_valid(xhlock))
5045 break;
5046
5047 if (before(xhlock->hlock.gen_id, xlock->hlock.gen_id))
5048 break;
5049
5050 if (!same_context_xhlock(xhlock))
5051 break;
5052
5053 /*
5054 * Filter out the cases where the ring buffer was
5055 * overwritten and the current entry has a bigger
5056 * hist_id than the previous one, which is impossible
5057 * otherwise:
5058 */
5059 if (unlikely(before(prev_hist_id, xhlock->hist_id)))
5060 break;
5061
5062 prev_hist_id = xhlock->hist_id;
5063
5064 /*
5065 * commit_xhlock() returns 0 with graph_lock already
5066 * released on failure.
5067 */
5068 if (!commit_xhlock(xlock, xhlock))
5069 return;
5070 }
5071 }
5072
5073 graph_unlock();
5074}
5075
5076void lock_commit_crosslock(struct lockdep_map *lock)
5077{
5078 struct cross_lock *xlock;
5079 unsigned long flags;
5080
5081 if (unlikely(!debug_locks || current->lockdep_recursion))
5082 return;
5083
5084 if (!current->xhlocks)
5085 return;
5086
5087 /*
5088 * Do commit hist_locks with the cross_lock, only in case that
5089 * the cross_lock could depend on acquisitions after that.
5090 *
5091 * For example, if the cross_lock does not have the 'check' flag
5092 * then we don't need to check dependencies and commit for that.
5093 * Just skip it. In that case, of course, the cross_lock does
5094 * not depend on acquisitions ahead, either.
5095 *
5096 * WARNING: Don't do that in add_xlock() in advance. When an
5097 * acquisition context is different from the commit context,
5098 * invalid(skipped) cross_lock might be accessed.
5099 */
5100 if (!depend_after(&((struct lockdep_map_cross *)lock)->xlock.hlock))
5101 return;
5102
5103 raw_local_irq_save(flags);
5104 check_flags(flags);
5105 current->lockdep_recursion = 1;
5106 xlock = &((struct lockdep_map_cross *)lock)->xlock;
5107 commit_xhlocks(xlock);
5108 current->lockdep_recursion = 0;
5109 raw_local_irq_restore(flags);
5110}
5111EXPORT_SYMBOL_GPL(lock_commit_crosslock);
5112
5113/*
5114 * Return: 0 - failure;
5115 * 1 - crosslock, done;
5116 * 2 - normal lock, continue to held_lock[] ops.
5117 */
5118static int lock_release_crosslock(struct lockdep_map *lock)
5119{
5120 if (cross_lock(lock)) {
5121 if (!graph_lock())
5122 return 0;
5123 ((struct lockdep_map_cross *)lock)->xlock.nr_acquire--;
5124 graph_unlock();
5125 return 1;
5126 }
5127 return 2;
5128}
5129
5130static void cross_init(struct lockdep_map *lock, int cross)
5131{
5132 if (cross)
5133 ((struct lockdep_map_cross *)lock)->xlock.nr_acquire = 0;
5134
5135 lock->cross = cross;
5136
5137 /*
5138 * Crossrelease assumes that the ring buffer size of xhlocks
5139 * is aligned with power of 2. So force it on build.
5140 */
5141 BUILD_BUG_ON(MAX_XHLOCKS_NR & (MAX_XHLOCKS_NR - 1));
5142}
5143
5144void lockdep_init_task(struct task_struct *task)
5145{
5146 int i;
5147
5148 task->xhlock_idx = UINT_MAX;
5149 task->hist_id = 0;
5150
5151 for (i = 0; i < XHLOCK_CTX_NR; i++) {
5152 task->xhlock_idx_hist[i] = UINT_MAX;
5153 task->hist_id_save[i] = 0;
5154 }
5155
5156 task->xhlocks = kzalloc(sizeof(struct hist_lock) * MAX_XHLOCKS_NR,
5157 GFP_KERNEL);
5158}
5159
5160void lockdep_free_task(struct task_struct *task)
5161{
5162 if (task->xhlocks) {
5163 void *tmp = task->xhlocks;
5164 /* Disable crossrelease for current */
5165 task->xhlocks = NULL;
5166 kfree(tmp);
5167 }
5168}
5169#endif
diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c
index 1fd1a7543cdd..936f3d14dd6b 100644
--- a/kernel/locking/spinlock.c
+++ b/kernel/locking/spinlock.c
@@ -66,12 +66,8 @@ void __lockfunc __raw_##op##_lock(locktype##_t *lock) \
66 break; \ 66 break; \
67 preempt_enable(); \ 67 preempt_enable(); \
68 \ 68 \
69 if (!(lock)->break_lock) \ 69 arch_##op##_relax(&lock->raw_lock); \
70 (lock)->break_lock = 1; \
71 while ((lock)->break_lock) \
72 arch_##op##_relax(&lock->raw_lock); \
73 } \ 70 } \
74 (lock)->break_lock = 0; \
75} \ 71} \
76 \ 72 \
77unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \ 73unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \
@@ -86,12 +82,9 @@ unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \
86 local_irq_restore(flags); \ 82 local_irq_restore(flags); \
87 preempt_enable(); \ 83 preempt_enable(); \
88 \ 84 \
89 if (!(lock)->break_lock) \ 85 arch_##op##_relax(&lock->raw_lock); \
90 (lock)->break_lock = 1; \
91 while ((lock)->break_lock) \
92 arch_##op##_relax(&lock->raw_lock); \
93 } \ 86 } \
94 (lock)->break_lock = 0; \ 87 \
95 return flags; \ 88 return flags; \
96} \ 89} \
97 \ 90 \
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 75554f366fd3..644fa2e3d993 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5097,17 +5097,6 @@ SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
5097 return ret; 5097 return ret;
5098} 5098}
5099 5099
5100/**
5101 * sys_sched_rr_get_interval - return the default timeslice of a process.
5102 * @pid: pid of the process.
5103 * @interval: userspace pointer to the timeslice value.
5104 *
5105 * this syscall writes the default timeslice value of a given process
5106 * into the user-space timespec buffer. A value of '0' means infinity.
5107 *
5108 * Return: On success, 0 and the timeslice is in @interval. Otherwise,
5109 * an error code.
5110 */
5111static int sched_rr_get_interval(pid_t pid, struct timespec64 *t) 5100static int sched_rr_get_interval(pid_t pid, struct timespec64 *t)
5112{ 5101{
5113 struct task_struct *p; 5102 struct task_struct *p;
@@ -5144,6 +5133,17 @@ out_unlock:
5144 return retval; 5133 return retval;
5145} 5134}
5146 5135
5136/**
5137 * sys_sched_rr_get_interval - return the default timeslice of a process.
5138 * @pid: pid of the process.
5139 * @interval: userspace pointer to the timeslice value.
5140 *
5141 * this syscall writes the default timeslice value of a given process
5142 * into the user-space timespec buffer. A value of '0' means infinity.
5143 *
5144 * Return: On success, 0 and the timeslice is in @interval. Otherwise,
5145 * an error code.
5146 */
5147SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, 5147SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
5148 struct timespec __user *, interval) 5148 struct timespec __user *, interval)
5149{ 5149{
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 2f52ec0f1539..d6717a3331a1 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -244,7 +244,7 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
244#ifdef CONFIG_NO_HZ_COMMON 244#ifdef CONFIG_NO_HZ_COMMON
245static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) 245static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
246{ 246{
247 unsigned long idle_calls = tick_nohz_get_idle_calls(); 247 unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu);
248 bool ret = idle_calls == sg_cpu->saved_idle_calls; 248 bool ret = idle_calls == sg_cpu->saved_idle_calls;
249 249
250 sg_cpu->saved_idle_calls = idle_calls; 250 sg_cpu->saved_idle_calls = idle_calls;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 4056c19ca3f0..665ace2fc558 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2034,8 +2034,9 @@ static void pull_rt_task(struct rq *this_rq)
2034 bool resched = false; 2034 bool resched = false;
2035 struct task_struct *p; 2035 struct task_struct *p;
2036 struct rq *src_rq; 2036 struct rq *src_rq;
2037 int rt_overload_count = rt_overloaded(this_rq);
2037 2038
2038 if (likely(!rt_overloaded(this_rq))) 2039 if (likely(!rt_overload_count))
2039 return; 2040 return;
2040 2041
2041 /* 2042 /*
@@ -2044,6 +2045,11 @@ static void pull_rt_task(struct rq *this_rq)
2044 */ 2045 */
2045 smp_rmb(); 2046 smp_rmb();
2046 2047
2048 /* If we are the only overloaded CPU do nothing */
2049 if (rt_overload_count == 1 &&
2050 cpumask_test_cpu(this_rq->cpu, this_rq->rd->rto_mask))
2051 return;
2052
2047#ifdef HAVE_RT_PUSH_IPI 2053#ifdef HAVE_RT_PUSH_IPI
2048 if (sched_feat(RT_PUSH_IPI)) { 2054 if (sched_feat(RT_PUSH_IPI)) {
2049 tell_cpu_to_push(this_rq); 2055 tell_cpu_to_push(this_rq);
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index e776fc8cc1df..f6b5f19223d6 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -95,6 +95,7 @@ config NO_HZ_FULL
95 select RCU_NOCB_CPU 95 select RCU_NOCB_CPU
96 select VIRT_CPU_ACCOUNTING_GEN 96 select VIRT_CPU_ACCOUNTING_GEN
97 select IRQ_WORK 97 select IRQ_WORK
98 select CPU_ISOLATION
98 help 99 help
99 Adaptively try to shutdown the tick whenever possible, even when 100 Adaptively try to shutdown the tick whenever possible, even when
100 the CPU is running tasks. Typically this requires running a single 101 the CPU is running tasks. Typically this requires running a single
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 13d6881f908b..ec999f32c840 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -434,17 +434,22 @@ static struct pid *good_sigevent(sigevent_t * event)
434{ 434{
435 struct task_struct *rtn = current->group_leader; 435 struct task_struct *rtn = current->group_leader;
436 436
437 if ((event->sigev_notify & SIGEV_THREAD_ID ) && 437 switch (event->sigev_notify) {
438 (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) || 438 case SIGEV_SIGNAL | SIGEV_THREAD_ID:
439 !same_thread_group(rtn, current) || 439 rtn = find_task_by_vpid(event->sigev_notify_thread_id);
440 (event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL)) 440 if (!rtn || !same_thread_group(rtn, current))
441 return NULL;
442 /* FALLTHRU */
443 case SIGEV_SIGNAL:
444 case SIGEV_THREAD:
445 if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX)
446 return NULL;
447 /* FALLTHRU */
448 case SIGEV_NONE:
449 return task_pid(rtn);
450 default:
441 return NULL; 451 return NULL;
442 452 }
443 if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) &&
444 ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX)))
445 return NULL;
446
447 return task_pid(rtn);
448} 453}
449 454
450static struct k_itimer * alloc_posix_timer(void) 455static struct k_itimer * alloc_posix_timer(void)
@@ -669,7 +674,7 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting)
669 struct timespec64 ts64; 674 struct timespec64 ts64;
670 bool sig_none; 675 bool sig_none;
671 676
672 sig_none = (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE; 677 sig_none = timr->it_sigev_notify == SIGEV_NONE;
673 iv = timr->it_interval; 678 iv = timr->it_interval;
674 679
675 /* interval timer ? */ 680 /* interval timer ? */
@@ -856,7 +861,7 @@ int common_timer_set(struct k_itimer *timr, int flags,
856 861
857 timr->it_interval = timespec64_to_ktime(new_setting->it_interval); 862 timr->it_interval = timespec64_to_ktime(new_setting->it_interval);
858 expires = timespec64_to_ktime(new_setting->it_value); 863 expires = timespec64_to_ktime(new_setting->it_value);
859 sigev_none = (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE; 864 sigev_none = timr->it_sigev_notify == SIGEV_NONE;
860 865
861 kc->timer_arm(timr, expires, flags & TIMER_ABSTIME, sigev_none); 866 kc->timer_arm(timr, expires, flags & TIMER_ABSTIME, sigev_none);
862 timr->it_active = !sigev_none; 867 timr->it_active = !sigev_none;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 99578f06c8d4..f7cc7abfcf25 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -650,6 +650,11 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
650 ts->next_tick = 0; 650 ts->next_tick = 0;
651} 651}
652 652
653static inline bool local_timer_softirq_pending(void)
654{
655 return local_softirq_pending() & TIMER_SOFTIRQ;
656}
657
653static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, 658static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
654 ktime_t now, int cpu) 659 ktime_t now, int cpu)
655{ 660{
@@ -666,8 +671,18 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
666 } while (read_seqretry(&jiffies_lock, seq)); 671 } while (read_seqretry(&jiffies_lock, seq));
667 ts->last_jiffies = basejiff; 672 ts->last_jiffies = basejiff;
668 673
669 if (rcu_needs_cpu(basemono, &next_rcu) || 674 /*
670 arch_needs_cpu() || irq_work_needs_cpu()) { 675 * Keep the periodic tick, when RCU, architecture or irq_work
676 * requests it.
677 * Aside of that check whether the local timer softirq is
678 * pending. If so it's a bad idea to call get_next_timer_interrupt()
679 * because there is an already expired timer, so it will request
680 * immediate expiry, which rearms the hardware timer with a
681 * minimal delta which brings us back to this place
682 * immediately. Lather, rinse and repeat...
683 */
684 if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() ||
685 irq_work_needs_cpu() || local_timer_softirq_pending()) {
671 next_tick = basemono + TICK_NSEC; 686 next_tick = basemono + TICK_NSEC;
672 } else { 687 } else {
673 /* 688 /*
@@ -986,6 +1001,19 @@ ktime_t tick_nohz_get_sleep_length(void)
986} 1001}
987 1002
988/** 1003/**
1004 * tick_nohz_get_idle_calls_cpu - return the current idle calls counter value
1005 * for a particular CPU.
1006 *
1007 * Called from the schedutil frequency scaling governor in scheduler context.
1008 */
1009unsigned long tick_nohz_get_idle_calls_cpu(int cpu)
1010{
1011 struct tick_sched *ts = tick_get_tick_sched(cpu);
1012
1013 return ts->idle_calls;
1014}
1015
1016/**
989 * tick_nohz_get_idle_calls - return the current idle calls counter value 1017 * tick_nohz_get_idle_calls - return the current idle calls counter value
990 * 1018 *
991 * Called from the schedutil frequency scaling governor in scheduler context. 1019 * Called from the schedutil frequency scaling governor in scheduler context.
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index ffebcf878fba..89a9e1b4264a 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -823,11 +823,10 @@ static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu)
823 struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu); 823 struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu);
824 824
825 /* 825 /*
826 * If the timer is deferrable and nohz is active then we need to use 826 * If the timer is deferrable and NO_HZ_COMMON is set then we need
827 * the deferrable base. 827 * to use the deferrable base.
828 */ 828 */
829 if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active && 829 if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE))
830 (tflags & TIMER_DEFERRABLE))
831 base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu); 830 base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu);
832 return base; 831 return base;
833} 832}
@@ -837,11 +836,10 @@ static inline struct timer_base *get_timer_this_cpu_base(u32 tflags)
837 struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); 836 struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
838 837
839 /* 838 /*
840 * If the timer is deferrable and nohz is active then we need to use 839 * If the timer is deferrable and NO_HZ_COMMON is set then we need
841 * the deferrable base. 840 * to use the deferrable base.
842 */ 841 */
843 if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active && 842 if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE))
844 (tflags & TIMER_DEFERRABLE))
845 base = this_cpu_ptr(&timer_bases[BASE_DEF]); 843 base = this_cpu_ptr(&timer_bases[BASE_DEF]);
846 return base; 844 return base;
847} 845}
@@ -1009,8 +1007,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option
1009 if (!ret && (options & MOD_TIMER_PENDING_ONLY)) 1007 if (!ret && (options & MOD_TIMER_PENDING_ONLY))
1010 goto out_unlock; 1008 goto out_unlock;
1011 1009
1012 debug_activate(timer, expires);
1013
1014 new_base = get_target_base(base, timer->flags); 1010 new_base = get_target_base(base, timer->flags);
1015 1011
1016 if (base != new_base) { 1012 if (base != new_base) {
@@ -1034,6 +1030,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires, unsigned int option
1034 } 1030 }
1035 } 1031 }
1036 1032
1033 debug_activate(timer, expires);
1034
1037 timer->expires = expires; 1035 timer->expires = expires;
1038 /* 1036 /*
1039 * If 'idx' was calculated above and the base time did not advance 1037 * If 'idx' was calculated above and the base time did not advance
@@ -1684,7 +1682,7 @@ static __latent_entropy void run_timer_softirq(struct softirq_action *h)
1684 base->must_forward_clk = false; 1682 base->must_forward_clk = false;
1685 1683
1686 __run_timers(base); 1684 __run_timers(base);
1687 if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active) 1685 if (IS_ENABLED(CONFIG_NO_HZ_COMMON))
1688 __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); 1686 __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
1689} 1687}
1690 1688
@@ -1855,6 +1853,21 @@ static void migrate_timer_list(struct timer_base *new_base, struct hlist_head *h
1855 } 1853 }
1856} 1854}
1857 1855
1856int timers_prepare_cpu(unsigned int cpu)
1857{
1858 struct timer_base *base;
1859 int b;
1860
1861 for (b = 0; b < NR_BASES; b++) {
1862 base = per_cpu_ptr(&timer_bases[b], cpu);
1863 base->clk = jiffies;
1864 base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
1865 base->is_idle = false;
1866 base->must_forward_clk = true;
1867 }
1868 return 0;
1869}
1870
1858int timers_dead_cpu(unsigned int cpu) 1871int timers_dead_cpu(unsigned int cpu)
1859{ 1872{
1860 struct timer_base *old_base; 1873 struct timer_base *old_base;
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index af7dad126c13..904c952ac383 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -164,6 +164,7 @@ config PREEMPTIRQ_EVENTS
164 bool "Enable trace events for preempt and irq disable/enable" 164 bool "Enable trace events for preempt and irq disable/enable"
165 select TRACE_IRQFLAGS 165 select TRACE_IRQFLAGS
166 depends on DEBUG_PREEMPT || !PROVE_LOCKING 166 depends on DEBUG_PREEMPT || !PROVE_LOCKING
167 depends on TRACING
167 default n 168 default n
168 help 169 help
169 Enable tracing of disable and enable events for preemption and irqs. 170 Enable tracing of disable and enable events for preemption and irqs.
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 0ce99c379c30..40207c2a4113 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -343,14 +343,13 @@ static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
343 .arg4_type = ARG_CONST_SIZE, 343 .arg4_type = ARG_CONST_SIZE,
344}; 344};
345 345
346static DEFINE_PER_CPU(struct perf_sample_data, bpf_sd); 346static DEFINE_PER_CPU(struct perf_sample_data, bpf_trace_sd);
347 347
348static __always_inline u64 348static __always_inline u64
349__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, 349__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
350 u64 flags, struct perf_raw_record *raw) 350 u64 flags, struct perf_sample_data *sd)
351{ 351{
352 struct bpf_array *array = container_of(map, struct bpf_array, map); 352 struct bpf_array *array = container_of(map, struct bpf_array, map);
353 struct perf_sample_data *sd = this_cpu_ptr(&bpf_sd);
354 unsigned int cpu = smp_processor_id(); 353 unsigned int cpu = smp_processor_id();
355 u64 index = flags & BPF_F_INDEX_MASK; 354 u64 index = flags & BPF_F_INDEX_MASK;
356 struct bpf_event_entry *ee; 355 struct bpf_event_entry *ee;
@@ -373,8 +372,6 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
373 if (unlikely(event->oncpu != cpu)) 372 if (unlikely(event->oncpu != cpu))
374 return -EOPNOTSUPP; 373 return -EOPNOTSUPP;
375 374
376 perf_sample_data_init(sd, 0, 0);
377 sd->raw = raw;
378 perf_event_output(event, sd, regs); 375 perf_event_output(event, sd, regs);
379 return 0; 376 return 0;
380} 377}
@@ -382,6 +379,7 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
382BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, 379BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
383 u64, flags, void *, data, u64, size) 380 u64, flags, void *, data, u64, size)
384{ 381{
382 struct perf_sample_data *sd = this_cpu_ptr(&bpf_trace_sd);
385 struct perf_raw_record raw = { 383 struct perf_raw_record raw = {
386 .frag = { 384 .frag = {
387 .size = size, 385 .size = size,
@@ -392,7 +390,10 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
392 if (unlikely(flags & ~(BPF_F_INDEX_MASK))) 390 if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
393 return -EINVAL; 391 return -EINVAL;
394 392
395 return __bpf_perf_event_output(regs, map, flags, &raw); 393 perf_sample_data_init(sd, 0, 0);
394 sd->raw = &raw;
395
396 return __bpf_perf_event_output(regs, map, flags, sd);
396} 397}
397 398
398static const struct bpf_func_proto bpf_perf_event_output_proto = { 399static const struct bpf_func_proto bpf_perf_event_output_proto = {
@@ -407,10 +408,12 @@ static const struct bpf_func_proto bpf_perf_event_output_proto = {
407}; 408};
408 409
409static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs); 410static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
411static DEFINE_PER_CPU(struct perf_sample_data, bpf_misc_sd);
410 412
411u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, 413u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
412 void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy) 414 void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
413{ 415{
416 struct perf_sample_data *sd = this_cpu_ptr(&bpf_misc_sd);
414 struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs); 417 struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
415 struct perf_raw_frag frag = { 418 struct perf_raw_frag frag = {
416 .copy = ctx_copy, 419 .copy = ctx_copy,
@@ -428,8 +431,10 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
428 }; 431 };
429 432
430 perf_fetch_caller_regs(regs); 433 perf_fetch_caller_regs(regs);
434 perf_sample_data_init(sd, 0, 0);
435 sd->raw = &raw;
431 436
432 return __bpf_perf_event_output(regs, map, flags, &raw); 437 return __bpf_perf_event_output(regs, map, flags, sd);
433} 438}
434 439
435BPF_CALL_0(bpf_get_current_task) 440BPF_CALL_0(bpf_get_current_task)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 91874a95060d..9ab18995ff1e 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -280,6 +280,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
280/* Missed count stored at end */ 280/* Missed count stored at end */
281#define RB_MISSED_STORED (1 << 30) 281#define RB_MISSED_STORED (1 << 30)
282 282
283#define RB_MISSED_FLAGS (RB_MISSED_EVENTS|RB_MISSED_STORED)
284
283struct buffer_data_page { 285struct buffer_data_page {
284 u64 time_stamp; /* page time stamp */ 286 u64 time_stamp; /* page time stamp */
285 local_t commit; /* write committed index */ 287 local_t commit; /* write committed index */
@@ -331,7 +333,9 @@ static void rb_init_page(struct buffer_data_page *bpage)
331 */ 333 */
332size_t ring_buffer_page_len(void *page) 334size_t ring_buffer_page_len(void *page)
333{ 335{
334 return local_read(&((struct buffer_data_page *)page)->commit) 336 struct buffer_data_page *bpage = page;
337
338 return (local_read(&bpage->commit) & ~RB_MISSED_FLAGS)
335 + BUF_PAGE_HDR_SIZE; 339 + BUF_PAGE_HDR_SIZE;
336} 340}
337 341
@@ -1799,12 +1803,6 @@ void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val)
1799} 1803}
1800EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite); 1804EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite);
1801 1805
1802static __always_inline void *
1803__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
1804{
1805 return bpage->data + index;
1806}
1807
1808static __always_inline void *__rb_page_index(struct buffer_page *bpage, unsigned index) 1806static __always_inline void *__rb_page_index(struct buffer_page *bpage, unsigned index)
1809{ 1807{
1810 return bpage->page->data + index; 1808 return bpage->page->data + index;
@@ -4406,8 +4404,13 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data)
4406{ 4404{
4407 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; 4405 struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
4408 struct buffer_data_page *bpage = data; 4406 struct buffer_data_page *bpage = data;
4407 struct page *page = virt_to_page(bpage);
4409 unsigned long flags; 4408 unsigned long flags;
4410 4409
4410 /* If the page is still in use someplace else, we can't reuse it */
4411 if (page_ref_count(page) > 1)
4412 goto out;
4413
4411 local_irq_save(flags); 4414 local_irq_save(flags);
4412 arch_spin_lock(&cpu_buffer->lock); 4415 arch_spin_lock(&cpu_buffer->lock);
4413 4416
@@ -4419,6 +4422,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data)
4419 arch_spin_unlock(&cpu_buffer->lock); 4422 arch_spin_unlock(&cpu_buffer->lock);
4420 local_irq_restore(flags); 4423 local_irq_restore(flags);
4421 4424
4425 out:
4422 free_page((unsigned long)bpage); 4426 free_page((unsigned long)bpage);
4423} 4427}
4424EXPORT_SYMBOL_GPL(ring_buffer_free_read_page); 4428EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 73e67b68c53b..2a8d8a294345 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -362,7 +362,7 @@ trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct
362} 362}
363 363
364/** 364/**
365 * trace_pid_filter_add_remove - Add or remove a task from a pid_list 365 * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
366 * @pid_list: The list to modify 366 * @pid_list: The list to modify
367 * @self: The current task for fork or NULL for exit 367 * @self: The current task for fork or NULL for exit
368 * @task: The task to add or remove 368 * @task: The task to add or remove
@@ -925,7 +925,7 @@ static void tracing_snapshot_instance(struct trace_array *tr)
925} 925}
926 926
927/** 927/**
928 * trace_snapshot - take a snapshot of the current buffer. 928 * tracing_snapshot - take a snapshot of the current buffer.
929 * 929 *
930 * This causes a swap between the snapshot buffer and the current live 930 * This causes a swap between the snapshot buffer and the current live
931 * tracing buffer. You can use this to take snapshots of the live 931 * tracing buffer. You can use this to take snapshots of the live
@@ -1004,9 +1004,9 @@ int tracing_alloc_snapshot(void)
1004EXPORT_SYMBOL_GPL(tracing_alloc_snapshot); 1004EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1005 1005
1006/** 1006/**
1007 * trace_snapshot_alloc - allocate and take a snapshot of the current buffer. 1007 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1008 * 1008 *
1009 * This is similar to trace_snapshot(), but it will allocate the 1009 * This is similar to tracing_snapshot(), but it will allocate the
1010 * snapshot buffer if it isn't already allocated. Use this only 1010 * snapshot buffer if it isn't already allocated. Use this only
1011 * where it is safe to sleep, as the allocation may sleep. 1011 * where it is safe to sleep, as the allocation may sleep.
1012 * 1012 *
@@ -1303,7 +1303,7 @@ unsigned long __read_mostly tracing_thresh;
1303/* 1303/*
1304 * Copy the new maximum trace into the separate maximum-trace 1304 * Copy the new maximum trace into the separate maximum-trace
1305 * structure. (this way the maximum trace is permanently saved, 1305 * structure. (this way the maximum trace is permanently saved,
1306 * for later retrieval via /sys/kernel/debug/tracing/latency_trace) 1306 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1307 */ 1307 */
1308static void 1308static void
1309__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) 1309__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
@@ -2415,7 +2415,7 @@ trace_process_export(struct trace_export *export,
2415 2415
2416 entry = ring_buffer_event_data(event); 2416 entry = ring_buffer_event_data(event);
2417 size = ring_buffer_event_length(event); 2417 size = ring_buffer_event_length(event);
2418 export->write(entry, size); 2418 export->write(export, entry, size);
2419} 2419}
2420 2420
2421static DEFINE_MUTEX(ftrace_export_lock); 2421static DEFINE_MUTEX(ftrace_export_lock);
@@ -4178,37 +4178,30 @@ static const struct file_operations show_traces_fops = {
4178 .llseek = seq_lseek, 4178 .llseek = seq_lseek,
4179}; 4179};
4180 4180
4181/*
4182 * The tracer itself will not take this lock, but still we want
4183 * to provide a consistent cpumask to user-space:
4184 */
4185static DEFINE_MUTEX(tracing_cpumask_update_lock);
4186
4187/*
4188 * Temporary storage for the character representation of the
4189 * CPU bitmask (and one more byte for the newline):
4190 */
4191static char mask_str[NR_CPUS + 1];
4192
4193static ssize_t 4181static ssize_t
4194tracing_cpumask_read(struct file *filp, char __user *ubuf, 4182tracing_cpumask_read(struct file *filp, char __user *ubuf,
4195 size_t count, loff_t *ppos) 4183 size_t count, loff_t *ppos)
4196{ 4184{
4197 struct trace_array *tr = file_inode(filp)->i_private; 4185 struct trace_array *tr = file_inode(filp)->i_private;
4186 char *mask_str;
4198 int len; 4187 int len;
4199 4188
4200 mutex_lock(&tracing_cpumask_update_lock); 4189 len = snprintf(NULL, 0, "%*pb\n",
4190 cpumask_pr_args(tr->tracing_cpumask)) + 1;
4191 mask_str = kmalloc(len, GFP_KERNEL);
4192 if (!mask_str)
4193 return -ENOMEM;
4201 4194
4202 len = snprintf(mask_str, count, "%*pb\n", 4195 len = snprintf(mask_str, len, "%*pb\n",
4203 cpumask_pr_args(tr->tracing_cpumask)); 4196 cpumask_pr_args(tr->tracing_cpumask));
4204 if (len >= count) { 4197 if (len >= count) {
4205 count = -EINVAL; 4198 count = -EINVAL;
4206 goto out_err; 4199 goto out_err;
4207 } 4200 }
4208 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1); 4201 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4209 4202
4210out_err: 4203out_err:
4211 mutex_unlock(&tracing_cpumask_update_lock); 4204 kfree(mask_str);
4212 4205
4213 return count; 4206 return count;
4214} 4207}
@@ -4228,8 +4221,6 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4228 if (err) 4221 if (err)
4229 goto err_unlock; 4222 goto err_unlock;
4230 4223
4231 mutex_lock(&tracing_cpumask_update_lock);
4232
4233 local_irq_disable(); 4224 local_irq_disable();
4234 arch_spin_lock(&tr->max_lock); 4225 arch_spin_lock(&tr->max_lock);
4235 for_each_tracing_cpu(cpu) { 4226 for_each_tracing_cpu(cpu) {
@@ -4252,8 +4243,6 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4252 local_irq_enable(); 4243 local_irq_enable();
4253 4244
4254 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new); 4245 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4255
4256 mutex_unlock(&tracing_cpumask_update_lock);
4257 free_cpumask_var(tracing_cpumask_new); 4246 free_cpumask_var(tracing_cpumask_new);
4258 4247
4259 return count; 4248 return count;
@@ -6780,7 +6769,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6780 .spd_release = buffer_spd_release, 6769 .spd_release = buffer_spd_release,
6781 }; 6770 };
6782 struct buffer_ref *ref; 6771 struct buffer_ref *ref;
6783 int entries, size, i; 6772 int entries, i;
6784 ssize_t ret = 0; 6773 ssize_t ret = 0;
6785 6774
6786#ifdef CONFIG_TRACER_MAX_TRACE 6775#ifdef CONFIG_TRACER_MAX_TRACE
@@ -6834,14 +6823,6 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6834 break; 6823 break;
6835 } 6824 }
6836 6825
6837 /*
6838 * zero out any left over data, this is going to
6839 * user land.
6840 */
6841 size = ring_buffer_page_len(ref->page);
6842 if (size < PAGE_SIZE)
6843 memset(ref->page + size, 0, PAGE_SIZE - size);
6844
6845 page = virt_to_page(ref->page); 6826 page = virt_to_page(ref->page);
6846 6827
6847 spd.pages[i] = page; 6828 spd.pages[i] = page;
@@ -7599,6 +7580,7 @@ allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size
7599 buf->data = alloc_percpu(struct trace_array_cpu); 7580 buf->data = alloc_percpu(struct trace_array_cpu);
7600 if (!buf->data) { 7581 if (!buf->data) {
7601 ring_buffer_free(buf->buffer); 7582 ring_buffer_free(buf->buffer);
7583 buf->buffer = NULL;
7602 return -ENOMEM; 7584 return -ENOMEM;
7603 } 7585 }
7604 7586
@@ -7622,7 +7604,9 @@ static int allocate_trace_buffers(struct trace_array *tr, int size)
7622 allocate_snapshot ? size : 1); 7604 allocate_snapshot ? size : 1);
7623 if (WARN_ON(ret)) { 7605 if (WARN_ON(ret)) {
7624 ring_buffer_free(tr->trace_buffer.buffer); 7606 ring_buffer_free(tr->trace_buffer.buffer);
7607 tr->trace_buffer.buffer = NULL;
7625 free_percpu(tr->trace_buffer.data); 7608 free_percpu(tr->trace_buffer.data);
7609 tr->trace_buffer.data = NULL;
7626 return -ENOMEM; 7610 return -ENOMEM;
7627 } 7611 }
7628 tr->allocated_snapshot = allocate_snapshot; 7612 tr->allocated_snapshot = allocate_snapshot;
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 734accc02418..3c7bfc4bf5e9 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -209,6 +209,10 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip,
209 if (__this_cpu_read(disable_stack_tracer) != 1) 209 if (__this_cpu_read(disable_stack_tracer) != 1)
210 goto out; 210 goto out;
211 211
212 /* If rcu is not watching, then save stack trace can fail */
213 if (!rcu_is_watching())
214 goto out;
215
212 ip += MCOUNT_INSN_SIZE; 216 ip += MCOUNT_INSN_SIZE;
213 217
214 check_stack(ip, &stack); 218 check_stack(ip, &stack);
diff --git a/kernel/uid16.c b/kernel/uid16.c
index ce74a4901d2b..ef1da2a5f9bd 100644
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -192,6 +192,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist)
192 return retval; 192 return retval;
193 } 193 }
194 194
195 groups_sort(group_info);
195 retval = set_current_groups(group_info); 196 retval = set_current_groups(group_info);
196 put_group_info(group_info); 197 put_group_info(group_info);
197 198
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 8fdb710bfdd7..43d18cb46308 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -38,7 +38,6 @@
38#include <linux/hardirq.h> 38#include <linux/hardirq.h>
39#include <linux/mempolicy.h> 39#include <linux/mempolicy.h>
40#include <linux/freezer.h> 40#include <linux/freezer.h>
41#include <linux/kallsyms.h>
42#include <linux/debug_locks.h> 41#include <linux/debug_locks.h>
43#include <linux/lockdep.h> 42#include <linux/lockdep.h>
44#include <linux/idr.h> 43#include <linux/idr.h>
@@ -48,6 +47,7 @@
48#include <linux/nodemask.h> 47#include <linux/nodemask.h>
49#include <linux/moduleparam.h> 48#include <linux/moduleparam.h>
50#include <linux/uaccess.h> 49#include <linux/uaccess.h>
50#include <linux/sched/isolation.h>
51 51
52#include "workqueue_internal.h" 52#include "workqueue_internal.h"
53 53
@@ -1634,7 +1634,7 @@ static void worker_enter_idle(struct worker *worker)
1634 mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT); 1634 mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
1635 1635
1636 /* 1636 /*
1637 * Sanity check nr_running. Because wq_unbind_fn() releases 1637 * Sanity check nr_running. Because unbind_workers() releases
1638 * pool->lock between setting %WORKER_UNBOUND and zapping 1638 * pool->lock between setting %WORKER_UNBOUND and zapping
1639 * nr_running, the warning may trigger spuriously. Check iff 1639 * nr_running, the warning may trigger spuriously. Check iff
1640 * unbind is not in progress. 1640 * unbind is not in progress.
@@ -4510,9 +4510,8 @@ void show_workqueue_state(void)
4510 * cpu comes back online. 4510 * cpu comes back online.
4511 */ 4511 */
4512 4512
4513static void wq_unbind_fn(struct work_struct *work) 4513static void unbind_workers(int cpu)
4514{ 4514{
4515 int cpu = smp_processor_id();
4516 struct worker_pool *pool; 4515 struct worker_pool *pool;
4517 struct worker *worker; 4516 struct worker *worker;
4518 4517
@@ -4589,16 +4588,6 @@ static void rebind_workers(struct worker_pool *pool)
4589 4588
4590 spin_lock_irq(&pool->lock); 4589 spin_lock_irq(&pool->lock);
4591 4590
4592 /*
4593 * XXX: CPU hotplug notifiers are weird and can call DOWN_FAILED
4594 * w/o preceding DOWN_PREPARE. Work around it. CPU hotplug is
4595 * being reworked and this can go away in time.
4596 */
4597 if (!(pool->flags & POOL_DISASSOCIATED)) {
4598 spin_unlock_irq(&pool->lock);
4599 return;
4600 }
4601
4602 pool->flags &= ~POOL_DISASSOCIATED; 4591 pool->flags &= ~POOL_DISASSOCIATED;
4603 4592
4604 for_each_pool_worker(worker, pool) { 4593 for_each_pool_worker(worker, pool) {
@@ -4709,12 +4698,13 @@ int workqueue_online_cpu(unsigned int cpu)
4709 4698
4710int workqueue_offline_cpu(unsigned int cpu) 4699int workqueue_offline_cpu(unsigned int cpu)
4711{ 4700{
4712 struct work_struct unbind_work;
4713 struct workqueue_struct *wq; 4701 struct workqueue_struct *wq;
4714 4702
4715 /* unbinding per-cpu workers should happen on the local CPU */ 4703 /* unbinding per-cpu workers should happen on the local CPU */
4716 INIT_WORK_ONSTACK(&unbind_work, wq_unbind_fn); 4704 if (WARN_ON(cpu != smp_processor_id()))
4717 queue_work_on(cpu, system_highpri_wq, &unbind_work); 4705 return -1;
4706
4707 unbind_workers(cpu);
4718 4708
4719 /* update NUMA affinity of unbound workqueues */ 4709 /* update NUMA affinity of unbound workqueues */
4720 mutex_lock(&wq_pool_mutex); 4710 mutex_lock(&wq_pool_mutex);
@@ -4722,9 +4712,6 @@ int workqueue_offline_cpu(unsigned int cpu)
4722 wq_update_unbound_numa(wq, cpu, false); 4712 wq_update_unbound_numa(wq, cpu, false);
4723 mutex_unlock(&wq_pool_mutex); 4713 mutex_unlock(&wq_pool_mutex);
4724 4714
4725 /* wait for per-cpu unbinding to finish */
4726 flush_work(&unbind_work);
4727 destroy_work_on_stack(&unbind_work);
4728 return 0; 4715 return 0;
4729} 4716}
4730 4717
@@ -4957,6 +4944,10 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
4957 if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL)) 4944 if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL))
4958 return -ENOMEM; 4945 return -ENOMEM;
4959 4946
4947 /*
4948 * Not excluding isolated cpus on purpose.
4949 * If the user wishes to include them, we allow that.
4950 */
4960 cpumask_and(cpumask, cpumask, cpu_possible_mask); 4951 cpumask_and(cpumask, cpumask, cpu_possible_mask);
4961 if (!cpumask_empty(cpumask)) { 4952 if (!cpumask_empty(cpumask)) {
4962 apply_wqattrs_lock(); 4953 apply_wqattrs_lock();
@@ -5555,7 +5546,7 @@ int __init workqueue_init_early(void)
5555 WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long)); 5546 WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
5556 5547
5557 BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL)); 5548 BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
5558 cpumask_copy(wq_unbound_cpumask, cpu_possible_mask); 5549 cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(HK_FLAG_DOMAIN));
5559 5550
5560 pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC); 5551 pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
5561 5552